bodhi/tools/log_stats.py | 101 +++++++++++++++++++++++++++++++ bodhi/tools/metrics.py | 152 ++++++++++++++++++++++++++++++++++++++++++----- bodhi/tools/pickledb.py | 42 +++++++----- 3 files changed, 263 insertions(+), 32 deletions(-)
New commits: commit 78195e8fec65f29801bbaca3aa8557f7e44f8a86 Author: Luke Macken lmacken@redhat.com Date: Mon May 3 17:41:55 2010 -0400
Make our pickledb script skip duplicate updates & releases
diff --git a/bodhi/tools/pickledb.py b/bodhi/tools/pickledb.py index 964fde3..cdc1dba 100755 --- a/bodhi/tools/pickledb.py +++ b/bodhi/tools/pickledb.py @@ -97,7 +97,10 @@ def load_db(): # {'updates': [], 'releases': []} if isinstance(data, dict): for release in data['releases']: - Release(**release) + try: + Release.byName(release['name']) + except SQLObjectNotFound: + Release(**release) data = data['updates']
progress = ProgressBar(maxValue=len(data)) @@ -124,20 +127,23 @@ def load_db(): if not u.has_key('date_modified'): u['date_modified'] = None
- update = PackageUpdate(title=u['title'], - date_submitted=u['date_submitted'], - date_pushed=u['date_pushed'], - date_modified=u['date_modified'], - release=release, - submitter=u['submitter'], - updateid=u['updateid'], - type=u['type'], - status=u['status'], - pushed=u['pushed'], - notes=u['notes'], - karma=u['karma'], - request=request, - approved=u['approved']) + try: + update = PackageUpdate.byTitle(u['title']) + except SQLObjectNotFound: + update = PackageUpdate(title=u['title'], + date_submitted=u['date_submitted'], + date_pushed=u['date_pushed'], + date_modified=u['date_modified'], + release=release, + submitter=u['submitter'], + updateid=u['updateid'], + type=u['type'], + status=u['status'], + pushed=u['pushed'], + notes=u['notes'], + karma=u['karma'], + request=request, + approved=u['approved'])
## Create Package and PackageBuild objects for pkg, nvr in u['builds']:
commit bb59d772c55e2be3dd5e1a822fc38596e96ee22d Author: Luke Macken lmacken@redhat.com Date: Mon May 3 17:41:29 2010 -0400
Store the 'anonymous' comment property in our pickledb tool
diff --git a/bodhi/tools/pickledb.py b/bodhi/tools/pickledb.py index bdac628..964fde3 100755 --- a/bodhi/tools/pickledb.py +++ b/bodhi/tools/pickledb.py @@ -74,7 +74,7 @@ def save_db(): data['pushed'] = update.pushed data['notes'] = update.notes data['request'] = update.request - data['comments'] = [(c.timestamp, c.author, c.text, c.karma) for c in update.comments] + data['comments'] = [(c.timestamp, c.author, c.text, c.karma, c.anonymous) for c in update.comments] if hasattr(update, 'approved'): data['approved'] = update.approved else: @@ -167,9 +167,9 @@ def load_db(): except SQLObjectNotFound: cve = CVE(cve_id=cve_id) update.addCVE(cve) - for timestamp, author, text, karma in u['comments']: + for timestamp, author, text, karma, anonymous in u['comments']: comment = Comment(timestamp=timestamp, author=author, text=text, - karma=karma, update=update) + karma=karma, update=update, anonymous=anonymous)
progress()
commit bd3daef805a45d8d81b2ccc07c42b1aa4ecc1bdf Author: Luke Macken lmacken@redhat.com Date: Mon May 3 17:39:52 2010 -0400
Add a metrics.py tool for generating more in-depth statistics
diff --git a/bodhi/tools/0day.py b/bodhi/tools/0day.py old mode 100644 new mode 100755 diff --git a/bodhi/tools/metrics.py b/bodhi/tools/metrics.py index d10e10b..aa8e6dd 100755 --- a/bodhi/tools/metrics.py +++ b/bodhi/tools/metrics.py @@ -1,30 +1,154 @@ -#!/usr/bin/python -tt - +#!/usr/bin/env -tt """ -A tool for spitting out basic update and bug metrics for each release +A tool for generating statistics for each release. + +.. moduleauthor:: Luke Macken lmacken@redhat.com """
+from operator import itemgetter from sqlobject import AND +from datetime import timedelta +from collections import defaultdict from turbogears.database import PackageHub
-from bodhi.util import load_config +from bodhi.util import load_config, header from bodhi.model import PackageUpdate, Release
+statuses = ('stable', 'testing', 'pending', 'obsolete') +types = ('bugfix', 'enhancement', 'security', 'newpackage') + def main(): load_config() + stats = {} # {release: {'stat': ...}} + feedback = 0 # total number of updates that received feedback + karma = {} # {username: # of karma submissions} + num_updates = PackageUpdate.select().count() + for release in Release.select(): - print release.long_name - updates = PackageUpdate.select( - AND(PackageUpdate.q.releaseID == release.id, - PackageUpdate.q.status == 'stable')) - num_updates = updates.count() - print " * %d stable updates" % num_updates - bugs = set() - for update in updates: + print header(release.long_name) + updates = PackageUpdate.select(PackageUpdate.q.releaseID==release.id) + stats[release.name] = { + 'num_updates': updates.count(), + 'num_testing': 0, + 'num_testing_without_karma': 0, + 'num_feedback': 0, + 'num_anon_feedback': 0, + 'num_critpath': 0, + 'critpath_without_karma': set(), + 'bugs': set(), + 'karma': {}, + 'deltas': [], + 'occurrences': {}, + 'accumulative': timedelta(), + 'packages': defaultdict(int), + } + data = stats[release.name] + + for status in statuses: + data['num_%s' % status] = PackageUpdate.select(AND( + PackageUpdate.q.releaseID==release.id, + PackageUpdate.q.status==status)).count() + + for type in types: + data['num_%s' % type] = PackageUpdate.select(AND( + PackageUpdate.q.releaseID==release.id, + PackageUpdate.q.type==type)).count() + + for update in release.updates: + for build in update.builds: + data['packages'][build.package] += 1 for bug in update.bugs: - bugs.add(bug) - print " * %d bugs" % len(bugs) + data['bugs'].add(bug.bz_id) + + feedback_done = False + testingtime_done = False + + for comment in update.comments: + if not feedback_done and (not comment.author.startswith('bodhi') + and comment.karma != 0 and '@' not in comment.author): + data['num_feedback'] += 1 + feedback += 1 + feedback_done = True + if not comment.author.startswith('bodhi'): + if 
comment.anonymous or '@' in comment.author: + if comment.karma != 0: + data['num_anon_feedback'] += 1 + else: + if comment.author not in data['karma']: + data['karma'][comment.author] = 0 + karma[comment.author] = 0 + data['karma'][comment.author] += 1 + karma[comment.author] += 1 + if (not testingtime_done and + comment.text == 'This update has been pushed to testing'): + for othercomment in update.comments: + if othercomment.text == 'This update has been pushed to stable': + delta = othercomment.timestamp - comment.timestamp + data['deltas'].append(delta) + data['occurrences'][delta.days] = \ + data['occurrences'].setdefault( + delta.days, 0) + 1 + data['accumulative'] += delta + testingtime_done = True + break + + if update.critpath: + data['num_critpath'] += 1 + if not feedback_done: + data['critpath_without_karma'].add(update) + if testingtime_done: + data['num_testing'] += 1 + if not feedback_done: + data['num_testing_without_karma'] += 1 + + data['deltas'].sort() + + print " * %d updates" % data['num_updates'] + for status in statuses: + print " * %d %s updates" % (data['num_%s' % status], status) + for type in types: + print " * %d %s updates (%0.2f%%)" % (data['num_%s' % type], type, + float(data['num_%s' % type]) / data['num_updates'] * 100) + print " * %d critical path updates (%0.2f%%)" % (data['num_critpath'], + float(data['num_critpath']) / data['num_updates'] * 100) + print " * %d updates received feedback (%0.2f%%)" % ( + data['num_feedback'], (float(data['num_feedback']) / + data['num_updates'] * 100)) + print " * %d unique authenticated karma submitters" % ( + len(data['karma'])) + print " * %d anonymous users gave feedback (%0.2f%%)" % ( + data['num_anon_feedback'], float(data['num_anon_feedback']) / + (data['num_anon_feedback'] + sum(data['karma'].values())) * 100) + print " * %d out of %d updates went through testing (%0.2f%%)" % ( + data['num_testing'], data['num_updates'], + float(data['num_testing']) / data['num_updates'] * 100) + print " * 
%d testing updates were pushed *without* karma (%0.2f%%)" %( + data['num_testing_without_karma'], + float(data['num_testing_without_karma']) / + data['num_testing'] * 100) + print " * %d critical path updates pushed *without* karma" % ( + len(data['critpath_without_karma'])) + for update in data['critpath_without_karma']: + print " * %s submitted by %s" % (update.title, update.submitter) + print " * Time spent in testing:" + print " * mean = %d days" % (data['accumulative'].days / + len(data['deltas'])) + print " * median = %d days" % ( + data['deltas'][len(data['deltas']) / 2].days) + print " * mode = %d days" % ( + sorted(data['occurrences'].items(), key=itemgetter(1))[-1][0]) + print " * %d packages updated" % (len(data['packages'])) + for package in sorted(data['packages'].items(), key=itemgetter(1), reverse=True): + print " * %s: %d" % (package[0].name, package[1]) + print
+ print + print "Out of %d total updates, %d received feedback (%0.2f%%)" % ( + num_updates, feedback, (float(feedback) / num_updates * 100)) + print "Out of %d total unique karma submitters, the top 30 are:" % ( + len(karma)) + for submitter in sorted(karma.iteritems(), key=itemgetter(1), reverse=True)[:30]: + print " * %s (%d)" % (submitter[0], submitter[1])
if __name__ == '__main__': main()
commit e46ccc0366f2d263d3fda65a00c0aabc43d507e7 Author: Luke Macken lmacken@redhat.com Date: Mon May 3 17:38:03 2010 -0400
Add a script that scrapes bodhi's mod_wsgi logs to calculate autokarma statistics.
In bodhi v2.0, we'll do better tracking of these transitions in the database, and will not have to scrape logs.
diff --git a/bodhi/tools/log_stats.py b/bodhi/tools/log_stats.py new file mode 100755 index 0000000..d8b3109 --- /dev/null +++ b/bodhi/tools/log_stats.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python +""" +A script that scrapes bodhi's mod_wsgi logs to calculate autokarma statistics. + +In bodhi v2.0, we'll do better tracking of these transitions in the database, +and will not have to scrape logs. +""" + +import subprocess + +from datetime import timedelta +from operator import itemgetter +from sqlobject import SQLObjectNotFound +from collections import defaultdict +from turbogears.database import PackageHub + +from bodhi.util import load_config, header +from bodhi.model import PackageUpdate, Release, PackageBuild + +def parse_output(out): + updates = defaultdict(set) + num_critpath = defaultdict(int) + debug = file('bodhi.debug', 'a') + deltas = [] + occurrences = {} + accumulative = timedelta() + + for line in out.split('\n'): + line = line.strip() + if '}' in line: + continue + if line: + debug.write(line + '\n') + title = line.split()[-1] + update = None + for build in title.split(','): + try: + update = PackageBuild.byNvr(build).updates[0] + break + except SQLObjectNotFound: + pass + #print "Cannot find update for %s" % build + if update: + if update.title not in updates[update.release.name]: + updates[update.release.name].add(update.title) + if update.critpath: + num_critpath[update.release.name] += 1 + for comment in update.comments: + if comment.text == 'This update has been pushed to testing': + for othercomment in update.comments: + if othercomment.text == 'This update has been pushed to stable': + delta = othercomment.timestamp - comment.timestamp + deltas.append(delta) + occurrences[delta.days] = occurrences.setdefault(delta.days, 0) + 1 + accumulative += delta + break + break + debug.close() + deltas.sort() + return updates, num_critpath, deltas, accumulative, occurrences + + +def main(): + unstable = subprocess.Popen('grep "[Fedora Update] [unstable]" 
bodhi.logs', + stdout=subprocess.PIPE, shell=True) + out, err = unstable.communicate() + (unstable_updates, unstable_critpath, unstable_deltas, + unstable_accum, unstable_occur) = parse_output(out) + + stable = subprocess.Popen('grep "[Fedora Update] [stablekarma]" bodhi.logs', + stdout=subprocess.PIPE, shell=True) + out, err = stable.communicate() + (stable_updates, stable_critpath, stable_deltas, + stable_accum, stable_occur) = parse_output(out) + + for release in Release.select(): + print '\n' + header(release.long_name) + num_updates = PackageUpdate.select( + PackageUpdate.q.releaseID==release.id).count() + num_stable = len(stable_updates[release.name]) + num_unstable = len(unstable_updates[release.name]) + num_testing = len(unstable_deltas) + len(stable_deltas) + print " * %d updates automatically unpushed due to karma (%0.2f%%)" % ( + num_unstable, float(num_unstable) / num_updates * 100) + print " * %d of which were critical path updates" % ( + unstable_critpath[release.name]) + print " * %d updates automatically pushed due to karma (%0.2f%%)" % ( + num_stable, float(num_stable) / num_updates * 100) + print " * %d of which were critical path updates" % ( + stable_critpath[release.name]) + + print " * Time spent in testing of updates that were pushed by karma:" + print " * mean = %d days" % (stable_accum.days / len(stable_deltas)) + print " * median = %d days" % stable_deltas[len(stable_deltas)/2].days + print " * mode = %d days" % sorted(stable_occur.items(), + key=itemgetter(1))[-1][0] + + +if __name__ == '__main__': + load_config() + main()