bodhi/tools/log_stats.py | 101 +++++++++++++++++++++++++++++++
bodhi/tools/metrics.py | 152 ++++++++++++++++++++++++++++++++++++++++++-----
bodhi/tools/pickledb.py | 42 +++++++-----
3 files changed, 263 insertions(+), 32 deletions(-)
New commits:
commit 78195e8fec65f29801bbaca3aa8557f7e44f8a86
Author: Luke Macken <lmacken(a)redhat.com>
Date: Mon May 3 17:41:55 2010 -0400
Make our pickledb script skip duplicate updates & releases
diff --git a/bodhi/tools/pickledb.py b/bodhi/tools/pickledb.py
index 964fde3..cdc1dba 100755
--- a/bodhi/tools/pickledb.py
+++ b/bodhi/tools/pickledb.py
@@ -97,7 +97,10 @@ def load_db():
# {'updates': [], 'releases': []}
if isinstance(data, dict):
for release in data['releases']:
- Release(**release)
+ try:
+ Release.byName(release['name'])
+ except SQLObjectNotFound:
+ Release(**release)
data = data['updates']
progress = ProgressBar(maxValue=len(data))
@@ -124,20 +127,23 @@ def load_db():
if not u.has_key('date_modified'):
u['date_modified'] = None
- update = PackageUpdate(title=u['title'],
- date_submitted=u['date_submitted'],
- date_pushed=u['date_pushed'],
- date_modified=u['date_modified'],
- release=release,
- submitter=u['submitter'],
- updateid=u['updateid'],
- type=u['type'],
- status=u['status'],
- pushed=u['pushed'],
- notes=u['notes'],
- karma=u['karma'],
- request=request,
- approved=u['approved'])
+ try:
+ update = PackageUpdate.byTitle(u['title'])
+ except SQLObjectNotFound:
+ update = PackageUpdate(title=u['title'],
+ date_submitted=u['date_submitted'],
+ date_pushed=u['date_pushed'],
+ date_modified=u['date_modified'],
+ release=release,
+ submitter=u['submitter'],
+ updateid=u['updateid'],
+ type=u['type'],
+ status=u['status'],
+ pushed=u['pushed'],
+ notes=u['notes'],
+ karma=u['karma'],
+ request=request,
+ approved=u['approved'])
## Create Package and PackageBuild objects
for pkg, nvr in u['builds']:
commit bb59d772c55e2be3dd5e1a822fc38596e96ee22d
Author: Luke Macken <lmacken(a)redhat.com>
Date: Mon May 3 17:41:29 2010 -0400
Store the 'anonymous' comment property in our pickledb tool
diff --git a/bodhi/tools/pickledb.py b/bodhi/tools/pickledb.py
index bdac628..964fde3 100755
--- a/bodhi/tools/pickledb.py
+++ b/bodhi/tools/pickledb.py
@@ -74,7 +74,7 @@ def save_db():
data['pushed'] = update.pushed
data['notes'] = update.notes
data['request'] = update.request
- data['comments'] = [(c.timestamp, c.author, c.text, c.karma) for c in
update.comments]
+ data['comments'] = [(c.timestamp, c.author, c.text, c.karma, c.anonymous)
for c in update.comments]
if hasattr(update, 'approved'):
data['approved'] = update.approved
else:
@@ -167,9 +167,9 @@ def load_db():
except SQLObjectNotFound:
cve = CVE(cve_id=cve_id)
update.addCVE(cve)
- for timestamp, author, text, karma in u['comments']:
+ for timestamp, author, text, karma, anonymous in u['comments']:
comment = Comment(timestamp=timestamp, author=author, text=text,
- karma=karma, update=update)
+ karma=karma, update=update, anonymous=anonymous)
progress()
commit bd3daef805a45d8d81b2ccc07c42b1aa4ecc1bdf
Author: Luke Macken <lmacken(a)redhat.com>
Date: Mon May 3 17:39:52 2010 -0400
Add a metrics.py tool for generating more in-depth statistics
diff --git a/bodhi/tools/0day.py b/bodhi/tools/0day.py
old mode 100644
new mode 100755
diff --git a/bodhi/tools/metrics.py b/bodhi/tools/metrics.py
index d10e10b..aa8e6dd 100755
--- a/bodhi/tools/metrics.py
+++ b/bodhi/tools/metrics.py
@@ -1,30 +1,154 @@
-#!/usr/bin/python -tt
-
+#!/usr/bin/env -tt
"""
-A tool for spitting out basic update and bug metrics for each release
+A tool for generating statistics for each release.
+
+.. moduleauthor:: Luke Macken <lmacken(a)redhat.com>
"""
+from operator import itemgetter
from sqlobject import AND
+from datetime import timedelta
+from collections import defaultdict
from turbogears.database import PackageHub
-from bodhi.util import load_config
+from bodhi.util import load_config, header
from bodhi.model import PackageUpdate, Release
+statuses = ('stable', 'testing', 'pending', 'obsolete')
+types = ('bugfix', 'enhancement', 'security',
'newpackage')
+
def main():
load_config()
+ stats = {} # {release: {'stat': ...}}
+ feedback = 0 # total number of updates that received feedback
+ karma = {} # {username: # of karma submissions}
+ num_updates = PackageUpdate.select().count()
+
for release in Release.select():
- print release.long_name
- updates = PackageUpdate.select(
- AND(PackageUpdate.q.releaseID == release.id,
- PackageUpdate.q.status == 'stable'))
- num_updates = updates.count()
- print " * %d stable updates" % num_updates
- bugs = set()
- for update in updates:
+ print header(release.long_name)
+ updates = PackageUpdate.select(PackageUpdate.q.releaseID==release.id)
+ stats[release.name] = {
+ 'num_updates': updates.count(),
+ 'num_testing': 0,
+ 'num_testing_without_karma': 0,
+ 'num_feedback': 0,
+ 'num_anon_feedback': 0,
+ 'num_critpath': 0,
+ 'critpath_without_karma': set(),
+ 'bugs': set(),
+ 'karma': {},
+ 'deltas': [],
+ 'occurrences': {},
+ 'accumulative': timedelta(),
+ 'packages': defaultdict(int),
+ }
+ data = stats[release.name]
+
+ for status in statuses:
+ data['num_%s' % status] = PackageUpdate.select(AND(
+ PackageUpdate.q.releaseID==release.id,
+ PackageUpdate.q.status==status)).count()
+
+ for type in types:
+ data['num_%s' % type] = PackageUpdate.select(AND(
+ PackageUpdate.q.releaseID==release.id,
+ PackageUpdate.q.type==type)).count()
+
+ for update in release.updates:
+ for build in update.builds:
+ data['packages'][build.package] += 1
for bug in update.bugs:
- bugs.add(bug)
- print " * %d bugs" % len(bugs)
+ data['bugs'].add(bug.bz_id)
+
+ feedback_done = False
+ testingtime_done = False
+
+ for comment in update.comments:
+ if not feedback_done and (not comment.author.startswith('bodhi')
+ and comment.karma != 0 and '@' not in comment.author):
+ data['num_feedback'] += 1
+ feedback += 1
+ feedback_done = True
+ if not comment.author.startswith('bodhi'):
+ if comment.anonymous or '@' in comment.author:
+ if comment.karma != 0:
+ data['num_anon_feedback'] += 1
+ else:
+ if comment.author not in data['karma']:
+ data['karma'][comment.author] = 0
+ karma[comment.author] = 0
+ data['karma'][comment.author] += 1
+ karma[comment.author] += 1
+ if (not testingtime_done and
+ comment.text == 'This update has been pushed to testing'):
+ for othercomment in update.comments:
+ if othercomment.text == 'This update has been pushed to
stable':
+ delta = othercomment.timestamp - comment.timestamp
+ data['deltas'].append(delta)
+ data['occurrences'][delta.days] = \
+ data['occurrences'].setdefault(
+ delta.days, 0) + 1
+ data['accumulative'] += delta
+ testingtime_done = True
+ break
+
+ if update.critpath:
+ data['num_critpath'] += 1
+ if not feedback_done:
+ data['critpath_without_karma'].add(update)
+ if testingtime_done:
+ data['num_testing'] += 1
+ if not feedback_done:
+ data['num_testing_without_karma'] += 1
+
+ data['deltas'].sort()
+
+ print " * %d updates" % data['num_updates']
+ for status in statuses:
+ print " * %d %s updates" % (data['num_%s' % status],
status)
+ for type in types:
+ print " * %d %s updates (%0.2f%%)" % (data['num_%s' %
type], type,
+ float(data['num_%s' % type]) / data['num_updates'] *
100)
+ print " * %d critical path updates (%0.2f%%)" %
(data['num_critpath'],
+ float(data['num_critpath']) / data['num_updates'] * 100)
+ print " * %d updates received feedback (%0.2f%%)" % (
+ data['num_feedback'], (float(data['num_feedback']) /
+ data['num_updates'] * 100))
+ print " * %d unique authenticated karma submitters" % (
+ len(data['karma']))
+ print " * %d anonymous users gave feedback (%0.2f%%)" % (
+ data['num_anon_feedback'],
float(data['num_anon_feedback']) /
+ (data['num_anon_feedback'] + sum(data['karma'].values()))
* 100)
+ print " * %d out of %d updates went through testing (%0.2f%%)" % (
+ data['num_testing'], data['num_updates'],
+ float(data['num_testing']) / data['num_updates'] * 100)
+ print " * %d testing updates were pushed *without* karma (%0.2f%%)" %(
+ data['num_testing_without_karma'],
+ float(data['num_testing_without_karma']) /
+ data['num_testing'] * 100)
+ print " * %d critical path updates pushed *without* karma" % (
+ len(data['critpath_without_karma']))
+ for update in data['critpath_without_karma']:
+ print " * %s submitted by %s" % (update.title, update.submitter)
+ print " * Time spent in testing:"
+ print " * mean = %d days" % (data['accumulative'].days /
+ len(data['deltas']))
+ print " * median = %d days" % (
+ data['deltas'][len(data['deltas']) / 2].days)
+ print " * mode = %d days" % (
+ sorted(data['occurrences'].items(), key=itemgetter(1))[-1][0])
+ print " * %d packages updated" % (len(data['packages']))
+ for package in sorted(data['packages'].items(), key=itemgetter(1),
reverse=True):
+ print " * %s: %d" % (package[0].name, package[1])
+ print
+ print
+ print "Out of %d total updates, %d received feedback (%0.2f%%)" % (
+ num_updates, feedback, (float(feedback) / num_updates * 100))
+ print "Out of %d total unique karma submitters, the top 30 are:" % (
+ len(karma))
+ for submitter in sorted(karma.iteritems(), key=itemgetter(1), reverse=True)[:30]:
+ print " * %s (%d)" % (submitter[0], submitter[1])
if __name__ == '__main__':
main()
commit e46ccc0366f2d263d3fda65a00c0aabc43d507e7
Author: Luke Macken <lmacken(a)redhat.com>
Date: Mon May 3 17:38:03 2010 -0400
Add a script that scrapes bodhi's mod_wsgi logs to calculate autokarma
statistics.
In bodhi v2.0, we'll do better tracking of these transitions in the database,
and will not have to scrape logs.
diff --git a/bodhi/tools/log_stats.py b/bodhi/tools/log_stats.py
new file mode 100755
index 0000000..d8b3109
--- /dev/null
+++ b/bodhi/tools/log_stats.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python
+"""
+A script that scrapes bodhi's mod_wsgi logs to calculate autokarma statistics.
+
+In bodhi v2.0, we'll do better tracking of these transitions in the database,
+and will not have to scrape logs.
+"""
+
+import subprocess
+
+from datetime import timedelta
+from operator import itemgetter
+from sqlobject import SQLObjectNotFound
+from collections import defaultdict
+from turbogears.database import PackageHub
+
+from bodhi.util import load_config, header
+from bodhi.model import PackageUpdate, Release, PackageBuild
+
+def parse_output(out):
+ updates = defaultdict(set)
+ num_critpath = defaultdict(int)
+ debug = file('bodhi.debug', 'a')
+ deltas = []
+ occurrences = {}
+ accumulative = timedelta()
+
+ for line in out.split('\n'):
+ line = line.strip()
+ if '}' in line:
+ continue
+ if line:
+ debug.write(line + '\n')
+ title = line.split()[-1]
+ update = None
+ for build in title.split(','):
+ try:
+ update = PackageBuild.byNvr(build).updates[0]
+ break
+ except SQLObjectNotFound:
+ pass
+ #print "Cannot find update for %s" % build
+ if update:
+ if update.title not in updates[update.release.name]:
+ updates[update.release.name].add(update.title)
+ if update.critpath:
+ num_critpath[update.release.name] += 1
+ for comment in update.comments:
+ if comment.text == 'This update has been pushed to
testing':
+ for othercomment in update.comments:
+ if othercomment.text == 'This update has been pushed
to stable':
+ delta = othercomment.timestamp - comment.timestamp
+ deltas.append(delta)
+ occurrences[delta.days] =
occurrences.setdefault(delta.days, 0) + 1
+ accumulative += delta
+ break
+ break
+ debug.close()
+ deltas.sort()
+ return updates, num_critpath, deltas, accumulative, occurrences
+
+
+def main():
+ unstable = subprocess.Popen('grep "\[Fedora Update\] \[unstable\]"
bodhi.logs',
+ stdout=subprocess.PIPE, shell=True)
+ out, err = unstable.communicate()
+ (unstable_updates, unstable_critpath, unstable_deltas,
+ unstable_accum, unstable_occur) = parse_output(out)
+
+ stable = subprocess.Popen('grep "\[Fedora Update\] \[stablekarma\]"
bodhi.logs',
+ stdout=subprocess.PIPE, shell=True)
+ out, err = stable.communicate()
+ (stable_updates, stable_critpath, stable_deltas,
+ stable_accum, stable_occur) = parse_output(out)
+
+ for release in Release.select():
+ print '\n' + header(release.long_name)
+ num_updates = PackageUpdate.select(
+ PackageUpdate.q.releaseID==release.id).count()
+ num_stable = len(stable_updates[release.name])
+ num_unstable = len(unstable_updates[release.name])
+ num_testing = len(unstable_deltas) + len(stable_deltas)
+ print " * %d updates automatically unpushed due to karma (%0.2f%%)" %
(
+ num_unstable, float(num_unstable) / num_updates * 100)
+ print " * %d of which were critical path updates" % (
+ unstable_critpath[release.name])
+ print " * %d updates automatically pushed due to karma (%0.2f%%)" % (
+ num_stable, float(num_stable) / num_updates * 100)
+ print " * %d of which were critical path updates" % (
+ stable_critpath[release.name])
+
+ print " * Time spent in testing of updates that were pushed by karma:"
+ print " * mean = %d days" % (stable_accum.days / len(stable_deltas))
+ print " * median = %d days" %
stable_deltas[len(stable_deltas)/2].days
+ print " * mode = %d days" % sorted(stable_occur.items(),
+ key=itemgetter(1))[-1][0]
+
+
+if __name__ == '__main__':
+ load_config()
+ main()