bodhi/tools/log_stats.py | 101 +++++++++++++++++++++++++++++++ bodhi/tools/metrics.py | 152 ++++++++++++++++++++++++++++++++++++++++++----- bodhi/tools/pickledb.py | 42 +++++++----- 3 files changed, 263 insertions(+), 32 deletions(-)
New commits: commit 78195e8fec65f29801bbaca3aa8557f7e44f8a86 Author: Luke Macken lmacken@redhat.com Date: Mon May 3 17:41:55 2010 -0400
Make our pickledb script skip duplicate updates & releases
diff --git a/bodhi/tools/pickledb.py b/bodhi/tools/pickledb.py index 964fde3..cdc1dba 100755 --- a/bodhi/tools/pickledb.py +++ b/bodhi/tools/pickledb.py @@ -97,7 +97,10 @@ def load_db(): # {'updates': [], 'releases': []} if isinstance(data, dict): for release in data['releases']: - Release(**release) + try: + Release.byName(release['name']) + except SQLObjectNotFound: + Release(**release) data = data['updates']
progress = ProgressBar(maxValue=len(data)) @@ -124,20 +127,23 @@ def load_db(): if not u.has_key('date_modified'): u['date_modified'] = None
- update = PackageUpdate(title=u['title'], - date_submitted=u['date_submitted'], - date_pushed=u['date_pushed'], - date_modified=u['date_modified'], - release=release, - submitter=u['submitter'], - updateid=u['updateid'], - type=u['type'], - status=u['status'], - pushed=u['pushed'], - notes=u['notes'], - karma=u['karma'], - request=request, - approved=u['approved']) + try: + update = PackageUpdate.byTitle(u['title']) + except SQLObjectNotFound: + update = PackageUpdate(title=u['title'], + date_submitted=u['date_submitted'], + date_pushed=u['date_pushed'], + date_modified=u['date_modified'], + release=release, + submitter=u['submitter'], + updateid=u['updateid'], + type=u['type'], + status=u['status'], + pushed=u['pushed'], + notes=u['notes'], + karma=u['karma'], + request=request, + approved=u['approved'])
## Create Package and PackageBuild objects for pkg, nvr in u['builds']:
commit bb59d772c55e2be3dd5e1a822fc38596e96ee22d Author: Luke Macken lmacken@redhat.com Date: Mon May 3 17:41:29 2010 -0400
Store the 'anonymous' comment property in our pickledb tool
diff --git a/bodhi/tools/pickledb.py b/bodhi/tools/pickledb.py index bdac628..964fde3 100755 --- a/bodhi/tools/pickledb.py +++ b/bodhi/tools/pickledb.py @@ -74,7 +74,7 @@ def save_db(): data['pushed'] = update.pushed data['notes'] = update.notes data['request'] = update.request - data['comments'] = [(c.timestamp, c.author, c.text, c.karma) for c in update.comments] + data['comments'] = [(c.timestamp, c.author, c.text, c.karma, c.anonymous) for c in update.comments] if hasattr(update, 'approved'): data['approved'] = update.approved else: @@ -167,9 +167,9 @@ def load_db(): except SQLObjectNotFound: cve = CVE(cve_id=cve_id) update.addCVE(cve) - for timestamp, author, text, karma in u['comments']: + for timestamp, author, text, karma, anonymous in u['comments']: comment = Comment(timestamp=timestamp, author=author, text=text, - karma=karma, update=update) + karma=karma, update=update, anonymous=anonymous)
progress()
commit bd3daef805a45d8d81b2ccc07c42b1aa4ecc1bdf Author: Luke Macken lmacken@redhat.com Date: Mon May 3 17:39:52 2010 -0400
Add a metrics.py tool for generating more in-depth statistics
diff --git a/bodhi/tools/0day.py b/bodhi/tools/0day.py old mode 100644 new mode 100755 diff --git a/bodhi/tools/metrics.py b/bodhi/tools/metrics.py index d10e10b..aa8e6dd 100755 --- a/bodhi/tools/metrics.py +++ b/bodhi/tools/metrics.py @@ -1,30 +1,154 @@ -#!/usr/bin/python -tt - +#!/usr/bin/env -tt """ -A tool for spitting out basic update and bug metrics for each release +A tool for generating statistics for each release. + +.. moduleauthor:: Luke Macken lmacken@redhat.com """
+from operator import itemgetter from sqlobject import AND +from datetime import timedelta +from collections import defaultdict from turbogears.database import PackageHub
-from bodhi.util import load_config +from bodhi.util import load_config, header from bodhi.model import PackageUpdate, Release
+statuses = ('stable', 'testing', 'pending', 'obsolete') +types = ('bugfix', 'enhancement', 'security', 'newpackage') + def main(): load_config() + stats = {} # {release: {'stat': ...}} + feedback = 0 # total number of updates that received feedback + karma = {} # {username: # of karma submissions} + num_updates = PackageUpdate.select().count() + for release in Release.select(): - print release.long_name - updates = PackageUpdate.select( - AND(PackageUpdate.q.releaseID == release.id, - PackageUpdate.q.status == 'stable')) - num_updates = updates.count() - print " * %d stable updates" % num_updates - bugs = set() - for update in updates: + print header(release.long_name) + updates = PackageUpdate.select(PackageUpdate.q.releaseID==release.id) + stats[release.name] = { + 'num_updates': updates.count(), + 'num_testing': 0, + 'num_testing_without_karma': 0, + 'num_feedback': 0, + 'num_anon_feedback': 0, + 'num_critpath': 0, + 'critpath_without_karma': set(), + 'bugs': set(), + 'karma': {}, + 'deltas': [], + 'occurrences': {}, + 'accumulative': timedelta(), + 'packages': defaultdict(int), + } + data = stats[release.name] + + for status in statuses: + data['num_%s' % status] = PackageUpdate.select(AND( + PackageUpdate.q.releaseID==release.id, + PackageUpdate.q.status==status)).count() + + for type in types: + data['num_%s' % type] = PackageUpdate.select(AND( + PackageUpdate.q.releaseID==release.id, + PackageUpdate.q.type==type)).count() + + for update in release.updates: + for build in update.builds: + data['packages'][build.package] += 1 for bug in update.bugs: - bugs.add(bug) - print " * %d bugs" % len(bugs) + data['bugs'].add(bug.bz_id) + + feedback_done = False + testingtime_done = False + + for comment in update.comments: + if not feedback_done and (not comment.author.startswith('bodhi') + and comment.karma != 0 and '@' not in comment.author): + data['num_feedback'] += 1 + feedback += 1 + feedback_done = True + if not comment.author.startswith('bodhi'): + if 
comment.anonymous or '@' in comment.author: + if comment.karma != 0: + data['num_anon_feedback'] += 1 + else: + if comment.author not in data['karma']: + data['karma'][comment.author] = 0 + karma[comment.author] = 0 + data['karma'][comment.author] += 1 + karma[comment.author] += 1 + if (not testingtime_done and + comment.text == 'This update has been pushed to testing'): + for othercomment in update.comments: + if othercomment.text == 'This update has been pushed to stable': + delta = othercomment.timestamp - comment.timestamp + data['deltas'].append(delta) + data['occurrences'][delta.days] = \ + data['occurrences'].setdefault( + delta.days, 0) + 1 + data['accumulative'] += delta + testingtime_done = True + break + + if update.critpath: + data['num_critpath'] += 1 + if not feedback_done: + data['critpath_without_karma'].add(update) + if testingtime_done: + data['num_testing'] += 1 + if not feedback_done: + data['num_testing_without_karma'] += 1 + + data['deltas'].sort() + + print " * %d updates" % data['num_updates'] + for status in statuses: + print " * %d %s updates" % (data['num_%s' % status], status) + for type in types: + print " * %d %s updates (%0.2f%%)" % (data['num_%s' % type], type, + float(data['num_%s' % type]) / data['num_updates'] * 100) + print " * %d critical path updates (%0.2f%%)" % (data['num_critpath'], + float(data['num_critpath']) / data['num_updates'] * 100) + print " * %d updates received feedback (%0.2f%%)" % ( + data['num_feedback'], (float(data['num_feedback']) / + data['num_updates'] * 100)) + print " * %d unique authenticated karma submitters" % ( + len(data['karma'])) + print " * %d anonymous users gave feedback (%0.2f%%)" % ( + data['num_anon_feedback'], float(data['num_anon_feedback']) / + (data['num_anon_feedback'] + sum(data['karma'].values())) * 100) + print " * %d out of %d updates went through testing (%0.2f%%)" % ( + data['num_testing'], data['num_updates'], + float(data['num_testing']) / data['num_updates'] * 100) + print " * 
%d testing updates were pushed *without* karma (%0.2f%%)" %( + data['num_testing_without_karma'], + float(data['num_testing_without_karma']) / + data['num_testing'] * 100) + print " * %d critical path updates pushed *without* karma" % ( + len(data['critpath_without_karma'])) + for update in data['critpath_without_karma']: + print " * %s submitted by %s" % (update.title, update.submitter) + print " * Time spent in testing:" + print " * mean = %d days" % (data['accumulative'].days / + len(data['deltas'])) + print " * median = %d days" % ( + data['deltas'][len(data['deltas']) / 2].days) + print " * mode = %d days" % ( + sorted(data['occurrences'].items(), key=itemgetter(1))[-1][0]) + print " * %d packages updated" % (len(data['packages'])) + for package in sorted(data['packages'].items(), key=itemgetter(1), reverse=True): + print " * %s: %d" % (package[0].name, package[1]) + print
+ print + print "Out of %d total updates, %d received feedback (%0.2f%%)" % ( + num_updates, feedback, (float(feedback) / num_updates * 100)) + print "Out of %d total unique karma submitters, the top 30 are:" % ( + len(karma)) + for submitter in sorted(karma.iteritems(), key=itemgetter(1), reverse=True)[:30]: + print " * %s (%d)" % (submitter[0], submitter[1])
if __name__ == '__main__': main()
commit e46ccc0366f2d263d3fda65a00c0aabc43d507e7 Author: Luke Macken lmacken@redhat.com Date: Mon May 3 17:38:03 2010 -0400
Add a script that scrapes bodhi's mod_wsgi logs to calculate autokarma statistics.
In bodhi v2.0, we'll do better tracking of these transitions in the database, and will not have to scrape logs.
diff --git a/bodhi/tools/log_stats.py b/bodhi/tools/log_stats.py new file mode 100755 index 0000000..d8b3109 --- /dev/null +++ b/bodhi/tools/log_stats.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python +""" +A script that scrapes bodhi's mod_wsgi logs to calculate autokarma statistics. + +In bodhi v2.0, we'll do better tracking of these transitions in the database, +and will not have to scrape logs. +""" + +import subprocess + +from datetime import timedelta +from operator import itemgetter +from sqlobject import SQLObjectNotFound +from collections import defaultdict +from turbogears.database import PackageHub + +from bodhi.util import load_config, header +from bodhi.model import PackageUpdate, Release, PackageBuild + +def parse_output(out): + updates = defaultdict(set) + num_critpath = defaultdict(int) + debug = file('bodhi.debug', 'a') + deltas = [] + occurrences = {} + accumulative = timedelta() + + for line in out.split('\n'): + line = line.strip() + if '}' in line: + continue + if line: + debug.write(line + '\n') + title = line.split()[-1] + update = None + for build in title.split(','): + try: + update = PackageBuild.byNvr(build).updates[0] + break + except SQLObjectNotFound: + pass + #print "Cannot find update for %s" % build + if update: + if update.title not in updates[update.release.name]: + updates[update.release.name].add(update.title) + if update.critpath: + num_critpath[update.release.name] += 1 + for comment in update.comments: + if comment.text == 'This update has been pushed to testing': + for othercomment in update.comments: + if othercomment.text == 'This update has been pushed to stable': + delta = othercomment.timestamp - comment.timestamp + deltas.append(delta) + occurrences[delta.days] = occurrences.setdefault(delta.days, 0) + 1 + accumulative += delta + break + break + debug.close() + deltas.sort() + return updates, num_critpath, deltas, accumulative, occurrences + + +def main(): + unstable = subprocess.Popen('grep "[Fedora Update] [unstable]" 
bodhi.logs', + stdout=subprocess.PIPE, shell=True) + out, err = unstable.communicate() + (unstable_updates, unstable_critpath, unstable_deltas, + unstable_accum, unstable_occur) = parse_output(out) + + stable = subprocess.Popen('grep "[Fedora Update] [stablekarma]" bodhi.logs', + stdout=subprocess.PIPE, shell=True) + out, err = stable.communicate() + (stable_updates, stable_critpath, stable_deltas, + stable_accum, stable_occur) = parse_output(out) + + for release in Release.select(): + print '\n' + header(release.long_name) + num_updates = PackageUpdate.select( + PackageUpdate.q.releaseID==release.id).count() + num_stable = len(stable_updates[release.name]) + num_unstable = len(unstable_updates[release.name]) + num_testing = len(unstable_deltas) + len(stable_deltas) + print " * %d updates automatically unpushed due to karma (%0.2f%%)" % ( + num_unstable, float(num_unstable) / num_updates * 100) + print " * %d of which were critical path updates" % ( + unstable_critpath[release.name]) + print " * %d updates automatically pushed due to karma (%0.2f%%)" % ( + num_stable, float(num_stable) / num_updates * 100) + print " * %d of which were critical path updates" % ( + stable_critpath[release.name]) + + print " * Time spent in testing of updates that were pushed by karma:" + print " * mean = %d days" % (stable_accum.days / len(stable_deltas)) + print " * median = %d days" % stable_deltas[len(stable_deltas)/2].days + print " * mode = %d days" % sorted(stable_occur.items(), + key=itemgetter(1))[-1][0] + + +if __name__ == '__main__': + load_config() + main()