[Collab-qa-commits] r1271 - in udd: . sql udd web/cgi-bin

lucas at alioth.debian.org lucas at alioth.debian.org
Mon Sep 15 11:39:41 UTC 2008


Author: lucas
Date: 2008-09-15 11:39:40 +0000 (Mon, 15 Sep 2008)
New Revision: 1271

Added:
   udd/udd/ubuntu_bugs_gatherer.py
   udd/web/cgi-bin/ubuntubugs.cgi
Modified:
   udd/config-standalone.yaml
   udd/config.yaml
   udd/sql/drop.sql
   udd/sql/setup.sql
Log:
add importer for ubuntu bugs

Modified: udd/config-standalone.yaml
===================================================================
--- udd/config-standalone.yaml	2008-09-15 11:38:54 UTC (rev 1270)
+++ udd/config-standalone.yaml	2008-09-15 11:39:40 UTC (rev 1271)
@@ -13,6 +13,7 @@
     bugs: exec DEBBUGS_CONFIG_FILE=/org/udd.debian.net/mirrors/bugs.debian.org/etc/config perl /org/udd.debian.net/udd/udd/bugs_gatherer.pl 
     carnivore: module udd.carnivore_gatherer
     lintian: module udd.lintian_gatherer
+    ubuntu-bugs: module udd.ubuntu_bugs_gatherer
   debug: 1
   timestamp-dir: /org/udd.debian.net/timestamps
   lock-dir: /org/udd.debian.net/locks
@@ -214,3 +215,6 @@
   path: /org/udd.debian.net/mirrors/lintian.log
   table: lintian
   schema: lintian
+
+ubuntu-bugs:
+   type: ubuntu-bugs

Modified: udd/config.yaml
===================================================================
--- udd/config.yaml	2008-09-15 11:38:54 UTC (rev 1270)
+++ udd/config.yaml	2008-09-15 11:39:40 UTC (rev 1271)
@@ -13,6 +13,7 @@
     bugs: exec DEBBUGS_CONFIG_FILE=/org/udd.debian.net/mirrors/bugs.debian.org/etc/config perl /org/udd.debian.net/udd/udd/bugs_gatherer.pl 
     carnivore: module udd.carnivore_gatherer
     lintian: module udd.lintian_gatherer
+    ubuntu-bugs: module udd.ubuntu_bugs_gatherer
   debug: 1
   timestamp-dir: /org/udd.debian.net/timestamps
   lock-dir: /org/udd.debian.net/locks
@@ -208,3 +209,6 @@
   path: /org/udd.debian.net/mirrors/lintian.log
   table: lintian
   schema: lintian
+
+ubuntu-bugs:
+   type: ubuntu-bugs

Modified: udd/sql/drop.sql
===================================================================
--- udd/sql/drop.sql	2008-09-15 11:38:54 UTC (rev 1270)
+++ udd/sql/drop.sql	2008-09-15 11:39:40 UTC (rev 1271)
@@ -32,3 +32,8 @@
 DROP TABLE upload_history CASCADE;
 DROP TABLE upload_history_architecture CASCADE;
 DROP TABLE upload_history_closes CASCADE;
+DROP TABLE ubuntu_bugs CASCADE;
+DROP TABLE ubuntu_bugs_duplicates CASCADE;
+DROP TABLE ubuntu_bugs_subscribers CASCADE;
+DROP TABLE ubuntu_bugs_tags CASCADE;
+DROP TABLE ubuntu_bugs_tasks CASCADE;

Modified: udd/sql/setup.sql
===================================================================
--- udd/sql/setup.sql	2008-09-15 11:38:54 UTC (rev 1270)
+++ udd/sql/setup.sql	2008-09-15 11:39:40 UTC (rev 1271)
@@ -295,3 +295,61 @@
 GRANT SELECT ON upload_history TO PUBLIC;
 GRANT SELECT ON upload_history_architecture TO PUBLIC;
 GRANT SELECT ON upload_history_closes TO PUBLIC;
+
+-- Ubuntu bugs
+CREATE TABLE ubuntu_bugs (
+bug int,
+title text,
+reporter_login text,
+reporter_name text,
+duplicate_of int,
+date_reported text,
+date_updated text,
+security boolean,
+PRIMARY KEY (bug));
+
+CREATE TABLE ubuntu_bugs_duplicates (
+bug int REFERENCES ubuntu_bugs,
+duplicate int,
+PRIMARY KEY (bug, duplicate));
+
+CREATE TABLE ubuntu_bugs_subscribers (
+bug int REFERENCES ubuntu_bugs,
+subscriber_login text,
+subscriber_name text);
+
+CREATE TABLE ubuntu_bugs_tags (
+bug int REFERENCES ubuntu_bugs,
+tag text,
+PRIMARY KEY (bug, tag));
+
+CREATE TABLE ubuntu_bugs_tasks (
+bug int REFERENCES ubuntu_bugs,
+package text,
+distro text,
+status text,
+importance text,
+component text,
+milestone text,
+date_created text,
+date_assigned text,
+date_closed text,
+date_incomplete text,
+date_confirmed text,
+date_inprogress text,
+date_fix_committed text,
+date_fix_released text,
+date_left_new text,
+date_triaged text,
+watch text,
+reporter_login text,
+reporter_name text,
+assignee_login text,
+assignee_name text,
+PRIMARY KEY (bug, package, distro));
+
+GRANT SELECT ON ubuntu_bugs TO PUBLIC;
+GRANT SELECT ON ubuntu_bugs_duplicates TO PUBLIC;
+GRANT SELECT ON ubuntu_bugs_subscribers TO PUBLIC;
+GRANT SELECT ON ubuntu_bugs_tags TO PUBLIC;
+GRANT SELECT ON ubuntu_bugs_tasks TO PUBLIC;

Added: udd/udd/ubuntu_bugs_gatherer.py
===================================================================
--- udd/udd/ubuntu_bugs_gatherer.py	                        (rev 0)
+++ udd/udd/ubuntu_bugs_gatherer.py	2008-09-15 11:39:40 UTC (rev 1271)
@@ -0,0 +1,249 @@
+#!/usr/bin/env python
+
+"""
+This script imports the Ubuntu bugs from Launchpad
+"""
+
+from aux import quote
+import sys
+from gatherer import gatherer
+import re
+import urllib
+from Queue import Queue, Empty
+from threading import Thread, currentThread
+import time
+import httplib
+import email
+
+def get_gatherer(connection, config, source):
+  return ubuntu_bugs_gatherer(connection, config, source)
+
+class ubuntu_bugs_gatherer(gatherer):
+  debug = False
+
+  def __init__(self, connection, config, source):
+    gatherer.__init__(self, connection, config, source)
+
+  def run(self):
+    my_config = self.my_config
+    num_fetchers = 8
+    num_writers = 1
+    bugs = self.fetch_all_bugs()
+    httpq = Queue()
+    dbq = Queue()
+    for b in bugs:
+      if self.debug:
+        if b > 10000:
+          continue
+      httpq.put(b)
+
+    # start workers
+    for i in range(num_fetchers):
+      t = Thread(target=self.bugfetcher, name="Fetcher-"+str(i),args=[httpq, dbq])
+      t.setDaemon(True)
+      t.start()
+
+    c = self.cursor()
+    c.execute("delete from ubuntu_bugs_subscribers")
+    c.execute("delete from ubuntu_bugs_duplicates")
+    c.execute("delete from ubuntu_bugs_tags")
+    c.execute("delete from ubuntu_bugs_tasks")
+    c.execute("delete from ubuntu_bugs")
+
+    ok = True
+    while ok:
+      try:
+        if self.debug:
+          print "HTTPQ: ", httpq.qsize(), " DBQ: ", dbq.qsize()
+        d = dbq.get(True, 5) # 5 secs timeout
+        self.dbimport(c, d)
+        dbq.task_done()
+      except Empty:
+        if httpq.qsize() == 0:
+          ok = False
+
+  def fetch_all_bugs(self):
+    fh = urllib.urlopen('https://launchpad.net/ubuntu/+bugs-text')
+    text = fh.read()
+    # convert to a list
+    bugs = text.split('\n')
+    # remove '', or map() will complain
+    bugs.remove('')
+    # convert each bug (string) to an int
+    bugs = map(int, bugs)
+    # sort, so that we can remove duplicates in O(n) later
+    bugs.sort()
+    # remove duplicates. apparently not in lib, see
+    # http://www.python.org/dev/peps/pep-0270/
+    # FIXME use set()
+    nbugs = []
+    on = 0
+    for n in bugs:
+      if n != on:
+        nbugs.append(n)
+#      else:
+#        print "Duplicate bug: " +  str(n)
+      on = n
+    fh.close()
+    return nbugs
+
+  # Worker thread: take bug numbers from hq, fetch each bug as text from Launchpad, and put the data on dq.
+  def bugfetcher(self, hq, dq):
+    while True:
+      conn = httplib.HTTPSConnection('bugs.launchpad.net')
+      ok = True
+      b = None
+      while ok:
+        try:
+          b = hq.get(False)
+        except Empty:
+          return
+        except:
+          print "Other exception raised in bugfetcher. exiting."
+          exit(1)
+
+        try:
+          conn.request('GET', 'https://edge.launchpad.net/bugs/' + str(b) + '/+text')
+          r = conn.getresponse()
+          if r.status == 200:
+            data = r.read()
+            if data != '':
+              dq.put(data)
+              hq.task_done()
+            else:
+              print "[", currentThread().getName(), "] Bug ", b, ": Empty data."
+              ok = False
+              hq.put(b)
+              hq.task_done()
+          else:
+            print "[", currentThread().getName(), "] Bug ", b, ": Wrong status: ", r.status, " ", r.reason
+            ok = False
+            hq.put(b)
+            hq.task_done()
+        except httplib.BadStatusLine, line:
+          print "[", currentThread().getName(), "] Bug ", b, ": BadStatusLine: ", line
+          print str(r.getheaders())
+          print r.read()
+          ok = False
+          hq.put(b)
+          hq.task_done()
+
+  parre = re.compile('^\s*(.*) \(([^(]*)\)$')
+  def splitpar(self, text):
+    mo = re.search(self.parre, text)
+    if mo == None:
+      return (text, '')
+    return mo.groups()
+
+  contenttype = re.compile('^Content-Type: ')
+  def dbimport(self, c, data):
+    d = data.split('\n\n')
+    bug = d[0] + '\n'
+    tasks = []
+    for di in d[1:-1]:
+      if re.match(self.contenttype, di + '\n'):
+        break
+      else:
+        tasks.append(di)
+    # OK, we have bugs and tasks.
+    bm = email.message_from_string(bug)
+    bugno = int(bm['bug'])
+    # Print any header fields of the bug that we do not know about and
+    # therefore do not import ('attachments' is known but ignored for now)
+    s = set(bm.keys()) - set(['bug', 'title', 'reporter', 'attachments',
+      'subscribers', 'tags', 'duplicate-of', 'duplicates', 'date-reported',
+      'date-updated', 'security'])
+    if len(s) > 0:
+      print s
+    name, login = self.splitpar(bm['reporter'])
+    if bm['duplicate-of'] != '':
+      dup = int(bm['duplicate-of'])
+    else:
+      dup = None
+    reported = time.strptime(bm['date-reported'], "%a, %d %b %Y %H:%M:%S -0000")
+    updated = time.strptime(bm['date-updated'], "%a, %d %b %Y %H:%M:%S -0000")
+    if bm['security'] != None:
+      security = 't'
+    else:
+      security = 'f'
+    treported = time.strftime("%a, %d %b %Y %H:%M:%S +0000", reported)
+    tupdated = time.strftime("%a, %d %b %Y %H:%M:%S +0000", updated)
+    c.execute('insert into ubuntu_bugs values (%s, %s, %s, %s, %s, %s, %s, %s)',
+        (bugno, bm['title'], login, name, dup, treported, tupdated, security))
+    # subscribers
+    for sub in bm['subscribers'].split('\n'):
+      name, login = self.splitpar(sub)
+      c.execute('insert into ubuntu_bugs_subscribers values (%s, %s, %s)', (bugno, login, name))
+    # duplicates
+    for d in bm['duplicates'].split():
+      c.execute('insert into ubuntu_bugs_duplicates values (%s, %s)', (bugno, int(d)))
+    # tags
+    for tag in bm['tags'].split():
+      c.execute('insert into ubuntu_bugs_tags values (%s, %s)', (bugno, tag))
+    ### Import tasks
+    for t in tasks:
+      tm = email.message_from_string(t)
+      pkg, distro = self.splitpar(tm['task'])
+      rep_name, rep_login = self.splitpar(tm['reporter'])
+      if tm['assignee'] != '':
+        ass_name, ass_login = self.splitpar(tm['assignee'])
+      else:
+        ass_name = None
+        ass_login = None
+      created = time.strftime("%a, %d %b %Y %H:%M:%S +0000", 
+        time.strptime(tm['date-created'], "%a, %d %b %Y %H:%M:%S -0000"))
+      if tm['date-assigned']:
+        assigned = time.strftime("%a, %d %b %Y %H:%M:%S +0000",
+          time.strptime(tm['date-assigned'], "%a, %d %b %Y %H:%M:%S -0000"))
+      else:
+        assigned = ''
+      if tm['date-closed']:
+        closed = time.strftime("%a, %d %b %Y %H:%M:%S +0000",
+          time.strptime(tm['date-closed'], "%a, %d %b %Y %H:%M:%S -0000"))
+      else:
+        closed = ''
+      if tm['date-incomplete']:
+        incomplete = time.strftime("%a, %d %b %Y %H:%M:%S +0000",
+          time.strptime(tm['date-incomplete'], "%a, %d %b %Y %H:%M:%S -0000"))
+      else:
+        incomplete = ''
+      if tm['date-confirmed']:
+        confirmed = time.strftime("%a, %d %b %Y %H:%M:%S +0000",
+          time.strptime(tm['date-confirmed'], "%a, %d %b %Y %H:%M:%S -0000"))
+      else:
+        confirmed = ''
+      if tm['date-inprogress']:
+        inprogress = time.strftime("%a, %d %b %Y %H:%M:%S +0000",
+          time.strptime(tm['date-inprogress'], "%a, %d %b %Y %H:%M:%S -0000"))
+      else:
+        inprogress = ''
+      if tm['date-fix-committed']:
+        fixcommitted = time.strftime("%a, %d %b %Y %H:%M:%S +0000",
+          time.strptime(tm['date-fix-committed'], "%a, %d %b %Y %H:%M:%S -0000"))
+      else:
+        fixcommitted = ''
+      if tm['date-fix-released']:
+        fixreleased = time.strftime("%a, %d %b %Y %H:%M:%S +0000",
+          time.strptime(tm['date-fix-released'], "%a, %d %b %Y %H:%M:%S -0000"))
+      else:
+        fixreleased = ''
+      if tm['date-left-new']:
+        leftnew = time.strftime("%a, %d %b %Y %H:%M:%S +0000",
+          time.strptime(tm['date-left-new'], "%a, %d %b %Y %H:%M:%S -0000"))
+      else:
+        leftnew = ''
+      if tm['date-triaged']:
+        triaged = time.strftime("%a, %d %b %Y %H:%M:%S +0000",
+          time.strptime(tm['date-triaged'], "%a, %d %b %Y %H:%M:%S -0000"))
+      else:
+        triaged = ''
+      # print any task headers that we do not know about (and so do not import)
+      s = set(tm.keys()) - set(['task', 'reporter', 'assignee', 'status', 'date-created', 'importance', 'component', 'milestone', 'date-assigned', 'date-closed', 'date-incomplete', 'date-confirmed', 'date-inprogress', 'date-fix-committed', 'date-fix-released', 'watch', 'date-left-new', 'date-triaged'])
+      if len(s) > 0:
+        print s
+        print t
+      c.execute('insert into ubuntu_bugs_tasks values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)', (bugno, pkg, distro,
+        tm['status'], tm['importance'], tm['component'], tm['milestone'], created,
+        assigned, closed, incomplete, confirmed, inprogress, fixcommitted, fixreleased, leftnew, triaged, tm['watch'],
+        rep_login, rep_name, ass_login, ass_name))
+

Added: udd/web/cgi-bin/ubuntubugs.cgi
===================================================================
--- udd/web/cgi-bin/ubuntubugs.cgi	                        (rev 0)
+++ udd/web/cgi-bin/ubuntubugs.cgi	2008-09-15 11:39:40 UTC (rev 1271)
@@ -0,0 +1,18 @@
+#!/usr/bin/ruby -w
+
+require 'dbi'
+
+puts "Content-type: text/plain\n\n"
+
+dbh = DBI::connect('DBI:Pg:udd')
+sth = dbh.prepare("select package, count(distinct bugs.bug)
+from ubuntu_bugs_tasks tasks,ubuntu_bugs bugs
+where tasks.bug = bugs.bug
+and distro in ('', 'Ubuntu')
+and status not in ('Invalid', 'Fix Released', 'Won''t Fix')
+group by package order by package asc")
+sth.execute
+while row = sth.fetch do
+  puts "#{row['package']}|#{row['count']}"
+end
+sth.finish


Property changes on: udd/web/cgi-bin/ubuntubugs.cgi
___________________________________________________________________
Name: svn:executable
   + *




More information about the Collab-qa-commits mailing list