[Collab-qa-commits] r1293 - in udd: . sql udd

zack at alioth.debian.org zack at alioth.debian.org
Fri Sep 26 15:55:19 UTC 2008


Author: zack
Date: 2008-09-26 15:55:17 +0000 (Fri, 26 Sep 2008)
New Revision: 1293

Added:
   udd/udd/debtags_gatherer.py
Modified:
   udd/config-standalone.yaml
   udd/config.yaml
   udd/sql/drop.sql
   udd/sql/setup.sql
   udd/test.yaml
Log:
add gatherer to inject debtags data into UDD


Modified: udd/config-standalone.yaml
===================================================================
--- udd/config-standalone.yaml	2008-09-24 22:58:23 UTC (rev 1292)
+++ udd/config-standalone.yaml	2008-09-26 15:55:17 UTC (rev 1293)
@@ -13,6 +13,7 @@
     bugs: exec DEBBUGS_CONFIG_FILE=/org/udd.debian.net/mirrors/bugs.debian.org/etc/config perl /org/udd.debian.net/udd/udd/bugs_gatherer.pl 
     carnivore: module udd.carnivore_gatherer
     lintian: module udd.lintian_gatherer
+    debtags: module udd.debtags_gatherer
     ubuntu-bugs: module udd.ubuntu_bugs_gatherer
   debug: 1
   timestamp-dir: /org/udd.debian.net/timestamps
@@ -216,5 +217,12 @@
   table: lintian
   schema: lintian
 
+debtags:
+  type: debtags
+  update-command: rm -f /org/udd.debian.net/mirrors/debtags.txt && wget -q http://svn.debian.org/viewsvn/*checkout*/debtags/tagdb/tags -O /org/udd.debian.net/mirrors/debtags.txt
+  path: /org/udd.debian.net/mirrors/debtags.txt
+  table: debtags
+  schema: debtags
+
 ubuntu-bugs:
    type: ubuntu-bugs

Modified: udd/config.yaml
===================================================================
--- udd/config.yaml	2008-09-24 22:58:23 UTC (rev 1292)
+++ udd/config.yaml	2008-09-26 15:55:17 UTC (rev 1293)
@@ -13,6 +13,7 @@
     bugs: exec DEBBUGS_CONFIG_FILE=/org/udd.debian.net/mirrors/bugs.debian.org/etc/config perl /org/udd.debian.net/udd/udd/bugs_gatherer.pl 
     carnivore: module udd.carnivore_gatherer
     lintian: module udd.lintian_gatherer
+    debtags: module udd.debtags_gatherer
     ubuntu-bugs: module udd.ubuntu_bugs_gatherer
   debug: 1
   timestamp-dir: /org/udd.debian.net/timestamps
@@ -210,5 +211,12 @@
   table: lintian
   schema: lintian
 
+debtags:
+  type: debtags
+  update-command: rm -f /org/udd.debian.net/mirrors/debtags.txt && wget -q http://svn.debian.org/viewsvn/*checkout*/debtags/tagdb/tags -O /org/udd.debian.net/mirrors/debtags.txt
+  path: /org/udd.debian.net/mirrors/debtags.txt
+  table: debtags
+  schema: debtags
+
 ubuntu-bugs:
    type: ubuntu-bugs

Modified: udd/sql/drop.sql
===================================================================
--- udd/sql/drop.sql	2008-09-24 22:58:23 UTC (rev 1292)
+++ udd/sql/drop.sql	2008-09-26 15:55:17 UTC (rev 1293)
@@ -27,6 +27,7 @@
 DROP TABLE ubuntu_popcon_src_average CASCADE;
 DROP DOMAIN lintian_tag_type CASCADE;
 DROP TABLE lintian CASCADE;
+DROP TABLE debtags CASCADE;
 DROP TABLE orphaned_packages CASCADE;
 DROP TABLE migrations CASCADE;
 DROP TABLE upload_history CASCADE;

Modified: udd/sql/setup.sql
===================================================================
--- udd/sql/setup.sql	2008-09-24 22:58:23 UTC (rev 1292)
+++ udd/sql/setup.sql	2008-09-26 15:55:17 UTC (rev 1293)
@@ -262,6 +262,18 @@
 
 GRANT SELECT ON lintian TO PUBLIC;
 
+-- Debtags
+
+-- one row per <package, tag> *pair*
+CREATE TABLE debtags (
+  package TEXT NOT NULL,
+  tag TEXT NOT NULL
+);
+
+GRANT SELECT ON debtags TO PUBLIC;
+
+CREATE INDEX debtags_tag_idx ON debtags(tag);
+
 -- Orphaned packages
 
 CREATE TABLE orphaned_packages (

Modified: udd/test.yaml
===================================================================
--- udd/test.yaml	2008-09-24 22:58:23 UTC (rev 1292)
+++ udd/test.yaml	2008-09-26 15:55:17 UTC (rev 1293)
@@ -200,3 +200,10 @@
   path: /org/udd.debian.net/mirrors/lintian.log
   table: lintian
   schema: lintian
+
+debtags:
+  type: debtags
+  update-command: rm -f /org/udd.debian.net/mirrors/debtags.txt && wget -q http://svn.debian.org/viewsvn/*checkout*/debtags/tagdb/tags -O /org/udd.debian.net/mirrors/debtags.txt
+  path: /org/udd.debian.net/mirrors/debtags.txt
+  table: debtags
+  schema: debtags

Added: udd/udd/debtags_gatherer.py
===================================================================
--- udd/udd/debtags_gatherer.py	                        (rev 0)
+++ udd/udd/debtags_gatherer.py	2008-09-26 15:55:17 UTC (rev 1293)
@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+
+# This file is a part of the Ultimate Debian Database
+# <http://wiki.debian.org/UltimateDebianDatabase>
+#
+# Copyright (C) 2008 Stefano Zacchiroli <zack at debian.org>
+#
+# This file is distributed under the terms of the General Public
+# License version 3 or (at your option) any later version.
+
+""" import debtags data into the database
+
+tag information is downloaded from SVN (though via http/websvn to
+avoid an extra dependency on svn), see the "update-command"
+configuration of the debtags gatherer
+"""
+
+import re
+import sys
+
+from gatherer import gatherer
+from aux import quote
+
+
+# a "live" instance of the tag database, whose lines should match the regexp
+# below, is at: http://svn.debian.org/viewsvn/*checkout*/debtags/tagdb/tags
+tag_line_RE = re.compile(r'^(?P<pkg>[a-z0-9+-\.]+):\s+(?P<tags>[\w:+-]+(,\s+[\w:+-]+)*)$')
+tag_sep_RE = re.compile(r',\s+')
+# field_sep_RE = re.compile(r':\s+')
+
+def parse_tags(fname):
+    global tag_line_RE, tag_sep_RE
+
+    line_no = 0
+    tags_db = file(fname)
+    for line in tags_db:
+        line_no += 1
+        line = line.strip()
+        parsed_line = tag_line_RE.match(line)
+        if not parsed_line:
+            print >> sys.stderr, \
+                "debtags: can not parse line %d: %s" % (line_no, line)
+        else:
+            parts = parsed_line.groupdict()
+            pkg = parts['pkg']
+            for tag in tag_sep_RE.split(parts['tags']):
+                yield (pkg, tag)
+    tags_db.close()
+
+
+def get_gatherer(connection, config, source):  # module-level factory
+    return debtags_gatherer(connection, config, source)  # args forwarded as-is
+
+
+class debtags_gatherer(gatherer):
+    """import debtags data into the database"""
+    
+    def __init__(self, connection, config, source):
+        gatherer.__init__(self, connection, config, source)
+        self.assert_my_config('path', 'table')
+
+    def run(self):
+        conf = self.my_config
+        cur = self.cursor()
+        cur.execute('DELETE FROM %s' % conf['table'])
+        cur.execute('PREPARE debtags_insert ' \
+                        'AS INSERT INTO %s (package, tag) VALUES ($1, $2)' \
+                        % conf['table'])
+        for (pkg, tag) in parse_tags(conf['path']):
+            cur.execute('EXECUTE debtags_insert (%s, %s)' \
+                            % (quote(pkg), quote(tag)))
+        cur.execute('DEALLOCATE debtags_insert')
+
+
+def test():
+    """given a filename on the cmdline, print all tuples <pkg, tag>
+    that would be inserted in the db. For debugging/testing purposes.
+    """
+    for (pkg, tag) in parse_tags(sys.argv[1]):
+        print "%s\t%s" % (pkg, tag)
+
+if __name__ == '__main__':
+    test()


Property changes on: udd/udd/debtags_gatherer.py
___________________________________________________________________
Name: svn:executable
   + *




More information about the Collab-qa-commits mailing list