[Collab-qa-commits] r1529 - udd/udd

Lucas Nussbaum lucas at alioth.debian.org
Thu Jul 23 00:28:12 UTC 2009


Author: lucas
Date: 2009-07-23 00:28:12 +0000 (Thu, 23 Jul 2009)
New Revision: 1529

Modified:
   udd/udd/packages_gatherer.py
   udd/udd/sources_gatherer.py
   udd/udd/upload_history_gatherer.py
Log:
Add _email and _name columns to sources, packages and upload_history. Use executemany() in the upload_history gatherer.

Modified: udd/udd/packages_gatherer.py
===================================================================
--- udd/udd/packages_gatherer.py	2009-07-23 00:27:03 UTC (rev 1528)
+++ udd/udd/packages_gatherer.py	2009-07-23 00:28:12 UTC (rev 1529)
@@ -11,6 +11,7 @@
 from aux import ConfigException
 import psycopg2
 from gatherer import gatherer
+import email.Utils
 import re
 
 def get_gatherer(connection, config, source):
@@ -80,7 +81,7 @@
     debian packages file."""
     pkgs = ()
     query = """EXECUTE package_insert
-      (%(Package)s, %(Version)s, %(Architecture)s, %(Maintainer)s,
+      (%(Package)s, %(Version)s, %(Architecture)s, %(Maintainer)s, %(maintainer_name)s, %(maintainer_email)s,
       %(Description)s, %(Long_Description)s, %(Source)s, %(Source_Version)s, %(Essential)s,
       %(Depends)s, %(Recommends)s, %(Suggests)s, %(Enhances)s,
       %(Pre-Depends)s, %(Breaks)s, %(Installed-Size)s, %(Homepage)s, %(Size)s,
@@ -124,8 +125,10 @@
 	else:
 	  d['Source'] = split[0]
 	  d['Source_Version'] = split[1].strip("()")
-        pkgs += (d,)
 
+      pkgs += (d,)
+
+      d['maintainer_name'], d['maintainer_email'] = email.Utils.parseaddr(d['Maintainer'])
     try:
       cur.executemany(query, pkgs)
     except psycopg2.ProgrammingError:
@@ -169,7 +172,7 @@
 	path = os.path.join(src_cfg['directory'], comp, 'binary-' + arch, 'Packages.gz')
 	try:
 	  cur.execute("""PREPARE package_insert AS INSERT INTO %s
-	    (Package, Version, Architecture, Maintainer, Description, Long_Description, Source,
+	    (Package, Version, Architecture, Maintainer, maintainer_name, maintainer_email, Description, Long_Description, Source,
 	    Source_Version, Essential, Depends, Recommends, Suggests, Enhances,
 	    Pre_Depends, Breaks, Installed_Size, Homepage, Size,
 	    build_essential, origin, sha1, replaces, section,
@@ -179,7 +182,7 @@
 	  VALUES
 	    ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15,
 	      $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28,
-	      $29, $30, $31, $32, $33, '%s', '%s', '%s')
+	      $29, $30, $31, $32, $33, $34, $35, '%s', '%s', '%s')
 	    """ %  (table, self._distr, src_cfg['release'], comp))
 #	  aux.print_debug("Reading file " + path)
 	  # Copy content from gzipped file to temporary file, so that apt_pkg is
@@ -198,10 +201,9 @@
     # Fill the summary tables
     cur.execute("DELETE FROM %s" % (table + '_summary'));
     cur.execute("""INSERT INTO %s (package, version, source, source_version,
-        maintainer, distribution, release, component)
+        maintainer, maintainer_name, maintainer_email, distribution, release, component)
       SELECT DISTINCT ON (package, version, distribution, release, component)
-        package, version, source, source_version, maintainer, distribution,
-        release, component
+        package, version, source, source_version, maintainer, maintainer_name, maintainer_email, distribution, release, component
       FROM %s""" % (table + '_summary', table));
     cur.execute("DELETE FROM %s" % (table + '_distrelcomparch'));
     cur.execute("""INSERT INTO %s

Modified: udd/udd/sources_gatherer.py
===================================================================
--- udd/udd/sources_gatherer.py	2009-07-23 00:27:03 UTC (rev 1528)
+++ udd/udd/sources_gatherer.py	2009-07-23 00:28:12 UTC (rev 1529)
@@ -102,18 +102,20 @@
       %(Section)s, %(Vcs-Type)s, %(Vcs-Url)s, %(Vcs-Browser)s,
       %(Python-Version)s, %(Checksums-Sha1)s, %(Checksums-Sha256)s,
       %(Original-Maintainer)s, %(Dm-Upload-Allowed)s)"""
-    query_uploaders = """EXECUTE uploader_insert (%(Package)s, %(Version)s, %(Name)s, %(Email)s)"""
+    query_uploaders = """EXECUTE uploader_insert (%(Package)s, %(Version)s,
+      %(Uploader)s, %(Name)s, %(Email)s)"""
     uploaders = ()
     for control in debian_bundle.deb822.Packages.iter_paragraphs(file):
       d = self.build_dict(control)
       d['maintainer_name'], d['maintainer_email'] = email.Utils.parseaddr(d['Maintainer'])
       pkgs += (d,)
 
-      ud = {}
-      ud['Package'] = d['Package']
-      ud['Version'] = d['Version']
       if d['Uploaders']:
         for uploader in email.Utils.getaddresses([d['Uploaders']]):
+          ud = {}
+          ud['Package'] = d['Package']
+          ud['Version'] = d['Version']
+          ud['Uploader'] = email.Utils.formataddr(uploader)
           ud['Name'] = uploader[0]
           ud['Email'] = uploader[1]
           uploaders += (ud,)
@@ -156,8 +158,8 @@
 	  % (table, src_cfg['distribution'], src_cfg['release'], comp)
 	cur.execute(query)
 	query = """PREPARE uploader_insert as INSERT INTO %s
-	  (Source, Version, Distribution, Release, Component, Name, Email) VALUES
-	  ($1, $2, '%s', '%s', '%s', $3, $4) """ % \
+	  (Source, Version, Distribution, Release, Component, Uploader, Name, Email) VALUES
+	  ($1, $2, '%s', '%s', '%s', $3, $4, $5) """ % \
 	(utable, src_cfg['distribution'], src_cfg['release'], comp)
 	cur.execute(query)
 

Modified: udd/udd/upload_history_gatherer.py
===================================================================
--- udd/udd/upload_history_gatherer.py	2009-07-23 00:27:03 UTC (rev 1528)
+++ udd/udd/upload_history_gatherer.py	2009-07-23 00:28:12 UTC (rev 1529)
@@ -7,6 +7,7 @@
 import gzip
 import psycopg2
 import sys
+import email.Utils
 
 def get_gatherer(config, connection, source):
   return upload_history_gatherer(config, connection, source)
@@ -34,8 +35,8 @@
     cursor.execute("DELETE FROM " + self.my_config['table'])
 
     cursor.execute("PREPARE uh_insert AS INSERT INTO %s (id, package, \
-        version, date, changed_by, maintainer, nmu, signed_by, key_id, fingerprint) VALUES \
-	($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)" % self.my_config['table'])
+        version, date, changed_by, changed_by_name, changed_by_email, maintainer, maintainer_name, maintainer_email, nmu, signed_by, signed_by_name, signed_by_email, key_id, fingerprint) VALUES \
+	($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16)" % self.my_config['table'])
     cursor.execute("PREPARE uh_arch_insert AS INSERT INTO %s (id, \
     	architecture) VALUES \
 	($1, $2)" % (self.my_config['table'] + '_architecture'))
@@ -43,6 +44,16 @@
     	VALUES ($1, $2)" % (self.my_config['table'] + '_closes'))
 
     id = 0
+    query = "EXECUTE uh_insert(%(id)s, %(Source)s, %(Version)s, %(Date)s, \
+      %(Changed-By)s, %(Changed-By_name)s, %(Changed-By_email)s, \
+      %(Maintainer)s, %(Maintainer_name)s, %(Maintainer_email)s, %(NMU)s, \
+      %(Signed-By)s, %(Signed-By_name)s, %(Signed-By_email)s, %(Key)s, \
+      %(Fingerprint)s)"
+    query_archs = "EXECUTE uh_arch_insert(%(id)s, %(arch)s)"
+    query_closes = "EXECUTE uh_close_insert(%(id)s, %(closes)s)"
+    uploads = ()
+    uploads_archs = ()
+    uploads_closes = ()
     for name in glob(path + '/debian-devel-changes.*'):
       # print name
       f = None
@@ -50,33 +61,36 @@
 	f = gzip.open(name)
       else:
 	f = open(name)
-      
       current = {'id': id}
       current['Fingerprint'] = 'N/A' # hack: some entries don't have fp
       last_field = None
       line_count = 0
+
       for line in f:
 	line_count += 1
 	line = line.strip()
 	# Stupid multi-line maintainer fields *grml*
 	if line == '':
-	  try:
-	    query = "EXECUTE uh_insert(%(id)s, %(Source)s, %(Version)s, %(Date)s, %(Changed-By)s, \
-		%(Maintainer)s, %(NMU)s, %(Signed-By)s, %(Key)s, %(Fingerprint)s)"
-	    cursor.execute(query, current)
-	    for arch in set(current['Architecture'].split()):
-	      current['arch'] = arch
-	      query = "EXECUTE uh_arch_insert(%(id)s, %(arch)s)"
-	      cursor.execute(query, current)
-	    if current['Closes'] != 'N/A':
-	      for closes in set(current['Closes'].split()):
-		current['closes'] = closes
-		query = "EXECUTE uh_close_insert(%(id)s, %(closes)s)"
-		cursor.execute(query, current)
-	  except psycopg2.ProgrammingError, s:
-	    print "Error at line %d of file %s" % (line_count, name)
-	    continue
-	    #raise
+          current['Changed-By_name'], current['Changed-By_email'] = email.Utils.parseaddr(current['Changed-By'])
+          current['Maintainer_name'], current['Maintainer_email'] = email.Utils.parseaddr(current['Maintainer'])
+          current['Signed-By_name'], current['Signed-By_email'] = email.Utils.parseaddr(current['Signed-By'])
+          uploads += (current,)
+	  for arch in set(current['Architecture'].split()):
+	    current_arch = {'id': id}
+	    current_arch['arch'] = arch
+            uploads_archs += (current_arch,)
+	  if current['Closes'] != 'N/A':
+	    for closes in set(current['Closes'].split()):
+	      current_closes = {'id': id}
+	      current_closes['closes'] = closes
+              uploads_closes += (current_closes,)
+          if len(uploads) > 100:
+            cursor.executemany(query, uploads)
+            cursor.executemany(query_archs, uploads_archs)
+            cursor.executemany(query_closes, uploads_closes)
+            uploads = ()
+            uploads_archs = ()
+            uploads_closes = ()
 	  id += 1
 	  current = {'id': id}
 	  current['Fingerprint'] = 'N/A' # hack: some entries don't have fp
@@ -89,11 +103,13 @@
 	  current[last_field] += line
 	  continue
 
-
 	(field, data) = line.split(':', 1)
 	data = data.strip()
 	current[field] = data
 	
 	last_field = field
-    
+      
+    cursor.executemany(query, uploads)
+    cursor.executemany(query_archs, uploads_archs)
+    cursor.executemany(query_closes, uploads_closes)
     cursor.execute("DEALLOCATE uh_insert")




More information about the Collab-qa-commits mailing list