[Collab-qa-commits] r1529 - udd/udd
Lucas Nussbaum
lucas at alioth.debian.org
Thu Jul 23 00:28:12 UTC 2009
Author: lucas
Date: 2009-07-23 00:28:12 +0000 (Thu, 23 Jul 2009)
New Revision: 1529
Modified:
udd/udd/packages_gatherer.py
udd/udd/sources_gatherer.py
udd/udd/upload_history_gatherer.py
Log:
Add _name and _email columns to the sources, packages and upload_history tables. Use executemany() in the upload_history gatherer.
Modified: udd/udd/packages_gatherer.py
===================================================================
--- udd/udd/packages_gatherer.py 2009-07-23 00:27:03 UTC (rev 1528)
+++ udd/udd/packages_gatherer.py 2009-07-23 00:28:12 UTC (rev 1529)
@@ -11,6 +11,7 @@
from aux import ConfigException
import psycopg2
from gatherer import gatherer
+import email.Utils
import re
def get_gatherer(connection, config, source):
@@ -80,7 +81,7 @@
debian packages file."""
pkgs = ()
query = """EXECUTE package_insert
- (%(Package)s, %(Version)s, %(Architecture)s, %(Maintainer)s,
+ (%(Package)s, %(Version)s, %(Architecture)s, %(Maintainer)s, %(maintainer_name)s, %(maintainer_email)s,
%(Description)s, %(Long_Description)s, %(Source)s, %(Source_Version)s, %(Essential)s,
%(Depends)s, %(Recommends)s, %(Suggests)s, %(Enhances)s,
%(Pre-Depends)s, %(Breaks)s, %(Installed-Size)s, %(Homepage)s, %(Size)s,
@@ -124,8 +125,10 @@
else:
d['Source'] = split[0]
d['Source_Version'] = split[1].strip("()")
- pkgs += (d,)
+ pkgs += (d,)
+
+ d['maintainer_name'], d['maintainer_email'] = email.Utils.parseaddr(d['Maintainer'])
try:
cur.executemany(query, pkgs)
except psycopg2.ProgrammingError:
@@ -169,7 +172,7 @@
path = os.path.join(src_cfg['directory'], comp, 'binary-' + arch, 'Packages.gz')
try:
cur.execute("""PREPARE package_insert AS INSERT INTO %s
- (Package, Version, Architecture, Maintainer, Description, Long_Description, Source,
+ (Package, Version, Architecture, Maintainer, maintainer_name, maintainer_email, Description, Long_Description, Source,
Source_Version, Essential, Depends, Recommends, Suggests, Enhances,
Pre_Depends, Breaks, Installed_Size, Homepage, Size,
build_essential, origin, sha1, replaces, section,
@@ -179,7 +182,7 @@
VALUES
( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15,
$16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28,
- $29, $30, $31, $32, $33, '%s', '%s', '%s')
+ $29, $30, $31, $32, $33, $34, $35, '%s', '%s', '%s')
""" % (table, self._distr, src_cfg['release'], comp))
# aux.print_debug("Reading file " + path)
# Copy content from gzipped file to temporary file, so that apt_pkg is
@@ -198,10 +201,9 @@
# Fill the summary tables
cur.execute("DELETE FROM %s" % (table + '_summary'));
cur.execute("""INSERT INTO %s (package, version, source, source_version,
- maintainer, distribution, release, component)
+ maintainer, maintainer_name, maintainer_email, distribution, release, component)
SELECT DISTINCT ON (package, version, distribution, release, component)
- package, version, source, source_version, maintainer, distribution,
- release, component
+ package, version, source, source_version, maintainer, maintainer_name, maintainer_email, distribution, release, component
FROM %s""" % (table + '_summary', table));
cur.execute("DELETE FROM %s" % (table + '_distrelcomparch'));
cur.execute("""INSERT INTO %s
Modified: udd/udd/sources_gatherer.py
===================================================================
--- udd/udd/sources_gatherer.py 2009-07-23 00:27:03 UTC (rev 1528)
+++ udd/udd/sources_gatherer.py 2009-07-23 00:28:12 UTC (rev 1529)
@@ -102,18 +102,20 @@
%(Section)s, %(Vcs-Type)s, %(Vcs-Url)s, %(Vcs-Browser)s,
%(Python-Version)s, %(Checksums-Sha1)s, %(Checksums-Sha256)s,
%(Original-Maintainer)s, %(Dm-Upload-Allowed)s)"""
- query_uploaders = """EXECUTE uploader_insert (%(Package)s, %(Version)s, %(Name)s, %(Email)s)"""
+ query_uploaders = """EXECUTE uploader_insert (%(Package)s, %(Version)s,
+ %(Uploader)s, %(Name)s, %(Email)s)"""
uploaders = ()
for control in debian_bundle.deb822.Packages.iter_paragraphs(file):
d = self.build_dict(control)
d['maintainer_name'], d['maintainer_email'] = email.Utils.parseaddr(d['Maintainer'])
pkgs += (d,)
- ud = {}
- ud['Package'] = d['Package']
- ud['Version'] = d['Version']
if d['Uploaders']:
for uploader in email.Utils.getaddresses([d['Uploaders']]):
+ ud = {}
+ ud['Package'] = d['Package']
+ ud['Version'] = d['Version']
+ ud['Uploader'] = email.Utils.formataddr(uploader)
ud['Name'] = uploader[0]
ud['Email'] = uploader[1]
uploaders += (ud,)
@@ -156,8 +158,8 @@
% (table, src_cfg['distribution'], src_cfg['release'], comp)
cur.execute(query)
query = """PREPARE uploader_insert as INSERT INTO %s
- (Source, Version, Distribution, Release, Component, Name, Email) VALUES
- ($1, $2, '%s', '%s', '%s', $3, $4) """ % \
+ (Source, Version, Distribution, Release, Component, Uploader, Name, Email) VALUES
+ ($1, $2, '%s', '%s', '%s', $3, $4, $5) """ % \
(utable, src_cfg['distribution'], src_cfg['release'], comp)
cur.execute(query)
Modified: udd/udd/upload_history_gatherer.py
===================================================================
--- udd/udd/upload_history_gatherer.py 2009-07-23 00:27:03 UTC (rev 1528)
+++ udd/udd/upload_history_gatherer.py 2009-07-23 00:28:12 UTC (rev 1529)
@@ -7,6 +7,7 @@
import gzip
import psycopg2
import sys
+import email.Utils
def get_gatherer(config, connection, source):
return upload_history_gatherer(config, connection, source)
@@ -34,8 +35,8 @@
cursor.execute("DELETE FROM " + self.my_config['table'])
cursor.execute("PREPARE uh_insert AS INSERT INTO %s (id, package, \
- version, date, changed_by, maintainer, nmu, signed_by, key_id, fingerprint) VALUES \
- ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)" % self.my_config['table'])
+ version, date, changed_by, changed_by_name, changed_by_email, maintainer, maintainer_name, maintainer_email, nmu, signed_by, signed_by_name, signed_by_email, key_id, fingerprint) VALUES \
+ ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16)" % self.my_config['table'])
cursor.execute("PREPARE uh_arch_insert AS INSERT INTO %s (id, \
architecture) VALUES \
($1, $2)" % (self.my_config['table'] + '_architecture'))
@@ -43,6 +44,16 @@
VALUES ($1, $2)" % (self.my_config['table'] + '_closes'))
id = 0
+ query = "EXECUTE uh_insert(%(id)s, %(Source)s, %(Version)s, %(Date)s, \
+ %(Changed-By)s, %(Changed-By_name)s, %(Changed-By_email)s, \
+ %(Maintainer)s, %(Maintainer_name)s, %(Maintainer_email)s, %(NMU)s, \
+ %(Signed-By)s, %(Signed-By_name)s, %(Signed-By_email)s, %(Key)s, \
+ %(Fingerprint)s)"
+ query_archs = "EXECUTE uh_arch_insert(%(id)s, %(arch)s)"
+ query_closes = "EXECUTE uh_close_insert(%(id)s, %(closes)s)"
+ uploads = ()
+ uploads_archs = ()
+ uploads_closes = ()
for name in glob(path + '/debian-devel-changes.*'):
# print name
f = None
@@ -50,33 +61,36 @@
f = gzip.open(name)
else:
f = open(name)
-
current = {'id': id}
current['Fingerprint'] = 'N/A' # hack: some entries don't have fp
last_field = None
line_count = 0
+
for line in f:
line_count += 1
line = line.strip()
# Stupid multi-line maintainer fields *grml*
if line == '':
- try:
- query = "EXECUTE uh_insert(%(id)s, %(Source)s, %(Version)s, %(Date)s, %(Changed-By)s, \
- %(Maintainer)s, %(NMU)s, %(Signed-By)s, %(Key)s, %(Fingerprint)s)"
- cursor.execute(query, current)
- for arch in set(current['Architecture'].split()):
- current['arch'] = arch
- query = "EXECUTE uh_arch_insert(%(id)s, %(arch)s)"
- cursor.execute(query, current)
- if current['Closes'] != 'N/A':
- for closes in set(current['Closes'].split()):
- current['closes'] = closes
- query = "EXECUTE uh_close_insert(%(id)s, %(closes)s)"
- cursor.execute(query, current)
- except psycopg2.ProgrammingError, s:
- print "Error at line %d of file %s" % (line_count, name)
- continue
- #raise
+ current['Changed-By_name'], current['Changed-By_email'] = email.Utils.parseaddr(current['Changed-By'])
+ current['Maintainer_name'], current['Maintainer_email'] = email.Utils.parseaddr(current['Maintainer'])
+ current['Signed-By_name'], current['Signed-By_email'] = email.Utils.parseaddr(current['Signed-By'])
+ uploads += (current,)
+ for arch in set(current['Architecture'].split()):
+ current_arch = {'id': id}
+ current_arch['arch'] = arch
+ uploads_archs += (current_arch,)
+ if current['Closes'] != 'N/A':
+ for closes in set(current['Closes'].split()):
+ current_closes = {'id': id}
+ current_closes['closes'] = closes
+ uploads_closes += (current_closes,)
+ if len(uploads) > 100:
+ cursor.executemany(query, uploads)
+ cursor.executemany(query_archs, uploads_archs)
+ cursor.executemany(query_closes, uploads_closes)
+ uploads = ()
+ uploads_archs = ()
+ uploads_closes = ()
id += 1
current = {'id': id}
current['Fingerprint'] = 'N/A' # hack: some entries don't have fp
@@ -89,11 +103,13 @@
current[last_field] += line
continue
-
(field, data) = line.split(':', 1)
data = data.strip()
current[field] = data
last_field = field
-
+
+ cursor.executemany(query, uploads)
+ cursor.executemany(query_archs, uploads_archs)
+ cursor.executemany(query_closes, uploads_closes)
cursor.execute("DEALLOCATE uh_insert")
More information about the Collab-qa-commits
mailing list