[Collab-qa-commits] r902 - in udd/src: . udd
neronus-guest at alioth.debian.org
neronus-guest at alioth.debian.org
Sun Jun 29 10:56:24 UTC 2008
Author: neronus-guest
Date: 2008-06-29 10:56:23 +0000 (Sun, 29 Jun 2008)
New Revision: 902
Added:
udd/src/udd/aux.py
udd/src/udd/gatherer.py
udd/src/udd/packages_gatherer.py
udd/src/udd/popcon_gatherer.py
udd/src/udd/sources_gatherer.py
Removed:
udd/src/aux.py
udd/src/packages_gatherer.py
udd/src/popcon_gatherer.py
udd/src/sources_gatherer.py
Modified:
udd/src/setup-db.sql
udd/src/test.yaml
udd/src/udd-dispatch.py
Log:
* It is possible to use python modules now instead of executable files
* Modified our three existing gatherers to do so
Deleted: udd/src/aux.py
===================================================================
--- udd/src/aux.py 2008-06-28 15:50:00 UTC (rev 901)
+++ udd/src/aux.py 2008-06-29 10:56:23 UTC (rev 902)
@@ -1,101 +0,0 @@
-"""Auxillary methods for the UDD"""
-
-import syck
-import sys
-import psycopg2
-
-# If debug is something that evaluates to True, then print_debug actually prints something
-debug = 0
-
-def get_archs(conn):
- """Return a dicitionary, mapping from architecture names to their ids.
-
- This mapping is retrivied from the connection <conn>"""
- cur = conn.cursor()
- cur.execute("SELECT * from arch_ids")
- result = {}
- for row in cur.fetchall():
- result[row[1]] = row[0]
- return result
-
-def get_distrs(conn):
- """Return a dicitionary, mapping from distribution names to their ids.
-
- This mapping is retrivied from the connection <conn>"""
- cur = conn.cursor()
- cur.execute("SELECT * from distr_ids")
- result = {}
- for row in cur.fetchall():
- result[row[1]] = row[0]
- return result
-
-class ConfigException(Exception):
- def __init__(self, message):
- Exception(self)
- self.message = message
-
- def __str__(self):
- return "ConfigException: " + self.message
-
-def open_connection(config):
- """Open the connection to the database and return it"""
- return psycopg2.connect("dbname=" + config['general']['dbname'])
-
-def load_config(str):
- """Load and check configuration from the string"""
- config = syck.load(str)
- if not 'general' in config:
- raise ConfigException('general section not specified')
-
- general = config['general']
-
- if not 'dbname' in general:
- raise ConfigException('dbname not specified')
-
- if not 'archs' in general:
- raise ConfigException('archs not specified')
-
- if not 'types' in general:
- raise ConfigException('types not specified')
-
- if not 'debug' in general:
- general['debug'] = 0
-
- # Check that the source-entries are well-formed
- for name in config:
- if name == 'general':
- continue
-
- src = config[name]
- if not 'type' in src:
- raise ConfigException('type not specified for "%s"' % name)
- if src['type'] not in general['types']:
- raise ConfigException('Type of %s not specified in types' % name)
-
- return config
-
-def insert_distr(conn, distr_name):
- "Insert distribution <distr_name> into DB"
- cur = conn.cursor()
- cur.execute("INSERT INTO distr_ids (name) VALUES ('%s')" % distr_name)
-
-def print_debug(*args):
- "Print arguments to stdout if debug is set to something that evaluates to true"
- if debug:
- sys.stdout.write(*args)
- sys.stdout.write("\n")
-
-class BufferedLineReader:
- """Provides an iterator over the input of the specified file."""
- def __init__(self, file, cache_size):
- self.cache_size = cache_size
- self.file = file
-
- def __iter__(self):
- while True:
- lines = self.file.readlines(self.cache_size)
- if len(lines) == 0:
- break
- for line in lines:
- yield line
-
Deleted: udd/src/packages_gatherer.py
===================================================================
--- udd/src/packages_gatherer.py 2008-06-28 15:50:00 UTC (rev 901)
+++ udd/src/packages_gatherer.py 2008-06-29 10:56:23 UTC (rev 902)
@@ -1,188 +0,0 @@
-#/usr/bin/env python
-# Last-Modified: <Sat Jun 28 15:48:36 2008>
-
-import debian_bundle.deb822
-import gzip
-import os
-import sys
-import aux
-import tempfile
-from aux import ConfigException
-import psycopg2
-from gatherer import gatherer
-
-def quote(s):
- return "'" + s.replace("'", "\\'") + "'"
-
-def null_or_quote(dict, key):
- if key in dict:
- return quote(dict[key])
- else:
- return 'NULL'
-
-class packages_gatherer(gatherer):
- # For efficiency, these are dictionaries
- mandatory = {'Package': 0, 'Version': 0, 'Architecture': 0, 'Maintainer': 0,
- 'Description': 0}
- non_mandatory = {'Source': 0, 'Essential': 0, 'Depends': 0, 'Recommends': 0,
- 'Suggests': 0, 'Enhances': 0, 'Pre-Depends': 0, 'Installed-Size': 0,
- 'Homepage': 0, 'Size': 0, 'MD5Sum': 0}
- ignorable = ()
-
- warned_about = {}
- # A mapping from <package-name><version> to 1 If <package-name><version> is
- # included in this dictionary, this means, that we've already added this
- # package with this version for architecture 'all' to the database. Needed
- # because different architectures include packages for architecture 'all'
- # with the same version, and we don't want these duplicate entries
- imported_all_pkgs = {}
-
- def __init__(self, connection, config):
- gatherer.__init__(self, connection, config)
- # The ID for the distribution we want to include
- self._distr = None
-
- def build_dict(self, control):
- """Build a dictionary from the control dictionary.
-
- Influenced by class variables mandatory, non_mandatory and ignorable"""
- d = {}
- for k in packages_gatherer.mandatory:
- if k not in control:
- raise "Mandatory field %s not specified" % k
- d[k] = "'" + control[k].replace("\\", "\\\\").replace("'", "\\'") + "'"
- for k in packages_gatherer.non_mandatory:
- d[k] = packages_gatherer.null_or_quote(control, k)
- for k in control.keys():
- if k not in packages_gatherer.mandatory and k not in packages_gatherer.non_mandatory and k not in packages_gatherer.ignorable:
- if k not in packages_gatherer.warned_about:
- packages_gatherer.warned_about[k] = 1
- else:
- packages_gatherer.warned_about[k] += 1
- return d
-
- def import_packages(self, sequence):
- """Import the packages from the sequence into the database-connection
- conn.
-
- Sequence has to have an iterator interface, that yields a line every time
- it is called.The Format of the sequence is expected to be that of a
- debian packages file."""
- # The fields that are to be read. Other fields are ignored
- cur = self.connection.cursor()
- for control in debian_bundle.deb822.Packages.iter_paragraphs(sequence):
- # Check whether packages with architectue 'all' have already been
- # imported
- if control['Architecture'] == 'all':
- t = control['Package'] + control['Version']
- if t in packages_gatherer.imported_all_pkgs:
- continue
- packages_gatherer.imported_all_pkgs[t] = 1
-
- d = self.build_dict(control)
-
- # These are integer values - we don't need quotes for them
- if d['Installed-Size'] != 'NULL':
- d['Installed-Size'] = d['Installed-Size'].strip("'")
- if d['Size'] != 'NULL':
- d['Size'] = d['Size'].strip("'")
-
- # We just use the first line of the description
- if d['Description'] != "NULL":
- d['Description'] = d['Description'].split("\n",1)[0]
- # If the description was a one-liner only, we don't need to add
- # a quote
- if d['Description'][-1] != "'" or d['Description'][-2] == '\\':
- d['Description'] += "'"
-
- # Source is non-mandatory, but we don't want it to be NULL
- if d['Source'] == "NULL":
- d['Source'] = d['Package']
- d['Source_Version'] = d['Version']
- else:
- split = d['Source'].strip("'").split()
- if len(split) == 1:
- d['Source_Version'] = d['Version']
- else:
- d['Source'] = quote(split[0])
- d['Source_Version'] = quote(split[1].strip("()"))
-
- query = """EXECUTE package_insert
- (%(Package)s, %(Version)s, %(Architecture)s, %(Maintainer)s,
- %(Description)s, %(Source)s, %(Source_Version)s, %(Essential)s,
- %(Depends)s, %(Recommends)s, %(Suggests)s, %(Enhances)s,
- %(Pre-Depends)s, %(Installed-Size)s, %(Homepage)s, %(Size)s,
- %(MD5Sum)s)""" % d
- try:
- cur.execute(query)
- except psycopg2.ProgrammingError:
- print query
- raise
-
- def run(self, source):
- if not source in self.config:
- raise ConfigException, "Source %s not specified" %(source)
- src_cfg = self.config[source]
-
- if not 'directory' in src_cfg:
- raise ConfigException('directory not specified for source %s' %
- (source))
-
- if not 'archs' in src_cfg:
- raise ConfigException('archs not specified for source %s' %
- (source))
-
- if not 'release' in src_cfg:
- raise ConfigException('release not specified for source %s' %
- (source))
-
- if not 'components' in src_cfg:
- raise ConfigException('components not specified for source %s' %
- (source))
-
- if not 'distribution' in src_cfg:
- raise ConfigException('distribution not specified for source %s' %
- (source))
-
- aux.debug = self.config['general']['debug']
-
- # Get distribution ID. If it does not exist, create it
- self._distr = src_cfg['distribution']
-
- cur = self.cursor()
- #cur.execute("PREPARE pkg_insert AS INSERT INTO pkgs (name, distr_id, arch_id, version, src_id) VALUES ($1, $2, $3, $4, $5);")
-
- # For every part and every architecture, import the packages into the DB
- for comp in src_cfg['components']:
- for arch in src_cfg['archs']:
- path = os.path.join(src_cfg['directory'], comp, 'binary-' + arch, 'Packages.gz')
- try:
- cur.execute("""PREPARE package_insert AS INSERT INTO Packages
- (Package, Version, Architecture, Maintainer, Description, Source,
- Source_Version, Essential, Depends, Recommends, Suggests, Enhances,
- Pre_Depends, Installed_Size, Homepage, Size, MD5Sum, Distribution,
- Release, Component)
- VALUES
- ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15,
- $16, $17, '%s', '%s', '%s')
- """ % (distr, src_cfg['release'], comp))
- aux.print_debug("Reading file " + path)
- # Copy content from gzipped file to temporary file, so that apt_pkg is
- # used by debian_bundle
- tmp = tempfile.NamedTemporaryFile()
- file = gzip.open(path)
- tmp.write(file.read())
- file.close()
- tmp.seek(0)
- aux.print_debug("Importing from " + path)
- self.import_packages(open(tmp.name))
- tmp.close()
- except IOError, (e, message):
- print "Could not read packages from %s: %s" % (path, message)
- cur.execute("DEALLOCATE package_insert")
-
- self.connection.commit()
-
- def print_warnings(self):
- for key in packages_gatherer.warned_about:
- print("Unknown key: %s appeared %d times" % (key, packages_gatherer.warned_about[key]))
Deleted: udd/src/popcon_gatherer.py
===================================================================
--- udd/src/popcon_gatherer.py 2008-06-28 15:50:00 UTC (rev 901)
+++ udd/src/popcon_gatherer.py 2008-06-29 10:56:23 UTC (rev 902)
@@ -1,51 +0,0 @@
-#!/usr/bin/env python
-
-"""
-This script imports the popcon data into the database
-See http://popcon.debian.org/
-"""
-
-import aux
-import sys
-import gzip
-
-def main():
- if len(sys.argv) != 3:
- print 'Usage: %s <config-file> <source>' % sys.argv[0]
- sys.exit(1)
-
- config = aux.load_config(open(sys.argv[1]).read())
- source = sys.argv[2]
-
- try:
- my_config = config[source]
- except:
- raise
-
- if not 'path' in my_config:
- raise aux.ConfigException, "path not configured for source " % source
-
- conn = aux.open_connection(config)
-
- cur = conn.cursor()
-
- cur.execute("PREPARE pop_insert AS INSERT INTO popcon (name, vote, olde, recent, nofiles) VALUES ($1, $2, $3, $4, $5)")
-
- popcon = gzip.open(my_config['path'])
-
- for line in popcon.readlines():
- name, data = line.split(None, 1)
- if name == "Submissions:":
- cur.execute("INSERT INTO popcon (name, vote) VALUES ('_submissions', %s)" % (data))
- try:
- (name, vote, old, recent, nofiles) = data.split()
- cur.execute("EXECUTE pop_insert('%s', %s, %s, %s, %s)" %\
- (name, vote, old, recent, nofiles))
- except ValueError:
- continue
-
- cur.execute("DEALLOCATE pop_insert")
- conn.commit()
-
-if __name__ == '__main__':
- main()
Modified: udd/src/setup-db.sql
===================================================================
--- udd/src/setup-db.sql 2008-06-28 15:50:00 UTC (rev 901)
+++ udd/src/setup-db.sql 2008-06-29 10:56:23 UTC (rev 902)
@@ -52,4 +52,4 @@
GRANT SELECT ON popcon TO PUBLIC;
GRANT SELECT ON popcon_average TO PUBLIC;
-GRANT SELECT ON popcon_sum TO PUBLIC;
+GRANT SELECT ON popcon_max TO PUBLIC;
Deleted: udd/src/sources_gatherer.py
===================================================================
--- udd/src/sources_gatherer.py 2008-06-28 15:50:00 UTC (rev 901)
+++ udd/src/sources_gatherer.py 2008-06-29 10:56:23 UTC (rev 902)
@@ -1,149 +0,0 @@
-#/usr/bin/env python
-# Last-Modified: <Thu Jun 26 10:20:28 2008>
-
-import debian_bundle.deb822
-import gzip
-import os
-import sys
-import aux
-import tempfile
-from aux import ConfigException
-
-distr = None
-
-mandatory = {'Format': 0, 'Maintainer': 0, 'Package': 0, 'Version': 0, 'Files': 0}
-non_mandatory = {'Uploaders': 0, 'Binary': 0, 'Architecture': 0,
- 'Standards-Version': 0, 'Homepage': 0, 'Build-Depends': 0,
- 'Build-Depends-Indep': 0, 'Build-Conflicts': 0, 'Build-Conflicts-Indep': 0,
- 'Priority': 0, 'Section': 0, 'Vcs-Arch': 0, 'Vcs-Browser': 0, 'Vcs-Bzr': 0,
- 'Vcs-Cvs': 0, 'Vcs-Darcs': 0, 'Vcs-Git': 0, 'Vcs-Hg': 0, 'Vcs-Svn': 0,
- 'X-Vcs-Browser': 0, 'X-Vcs-Bzr': 0, 'X-Vcs-Darcs': 0, 'X-Vcs-Svn': 0}
-
-ignorable = {}
-
-def null_or_quote(dict, key):
- if key in dict:
- return "'" + dict[key].replace("'", "\\'") + "'"
- else:
- return 'NULL'
-
-warned_about = {}
-def build_dict(control):
- """Build a dictionary from the control dictionary.
-
- Influenced by global variables mandatory, non_mandatory and ignorable"""
- global mandatory, non_mandatory
- d = {}
- for k in mandatory:
- if k not in control:
- raise "Mandatory field %s not specified" % k
- d[k] = "'" + control[k].replace("\\", "\\\\").replace("'", "\\'") + "'"
- for k in non_mandatory:
- d[k] = null_or_quote(control, k)
- for k in control.keys():
- if k not in mandatory and k not in non_mandatory and k not in ignorable:
- if k not in warned_about:
- warned_about[k] = 1
- else:
- warned_about[k] += 1
- return d
-
-def import_sources(conn, file):
- """Import the sources from the file into the database-connection conn.
-
- Sequence has to have an iterator interface, that yields a line every time it
- is called.The Format of the file is expected to be that of a debian
- source file."""
- cur = conn.cursor()
- for control in debian_bundle.deb822.Packages.iter_paragraphs(file):
- d = build_dict(control)
- query = """EXECUTE source_insert
- (%(Package)s, %(Version)s, %(Maintainer)s, %(Format)s, %(Files)s,
- %(Uploaders)s, %(Binary)s, %(Architecture)s, %(Standards-Version)s,
- %(Homepage)s, %(Build-Depends)s, %(Build-Depends-Indep)s,
- %(Build-Conflicts)s, %(Build-Conflicts-Indep)s, %(Priority)s,
- %(Section)s, %(Vcs-Arch)s, %(Vcs-Browser)s, %(Vcs-Bzr)s, %(Vcs-Cvs)s,
- %(Vcs-Darcs)s, %(Vcs-Git)s, %(Vcs-Hg)s, %(Vcs-Svn)s, %(X-Vcs-Browser)s,
- %(X-Vcs-Bzr)s, %(X-Vcs-Darcs)s, %(X-Vcs-Svn)s)
- """ % d
- cur.execute(query)
-
-def main():
- global distr
- if len(sys.argv) != 3:
- print "Usage: %s <config> <source>" % sys.argv[0]
- sys.exit(1)
-
- src_name = sys.argv[2]
- cfg_path = sys.argv[1]
- config = None
- try:
- config = aux.load_config(open(cfg_path).read())
- except ConfigException, e:
- raise ConfigException, "Configuration error in " + cfg_path +": " + e.message
-
- if not src_name in config:
- raise ConfigException, "Source %s not specified in %s" %(src_name, cfg_path)
- src_cfg = config[src_name]
-
- if not 'directory' in src_cfg:
- raise ConfigException('directory not specified for source %s in file %s' %
- (src_name, cfg_path))
-
- if not 'components' in src_cfg:
- raise ConfigException('parts not specified for source %s in file %s' %
- (src_name, cfg_path))
-
- if not 'distribution' in src_cfg:
- raise ConfigException('distribution not specified for source %s in file %s' %
- (src_name, cfg_path))
-
- if not 'release' in src_cfg:
- raise ConfigException('release not specified for source %s in file %s' %
- (src_name, cfg_path))
-
- aux.debug = config['general']['debug']
-
- conn = aux.open_connection(config)
-
- cur = conn.cursor()
-
- for comp in src_cfg['components']:
- path = os.path.join(src_cfg['directory'], comp, 'source', 'Sources.gz')
- try:
- query = """PREPARE source_insert as INSERT INTO sources
- (Package, Version, Maintainer, Format, Files, Uploaders, Bin,
- Architecture, Standards_Version, Homepage, Build_Depends,
- Build_Depends_Indep, Build_Conflicts, Build_Conflicts_Indep, Priority,
- Section, Vcs_Arch, Vcs_Browser, Vcs_Bzr, Vcs_Cvs, Vcs_Darcs, Vcs_Git,
- Vcs_Hg, Vcs_Svn, X_Vcs_Browser, X_Vcs_Bzr, X_Vcs_Darcs, X_Vcs_Svn,
- Distribution, Release, Component)
- VALUES
- ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16,
- $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, '%s', '%s',
- '%s')"""\
- % (src_cfg['distribution'], src_cfg['release'], comp)
- cur.execute(query)
-
- aux.print_debug("Reading file " + path)
- # Copy content from gzipped file to temporary file, so that apt_pkg is
- # used by debian_bundle
- tmp = tempfile.NamedTemporaryFile()
- file = gzip.open(path)
- tmp.write(file.read())
- file.close()
- tmp.seek(0)
- aux.print_debug("Importing from " + path)
- import_sources(conn, open(tmp.name))
- tmp.close()
- cur.execute("DEALLOCATE source_insert")
- except IOError, (e, message):
- print "Could not read packages from %s: %s" % (path, message)
-
- conn.commit()
-
- for key in warned_about:
- print "Unknowen key %s appeared %d times" % (key, warned_about[key])
-
-if __name__ == '__main__':
- main()
Modified: udd/src/test.yaml
===================================================================
--- udd/src/test.yaml 2008-06-28 15:50:00 UTC (rev 901)
+++ udd/src/test.yaml 2008-06-29 10:56:23 UTC (rev 902)
@@ -1,12 +1,12 @@
general:
- dbname: udd
+ dbname: udd-test
types:
- sources: python sources_gatherer.py
- packages: python packages_gatherer.py
- setup: python db_manager.py
- delete: python db_manager.py
- src-pkg: python srcs_and_pkgs.py
- popcon: python popcon_gatherer.py
+ sources: module udd.sources_gatherer
+ packages: module udd.packages_gatherer
+ setup: exec python db_manager.py
+ delete: exec python db_manager.py
+ src-pkg: exec python srcs_and_pkgs.py
+ popcon: module udd.popcon_gatherer
#src-pkg: python sources_gatherer.py
debug: 1
Copied: udd/src/udd/aux.py (from rev 884, udd/src/aux.py)
===================================================================
--- udd/src/udd/aux.py (rev 0)
+++ udd/src/udd/aux.py 2008-06-29 10:56:23 UTC (rev 902)
@@ -0,0 +1,112 @@
+"""Auxillary methods for the UDD"""
+
+import syck
+import sys
+import psycopg2
+
+# If debug is something that evaluates to True, then print_debug actually prints something
+debug = 0
+
+def quote(s):
+    "Quote a string for SQL: escape backslashes first, then single quotes, and wrap it in quotes"
+    return "'" + s.replace("\\", "\\\\").replace("'", "\\'") + "'"
+
+def null_or_quote(dict, key):
+    "Return the SQL-quoted value of dict[key], or the bare word NULL if key is absent"
+    # Guard clause: a missing field becomes an unquoted SQL NULL
+    if key not in dict:
+        return 'NULL'
+    return quote(dict[key])
+
+def get_archs(conn):
+    """Map every architecture name to its id.
+
+    The rows are read from the arch_ids table through the connection <conn>;
+    the second column of each row becomes the key and the first the value."""
+    cursor = conn.cursor()
+    cursor.execute("SELECT * from arch_ids")
+    # A later row with the same name overwrites an earlier one
+    pairs = [(row[1], row[0]) for row in cursor.fetchall()]
+    return dict(pairs)
+
+def get_distrs(conn):
+    """Map every distribution name to its id.
+
+    The rows are read from the distr_ids table through the connection <conn>;
+    the second column of each row becomes the key and the first the value."""
+    cursor = conn.cursor()
+    cursor.execute("SELECT * from distr_ids")
+    # A later row with the same name overwrites an earlier one
+    pairs = [(row[1], row[0]) for row in cursor.fetchall()]
+    return dict(pairs)
+
+class ConfigException(Exception):
+    "Raised when the UDD configuration is missing a required entry or is malformed"
+    def __init__(self, message):
+        Exception.__init__(self, message)  # Exception(self) only built and discarded a second exception
+        self.message = message
+    def __str__(self):
+        return "ConfigException: " + self.message
+
+def open_connection(config):
+ """Open a psycopg2 connection to the database named config['general']['dbname'] and return it"""
+ return psycopg2.connect("dbname=" + config['general']['dbname'])
+
+def load_config(str):
+    """Load and check configuration from the string.
+
+    <str> is the text of the configuration as understood by syck.load. The
+    parsed mapping is returned, with general['debug'] set to 0 if absent.
+
+    Raises ConfigException if the general section or one of its dbname,
+    archs or types keys is missing, or if a source entry has no type or a
+    type that is not listed in general['types']."""
+    config = syck.load(str)
+    # Guard against a document that parses to None (e.g. an empty file): the
+    # membership test would otherwise fail with a TypeError
+    if config is None or 'general' not in config:
+        raise ConfigException('general section not specified')
+
+    general = config['general']
+    for key in ('dbname', 'archs', 'types'):
+        if key not in general:
+            raise ConfigException(key + ' not specified')
+    general.setdefault('debug', 0)
+
+    # Check that the source-entries are well-formed
+    for name in config:
+        if name == 'general':
+            continue
+        src = config[name]
+        if 'type' not in src:
+            raise ConfigException('type not specified for "%s"' % name)
+        if src['type'] not in general['types']:
+            raise ConfigException('Type of %s not specified in types' % name)
+
+    return config
+
+def insert_distr(conn, distr_name):
+    "Insert distribution <distr_name> into DB; the name is sent as a bound parameter, not spliced into the SQL"
+    cur = conn.cursor()
+    cur.execute("INSERT INTO distr_ids (name) VALUES (%s)", (distr_name,))
+
+def print_debug(*args):
+    "Print the arguments, space-separated on one line, to stdout if debug evaluates to true"
+    if debug:
+        # write() accepts exactly one string, so join the arguments instead of unpacking them
+        sys.stdout.write(" ".join(["%s" % (a,) for a in args]) + "\n")
+
+class BufferedLineReader:
+    """Iterate over the lines of a file, fetching them in batches of roughly cache_size bytes."""
+    def __init__(self, file, cache_size):
+        self.file = file
+        self.cache_size = cache_size
+
+    def __iter__(self):
+        # readlines(hint) returns an empty list once the file is exhausted
+        batch = self.file.readlines(self.cache_size)
+        while batch:
+            for line in batch:
+                yield line
+            batch = self.file.readlines(self.cache_size)
+
Added: udd/src/udd/gatherer.py
===================================================================
--- udd/src/udd/gatherer.py (rev 0)
+++ udd/src/udd/gatherer.py 2008-06-29 10:56:23 UTC (rev 902)
@@ -0,0 +1,15 @@
+"""
+This is the base class of all gatherers which want to use the python
+interface to be called by the dispatcher
+"""
+
+class gatherer:
+    def __init__(self, connection, config):
+        self.config = config  # configuration object handed in by the caller
+        self.connection = connection  # database connection handed in by the caller
+
+    def run(self, source):
+        raise NotImplementedError()  # every concrete gatherer has to override run()
+
+    def cursor(self):
+        return self.connection.cursor()  # a new cursor on the stored connection
Copied: udd/src/udd/packages_gatherer.py (from rev 901, udd/src/packages_gatherer.py)
===================================================================
--- udd/src/udd/packages_gatherer.py (rev 0)
+++ udd/src/udd/packages_gatherer.py 2008-06-29 10:56:23 UTC (rev 902)
@@ -0,0 +1,182 @@
+#/usr/bin/env python
+# Last-Modified: <Sat Jun 28 17:17:05 2008>
+
+import debian_bundle.deb822
+import gzip
+import os
+import sys
+import aux
+import tempfile
+from aux import ConfigException
+import psycopg2
+from gatherer import gatherer
+
+def get_gatherer(connection, config):  # module-level factory; presumably what the dispatcher looks up - verify
+    return packages_gatherer(connection=connection, config=config)
+
+class packages_gatherer(gatherer):
+    # For efficiency, these are dictionaries
+    mandatory = {'Package': 0, 'Version': 0, 'Architecture': 0, 'Maintainer': 0,
+                 'Description': 0}
+    non_mandatory = {'Source': 0, 'Essential': 0, 'Depends': 0, 'Recommends': 0,
+                     'Suggests': 0, 'Enhances': 0, 'Pre-Depends': 0, 'Installed-Size': 0,
+                     'Homepage': 0, 'Size': 0, 'MD5Sum': 0}
+    ignorable = ()
+
+    warned_about = {}
+    # A mapping from <package-name><version> to 1. If <package-name><version> is
+    # included in this dictionary, this means that we've already added this
+    # package with this version for architecture 'all' to the database. Needed
+    # because different architectures include packages for architecture 'all'
+    # with the same version, and we don't want these duplicate entries
+    imported_all_pkgs = {}
+
+    def __init__(self, connection, config):
+        gatherer.__init__(self, connection, config)
+        # The distribution being imported; run() sets it from the source's configuration
+        self._distr = None
+
+    def build_dict(self, control):
+        """Build a dictionary of SQL literals from the control dictionary.
+
+        Influenced by class variables mandatory, non_mandatory and ignorable.
+        Raises ValueError if a mandatory field is missing from control."""
+        d = {}
+        for k in packages_gatherer.mandatory:
+            if k not in control:
+                # String exceptions are rejected by newer Pythons; raise a real one
+                raise ValueError("Mandatory field %s not specified" % k)
+            d[k] = "'" + control[k].replace("\\", "\\\\").replace("'", "\\'") + "'"
+        for k in packages_gatherer.non_mandatory:
+            d[k] = aux.null_or_quote(control, k)
+        for k in control.keys():
+            if k not in packages_gatherer.mandatory and k not in packages_gatherer.non_mandatory and k not in packages_gatherer.ignorable:
+                # Count how often each unknown field shows up, for print_warnings()
+                packages_gatherer.warned_about[k] = packages_gatherer.warned_about.get(k, 0) + 1
+        return d
+
+    def import_packages(self, sequence):
+        """Import the packages from the sequence into the database through
+        self.connection.
+
+        Sequence has to have an iterator interface that yields a line every time
+        it is called. The format of the sequence is expected to be that of a
+        Debian Packages file."""
+        # Uses the prepared statement package_insert, which run() creates
+        cur = self.connection.cursor()
+        for control in debian_bundle.deb822.Packages.iter_paragraphs(sequence):
+            # Check whether packages with architecture 'all' have already been
+            # imported
+            if control['Architecture'] == 'all':
+                t = control['Package'] + control['Version']
+                if t in packages_gatherer.imported_all_pkgs:
+                    continue
+                packages_gatherer.imported_all_pkgs[t] = 1
+
+            d = self.build_dict(control)
+
+            # These are integer values - we don't need quotes for them
+            if d['Installed-Size'] != 'NULL':
+                d['Installed-Size'] = d['Installed-Size'].strip("'")
+            if d['Size'] != 'NULL':
+                d['Size'] = d['Size'].strip("'")
+
+            # We just use the first line of the description. Take it from the raw
+            # value and quote it afterwards: truncating the already quoted literal
+            # and then guessing from its last characters whether the closing quote
+            # survived appended a stray quote to one-line descriptions that end in
+            # a backslash
+            short = control['Description'].split("\n", 1)[0]
+            d['Description'] = "'" + short.replace("\\", "\\\\").replace("'", "\\'") + "'"
+
+            # Source is non-mandatory, but we don't want it to be NULL
+            if d['Source'] == "NULL":
+                d['Source'] = d['Package']
+                d['Source_Version'] = d['Version']
+            else:
+                split = d['Source'].strip("'").split()
+                if len(split) == 1:
+                    d['Source_Version'] = d['Version']
+                else:
+                    d['Source'] = aux.quote(split[0])
+                    d['Source_Version'] = aux.quote(split[1].strip("()"))
+
+            query = """EXECUTE package_insert
+              (%(Package)s, %(Version)s, %(Architecture)s, %(Maintainer)s,
+              %(Description)s, %(Source)s, %(Source_Version)s, %(Essential)s,
+              %(Depends)s, %(Recommends)s, %(Suggests)s, %(Enhances)s,
+              %(Pre-Depends)s, %(Installed-Size)s, %(Homepage)s, %(Size)s,
+              %(MD5Sum)s)""" % d
+            try:
+                cur.execute(query)
+            except psycopg2.ProgrammingError:
+                print query
+                raise
+
+    def run(self, source):
+        """Import the Packages files of every configured component and
+        architecture of <source> into the Packages table, then commit.
+
+        Raises ConfigException if <source> or one of its required keys is
+        missing from the configuration. A file that cannot be read is reported
+        on stdout and skipped."""
+        if source not in self.config:
+            raise ConfigException("Source %s not specified" % source)
+        src_cfg = self.config[source]
+
+        for key in ('directory', 'archs', 'release', 'components', 'distribution'):
+            if key not in src_cfg:
+                raise ConfigException('%s not specified for source %s' % (key, source))
+
+        aux.debug = self.config['general']['debug']
+        self._distr = src_cfg['distribution']
+        cur = self.cursor()
+
+        # For every part and every architecture, import the packages into the DB
+        for comp in src_cfg['components']:
+            for arch in src_cfg['archs']:
+                path = os.path.join(src_cfg['directory'], comp, 'binary-' + arch, 'Packages.gz')
+                # NOTE: distribution, release and component are spliced into the
+                # statement text; they come from the configuration file
+                cur.execute("""PREPARE package_insert AS INSERT INTO Packages
+                  (Package, Version, Architecture, Maintainer, Description, Source,
+                  Source_Version, Essential, Depends, Recommends, Suggests, Enhances,
+                  Pre_Depends, Installed_Size, Homepage, Size, MD5Sum, Distribution,
+                  Release, Component)
+                  VALUES
+                  ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15,
+                  $16, $17, '%s', '%s', '%s')
+                  """ % (self._distr, src_cfg['release'], comp))
+                try:
+                    aux.print_debug("Reading file " + path)
+                    # Copy content from gzipped file to temporary file, so that apt_pkg is
+                    # used by debian_bundle
+                    tmp = tempfile.NamedTemporaryFile()
+                    try:
+                        file = gzip.open(path)
+                        try:
+                            tmp.write(file.read())
+                        finally:
+                            file.close()
+                        # Push the data to disk before re-opening the file by name
+                        tmp.flush()
+                        aux.print_debug("Importing from " + path)
+                        plain = open(tmp.name)
+                        try:
+                            self.import_packages(plain)
+                        finally:
+                            plain.close()
+                    finally:
+                        # Closing the NamedTemporaryFile also deletes it
+                        tmp.close()
+                except IOError, e:
+                    # IOError does not always carry (errno, message); print it whole
+                    print "Could not read packages from %s: %s" % (path, e)
+                # The statement is prepared anew for the next file, so drop it here
+                cur.execute("DEALLOCATE package_insert")
+
+        self.connection.commit()
+
+    def print_warnings(self):
+        for key in packages_gatherer.warned_about:
+            print("Unknown key: %s appeared %d times" % (key, packages_gatherer.warned_about[key]))
Copied: udd/src/udd/popcon_gatherer.py (from rev 900, udd/src/popcon_gatherer.py)
===================================================================
--- udd/src/udd/popcon_gatherer.py (rev 0)
+++ udd/src/udd/popcon_gatherer.py 2008-06-29 10:56:23 UTC (rev 902)
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+
+"""
+This script imports the popcon data into the database
+See http://popcon.debian.org/
+"""
+
+import aux
+import sys
+import gzip
+from gatherer import gatherer
+
+def get_gatherer(connection, config):  # module-level factory; presumably what the dispatcher looks up - verify
+    return popcon_gatherer(connection=connection, config=config)
+
+class popcon_gatherer(gatherer):
+    "Gatherer that loads a gzipped popcon results file into the popcon table"
+    def __init__(self, connection, config):
+        gatherer.__init__(self, connection, config)
+
+    def run(self, source):
+        "Import the file at config[source]['path']; raises ConfigException if path is not configured"
+        my_config = self.config[source]
+        if 'path' not in my_config:
+            # The message lacked its %s, so building it raised a TypeError instead
+            raise aux.ConfigException("path not configured for source %s" % source)
+
+        cur = self.cursor()
+        cur.execute("PREPARE pop_insert AS INSERT INTO popcon (name, vote, olde, recent, nofiles) VALUES ($1, $2, $3, $4, $5)")
+
+        popcon = gzip.open(my_config['path'])
+        try:
+            for line in popcon.readlines():
+                try:
+                    # Blank or one-word lines fail to unpack here and are skipped
+                    name, data = line.split(None, 1)
+                    if name == "Submissions:":
+                        cur.execute("INSERT INTO popcon (name, vote) VALUES ('_submissions', %s)", (int(data),))
+                    (name, vote, old, recent, nofiles) = data.split()
+                    cur.execute("EXECUTE pop_insert(%s, %s, %s, %s, %s)",
+                                (name, int(vote), int(old), int(recent), int(nofiles)))
+                except ValueError:
+                    continue
+        finally:
+            popcon.close()
+        cur.execute("DEALLOCATE pop_insert")
+
+if __name__ == '__main__':
+    sys.exit("popcon_gatherer no longer has a main(); it is a gatherer module and has to be run through the dispatcher")
Copied: udd/src/udd/sources_gatherer.py (from rev 898, udd/src/sources_gatherer.py)
===================================================================
--- udd/src/udd/sources_gatherer.py (rev 0)
+++ udd/src/udd/sources_gatherer.py 2008-06-29 10:56:23 UTC (rev 902)
@@ -0,0 +1,164 @@
+#!/usr/bin/env python
+# Last-Modified: <Sun Jun 29 10:48:17 2008>
+
+"""This module imports the Sources.gz files of an archive into the database"""
+
+import debian_bundle.deb822
+import gzip
+import os
+import sys
+import aux
+import tempfile
+from aux import ConfigException
+from aux import null_or_quote
+from gatherer import gatherer
+
+def get_gatherer(connection, config):
+    """Return a sources_gatherer working on <connection> and <config>."""
+    return sources_gatherer(connection, config)
+
+class sources_gatherer(gatherer):
+    """Gatherer importing the Sources.gz file of every configured component."""
+
+    # The dictionaries below serve as sets of field names: this class only
+    # tests membership and iterates over the keys, the values are not read.
+    # Fields every paragraph has to provide
+    mandatory = {'Format': 0, 'Maintainer': 0, 'Package': 0, 'Version': 0, 'Files': 0}
+    # Fields a paragraph may provide
+    non_mandatory = {'Uploaders': 0, 'Binary': 0, 'Architecture': 0,
+        'Standards-Version': 0, 'Homepage': 0, 'Build-Depends': 0,
+        'Build-Depends-Indep': 0, 'Build-Conflicts': 0, 'Build-Conflicts-Indep': 0,
+        'Priority': 0, 'Section': 0, 'Vcs-Arch': 0, 'Vcs-Browser': 0, 'Vcs-Bzr': 0,
+        'Vcs-Cvs': 0, 'Vcs-Darcs': 0, 'Vcs-Git': 0, 'Vcs-Hg': 0, 'Vcs-Svn': 0,
+        'X-Vcs-Browser': 0, 'X-Vcs-Bzr': 0, 'X-Vcs-Darcs': 0, 'X-Vcs-Svn': 0}
+    # Fields that are neither imported nor counted as unknown
+    ignorable = {}
+
+    # Number of occurrences of every field found in none of the above
+    warned_about = {}
+
+    def __init__(self, connection, config):
+        gatherer.__init__(self, connection, config)
+        self._distr = None
+
+    def build_dict(self, control):
+        """Build the dictionary of SQL literals for the paragraph <control>.
+
+        Values of mandatory fields are escaped and quoted here, the other
+        known fields are handed to null_or_quote. Fields listed in neither
+        mandatory, non_mandatory nor ignorable are counted in warned_about.
+
+        Raises ValueError if a mandatory field is missing."""
+        d = {}
+        for k in sources_gatherer.mandatory:
+            if k not in control:
+                # Raising a plain string is deprecated, use a real exception
+                raise ValueError("Mandatory field %s not specified" % k)
+            d[k] = "'" + control[k].replace("\\", "\\\\").replace("'", "\\'") + "'"
+        for k in sources_gatherer.non_mandatory:
+            d[k] = null_or_quote(control, k)
+        for k in control.keys():
+            if k not in sources_gatherer.mandatory and k not in sources_gatherer.non_mandatory and k not in sources_gatherer.ignorable:
+                if k not in sources_gatherer.warned_about:
+                    sources_gatherer.warned_about[k] = 1
+                else:
+                    sources_gatherer.warned_about[k] += 1
+        return d
+
+    def import_sources(self, file):
+        """Import the paragraphs read from <file> into the database.
+
+        <file> has to be an open file in the format of a Debian Sources file.
+        Every paragraph is inserted by executing the statement source_insert,
+        which has to be prepared before this method is called."""
+        cur = self.cursor()
+        for control in debian_bundle.deb822.Packages.iter_paragraphs(file):
+            d = self.build_dict(control)
+            query = """EXECUTE source_insert
+                (%(Package)s, %(Version)s, %(Maintainer)s, %(Format)s, %(Files)s,
+                %(Uploaders)s, %(Binary)s, %(Architecture)s, %(Standards-Version)s,
+                %(Homepage)s, %(Build-Depends)s, %(Build-Depends-Indep)s,
+                %(Build-Conflicts)s, %(Build-Conflicts-Indep)s, %(Priority)s,
+                %(Section)s, %(Vcs-Arch)s, %(Vcs-Browser)s, %(Vcs-Bzr)s, %(Vcs-Cvs)s,
+                %(Vcs-Darcs)s, %(Vcs-Git)s, %(Vcs-Hg)s, %(Vcs-Svn)s, %(X-Vcs-Browser)s,
+                %(X-Vcs-Bzr)s, %(X-Vcs-Darcs)s, %(X-Vcs-Svn)s)
+                """ % d
+            cur.execute(query)
+
+    def _unpack(self, path):
+        """Return a temporary file holding the decompressed content of <path>."""
+        # Copy content from gzipped file to temporary file, so that apt_pkg is
+        # used by debian_bundle
+        tmp = tempfile.NamedTemporaryFile()
+        try:
+            packed = gzip.open(path)
+            try:
+                tmp.write(packed.read())
+            finally:
+                packed.close()
+            # The file is reopened by name, so nothing may stay in the buffer
+            tmp.flush()
+        except:
+            # Do not leave the temporary file behind, then pass the error on
+            tmp.close()
+            raise
+        return tmp
+
+    def run(self, source):
+        """Import the Sources.gz file of every component of <source>.
+
+        The section <source> of the configuration has to provide the keys
+        directory, components, distribution and release, otherwise a
+        ConfigException is raised. A component whose file cannot be read is
+        reported and skipped."""
+        if not source in self.config:
+            raise ConfigException("Source %s not specified" % source)
+        src_cfg = self.config[source]
+
+        for key in ('directory', 'components', 'distribution', 'release'):
+            if not key in src_cfg:
+                raise ConfigException('%s not specified for source %s' % (key, source))
+
+        aux.debug = self.config['general']['debug']
+
+        cur = self.cursor()
+
+        for comp in src_cfg['components']:
+            path = os.path.join(src_cfg['directory'], comp, 'source', 'Sources.gz')
+            try:
+                aux.print_debug("Reading file " + path)
+                # Unpack before preparing the statement: a missing file then
+                # leaves no prepared statement behind for the next component
+                tmp = self._unpack(path)
+                try:
+                    query = """PREPARE source_insert as INSERT INTO sources
+                        (Package, Version, Maintainer, Format, Files, Uploaders, Bin,
+                        Architecture, Standards_Version, Homepage, Build_Depends,
+                        Build_Depends_Indep, Build_Conflicts, Build_Conflicts_Indep, Priority,
+                        Section, Vcs_Arch, Vcs_Browser, Vcs_Bzr, Vcs_Cvs, Vcs_Darcs, Vcs_Git,
+                        Vcs_Hg, Vcs_Svn, X_Vcs_Browser, X_Vcs_Bzr, X_Vcs_Darcs, X_Vcs_Svn,
+                        Distribution, Release, Component)
+                        VALUES
+                        ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16,
+                        $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, '%s', '%s',
+                        '%s')"""\
+                        % (src_cfg['distribution'], src_cfg['release'], comp)
+                    cur.execute(query)
+
+                    aux.print_debug("Importing from " + path)
+                    unpacked = open(tmp.name)
+                    try:
+                        self.import_sources(unpacked)
+                    finally:
+                        unpacked.close()
+                    cur.execute("DEALLOCATE source_insert")
+                finally:
+                    tmp.close()
+            except IOError, e:
+                # Not every IOError carries an (errno, message) pair
+                print "Could not read packages from %s: %s" % (path, e.strerror or e)
+
+    def print_warnings(self):
+        """Print how often each unknown field was encountered."""
+        for key in sources_gatherer.warned_about:
+            print "Unknown key: %s appeared %d times" % (key, sources_gatherer.warned_about[key])
Modified: udd/src/udd-dispatch.py
===================================================================
--- udd/src/udd-dispatch.py 2008-06-28 15:50:00 UTC (rev 901)
+++ udd/src/udd-dispatch.py 2008-06-29 10:56:23 UTC (rev 902)
@@ -1,5 +1,5 @@
#!/usr/bin/env python
-# Last-Modified: <Thu May 29 21:02:10 2008>
+# Last-Modified: <Sun Jun 29 10:53:46 2008>
"""Dispatch udd gatherers
@@ -7,7 +7,7 @@
import sys
from os import system
-import aux
+import udd.aux
def print_help():
print "Usage: " + sys.argv[0] + " <configuration> <source1> [source2 source3 ...]"
@@ -18,7 +18,7 @@
sys.exit(1)
# Check the configuration
- config = aux.load_config(open(sys.argv[1]).read())
+ config = udd.aux.load_config(open(sys.argv[1]).read())
types = config['general']['types']
@@ -26,6 +26,8 @@
if not src in config:
raise aux.ConfigException("%s is not specified in %s" % (src, sys.argv[1]))
+ connection = udd.aux.open_connection(config)
+
# Process the sources
for src in sys.argv[2:]:
src_config = config[src]
@@ -33,6 +35,14 @@
if not type in types:
print "No script specified for type " + src['type']
sys.exit(1)
- script = types[type]
- system(script + " " + sys.argv[1] + " " + src)
+
+ (command,rest) = types[type].split(None, 1)
+
+ if command == "exec":
+ system(rest + " " + sys.argv[1] + " " + src)
+ elif command == "module":
+ exec("import " + rest)
+ exec "gatherer = " + rest + ".get_gatherer(connection, config)"
+ gatherer.run(src)
+ connection.commit()
More information about the Collab-qa-commits
mailing list