[Collab-qa-commits] r856 - udd/src
neronus-guest at alioth.debian.org
neronus-guest at alioth.debian.org
Sat May 24 12:00:56 UTC 2008
Author: neronus-guest
Date: 2008-05-24 12:00:49 +0000 (Sat, 24 May 2008)
New Revision: 856
Added:
udd/src/aux.py
Modified:
udd/src/packages-gatherer.py
udd/src/setup-db.py
udd/src/test.yaml
udd/src/udd-dispatch.py
Log:
* Common function went into aux.py
* Code rewrite/refactoring
Added: udd/src/aux.py
===================================================================
--- udd/src/aux.py (rev 0)
+++ udd/src/aux.py 2008-05-24 12:00:49 UTC (rev 856)
@@ -0,0 +1,91 @@
+"""Auxillary methods for the UDD"""
+
+import syck
+import sys
+
+# If debug is something that evaluates to True, then print_debug actually prints something :)
+debug = 0
+
+def get_archs(conn):
+    """Build a lookup table of architecture ids, keyed by architecture name.
+
+    All rows of the arch_ids table are read through the connection <conn>.
+    The second column of a row is used as key, the first column as value."""
+    cursor = conn.cursor()
+    cursor.execute("SELECT * from arch_ids")
+    return dict((entry[1], entry[0]) for entry in cursor.fetchall())
+
+def get_distrs(conn):
+    """Build a lookup table of distribution ids, keyed by distribution name.
+
+    All rows of the distr_ids table are read through the connection <conn>.
+    The second column of a row is used as key, the first column as value."""
+    cursor = conn.cursor()
+    cursor.execute("SELECT * from distr_ids")
+    name_to_id = {}
+    name_to_id.update((entry[1], entry[0]) for entry in cursor.fetchall())
+    return name_to_id
+
+class ConfigException(Exception):
+    """Raised when the configuration is missing or malformed.
+
+    The human readable description is available as the attribute message."""
+    def __init__(self, message):
+        # Initialise the base class; a bare Exception(self) would only create
+        # and discard an unrelated exception object
+        Exception.__init__(self, message)
+        self.message = message
+
+    def __str__(self):
+        # %-formatting also copes with messages that are not strings
+        return "ConfigException: %s" % (self.message,)
+
+def load_config(seq):
+    """Load the configuration from seq and check that it is well-formed.
+
+    seq is handed to syck.load. The top-level keys 'dbname', 'archs' and
+    'types' are mandatory; 'debug' is optional and defaults to 0. Every other
+    top-level key is treated as a source entry, which has to be a mapping
+    with a 'type' that is listed in 'types'.
+
+    Returns the loaded configuration. Raises ConfigException if a check fails."""
+    config = syck.load(seq)
+    # An empty or scalar document would otherwise fail with a TypeError below
+    if not isinstance(config, dict):
+        raise ConfigException('configuration is not a mapping')
+
+    for key in ('dbname', 'archs', 'types'):
+        if key not in config:
+            raise ConfigException('%s not specified' % key)
+
+    config.setdefault('debug', 0)
+
+    # Check that the source-entries are well-formed
+    for name in config:
+        if name in ('dbname', 'archs', 'types', 'debug'):
+            continue
+
+        src = config[name]
+        # On a plain string, 'type' in src would be a substring test
+        if not isinstance(src, dict):
+            raise ConfigException('source "%s" is not a mapping' % name)
+        if 'type' not in src:
+            raise ConfigException('type not specified for "%s"' % name)
+        if src['type'] not in config['types']:
+            raise ConfigException('Type of %s not specified in types' % name)
+
+    return config
+
+def insert_distr(conn, distr_name):
+    """Insert distribution <distr_name> into the table distr_ids.
+
+    The statement is executed on a cursor of <conn>; nothing is committed
+    here."""
+    cur = conn.cursor()
+    # Pass the name as a bound parameter, so that quotes in it can neither
+    # break the statement nor inject SQL
+    cur.execute("INSERT INTO distr_ids (name) VALUES (%s)", (distr_name,))
+
+def print_debug(*args):
+    """Print the arguments to stdout if debug evaluates to true.
+
+    The arguments are converted to text, joined by single spaces and followed
+    by a newline. Without arguments only the newline is written."""
+    if debug:
+        # write() takes exactly one string, so unpacking *args into it fails
+        # for anything but a single string argument
+        sys.stdout.write(" ".join(["%s" % (arg,) for arg in args]))
+        sys.stdout.write("\n")
+
+class BufferedLineReader:
+    """Iterate over the lines of a file, fetching them in batches.
+
+    file has to provide readlines(); cache_size is passed to it as the only
+    argument on every call."""
+    def __init__(self, file, cache_size):
+        self.file = file
+        self.cache_size = cache_size
+
+    def __iter__(self):
+        batch = self.file.readlines(self.cache_size)
+        # An empty batch means that there is nothing left to read
+        while batch:
+            for line in batch:
+                yield line
+            batch = self.file.readlines(self.cache_size)
+
Modified: udd/src/packages-gatherer.py
===================================================================
--- udd/src/packages-gatherer.py 2008-05-23 17:38:02 UTC (rev 855)
+++ udd/src/packages-gatherer.py 2008-05-24 12:00:49 UTC (rev 856)
@@ -1,79 +1,112 @@
#/usr/bin/env python
-# Last-Modified: <Fri 23 May 2008 19:33:11 CEST>
+# Last-Modified: <Sat May 24 11:34:28 2008>
from psycopg2 import connect
-from debian_bundle.deb822 import Packages
+import debian_bundle.deb822
+import gzip
import os
-import syck
import sys
-import gzip
+import aux
+from aux import ConfigException
-archs = []
+# A mapping from the architecture names to architecture IDs
+archs = {}
+# A mapping from <package-name><version> to 1.
+# If <package-name><version> is included in this dictionary, this means
+# that we've already added this package with this version for architecture 'all'
+# to the database. Needed because different architectures include packages
+# for architecture 'all' with the same version, and we don't want these duplicate
+# entries.
+imported_all_pkgs = {}
+# The ID for the distribution we want to include
distr_id = None
-def get_archs(conn):
- c = conn.cursor();
- c.execute("SELECT * from arch_ids")
- result = {}
- for row in c.fetchall():
- result[row[1]] = row[0]
- return result
+def import_packages(conn, sequence):
+ """Import the packages from the sequence into the database-connection conn.
-def get_distr_id(conn, distr):
- c = conn.cursor();
- c.execute("SELECT distr_id from distr_ids WHERE name = '" + distr + "'")
- rows = c.fetchall()
- if len(rows) == 0:
- return None
- elif len(rows) > 1:
- print "Warning: Distribution %s exists more than once in distr_ids" % distr
- else:
- return rows[0][0]
-
+ Sequence has to have an iterator interface, that yields a line every time it
+ is called.The Format of the sequence is expected to be that of a debian
+ packages file."""
+ global imported_all_pkgs
+ # The fields that are to be read. Other fields are ignored
+ fields = ('Architecture', 'Package', 'Version')
+ for control in debian_bundle.deb822.Packages.iter_paragraphs(sequence, fields):
+ # Check whether packages with architectue 'all' have already been
+ # imported
+ if control['Architecture'] == 'all':
+ t = control['Package'] + control['Version']
+ if t in imported_all_pkgs:
+ continue
+ imported_all_pkgs[t] = 1
-def import_pkgs(file, conn):
- "Import file specified by file into database"
- try:
- for control in Packages(gzip.open(file)):
- c = conn.cursor()
- c.execute("INSERT INTO pkgs (name, distr_id, arch_id, version, src_id) VALUES ('%s', %d, %d, '%s', 0)" % (control["Package"], distr_id, archs[control["Architecture"]], control["Version"]))
- except Exception, message:
- print "Could not parse %s: %s" % (file, message)
+ cur = conn.cursor()
+ query = "INSERT INTO pkgs (name, distr_id, arch_id, version, src_id)\
+ VALUES ('%s', %d, %d, '%s', 0)" % (control["Package"], distr_id, archs[control["Architecture"]], control["Version"])
+ cur.execute(query)
-if __name__ == '__main__':
+def main():
+ global distr_id
+ global archs
if len(sys.argv) != 3:
- print "Usage: %s <config> <source>" % (sys.argv[0])
+ print "Usage: %s <config> <source>" % sys.argv[0]
sys.exit(1)
- cfg_file = sys.argv[1]
- config = syck.load(open(cfg_file))
- if not 'dbname' in config:
- print "dbname not specified in " + cfg_file
- sys.exit(1)
+ src_name = sys.argv[2]
+ cfg_path = sys.argv[1]
+ config = None
+ try:
+ config = aux.load_config(open(cfg_path).read())
+ except ConfigException, e:
+ raise ConfigException, "Configuration error in " + cfg_path +": " + e.message
- source_name = sys.argv[2]
- if not source_name in config:
- print "%s not specified in %s" % (source_name, cfg_file)
- sys.exit(1)
+ if not src_name in config:
+ raise ConfigException, "Source %s not specified in " + cfg_path
+ src_cfg = config[src_name]
- source_config = config[source_name]
+ if not 'directory' in src_cfg:
+ raise ConfigException('directory not specified for source %s in file %s' %
+ (src_name, cfg_path))
+
+ if not 'archs' in src_cfg:
+ raise ConfigException('archs not specified for source %s in file %s' %
+ (src_name, cfg_path))
+
+ if not 'parts' in src_cfg:
+ raise ConfigException('parts not specified for source %s in file %s' %
+ (src_name, cfg_path))
+
+ if not 'distribution' in src_cfg:
+ raise ConfigException('distribution not specified for source %s in file %s' %
+ (src_name, cfg_path))
+
+ aux.debug = config['debug']
+
conn = connect('dbname=' + config['dbname'])
- archs = get_archs(conn)
-
- dir = source_config['directory']
- distr = source_config['distribution']
- distr_id = get_distr_id(conn, distr)
- if distr_id is None:
- c = conn.cursor()
- c.execute("INSERT INTO distr_ids (name) VALUES ('%s')" % (distr))
- distr_id = get_distr_id(conn, distr)
- if distr_id is None:
- print "Error: Could not create distr_id"
- sys.exit(1)
- for part in source_config['parts']:
- for arch in source_config['archs']:
- import_pkgs(os.path.join(dir, part, 'binary-' + arch, 'Packages.gz'), conn)
+ # Get distribution ID. If it does not exist, create it
+ distr_ids = aux.get_distrs(conn)
+ if src_cfg['distribution'] not in distr_ids:
+ aux.insert_distr(conn, src_cfg['distribution'])
+ distr_ids = aux.get_distrs(conn)
+ distr_id = distr_ids[src_cfg['distribution']]
+ archs = aux.get_archs(conn)
+
+ # For every part and every architecture, import the packages into the DB
+ for part in src_cfg['parts']:
+ for arch in src_cfg['archs']:
+ path = os.path.join(src_cfg['directory'], part, 'binary-' + arch, 'Packages.gz')
+ try:
+ aux.print_debug("Reading file " + path)
+ file = gzip.open(path)
+ lines = aux.BufferedLineReader(file, 1024*1024*4)
+ aux.print_debug("Importing from " + path)
+ import_packages(conn, lines)
+ file.close()
+ except IOError, (e, message):
+ print "Could not read packages from %s: %s" % (path, message)
+
conn.commit()
+if __name__ == '__main__':
+ main()
Modified: udd/src/setup-db.py
===================================================================
--- udd/src/setup-db.py 2008-05-23 17:38:02 UTC (rev 855)
+++ udd/src/setup-db.py 2008-05-24 12:00:49 UTC (rev 856)
@@ -1,5 +1,5 @@
#!/usr/bin/env python
-# Last-Modified: <Fri 23 May 2008 18:19:25 CEST>
+# Last-Modified: <Sat May 24 11:29:22 2008>
# Starting from an empty database, create the necessary tables
from psycopg2 import connect
@@ -12,7 +12,7 @@
sys.exit(1)
# Load configuration
- config = syck.load(open(sys.argv[1]))
+ config = syck.load(open(sys.argv[1]).read())
# Check configuration
if not 'dbname' in config:
print "dbname not specified in" + sys.argv[1]
Modified: udd/src/test.yaml
===================================================================
--- udd/src/test.yaml 2008-05-23 17:38:02 UTC (rev 855)
+++ udd/src/test.yaml 2008-05-24 12:00:49 UTC (rev 856)
@@ -2,6 +2,7 @@
types:
sources: echo
packages: python packages-gatherer.py
+debug: 1
archs:
[alpha, amd64, arm, armeb, armel, hppa, hurd-i386,
@@ -9,11 +10,64 @@
mipsel, powerpc, ppc64, s390, sparc, all, any]
debian-lenny:
- archs: [alpha, amd64, arm, armeb, armel, hppa, hurd-i386,
- i386, i486, ia64, kfreebsd-amd64, kfreebsd-i386, m68k, mips,
- mipsel, powerpc, ppc64, s390, sparc]
+ archs: [alpha, amd64, arm, armel, hppa,
+ i386, ia64, mips,
+ mipsel, powerpc, s390, sparc]
directory: /org/ftp.debian.org/dists/lenny/
parts: [main, contrib, non-free]
distribution: debian-lenny
type: packages
+debian-sid:
+ archs: [alpha, amd64, arm, armel, hppa, hurd-i386,
+ i386, i486, ia64, kfreebsd-amd64, kfreebsd-i386, m68k, mips,
+ mipsel, powerpc, ppc64, s390, sparc]
+ directory: /org/ftp.debian.org/dists/sid/
+ parts: [main, contrib, non-free]
+ distribution: debian-sid
+ type: packages
+
+debian-sarge:
+ archs: [alpha, amd64, arm, armel, hppa, hurd-i386,
+ i386, i486, ia64, kfreebsd-amd64, kfreebsd-i386, m68k, mips,
+ mipsel, powerpc, ppc64, s390, sparc]
+ directory: /org/ftp.debian.org/dists/sarge/
+ parts: [main, contrib, non-free]
+ distribution: debian-sarge
+ type: packages
+
+debian-backports-etch:
+ archs: [alpha, amd64, arm, armel, hppa, hurd-i386,
+ i386, i486, ia64, kfreebsd-amd64, kfreebsd-i386, m68k, mips,
+ mipsel, powerpc, ppc64, s390, sparc]
+ directory: /org/ftp.backports.org/dists/etch-backports/
+ parts: [main, contrib, non-free]
+ distribution: debian-backports-etch
+ type: packages
+
+debian-backports-sarge:
+ archs: [alpha, amd64, arm, armel, hppa, hurd-i386,
+ i386, i486, ia64, kfreebsd-amd64, kfreebsd-i386, m68k, mips,
+ mipsel, powerpc, ppc64, s390, sparc]
+ directory: /org/ftp.backports.org/dists/sarge-backports/
+ parts: [main, contrib, non-free]
+ distribution: debian-backports-sarge
+ type: packages
+
+debian-volatile-etch:
+ archs: [alpha, amd64, arm, armel, hppa, hurd-i386,
+ i386, i486, ia64, kfreebsd-amd64, kfreebsd-i386, m68k, mips,
+ mipsel, powerpc, ppc64, s390, sparc]
+ directory: /org/volatile.debian.org/dists/etch/volatile/
+ parts: [main, contrib, non-free]
+ distribution: debian-volatile-etch
+ type: packages
+
+debian-volatile-sarge:
+ archs: [alpha, amd64, arm, armel, hppa, hurd-i386,
+ i386, i486, ia64, kfreebsd-amd64, kfreebsd-i386, m68k, mips,
+ mipsel, powerpc, ppc64, s390, sparc]
+ directory: /org/volatile.debian.org/dists/sarge/volatile/
+ parts: [main, contrib, non-free]
+ distribution: debian-volatile-sarge
+ type: packages
Modified: udd/src/udd-dispatch.py
===================================================================
--- udd/src/udd-dispatch.py 2008-05-23 17:38:02 UTC (rev 855)
+++ udd/src/udd-dispatch.py 2008-05-24 12:00:49 UTC (rev 856)
@@ -1,14 +1,13 @@
#!/usr/bin/env python
-# Last-Modified: <Fri 23 May 2008 19:31:29 CEST>
+# Last-Modified: <Sat May 24 11:57:17 2008>
"""Dispatch udd gatherers
This script is used to dispatch the source gatherers of the UDD project."""
-import syck
-from psycopg2 import connect
import sys
from os import system
+import aux
def print_help():
print "Usage: " + sys.argv[0] + " <configuration> <source1> [source2 source3 ...]"
@@ -19,29 +18,18 @@
sys.exit(1)
# Check the configuration
- config = syck.load(open(sys.argv[1]))
- if not 'dbname' in config:
- print "dbname not specified in configuration file " + sys.argv[1]
- sys.exit(1)
+ config = aux.load_config(open(sys.argv[1]).read())
- if not 'types' in config:
- print "types not specified in configuration file " + sys.argv[1]
- sys.exit(1)
-
types = config['types']
- # Process the sources
for src in sys.argv[2:]:
if not src in config:
- print src + " is no data source according to " + sys.argv[1]
- sys.exit(1)
+ raise aux.ConfigException("%s is not specified in %s" % (src, sys.argv[1]))
+ # Process the sources
+ for src in sys.argv[2:]:
src_config = config[src]
- if not 'type' in src_config:
- print "Type of " + src + " not specified in " + sys.argv[1]
- sys.exit(1)
type = src_config['type']
-
if not type in types:
print "No script specified for type " + src['type']
sys.exit(1)
More information about the Collab-qa-commits
mailing list