[Collab-qa-commits] r856 - udd/src

neronus-guest at alioth.debian.org neronus-guest at alioth.debian.org
Sat May 24 12:00:56 UTC 2008


Author: neronus-guest
Date: 2008-05-24 12:00:49 +0000 (Sat, 24 May 2008)
New Revision: 856

Added:
   udd/src/aux.py
Modified:
   udd/src/packages-gatherer.py
   udd/src/setup-db.py
   udd/src/test.yaml
   udd/src/udd-dispatch.py
Log:
 * Common functions moved into aux.py
 * Code rewrite/refactoring


Added: udd/src/aux.py
===================================================================
--- udd/src/aux.py	                        (rev 0)
+++ udd/src/aux.py	2008-05-24 12:00:49 UTC (rev 856)
@@ -0,0 +1,91 @@
+"""Auxiliary methods for the UDD"""
+
+import syck
+import sys
+
+# If debug is something that evaluates to True, then print_debug actually prints something :)
+debug = 0
+
+def get_archs(conn):
+  """Return a dictionary, mapping from architecture names to their ids.
+
+  This mapping is retrieved from the connection <conn>"""
+  cur = conn.cursor()
+  cur.execute("SELECT * from arch_ids")
+  result = {}
+  for row in cur.fetchall():
+    result[row[1]] = row[0]
+  return result
+
+def get_distrs(conn):
+  """Return a dictionary, mapping from distribution names to their ids.
+
+  This mapping is retrieved from the connection <conn>"""
+  cur = conn.cursor()
+  cur.execute("SELECT * from distr_ids")
+  result = {}
+  for row in cur.fetchall():
+    result[row[1]] = row[0]
+  return result
+
+class ConfigException(Exception):
+  def __init__(self, message):
+    Exception(self)
+    self.message = message
+
+  def __str__(self):
+    return "ConfigException: " + self.message
+
+def load_config(seq):
+  """Load and check configuration from seq"""
+  config = syck.load(seq)
+  if not 'dbname' in config:
+    raise ConfigException('dbname not specified')
+
+  if not 'archs' in config:
+    raise ConfigException('archs not specified')
+
+  if not 'types' in config:
+    raise ConfigException('types not specified')
+
+  if not 'debug' in config:
+    config['debug'] = 0
+
+  # Check that the source-entries are well-formed
+  for name in config:
+    if name in ('dbname', 'archs', 'types', 'debug'):
+      continue
+
+    src = config[name]
+    if not 'type' in src:
+      raise ConfigException('type not specified for "%s"' % name)
+    if src['type'] not in config['types']:
+      raise ConfigException('Type of %s not specified in types' % name)
+
+  return config
+
+def insert_distr(conn, distr_name):
+  "Insert distribution <distr_name> into DB"
+  cur = conn.cursor()
+  cur.execute("INSERT INTO distr_ids (name) VALUES ('%s')" % distr_name)
+
+def print_debug(*args):
+  "Print arguments to stdout if debug is set to something that evaluates to true"
+  if debug:
+    sys.stdout.write(*args)
+    sys.stdout.write("\n")
+
+class BufferedLineReader:
+  """Provides an iterator over the input of the specified file."""
+  def __init__(self, file, cache_size):
+    self.cache_size = cache_size
+    self.file = file
+
+  def __iter__(self):
+    while True:
+      lines = self.file.readlines(self.cache_size)
+      if len(lines) == 0:
+	break
+      for line in lines:
+	yield line
+

Modified: udd/src/packages-gatherer.py
===================================================================
--- udd/src/packages-gatherer.py	2008-05-23 17:38:02 UTC (rev 855)
+++ udd/src/packages-gatherer.py	2008-05-24 12:00:49 UTC (rev 856)
@@ -1,79 +1,112 @@
 #/usr/bin/env python
-# Last-Modified: <Fri 23 May 2008 19:33:11 CEST>
+# Last-Modified: <Sat May 24 11:34:28 2008>
 
 from psycopg2 import connect
-from debian_bundle.deb822 import Packages
+import debian_bundle.deb822
+import gzip
 import os
-import syck
 import sys
-import gzip
+import aux
+from aux import ConfigException
 
-archs = []
+# A mapping from the architecture names to architecture IDs
+archs = {}
+# A mapping from <package-name><version> to 1
+# If <package-name><version> is included in this dictionary, this means,
+# that we've already added this package with this version for architecture 'all'
+# to the database. Needed because different architectures include packages
+# for architecture 'all' with the same version, and we don't want these duplicate
+# entries
+imported_all_pkgs = {}
+# The ID for the distribution we want to include
 distr_id = None
 
-def get_archs(conn):
-  c = conn.cursor();
-  c.execute("SELECT * from arch_ids")
-  result = {}
-  for row in c.fetchall():
-    result[row[1]] = row[0]
-  return result
+def import_packages(conn, sequence):
+  """Import the packages from the sequence into the database-connection conn.
 
-def get_distr_id(conn, distr):
-  c = conn.cursor();
-  c.execute("SELECT distr_id from distr_ids WHERE name = '" + distr + "'")
-  rows = c.fetchall()
-  if len(rows) == 0:
-    return None
-  elif len(rows) > 1:
-    print "Warning: Distribution %s exists more than once in distr_ids" % distr
-  else:
-    return rows[0][0]
-  
+  Sequence has to have an iterator interface, that yields a line every time it
+  is called. The format of the sequence is expected to be that of a Debian
+  packages file."""
+  global imported_all_pkgs
+  # The fields that are to be read. Other fields are ignored
+  fields = ('Architecture', 'Package', 'Version')
+  for control in debian_bundle.deb822.Packages.iter_paragraphs(sequence, fields):
+    # Check whether packages with architecture 'all' have already been
+    # imported
+    if control['Architecture'] == 'all':
+      t = control['Package'] + control['Version']
+      if t in imported_all_pkgs:
+	continue
+      imported_all_pkgs[t] = 1
 
-def import_pkgs(file, conn):
-  "Import file specified by file into database"
-  try:
-    for control in Packages(gzip.open(file)):
-      c = conn.cursor()
-      c.execute("INSERT INTO pkgs (name, distr_id, arch_id, version, src_id) VALUES ('%s', %d, %d, '%s', 0)" % (control["Package"], distr_id, archs[control["Architecture"]], control["Version"]))
-  except Exception, message:
-    print "Could not parse %s: %s" % (file, message)
+    cur = conn.cursor()
+    query = "INSERT INTO pkgs (name, distr_id, arch_id, version, src_id)\
+    VALUES ('%s', %d, %d, '%s', 0)" % (control["Package"], distr_id, archs[control["Architecture"]], control["Version"])
+    cur.execute(query)
 
-if __name__ == '__main__':
+def main():
+  global distr_id
+  global archs
   if len(sys.argv) != 3:
-    print "Usage: %s <config> <source>" % (sys.argv[0])
+    print "Usage: %s <config> <source>" % sys.argv[0]
     sys.exit(1)
 
-  cfg_file = sys.argv[1]
-  config = syck.load(open(cfg_file))
-  if not 'dbname' in config:
-    print "dbname not specified in " + cfg_file
-    sys.exit(1)
+  src_name = sys.argv[2]
+  cfg_path = sys.argv[1]
+  config = None
+  try:
+    config = aux.load_config(open(cfg_path).read())
+  except ConfigException, e:
+    raise ConfigException, "Configuration error in " + cfg_path +": " + e.message
 
-  source_name = sys.argv[2]
-  if not source_name in config:
-    print "%s not specified in %s" % (source_name, cfg_file)
-    sys.exit(1)
+  if not src_name in config:
+    raise ConfigException, "Source %s not specified in " + cfg_path
+  src_cfg = config[src_name]
 
-  source_config = config[source_name]
+  if not 'directory' in src_cfg:
+    raise ConfigException('directory not specified for source %s in file %s' %
+	(src_name, cfg_path))
+
+  if not 'archs' in src_cfg:
+    raise ConfigException('archs not specified for source %s in file %s' %
+	(src_name, cfg_path))
+
+  if not 'parts' in src_cfg:
+    raise ConfigException('parts not specified for source %s in file %s' %
+	(src_name, cfg_path))
+
+  if not 'distribution' in src_cfg:
+    raise ConfigException('distribution not specified for source %s in file %s' %
+	(src_name, cfg_path))
+
+  aux.debug = config['debug']
+
   conn = connect('dbname=' + config['dbname'])
-  archs = get_archs(conn)
-  
-  dir = source_config['directory']
-  distr = source_config['distribution']
-  distr_id = get_distr_id(conn, distr)
-  if distr_id is None:
-    c = conn.cursor()
-    c.execute("INSERT INTO distr_ids (name) VALUES ('%s')" % (distr))
-    distr_id = get_distr_id(conn, distr)
-    if distr_id is None:
-      print "Error: Could not create distr_id"
-      sys.exit(1)
 
-  for part in source_config['parts']:
-    for arch in source_config['archs']:
-      import_pkgs(os.path.join(dir, part, 'binary-' + arch, 'Packages.gz'), conn)
+  # Get distribution ID. If it does not exist, create it
+  distr_ids = aux.get_distrs(conn)
+  if src_cfg['distribution'] not in distr_ids:
+    aux.insert_distr(conn, src_cfg['distribution'])
+    distr_ids = aux.get_distrs(conn)
+  distr_id = distr_ids[src_cfg['distribution']]
 
+  archs = aux.get_archs(conn)
+
+  # For every part and every architecture, import the packages into the DB
+  for part in src_cfg['parts']:
+    for arch in src_cfg['archs']:
+      path = os.path.join(src_cfg['directory'], part, 'binary-' + arch, 'Packages.gz')
+      try:
+	aux.print_debug("Reading file " + path)
+	file = gzip.open(path)
+	lines = aux.BufferedLineReader(file, 1024*1024*4)
+	aux.print_debug("Importing from " + path)
+	import_packages(conn, lines)
+	file.close()
+      except IOError, (e, message):
+	print "Could not read packages from %s: %s" % (path, message)
+
   conn.commit()
 
+if __name__ == '__main__':
+  main()

Modified: udd/src/setup-db.py
===================================================================
--- udd/src/setup-db.py	2008-05-23 17:38:02 UTC (rev 855)
+++ udd/src/setup-db.py	2008-05-24 12:00:49 UTC (rev 856)
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-# Last-Modified: <Fri 23 May 2008 18:19:25 CEST>
+# Last-Modified: <Sat May 24 11:29:22 2008>
 # Starting from an empty database, create the necessary tables
 
 from psycopg2 import connect
@@ -12,7 +12,7 @@
     sys.exit(1)
 
   # Load configuration
-  config = syck.load(open(sys.argv[1]))
+  config = syck.load(open(sys.argv[1]).read())
   # Check configuration
   if not 'dbname' in config:
     print "dbname not specified in" + sys.argv[1]

Modified: udd/src/test.yaml
===================================================================
--- udd/src/test.yaml	2008-05-23 17:38:02 UTC (rev 855)
+++ udd/src/test.yaml	2008-05-24 12:00:49 UTC (rev 856)
@@ -2,6 +2,7 @@
 types:
         sources: echo
         packages: python packages-gatherer.py
+debug: 1
 
 archs:
  [alpha, amd64, arm, armeb, armel, hppa, hurd-i386,
@@ -9,11 +10,64 @@
   mipsel, powerpc, ppc64, s390, sparc, all, any]       
 
 debian-lenny:
-        archs: [alpha, amd64, arm, armeb, armel, hppa, hurd-i386,
-                i386, i486, ia64, kfreebsd-amd64, kfreebsd-i386, m68k, mips,
-                mipsel, powerpc, ppc64, s390, sparc]       
+        archs: [alpha, amd64, arm, armel, hppa,
+                i386, ia64, mips,
+                mipsel, powerpc, s390, sparc]       
         directory: /org/ftp.debian.org/dists/lenny/
         parts: [main, contrib, non-free]
         distribution: debian-lenny
         type: packages
 
+debian-sid:
+        archs: [alpha, amd64, arm, armel, hppa, hurd-i386,
+                i386, i486, ia64, kfreebsd-amd64, kfreebsd-i386, m68k, mips,
+                mipsel, powerpc, ppc64, s390, sparc]       
+        directory: /org/ftp.debian.org/dists/sid/
+        parts: [main, contrib, non-free]
+        distribution: debian-sid
+        type: packages
+
+debian-sarge:
+        archs: [alpha, amd64, arm, armel, hppa, hurd-i386,
+                i386, i486, ia64, kfreebsd-amd64, kfreebsd-i386, m68k, mips,
+                mipsel, powerpc, ppc64, s390, sparc]       
+        directory: /org/ftp.debian.org/dists/sarge/
+        parts: [main, contrib, non-free]
+        distribution: debian-sarge
+        type: packages
+
+debian-backports-etch:
+        archs: [alpha, amd64, arm, armel, hppa, hurd-i386,
+                i386, i486, ia64, kfreebsd-amd64, kfreebsd-i386, m68k, mips,
+                mipsel, powerpc, ppc64, s390, sparc]       
+        directory: /org/ftp.backports.org/dists/etch-backports/
+        parts: [main, contrib, non-free]
+        distribution: debian-backports-etch
+        type: packages
+
+debian-backports-sarge:
+        archs: [alpha, amd64, arm, armel, hppa, hurd-i386,
+                i386, i486, ia64, kfreebsd-amd64, kfreebsd-i386, m68k, mips,
+                mipsel, powerpc, ppc64, s390, sparc]       
+        directory: /org/ftp.backports.org/dists/sarge-backports/
+        parts: [main, contrib, non-free]
+        distribution: debian-backports-sarge
+        type: packages
+
+debian-volatile-etch:
+        archs: [alpha, amd64, arm, armel, hppa, hurd-i386,
+                i386, i486, ia64, kfreebsd-amd64, kfreebsd-i386, m68k, mips,
+                mipsel, powerpc, ppc64, s390, sparc]       
+        directory: /org/volatile.debian.org/dists/etch/volatile/
+        parts: [main, contrib, non-free]
+        distribution: debian-volatile-etch
+        type: packages
+
+debian-volatile-sarge:
+        archs: [alpha, amd64, arm, armel, hppa, hurd-i386,
+                i386, i486, ia64, kfreebsd-amd64, kfreebsd-i386, m68k, mips,
+                mipsel, powerpc, ppc64, s390, sparc]       
+        directory: /org/volatile.debian.org/dists/sarge/volatile/
+        parts: [main, contrib, non-free]
+        distribution: debian-volatile-sarge
+        type: packages

Modified: udd/src/udd-dispatch.py
===================================================================
--- udd/src/udd-dispatch.py	2008-05-23 17:38:02 UTC (rev 855)
+++ udd/src/udd-dispatch.py	2008-05-24 12:00:49 UTC (rev 856)
@@ -1,14 +1,13 @@
 #!/usr/bin/env python
-# Last-Modified: <Fri 23 May 2008 19:31:29 CEST>
+# Last-Modified: <Sat May 24 11:57:17 2008>
 
 """Dispatch udd gatherers
 
 This script is used to dispatch the source gatherers of the UDD project."""
 
-import syck
-from psycopg2 import connect
 import sys
 from os import system
+import aux
 
 def print_help():
   print "Usage: " + sys.argv[0] + " <configuration> <source1> [source2 source3 ...]"
@@ -19,29 +18,18 @@
     sys.exit(1)
 
   # Check the configuration
-  config = syck.load(open(sys.argv[1]))
-  if not 'dbname' in config:
-    print "dbname not specified in configuration file " + sys.argv[1]
-    sys.exit(1)
+  config = aux.load_config(open(sys.argv[1]).read())
 
-  if not 'types' in config:
-    print "types not specified in configuration file " + sys.argv[1]
-    sys.exit(1)
-
   types = config['types']
 
-  # Process the sources
   for src in sys.argv[2:]:
     if not src in config:
-      print src + " is no data source according to " + sys.argv[1]
-      sys.exit(1)
+      raise aux.ConfigException("%s is not specified in %s" % (src, sys.argv[1]))
 
+  # Process the sources
+  for src in sys.argv[2:]:
     src_config = config[src]
-    if not 'type' in src_config:
-      print "Type of " + src + " not specified in " + sys.argv[1]
-      sys.exit(1)
     type = src_config['type']
-
     if not type in types:
       print "No script specified for type " + src['type']
       sys.exit(1)




More information about the Collab-qa-commits mailing list