[Collab-qa-commits] r902 - in udd/src: . udd

neronus-guest at alioth.debian.org neronus-guest at alioth.debian.org
Sun Jun 29 10:56:24 UTC 2008


Author: neronus-guest
Date: 2008-06-29 10:56:23 +0000 (Sun, 29 Jun 2008)
New Revision: 902

Added:
   udd/src/udd/aux.py
   udd/src/udd/gatherer.py
   udd/src/udd/packages_gatherer.py
   udd/src/udd/popcon_gatherer.py
   udd/src/udd/sources_gatherer.py
Removed:
   udd/src/aux.py
   udd/src/packages_gatherer.py
   udd/src/popcon_gatherer.py
   udd/src/sources_gatherer.py
Modified:
   udd/src/setup-db.sql
   udd/src/test.yaml
   udd/src/udd-dispatch.py
Log:
* Gatherers can now be implemented as Python modules instead of executable files
* Converted our three existing gatherers (packages, popcon, sources) to Python modules


Deleted: udd/src/aux.py
===================================================================
--- udd/src/aux.py	2008-06-28 15:50:00 UTC (rev 901)
+++ udd/src/aux.py	2008-06-29 10:56:23 UTC (rev 902)
@@ -1,101 +0,0 @@
-"""Auxillary methods for the UDD"""
-
-import syck
-import sys
-import psycopg2
-
-# If debug is something that evaluates to True, then print_debug actually prints something
-debug = 0
-
-def get_archs(conn):
-  """Return a dicitionary, mapping from architecture names to their ids.
-
-  This mapping is retrivied from the connection <conn>"""
-  cur = conn.cursor()
-  cur.execute("SELECT * from arch_ids")
-  result = {}
-  for row in cur.fetchall():
-    result[row[1]] = row[0]
-  return result
-
-def get_distrs(conn):
-  """Return a dicitionary, mapping from distribution names to their ids.
-
-  This mapping is retrivied from the connection <conn>"""
-  cur = conn.cursor()
-  cur.execute("SELECT * from distr_ids")
-  result = {}
-  for row in cur.fetchall():
-    result[row[1]] = row[0]
-  return result
-
-class ConfigException(Exception):
-  def __init__(self, message):
-    Exception(self)
-    self.message = message
-
-  def __str__(self):
-    return "ConfigException: " + self.message
-
-def open_connection(config):
-  """Open the connection to the database and return it"""
-  return psycopg2.connect("dbname=" + config['general']['dbname'])
-
-def load_config(str):
-  """Load and check configuration from the string"""
-  config = syck.load(str)
-  if not 'general' in config:
-    raise ConfigException('general section not specified')
-  
-  general = config['general']
-
-  if not 'dbname' in general:
-    raise ConfigException('dbname not specified')
-
-  if not 'archs' in general:
-    raise ConfigException('archs not specified')
-
-  if not 'types' in general:
-    raise ConfigException('types not specified')
-
-  if not 'debug' in general:
-    general['debug'] = 0
-
-  # Check that the source-entries are well-formed
-  for name in config:
-    if name == 'general':
-      continue
-
-    src = config[name]
-    if not 'type' in src:
-      raise ConfigException('type not specified for "%s"' % name)
-    if src['type'] not in general['types']:
-      raise ConfigException('Type of %s not specified in types' % name)
-
-  return config
-
-def insert_distr(conn, distr_name):
-  "Insert distribution <distr_name> into DB"
-  cur = conn.cursor()
-  cur.execute("INSERT INTO distr_ids (name) VALUES ('%s')" % distr_name)
-
-def print_debug(*args):
-  "Print arguments to stdout if debug is set to something that evaluates to true"
-  if debug:
-    sys.stdout.write(*args)
-    sys.stdout.write("\n")
-
-class BufferedLineReader:
-  """Provides an iterator over the input of the specified file."""
-  def __init__(self, file, cache_size):
-    self.cache_size = cache_size
-    self.file = file
-
-  def __iter__(self):
-    while True:
-      lines = self.file.readlines(self.cache_size)
-      if len(lines) == 0:
-	break
-      for line in lines:
-	yield line
-

Deleted: udd/src/packages_gatherer.py
===================================================================
--- udd/src/packages_gatherer.py	2008-06-28 15:50:00 UTC (rev 901)
+++ udd/src/packages_gatherer.py	2008-06-29 10:56:23 UTC (rev 902)
@@ -1,188 +0,0 @@
-#/usr/bin/env python
-# Last-Modified: <Sat Jun 28 15:48:36 2008>
-
-import debian_bundle.deb822
-import gzip
-import os
-import sys
-import aux
-import tempfile
-from aux import ConfigException
-import psycopg2
-from gatherer import gatherer
-
-def quote(s):
-  return "'" + s.replace("'", "\\'") + "'"
-
-def null_or_quote(dict, key):
-  if key in dict:
-    return quote(dict[key])
-  else:
-    return 'NULL'
-
-class packages_gatherer(gatherer):
-  # For efficiency, these are dictionaries
-  mandatory = {'Package': 0, 'Version': 0, 'Architecture': 0, 'Maintainer': 0,
-      'Description': 0}
-  non_mandatory = {'Source': 0, 'Essential': 0, 'Depends': 0, 'Recommends': 0,
-      'Suggests': 0, 'Enhances': 0, 'Pre-Depends': 0, 'Installed-Size': 0,
-      'Homepage': 0, 'Size': 0, 'MD5Sum': 0}
-  ignorable = ()
-
-  warned_about = {}
-  # A mapping from <package-name><version> to 1 If <package-name><version> is
-  # included in this dictionary, this means, that we've already added this
-  # package with this version for architecture 'all' to the database. Needed
-  # because different architectures include packages for architecture 'all'
-  # with the same version, and we don't want these duplicate entries
-  imported_all_pkgs = {}
-
-  def __init__(self, connection, config):
-    gatherer.__init__(self, connection, config)
-    # The ID for the distribution we want to include
-    self._distr = None
-
-  def build_dict(self, control):
-    """Build a dictionary from the control dictionary.
-
-    Influenced by class variables mandatory, non_mandatory and ignorable"""
-    d = {}
-    for k in packages_gatherer.mandatory:
-      if k not in control:
-	raise "Mandatory field %s not specified" % k
-      d[k] = "'" + control[k].replace("\\", "\\\\").replace("'", "\\'") + "'"
-    for k in packages_gatherer.non_mandatory:
-      d[k] = packages_gatherer.null_or_quote(control, k)
-    for k in control.keys():
-      if k not in packages_gatherer.mandatory and k not in packages_gatherer.non_mandatory and k not in packages_gatherer.ignorable:
-	if k not in packages_gatherer.warned_about:
-	  packages_gatherer.warned_about[k] = 1
-	else:
-	  packages_gatherer.warned_about[k] += 1
-    return d
-
-  def import_packages(self, sequence):
-    """Import the packages from the sequence into the database-connection
-    conn.
-
-    Sequence has to have an iterator interface, that yields a line every time
-    it is called.The Format of the sequence is expected to be that of a
-    debian packages file."""
-    # The fields that are to be read. Other fields are ignored
-    cur = self.connection.cursor()
-    for control in debian_bundle.deb822.Packages.iter_paragraphs(sequence):
-      # Check whether packages with architectue 'all' have already been
-      # imported
-      if control['Architecture'] == 'all':
-	t = control['Package'] + control['Version']
-	if t in packages_gatherer.imported_all_pkgs:
-	  continue
-	packages_gatherer.imported_all_pkgs[t] = 1
-
-      d = self.build_dict(control)
-
-      # These are integer values - we don't need quotes for them
-      if d['Installed-Size'] != 'NULL':
-	d['Installed-Size'] = d['Installed-Size'].strip("'")
-      if d['Size'] != 'NULL':
-	d['Size'] = d['Size'].strip("'")
-
-      # We just use the first line of the description
-      if d['Description'] != "NULL":
-	d['Description'] = d['Description'].split("\n",1)[0]
-	# If the description was a one-liner only, we don't need to add
-	# a quote
-	if d['Description'][-1] != "'" or d['Description'][-2] == '\\':
-	  d['Description'] += "'"
-      
-      # Source is non-mandatory, but we don't want it to be NULL
-      if d['Source'] == "NULL":
-	d['Source'] = d['Package']
-	d['Source_Version'] = d['Version']
-      else:
-	split = d['Source'].strip("'").split()
-	if len(split) == 1:
-	  d['Source_Version'] = d['Version']
-	else:
-	  d['Source'] = quote(split[0])
-	  d['Source_Version'] = quote(split[1].strip("()"))
-
-      query = """EXECUTE package_insert
-	  (%(Package)s, %(Version)s, %(Architecture)s, %(Maintainer)s,
-	  %(Description)s, %(Source)s, %(Source_Version)s, %(Essential)s,
-	  %(Depends)s, %(Recommends)s, %(Suggests)s, %(Enhances)s,
-	  %(Pre-Depends)s, %(Installed-Size)s, %(Homepage)s, %(Size)s,
-	  %(MD5Sum)s)""" % d
-      try:
-	cur.execute(query)
-      except psycopg2.ProgrammingError:
-	print query
-	raise
-
-  def run(self, source):
-    if not source in self.config:
-      raise ConfigException, "Source %s not specified" %(source)
-    src_cfg = self.config[source]
-
-    if not 'directory' in src_cfg:
-      raise ConfigException('directory not specified for source %s' %
-	  (source))
-
-    if not 'archs' in src_cfg:
-      raise ConfigException('archs not specified for source %s' %
-	  (source))
-
-    if not 'release' in src_cfg:
-      raise ConfigException('release not specified for source %s' %
-	  (source))
-
-    if not 'components' in src_cfg:
-      raise ConfigException('components not specified for source %s' %
-	  (source))
-
-    if not 'distribution' in src_cfg:
-      raise ConfigException('distribution not specified for source %s' %
-	  (source))
-
-    aux.debug = self.config['general']['debug']
-
-    # Get distribution ID. If it does not exist, create it
-    self._distr = src_cfg['distribution']
-
-    cur = self.cursor()
-    #cur.execute("PREPARE pkg_insert AS INSERT INTO pkgs (name, distr_id, arch_id, version, src_id) VALUES ($1, $2, $3, $4, $5);")
-
-    # For every part and every architecture, import the packages into the DB
-    for comp in src_cfg['components']:
-      for arch in src_cfg['archs']:
-	path = os.path.join(src_cfg['directory'], comp, 'binary-' + arch, 'Packages.gz')
-	try:
-	  cur.execute("""PREPARE package_insert AS INSERT INTO Packages
-	    (Package, Version, Architecture, Maintainer, Description, Source,
-	    Source_Version, Essential, Depends, Recommends, Suggests, Enhances,
-	    Pre_Depends, Installed_Size, Homepage, Size, MD5Sum, Distribution,
-	    Release, Component)
-	  VALUES
-	    ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15,
-	      $16, $17, '%s', '%s', '%s')
-	    """ %  (distr, src_cfg['release'], comp))
-	  aux.print_debug("Reading file " + path)
-	  # Copy content from gzipped file to temporary file, so that apt_pkg is
-	  # used by debian_bundle
-	  tmp = tempfile.NamedTemporaryFile()
-	  file = gzip.open(path)
-	  tmp.write(file.read())
-	  file.close()
-	  tmp.seek(0)
-	  aux.print_debug("Importing from " + path)
-	  self.import_packages(open(tmp.name))
-	  tmp.close()
-	except IOError, (e, message):
-	  print "Could not read packages from %s: %s" % (path, message)
-	cur.execute("DEALLOCATE package_insert")
-
-    self.connection.commit()
-
-  def print_warnings(self):
-    for key in packages_gatherer.warned_about:
-      print("Unknown key: %s appeared %d times" % (key, packages_gatherer.warned_about[key]))

Deleted: udd/src/popcon_gatherer.py
===================================================================
--- udd/src/popcon_gatherer.py	2008-06-28 15:50:00 UTC (rev 901)
+++ udd/src/popcon_gatherer.py	2008-06-29 10:56:23 UTC (rev 902)
@@ -1,51 +0,0 @@
-#!/usr/bin/env python
-
-"""
-This script imports the popcon data into the database
-See http://popcon.debian.org/
-"""
-
-import aux
-import sys
-import gzip
-
-def main():
-  if len(sys.argv) != 3:
-    print 'Usage: %s <config-file> <source>' % sys.argv[0]
-    sys.exit(1)
-
-  config = aux.load_config(open(sys.argv[1]).read())
-  source = sys.argv[2]
-
-  try:
-    my_config = config[source]
-  except:
-    raise
-
-  if not 'path' in my_config:
-    raise aux.ConfigException, "path not configured for source " % source
-
-  conn = aux.open_connection(config)
-
-  cur = conn.cursor()
-
-  cur.execute("PREPARE pop_insert AS INSERT INTO popcon (name, vote, olde, recent, nofiles) VALUES ($1, $2, $3, $4, $5)")
-
-  popcon = gzip.open(my_config['path'])
-
-  for line in popcon.readlines():
-    name, data = line.split(None, 1)
-    if name == "Submissions:":
-      cur.execute("INSERT INTO popcon (name, vote) VALUES ('_submissions', %s)" % (data))
-    try:
-      (name, vote, old, recent, nofiles) = data.split()
-      cur.execute("EXECUTE pop_insert('%s', %s, %s, %s, %s)" %\
-	  (name, vote, old, recent, nofiles))
-    except ValueError:
-      continue
-
-  cur.execute("DEALLOCATE pop_insert")
-  conn.commit()
-
-if __name__ == '__main__':
-  main()

Modified: udd/src/setup-db.sql
===================================================================
--- udd/src/setup-db.sql	2008-06-28 15:50:00 UTC (rev 901)
+++ udd/src/setup-db.sql	2008-06-29 10:56:23 UTC (rev 902)
@@ -52,4 +52,4 @@
 GRANT SELECT ON popcon TO PUBLIC;
 
 GRANT SELECT ON popcon_average TO PUBLIC;
-GRANT SELECT ON popcon_sum TO PUBLIC;
+GRANT SELECT ON popcon_max TO PUBLIC;

Deleted: udd/src/sources_gatherer.py
===================================================================
--- udd/src/sources_gatherer.py	2008-06-28 15:50:00 UTC (rev 901)
+++ udd/src/sources_gatherer.py	2008-06-29 10:56:23 UTC (rev 902)
@@ -1,149 +0,0 @@
-#/usr/bin/env python
-# Last-Modified: <Thu Jun 26 10:20:28 2008>
-
-import debian_bundle.deb822
-import gzip
-import os
-import sys
-import aux
-import tempfile
-from aux import ConfigException
-
-distr = None
-
-mandatory = {'Format': 0, 'Maintainer': 0, 'Package': 0, 'Version': 0, 'Files': 0}
-non_mandatory = {'Uploaders': 0, 'Binary': 0, 'Architecture': 0,
-    'Standards-Version': 0, 'Homepage': 0, 'Build-Depends': 0,
-    'Build-Depends-Indep': 0, 'Build-Conflicts': 0, 'Build-Conflicts-Indep': 0,
-    'Priority': 0, 'Section': 0, 'Vcs-Arch': 0, 'Vcs-Browser': 0, 'Vcs-Bzr': 0,
-    'Vcs-Cvs': 0, 'Vcs-Darcs': 0, 'Vcs-Git': 0, 'Vcs-Hg': 0, 'Vcs-Svn': 0,
-    'X-Vcs-Browser': 0, 'X-Vcs-Bzr': 0, 'X-Vcs-Darcs': 0, 'X-Vcs-Svn': 0}
-
-ignorable = {}
-
-def null_or_quote(dict, key):
-  if key in dict:
-    return "'" + dict[key].replace("'", "\\'") + "'"
-  else:
-    return 'NULL'
-
-warned_about = {}
-def build_dict(control):
-  """Build a dictionary from the control dictionary.
-
-  Influenced by global variables mandatory, non_mandatory and ignorable"""
-  global mandatory, non_mandatory
-  d = {}
-  for k in mandatory:
-    if k not in control:
-      raise "Mandatory field %s not specified" % k
-    d[k] = "'" + control[k].replace("\\", "\\\\").replace("'", "\\'") + "'"
-  for k in non_mandatory:
-    d[k] = null_or_quote(control, k)
-  for k in control.keys():
-    if k not in mandatory and k not in non_mandatory and k not in ignorable:
-      if k not in warned_about:
-	warned_about[k] = 1
-      else:
-	warned_about[k] += 1
-  return d
-
-def import_sources(conn, file):
-  """Import the sources from the file into the database-connection conn.
-
-  Sequence has to have an iterator interface, that yields a line every time it
-  is called.The Format of the file is expected to be that of a debian
-  source file."""
-  cur = conn.cursor()
-  for control in debian_bundle.deb822.Packages.iter_paragraphs(file):
-    d = build_dict(control)
-    query = """EXECUTE source_insert
-	(%(Package)s, %(Version)s, %(Maintainer)s, %(Format)s, %(Files)s,
-	%(Uploaders)s, %(Binary)s, %(Architecture)s, %(Standards-Version)s,
-	%(Homepage)s, %(Build-Depends)s, %(Build-Depends-Indep)s,
-	%(Build-Conflicts)s, %(Build-Conflicts-Indep)s, %(Priority)s,
-	%(Section)s, %(Vcs-Arch)s, %(Vcs-Browser)s, %(Vcs-Bzr)s, %(Vcs-Cvs)s,
-	%(Vcs-Darcs)s, %(Vcs-Git)s, %(Vcs-Hg)s, %(Vcs-Svn)s, %(X-Vcs-Browser)s,
-	%(X-Vcs-Bzr)s, %(X-Vcs-Darcs)s, %(X-Vcs-Svn)s)
-	"""  % d
-    cur.execute(query)
-
-def main():
-  global distr
-  if len(sys.argv) != 3:
-    print "Usage: %s <config> <source>" % sys.argv[0]
-    sys.exit(1)
-
-  src_name = sys.argv[2]
-  cfg_path = sys.argv[1]
-  config = None
-  try:
-    config = aux.load_config(open(cfg_path).read())
-  except ConfigException, e:
-    raise ConfigException, "Configuration error in " + cfg_path +": " + e.message
-
-  if not src_name in config:
-    raise ConfigException, "Source %s not specified in %s" %(src_name, cfg_path)
-  src_cfg = config[src_name]
-
-  if not 'directory' in src_cfg:
-    raise ConfigException('directory not specified for source %s in file %s' %
-	(src_name, cfg_path))
-
-  if not 'components' in src_cfg:
-    raise ConfigException('parts not specified for source %s in file %s' %
-	(src_name, cfg_path))
-
-  if not 'distribution' in src_cfg:
-    raise ConfigException('distribution not specified for source %s in file %s' %
-	(src_name, cfg_path))
-
-  if not 'release' in src_cfg:
-    raise ConfigException('release not specified for source %s in file %s' %
-	(src_name, cfg_path))
-
-  aux.debug = config['general']['debug']
-
-  conn = aux.open_connection(config)
-
-  cur = conn.cursor()
-
-  for comp in src_cfg['components']:
-    path = os.path.join(src_cfg['directory'], comp, 'source', 'Sources.gz')
-    try:
-      query = """PREPARE source_insert as INSERT INTO sources
-	(Package, Version, Maintainer, Format, Files, Uploaders, Bin,
-	Architecture, Standards_Version, Homepage, Build_Depends,
-	Build_Depends_Indep, Build_Conflicts, Build_Conflicts_Indep, Priority,
-	Section, Vcs_Arch, Vcs_Browser, Vcs_Bzr, Vcs_Cvs, Vcs_Darcs, Vcs_Git,
-	Vcs_Hg, Vcs_Svn, X_Vcs_Browser, X_Vcs_Bzr, X_Vcs_Darcs, X_Vcs_Svn,
-	Distribution, Release, Component)
-      VALUES
-	($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16,
-	$17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, '%s', '%s',
-	'%s')"""\
-	% (src_cfg['distribution'], src_cfg['release'], comp)
-      cur.execute(query)
-
-      aux.print_debug("Reading file " + path)
-      # Copy content from gzipped file to temporary file, so that apt_pkg is
-      # used by debian_bundle
-      tmp = tempfile.NamedTemporaryFile()
-      file = gzip.open(path)
-      tmp.write(file.read())
-      file.close()
-      tmp.seek(0)
-      aux.print_debug("Importing from " + path)
-      import_sources(conn, open(tmp.name))
-      tmp.close()
-      cur.execute("DEALLOCATE source_insert")
-    except IOError, (e, message):
-      print "Could not read packages from %s: %s" % (path, message)
-
-  conn.commit()
-
-  for key in warned_about:
-    print "Unknowen key %s appeared %d times" % (key, warned_about[key])
-
-if __name__ == '__main__':
-  main()

Modified: udd/src/test.yaml
===================================================================
--- udd/src/test.yaml	2008-06-28 15:50:00 UTC (rev 901)
+++ udd/src/test.yaml	2008-06-29 10:56:23 UTC (rev 902)
@@ -1,12 +1,12 @@
 general:
-  dbname: udd
+  dbname: udd-test
   types:
-    sources: python sources_gatherer.py
-    packages: python packages_gatherer.py
-    setup: python db_manager.py
-    delete: python db_manager.py
-    src-pkg: python srcs_and_pkgs.py
-    popcon: python popcon_gatherer.py
+    sources: module udd.sources_gatherer
+    packages: module udd.packages_gatherer
+    setup: exec python db_manager.py
+    delete: exec python db_manager.py
+    src-pkg: exec python srcs_and_pkgs.py
+    popcon: module udd.popcon_gatherer
     #src-pkg: python sources_gatherer.py
   debug: 1
 

Copied: udd/src/udd/aux.py (from rev 884, udd/src/aux.py)
===================================================================
--- udd/src/udd/aux.py	                        (rev 0)
+++ udd/src/udd/aux.py	2008-06-29 10:56:23 UTC (rev 902)
@@ -0,0 +1,112 @@
+"""Auxillary methods for the UDD"""
+
+import syck
+import sys
+import psycopg2
+
+# If debug is something that evaluates to True, then print_debug actually prints something
+debug = 0
+
+def quote(s):
+  "Quote a string for SQL"
+  return "'" + s.replace("'", "\\'") + "'"
+
+def null_or_quote(dict, key):
+  "If key is an element of dict, return it quoted. Return NULL otherwise"
+  if key in dict:
+    return quote(dict[key])
+  else:
+    return 'NULL'
+
+def get_archs(conn):
+  """Return a dicitionary, mapping from architecture names to their ids.
+
+  This mapping is retrivied from the connection <conn>"""
+  cur = conn.cursor()
+  cur.execute("SELECT * from arch_ids")
+  result = {}
+  for row in cur.fetchall():
+    result[row[1]] = row[0]
+  return result
+
+def get_distrs(conn):
+  """Return a dicitionary, mapping from distribution names to their ids.
+
+  This mapping is retrivied from the connection <conn>"""
+  cur = conn.cursor()
+  cur.execute("SELECT * from distr_ids")
+  result = {}
+  for row in cur.fetchall():
+    result[row[1]] = row[0]
+  return result
+
+class ConfigException(Exception):
+  def __init__(self, message):
+    Exception(self)
+    self.message = message
+
+  def __str__(self):
+    return "ConfigException: " + self.message
+
+def open_connection(config):
+  """Open the connection to the database and return it"""
+  return psycopg2.connect("dbname=" + config['general']['dbname'])
+
+def load_config(str):
+  """Load and check configuration from the string"""
+  config = syck.load(str)
+  if not 'general' in config:
+    raise ConfigException('general section not specified')
+  
+  general = config['general']
+
+  if not 'dbname' in general:
+    raise ConfigException('dbname not specified')
+
+  if not 'archs' in general:
+    raise ConfigException('archs not specified')
+
+  if not 'types' in general:
+    raise ConfigException('types not specified')
+
+  if not 'debug' in general:
+    general['debug'] = 0
+
+  # Check that the source-entries are well-formed
+  for name in config:
+    if name == 'general':
+      continue
+
+    src = config[name]
+    if not 'type' in src:
+      raise ConfigException('type not specified for "%s"' % name)
+    if src['type'] not in general['types']:
+      raise ConfigException('Type of %s not specified in types' % name)
+
+  return config
+
+def insert_distr(conn, distr_name):
+  "Insert distribution <distr_name> into DB"
+  cur = conn.cursor()
+  cur.execute("INSERT INTO distr_ids (name) VALUES ('%s')" % distr_name)
+
+def print_debug(*args):
+  "Print arguments to stdout if debug is set to something that evaluates to true"
+  if debug:
+    sys.stdout.write(*args)
+    sys.stdout.write("\n")
+
+class BufferedLineReader:
+  """Provides an iterator over the input of the specified file."""
+  def __init__(self, file, cache_size):
+    self.cache_size = cache_size
+    self.file = file
+
+  def __iter__(self):
+    while True:
+      lines = self.file.readlines(self.cache_size)
+      if len(lines) == 0:
+	break
+      for line in lines:
+	yield line
+

Added: udd/src/udd/gatherer.py
===================================================================
--- udd/src/udd/gatherer.py	                        (rev 0)
+++ udd/src/udd/gatherer.py	2008-06-29 10:56:23 UTC (rev 902)
@@ -0,0 +1,15 @@
+"""
+This is the base class of all gatherers which want to use the python
+interface to be called by the dispatcher
+"""
+
+class gatherer:
+  def __init__(self, connection, config):
+    self.connection = connection
+    self.config = config
+
+  def run(self, source):
+    raise NotImplementedError
+
+  def cursor(self):
+    return self.connection.cursor()

Copied: udd/src/udd/packages_gatherer.py (from rev 901, udd/src/packages_gatherer.py)
===================================================================
--- udd/src/udd/packages_gatherer.py	                        (rev 0)
+++ udd/src/udd/packages_gatherer.py	2008-06-29 10:56:23 UTC (rev 902)
@@ -0,0 +1,182 @@
+#/usr/bin/env python
+# Last-Modified: <Sat Jun 28 17:17:05 2008>
+
+import debian_bundle.deb822
+import gzip
+import os
+import sys
+import aux
+import tempfile
+from aux import ConfigException
+import psycopg2
+from gatherer import gatherer
+
+def get_gatherer(connection, config):
+  return packages_gatherer(connection, config)
+
+class packages_gatherer(gatherer):
+  # For efficiency, these are dictionaries
+  mandatory = {'Package': 0, 'Version': 0, 'Architecture': 0, 'Maintainer': 0,
+      'Description': 0}
+  non_mandatory = {'Source': 0, 'Essential': 0, 'Depends': 0, 'Recommends': 0,
+      'Suggests': 0, 'Enhances': 0, 'Pre-Depends': 0, 'Installed-Size': 0,
+      'Homepage': 0, 'Size': 0, 'MD5Sum': 0}
+  ignorable = ()
+
+  warned_about = {}
+  # A mapping from <package-name><version> to 1 If <package-name><version> is
+  # included in this dictionary, this means, that we've already added this
+  # package with this version for architecture 'all' to the database. Needed
+  # because different architectures include packages for architecture 'all'
+  # with the same version, and we don't want these duplicate entries
+  imported_all_pkgs = {}
+
+  def __init__(self, connection, config):
+    gatherer.__init__(self, connection, config)
+    # The ID for the distribution we want to include
+    self._distr = None
+
+  def build_dict(self, control):
+    """Build a dictionary from the control dictionary.
+
+    Influenced by class variables mandatory, non_mandatory and ignorable"""
+    d = {}
+    for k in packages_gatherer.mandatory:
+      if k not in control:
+	raise "Mandatory field %s not specified" % k
+      d[k] = "'" + control[k].replace("\\", "\\\\").replace("'", "\\'") + "'"
+    for k in packages_gatherer.non_mandatory:
+      d[k] = aux.null_or_quote(control, k)
+    for k in control.keys():
+      if k not in packages_gatherer.mandatory and k not in packages_gatherer.non_mandatory and k not in packages_gatherer.ignorable:
+	if k not in packages_gatherer.warned_about:
+	  packages_gatherer.warned_about[k] = 1
+	else:
+	  packages_gatherer.warned_about[k] += 1
+    return d
+
+  def import_packages(self, sequence):
+    """Import the packages from the sequence into the database-connection
+    conn.
+
+    Sequence has to have an iterator interface, that yields a line every time
+    it is called.The Format of the sequence is expected to be that of a
+    debian packages file."""
+    # The fields that are to be read. Other fields are ignored
+    cur = self.connection.cursor()
+    for control in debian_bundle.deb822.Packages.iter_paragraphs(sequence):
+      # Check whether packages with architectue 'all' have already been
+      # imported
+      if control['Architecture'] == 'all':
+	t = control['Package'] + control['Version']
+	if t in packages_gatherer.imported_all_pkgs:
+	  continue
+	packages_gatherer.imported_all_pkgs[t] = 1
+
+      d = self.build_dict(control)
+
+      # These are integer values - we don't need quotes for them
+      if d['Installed-Size'] != 'NULL':
+	d['Installed-Size'] = d['Installed-Size'].strip("'")
+      if d['Size'] != 'NULL':
+	d['Size'] = d['Size'].strip("'")
+
+      # We just use the first line of the description
+      if d['Description'] != "NULL":
+	d['Description'] = d['Description'].split("\n",1)[0]
+	# If the description was a one-liner only, we don't need to add
+	# a quote
+	if d['Description'][-1] != "'" or d['Description'][-2] == '\\':
+	  d['Description'] += "'"
+      
+      # Source is non-mandatory, but we don't want it to be NULL
+      if d['Source'] == "NULL":
+	d['Source'] = d['Package']
+	d['Source_Version'] = d['Version']
+      else:
+	split = d['Source'].strip("'").split()
+	if len(split) == 1:
+	  d['Source_Version'] = d['Version']
+	else:
+	  d['Source'] = aux.quote(split[0])
+	  d['Source_Version'] = aux.quote(split[1].strip("()"))
+
+      query = """EXECUTE package_insert
+	  (%(Package)s, %(Version)s, %(Architecture)s, %(Maintainer)s,
+	  %(Description)s, %(Source)s, %(Source_Version)s, %(Essential)s,
+	  %(Depends)s, %(Recommends)s, %(Suggests)s, %(Enhances)s,
+	  %(Pre-Depends)s, %(Installed-Size)s, %(Homepage)s, %(Size)s,
+	  %(MD5Sum)s)""" % d
+      try:
+	cur.execute(query)
+      except psycopg2.ProgrammingError:
+	print query
+	raise
+
+  def run(self, source):
+    if not source in self.config:
+      raise ConfigException, "Source %s not specified" %(source)
+    src_cfg = self.config[source]
+
+    if not 'directory' in src_cfg:
+      raise ConfigException('directory not specified for source %s' %
+	  (source))
+
+    if not 'archs' in src_cfg:
+      raise ConfigException('archs not specified for source %s' %
+	  (source))
+
+    if not 'release' in src_cfg:
+      raise ConfigException('release not specified for source %s' %
+	  (source))
+
+    if not 'components' in src_cfg:
+      raise ConfigException('components not specified for source %s' %
+	  (source))
+
+    if not 'distribution' in src_cfg:
+      raise ConfigException('distribution not specified for source %s' %
+	  (source))
+
+    aux.debug = self.config['general']['debug']
+
+    # Get distribution ID. If it does not exist, create it
+    self._distr = src_cfg['distribution']
+
+    cur = self.cursor()
+    #cur.execute("PREPARE pkg_insert AS INSERT INTO pkgs (name, distr_id, arch_id, version, src_id) VALUES ($1, $2, $3, $4, $5);")
+
+    # For every part and every architecture, import the packages into the DB
+    for comp in src_cfg['components']:
+      for arch in src_cfg['archs']:
+	path = os.path.join(src_cfg['directory'], comp, 'binary-' + arch, 'Packages.gz')
+	try:
+	  cur.execute("""PREPARE package_insert AS INSERT INTO Packages
+	    (Package, Version, Architecture, Maintainer, Description, Source,
+	    Source_Version, Essential, Depends, Recommends, Suggests, Enhances,
+	    Pre_Depends, Installed_Size, Homepage, Size, MD5Sum, Distribution,
+	    Release, Component)
+	  VALUES
+	    ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15,
+	      $16, $17, '%s', '%s', '%s')
+	    """ %  (self._distr, src_cfg['release'], comp))
+	  aux.print_debug("Reading file " + path)
+	  # Copy content from gzipped file to temporary file, so that apt_pkg is
+	  # used by debian_bundle
+	  tmp = tempfile.NamedTemporaryFile()
+	  file = gzip.open(path)
+	  tmp.write(file.read())
+	  file.close()
+	  tmp.seek(0)
+	  aux.print_debug("Importing from " + path)
+	  self.import_packages(open(tmp.name))
+	  tmp.close()
+	except IOError, (e, message):
+	  print "Could not read packages from %s: %s" % (path, message)
+	cur.execute("DEALLOCATE package_insert")
+
+    self.connection.commit()
+
+  def print_warnings(self):
+    for key in packages_gatherer.warned_about:
+      print("Unknown key: %s appeared %d times" % (key, packages_gatherer.warned_about[key]))

Copied: udd/src/udd/popcon_gatherer.py (from rev 900, udd/src/popcon_gatherer.py)
===================================================================
--- udd/src/udd/popcon_gatherer.py	                        (rev 0)
+++ udd/src/udd/popcon_gatherer.py	2008-06-29 10:56:23 UTC (rev 902)
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+
+"""
+This script imports the popcon data into the database
+See http://popcon.debian.org/
+"""
+
+import aux
+import sys
+import gzip
+from gatherer import gatherer
+
+def get_gatherer(connection, config):
+  return popcon_gatherer(connection, config)
+
+class popcon_gatherer(gatherer):
+  def __init__(self, connection, config):
+    gatherer.__init__(self, connection, config)
+
+  def run(self, source):
+    try:
+      my_config = self.config[source]
+    except:
+      raise
+
+    if not 'path' in my_config:
+      raise aux.ConfigException, "path not configured for source " % source
+
+    cur = self.cursor()
+
+    cur.execute("PREPARE pop_insert AS INSERT INTO popcon (name, vote, olde, recent, nofiles) VALUES ($1, $2, $3, $4, $5)")
+
+    popcon = gzip.open(my_config['path'])
+
+    for line in popcon.readlines():
+      name, data = line.split(None, 1)
+      if name == "Submissions:":
+	cur.execute("INSERT INTO popcon (name, vote) VALUES ('_submissions', %s)" % (data))
+      try:
+	(name, vote, old, recent, nofiles) = data.split()
+	cur.execute("EXECUTE pop_insert('%s', %s, %s, %s, %s)" %\
+	    (name, vote, old, recent, nofiles))
+      except ValueError:
+	continue
+
+    cur.execute("DEALLOCATE pop_insert")
+
+if __name__ == '__main__':
+  main()

Copied: udd/src/udd/sources_gatherer.py (from rev 898, udd/src/sources_gatherer.py)
===================================================================
--- udd/src/udd/sources_gatherer.py	                        (rev 0)
+++ udd/src/udd/sources_gatherer.py	2008-06-29 10:56:23 UTC (rev 902)
@@ -0,0 +1,131 @@
+#!/usr/bin/env python
+# Last-Modified: <Sun Jun 29 10:48:17 2008>
+
+import debian_bundle.deb822
+import gzip
+import os
+import sys
+import aux
+import tempfile
+from aux import ConfigException
+from aux import null_or_quote
+from gatherer import gatherer
+
+def get_gatherer(connection, config):
+  return sources_gatherer(connection, config)
+
+class sources_gatherer(gatherer):
+  mandatory = {'Format': 0, 'Maintainer': 0, 'Package': 0, 'Version': 0, 'Files': 0}
+  non_mandatory = {'Uploaders': 0, 'Binary': 0, 'Architecture': 0,
+      'Standards-Version': 0, 'Homepage': 0, 'Build-Depends': 0,
+      'Build-Depends-Indep': 0, 'Build-Conflicts': 0, 'Build-Conflicts-Indep': 0,
+      'Priority': 0, 'Section': 0, 'Vcs-Arch': 0, 'Vcs-Browser': 0, 'Vcs-Bzr': 0,
+      'Vcs-Cvs': 0, 'Vcs-Darcs': 0, 'Vcs-Git': 0, 'Vcs-Hg': 0, 'Vcs-Svn': 0,
+      'X-Vcs-Browser': 0, 'X-Vcs-Bzr': 0, 'X-Vcs-Darcs': 0, 'X-Vcs-Svn': 0}
+  ignorable = {}
+
+  warned_about = {}
+
+  def __init__(self, connection, config):
+    gatherer.__init__(self, connection, config)
+    self._distr = None
+
+  def build_dict(self, control):
+    """Build a dictionary from the control dictionary.
+
+    Influenced by global variables mandatory, non_mandatory and ignorable"""
+    d = {}
+    for k in sources_gatherer.mandatory:
+      if k not in control:
+	raise "Mandatory field %s not specified" % k
+      d[k] = "'" + control[k].replace("\\", "\\\\").replace("'", "\\'") + "'"
+    for k in sources_gatherer.non_mandatory:
+      d[k] = null_or_quote(control, k)
+    for k in control.keys():
+      if k not in sources_gatherer.mandatory and k not in sources_gatherer.non_mandatory and k not in sources_gatherer.ignorable:
+	if k not in sources_gatherer.warned_about:
+	  sources_gatherer.warned_about[k] = 1
+	else:
+	  sources_gatherer.warned_about[k] += 1
+    return d
+
+  def import_sources(self, file):
+    """Import the sources from the file into the database-connection conn.
+
+    Sequence has to have an iterator interface, that yields a line every time it
+    is called.The Format of the file is expected to be that of a debian
+    source file."""
+    cur = self.cursor()
+    for control in debian_bundle.deb822.Packages.iter_paragraphs(file):
+      d = self.build_dict(control)
+      query = """EXECUTE source_insert
+	  (%(Package)s, %(Version)s, %(Maintainer)s, %(Format)s, %(Files)s,
+	  %(Uploaders)s, %(Binary)s, %(Architecture)s, %(Standards-Version)s,
+	  %(Homepage)s, %(Build-Depends)s, %(Build-Depends-Indep)s,
+	  %(Build-Conflicts)s, %(Build-Conflicts-Indep)s, %(Priority)s,
+	  %(Section)s, %(Vcs-Arch)s, %(Vcs-Browser)s, %(Vcs-Bzr)s, %(Vcs-Cvs)s,
+	  %(Vcs-Darcs)s, %(Vcs-Git)s, %(Vcs-Hg)s, %(Vcs-Svn)s, %(X-Vcs-Browser)s,
+	  %(X-Vcs-Bzr)s, %(X-Vcs-Darcs)s, %(X-Vcs-Svn)s)
+	  """  % d
+      cur.execute(query)
+
+  def run(self, source):
+    if not source in self.config:
+      raise ConfigException, "Source %s not specified" %(src_name)
+    src_cfg = self.config[source]
+
+    if not 'directory' in src_cfg:
+      raise ConfigException('directory not specified for source %s' %
+	  (src_name))
+
+    if not 'components' in src_cfg:
+      raise ConfigException('parts not specified for source %s' %
+	  (src_name))
+
+    if not 'distribution' in src_cfg:
+      raise ConfigException('distribution not specified for source in file %s' %
+	  (src_name))
+
+    if not 'release' in src_cfg:
+      raise ConfigException('release not specified for source %s' %
+	  (src_name))
+
+    aux.debug = self.config['general']['debug']
+
+    cur = self.cursor()
+
+    for comp in src_cfg['components']:
+      path = os.path.join(src_cfg['directory'], comp, 'source', 'Sources.gz')
+      try:
+	query = """PREPARE source_insert as INSERT INTO sources
+	  (Package, Version, Maintainer, Format, Files, Uploaders, Bin,
+	  Architecture, Standards_Version, Homepage, Build_Depends,
+	  Build_Depends_Indep, Build_Conflicts, Build_Conflicts_Indep, Priority,
+	  Section, Vcs_Arch, Vcs_Browser, Vcs_Bzr, Vcs_Cvs, Vcs_Darcs, Vcs_Git,
+	  Vcs_Hg, Vcs_Svn, X_Vcs_Browser, X_Vcs_Bzr, X_Vcs_Darcs, X_Vcs_Svn,
+	  Distribution, Release, Component)
+	VALUES
+	  ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16,
+	  $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, '%s', '%s',
+	  '%s')"""\
+	  % (src_cfg['distribution'], src_cfg['release'], comp)
+	cur.execute(query)
+
+	aux.print_debug("Reading file " + path)
+	# Copy content from gzipped file to temporary file, so that apt_pkg is
+	# used by debian_bundle
+	tmp = tempfile.NamedTemporaryFile()
+	file = gzip.open(path)
+	tmp.write(file.read())
+	file.close()
+	tmp.seek(0)
+	aux.print_debug("Importing from " + path)
+	self.import_sources(open(tmp.name))
+	tmp.close()
+	cur.execute("DEALLOCATE source_insert")
+      except IOError, (e, message):
+	print "Could not read packages from %s: %s" % (path, message)
+
+  def print_warnings(self):
+    for key in warned_about:
+      print "Unknowen key %s appeared %d times" % (key, warned_about[key])

Modified: udd/src/udd-dispatch.py
===================================================================
--- udd/src/udd-dispatch.py	2008-06-28 15:50:00 UTC (rev 901)
+++ udd/src/udd-dispatch.py	2008-06-29 10:56:23 UTC (rev 902)
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-# Last-Modified: <Thu May 29 21:02:10 2008>
+# Last-Modified: <Sun Jun 29 10:53:46 2008>
 
 """Dispatch udd gatherers
 
@@ -7,7 +7,7 @@
 
 import sys
 from os import system
-import aux
+import udd.aux
 
 def print_help():
   print "Usage: " + sys.argv[0] + " <configuration> <source1> [source2 source3 ...]"
@@ -18,7 +18,7 @@
     sys.exit(1)
 
   # Check the configuration
-  config = aux.load_config(open(sys.argv[1]).read())
+  config = udd.aux.load_config(open(sys.argv[1]).read())
 
   types = config['general']['types']
 
@@ -26,6 +26,8 @@
     if not src in config:
       raise aux.ConfigException("%s is not specified in %s" % (src, sys.argv[1]))
 
+  connection = udd.aux.open_connection(config)
+
   # Process the sources
   for src in sys.argv[2:]:
     src_config = config[src]
@@ -33,6 +35,14 @@
     if not type in types:
       print "No script specified for type " + src['type']
       sys.exit(1)
-    script = types[type]
 
-    system(script + " " + sys.argv[1] + " " + src)
+
+    (command,rest) = types[type].split(None, 1)
+    
+    if command == "exec":
+      system(rest + " " + sys.argv[1] + " " + src)
+    elif command == "module":
+      exec("import " + rest)
+      exec "gatherer = " + rest + ".get_gatherer(connection, config)"
+      gatherer.run(src)
+    connection.commit()




More information about the Collab-qa-commits mailing list