[Collab-qa-commits] r855 - in udd: . src
neronus-guest at alioth.debian.org
neronus-guest at alioth.debian.org
Fri May 23 17:38:03 UTC 2008
Author: neronus-guest
Date: 2008-05-23 17:38:02 +0000 (Fri, 23 May 2008)
New Revision: 855
Added:
udd/src/
udd/src/destroy-db.sh
udd/src/import-pkgs.py
udd/src/packages-gatherer.py
udd/src/setup-db.py
udd/src/test.yaml
udd/src/udd-dispatch.py
Log:
Added source directory and first, temporary scripts
Added: udd/src/destroy-db.sh
===================================================================
--- udd/src/destroy-db.sh (rev 0)
+++ udd/src/destroy-db.sh 2008-05-23 17:38:02 UTC (rev 855)
@@ -0,0 +1,5 @@
+#!/bin/sh
+# Start over with an empty udd database: drop it, then create it again (-O christian).
+as_postgres="sudo -u postgres"
+$as_postgres dropdb udd
+$as_postgres createdb -O christian udd
Added: udd/src/import-pkgs.py
===================================================================
--- udd/src/import-pkgs.py (rev 0)
+++ udd/src/import-pkgs.py 2008-05-23 17:38:02 UTC (rev 855)
@@ -0,0 +1,51 @@
+#!/usr/bin/env python
+# Last-Modified: <Fri 23 May 2008 14:52:25 CEST>
+
+"""Insert one row into the pkgs table of the udd database for each .deb file
+named on the command line."""
+
+import debian_bundle
+from debian_bundle import debfile
+import psycopg2
+import sys
+
+def get_archs(conn):
+    """Return a dict mapping the name of each arch_ids row to its id."""
+    c = conn.cursor()
+    c.execute("SELECT * from arch_ids")
+    # row[0] is the id column, row[1] the name column.
+    return dict((row[1], row[0]) for row in c.fetchall())
+
+def get_distrs(conn):
+    """Return a dict mapping the name of each distr_ids row to its id."""
+    c = conn.cursor()
+    c.execute("SELECT * from distr_ids")
+    # row[0] is the id column, row[1] the name column.
+    return dict((row[1], row[0]) for row in c.fetchall())
+
+if __name__ == '__main__':
+    conn = psycopg2.connect("dbname=udd")
+    archs = get_archs(conn)
+    c = conn.cursor()
+    n = 0
+    for path in sys.argv[1:]:
+        try:
+            control = debfile.DebFile(path).debcontrol()
+        except Exception, message:
+            print "Could not parse %s: %s" % (path, message)
+            continue
+        arch = control["Architecture"]
+        if arch not in archs:
+            # Skip the package instead of dying with a KeyError: nothing is
+            # committed until the end, so a crash here loses every row so far.
+            print "Unknown architecture %s in %s" % (arch, path)
+            continue
+        # Values from the package are passed as parameters so psycopg2 quotes
+        # them. distr_id and src_id are not looked up yet and are stored as 0.
+        c.execute("INSERT INTO pkgs (name, distr_id, arch_id, version, src_id) VALUES (%s, 0, %s, %s, 0)",
+                  (control["Package"], archs[arch], control["Version"]))
+        n += 1
+        if n % 100 == 0:
+            print n
+    conn.commit()
+
Added: udd/src/packages-gatherer.py
===================================================================
--- udd/src/packages-gatherer.py (rev 0)
+++ udd/src/packages-gatherer.py 2008-05-23 17:38:02 UTC (rev 855)
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+# Last-Modified: <Fri 23 May 2008 19:33:11 CEST>
+
+"""Import the Packages.gz indices of one configured source into the pkgs table.
+
+Usage: packages-gatherer.py <config> <source>"""
+
+from psycopg2 import connect
+from debian_bundle.deb822 import Packages
+import os
+import syck
+import sys
+import gzip
+
+# Both are filled in by the main program before import_pkgs() runs.
+archs = []
+distr_id = None
+
+def get_archs(conn):
+    """Return a dict mapping the name of each arch_ids row to its id."""
+    c = conn.cursor()
+    c.execute("SELECT * from arch_ids")
+    # row[0] is the id column, row[1] the name column.
+    return dict((row[1], row[0]) for row in c.fetchall())
+
+def get_distr_id(conn, distr):
+    """Return the distr_id of the distribution named distr, or None if it is not in distr_ids."""
+    c = conn.cursor()
+    # Bind the name as a query parameter instead of pasting it into the SQL.
+    c.execute("SELECT distr_id from distr_ids WHERE name = %s", (distr,))
+    rows = c.fetchall()
+    if len(rows) == 0:
+        return None
+    if len(rows) > 1:
+        print "Warning: Distribution %s exists more than once in distr_ids" % distr
+    # Even with duplicates, hand back the first id: returning None would
+    # make the caller insert one more copy of the distribution.
+    return rows[0][0]
+
+
+def import_pkgs(file, conn):
+    """Import the gzipped Packages index file into the pkgs table.
+
+    Reads the module-level archs and distr_id. Any exception is reported on
+    stdout and ends the import of this file; it is not passed on."""
+    try:
+        index = gzip.open(file)
+        try:
+            c = conn.cursor()
+            # iter_paragraphs() yields one entry per package stanza; iterating
+            # over a single Packages object would walk its field names instead.
+            for control in Packages.iter_paragraphs(index):
+                c.execute("INSERT INTO pkgs (name, distr_id, arch_id, version, src_id) VALUES (%s, %s, %s, %s, 0)",
+                          (control["Package"], distr_id, archs[control["Architecture"]], control["Version"]))
+        finally:
+            index.close()
+    except Exception, message:
+        print "Could not parse %s: %s" % (file, message)
+
+if __name__ == '__main__':
+    if len(sys.argv) != 3:
+        print "Usage: %s <config> <source>" % (sys.argv[0])
+        sys.exit(1)
+
+    cfg_file = sys.argv[1]
+    config = syck.load(open(cfg_file))
+    if 'dbname' not in config:
+        print "dbname not specified in " + cfg_file
+        sys.exit(1)
+
+    source_name = sys.argv[2]
+    if source_name not in config:
+        print "%s not specified in %s" % (source_name, cfg_file)
+        sys.exit(1)
+
+    source_config = config[source_name]
+    conn = connect('dbname=' + config['dbname'])
+    archs = get_archs(conn)
+
+    directory = source_config['directory']
+    distr = source_config['distribution']
+    distr_id = get_distr_id(conn, distr)
+    if distr_id is None:
+        # Distribution not known yet: add it, then read back the new id.
+        c = conn.cursor()
+        c.execute("INSERT INTO distr_ids (name) VALUES (%s)", (distr,))
+        distr_id = get_distr_id(conn, distr)
+        if distr_id is None:
+            print "Error: Could not create distr_id"
+            sys.exit(1)
+
+    for part in source_config['parts']:
+        for arch in source_config['archs']:
+            import_pkgs(os.path.join(directory, part, 'binary-' + arch, 'Packages.gz'), conn)
+
+    conn.commit()
+
Added: udd/src/setup-db.py
===================================================================
--- udd/src/setup-db.py (rev 0)
+++ udd/src/setup-db.py 2008-05-23 17:38:02 UTC (rev 855)
@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+# Last-Modified: <Fri 23 May 2008 18:19:25 CEST>
+# Starting from an empty database, create the necessary tables
+
+from psycopg2 import connect
+import syck
+import sys
+
+if __name__ == '__main__':
+    if len(sys.argv) != 2:
+        print "Usage: %s <udd config file>" % (sys.argv[0])
+        sys.exit(1)
+
+    # Load configuration
+    config = syck.load(open(sys.argv[1]))
+    # Check configuration: both keys are read further down
+    if 'dbname' not in config:
+        print "dbname not specified in " + sys.argv[1]
+        sys.exit(1)
+    if 'archs' not in config:
+        print 'archs not specified in ' + sys.argv[1]
+        sys.exit(1)
+
+    connection = connect("dbname = " + config['dbname'])
+
+    # Create tables (committed together with the arch_ids rows at the end)
+    cursor = connection.cursor()
+    cursor.execute("CREATE TABLE pkgs (pkg_id serial, name text, distr_id int, arch_id int, version text, src_id int);")
+    cursor.execute("CREATE TABLE sources (src_id serial, name text, upload_date timestamp, uploader_key int, maintainer int, build_archs int, version text, distr_id int);")
+    cursor.execute("CREATE TABLE distr_ids (distr_id serial, name text);")
+    cursor.execute("CREATE TABLE arch_ids (arch_id serial, name text);")
+    # TODO: Add carnivore
+
+    # Fill arch_ids from the config; each name is passed as a query parameter
+    for arch in config['archs']:
+        cursor.execute("INSERT INTO arch_ids (name) VALUES (%s);", (arch,))
+
+    connection.commit()
+
Added: udd/src/test.yaml
===================================================================
--- udd/src/test.yaml (rev 0)
+++ udd/src/test.yaml 2008-05-23 17:38:02 UTC (rev 855)
@@ -0,0 +1,19 @@
+dbname: udd  # database name the config-driven scripts pass to psycopg2 connect()
+types:  # source type -> command line udd-dispatch.py runs for sources of that type
+ sources: echo
+ packages: python packages-gatherer.py
+
+archs:  # names setup-db.py inserts into the arch_ids table
+ [alpha, amd64, arm, armeb, armel, hppa, hurd-i386,
+ i386, i486, ia64, kfreebsd-amd64, kfreebsd-i386, m68k, mips,
+ mipsel, powerpc, ppc64, s390, sparc, all, any]
+
+debian-lenny:  # one data source; this key is the <source> command line argument
+ archs: [alpha, amd64, arm, armeb, armel, hppa, hurd-i386,
+ i386, i486, ia64, kfreebsd-amd64, kfreebsd-i386, m68k, mips,
+ mipsel, powerpc, ppc64, s390, sparc]
+ directory: /org/ftp.debian.org/dists/lenny/
+ parts: [main, contrib, non-free]
+ distribution: debian-lenny  # name looked up in (or added to) distr_ids
+ type: packages  # must be one of the keys under types
+
Added: udd/src/udd-dispatch.py
===================================================================
--- udd/src/udd-dispatch.py (rev 0)
+++ udd/src/udd-dispatch.py 2008-05-23 17:38:02 UTC (rev 855)
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+# Last-Modified: <Fri 23 May 2008 19:31:29 CEST>
+
+"""Dispatch udd gatherers
+
+This script is used to dispatch the source gatherers of the UDD project."""
+
+import syck
+from psycopg2 import connect
+import sys
+from os import system
+import pipes
+
+def print_help():
+    """Print the command line synopsis."""
+    print "Usage: " + sys.argv[0] + " <configuration> <source1> [source2 source3 ...]"
+
+if __name__ == '__main__':
+    if len(sys.argv) < 3:
+        print_help()
+        sys.exit(1)
+
+    # Check the configuration
+    config = syck.load(open(sys.argv[1]))
+    if 'dbname' not in config:
+        print "dbname not specified in configuration file " + sys.argv[1]
+        sys.exit(1)
+
+    if 'types' not in config:
+        print "types not specified in configuration file " + sys.argv[1]
+        sys.exit(1)
+
+    types = config['types']
+
+    # Process the sources
+    for src in sys.argv[2:]:
+        if src not in config:
+            print src + " is no data source according to " + sys.argv[1]
+            sys.exit(1)
+
+        src_config = config[src]
+        if 'type' not in src_config:
+            print "Type of " + src + " not specified in " + sys.argv[1]
+            sys.exit(1)
+        src_type = src_config['type']
+
+        if src_type not in types:
+            # src is the source's name (a string), so report the type that was looked up.
+            print "No script specified for type %s" % src_type
+            sys.exit(1)
+        script = types[src_type]
+
+        # The configured script is a shell command line; the two arguments are
+        # quoted so that spaces or shell metacharacters in them stay literal.
+        status = system(script + " " + pipes.quote(sys.argv[1]) + " " + pipes.quote(src))
+        if status != 0:
+            print "Gatherer for %s returned status %d" % (src, status)
More information about the Collab-qa-commits
mailing list