[Collab-qa-commits] r891 - udd/src
neronus-guest at alioth.debian.org
neronus-guest at alioth.debian.org
Tue Jun 17 10:36:14 UTC 2008
Author: neronus-guest
Date: 2008-06-17 10:36:13 +0000 (Tue, 17 Jun 2008)
New Revision: 891
Modified:
udd/src/db_manager.py
udd/src/packages_gatherer.py
udd/src/setup-db.sql
udd/src/sources_gatherer.py
udd/src/test.yaml
Log:
Everything from *Sources is imported now
DB changed to hold all this information
Implemented a more flexible system to add/remove fields from Packages and Sources
Removed Serials - using multi-value keys instead
Modified: udd/src/db_manager.py
===================================================================
--- udd/src/db_manager.py 2008-06-16 09:05:23 UTC (rev 890)
+++ udd/src/db_manager.py 2008-06-17 10:36:13 UTC (rev 891)
@@ -2,10 +2,11 @@
import aux
import sys
+import os
"""This scripts sets up and deletes the tables of the database"""
-TABLES = ('sources', 'pkgs', 'distr_ids', 'arch_ids', 'build_archs')
+TABLES = ('sources', 'packages')
def print_help():
print "Usage: %s <config> <delete|setup>" % sys.argv[0]
@@ -20,16 +21,9 @@
if 'script' not in config['setup']:
raise aux.ConfigException('Script not specified in setup')
- c = conn.cursor()
- for line in open(config['setup']['script']).readlines():
- line = line.strip()
- if line:
- c.execute(line)
+ os.system("psql %s < %s" % (config['general']['dbname'],
+ config['setup']['script']))
- #Setup architecture table
- for arch in config['general']['archs']:
- c.execute("INSERT INTO arch_ids (name) VALUES ('%s');" % (arch))
-
def main():
if len(sys.argv) != 3:
print_help()
Modified: udd/src/packages_gatherer.py
===================================================================
--- udd/src/packages_gatherer.py 2008-06-16 09:05:23 UTC (rev 890)
+++ udd/src/packages_gatherer.py 2008-06-17 10:36:13 UTC (rev 891)
@@ -1,5 +1,5 @@
#/usr/bin/env python
-# Last-Modified: <Fri Jun 6 13:38:47 2008>
+# Last-Modified: <Sun Jun 15 13:16:19 2008>
import debian_bundle.deb822
import gzip
@@ -8,9 +8,8 @@
import aux
import tempfile
from aux import ConfigException
+import psycopg2
-# A mapping from the architecture names to architecture IDs
-archs = {}
# A mapping from <package-name><version> to 1
# If <package-name><version> is included in this dictionary, this means,
# that we've already added this package with this version for architecture 'all'
@@ -19,11 +18,41 @@
# entries
imported_all_pkgs = {}
# The ID for the distribution we want to include
-distr_id = None
+distr = None
-# A mapping from source names to source ids
-srcs = {}
+mandatory = ('Package', 'Version', 'Architecture', 'Maintainer',
+ 'Description')
+non_mandatory = ('Source', 'Essential', 'Depends', 'Recommends', 'Suggests',
+ 'Enhances', 'Pre-Depends', 'Installed-Size', 'Homepage', 'Size', 'MD5Sum')
+ignorable = ()
+def null_or_quote(dict, key):
+ if key in dict:
+ return "'" + dict[key].replace("'", "\\'") + "'"
+ else:
+ return 'NULL'
+
+warned_about = []
+
+def build_dict(control):
+ """Build a dictionary from the control dictionary.
+
+ Influenced by global variables mandatory, non_mandatory and ignorable"""
+ global mandatory, non_mandatory
+ d = {}
+ for k in mandatory:
+ if k not in control:
+ raise "Mandatory field %s not specified" % k
+ d[k] = "'" + control[k].replace("\\", "\\\\").replace("'", "\\'") + "'"
+ for k in non_mandatory:
+ d[k] = null_or_quote(control, k)
+ for k in control.keys():
+ if k not in mandatory and k not in non_mandatory and k not in ignorable:
+ if k not in warned_about:
+ print("Unknown key: " + k)
+ warned_about.append(k)
+ return d
+
def import_packages(conn, sequence):
"""Import the packages from the sequence into the database-connection conn.
@@ -32,9 +61,8 @@
packages file."""
global imported_all_pkgs
# The fields that are to be read. Other fields are ignored
- fields = ('Architecture', 'Package', 'Version', 'Source')
cur = conn.cursor()
- for control in debian_bundle.deb822.Packages.iter_paragraphs(sequence, fields):
+ for control in debian_bundle.deb822.Packages.iter_paragraphs(sequence):
# Check whether packages with architectue 'all' have already been
# imported
if control['Architecture'] == 'all':
@@ -43,21 +71,43 @@
continue
imported_all_pkgs[t] = 1
- if 'Source' not in control:
- control['Source'] = control['Package']
- else:
- control['Source'] = control['Source'].split()[0]
+ d = build_dict(control)
- if control['Source'] not in srcs:
- print "Warning: Source " + control['Source'] + " for package " + control['Package'] + " not found!"
- query = "EXECUTE pkg_insert('%s', %d, %d, '%s', NULL)" % (control["Package"], distr_id, archs[control["Architecture"]], control["Version"])
- else:
- query = "EXECUTE pkg_insert('%s', %d, %d, '%s', %d)" % (control["Package"], distr_id, archs[control["Architecture"]], control["Version"], srcs[control["Source"]])
- cur.execute(query)
+# if 'Source' not in control:
+# d['Source'] = d['Package']
+# d['Source_Version'] = d['Version']
+# else:
+# split = control['Source'].split()
+# d['Source'] = split[0]
+# if len(split) > 1:
+# d['Source_Version'] = split[1].strip('()')
+# else:
+# d['Source_Version'] = d['Version']
+ if d['Installed-Size'] != 'NULL':
+ d['Installed-Size'] = d['Installed-Size'].strip("'")
+ if d['Size'] != 'NULL':
+ d['Size'] = d['Size'].strip("'")
+
+ if d['Description'] != "NULL":
+ d['Description'] = d['Description'].split("\n")[0]
+ # This problem appears, if the description was a one-liner
+ if d['Description'][-1] != "'" or d['Description'][-2] == '\\':
+ d['Description'] += "'"
+
+ query = """EXECUTE package_insert
+ (%(Package)s, %(Version)s, %(Architecture)s, %(Maintainer)s,
+ %(Description)s, %(Source)s, %(Essential)s, %(Depends)s,
+ %(Recommends)s, %(Suggests)s, %(Enhances)s, %(Pre-Depends)s,
+ %(Installed-Size)s, %(Homepage)s, %(Size)s, %(MD5Sum)s)""" % d
+ try:
+ cur.execute(query)
+ except psycopg2.ProgrammingError:
+ print query
+ raise
+
def main():
- global distr_id
- global archs
+ global distr
if len(sys.argv) != 3:
print "Usage: %s <config> <source>" % sys.argv[0]
sys.exit(1)
@@ -82,10 +132,14 @@
raise ConfigException('archs not specified for source %s in file %s' %
(src_name, cfg_path))
- if not 'parts' in src_cfg:
- raise ConfigException('parts not specified for source %s in file %s' %
+ if not 'release' in src_cfg:
+ raise ConfigException('release not specified for source %s in file %s' %
(src_name, cfg_path))
+ if not 'components' in src_cfg:
+ raise ConfigException('components not specified for source %s in file %s' %
+ (src_name, cfg_path))
+
if not 'distribution' in src_cfg:
raise ConfigException('distribution not specified for source %s in file %s' %
(src_name, cfg_path))
@@ -99,26 +153,24 @@
conn = aux.open_connection(config)
# Get distribution ID. If it does not exist, create it
- distr_ids = aux.get_distrs(conn)
- if src_cfg['distribution'] not in distr_ids:
- aux.insert_distr(conn, src_cfg['distribution'])
- distr_ids = aux.get_distrs(conn)
- distr_id = distr_ids[src_cfg['distribution']]
+ distr = src_cfg['distribution']
- archs = aux.get_archs(conn)
-
cur = conn.cursor()
- cur.execute("PREPARE pkg_insert AS INSERT INTO pkgs (name, distr_id, arch_id, version, src_id) VALUES ($1, $2, $3, $4, $5);")
+ #cur.execute("PREPARE pkg_insert AS INSERT INTO pkgs (name, distr_id, arch_id, version, src_id) VALUES ($1, $2, $3, $4, $5);")
- cur.execute("SELECT name, src_id FROM sources WHERE distr_id = " + str(distr_id))
- for src in cur.fetchall():
- srcs[src[0]] = src[1]
-
# For every part and every architecture, import the packages into the DB
- for part in src_cfg['parts']:
+ for comp in src_cfg['components']:
for arch in src_cfg['archs']:
- path = os.path.join(src_cfg['directory'], part, 'binary-' + arch, 'Packages.gz')
+ path = os.path.join(src_cfg['directory'], comp, 'binary-' + arch, 'Packages.gz')
try:
+ cur.execute("""PREPARE package_insert AS INSERT INTO Packages
+ (Package, Version, Architecture, Maintainer, Description, Source, Essential,
+ Depends, Recommends, Suggests, Enhances, Pre_Depends, Installed_Size,
+ Homepage, Size, MD5Sum, Distribution, Release, Component)
+ VALUES
+ ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15,
+ $16, '%s', '%s', '%s')
+ """ % (distr, src_cfg['release'], comp))
aux.print_debug("Reading file " + path)
# Copy content from gzipped file to temporary file, so that apt_pkg is
# used by debian_bundle
@@ -132,8 +184,8 @@
tmp.close()
except IOError, (e, message):
print "Could not read packages from %s: %s" % (path, message)
+ cur.execute("DEALLOCATE package_insert")
- cur.execute("DEALLOCATE pkg_insert")
conn.commit()
if __name__ == '__main__':
Modified: udd/src/setup-db.sql
===================================================================
--- udd/src/setup-db.sql 2008-06-16 09:05:23 UTC (rev 890)
+++ udd/src/setup-db.sql 2008-06-17 10:36:13 UTC (rev 891)
@@ -1,18 +1,24 @@
-CREATE TABLE pkgs (pkg_id serial, name text, distr_id int, arch_id int, version text, src_id int, UNIQUE (name, distr_id, arch_id, version));
-CREATE TABLE sources (src_id serial, name text, upload_date timestamp, uploader_key int, maintainer text, version text, distr_id int, UNIQUE (name, version, distr_id));
-CREATE TABLE distr_ids (distr_id serial, name text);
-CREATE TABLE arch_ids (arch_id serial, name text);
-CREATE TABLE build_archs (src_id int, arch_id int);
+CREATE TABLE Packages
+ (Package text, Version text, Architecture text, Maintainer text, Description
+ text, Source text, Essential text, Depends text, Recommends text, Suggests
+ text, Enhances text, Pre_Depends text, Installed_Size int, Homepage text,
+ Size int, MD5Sum text, Distribution text, Release text, Component text,
+ UNIQUE (Package, Version, Architecture, Distribution, Release, Component));
-CREATE INDEX pkgs_id_idx ON pkgs (pkg_id);
-CREATE INDEX pkgs_name_idx ON pkgs (name);
-CREATE INDEX sources_id_idx ON sources (src_id);
-CREATE INDEX sources_name_idx ON sources (name);
-CREATE INDEX arch_id_idx ON arch_ids using btree (arch_id);
-CREATE INDEX pkgs_src_id_idx ON pkgs USING btree (srd_id);
+CREATE TABLE sources
+ (Package text, Version text, Maintainer text, Format text, Files text,
+ Uploaders text, Bin text, Architecture text, Standards_Version text,
+ Homepage text, Build_Depends text, Build_Depends_Indep text,
+ Build_Conflicts text, Build_Conflicts_Indep text, Priority text, Section
+ text, Distribution text, Release text, Component text, Vcs_Arch text,
+ Vcs_Browser text, Vcs_Bzr text, Vcs_Cvs text, Vcs_Darcs text, Vcs_Git text,
+ Vcs_Hg text, Vcs_Svn text, X_Vcs_Browser text, X_Vcs_Bzr text, X_Vcs_Darcs
+ text, X_Vcs_Svn text,
+ UNIQUE (package, version, distribution, release, component));
-GRANT SELECT ON pkgs TO PUBLIC;
+CREATE INDEX pkgs_name_idx ON Packages (Package);
+CREATE INDEX sources_id_idx ON sources (Package);
+CREATE INDEX pkgs_src_id_idx ON Packages USING btree (Source);
+
+GRANT SELECT ON Packages TO PUBLIC;
GRANT SELECT ON sources TO PUBLIC;
-GRANT SELECT ON distr_ids TO PUBLIC;
-GRANT SELECT ON arch_ids TO PUBLIC;
-GRANT SELECT ON build_archs TO PUBLIC;
Modified: udd/src/sources_gatherer.py
===================================================================
--- udd/src/sources_gatherer.py 2008-06-16 09:05:23 UTC (rev 890)
+++ udd/src/sources_gatherer.py 2008-06-17 10:36:13 UTC (rev 891)
@@ -1,5 +1,5 @@
#/usr/bin/env python
-# Last-Modified: <Fri Jun 6 12:31:10 2008>
+# Last-Modified: <Sun Jun 15 13:15:10 2008>
import debian_bundle.deb822
import gzip
@@ -9,41 +9,66 @@
import tempfile
from aux import ConfigException
-# A mapping from the architecture names to architecture IDs
-archs = {}
-# The ID for the distribution we want to include
-distr_id = None
+distr = None
+mandatory = ('Format', 'Maintainer', 'Package', 'Version', 'Files')
+non_mandatory = ('Uploaders', 'Binary', 'Architecture', 'Standards-Version',
+ 'Homepage', 'Build-Depends', 'Build-Depends-Indep',
+ 'Build-Conflicts', 'Build-Conflicts-Indep', 'Priority',
+ 'Section', 'Vcs-Arch', 'Vcs-Browser', 'Vcs-Bzr', 'Vcs-Cvs',
+ 'Vcs-Darcs', 'Vcs-Git', 'Vcs-Hg', 'Vcs-Svn', 'X-Vcs-Browser',
+ 'X-Vcs-Bzr', 'X-Vcs-Darcs', 'X-Vcs-Svn')
+
+ignorable = ()
+
+def null_or_quote(dict, key):
+ if key in dict:
+ return "'" + dict[key].replace("'", "\\'") + "'"
+ else:
+ return 'NULL'
+
+warned_about = []
+def build_dict(control):
+ """Build a dictionary from the control dictionary.
+
+ Influenced by global variables mandatory, non_mandatory and ignorable"""
+ global mandatory, non_mandatory
+ d = {}
+ for k in mandatory:
+ if k not in control:
+ raise "Mandatory field %s not specified" % k
+ d[k] = "'" + control[k].replace("\\", "\\\\").replace("'", "\\'") + "'"
+ for k in non_mandatory:
+ d[k] = null_or_quote(control, k)
+ for k in control.keys():
+ if k not in mandatory and k not in non_mandatory and k not in ignorable:
+ if k not in warned_about:
+ print("Unknown key: " + k)
+ warned_about.append(k)
+ return d
+
def import_sources(conn, file):
"""Import the sources from the file into the database-connection conn.
Sequence has to have an iterator interface, that yields a line every time it
is called.The Format of the file is expected to be that of a debian
source file."""
- # The fields that are to be read. Other fields are ignored
- fields = ('Package', 'Version', 'Architecture', 'Maintainer', 'Uploaders', 'Binary')
cur = conn.cursor()
- for control in debian_bundle.deb822.Packages.iter_paragraphs(file, fields):
- # Put the source package into the DB
- query = "EXECUTE source_insert('%s', '%s', '%s', %d)" % (control["Package"], control['Maintainer'].replace("'", "\\'"), control["Version"],
- distr_id)
+ for control in debian_bundle.deb822.Packages.iter_paragraphs(file):
+ d = build_dict(control)
+ query = """EXECUTE source_insert
+ (%(Package)s, %(Version)s, %(Maintainer)s, %(Format)s, %(Files)s,
+ %(Uploaders)s, %(Binary)s, %(Architecture)s, %(Standards-Version)s,
+ %(Homepage)s, %(Build-Depends)s, %(Build-Depends-Indep)s,
+ %(Build-Conflicts)s, %(Build-Conflicts-Indep)s, %(Priority)s,
+ %(Section)s, %(Vcs-Arch)s, %(Vcs-Browser)s, %(Vcs-Bzr)s, %(Vcs-Cvs)s,
+ %(Vcs-Darcs)s, %(Vcs-Git)s, %(Vcs-Hg)s, %(Vcs-Svn)s, %(X-Vcs-Browser)s,
+ %(X-Vcs-Bzr)s, %(X-Vcs-Darcs)s, %(X-Vcs-Svn)s)
+ """ % d
cur.execute(query)
- # Get the src_id of the source
- #cur.execute("SELECT src_id FROM sources WHERE name = '%(Package)s' AND version = '%(Version)s'" % control)
- cur.execute("EXECUTE select_src_id('%(Package)s', '%(Version)s')" % control)
- src_id = int(cur.fetchone()[0])
- # Fill the build_archs table for this source package
- if control['Architecture'] == 'all' or control['Architecture'] == 'any':
- query = "EXECUTE build_archs_insert(%d, %d)" % (src_id, archs[control['Architecture']])
- cur.execute(query)
- else:
- for arch in control['Architecture'].split():
- query = "EXECUTE build_archs_insert(%d, %d)" % (src_id, archs[arch])
- cur.execute(query)
def main():
- global distr_id
- global archs
+ global distr
if len(sys.argv) != 3:
print "Usage: %s <config> <source>" % sys.argv[0]
sys.exit(1)
@@ -64,7 +89,7 @@
raise ConfigException('directory not specified for source %s in file %s' %
(src_name, cfg_path))
- if not 'parts' in src_cfg:
+ if not 'components' in src_cfg:
raise ConfigException('parts not specified for source %s in file %s' %
(src_name, cfg_path))
@@ -72,27 +97,33 @@
raise ConfigException('distribution not specified for source %s in file %s' %
(src_name, cfg_path))
+ if not 'release' in src_cfg:
+ raise ConfigException('release not specified for source %s in file %s' %
+ (src_name, cfg_path))
+
aux.debug = config['general']['debug']
conn = aux.open_connection(config)
- # Get distribution ID. If it does not exist, create it
- distr_ids = aux.get_distrs(conn)
- if src_cfg['distribution'] not in distr_ids:
- aux.insert_distr(conn, src_cfg['distribution'])
- distr_ids = aux.get_distrs(conn)
- distr_id = distr_ids[src_cfg['distribution']]
-
- archs = aux.get_archs(conn)
-
cur = conn.cursor()
- cur.execute("PREPARE source_insert AS INSERT INTO sources (name, maintainer, version, distr_id) VALUES ($1,$2,$3,$4)")
- cur.execute("PREPARE build_archs_insert AS INSERT INTO build_archs (src_id, arch_id) VALUES ($1,$2)")
- cur.execute("PREPARE select_src_id AS SELECT src_id FROM sources WHERE name = $1 AND version = $2")
- for part in src_cfg['parts']:
- path = os.path.join(src_cfg['directory'], part, 'source', 'Sources.gz')
+ for comp in src_cfg['components']:
+ path = os.path.join(src_cfg['directory'], comp, 'source', 'Sources.gz')
try:
+ query = """PREPARE source_insert as INSERT INTO sources
+ (Package, Version, Maintainer, Format, Files, Uploaders, Bin,
+ Architecture, Standards_Version, Homepage, Build_Depends,
+ Build_Depends_Indep, Build_Conflicts, Build_Conflicts_Indep, Priority,
+ Section, Vcs_Arch, Vcs_Browser, Vcs_Bzr, Vcs_Cvs, Vcs_Darcs, Vcs_Git,
+ Vcs_Hg, Vcs_Svn, X_Vcs_Browser, X_Vcs_Bzr, X_Vcs_Darcs, X_Vcs_Svn,
+ Distribution, Release, Component)
+ VALUES
+ ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16,
+ $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, '%s', '%s',
+ '%s')"""\
+ % (src_cfg['distribution'], comp, src_cfg['release'])
+ cur.execute(query)
+
aux.print_debug("Reading file " + path)
# Copy content from gzipped file to temporary file, so that apt_pkg is
# used by debian_bundle
@@ -104,12 +135,10 @@
aux.print_debug("Importing from " + path)
import_sources(conn, open(tmp.name))
tmp.close()
+ cur.execute("DEALLOCATE source_insert")
except IOError, (e, message):
print "Could not read packages from %s: %s" % (path, message)
- cur.execute("DEALLOCATE source_insert")
- cur.execute("DEALLOCATE build_archs_insert")
- cur.execute("DEALLOCATE select_src_id")
conn.commit()
if __name__ == '__main__':
Modified: udd/src/test.yaml
===================================================================
--- udd/src/test.yaml 2008-06-16 09:05:23 UTC (rev 890)
+++ udd/src/test.yaml 2008-06-17 10:36:13 UTC (rev 891)
@@ -6,6 +6,7 @@
setup: python db_manager.py
delete: python db_manager.py
src-pkg: python srcs_and_pkgs.py
+ # src-pkg: python sources_gatherer.py
debug: 1
archs:
@@ -16,7 +17,6 @@
netbsd-i386, hurd-powerpc, kfreebsd-powerpc, netbsd-powerpc, hurd-sparc,
kfreebsd-sparc, netbsd-sparc, darwin-i386, freebsd-i386, openbsd-i386, darwin-powerpc]
-
delete:
type: delete
@@ -25,13 +25,14 @@
script: setup-db.sql
debian-lenny:
- type: src-pkg
+ type: packages
archs: [alpha, amd64, arm, armel, hppa,
i386, ia64, mips,
mipsel, powerpc, s390, sparc]
directory: /org/ftp.debian.org/dists/lenny/
- parts: [main, contrib, non-free]
- distribution: debian-lenny
+ components: [main, contrib, non-free]
+ distribution: debian
+ release: lenny
debian-sid:
type: src-pkg
@@ -39,8 +40,9 @@
i386, i486, ia64, kfreebsd-amd64, kfreebsd-i386, m68k, mips,
mipsel, powerpc, ppc64, s390, sparc]
directory: /org/ftp.debian.org/dists/sid/
- parts: [main, contrib, non-free]
- distribution: debian-sid
+ components: [main, contrib, non-free]
+ distribution: debian
+ release: sid
debian-etch:
type: src-pkg
@@ -48,8 +50,9 @@
i386, i486, ia64, kfreebsd-amd64, kfreebsd-i386, m68k, mips,
mipsel, powerpc, ppc64, s390, sparc]
directory: /org/ftp.debian.org/dists/etch/
- parts: [main, contrib, non-free]
- distribution: debian-etch
+ components: [main, contrib, non-free]
+ distribution: debian
+ release: etch
debian-backports-etch:
type: src-pkg
@@ -57,8 +60,9 @@
i386, i486, ia64, kfreebsd-amd64, kfreebsd-i386, m68k, mips,
mipsel, powerpc, ppc64, s390, sparc]
directory: /org/ftp.backports.org/dists/etch-backports/
- parts: [main, contrib, non-free]
- distribution: debian-backports-etch
+ components: [main, contrib, non-free]
+ distribution: debian-backports
+ release: etch
debian-volatile-etch:
type: src-pkg
@@ -66,20 +70,23 @@
i386, i486, ia64, kfreebsd-amd64, kfreebsd-i386, m68k, mips,
mipsel, powerpc, ppc64, s390, sparc]
directory: /org/volatile.debian.org/dists/etch/volatile/
- parts: [main, contrib, non-free]
- distribution: debian-volatile-etch
+ components: [main, contrib, non-free]
+ distribution: debian-volatile
+ release: etch
test-src:
type: sources
directory: /org/ftp.debian.org/dists/lenny/
- parts: [main, contrib, non-free]
+ components: [main, contrib, non-free]
distribution: test
+ release: test
test-pkg:
type: packages
directory: /org/ftp.debian.org/dists/lenny/
- parts: [main, contrib, non-free]
+ components: [main, contrib, non-free]
distribution: test
+ release: test
archs:
[alpha, amd64, arm, armeb, armel, hppa, hurd-i386,
i386, i486, ia64, kfreebsd-amd64, kfreebsd-i386, m68k, mips,
More information about the Collab-qa-commits
mailing list