[Collab-qa-commits] r1531 - udd/udd
Lucas Nussbaum
lucas at alioth.debian.org
Thu Jul 23 14:05:41 UTC 2009
Author: lucas
Date: 2009-07-23 14:05:39 +0000 (Thu, 23 Jul 2009)
New Revision: 1531
Modified:
udd/udd/bugs_gatherer.pl
udd/udd/carnivore_gatherer.py
udd/udd/ddtp_gatherer.py
udd/udd/debtags_gatherer.py
udd/udd/ftpnew_gatherer.py
udd/udd/lintian_gatherer.py
udd/udd/orphaned_packages_gatherer.py
udd/udd/packages_gatherer.py
udd/udd/popcon_gatherer.py
udd/udd/screenshot_gatherer.py
udd/udd/sources_gatherer.py
udd/udd/testing_migrations_gatherer.py
udd/udd/ubuntu_bugs_gatherer.py
udd/udd/upload_history_gatherer.py
Log:
Add ANALYZE at the end of all importers so that PostgreSQL refreshes its query-planner statistics for the tables we just imported data into.
Modified: udd/udd/bugs_gatherer.pl
===================================================================
--- udd/udd/bugs_gatherer.pl 2009-07-23 11:22:30 UTC (rev 1530)
+++ udd/udd/bugs_gatherer.pl 2009-07-23 14:05:39 UTC (rev 1531)
@@ -288,6 +288,19 @@
}
}
print "Inserting bugs: ",(time() - $t),"s\n" if $timing;
+
+ $dbh->commit();
+ print "Committing bugs: ",(time() - $t),"s\n" if $timing;
+
+ foreach my $postfix (qw{_packages _merged_with _found_in _fixed_in _tags}, '') {
+ my $sth = $dbh->prepare("ANALYZE $table$postfix");
+ $sth->execute() or die $!;
+ }
+
+ my $sth = $dbh->prepare("ANALYZE ".$src_config{'usertags-table'});
+ $sth->execute() or die $!;
+
+ print "Analyzing bugs: ",(time() - $t),"s\n" if $timing;
}
sub main {
@@ -323,8 +336,6 @@
exit(1)
}
- $dbh->commit();
- print "Committing bugs: ",(time() - $t),"s\n" if $timing;
}
main();
Modified: udd/udd/carnivore_gatherer.py
===================================================================
--- udd/udd/carnivore_gatherer.py 2009-07-23 11:22:30 UTC (rev 1530)
+++ udd/udd/carnivore_gatherer.py 2009-07-23 14:05:39 UTC (rev 1531)
@@ -107,6 +107,8 @@
if info["name"] not in record:
record[info["name"]] = set()
record[info["name"]].add(content.rstrip())
+ for table in ['emails', 'names', 'keys', 'login']:
+ cur.execute("ANALYZE %s" % my_config["%s-table" % table])
if __name__ == '__main__':
main()
Modified: udd/udd/ddtp_gatherer.py
===================================================================
--- udd/udd/ddtp_gatherer.py 2009-07-23 11:22:30 UTC (rev 1530)
+++ udd/udd/ddtp_gatherer.py 2009-07-23 14:05:39 UTC (rev 1531)
@@ -137,6 +137,7 @@
print >>stderr, "Error reading %s (%s)" % (dir+filename, err)
cur.execute("DEALLOCATE ddtp_insert")
+ cur.execute("ANALYZE %s" % my_config['table'])
if __name__ == '__main__':
main()
Modified: udd/udd/debtags_gatherer.py
===================================================================
--- udd/udd/debtags_gatherer.py 2009-07-23 11:22:30 UTC (rev 1530)
+++ udd/udd/debtags_gatherer.py 2009-07-23 14:05:39 UTC (rev 1531)
@@ -70,6 +70,7 @@
cur.execute('EXECUTE debtags_insert (%s, %s)' \
% (quote(pkg), quote(tag)))
cur.execute('DEALLOCATE debtags_insert')
+ cur.execute("ANALYZE %s" % conf['table'])
def test():
Modified: udd/udd/ftpnew_gatherer.py
===================================================================
--- udd/udd/ftpnew_gatherer.py 2009-07-23 11:22:30 UTC (rev 1530)
+++ udd/udd/ftpnew_gatherer.py 2009-07-23 14:05:39 UTC (rev 1531)
@@ -446,6 +446,8 @@
cur.execute("DEALLOCATE ftpnew_insert_source")
cur.execute("DEALLOCATE ftpnew_insert_package")
cur.execute("DEALLOCATE ftpnew_check_existing_package")
+ cur.execute("ANALYZE %s" % my_config["table_sources"])
+ cur.execute("ANALYZE %s" % my_config["table_packages"])
if __name__ == '__main__':
main()
Modified: udd/udd/lintian_gatherer.py
===================================================================
--- udd/udd/lintian_gatherer.py 2009-07-23 11:22:30 UTC (rev 1530)
+++ udd/udd/lintian_gatherer.py 2009-07-23 14:05:39 UTC (rev 1531)
@@ -70,6 +70,7 @@
print "Can't parse line %d: %s" % (line_number, line.rstrip())
cur.execute("DEALLOCATE lintian_insert")
+ cur.execute("ANALYZE %s" % my_config["table"])
if __name__ == '__main__':
main()
Modified: udd/udd/orphaned_packages_gatherer.py
===================================================================
--- udd/udd/orphaned_packages_gatherer.py 2009-07-23 11:22:30 UTC (rev 1530)
+++ udd/udd/orphaned_packages_gatherer.py 2009-07-23 14:05:39 UTC (rev 1531)
@@ -70,5 +70,6 @@
except IntegrityError, message:
print "Integrity Error inserting bug " + str(row[0]) + " " + m.group(2)
continue
+ cur2.execute("ANALYZE %s" % self.my_config['table'])
# vim:set et tabstop=2:
Modified: udd/udd/packages_gatherer.py
===================================================================
--- udd/udd/packages_gatherer.py 2009-07-23 11:22:30 UTC (rev 1530)
+++ udd/udd/packages_gatherer.py 2009-07-23 14:05:39 UTC (rev 1531)
@@ -211,6 +211,10 @@
SELECT DISTINCT distribution, release, component, architecture
FROM %s""" % (table + '_distrelcomparch', table))
+ cur.execute("ANALYZE %s" % table)
+ cur.execute("ANALYZE %s" % table + '_summary')
+ cur.execute("ANALYZE %s" % table + '_distrelcomparch')
+
self.print_warnings()
def print_warnings(self):
Modified: udd/udd/popcon_gatherer.py
===================================================================
--- udd/udd/popcon_gatherer.py 2009-07-23 11:22:30 UTC (rev 1530)
+++ udd/udd/popcon_gatherer.py 2009-07-23 14:05:39 UTC (rev 1531)
@@ -90,6 +90,9 @@
WHERE %(table)s.package = pkgs.package
GROUP BY pkgs.source;
""" % my_config)
+ cur.execute("ANALYZE " + table)
+ cur.execute("ANALYZE " + table_src)
+ cur.execute("ANALYZE " + table_src_average)
if __name__ == '__main__':
main()
Modified: udd/udd/screenshot_gatherer.py
===================================================================
--- udd/udd/screenshot_gatherer.py 2009-07-23 11:22:30 UTC (rev 1530)
+++ udd/udd/screenshot_gatherer.py 2009-07-23 14:05:39 UTC (rev 1531)
@@ -62,6 +62,7 @@
print >>stderr, "Unable to inject data for package %s. %s" % (res['name'], err)
print >>stderr, "-->", res
cur.execute("DEALLOCATE screenshots_insert")
+ cur.execute("ANALYZE %s" % my_config['table'])
if __name__ == '__main__':
main()
Modified: udd/udd/sources_gatherer.py
===================================================================
--- udd/udd/sources_gatherer.py 2009-07-23 11:22:30 UTC (rev 1530)
+++ udd/udd/sources_gatherer.py 2009-07-23 14:05:39 UTC (rev 1531)
@@ -48,25 +48,25 @@
d = {}
for k in sources_gatherer.mandatory:
if k not in control:
- raise "Mandatory field %s not specified" % k
+ raise "Mandatory field %s not specified" % k
d[k] = control[k]
for k in sources_gatherer.non_mandatory:
if k in control:
- d[k] = control[k]
+ d[k] = control[k]
else:
- d[k] = None
+ d[k] = None
d['Vcs-Type'] = None
d['Vcs-Url'] = None
for vcs in sources_gatherer.vcs:
if control.has_key("Vcs-"+vcs):
d['Vcs-Type'] = vcs
- d['Vcs-Url'] = control["Vcs-"+vcs]
- break
+ d['Vcs-Url'] = control["Vcs-"+vcs]
+ break
elif control.has_key("X-Vcs-"+vcs):
d['Vcs-Type'] = vcs
- d['Vcs-Url'] = control["X-Vcs-"+vcs]
- break
+ d['Vcs-Url'] = control["X-Vcs-"+vcs]
+ break
if control.has_key("Vcs-Browser"):
d['Vcs-Browser'] = control["Vcs-Browser"]
elif control.has_key("X-Vcs-Browser"):
@@ -139,56 +139,59 @@
for comp in src_cfg['components']:
path = os.path.join(src_cfg['directory'], comp, 'source', 'Sources.gz')
cur.execute("DELETE from %s WHERE Distribution = '%s' AND\
- release = '%s' AND component = '%s'"\
- % (table, src_cfg['distribution'], src_cfg['release'], comp))
+ release = '%s' AND component = '%s'"\
+ % (table, src_cfg['distribution'], src_cfg['release'], comp))
cur.execute("DELETE from %s WHERE Distribution = '%s' AND\
- release = '%s' AND component = '%s'"\
- % (utable, src_cfg['distribution'], src_cfg['release'], comp))
+ release = '%s' AND component = '%s'"\
+ % (utable, src_cfg['distribution'], src_cfg['release'], comp))
try:
- query = """PREPARE source_insert as INSERT INTO %s
- (Source, Version, Maintainer, Maintainer_name, Maintainer_email, Format, Files, Uploaders, Bin,
- Architecture, Standards_Version, Homepage, Build_Depends,
- Build_Depends_Indep, Build_Conflicts, Build_Conflicts_Indep, Priority,
- Section, Vcs_Type, Vcs_Url, Vcs_Browser, python_version, checksums_sha1,
- checksums_sha256, original_maintainer, dm_upload_allowed,
- Distribution, Release, Component)
- VALUES
- ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16,
- $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, '%s', '%s', '%s')"""\
- % (table, src_cfg['distribution'], src_cfg['release'], comp)
- cur.execute(query)
- query = """PREPARE uploader_insert as INSERT INTO %s
- (Source, Version, Distribution, Release, Component, Uploader, Name, Email) VALUES
- ($1, $2, '%s', '%s', '%s', $3, $4, $5) """ % \
- (utable, src_cfg['distribution'], src_cfg['release'], comp)
- cur.execute(query)
+ query = """PREPARE source_insert as INSERT INTO %s
+ (Source, Version, Maintainer, Maintainer_name, Maintainer_email, Format, Files, Uploaders, Bin,
+ Architecture, Standards_Version, Homepage, Build_Depends,
+ Build_Depends_Indep, Build_Conflicts, Build_Conflicts_Indep, Priority,
+ Section, Vcs_Type, Vcs_Url, Vcs_Browser, python_version, checksums_sha1,
+ checksums_sha256, original_maintainer, dm_upload_allowed,
+ Distribution, Release, Component)
+ VALUES
+ ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16,
+ $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, '%s', '%s', '%s')"""\
+ % (table, src_cfg['distribution'], src_cfg['release'], comp)
+ cur.execute(query)
+ query = """PREPARE uploader_insert as INSERT INTO %s
+ (Source, Version, Distribution, Release, Component, Uploader, Name, Email) VALUES
+ ($1, $2, '%s', '%s', '%s', $3, $4, $5) """ % \
+ (utable, src_cfg['distribution'], src_cfg['release'], comp)
+ cur.execute(query)
-# aux.print_debug("Reading file " + path)
- # Copy content from gzipped file to temporary file, so that apt_pkg is
- # used by debian_bundle
- tmp = tempfile.NamedTemporaryFile()
- file = gzip.open(path)
- tmp.write(file.read())
- file.close()
- tmp.seek(0)
-# aux.print_debug("Importing from " + path)
- self.import_sources(open(tmp.name))
- tmp.close()
+# aux.print_debug("Reading file " + path)
+ # Copy content from gzipped file to temporary file, so that apt_pkg is
+ # used by debian_bundle
+ tmp = tempfile.NamedTemporaryFile()
+ file = gzip.open(path)
+ tmp.write(file.read())
+ file.close()
+ tmp.seek(0)
+# aux.print_debug("Importing from " + path)
+ self.import_sources(open(tmp.name))
+ tmp.close()
except IOError, (e, message):
- print "Could not read packages from %s: %s" % (path, message)
+ print "Could not read packages from %s: %s" % (path, message)
cur.execute("DEALLOCATE source_insert")
cur.execute("DEALLOCATE uploader_insert")
+ cur.execute('ANALYZE %s' % table)
+ cur.execute('ANALYZE %s' % utable)
+
self.print_warnings()
def setup(self):
if 'schema-dir' in self.config['general']:
schema_dir = self.config['general']['schema-dir']
if 'sources-schema' in self.my_config:
- schema = schema_dir + '/' + self.my_config['sources-schema']
- self.eval_sql_file(schema, self.my_config)
+ schema = schema_dir + '/' + self.my_config['sources-schema']
+ self.eval_sql_file(schema, self.my_config)
else:
- raise Exception("'packages-schema' not specified for source " + self.source)
+ raise Exception("'packages-schema' not specified for source " + self.source)
else:
raise Exception("'schema-dir' not specified")
Modified: udd/udd/testing_migrations_gatherer.py
===================================================================
--- udd/udd/testing_migrations_gatherer.py 2009-07-23 11:22:30 UTC (rev 1530)
+++ udd/udd/testing_migrations_gatherer.py 2009-07-23 14:05:39 UTC (rev 1531)
@@ -21,35 +21,36 @@
self.assert_my_config('path')
def run(self):
- src_cfg = self.my_config
+ src_cfg = self.my_config
- c = self.connection.cursor()
+ c = self.connection.cursor()
- c.execute("DELETE FROM migrations")
+ c.execute("DELETE FROM migrations")
- c.execute("PREPARE mig_insert AS INSERT INTO migrations (source, in_testing, testing_version, in_unstable, unstable_version, sync, sync_version, first_seen) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)")
+ c.execute("PREPARE mig_insert AS INSERT INTO migrations (source, in_testing, testing_version, in_unstable, unstable_version, sync, sync_version, first_seen) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)")
- f = open(src_cfg['path'])
- for line in f:
- (package, in_testing, testing_version, in_unstable, unstable_version, sync, sync_version, first_seen) = line.split()
- for field in ('in_testing', 'in_unstable', 'sync', 'first_seen'):
- is_null = False
- exec "is_null = %s == ZERO_DATE" % field
- if is_null:
- exec "%s = 'NULL'" % field
- else:
- exec "%s = quote(%s)" % (field, field)
+ f = open(src_cfg['path'])
+ for line in f:
+ (package, in_testing, testing_version, in_unstable, unstable_version, sync, sync_version, first_seen) = line.split()
+ for field in ('in_testing', 'in_unstable', 'sync', 'first_seen'):
+ is_null = False
+ exec "is_null = %s == ZERO_DATE" % field
+ if is_null:
+ exec "%s = 'NULL'" % field
+ else:
+ exec "%s = quote(%s)" % (field, field)
- for field in ('package', 'testing_version', 'unstable_version', 'sync_version'):
- is_null = False
- exec "is_null = %s == '-'" % field
- if is_null:
- exec "%s = 'NULL'" % field
- else:
- exec "%s = quote(%s)" % (field, field)
-
- c.execute("EXECUTE mig_insert(%s, %s, %s, %s, %s, %s, %s, %s)" \
- % (package, in_testing, testing_version, in_unstable, unstable_version, sync, sync_version, first_seen))
+ for field in ('package', 'testing_version', 'unstable_version', 'sync_version'):
+ is_null = False
+ exec "is_null = %s == '-'" % field
+ if is_null:
+ exec "%s = 'NULL'" % field
+ else:
+ exec "%s = quote(%s)" % (field, field)
+
+ c.execute("EXECUTE mig_insert(%s, %s, %s, %s, %s, %s, %s, %s)" \
+ % (package, in_testing, testing_version, in_unstable, unstable_version, sync, sync_version, first_seen))
- c.execute("DEALLOCATE mig_insert")
+ c.execute("DEALLOCATE mig_insert")
+ c.execute("ANALYZE migrations")
Modified: udd/udd/ubuntu_bugs_gatherer.py
===================================================================
--- udd/udd/ubuntu_bugs_gatherer.py 2009-07-23 11:22:30 UTC (rev 1530)
+++ udd/udd/ubuntu_bugs_gatherer.py 2009-07-23 14:05:39 UTC (rev 1531)
@@ -62,6 +62,11 @@
except Empty:
if httpq.qsize() == 0:
ok = False
+ c.execute("analyze ubuntu_bugs_subscribers")
+ c.execute("analyze ubuntu_bugs_duplicates")
+ c.execute("analyze ubuntu_bugs_tags")
+ c.execute("analyze ubuntu_bugs_tasks")
+ c.execute("analyze ubuntu_bugs")
def fetch_all_bugs(self):
fh = urllib.urlopen('https://launchpad.net/ubuntu/+bugs-text')
Modified: udd/udd/upload_history_gatherer.py
===================================================================
--- udd/udd/upload_history_gatherer.py 2009-07-23 11:22:30 UTC (rev 1530)
+++ udd/udd/upload_history_gatherer.py 2009-07-23 14:05:39 UTC (rev 1531)
@@ -113,3 +113,6 @@
cursor.executemany(query_archs, uploads_archs)
cursor.executemany(query_closes, uploads_closes)
cursor.execute("DEALLOCATE uh_insert")
+ cursor.execute("ANALYZE " + self.my_config['table'] + '_architecture')
+ cursor.execute("ANALYZE " + self.my_config['table'] + '_closes')
+ cursor.execute("ANALYZE " + self.my_config['table'])
More information about the Collab-qa-commits
mailing list