[Dehs-devel] r196 - trunk
Raphael Geissert
geissert at alioth.debian.org
Tue Jul 5 08:16:49 UTC 2011
Author: geissert
Date: 2011-07-05 08:16:49 +0000 (Tue, 05 Jul 2011)
New Revision: 196
Modified:
trunk/dehs_pg.php
Log:
Rewrite the Sources parser with a proper one
Additionally rewrite that section to use prepared statements
Modified: trunk/dehs_pg.php
===================================================================
--- trunk/dehs_pg.php 2011-07-05 00:24:37 UTC (rev 195)
+++ trunk/dehs_pg.php 2011-07-05 08:16:49 UTC (rev 196)
@@ -4,7 +4,7 @@
Originally written by Stefano Fabri <bluefuture at nospam@email.it>
Copyright 2004, Stefano Fabri
-Copyright 2007, 2008 by Raphael Geissert <atomo64 at gmail.com>
+Copyright 2007, 2008, 2011 by Raphael Geissert <atomo64 at gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -66,6 +66,25 @@
}
}
+/**
+ * Prefix a path with the PHP stream wrapper matching its compression suffix.
+ *
+ * Paths ending in ".gz" get "compress.zlib://", paths ending in ".bz2" get
+ * "compress.bzip2://"; any other path is returned without a prefix.
+ *
+ * @param string $file Path to inspect.
+ * @return string The path, prefixed with a compression wrapper when applicable.
+ */
+function file_with_wrapper($file) {
+    // File suffix => name of the compression stream handling it.
+    $streams = array('bz2' => 'bzip2', 'gz' => 'zlib');
+
+    if (!preg_match('/\.(bz2|gz)$/D', $file, $matches)) {
+        return (string) $file;
+    }
+
+    $suffix = $matches[1];
+    if (!isset($streams[$suffix])) {
+        // Only reachable if the pattern above gains a suffix the table lacks.
+        die('FIXME: '.__FILE__.':'.__LINE__);
+    }
+
+    return 'compress.' . $streams[$suffix] . '://' . $file;
+}
+
function ext_watch ($file,$pkg,$version) {
if (!file_exists($file)) return "";
@@ -159,138 +178,186 @@
print "Download Diff.gz of package $pkg => Fallito\n";
return false;
}
-function db_add($initial='') {
- global $dirs,$dists,$dbconn;
+
+/**
+ * Parse a Debian control-style file (such as Sources) from an open stream.
+ *
+ * The stream is read line by line. A "Field: value" line starts or extends the
+ * current paragraph, a line beginning with whitespace continues the previous
+ * field (its text is appended after a "\n"), and a blank line ends the
+ * paragraph. Each finished paragraph is handed to the callback as an array
+ * keyed by lower-cased field name.
+ *
+ * @param resource $fh       Readable stream to parse.
+ * @param callable $callback Invoked as $callback($group, $args) for each paragraph.
+ * @param mixed    ...       Any further arguments are forwarded to the callback as the list $args.
+ * @return void
+ */
+function parse_sources($fh, $callback) {
+    $group = null;
+    $field = '';
+    // Everything after the two named parameters is forwarded to the callback.
+    $args = array_slice(func_get_args(), 2);
+
+    // fgets() returns false both at EOF and on error; feof() tells them apart below.
+    while (($line = fgets($fh)) !== false) {
+        // Strip trailing whitespace only: leading whitespace marks a continuation line.
+        $line = rtrim($line);
+
+        if ($line === '') {
+            if ($group)
+                call_user_func($callback, $group, $args);
+            $group = null;
+        }
+        // Field names cannot contain ':' so the split happens at the first colon.
+        else if (preg_match('/^([^\s:]+):\s*(.*?)\s*$/', $line, $matches)) {
+            if (!$group) {
+                $group = array();
+            }
+            $field = strtolower($matches[1]);
+            $group[$field] = $matches[2];
+        } else if (preg_match('/^\s+(.*?)\s*$/', $line, $matches)) {
+            if (!$group) {
+                fwrite(STDERR, 'syntax error: found lone field continuation at '.ftell($fh)."\n");
+                continue;
+            }
+            $group[$field] .= "\n" . $matches[1];
+        } else {
+            fwrite(STDERR, 'syntax error: found lone data at '.ftell($fh)."\n");
+        }
+    }
+
+    // A false return before EOF is a genuine read failure.
+    if (!feof($fh))
+        die_status ('fgets failed at '.ftell($fh));
+
+    if ($group) {
+        // The last paragraph was not followed by a blank line; report it but still deliver it.
+        fwrite(STDERR, 'syntax error: unterminated group, EOF reached'."\n");
+        call_user_func($callback, $group, $args);
+    }
+}
+
+/**
+ * Paragraph callback that keeps one entry per source package.
+ *
+ * A paragraph is stored when its package has no entry yet, or when
+ * is_updated(stored version, new version, true) returns true. Calling the
+ * function with both arguments null returns everything collected so far and
+ * empties the store.
+ *
+ * @param array|null $group  Parsed paragraph (its 'package' and 'version' keys are read), or null.
+ * @param array|null $params Extra callback arguments; $params[0] is stored as 'section'.
+ * @return array|null Collected paragraphs keyed by package name when flushing, otherwise nothing.
+ */
+function db_add_callback($group, $params) {
+    static $pkgs = array();
+
+    // Flush request: hand back the accumulated packages and start afresh.
+    if ($group == null && $params == null) {
+        $collected = $pkgs;
+        $pkgs = array();
+        return $collected;
+    }
+
+    $name = $group['package'];
+
+    // Keep the stored entry unless is_updated() says the new one supersedes it.
+    if (isset($pkgs[$name]) && !is_updated($pkgs[$name]['version'],$group['version'],true)) {
+        return;
+    }
+
+    $group['section'] = $params[0];
+    $pkgs[$name] = $group;
+}
+
+/**
+ * Load every configured dist/section Sources.gz into the database.
+ *
+ * Creates the temporary tables pkgs_atsrc and bin_atsrc, prepares the SQL
+ * statements used below, parses each Sources.gz with parse_sources() and,
+ * for every collected package, inserts or updates its row in pkgs and records
+ * its binary packages. Finishes by calling clear_db() and closing the connection.
+ *
+ * Reads the globals $dirs, $dists and $dbconn.
+ */
+function db_add() {
+ global $dirs, $dists, $dbconn;
+
 check_db();
 download_sources();
 $db = pg_pconnect($dbconn) or die_status(pg_last_error($db));
- pg_exec($db, "CREATE TEMP TABLE pkgs_atsrc (name text,dist text)") or die_status('Error creating temp table pkgs_atsrc');
- pg_exec($db, "CREATE UNIQUE INDEX idxdis on pkgs_atsrc (name,dist)") or die_status('Error creating index on temp table');
- pg_exec($db, "CREATE TEMP TABLE bin_atsrc (name text,bin_name text, dist text)") or die_status('Error creating temp table pkgs_atsrc');
- pg_exec($db, "CREATE UNIQUE INDEX idxbis on bin_atsrc (name,bin_name,dist)") or die_status('Error creating index on temp table bin_atsrc');
+ pg_exec($db, "CREATE TEMP TABLE pkgs_atsrc (name text,dist text)")
+ or die_status('Error creating temp table pkgs_atsrc');
+ pg_exec($db, "CREATE UNIQUE INDEX idxdis on pkgs_atsrc (name,dist)")
+ or die_status('Error creating index on temp table');
+ pg_exec($db, "CREATE TEMP TABLE bin_atsrc (name text,bin_name text, dist text)")
+ or die_status('Error creating temp table pkgs_atsrc');
+ pg_exec($db, "CREATE UNIQUE INDEX idxbis on bin_atsrc (name,bin_name,dist)")
+ or die_status('Error creating index on temp table bin_atsrc');
+
+ $prepared_stmts = array(
+ 'insert_pkgs_atsrc' => 'INSERT INTO pkgs_atsrc (name,dist) VALUES ($1,$2)',
+ 'select_pkgs.version' => 'SELECT pkgs.version FROM pkgs WHERE name=$1 AND dist=$2',
+ 'insert_pkgs' => 'INSERT INTO pkgs (name,version,dversionmangled,maint,dir,md5_atsource,bytes,dist,section,uploaders,homepage,vcs_browser,vcs_type,vcs,vcsoversource,lastpkgsourcesupdate)'
+ .' VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,'."'0'".',now())',
+ 'update_pkgs' => 'UPDATE pkgs SET version=$2, maint=$3, dir=$4, md5_atsource=$5,'
+ .'bytes=$6, section=$8, uploaders=$9, homepage=$10, vcs_browser=$11,'
+ .'vcs_type=$12, vcs=$13 WHERE name=$1 AND dist=$7',
+ 'new_version' => 'UPDATE pkgs SET dversionmangled=$1, wwiz_type=NULL,'
+ .'lastpkgsourcesupdate=now() WHERE name=$2 AND dist=$3',
+ 'insert_bin_atsrc' => 'INSERT INTO bin_atsrc (name,bin_name,dist) VALUES ($1, $2, $3)',
+ 'insert_binpkgs' => 'INSERT INTO binpkgs (name,bin_name,dist) VALUES ($1, $2, $3)',
+ );
+
+ foreach ($prepared_stmts as $name=>$stmt) {
+ if (!pg_prepare($db, $name, $stmt))
+ die_status ('Failed to prepare statement '.$stmt);
+ }
+
 foreach ($dists as $dist=>$sections) {
- $pkgs = array();
- $pkgsections = array();
 foreach ($sections as $section) {
 $filename=$dirs['sources_dir'] . "/$dist/$section/Sources.gz";
- print "\nI'm parsing $dist/$section => Sources.gz\n";
+ print "\nParsing $dist/$section/Sources.gz\n";
- // Big regexp to parse a package entry
+ $fh = fopen(file_with_wrapper($filename), 'r')
+ or die_status("Could not open $filename");
- // [1] Source package name
- $regexp = "/Package:\s($initial.*)[^a]";
- // [2] : Binary package name
- $regexp .= "Binary:\s(.+)[^a]";
- // [3] : Epoch
- // [4] : Version
- $regexp .= "Version:\s(\d+:)?(.+)[^a]";
- $regexp .= "Priority.+";
- // [5] : Maintainer email address
- $regexp .= "Maintainer:.+<([^>]+)>[^a]";
- $regexp .= ".+";
- // [6] : Source package format
- $regexp .= "Format:\s(.+)[^a]";
- // [7] : Package directory
- $regexp .= "Directory:\s(.+)[^a]";
- // [8] : md5sum of the .diff.gz (or .debian.tar.(gz|bz2))
- // [9] : Size of the file in bytes
- // [10] : Extension (.diff.gz or .debian.tar.gz)
- $regexp .= "Files:.+\s(\S{32})\s(\d+)\s" . '\1\S+' . "(\.diff\.gz|\.debian\.tar\.(?:gz|bz2))[^a]";
- // [11] : Uploaders field
- // [12] : Uploaders value
- $regexp .= "(Uploaders:\s(.+)[^a])?";
- // [13] : DM-Upload-Allowed field
- // [14] : DM-Upload-Allowed value
- $regexp .= "(Dm-Upload-Allowed:\s(.+)[^a])?";
- // [15] : Homepage field
- // [16] : Homepage value
- $regexp .= "(Homepage:\s(.+)[^a])?";
- // [17] : Vcs-Browser field
- // [18] : Vcs-Browser value
- $regexp .= "(Vcs-Browser:\s(.+)[^a])?";
- // [19] : Vcs-* field
- // [20] : vcs type
- // [21] : Vcs-* value
- $regexp .= "(Vcs-([A-Z][a-z]+):\s(.+)[^a])?";
- // [21] : Checksums-* field
- // [22] : Checksums-* value
- $regexp .= "(Checksums-\S+:\s(.+)[^a])?";
- $regexp .= "/Ssi";
- $zp = gzopen($filename, "r") or die_status("Could not open $filename");
- $extracted=tempnam("/tmp/", "$dist_$section_Sources");
- exec("gzip -c -d $filename > $extracted");
- $sourcesize=filesize($extracted);
- unlink($extracted);
- if(filesize($filename)>20) {
- while (!gzeof($zp)) {
- $line='';
- while ($line!="\n") {
- $line = gzgets ($zp,4096) ;
- $buff1 .= $line;
- }
- if (preg_match($regexp,$buff1,$matches)) {
- $matches[4] = ((isset($matches[3]) && strlen($matches[3]))? $matches[3] : ''). $matches[4];
- if (!isset($pkgs[$matches[1]]) || is_updated($pkgs[$matches[1]][4],$matches[4],true)) {
- unset($matches[0]);
- $pkgs[$matches[1]] = $matches;
- $pkgsections[$matches[1]] = $section;
- }
- }
- else {
- preg_match("/Package:\s(.+)[^a]Binary/im",$buff1,$matches);
- if(strncmp($initial,$matches[1],strlen($initial))<0) break;
- }
- $left=$sourcesize-gztell($zp);
- print "\rSource file Left => $left";
- $buff1="";
- unset($matches);
- }
- }
- gzclose($zp) ;
+ parse_sources($fh, 'db_add_callback', $section);
+
+ fclose($fh);
 }
- foreach ($pkgs as $package=>$matches) {
- $section = $pkgsections[$matches[1]];
- // Maintainer email address
- if ($matches[5]) {
- $matches[5]=iconv("ISO-8859-1","UTF-8",$matches[5]);
- $matches[5]=pg_escape_string($matches[5]);
+ // Fetch (and reset) everything the callback collected for this dist.
+ $pkgs = db_add_callback(null, null);
+
+ foreach ($pkgs as $package=>$data) {
+
+ // Fields a stanza may lack default to '' so they neither raise
+ // undefined-index notices nor reach the database as NULL parameters.
+ foreach (array('files', 'maintainer', 'uploaders', 'homepage', 'vcs-browser', 'directory', 'binary') as $optional) {
+ if (!isset($data[$optional]))
+ $data[$optional] = '';
+ }
+
+ // Find the .diff.gz / .debian.tar.(gz|bz2) entry of the Files field.
+ $files = explode("\n", $data['files']);
+ $md5 = null;
+ foreach ($files as $file) {
+ if (!preg_match('/^(\S{32})\s(\d+)\s'.preg_quote($data['package'], '/').'\S+(\.diff\.gz|\.debian\.tar\.(?:gz|bz2))$/', trim($file), $matches))
+ continue;
+ $md5 = $data['file_md5'] = $matches[1];
+ $data['file_size'] = $matches[2];
 }
- // Uploaders
- if ($matches[12]) {
- $matches[12]=iconv("ISO-8859-1","UTF-8",$matches[12]);
- $matches[12]=pg_escape_string($matches[12]);
+ if (!$md5) {
+ fwrite(STDERR, 'Could not determine md5 of '.$data['package']."\n");
+ continue;
 }
- @pg_exec($db,"INSERT INTO pkgs_atsrc (name,dist) VALUES ('$matches[1]','$dist')") OR die_status("Temp table pkgs_atsrc query error");
- $rst=@pg_exec($db, "INSERT INTO pkgs (name,version,dversionmangled,maint,dir,md5_atsource,bytes,dist,section,uploaders,homepage,vcs_browser,vcs_type,vcs,vcsoversource,lastpkgsourcesupdate) VALUES ('$matches[1]','$matches[4]','$matches[4]','$matches[5]','$matches[7]','$matches[8]','$matches[9]','$dist','$section','$matches[12]','$matches[16]','$matches[18]','$matches[20]','$matches[21]','0',now())");
+
+ // The VCS type is the part of the field name after "vcs-"
+ // (e.g. "vcs-git" => "git"); vcs-browser is stored separately.
+ // NOTE(review): field names are lower-cased by the parser, so the type
+ // is lower case here while the removed regexp kept the original case.
+ $vcs_type = '';
+ $vcs = '';
+ foreach ($data as $k=>$v) {
+ if (strpos($k, 'vcs-') !== 0)
+ continue;
+ if ($k == 'vcs-browser')
+ continue;
+ $vcs_type = substr($k, 4);
+ $vcs = $v;
+ }
+ $data['vcs_type'] = $vcs_type;
+
+ // Keep only the address between the angle brackets: "Name <a@b>" => "a@b".
+ $data['maintainer_email'] = '';
+ if (preg_match('/<([^>]+)>/', $data['maintainer'], $matches))
+ $data['maintainer_email'] = iconv('ISO-8859-1', 'UTF-8', $matches[1]);
+
+ // NOTE(review): this assumes the Sources data is ISO-8859-1, as the
+ // removed code did; confirm, since UTF-8 input would be double-encoded.
+ if ($data['uploaders'])
+ $data['uploaders'] = iconv('ISO-8859-1', 'UTF-8', $data['uploaders']);
+
+ pg_execute($db, 'insert_pkgs_atsrc', array($data['package'], $dist))
+ or die_status("Temp table pkgs_atsrc query error");
+
+ // A failed insert is taken to mean the row already exists; it is updated below.
+ $rst=pg_execute($db, 'insert_pkgs', array($data['package'],$data['version'],$data['version'],$data['maintainer_email'],$data['directory'],$data['file_md5'],$data['file_size'],$dist,$data['section'],$data['uploaders'],$data['homepage'],$data['vcs-browser'],$data['vcs_type'],$vcs));
 if (!$rst) {
- $rsql=pg_exec($db, "SELECT pkgs.version FROM pkgs WHERE name='$matches[1]' AND dist='$dist';");
- $version = $matches[4]; // just for safety
+ $rsql=pg_execute($db, 'select_pkgs.version', array($data['package'], $dist));
+ $version = $data['version']; // just for safety
 while ($res_array=pg_fetch_array($rsql)) {
 $version = $res_array['version'];
 }
- $extra = '';
- if ($version != $matches[4]) {
+
+ if ($version != $data['version']) {
 // only update the dversionmangled field if the versions differ
- $extra = ",dversionmangled='$matches[4]'";
- // also reset wwiz_type
- $extra = ",wwiz_type=NULL";
- // and update the lastpkgsourcesupdate field
- $extra = ",lastpkgsourcesupdate=now()";
+ // in that case, also reset wwiz_type and update lastpkgsourcesupdate
+ pg_execute($db, 'new_version', array($data['version'], $data['package'], $dist));
 }
- $rst=@pg_exec($db, "UPDATE pkgs SET name='$matches[1]',version='$matches[4]'$extra,maint='$matches[5]',dir='$matches[7]',md5_atsource='$matches[8]',bytes='$matches[9]',dist='$dist',section='$section',uploaders='$matches[12]',homepage='$matches[16]',vcs_browser='$matches[18]',vcs_type='$matches[20]',vcs='$matches[21]' WHERE name='$matches[1]' AND dist='$dist'") OR die_status("\nDb adding error =>" . pg_last_error() . "\n");
+
+ $rst=pg_execute($db, 'update_pkgs', array($data['package'], $data['version'], $data['maintainer_email'],
+ $data['directory'], $data['file_md5'], $data['file_size'], $dist,
+ $data['section'], $data['uploaders'], $data['homepage'], $data['vcs-browser'],
+ $data['vcs_type'], $vcs))
+ or die_status("\nDb adding error =>" . pg_last_error() . "\n");
 }
- $bin_names=split(",", $matches[2]);
+ $bin_names = explode(',', $data['binary']);
 foreach ($bin_names as $bin_name) {
- pg_exec($db,"INSERT INTO bin_atsrc (name,bin_name,dist) VALUES ('$matches[1]','" . trim($bin_name) . "','$dist')") OR die_status("Temp table pkgs_atsrc query error");
- $rst=@pg_exec($db, "INSERT INTO binpkgs (name,bin_name,dist) VALUES ('$matches[1]','" . trim($bin_name) . "','$dist')") ;
+ $bin_name = trim($bin_name);
+ // An empty Binary field would otherwise yield one empty name.
+ if ($bin_name === '')
+ continue;
+ pg_execute($db, 'insert_bin_atsrc', array($data['package'], $bin_name, $dist))
+ or die_status("Temp table pkgs_atsrc query error");
+ pg_execute($db, 'insert_binpkgs', array($data['package'], $bin_name, $dist));
 }
 }
 }
 clear_db($db);
 pg_close($db);
 }
+
function db_query($pkg,$dist='%') {
global $dirs,$dbconn;
$db = pg_pconnect($dbconn ) or die_status(pg_last_error($db));
More information about the Dehs-devel
mailing list