[Debpool-commits] [SCM] Debpool Project Repository branch, master, updated. debian/0.3.10-2-ge7576aa
Andres Mejia
mcitadel at gmail.com
Fri Oct 24 03:11:23 UTC 2008
The following commit has been merged in the master branch:
commit e7576aae317c62d53e90e8dab3e388abdfcf9880
Author: Andres Mejia <mcitadel at gmail.com>
Date: Thu Oct 23 23:11:16 2008 -0400
Start implementing a new module to use for file parsing
diff --git a/MANIFEST b/MANIFEST
index 4f52faa..fd1891a 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -13,3 +13,4 @@ lib/DebPool/Bzip2.pm
lib/DebPool/Config.pm
# lib/DebPool/Hooks.pm
lib/DebPool/DB.pm
+lib/DebPool/Parser.pm
diff --git a/bin/debpool b/bin/debpool
index 5672c35..7b440f1 100755
--- a/bin/debpool
+++ b/bin/debpool
@@ -50,6 +50,7 @@ use DebPool::GnuPG qw(:functions :vars); # GnuPG interaction routines
use DebPool::Logging qw(:functions :facility :level); # Logging routines
use DebPool::Packages qw(:functions :vars); # Distribution databases
use DebPool::Signal qw(:functions :vars); # Handle signals
+use DebPool::Parser qw(:functions :vars); # File parsing routines
# Before we do anything else, let's find out if we need to act as a daemon,
# and if so, whether we can manage to pull it off.
diff --git a/debian/TODO b/debian/TODO
index e7d1ac2..9235b92 100644
--- a/debian/TODO
+++ b/debian/TODO
@@ -2,6 +2,7 @@ Features, fixes, and other stuff to be done.
(general)
+* Parse_DSC and Parse_Changes need to handle multiline entries better.
* Support tools for manipulating metadata (which really means 'anything
stored in database files').
* Support rebuild-dbs (including invocation from rebuild-all)?
diff --git a/debian/changelog b/debian/changelog
index 83dbef4..b44dd82 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,8 +1,9 @@
-debpool (0.3.11) experimental; urgency=low
+debpool (0.4.0) experimental; urgency=low
+ * Created a new module to handle file parsing. (NOT YET COMPLETE).
* Minor code cleanup.
- -- Andres Mejia <mcitadel at gmail.com> Tue, 21 Oct 2008 00:06:48 -0400
+ -- Andres Mejia <mcitadel at gmail.com> Thu, 23 Oct 2008 23:06:44 -0400
debpool (0.3.10) experimental; urgency=low
diff --git a/lib/DebPool/Packages.pm b/lib/DebPool/Packages.pm
index 227ab17..3a98479 100644
--- a/lib/DebPool/Packages.pm
+++ b/lib/DebPool/Packages.pm
@@ -135,7 +135,12 @@ my %Changes_Fields = (
'Urgency' => 'string',
'Maintainer' => 'string',
'Changed-By' => 'string',
+ 'Description' => 'multiline_array',
'Closes' => 'space_array',
+ 'Changes' => 'multiline_array',
+ 'Checksums-Sha1' => 'multiline_array',
+ 'Checksums-Sha256' => 'multiline_array',
+ 'Files' => 'multiline_array'
);
# Normal fields potentially found in .dsc files
@@ -143,13 +148,20 @@ my %Changes_Fields = (
my %DSC_Fields = (
'Format' => 'string',
'Source' => 'string',
- 'Version' => 'string',
'Binary' => 'comma_array',
- 'Maintainer' => 'string',
'Architecture' => 'space_array',
+ 'Version' => 'string',
+ 'Maintainer' => 'string',
+ 'Uploaders' => 'comma_array',
+ 'Homepage' => 'string',
'Standards-Version' => 'string',
+ 'Vcs-Browser' => 'string',
+# 'Vcs-Any' => 'string', # TODO: Handle these entries somewhere
'Build-Depends' => 'comma_array',
'Build-Depends-Indep' => 'comma_array',
+ 'Checksums-Sha1' => 'multiline_array',
+ 'Checksums-Sha256' => 'multiline_array',
+ 'Files' => 'multiline_array',
);
### File lexicals
diff --git a/lib/DebPool/Parser.pm b/lib/DebPool/Parser.pm
new file mode 100644
index 0000000..4557f7a
--- /dev/null
+++ b/lib/DebPool/Parser.pm
@@ -0,0 +1,248 @@
+package DebPool::Parser;
+
+###
+#
+# DebPool::Parser - Module for parsing changes and dsc files
+#
+# Copyright 2008 Andres Mejia. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. Neither the name of the Author nor the names of any contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# $Id: Parser.pm 27 2008-10-23 03:06:59Z andres $
+#
+###
+
+# We use 'our', so we must have at least Perl 5.6
+
+require 5.006_000;
+
+# Always good ideas.
+
+use strict;
+use warnings;
+
+use POSIX; # WEXITSTATUS
+
+### Module setup
+
+BEGIN { # Set up this package's Exporter metadata at compile time
+ use Exporter (); # load Exporter only; the empty list imports nothing
+ our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
+
+ # Version checking: the version string of this module
+ $VERSION = '0.1.5';
+
+ @ISA = qw(Exporter); # inherit from Exporter
+
+ @EXPORT = qw(
+ ); # the default export list is empty
+
+ @EXPORT_OK = qw(
+ &Parse_File
+ ); # names that may be requested explicitly
+
+ %EXPORT_TAGS = (
+ 'functions' => [qw(&Parse_File)], # the :functions tag
+ 'vars' => [qw()], # the :vars tag, currently empty
+ );
+}
+
+### Exported package globals
+
+### Non-exported package globals
+
+# Thread-safe? What's that? Package global error value. We don't export
+# this directly, because it would conflict with other modules.
+
+our($Error);
+
+# Maps each known field name to the type name that Process_Type() dispatches on
+my %Field_Types = (
+ # Trailing notes say whether a field appears in changes files, dsc files, or both
+ 'Format' => 'string', # both
+ 'Date' => 'string', # changes
+ 'Source' => 'string', # both
+ 'Binary' => 'space_array', # both
+ 'Architecture' => 'space_array', # both
+ 'Version' => 'string', # both
+ 'Distribution' => 'string', # both
+ 'Urgency' => 'string', # changes
+ 'Maintainer' => 'string', # both
+ 'Changed-By' => 'string', # changes
+ 'Description' => 'multiline_array', # changes
+ 'Closes' => 'space_array', # changes
+ 'Changes' => 'multiline_array', # changes
+ 'Checksums-Sha1' => 'checksums', # both
+ 'Checksums-Sha256' => 'checksums', # both
+ 'Files' => 'checksums', # both; Process_Type() uses 'file_entries' for .changes files
+ 'Uploaders' => 'comma_array', # dsc
+ 'Homepage' => 'string', # dsc
+ 'Standards-Version' => 'string', # dsc
+# 'Vcs-Any' => 'string', # The Vcs-* entries will all be strings
+ 'Build-Depends' => 'comma_array', # dsc
+ 'Build-Depends-Indep' => 'comma_array', # dsc
+ 'Dm-Upload-Allowed' => 'string', # dsc
+# 'X-Any-Fields' => 'multiline_array', # both
+ 'Source-Version' => 'string', # used when binNMU is detected
+);
+
+### File lexicals
+
+# None
+
+### Constant functions
+
+# None
+
+### Meaningful functions
+
+# Parse_File($file)
+#
+# Parses a changes or dsc file. Returns a reference to a hash keyed by field
+# name; the shape of each value (string, array reference, or hash reference)
+# is chosen by Process_Type(). If $file cannot be opened, a message is
+# printed and an empty return is performed (undef in scalar context).
+#
+# An OpenPGP clear-sign wrapper is skipped: the armor header lines that
+# follow '-----BEGIN PGP SIGNED MESSAGE-----' are ignored up to the first
+# blank line, and parsing stops at '-----BEGIN PGP SIGNATURE-----' so that
+# the signature block's own 'Version:' header cannot replace the package's
+# Version field.
+#
+# When Source has the binNMU form '<package> (<original_version>)', the
+# package name is left in 'Source' and the version, without parentheses, is
+# stored in 'Source-Version'.
+
+sub Parse_File {
+    my ($file) = @_;
+
+    # Read in the entire file. It should be small, this is fine.
+    my $fh;
+    if (!open($fh, '<', $file)) {
+        print "Couldn't open file '$file': $!";
+        return;
+    }
+    my @data = <$fh>;
+    chomp @data;
+    close $fh;
+
+    # Collect the lines belonging to one field in @values, and hand them to
+    # Process_Type() as soon as the next field (or the end of input) is seen.
+    my ($field, @values, %fields);
+    my $in_pgp_header = 0;
+    foreach my $line (@data) {
+        # Start of a clear-signed message: armor headers follow.
+        if ($line =~ m/^-----BEGIN PGP SIGNED MESSAGE-----\s*$/) {
+            $in_pgp_header = 1;
+            next;
+        }
+        if ($in_pgp_header) {
+            # Armor headers (e.g. 'Hash: SHA1') end at the first blank line.
+            if ($line =~ m/^\s*$/) {
+                $in_pgp_header = 0;
+            }
+            next;
+        }
+        # Nothing after the start of the signature block is control data.
+        if ($line =~ m/^-----BEGIN PGP SIGNATURE-----\s*$/) {
+            last;
+        }
+
+        if ($line =~ m/^\s*$/) {
+            next; # Ignore blank (or whitespace-only) lines
+        } elsif ($line =~ m/^([^:\s]+):\s?(.*)$/) {
+            my ($name, $value) = ($1, $2);
+            # We process entries for the previous field, so we must ensure
+            # that there is one. There is none when the first field is seen.
+            if (defined $field) {
+                $fields{$field} = Process_Type($field, $file, @values);
+            }
+            @values = ();
+            $field = $name;
+            # Test against the empty string rather than for truth, so that
+            # a value of '0' is not thrown away.
+            if ($value ne '') {
+                push @values, $value;
+            }
+        } elsif (defined $field) {
+            # Still in the same field, we omit the first white space.
+            # Continuation lines seen before any field are dropped.
+            push @values, (substr $line, 1);
+        }
+    }
+    # Once we're done with the for loop, we still have to process the last
+    # field.
+    if (defined $field) {
+        $fields{$field} = Process_Type($field, $file, @values);
+    }
+
+    # In case a valid binNMU is detected, Source will be written as
+    # <package> (<original_version>). We must strip the extra version from the
+    # string. Files without a Source field are left alone.
+    if (defined $fields{'Source'}) {
+        ($fields{'Source'}, $fields{'Source-Version'}) =
+            split(/ /, $fields{'Source'});
+        if (defined $fields{'Source-Version'}) {
+            $fields{'Source-Version'} =~ s/^\(|\)$//g;
+        }
+    }
+
+    return \%fields;
+}
+
+# Process_Type($field, $file, @values)
+#
+# Converts the collected lines (@values) of one field into the representation
+# selected for that field, and returns it:
+#
+#   string           the first line, as a plain scalar
+#   space_array      array reference of whitespace-separated words
+#   comma_array      array reference of comma-separated items
+#   multiline_array  array reference holding the lines as given
+#   checksums        hash reference: filename => [ checksum, size ]
+#   file_entries     hash reference:
+#                    filename => [ checksum, size, section, priority ]
+#
+# The type is looked up in %Field_Types, with these exceptions: 'Files' is a
+# file_entries field when $file ends in '.changes', every 'Vcs-*' field is a
+# string, and fields missing from the table are multiline_arrays. The table
+# itself is never modified, so parsing one file cannot change how the next
+# one is interpreted.
+
+sub Process_Type {
+    my ($field, $file, @values) = @_;
+
+    # Work out the type for this call only.
+    my $type;
+    if (($field eq 'Files') and ($file =~ m/\.changes$/)) {
+        # The Files field of a changes file carries section and priority.
+        $type = 'file_entries';
+    } elsif ($field =~ m/^Vcs-/) {
+        # Compensates for the many different Vcs-* entries that may exist.
+        $type = 'string';
+    } elsif (exists $Field_Types{$field}) {
+        $type = $Field_Types{$field};
+    } else {
+        # Make all unknown fields of type multiline_array for now.
+        $type = 'multiline_array';
+    }
+
+    if ($type eq 'string') {
+        return $values[0];
+    } elsif ($type eq 'space_array') {
+        # Joining first keeps words found on continuation lines, and makes
+        # an empty field yield an empty list. split ' ' ignores leading
+        # white space.
+        my @data = split ' ', join(' ', @values);
+        return \@data;
+    } elsif ($type eq 'comma_array') {
+        # Joining first keeps items found on continuation lines. White space
+        # around the commas is optional.
+        my $joined = join(' ', @values);
+        $joined =~ s/^\s+//;
+        $joined =~ s/\s+$//;
+        my @data = split /\s*,\s*/, $joined;
+        return \@data;
+    } elsif ($type eq 'multiline_array') {
+        return \@values;
+    } elsif ($type eq 'checksums') {
+        # Checksum types are a special case. We return a hash where the
+        # filenames are the keys, each containing the value of the checksum
+        # and size in an array.
+        my %data;
+        foreach my $value (@values) {
+            my ($checksum, $size, $name) = split ' ', $value;
+            # Skip lines that do not have all three columns.
+            next if (!defined $name);
+            $data{$name} = [ $checksum, $size ];
+        }
+        return \%data;
+    } elsif ($type eq 'file_entries') {
+        # File entries in a changes file are similar to the checksum type,
+        # except that they also include the section and priority of a file.
+        my %data;
+        foreach my $value (@values) {
+            my ($checksum, $size, $section, $priority, $name) =
+                split ' ', $value;
+            # Skip lines that do not have all five columns.
+            next if (!defined $name);
+            $data{$name} = [ $checksum, $size, $section, $priority ];
+        }
+        return \%data;
+    } else { # A type name in the table that is not handled above
+        return \@values;
+    }
+}
+
+END {}
+
+1;
+
+__END__
+
+# vim:set tabstop=4 expandtab:
--
Debpool Project Repository
More information about the Debpool-commits
mailing list