[Debpool-commits] [SCM] Debpool Project Repository branch, master, updated. debian/0.3.10-2-ge7576aa

Fri Oct 24 03:11:23 UTC 2008

The following commit has been merged in the master branch:
commit e7576aae317c62d53e90e8dab3e388abdfcf9880
Author: Andres Mejia <mcitadel at gmail.com>
Date:   Thu Oct 23 23:11:16 2008 -0400

    Start implementing a new module to use for file parsing

diff --git a/MANIFEST b/MANIFEST
index 4f52faa..fd1891a 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -13,3 +13,4 @@ lib/DebPool/Bzip2.pm
 lib/DebPool/Config.pm
 # lib/DebPool/Hooks.pm
 lib/DebPool/DB.pm
+lib/DebPool/Parser.pm
diff --git a/bin/debpool b/bin/debpool
index 5672c35..7b440f1 100755
--- a/bin/debpool
+++ b/bin/debpool
@@ -50,6 +50,7 @@ use DebPool::GnuPG qw(:functions :vars); # GnuPG interaction routines
 use DebPool::Logging qw(:functions :facility :level); # Logging routines
 use DebPool::Packages qw(:functions :vars); # Distribution databases
 use DebPool::Signal qw(:functions :vars); # Handle signals
+use DebPool::Parser qw(:functions :vars); # File parsing routines
 
 # Before we do anything else, let's find out if we need to act as a daemon,
 # and if so, whether we can manage to pull it off.
diff --git a/debian/TODO b/debian/TODO
index e7d1ac2..9235b92 100644
--- a/debian/TODO
+++ b/debian/TODO
@@ -2,6 +2,7 @@ Features, fixes, and other stuff to be done.
 
 (general)
 
+* Parse_DSC and Parse_Changes need to handle multiline entries better.
 * Support tools for manipulating metadata (which really means 'anything
   stored in database files').
 * Support rebuild-dbs (including invocation from rebuild-all)?
diff --git a/debian/changelog b/debian/changelog
index 83dbef4..b44dd82 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,8 +1,9 @@
-debpool (0.3.11) experimental; urgency=low
+debpool (0.4.0) experimental; urgency=low
 
+  * Created a new module to handle file parsing. (NOT YET COMPLETE).
   * Minor code cleanup.
 
- -- Andres Mejia <mcitadel at gmail.com>  Tue, 21 Oct 2008 00:06:48 -0400
+ -- Andres Mejia <mcitadel at gmail.com>  Thu, 23 Oct 2008 23:06:44 -0400
 
 debpool (0.3.10) experimental; urgency=low
 
diff --git a/lib/DebPool/Packages.pm b/lib/DebPool/Packages.pm
index 227ab17..3a98479 100644
--- a/lib/DebPool/Packages.pm
+++ b/lib/DebPool/Packages.pm
@@ -135,7 +135,12 @@ my %Changes_Fields = (
     'Urgency' => 'string',
     'Maintainer' => 'string',
     'Changed-By' => 'string',
+    'Description' => 'multiline_array',
     'Closes' => 'space_array',
+    'Changes' => 'multiline_array',
+    'Checksums-Sha1' => 'multiline_array',
+    'Checksums-Sha256' => 'multiline_array',
+    'Files' => 'multiline_array'
 );
 
 # Normal fields potentially found in .dsc files
@@ -143,13 +148,20 @@ my %Changes_Fields = (
 my %DSC_Fields = (
     'Format' => 'string',
     'Source' => 'string',
-    'Version' => 'string',
     'Binary' => 'comma_array',
-    'Maintainer' => 'string',
     'Architecture' => 'space_array',
+    'Version' => 'string',
+    'Maintainer' => 'string',
+    'Uploaders' => 'comma_array',
+    'Homepage' => 'string',
     'Standards-Version' => 'string',
+    'Vcs-Browser' => 'string',
+#    'Vcs-Any' => 'string', # TODO: Handle these entries somewhere
     'Build-Depends' => 'comma_array',
     'Build-Depends-Indep' => 'comma_array',
+    'Checksums-Sha1' => 'multiline_array',
+    'Checksums-Sha256' => 'multiline_array',
+    'Files' => 'multiline_array',
 );
 
 ### File lexicals
diff --git a/lib/DebPool/Parser.pm b/lib/DebPool/Parser.pm
new file mode 100644
index 0000000..4557f7a
--- /dev/null
+++ b/lib/DebPool/Parser.pm
@@ -0,0 +1,248 @@
+package DebPool::Parser;
+
+###
+#
+# DebPool::Parser - Module for parsing changes and dsc files
+#
+# Copyright 2008 Andres Mejia. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+# 3. Neither the name of the Author nor the names of any contributors
+#    may be used to endorse or promote products derived from this software
+#    without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# $Id: Parser.pm 27 2008-10-23 03:06:59Z andres $
+#
+###
+
+# We use 'our', so we must have at least Perl 5.6
+
+require 5.006_000;
+
+# Always good ideas.
+
+use strict;
+use warnings;
+
+use POSIX; # WEXITSTATUS
+
+### Module setup
+
+BEGIN {
+    use Exporter ();
+    our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
+
+    # Module version, available for version checks by callers.
+    $VERSION = '0.1.5';
+
+    # import() is inherited from Exporter.
+    @ISA = ('Exporter');
+
+    # Nothing is exported by default; callers must ask by name or by tag.
+    @EXPORT = ();
+
+    # The exportable symbols, grouped the same way as the export tags so
+    # that each name is listed only once.
+    my @functions = ('&Parse_File');
+    my @vars = ();
+
+    @EXPORT_OK = (@functions, @vars);
+
+    %EXPORT_TAGS = (
+        'functions' => [@functions],
+        'vars' => [@vars],
+    );
+}
+
+### Exported package globals
+
+### Non-exported package globals
+
+# Thread-safe? What's that? Package global error value. We don't export
+# this directly, because it would conflict with other modules.
+
+our($Error);
+
+# Hash of potential fields and what they should be represented as. The keys
+# are field names as they appear in changes and dsc files; the values name
+# the representation that Process_Type() produces for that field:
+#
+#   string          - a single scalar value
+#   space_array     - reference to an array of whitespace-separated items
+#   comma_array     - reference to an array of comma-separated items
+#   multiline_array - reference to an array holding the lines of the field
+#   checksums       - reference to a hash keyed by file name, each value being
+#                     a reference to an array of [ checksum, size ]
+#
+# Process_Type() additionally knows a 'file_entries' representation, which it
+# selects for the Files field of a changes file; it does not appear in this
+# table. The trailing comment on each entry notes the kind of file the field
+# is expected in.
+my %Field_Types = (
+    # All fields that can be found either in changes or dsc files
+    'Format' => 'string', # both
+    'Date' => 'string', # changes
+    'Source' => 'string', # both
+    'Binary' => 'space_array', # both
+    'Architecture' => 'space_array', # both
+    'Version' => 'string', # both
+    'Distribution' => 'string', # both
+    'Urgency' => 'string', # changes
+    'Maintainer' => 'string', # both
+    'Changed-By' => 'string', # changes
+    'Description' => 'multiline_array', # changes
+    'Closes' => 'space_array', # changes
+    'Changes' => 'multiline_array', # changes
+    'Checksums-Sha1' => 'checksums', # both
+    'Checksums-Sha256' => 'checksums', # both
+    'Files' => 'checksums', # both
+    'Uploaders' => 'comma_array', # dsc
+    'Homepage' => 'string', # dsc
+    'Standards-Version' => 'string', # dsc
+#    'Vcs-Any' => 'string', # The Vcs-* entries will all be strings
+    'Build-Depends' => 'comma_array', # dsc
+    'Build-Depends-Indep' => 'comma_array', # dsc
+    'Dm-Upload-Allowed' => 'string', # dsc
+#    'X-Any-Fields' => 'multiline_array', # both
+    'Source-Version' => 'string', # used when binNMU is detected
+);
+
+### File lexicals
+
+# None
+
+### Constant functions
+
+# None
+
+### Meaningful functions
+
+# Parse_File($file)
+#
+# Parses a changes or dsc file and returns a reference to a hash keyed by
+# field name. The shape of each value (string, array reference or hash
+# reference) is chosen per field by Process_Type(). When a Source field is
+# present, a 'Source-Version' key is set as well: it holds the version written
+# in parentheses after the source package name (as done for binNMUs), or undef
+# when there is none.
+#
+# An OpenPGP clearsign wrapper around the file is ignored, so that armor
+# headers such as 'Hash:' and 'Version:' can neither be mistaken for nor
+# overwrite real fields, and the signature block is not appended to the last
+# field.
+#
+# If the file cannot be opened, $Error is set to a description of the failure
+# and nothing is returned (undef in scalar context).
+
+sub Parse_File {
+    my ($file) = @_;
+
+    # Read in the entire file. It should be small, this is fine.
+    my $fh;
+    if (!open($fh, '<', $file)) {
+        $Error = "Couldn't open file '$file': $!";
+        return;
+    }
+    my @data = <$fh>;
+    chomp @data;
+    close $fh;
+
+    # Add a key in a hash corresponding to the Field of the file we're parsing.
+    # Then add the corresponding values. We first start by adding the values
+    # in an array.
+    my ($field, @values, %fields);
+    my $in_armor_headers = 0;
+    foreach my $line (@data) {
+        # Skip the clearsign header: the BEGIN line, any armor headers
+        # ('Hash: ...'), and the blank line that terminates them.
+        if ($line =~ m/^-----BEGIN PGP SIGNED MESSAGE-----\s*$/) {
+            $in_armor_headers = 1;
+            next;
+        }
+        if ($in_armor_headers) {
+            if ($line =~ m/^\s*$/) {
+                $in_armor_headers = 0;
+            }
+            next;
+        }
+
+        # Everything from here on is signature data, not file content.
+        if ($line =~ m/^-----BEGIN PGP SIGNATURE-----\s*$/) {
+            last;
+        }
+
+        if ($line eq '') {
+            next; # Ignore blank lines
+        } elsif ($line =~ m/^([^:\s]+):\s?(.*)$/) {
+            # Save the captures before anything else can run a regex.
+            my ($name, $value) = ($1, $2);
+
+            # A new field starts here, so the previous one (if any) is
+            # complete and can be processed. There is none on the first pass.
+            if (defined $field) {
+                $fields{$field} = Process_Type($field, $file, @values);
+            }
+            @values = ();
+            $field = $name;
+            # Only add entries if there's something to add. Compare against
+            # the empty string so that a literal '0' is kept.
+            if ($value ne '') {
+                push @values, $value;
+            }
+        } elsif (defined $field and $line =~ m/^\s/) {
+            # Still in the same field, we omit the first white space
+            push @values, (substr $line, 1);
+        }
+        # Anything else (text before the first field, or an unindented line
+        # that is not a field) is not valid control data and is ignored.
+    }
+    # Once we're done with the for loop, we still have to process the last
+    # field.
+    if (defined $field) {
+        $fields{$field} = Process_Type($field, $file, @values);
+    }
+
+    # In case a valid binNMU is detected, Source will be written as
+    # <package> (<original_version>). We must strip the extra version from the
+    # string. Nothing to do when the file has no usable Source field.
+    if (defined $fields{'Source'}) {
+        ($fields{'Source'}, $fields{'Source-Version'}) =
+            split(/ /, $fields{'Source'});
+        if (defined $fields{'Source-Version'}) {
+            $fields{'Source-Version'} =~ s/^\(|\)$//g;
+        }
+    }
+
+    return \%fields;
+}
+
+# Process_Type($field, $file, @values)
+#
+# Converts the raw lines collected for one field into the representation used
+# for that field and returns it: a string, or a reference to an array or hash.
+#
+#   $field  - name of the field being processed
+#   $file   - name of the file being parsed; its extension selects the
+#             representation of fields that differ between changes and dsc
+#             files
+#   @values - the lines belonging to the field, first line first
+#
+# The representation is looked up in %Field_Types, with these exceptions:
+#   * Files in a .changes file is 'file_entries' (it carries section and
+#     priority in addition to checksum and size);
+#   * Binary in a .dsc file is 'comma_array' (it is comma-separated there and
+#     whitespace-separated in a changes file);
+#   * every Vcs-* field is a 'string';
+#   * fields not listed at all are 'multiline_array' for now.
+# The decision is made per call and %Field_Types is never modified, so parsing
+# one file cannot change how a later file is parsed.
+
+sub Process_Type {
+    my ($field, $file, @values) = @_;
+
+    my $type;
+    if (($field eq 'Files') and ($file =~ m/\.changes$/)) {
+        $type = 'file_entries';
+    } elsif (($field eq 'Binary') and ($file =~ m/\.dsc$/)) {
+        $type = 'comma_array';
+    } elsif ($field =~ m/^Vcs-/) {
+        # There are many different Vcs-* fields; they are all plain strings.
+        $type = 'string';
+    } elsif (exists $Field_Types{$field}) {
+        $type = $Field_Types{$field};
+    } else {
+        # Make all unknown fields of type multiline_array for now.
+        $type = 'multiline_array';
+    }
+
+    if ($type eq 'string') {
+        return $values[0];
+    } elsif ($type eq 'space_array') {
+        # The list may be folded over several lines, so use all of them.
+        # Splitting on ' ' ignores leading whitespace and yields an empty
+        # list for an empty field.
+        my @data = split ' ', join(' ', @values);
+        return \@data;
+    } elsif ($type eq 'comma_array') {
+        # As above, but items are separated by commas with optional
+        # whitespace on either side.
+        my $list = join(' ', @values);
+        $list =~ s/^\s+|\s+$//g;
+        my @data = split /\s*,\s*/, $list;
+        return \@data;
+    } elsif ($type eq 'multiline_array') {
+        return \@values;
+    } elsif ($type eq 'checksums') {
+        # Checksum types are a special case. We return a hash where the
+        # filenames are the keys, each containing the value of the checksum and
+        # size in an array.
+        my %data;
+        foreach my $value (@values) {
+            my ($checksum, $size, $name) = split ' ', $value;
+            # Skip lines that do not have all three columns.
+            next if !defined $name;
+            $data{$name} = [ $checksum, $size ];
+        }
+        return \%data;
+    } elsif ($type eq 'file_entries') {
+        # File entries in a changes file are similar to the checksum type,
+        # except that they also include the section and priority of a file.
+        my %data;
+        foreach my $value (@values) {
+            my ($checksum, $size, $section, $priority, $name) =
+                split ' ', $value;
+            # Skip lines that do not have all five columns.
+            next if !defined $name;
+            $data{$name} = [ $checksum, $size, $section, $priority ];
+        }
+        return \%data;
+    } else { # Treat all unknown types as multiline_arrays for now
+        return \@values;
+    }
+}
+
+END {}
+
+1;
+
+__END__
+
+# vim:set tabstop=4 expandtab:

-- 
Debpool Project Repository