[libmarc-parser-raw-perl] 05/29: major refactoring

Jonas Smedegaard dr at jones.dk
Tue Aug 4 11:57:19 UTC 2015


This is an automated email from the git hooks/post-receive script.

js pushed a commit to branch master
in repository libmarc-parser-raw-perl.

commit 799d2bcadc4ec8f1e4cebc8d29987d08a3e375ad
Author: Johann Rolschewski <rolschewski at gmail.com>
Date:   Mon May 11 18:18:11 2015 +0200

    major refactoring
---
 LICENSE                |  6 ++---
 README.md              | 13 ++++++++++
 lib/MARC/Parser/RAW.pm | 69 ++++++++++++++++++++++++++++++++------------------
 t/01-parser.t          | 48 ++++++++++++++++++++++-------------
 t/camel.mrc            |  2 +-
 5 files changed, 92 insertions(+), 46 deletions(-)

diff --git a/LICENSE b/LICENSE
index 48678db..932b9cf 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-This software is copyright (c) 2015 by Johann Rolschewski.
+This software is copyright (c) 2014- by Johann Rolschewski <jorol at cpan.org>.
 
 This is free software; you can redistribute it and/or modify it under
 the same terms as the Perl 5 programming language system itself.
@@ -12,7 +12,7 @@ b) the "Artistic License"
 
 --- The GNU General Public License, Version 1, February 1989 ---
 
-This software is Copyright (c) 2015 by Johann Rolschewski.
+This software is Copyright (c) 2014- by Johann Rolschewski <jorol at cpan.org>.
 
 This is free software, licensed under:
 
@@ -272,7 +272,7 @@ That's all there is to it!
 
 --- The Artistic License 1.0 ---
 
-This software is Copyright (c) 2015 by Johann Rolschewski.
+This software is Copyright (c) 2014- by Johann Rolschewski <jorol at cpan.org>.
 
 This is free software, licensed under:
 
diff --git a/README.md b/README.md
index a9e67ee..d275960 100644
--- a/README.md
+++ b/README.md
@@ -72,6 +72,19 @@ Deserialize a raw MARC record to an ARRAY of ARRAYs.
 
 Split MARC field string in individual components.
 
+# AUTHOR
+
+Johann Rolschewski <jorol at cpan.org>
+
+# COPYRIGHT
+
+Copyright 2014- Johann Rolschewski
+
+# LICENSE
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
 # SEEALSO
 
 [Catmandu](https://metacpan.org/pod/Catmandu), [Catmandu::Importer::MARC](https://metacpan.org/pod/Catmandu::Importer::MARC).
diff --git a/lib/MARC/Parser/RAW.pm b/lib/MARC/Parser/RAW.pm
index f20e6dc..718489f 100644
--- a/lib/MARC/Parser/RAW.pm
+++ b/lib/MARC/Parser/RAW.pm
@@ -7,6 +7,7 @@ use warnings;
 use charnames qw< :full >;
 use Carp qw(croak carp);
 use Encode qw(find_encoding);
+use English;
 use Readonly;
 
 Readonly my $LEADER_LEN         => 24;
@@ -93,8 +94,7 @@ sub new {
     $file or croak "first argument must be a file or filehandle";
 
     if ($encoding) {
-        find_encoding($encoding)
-            or croak "encoding \"$_[0]\" is not a valid encoding";
+        find_encoding($encoding) or croak "encoding \"$_[0]\" not found";
     }
 
     my $self = {
@@ -130,25 +130,24 @@ Reads the next record from MARC input stream. Returns a Perl hash.
 sub next {
     my $self = shift;
     my $fh   = $self->{fh};
-    local $/ = $END_OF_RECORD;
-    if ( my $record = <$fh> ) {
+    local $INPUT_RECORD_SEPARATOR = $END_OF_RECORD;
+    if ( defined (my $raw = <$fh>) ) {
         $self->{rec_number}++;
 
         # remove illegal garbage that sometimes occurs between records
-        $record
+        $raw
             =~ s/^[\N{SPACE}\N{NUL}\N{LINE FEED}\N{CARRIAGE RETURN}\N{SUB}]+//;
-        return unless $record;
+        return unless $raw;
 
-        my $record = _decode($record);
-        if ( scalar @{$record} > 1 ) {
-            return $record;
+
+        if ( my $marc = $self->_decode($raw) ) {
+            return $marc;
+        }
+        else {
+            return $self->next();
         }
-        carp $record->[0] . $self->{rec_number};
-        $self->next();
-    }
-    else {
-        return;
     }
+    return;
 }
 
 =head2 _decode($record)
@@ -158,34 +157,41 @@ Deserialize a raw MARC record to an ARRAY of ARRAYs.
 =cut
 
 sub _decode {
-    my $raw = shift;
+    my ( $self, $raw ) = @_;
     chop $raw;
     my ( $head, @fields ) = split $END_OF_FIELD, $raw;
 
     if ( !@fields ) {
-        return ["no fields found in record "];
+        carp "no fields found in record " . $self->{rec_number};
+        return;
     }
 
     # ToDO: better RegEX for leader
-    if ( $head !~ /(.{$LEADER_LEN})/cg ) {
-        return ["no record leader found in record "];
+    my $leader;
+    if ( $head =~ /(.{$LEADER_LEN})/cg ) {
+        $leader = $1;
+    }
+    else {
+        carp "no valid record leader found in record " . $self->{rec_number};
+        return;
     }
 
-    my $leader = $1;
-    my @tags   = $head =~ /\G(\d{3})\d{9}/cg;
+    my @tags = $head =~ /\G(\d{3})\d{9}/cg;
 
     if ( scalar @tags != scalar @fields ) {
-        return ["different number of tags and fields in record "];
+        carp "different number of tags and fields in record "
+            . $self->{rec_number};
+        return;
     }
 
     if ( $head !~ /\G$/cg ) {
-        my $tail = $1 if $head =~ /(.*)/cg;
-        return ["incomplete directory entry in record "];
+        carp "incomplete directory entry in record " . $self->{rec_number};
+        return;
     }
 
     return [
         [ 'LDR', undef, undef, '_', $leader ],
-        map [ shift(@tags), _field($_) ],
+        map [ shift(@tags), $self->_field($_) ],
         @fields
     ];
 }
@@ -197,7 +203,7 @@ Split MARC field string in individual components.
 =cut
 
 sub _field {
-    my ($field) = @_;
+    my ( $self, $field ) = @_;
     my @chunks = split( /$SUBFIELD_INDICATOR(.)/, $field );
     return ( undef, undef, '_', @chunks ) if @chunks == 1;
     my @subfields;
@@ -208,6 +214,19 @@ sub _field {
     return ( $indicator1, $indicator2, @subfields );
 }
 
+=head1 AUTHOR
+
+Johann Rolschewski E<lt>jorol at cpan.orgE<gt>
+
+=head1 COPYRIGHT
+
+Copyright 2014- Johann Rolschewski
+
+=head1 LICENSE
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
 =head1 SEEALSO
 
 L<Catmandu>, L<Catmandu::Importer::MARC>.
diff --git a/t/01-parser.t b/t/01-parser.t
index 422f1b8..e047a34 100644
--- a/t/01-parser.t
+++ b/t/01-parser.t
@@ -4,13 +4,24 @@ use Test::More;
 
 use MARC::Parser::RAW;
 
-my $failure =  eval {MARC::Parser::RAW->new()};
-is( $failure, undef, 'croak missing argument');
+new_ok( 'MARC::Parser::RAW' => ['./t/camel.mrc'] );
+new_ok( 'MARC::Parser::RAW' => ['./t/camel.mrc', 'UTF-8'] );
+can_ok( 'MARC::Parser::RAW', qw{ next });
+my $failure =  eval { MARC::Parser::RAW->new() };
+is( $failure, undef, 'croak missing argument' );
+$failure = eval { MARC::Parser::RAW->new('./t/camel.mrk') };
+is( $failure, undef, 'croak cannot find file');
+$failure = eval { MARC::Parser::RAW->new('./t/camel.mrc', 'XXX-0') };
+is( $failure, undef, 'croak unavailable encoding');
 
 my $parser = MARC::Parser::RAW->new('./t/camel.mrc');
-isa_ok( $parser, 'MARC::Parser::RAW' );
 my $record = $parser->next();
 is_deeply(
+    $record->[0],
+    [ 'LDR', undef, undef, '_', '00755cam  22002414a 4500' ],
+    'LDR'
+);
+is_deeply(
     $record->[1],
     [ '001', undef, undef, '_', 'fol05731351 ' ],
     'first field'
@@ -20,20 +31,23 @@ is_deeply(
     [ '020', ' ', ' ', 'a', '0471383147 (paper/cd-rom : alk. paper)' ],
     'sixth field'
 );
-$record = $parser->next();
-is_deeply(
-    $record->[1],
-    [ '001', undef, undef, '_', 'fol05754809 ' ],
-    'first field'
-);
 
-$parser = MARC::Parser::RAW->new('./t/camel.mrc', 'UTF-8');
-isa_ok( $parser, 'MARC::Parser::RAW' );
-$record = $parser->next();
-is_deeply(
-    $record->[1],
-    [ '001', undef, undef, '_', 'fol05731351 ' ],
-    'first field'
-);
+{
+    my @warnings;
+    local $SIG{__WARN__} = sub {
+         push @warnings, @_;
+    };
+    my $record = $parser->next();
+    is_deeply(
+        $record->[0],
+        [ 'LDR', undef, undef, '_', '00665nam  22002298a 4500' ],
+        'skipped faulty records'
+    );
+    is scalar(@warnings), 4, 'got warnings';
+    like $warnings[0], qr{no fields found in record}, 'carp no fields found in record';
+    like $warnings[1], qr{no valid record leader found in record}, 'carp no valid record leader found in record';
+    like $warnings[2], qr{different number of tags and fields in record}, 'carp different number of tags and fields in record';
+    like $warnings[3], qr{incomplete directory entry in record}, 'carp incomplete directory entry in record';
+}
 
 done_testing;
\ No newline at end of file
diff --git a/t/camel.mrc b/t/camel.mrc
index 68d6dad..7127bd2 100644
--- a/t/camel.mrc
+++ b/t/camel.mrc
@@ -1 +1 @@
-00755cam  22002414a 4500001001300000003000600013005001700019008004100036010001700077020004300094040001800137042000800155050002600163082001700189100003100206245005400237260004200291300007200333500003300405650003700438630002500475630001300500
fol05731351 
IMchF
20000613133448.0
000107s2000    nyua          001 0 eng  
  a   00020737 
  a0471383147 (paper/cd-rom : alk. paper)
  aDLCcDLCdDLC
  apcc
00aQA76.73.P22bM33 2000
00a005.13/3221
1 aMartinsson, Tobias,d1976-
10aActivePerl [...]
\ No newline at end of file
+00755cam  22002414a 4500001001300000003000600013005001700019008004100036010001700077020004300094040001800137042000800155050002600163082001700189100003100206245005400237260004200291300007200333500003300405650003700438630002500475630001300500
fol05731351 
IMchF
20000613133448.0
000107s2000    nyua          001 0 eng  
  a   00020737 
  a0471383147 (paper/cd-rom : alk. paper)
  aDLCcDLCdDLC
  apcc
00aQA76.73.P22bM33 2000
00a005.13/3221
1 aMartinsson, Tobias,d1976-
10aActivePerl [...]
\ No newline at end of file

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmarc-parser-raw-perl.git



More information about the Pkg-perl-cvs-commits mailing list