[libcatmandu-marc-perl] 31/208: Benchmarking code

Jonas Smedegaard dr at jones.dk
Sat Oct 28 03:42:32 UTC 2017


This is an automated email from the git hooks/post-receive script.

js pushed a commit to annotated tag upstream/1.19
in repository libcatmandu-marc-perl.

commit 64417677e463a68741901686ff9de104a73119da
Author: Patrick Hochstenbach <patrick.hochstenbach at ugent.be>
Date:   Fri Jul 8 14:27:38 2016 +0200

    Benchmarking code
---
 lib/Catmandu/Fix/Inline/marc_map.pm |   6 +-
 lib/Catmandu/Fix/marc_map.pm        |  25 +++--
 lib/Catmandu/MARC.pm                | 182 +++++++++++++++++-------------------
 3 files changed, 103 insertions(+), 110 deletions(-)

diff --git a/lib/Catmandu/Fix/Inline/marc_map.pm b/lib/Catmandu/Fix/Inline/marc_map.pm
index 07c17c1..ab8bc74 100644
--- a/lib/Catmandu/Fix/Inline/marc_map.pm
+++ b/lib/Catmandu/Fix/Inline/marc_map.pm
@@ -70,8 +70,12 @@ our $VERSION = '0.219';
 sub marc_map {
     my ($data,$marc_path,%opts) = @_;
     # Set default to nested_arrays for backwards compatibility
+    $opts{'-record'} = 'record' unless exists $opts{'-record'};
     $opts{'-nested_arrays'} = 1 unless exists $opts{'-nested_arrays'};
-    return Catmandu::MARC->new->marc_map($data,$marc_path,%opts);
+    return Catmandu::MARC->new->marc_map(
+                $data,
+                $marc_path,
+                \%opts);
 }
 
 1;
diff --git a/lib/Catmandu/Fix/marc_map.pm b/lib/Catmandu/Fix/marc_map.pm
index dbc5c8b..90bda3f 100644
--- a/lib/Catmandu/Fix/marc_map.pm
+++ b/lib/Catmandu/Fix/marc_map.pm
@@ -22,27 +22,24 @@ sub emit {
     my ($self,$fixer) = @_;
     my $path        = $fixer->split_path($self->path);
 
+    my $marc        = $fixer->capture(Catmandu::MARC->new);
     my $marc_path   = $fixer->emit_string($self->marc_path);
-    my $record_opt  = $fixer->emit_string($self->record // 'record');
-    my $join_opt    = $fixer->emit_string($self->join // '');
-    my $split_opt   = $fixer->emit_string($self->split // 0);
-    my $pluck_opt   = $fixer->emit_string($self->pluck // 0);
-    my $nested_arrays_opt = $fixer->emit_string($self->nested_arrays // 0);
-
-    my $value_opt   = $self->value ?
-                        $fixer->emit_string($self->value) : 'undef';
+    my $marc_opt    = $fixer->capture({
+                            '-record' => $self->record // 'record' ,
+                            '-join'   => $self->join   // '' ,
+                            '-split'  => $self->split  // 0 ,
+                            '-pluck'  => $self->pluck  // 0 ,
+                            '-nested_arrays' => $self->nested_arrays // 0 ,
+                            '-value'  => $self->value
+                        });
     my $var         = $fixer->var;
     my $result      = $fixer->generate_var;
 
     my $perl =<<EOF;
-if (my ${result} = Catmandu::MARC->new->marc_map(
+if (my ${result} = ${marc}->marc_map(
             ${var},
             ${marc_path},
-            -split => ${split_opt},
-            -join  => ${join_opt},
-            -pluck => ${pluck_opt},
-            -nested_arrays => ${nested_arrays_opt} ,
-            -value => ${value_opt}) ) {
+            ${marc_opt}) ) {
 EOF
     $perl .= $fixer->emit_create_path(
             $var,
diff --git a/lib/Catmandu/MARC.pm b/lib/Catmandu/MARC.pm
index 539b88d..aa69dd7 100644
--- a/lib/Catmandu/MARC.pm
+++ b/lib/Catmandu/MARC.pm
@@ -7,39 +7,48 @@ use Memoize;
 use Carp;
 use Moo;
 
-#memoize('_compile_marc_path');
+memoize('_compile_marc_path');
 
 our $VERSION = '0.219';
 
-warn 'here';
-
 sub marc_map {
-    my ($self,$data,$marc_path,%opts) = @_;
-    my $record_key = $opts{record} // 'record';
+    my $self      = $_[0];
+    my $data      = $_[1];
+    my $marc_path = $_[2];
+    my $opts      = $_[3];
 
-    return undef unless exists $data->{$record_key};
+    my $record_key = $opts->{'-record'} // 'record';
 
-    my $record = $data->{$record_key};
+    return wantarray ? () : undef
+        unless (defined $data->{$record_key} && ref $data->{$record_key} eq 'ARRAY');
 
-    unless (defined $record && ref $record eq 'ARRAY') {
-        return wantarray ? () : undef;
-    }
+    my $split          = $opts->{'-split'} // 0;
+    my $join_char      = $opts->{'-join'} // '';
+    my $pluck          = $opts->{'-pluck'};
+    my $value_set      = $opts->{'-value'};
+    my $nested_arrays  = $opts->{'-nested_arrays'} // 0;
 
-    my $split          = $opts{'-split'} // 0;
-    my $join_char      = $opts{'-join'} // '';
-    my $pluck          = $opts{'-pluck'};
-    my $value_set      = $opts{'-value'};
-    my $nested_arrays  = $opts{'-nested_arrays'} // 0;
-    my $attrs          = {};
+    my $context = _compile_marc_path($marc_path, subfield_wildcard => 1);
+
+    confess "invalid marc path" unless $context;
 
     my $vals;
 
-    marc_at_field($record, $marc_path, sub {
-        my ($field, $context) = @_;
+    for my $field (@{$data->{$record_key}}) {
+        next if (
+            ($context->{is_regex_field} == 1 && $field->[0] !~ $context->{field_regex} )
+            ||
+            ($context->{is_regex_field} == 0 && $field->[0] ne $context->{field} )
+            ||
+            (defined $context->{ind1} && (!defined $field->[1] || $field->[1] ne $context->{ind1}))
+            ||
+            (defined $context->{ind2} && (!defined $field->[2] || $field->[2] ne $context->{ind2}))
+        );
+
         my $v;
 
         if ($value_set) {
-            for (my $i = $context->{start}; $i < $context->{end}; $i += 2) {
+            for (my $i = 3; $field->[$i]; $i += 2) {
                 my $subfield_regex = $context->{subfield_regex};
                 if ($field->[$i] =~ $subfield_regex) {
                     $v = $value_set;
@@ -99,7 +108,7 @@ sub marc_map {
                 }
             }
         }
-    }, subfield_wildcard => 1);
+    }
 
     if (!defined $vals) {
         return undef;
@@ -114,13 +123,14 @@ sub marc_map {
 
 sub _extract_subfields {
     my ($field,$context,%opts) = @_;
+    my $field_size = int(@$field);
 
     my @v = ();
 
     if ($opts{pluck}) {
         # Treat the subfield as a hash index
         my $_h = {};
-        for (my $i = $context->{start}; $i < $context->{end}; $i += 2) {
+        for (my $i = $context->{start}; $i < $field_size; $i += 2) {
             push @{ $_h->{ $field->[$i] } } , $field->[$i + 1];
         }
         my $subfield = $context->{subfield};
@@ -130,7 +140,7 @@ sub _extract_subfields {
         }
     }
     else {
-        for (my $i = $context->{start}; $i < $context->{end}; $i += 2) {
+        for (my $i = $context->{start}; $i < $field_size; $i += 2) {
             my $subfield_regex = $context->{subfield_regex};
             if ($field->[$i] =~ $subfield_regex) {
                 push(@v, $field->[$i + 1]);
@@ -141,7 +151,6 @@ sub _extract_subfields {
     return @v ? \@v : undef;
 }
 
-
 sub marc_add {
     my ($self,$data,$marc_path, at subfields) = @_;
 
@@ -215,17 +224,39 @@ sub marc_set {
         $value = $last;
     }
 
-    marc_at_field($record, $marc_path, sub {
-        my ($field,$context) = @_;
+    my $context = _compile_marc_path($marc_path, subfield_default => 1);
+
+    confess "invalid marc path" unless $context;
+
+    for my $field (@$record) {
+        my ($tag, $ind1, $ind2, @subfields) = @$field;
+
+        if ($context->{is_regex_field}) {
+            next unless $tag =~ $context->{field_regex};
+        }
+        else {
+            next unless $tag eq $context->{field};
+        }
+
+        if (defined $context->{ind1}) {
+            if (!defined $ind1 || $ind1 ne $context->{ind1}) {
+                next;
+            }
+        }
+        if (defined $context->{ind2}) {
+            if (!defined $ind2 || $ind2 ne $context->{ind2}) {
+                next;
+            }
+        }
 
         my $found = 0;
-        for (my $i = $context->{start}; $i < $context->{end}; $i += 2) {
-            if ($field->[$i] eq $context->{subfield}) {
+        for (my $i = 0; $i < @subfields; $i += 2) {
+            if ($subfields[$i] eq $context->{subfield}) {
                 if (defined $context->{from}) {
-                    substr($field->[$i + 1], $context->{from}, $context->{len}) = $value;
+                    substr($field->[$i + 4], $context->{from}, $context->{len}) = $value;
                 }
                 else {
-                    $field->[$i + 1] = $value;
+                    $field->[$i + 4] = $value;
                 }
                 $found = 1;
             }
@@ -234,7 +265,7 @@ sub marc_set {
         if ($found == 0) {
             push(@$field,$context->{subfield},$value);
         }
-    }, subfield_default => 1);
+    }
 
     $data;
 }
@@ -246,31 +277,37 @@ sub marc_remove {
 
     my $new_record;
 
-    marc_at_field($record, $marc_path, sub {
-        my ($field,$context) = @_;
+    my $context = _compile_marc_path($marc_path);
 
-        if ($field->[0] =~ $context->{field_regex}) {
+    confess "invalid marc path" unless $context;
+
+    for my $field (@$record) {
+        my $field_size = int(@$field);
+
+        if (
+            ($context->{is_regex_field} == 0 && $field->[0] eq $context->{field})
+            ||
+            ($context->{is_regex_field} == 1 && $field->[0] =~ $context->{field_regex})
+            ) {
             if (defined $context->{ind1}) {
-                return if (defined $field->[1] && $field->[1] eq $context->{ind1});
+                next if (defined $field->[1] && $field->[1] eq $context->{ind1});
             }
 
             if (defined $context->{ind2}) {
-                return if (defined $field->[2] && $field->[2] eq $context->{ind2});
+                next if (defined $field->[2] && $field->[2] eq $context->{ind2});
             }
 
             unless (
                 defined $context->{ind1} ||
                 defined $context->{ind2} ||
                 defined $context->{subfield_regex} ) {
-                return;
+                next;
             }
-        }
 
-        if (defined $context->{subfield_regex}) {
-            my $subfield_regex = $context->{subfield_regex};
-            if ( $field->[0] =~ $context->{field_regex}) {
+            if (defined $context->{subfield_regex}) {
+                my $subfield_regex = $context->{subfield_regex};
                 my $new_subf = [];
-                for (my $i = $context->{start}; $i < $context->{end}; $i += 2) {
+                for (my $i = $context->{start}; $i < $field_size; $i += 2) {
                     unless ($field->[$i] =~ $subfield_regex) {
                         push @$new_subf , $field->[$i];
                         push @$new_subf , $field->[$i+1];
@@ -281,8 +318,7 @@ sub marc_remove {
         }
 
         push @$new_record , $field;
-
-    }, nofilter => 1);
+    }
 
     $data->{$record_key} = $new_record;
 
@@ -389,9 +425,9 @@ sub marc_decode_dollar_subfields {
     my $new_record = [];
 
     for my $field (@$old_record) {
-        my ($field,$ind1,$ind2, at subfields) = @$field;
+        my ($tag,$ind1,$ind2, at subfields) = @$field;
 
-        my $fixed_field = [$field,$ind1,$ind2];
+        my $fixed_field = [$tag,$ind1,$ind2];
 
         for (my $i = 0 ; $i < @subfields ; $i += 2) {
             my $code  = $subfields[$i];
@@ -452,14 +488,15 @@ sub _compile_marc_path {
         return undef;
     }
 
-    $field_regex = $field;
-
     if ($field =~ /\*/) {
+        $field_regex    = $field;
         $field_regex    =~ s/\*/./g;
         $is_regex_field = 1;
+        $field_regex    = qr/^$field_regex$/;
+    }
+    else {
+        $is_regex_field = 0;
     }
-
-    $field_regex = qr/^$field_regex$/;
 
     return {
         field           => $field ,
@@ -469,58 +506,13 @@ sub _compile_marc_path {
         subfield_regex  => $subfield_regex ,
         ind1            => $ind1 ,
         ind2            => $ind2 ,
+        start           => 3,
         from            => $from ,
         to              => $to ,
         len             => $len
     };
 }
 
-sub marc_at_field {
-    my ($record,$marc_path,$callback,%opts) = @_;
-
-    croak "need a marc_path and callback" unless defined($marc_path) && defined($callback);
-
-    my $context = _compile_marc_path($marc_path,%opts);
-
-    confess "invalid marc path" unless $context;
-
-    for my $row (@$record) {
-        unless ($opts{nofilter}) {
-            if ($context->{is_regex_field}) {
-                next unless $row->[0] =~ $context->{field_regex};
-            }
-            else {
-                next unless $row->[0] eq $context->{field};
-            }
-
-            if (defined $context->{ind1}) {
-                if (!defined $row->[1] || $row->[1] ne $context->{ind1}) {
-                    next;
-                }
-            }
-            if (defined $context->{ind2}) {
-                if (!defined $row->[2] || $row->[2] ne $context->{ind2}) {
-                    next;
-                }
-            }
-        }
-
-        if ($row->[0] =~ /^LDR|^00/) {
-            $context->{start} = 3;
-        }
-        elsif (defined $row->[5] && $row->[5] eq '_') {
-            $context->{start} = 5;
-        }
-        else {
-            $context->{start} = 3;
-        }
-
-        $context->{end} = int(@$row);
-
-        $callback->($row,$context);
-    }
-}
-
 1;
 
 __END__

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libcatmandu-marc-perl.git



More information about the Pkg-perl-cvs-commits mailing list