[libcatmandu-perl] 35/101: Fix Catmandu::Importer::Text split by character

Jonas Smedegaard dr at jones.dk
Tue Feb 23 13:43:51 UTC 2016


This is an automated email from the git hooks/post-receive script.

js pushed a commit to branch master
in repository libcatmandu-perl.

commit 405e7374ec75b0514dd9be56084b9e823a16c332
Author: Jakob Voss <voss at gbv.de>
Date:   Sat Dec 5 14:36:18 2015 +0100

    Fix Catmandu::Importer::Text split by character
---
 lib/Catmandu/Fix/parse_text.pm |  2 ++
 lib/Catmandu/Importer/Text.pm  | 45 +++++++++++++++++++++++-------------------
 t/Catmandu-Importer-Text.t     | 14 ++++++++-----
 3 files changed, 36 insertions(+), 25 deletions(-)

diff --git a/lib/Catmandu/Fix/parse_text.pm b/lib/Catmandu/Fix/parse_text.pm
index 3b00ee5..cd0a8b4 100644
--- a/lib/Catmandu/Fix/parse_text.pm
+++ b/lib/Catmandu/Fix/parse_text.pm
@@ -66,4 +66,6 @@ Catmandu::Fix::parse_text - parses a text into an array or hash of values
 
 L<Catmandu::Fix>
 
+L<Catmandu::Importer::Text>
+
 =cut
diff --git a/lib/Catmandu/Importer/Text.pm b/lib/Catmandu/Importer/Text.pm
index 9bf95bf..b398926 100644
--- a/lib/Catmandu/Importer/Text.pm
+++ b/lib/Catmandu/Importer/Text.pm
@@ -10,17 +10,17 @@ use namespace::clean;
 with 'Catmandu::Importer';
 
 has pattern => (
-    is => 'ro',
-    coerce => sub { 
-        $_[0] =~ /\n/m ? qr{$_[0]}x : qr{$_[0]} 
+    is     => 'ro',
+    coerce => sub {
+        $_[0] =~ /\n/m ? qr{$_[0]}x : qr{$_[0]};
     },
 );
 
 has split => (
-    is => 'ro',
+    is     => 'ro',
     coerce => sub {
-        length $_[0] == 1 ? $_[0] : qr{$_[0]}
-    } 
+        length $_[0] == 1 ? quotemeta($_[0]) : qr{$_[0]};
+    }
 );
 
 sub generator {
@@ -31,21 +31,23 @@ sub generator {
         state $count   = 0;
         state $line;
 
-        while ( defined($line = $self->readline) ) {
+        while ( defined( $line = $self->readline ) ) {
             chomp $line;
             next if $pattern and $line !~ $pattern;
 
             my $data = { _id => ++$count };
 
-            if (@+ < 2) {       # no capturing groups
+            if ( @+ < 2 ) {    # no capturing groups
                 $data->{text} = $line;
-            } elsif (%+) {      # named capturing groups
-                $data->{match} = { %+ };
-            } else {            # numbered capturing groups
+            }
+            elsif (%+) {       # named capturing groups
+                $data->{match} = {%+};
+            }
+            else {             # numbered capturing groups
                 no strict 'refs';
-                $data->{match} = [ map { $$_ } 1.. at +-1 ]; 
+                $data->{match} = [ map { $$_ } 1 .. @+ - 1 ];
             }
-            
+
             if ($split) {
                 $data->{text} = [ split $split, $line ];
             }
@@ -91,10 +93,11 @@ In Perl code:
 
 =head1 DESCRIPTION
 
-This L<Catmandu::Importer> reads each line of input as an item with line number
-in field C<_id> and text content in field C<text>. Line separators are not
-included. A regular expression can be specified to only import selected lines
-and parts of lines that match a given pattern. 
+This L<Catmandu::Importer> reads textual input line by line. Each line is
+imported as item with line number in field C<_id> and text content in field
+C<text>. Line separators are not included. Lines can further be split by
+character or pattern and a regular expression can be specified to only import
+selected lines and to translate pattern groups to fields.
 
 =head1 CONFIGURATION
 
@@ -120,7 +123,7 @@ An ARRAY of one or more fixes or file scripts to be applied to imported items.
 
 =item split
 
-Character or regular expression (given as string with a least two characters),
+Single Character or regular expression (as string with a least two characters),
 to split each line.  Resulting parts are imported in field C<text> as array.
 
 =item pattern
@@ -144,14 +147,16 @@ or as array with
 
 =head1 METHODS
 
-Every L<Catmandu::Importer> is a L<Catmandu::Iterable> all its methods are
+Every L<Catmandu::Importer> is a L<Catmandu::Iterable> with all its methods 
 inherited.
 
 =head1 SEE ALSO
 
 L<Catmandu::Exporter::Text>
 
-L<awk|https://en.wikipedia.org/wiki/AWK> and
+L<Catmandu::Fix::parse_text>
+
+Unix tools L<awk|https://en.wikipedia.org/wiki/AWK> and
 L<sed|https://en.wikipedia.org/wiki/Sed>
 
 =cut
diff --git a/t/Catmandu-Importer-Text.t b/t/Catmandu-Importer-Text.t
index 373c4bc..563cd66 100644
--- a/t/Catmandu-Importer-Text.t
+++ b/t/Catmandu-Importer-Text.t
@@ -9,7 +9,7 @@ my $text = <<EOF;
 Roses are red,
 Violets are blue,
 Sugar is sweet,
-And so are you.
+And so| are you.
 EOF
 
 sub text {
@@ -20,13 +20,13 @@ is_deeply text(), [
        {_id => 1 , text => "Roses are red,"} ,
        {_id => 2 , text => "Violets are blue,"},
        {_id => 3 , text => "Sugar is sweet,"},
-       {_id => 4 , text => "And so are you."},
+       {_id => 4 , text => "And so| are you."},
     ], 'simple text import';
 
 is_deeply text( pattern => 'are' ), [
        {_id => 1 , text => "Roses are red,"} ,
        {_id => 2 , text => "Violets are blue,"},
-       {_id => 3 , text => "And so are you."},
+       {_id => 3 , text => "And so| are you."},
     ], 'simple pattern match';
 
 is_deeply text( pattern => '(\w+)(.).*\.$' ), [
@@ -52,12 +52,16 @@ is_deeply [ map { $_->{text} } @{ text( split => ' ' ) } ],
     [ map { [ split ' ', $_ ] } split "\n", $text ],
     'split by character';
 
+is_deeply [ map { $_->{text} } @{ text( split => '|' ) } ],
+    [ map { [ split '\\|', $_ ] } split "\n", $text ],
+    'split by character (no regexp)';
+
 is_deeply [ map { $_->{text} } @{ text( split => 'is|are' ) } ],
     [ map { [ split /is|are/, $_ ] } split "\n", $text ],
     'split by regexp';
 
-is_deeply text( split => ' is | are ', pattern => '^And so (.*)' ),
-    [ { _id => 1, text => ['And so','you.'], match => ['are you.'] } ],
+is_deeply text( split => ' is | are ', pattern => '^And so. (.*)' ),
+    [ { _id => 1, text => ['And so|','you.'], match => ['are you.'] } ],
     'split and pattern';
 
 done_testing;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libcatmandu-perl.git



More information about the Pkg-perl-cvs-commits mailing list