[libparser-mgc-perl] 06/12: Import of PEVANS/Parser-MGC-0.06 from CPAN.

Jonas Smedegaard dr at jones.dk
Sat Dec 17 17:37:01 UTC 2016


This is an automated email from the git hooks/post-receive script.

js pushed a commit to tag PEVANS
in repository libparser-mgc-perl.

commit 79aa1b271436ee7b06bc0d1c61c142ca2d5d3fb7
Author: Paul Evans <leonerd at leonerd.org.uk>
Date:   Thu Mar 17 17:52:48 2011 +0000

    Import of PEVANS/Parser-MGC-0.06 from CPAN.
    
    gitpan-cpan-distribution: Parser-MGC
    gitpan-cpan-version:      0.06
    gitpan-cpan-path:         PEVANS/Parser-MGC-0.06.tar.gz
    gitpan-cpan-author:       PEVANS
    gitpan-cpan-maturity:     released
---
 Changes                      |  10 ++
 MANIFEST                     |  10 +-
 META.yml                     |   4 +-
 README                       | 116 +++++++++++++++++++++--
 examples/eval-expr.pl        |  41 +++++----
 examples/parse-dict.pl       |  54 +++++++++++
 examples/parse-pod.pl        |  39 ++++++++
 examples/synopsis.pl         |   9 +-
 lib/Parser/MGC.pm            | 215 ++++++++++++++++++++++++++++++++++++++-----
 t/02expect.t                 |   6 +-
 t/06substring.t              |  27 ++++++
 t/12token_string.t           |  17 +++-
 t/21scope_of.t               |  26 +++++-
 t/{24one_of.t => 24any_of.t} |   2 +-
 t/30commit.t                 |   2 +-
 t/31scope_level.t            |   2 +-
 t/90ex_dict.t                |  32 +++++++
 t/90ex_expr.t                |  33 +++++++
 t/90ex_pod.t                 |  36 ++++++++
 t/90ex_synopsis.t            |  21 +++++
 t/98backcompat.t             |  25 +++++
 21 files changed, 666 insertions(+), 61 deletions(-)

diff --git a/Changes b/Changes
index 5b7312c..0259a77 100644
--- a/Changes
+++ b/Changes
@@ -1,5 +1,15 @@
 Revision history for Parser-MGC
 
+0.06    CHANGES:
+         * Renamed ->one_of to ->any_of
+         * Added ->substring_before
+         * Allow ->scope_of to not take a start pattern
+         * Recognise the usual set of character escapes in ->token_string
+         * Added more example scripts to demonstrate:
+            + the use ->substring_before to parse POD-like notation
+            + accumulator variables instead of structural return
+         * Unit-test the example scripts
+
 0.05    CHANGES:
          * Added ->scope_level
          * Added ->from_reader as a new potential source of string input
diff --git a/MANIFEST b/MANIFEST
index d2bcc68..33ae696 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -1,6 +1,8 @@
 Build.PL
 Changes
 examples/eval-expr.pl
+examples/parse-dict.pl
+examples/parse-pod.pl
 examples/synopsis.pl
 lib/Parser/MGC.pm
 LICENSE
@@ -14,6 +16,7 @@ t/02expect.t
 t/03reader.t
 t/04where.t
 t/05comment.t
+t/06substring.t
 t/10token_int.t
 t/11token_float.t
 t/12token_string.t
@@ -23,7 +26,12 @@ t/20maybe.t
 t/21scope_of.t
 t/22list_of.t
 t/23sequence_of.t
-t/24one_of.t
+t/24any_of.t
 t/30commit.t
 t/31scope_level.t
+t/90ex_dict.t
+t/90ex_expr.t
+t/90ex_pod.t
+t/90ex_synopsis.t
+t/98backcompat.t
 t/99pod.t
diff --git a/META.yml b/META.yml
index 4a88122..41e1e73 100644
--- a/META.yml
+++ b/META.yml
@@ -15,9 +15,9 @@ name: Parser-MGC
 provides:
   Parser::MGC:
     file: lib/Parser/MGC.pm
-    version: 0.05
+    version: 0.06
 requires:
   File::Slurp: 0
 resources:
   license: http://dev.perl.org/licenses/
-version: 0.05
+version: 0.06
diff --git a/README b/README
index 6b211da..efe5982 100644
--- a/README
+++ b/README
@@ -10,7 +10,7 @@ SYNOPSIS
         my $self = shift;
 
         $self->sequence_of( sub {
-           $self->one_of(
+           $self->any_of(
               sub { $self->token_int },
               sub { $self->token_string },
               sub { \$self->token_ident },
@@ -171,6 +171,20 @@ STRUCTURE-FORMING METHODS
         $self->scope_of( "{", sub { $self->parse_statements }, "}" );
      }
 
+    If the $start pattern is undefined, it is presumed the caller has
+    already checked for this. This is useful when the stop pattern needs to
+    be calculated based on the start pattern.
+
+     sub parse_bracketed
+     {
+        my $self = shift;
+
+        my $delim = $self->expect( qr/[\(\[\<\{]/ );
+        $delim =~ tr/([<{/)]>}/;
+
+        $self->enter_scope( undef, sub { $self->parse_body }, $delim );
+     }
+
   $ret = $parser->list_of( $sep, $code )
     Expects to find a list of instances of something parsed by $code,
     separated by the $sep pattern. Returns an ARRAY ref containing a list of
@@ -202,7 +216,7 @@ STRUCTURE-FORMING METHODS
         $self->sequence_of( sub { $self->parse_statement } );
      }
 
-  $ret = $parser->one_of( @codes )
+  $ret = $parser->any_of( @codes )
     Expects that one of the given code references can parse something from
     the input, returning what it returned. Each code reference may indicate
     a failure to parse by calling the "fail" method.
@@ -214,18 +228,23 @@ STRUCTURE-FORMING METHODS
      {
         my $self = shift;
 
-        $self->one_of(
+        $self->any_of(
            sub { $self->parse_declaration; $self->expect(";") },
            sub { $self->parse_expression; $self->expect(";") },
            sub { $self->parse_block },
         );
      }
 
+    Note: This method used to be called "one_of", but was renamed for
+    clarity. Currently this method is provided also as an alias by the old
+    name. Code using the old name should be rewritten to "any_of" instead,
+    as this backward-compatibility alias may be removed in a later version.
+
   $parser->commit
     Calling this method will cancel the backtracking behaviour of the
-    innermost "maybe" or "one_of" structure forming method. That is, if
+    innermost "maybe" or "any_of" structure forming method. That is, if
     later code then calls "fail", the exception will be propagated out of
-    "maybe", and no further code blocks will be attempted by "one_of".
+    "maybe", and no further code blocks will be attempted by "any_of".
 
     Typically this will be called once the grammatical structure of an
     alternation has been determined, ensuring that any further failures are
@@ -235,7 +254,7 @@ STRUCTURE-FORMING METHODS
      {
         my $self = shift;
 
-        $self->one_of(
+        $self->any_of(
            ...
            sub {
               $self->scope_of( "{",
@@ -249,11 +268,39 @@ TOKEN PARSING METHODS
     The following methods attempt to consume some part of the input string,
     to be used as part of the parsing process.
 
-  $parser->expect( $string )
-  $parser->expect( qr/pattern/ )
+  $str = $parser->expect( $literal )
+  $str = $parser->expect( qr/pattern/ )
     Expects to find a literal string or regexp pattern match, and consumes
     it. This method returns the string that was captured.
 
+  $str = $parser->substring_before( $literal )
+  $str = $parser->substring_before( qr/pattern/ )
+    Expects to possibly find a literal string or regexp pattern match. If it
+    finds such, consume all the input text before but excluding this match,
+    and return it. If it fails to find a match before the end of the current
+    scope, consumes all the input text until the end of scope and return it.
+
+    This method does not consume the part of input that matches, only the
+    text before it. It is not considered a failure if the substring before
+    this match is empty. If a non-empty match is required, use the "fail"
+    method:
+
+     sub token_nonempty_part
+     {
+        my $self = shift;
+
+        my $str = $parser->substring_before( "," );
+        length $str or $self->fail( "Expected a string fragment before ," );
+
+        return $str;
+     }
+
+    Note that unlike most of the other token parsing methods, this method
+    does not consume either leading or trailing whitespace around the
+    substring. It is expected that this method would be used as part a
+    parser to read quoted strings, or similar cases where whitespace should
+    be preserved.
+
   $int = $parser->token_int
     Expects to find an integer in decimal, octal or hexadecimal notation,
     and consumes it. Negative integers, preceeded by "-", are also
@@ -268,6 +315,24 @@ TOKEN PARSING METHODS
     Expects to find a quoted string, and consumes it. The string should be
     quoted using """ or "'" quote marks.
 
+    The content of the quoted string can contain character escapes similar
+    to those accepted by C or Perl. Specifically, the following forms are
+    recognised:
+
+     \a           Bell ("alert")
+     \b           Backspace
+     \e           Escape
+     \f           Form feed
+     \n           Newline
+     \r           Return
+     \t           Horizontal Tab
+     \0, \012     Octal character
+     \x34, \x{5678}   Hexadecimal character
+
+    C's "\v" for vertical tab is not supported as it is rarely used in
+    practice and it collides with Perl's "\v" regexp escape. Perl's "\c" for
+    forming other control characters is also not supported.
+
   $ident = $parser->token_ident
     Expects to find an identifier, and consumes it.
 
@@ -275,8 +340,41 @@ TOKEN PARSING METHODS
     Expects to find a keyword, and consumes it. A keyword is defined as an
     identifier which is exactly one of the literal values passed in.
 
+EXAMPLES
+  Accumulating Results Using Variables
+    Although the structure-forming methods all return a value, obtained from
+    their nested parsing code, it can sometimes be more convenient to use a
+    variable to accumulate a result in instead. For example, consider the
+    following parser method, designed to parse a set of "name: "value""
+    assignments, such as might be found in a configuration file, or
+    YAML/JSON-style mapping value.
+
+     sub parse_dict
+     {
+        my $self = shift;
+ 
+        my %ret;
+        $self->list_of( ",", sub {
+           my $key = $self->token_ident;
+           exists $ret{$key} and $self->fail( "Already have a mapping for '$key'" );
+ 
+           $self->expect( ":" );
+ 
+           $ret{$key} = $self->parse_value;
+        } );
+ 
+        return \%ret
+     }
+
+    Instead of using the return value from "list_of", this method
+    accumulates values in the %ret hash, eventually returning a reference to
+    it as its result. Because of this, it can perform some error checking
+    while it parses; namely, rejecting duplicate keys.
+
 TODO
-    *   Unescaping of string constants; customisable
+    *   Make unescaping of string constants more customisable. Possibly
+        consider instead a "parse_string_generic" using a loop over
+        "substring_before".
 
     *   Easy ability for subclasses to define more token types
 
diff --git a/examples/eval-expr.pl b/examples/eval-expr.pl
index 71c2ecb..ab9a1f2 100755
--- a/examples/eval-expr.pl
+++ b/examples/eval-expr.pl
@@ -3,6 +3,7 @@
 use strict;
 use warnings;
 
+package ExprParser;
 use base qw( Parser::MGC );
 
 sub parse
@@ -16,33 +17,37 @@ sub parse_term
 {
    my $self = shift;
 
-   my $lhs = $self->parse_factor;
+   my $val = $self->parse_factor;
 
-   $self->one_of(
-      sub { $self->expect( "+" ); $self->commit; $lhs + $self->parse_term },
-      sub { $self->expect( "-" ); $self->commit; $lhs - $self->parse_term },
-      sub { $lhs }
+   1 while $self->any_of(
+      sub { $self->expect( "+" ); $self->commit; $val += $self->parse_factor; 1 },
+      sub { $self->expect( "-" ); $self->commit; $val -= $self->parse_factor; 1 },
+      sub { 0 },
    );
+
+   return $val;
 }
 
 sub parse_factor
 {
    my $self = shift;
 
-   my $lhs = $self->parse_atom;
+   my $val = $self->parse_atom;
 
-   $self->one_of(
-      sub { $self->expect( "*" ); $self->commit; $lhs * $self->parse_term },
-      sub { $self->expect( "/" ); $self->commit; $lhs / $self->parse_term },
-      sub { $lhs }
+   1 while $self->any_of(
+      sub { $self->expect( "*" ); $self->commit; $val *= $self->parse_atom; 1 },
+      sub { $self->expect( "/" ); $self->commit; $val /= $self->parse_atom; 1 },
+      sub { 0 },
    );
+
+   return $val;
 }
 
 sub parse_atom
 {
    my $self = shift;
 
-   $self->one_of(
+   $self->any_of(
       sub { $self->scope_of( "(", sub { $self->commit; $self->parse }, ")" ) },
       sub { $self->token_int },
    );
@@ -50,11 +55,15 @@ sub parse_atom
 
 use Data::Dump qw( pp );
 
-my $parser = __PACKAGE__->new;
+if( !caller ) {
+   my $parser = __PACKAGE__->new;
 
-while( defined( my $line = <STDIN> ) ) {
-   my $ret = eval { $parser->from_string( $line ) };
-   print $@ and next if $@;
+   while( defined( my $line = <STDIN> ) ) {
+      my $ret = eval { $parser->from_string( $line ) };
+      print $@ and next if $@;
 
-   print pp( $ret ) . "\n";
+      print pp( $ret ) . "\n";
+   }
 }
+
+1;
diff --git a/examples/parse-dict.pl b/examples/parse-dict.pl
new file mode 100644
index 0000000..178132d
--- /dev/null
+++ b/examples/parse-dict.pl
@@ -0,0 +1,54 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+package DictParser;
+use base qw( Parser::MGC );
+
+sub parse
+{
+   my $self = shift;
+
+   $self->any_of(
+      sub { $self->token_int },
+
+      sub { $self->token_string },
+
+      sub { $self->scope_of( "{",
+               sub { $self->commit; $self->parse_dict },
+            "}" );
+      },
+   );
+}
+
+sub parse_dict
+{
+   my $self = shift;
+
+   my %ret;
+   $self->list_of( ",", sub {
+      my $key = $self->token_ident;
+
+      $self->expect( ":" );
+
+      $ret{$key} = $self->parse;
+   } );
+
+   return \%ret
+}
+
+use Data::Dump qw( pp );
+
+if( !caller ) {
+   my $parser = __PACKAGE__->new;
+
+   while( defined( my $line = <STDIN> ) ) {
+      my $ret = eval { $parser->from_string( $line ) };
+      print $@ and next if $@;
+
+      print pp( $ret ) . "\n";
+   }
+}
+
+1;
diff --git a/examples/parse-pod.pl b/examples/parse-pod.pl
new file mode 100755
index 0000000..918a6c7
--- /dev/null
+++ b/examples/parse-pod.pl
@@ -0,0 +1,39 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+package PodParser;
+use base qw( Parser::MGC );
+
+sub parse
+{
+   my $self = shift;
+
+   $self->sequence_of(
+      sub { $self->any_of(
+
+         sub { my $tag = $self->expect( qr/[A-Z](?=<)/ );
+               $self->commit;
+               my $delim = $self->expect( qr/<+/ );
+               +{ $tag => $self->scope_of( undef, \&parse, ">" x length $delim ) }; },
+
+         sub { $self->substring_before( qr/[A-Z]</ ) },
+      ) },
+   );
+}
+
+use Data::Dump qw( pp );
+
+if( !caller ) {
+   my $parser = __PACKAGE__->new;
+
+   while( defined( my $line = <STDIN> ) ) {
+      my $ret = eval { $parser->from_string( $line ) };
+      print $@ and next if $@;
+
+      print pp( $ret ) . "\n";
+   }
+}
+
+1;
diff --git a/examples/synopsis.pl b/examples/synopsis.pl
old mode 100644
new mode 100755
index 4c1cbb4..51a8006
--- a/examples/synopsis.pl
+++ b/examples/synopsis.pl
@@ -11,7 +11,7 @@ sub parse
    my $self = shift;
 
    $self->sequence_of( sub {
-      $self->one_of(
+      $self->any_of(
          sub { $self->token_int },
          sub { $self->token_string },
          sub { \$self->token_ident },
@@ -23,4 +23,9 @@ sub parse
 my $parser = LispParser->new;
 
 use Data::Dump qw( pp );
-print pp( $parser->from_file( $ARGV[0] ) );
+
+if( !caller ) {
+   print pp( $parser->from_file( $ARGV[0] ) );
+}
+
+1;
diff --git a/lib/Parser/MGC.pm b/lib/Parser/MGC.pm
index 235ec8d..b58897f 100644
--- a/lib/Parser/MGC.pm
+++ b/lib/Parser/MGC.pm
@@ -8,7 +8,7 @@ package Parser::MGC;
 use strict;
 use warnings;
 
-our $VERSION = '0.05';
+our $VERSION = '0.06';
 
 use Carp;
 
@@ -28,7 +28,7 @@ C<Parser::MGC> - build simple recursive-descent parsers
     my $self = shift;
 
     $self->sequence_of( sub {
-       $self->one_of(
+       $self->any_of(
           sub { $self->token_int },
           sub { $self->token_string },
           sub { \$self->token_ident },
@@ -283,8 +283,6 @@ sub fail
    my $self = shift;
    my ( $message ) = @_;
 
-   my ( $lineno, $col, $line ) = $self->where;
-
    die Parser::MGC::Failure->new( $message, $self->where );
 }
 
@@ -298,16 +296,22 @@ sub at_eos
 {
    my $self = shift;
 
-   $self->skip_ws;
-
    my $pos = pos $self->{str};
 
-   return 1 if defined $pos and $pos >= length $self->{str};
+   $self->skip_ws;
 
-   return 0 unless defined $self->{endofscope};
+   my $at_eos;
+   if( pos( $self->{str} ) >= length $self->{str} ) {
+      $at_eos = 1;
+   }
+   elsif( defined $self->{endofscope} ) {
+      $at_eos = $self->{str} =~ m/\G$self->{endofscope}/;
+   }
+   else {
+      $at_eos = 0;
+   }
 
-   # No /g so we won't actually alter pos()
-   my $at_eos = $self->{str} =~ m/\G$self->{endofscope}/;
+   pos( $self->{str} ) = $pos;
 
    return $at_eos;
 }
@@ -393,6 +397,20 @@ failure if called at the end of a scope.
     $self->scope_of( "{", sub { $self->parse_statements }, "}" );
  }
 
+If the C<$start> pattern is undefined, it is presumed the caller has already
+checked for this. This is useful when the stop pattern needs to be calculated
+based on the start pattern.
+
+ sub parse_bracketed
+ {
+    my $self = shift;
+
+    my $delim = $self->expect( qr/[\(\[\<\{]/ );
+    $delim =~ tr/([<{/)]>}/;
+
+    $self->enter_scope( undef, sub { $self->parse_body }, $delim );
+ }
+
 =cut
 
 sub scope_of
@@ -402,7 +420,8 @@ sub scope_of
 
    ref $stop or $stop = qr/\Q$stop/;
 
-   $self->expect( $start );
+   $self->expect( $start ) if defined $start;
+
    local $self->{endofscope} = $stop;
    local $self->{scope_level} = $self->{scope_level} + 1;
 
@@ -441,7 +460,7 @@ sub list_of
    my @ret;
 
    while( !$self->at_eos ) {
-      push @ret, scalar $code->( $self );
+      push @ret, $code->( $self );
 
       $self->skip_ws;
       $self->{str} =~ m/\G$sep/gc or last;
@@ -472,10 +491,16 @@ sub sequence_of
    my $self = shift;
    my ( $code ) = @_;
 
-   return $self->list_of( "", $code );
+   my @ret;
+
+   while( !$self->at_eos ) {
+      push @ret, $code->( $self );
+   }
+
+   return \@ret;
 }
 
-=head2 $ret = $parser->one_of( @codes )
+=head2 $ret = $parser->any_of( @codes )
 
 Expects that one of the given code references can parse something from the
 input, returning what it returned. Each code reference may indicate a failure
@@ -488,16 +513,21 @@ alternations of possible parse trees.
  {
     my $self = shift;
 
-    $self->one_of(
+    $self->any_of(
        sub { $self->parse_declaration; $self->expect(";") },
        sub { $self->parse_expression; $self->expect(";") },
        sub { $self->parse_block },
     );
  }
 
+Note: This method used to be called C<one_of>, but was renamed for clarity.
+Currently this method is provided also as an alias by the old name. Code
+using the old name should be rewritten to C<any_of> instead, as this 
+backward-compatibility alias may be removed in a later version.
+
 =cut
 
-sub one_of
+sub any_of
 {
    my $self = shift;
 
@@ -519,12 +549,14 @@ sub one_of
    $self->fail( "Found nothing parseable" );
 }
 
+*one_of = \&any_of;
+
 =head2 $parser->commit
 
 Calling this method will cancel the backtracking behaviour of the innermost
-C<maybe> or C<one_of> structure forming method. That is, if later code then
+C<maybe> or C<any_of> structure forming method. That is, if later code then
 calls C<fail>, the exception will be propagated out of C<maybe>, and no
-further code blocks will be attempted by C<one_of>.
+further code blocks will be attempted by C<any_of>.
 
 Typically this will be called once the grammatical structure of an
 alternation has been determined, ensuring that any further failures are raised
@@ -534,7 +566,7 @@ as real exceptions, rather than by attempting other alternatives.
  {
     my $self = shift;
 
-    $self->one_of(
+    $self->any_of(
        ...
        sub {
           $self->scope_of( "{",
@@ -593,9 +625,9 @@ sub skip_ws
    }
 }
 
-=head2 $parser->expect( $string )
+=head2 $str = $parser->expect( $literal )
 
-=head2 $parser->expect( qr/pattern/ )
+=head2 $str = $parser->expect( qr/pattern/ )
 
 Expects to find a literal string or regexp pattern match, and consumes it.
 This method returns the string that was captured.
@@ -616,6 +648,62 @@ sub expect
    return $1;
 }
 
+=head2 $str = $parser->substring_before( $literal )
+
+=head2 $str = $parser->substring_before( qr/pattern/ )
+
+Expects to possibly find a literal string or regexp pattern match. If it finds
+such, consume all the input text before but excluding this match, and return
+it. If it fails to find a match before the end of the current scope, consumes
+all the input text until the end of scope and return it.
+
+This method does not consume the part of input that matches, only the text
+before it. It is not considered a failure if the substring before this match
+is empty. If a non-empty match is required, use the C<fail> method:
+
+ sub token_nonempty_part
+ {
+    my $self = shift;
+
+    my $str = $parser->substring_before( "," );
+    length $str or $self->fail( "Expected a string fragment before ," );
+
+    return $str;
+ }
+
+Note that unlike most of the other token parsing methods, this method does not
+consume either leading or trailing whitespace around the substring. It is
+expected that this method would be used as part a parser to read quoted
+strings, or similar cases where whitespace should be preserved.
+
+=cut
+
+sub substring_before
+{
+   my $self = shift;
+   my ( $expect ) = @_;
+
+   ref $expect or $expect = qr/\Q$expect/;
+
+   my $endre = ( defined $self->{endofscope} ) ?
+      qr/$expect|$self->{endofscope}/ :
+      $expect;
+
+   # NO skip_ws
+
+   my $start = pos $self->{str};
+   my $end;
+   if( $self->{str} =~ m/\G(?s:.*?)($endre)/ ) {
+      $end = $-[1];
+   }
+   else {
+      $end = length $self->{str};
+   }
+
+   pos( $self->{str} ) = $end;
+   return substr( $self->{str}, $start, $end - $start );
+}
+
 =head2 $int = $parser->token_int
 
 Expects to find an integer in decimal, octal or hexadecimal notation, and
@@ -629,6 +717,7 @@ sub token_int
 
    $self->fail( "Expected integer" ) if $self->at_eos;
 
+   $self->skip_ws;
    $self->{str} =~ m/\G(-?)($self->{patterns}{int})/gc or
       $self->fail( "Expected integer" );
 
@@ -654,6 +743,7 @@ sub token_float
 
    $self->fail( "Expected float" ) if $self->at_eos;
 
+   $self->skip_ws;
    $self->{str} =~ m/\G(-?(?:\d*\.\d+|\d+\.)(?:e-?\d+)?|-?\d+e-?\d+)/gci or
       $self->fail( "Expected float" );
 
@@ -665,8 +755,35 @@ sub token_float
 Expects to find a quoted string, and consumes it. The string should be quoted
 using C<"> or C<'> quote marks.
 
+The content of the quoted string can contain character escapes similar to
+those accepted by C or Perl. Specifically, the following forms are recognised:
+
+ \a           Bell ("alert")
+ \b           Backspace
+ \e           Escape
+ \f           Form feed
+ \n           Newline
+ \r           Return
+ \t           Horizontal Tab
+ \0, \012     Octal character
+ \x34, \x{5678}   Hexadecimal character
+
+C's C<\v> for vertical tab is not supported as it is rarely used in practice
+and it collides with Perl's C<\v> regexp escape. Perl's C<\c> for forming other
+control characters is also not supported.
+
 =cut
 
+my %escapes = (
+   a => "\a",
+   b => "\b",
+   e => "\e",
+   f => "\f",
+   n => "\n",
+   r => "\r",
+   t => "\t",
+);
+
 sub token_string
 {
    my $self = shift;
@@ -675,17 +792,31 @@ sub token_string
 
    my $pos = pos $self->{str};
 
+   $self->skip_ws;
    $self->{str} =~ m/\G($self->{patterns}{string_delim})/gc or
       $self->fail( "Expected string delimiter" );
 
    my $delim = $1;
 
-   $self->{str} =~ m/\G((?:\\.|[^\\])*?)$delim/gc or
-      pos($self->{str}) = $pos, $self->fail( "Expected contents of string" );
+   $self->{str} =~ m/
+      \G(
+         (?:
+            \\[0-7]{1,3}     # octal escape
+           |\\x[0-9A-F]{2}   # 2-digit hex escape
+           |\\x\{[0-9A-F]+\} # {}-delimited hex escape
+           |\\.              # symbolic escape
+           |[^\\$delim]+     # plain chunk
+         )*?
+      )$delim/gcix or
+         pos($self->{str}) = $pos, $self->fail( "Expected contents of string" );
 
    my $string = $1;
 
-   # TODO: Unescape stuff like \\ and \n and whatnot
+   $string =~ s<\\(?:([0-7]{1,3})|x([0-9A-F]{2})|x\{([0-9A-F]+)\}|(.))>
+               [defined $1 ? chr oct $1 :
+                defined $2 ? chr hex $2 :
+                defined $3 ? chr hex $3 :
+                             exists $escapes{$4} ? $escapes{$4} : $4]egi;
 
    return $string;
 }
@@ -702,6 +833,7 @@ sub token_ident
 
    $self->fail( "Expected identifier" ) if $self->at_eos;
 
+   $self->skip_ws;
    $self->{str} =~ m/\G($self->{patterns}{ident})/gc or
       $self->fail( "Expected identifier" );
 
@@ -764,13 +896,46 @@ sub STRING
 # Provide fallback operators for cmp, eq, etc...
 use overload fallback => 1;
 
+=head1 EXAMPLES
+
+=head2 Accumulating Results Using Variables
+
+Although the structure-forming methods all return a value, obtained from their
+nested parsing code, it can sometimes be more convenient to use a variable to
+accumulate a result in instead. For example, consider the following parser
+method, designed to parse a set of C<name: "value"> assignments, such as might
+be found in a configuration file, or YAML/JSON-style mapping value.
+
+ sub parse_dict
+ {
+    my $self = shift;
+ 
+    my %ret;
+    $self->list_of( ",", sub {
+       my $key = $self->token_ident;
+       exists $ret{$key} and $self->fail( "Already have a mapping for '$key'" );
+ 
+       $self->expect( ":" );
+ 
+       $ret{$key} = $self->parse_value;
+    } );
+ 
+    return \%ret
+ }
+
+Instead of using the return value from C<list_of>, this method accumulates
+values in the C<%ret> hash, eventually returning a reference to it as its
+result. Because of this, it can perform some error checking while it parses;
+namely, rejecting duplicate keys.
+
 =head1 TODO
 
 =over 4
 
 =item *
 
-Unescaping of string constants; customisable
+Make unescaping of string constants more customisable. Possibly consider
+instead a C<parse_string_generic> using a loop over C<substring_before>.
 
 =item *
 
diff --git a/t/02expect.t b/t/02expect.t
index a6382e2..6826c73 100644
--- a/t/02expect.t
+++ b/t/02expect.t
@@ -2,7 +2,7 @@
 
 use strict;
 
-use Test::More tests => 3;
+use Test::More tests => 4;
 
 package TestParser;
 use base qw( Parser::MGC );
@@ -22,6 +22,10 @@ is_deeply( $parser->from_string( "hello world" ),
    [ "hello", "world" ],
    '"hello world"' );
 
+is_deeply( $parser->from_string( "  hello world  " ),
+   [ "hello", "world" ],
+   '"  hello world  "' );
+
 # Perl 5.13.6 changed the regexp form
 # Accept both old and new-style stringification
 my $modifiers = (qr/foobar/ =~ /\Q(?^/) ? '^' : '-xism';
diff --git a/t/06substring.t b/t/06substring.t
new file mode 100644
index 0000000..8d76371
--- /dev/null
+++ b/t/06substring.t
@@ -0,0 +1,27 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+use Test::More tests => 2;
+
+package TestParser;
+use base qw( Parser::MGC );
+
+sub parse
+{
+   my $self = shift;
+
+   [ $self->substring_before( "!" ), $self->expect( "!" ) ];
+}
+
+package main;
+
+my $parser = TestParser->new;
+
+is_deeply( $parser->from_string( "Hello, world!" ),
+   [ "Hello, world", "!" ],
+   '"Hello, world!"' );
+
+is_deeply( $parser->from_string( "!" ),
+   [ "", "!" ],
+   '"Hello, world!"' );
diff --git a/t/12token_string.t b/t/12token_string.t
index f57b560..39c3eba 100644
--- a/t/12token_string.t
+++ b/t/12token_string.t
@@ -2,7 +2,7 @@
 
 use strict;
 
-use Test::More tests => 7;
+use Test::More tests => 18;
 
 package TestParser;
 use base qw( Parser::MGC );
@@ -24,6 +24,21 @@ is( $parser->from_string( q["double"] ), "double", 'Double quoted string' );
 is( $parser->from_string( q["foo 'bar'"] ), "foo 'bar'", 'Double quoted string containing single substr' );
 is( $parser->from_string( q['foo "bar"'] ), 'foo "bar"', 'Single quoted string containing double substr' );
 
+is( $parser->from_string( q["tab \t"]       ), "tab \t",       '\t' );
+is( $parser->from_string( q["newline \n"]   ), "newline \n",   '\n' );
+is( $parser->from_string( q["return \r"]    ), "return \r",    '\r' );
+is( $parser->from_string( q["form feed \f"] ), "form feed \f", '\f' );
+is( $parser->from_string( q["backspace \b"] ), "backspace \b", '\b' );
+is( $parser->from_string( q["bell \a"]      ), "bell \a",      '\a' );
+is( $parser->from_string( q["escape \e"]    ), "escape \e",    '\e' );
+
+# ord('A') == 65 == 0101 == 0x41 
+#  TODO: This is ASCII dependent. If anyone on EBCDIC cares, do let me know...
+is( $parser->from_string( q["null \0"] ),         "null \0",         'Octal null' );
+is( $parser->from_string( q["octal \101BC"] ),    "octal ABC",       'Octal' );
+is( $parser->from_string( q["hex \x41BC"] ),      "hex ABC",         'Hexadecimal' );
+is( $parser->from_string( q["unihex \x{263a}"] ), "unihex \x{263a}", 'Unicode hex' );
+
 $parser = TestParser->new(
    patterns => { string_delim => qr/"/ }
 );
diff --git a/t/21scope_of.t b/t/21scope_of.t
index fcb7da9..1ebf552 100644
--- a/t/21scope_of.t
+++ b/t/21scope_of.t
@@ -2,7 +2,7 @@
 
 use strict;
 
-use Test::More tests => 3;
+use Test::More tests => 6;
 
 package TestParser;
 use base qw( Parser::MGC );
@@ -18,6 +18,23 @@ sub parse
    );
 }
 
+package DynamicDelimParser;
+use base qw( Parser::MGC );
+
+sub parse
+{
+   my $self = shift;
+
+   my $delim = $self->expect( qr/[\(\[]/ );
+   $delim =~ tr/([/)]/;
+
+   $self->scope_of(
+      undef,
+      sub { return $self->token_int },
+      $delim,
+   );
+}
+
 package main;
 
 my $parser = TestParser->new;
@@ -26,3 +43,10 @@ is( $parser->from_string( "(123)" ), 123, '"(123)"' );
 
 ok( !eval { $parser->from_string( "(abc)" ) }, '"(abc)"' );
 ok( !eval { $parser->from_string( "456" ) }, '"456"' );
+
+$parser = DynamicDelimParser->new;
+
+is( $parser->from_string( "(45)" ), 45, '"(45)"' );
+is( $parser->from_string( "[45]" ), 45, '"[45]"' );
+
+ok( !eval { $parser->from_string( "(45]" ) }, '"(45]" fails' );
diff --git a/t/24one_of.t b/t/24any_of.t
similarity index 97%
rename from t/24one_of.t
rename to t/24any_of.t
index 665e291..aec0559 100644
--- a/t/24one_of.t
+++ b/t/24any_of.t
@@ -11,7 +11,7 @@ sub parse
 {
    my $self = shift;
 
-   $self->one_of(
+   $self->any_of(
       sub { [ int => $self->token_int ] },
       sub { [ str => $self->token_string ] },
       sub { [ ident => $self->token_ident ] },
diff --git a/t/30commit.t b/t/30commit.t
index b9080bb..77fe010 100644
--- a/t/30commit.t
+++ b/t/30commit.t
@@ -11,7 +11,7 @@ sub parse
 {
    my $self = shift;
 
-   $self->one_of(
+   $self->any_of(
       sub { $self->token_int },
       sub {
          $self->scope_of( "(",
diff --git a/t/31scope_level.t b/t/31scope_level.t
index 06e87ac..50e581f 100644
--- a/t/31scope_level.t
+++ b/t/31scope_level.t
@@ -13,7 +13,7 @@ sub parse
 
    $self->sequence_of( 
       sub {
-         $self->one_of(
+         $self->any_of(
             sub { $self->expect( qr/[a-z]+/ ) . "/" . $self->scope_level },
             sub { $self->scope_of( "(", \&parse, ")" ) },
          );
diff --git a/t/90ex_dict.t b/t/90ex_dict.t
new file mode 100644
index 0000000..efdd78d
--- /dev/null
+++ b/t/90ex_dict.t
@@ -0,0 +1,32 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+use Test::More tests => 4;
+
+require "examples/parse-dict.pl";
+
+my $parser = DictParser->new;
+
+sub test
+{
+   my ( $str, $expect, $name ) = @_;
+
+   is_deeply( $parser->from_string( $str ), $expect, $name );
+}
+
+test q[123],
+     123,
+     "Number";
+
+test q["Hello"],
+     "Hello",
+     "String";
+
+test q[{one: 1, two: 2}],
+     { one => 1, two => 2 },
+     "Flat dict";
+
+test q[{numbers: {three: 3, four: 4}}],
+     { numbers => { three => 3, four => 4 } },
+     "Nested dict";
diff --git a/t/90ex_expr.t b/t/90ex_expr.t
new file mode 100644
index 0000000..c3df649
--- /dev/null
+++ b/t/90ex_expr.t
@@ -0,0 +1,33 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+use Test::More;
+
+require "examples/eval-expr.pl";
+
+my $parser = ExprParser->new;
+
+while( <DATA> ) {
+   chomp;
+   my ( $str, $expect ) = split m/=/;
+
+   is( $parser->from_string( $str ), $expect, $str );
+}
+
+done_testing;
+
+__DATA__
+1+2=3
+ 1 + 2 =3
+1+2+3=6
+10-4=6
+10-2-2=6
+3*4=12
+3*4*5=60
+20/4=5
+20/5/2=2
+3+4*5=23
+4*5+3=23
+(3+4)*5=35
+4*(5+3)=32
diff --git a/t/90ex_pod.t b/t/90ex_pod.t
new file mode 100644
index 0000000..1dbb98f
--- /dev/null
+++ b/t/90ex_pod.t
@@ -0,0 +1,36 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+use Test::More tests => 5;
+
+require "examples/parse-pod.pl";
+
+my $parser = PodParser->new;
+
+sub test
+{
+   my ( $str, $expect, $name ) = @_;
+
+   is_deeply( $parser->from_string( $str ), $expect, $name );
+}
+
+test "Plain text",
+     [ "Plain text" ],
+     "plain";
+
+test "B<bold>",
+     [ { B => [ "bold" ] } ],
+     "B<>";
+
+test "Text with I<italic> text",
+     [ "Text with ", { I => [ "italic" ] }, " text" ],
+     "I<> surrounded";
+
+test "Nested B<I<tags>>",
+     [ "Nested ", { B => [ { I => [ "tags" ] } ] } ],
+     "Nested";
+
+test "Double C<< Class->method >> tags",
+     [ "Double ", { C => [ " Class->method " ] }, " tags" ],
+     "Double tags";
diff --git a/t/90ex_synopsis.t b/t/90ex_synopsis.t
new file mode 100644
index 0000000..097c4ca
--- /dev/null
+++ b/t/90ex_synopsis.t
@@ -0,0 +1,21 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+use Test::More tests => 4;
+
+require "examples/synopsis.pl";
+
+my $parser = LispParser->new;
+
+sub test
+{
+   my ( $str, $expect ) = @_;
+
+   is_deeply( $parser->from_string( $str ), [ $expect ], qq("$str") );
+}
+
+test "123", 123;
+test "'hello'", 'hello';
+test "(123 456)", [ 123, 456 ];
+test "(+ 1 (* 2 3))", [ \'+', 1, [ \'*', 2, 3 ] ];
diff --git a/t/98backcompat.t b/t/98backcompat.t
new file mode 100644
index 0000000..a4d9f1c
--- /dev/null
+++ b/t/98backcompat.t
@@ -0,0 +1,25 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+use Test::More tests => 2;
+
+package OneOfParser;
+use base qw( Parser::MGC );
+
+sub parse
+{
+   my $self = shift;
+
+   $self->one_of(
+      sub { [ int => $self->token_int ] },
+      sub { [ str => $self->token_string ] },
+   );
+}
+
+package main;
+
+my $parser = OneOfParser->new;
+
+is_deeply( $parser->from_string( "123" ), [ int => 123 ], 'one_of integer' );
+is_deeply( $parser->from_string( q["hi"] ), [ str => "hi" ], 'one_of string' );

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libparser-mgc-perl.git



More information about the Pkg-perl-cvs-commits mailing list