[libparser-mgc-perl] 06/12: Import of PEVANS/Parser-MGC-0.06 from CPAN.
Jonas Smedegaard
dr at jones.dk
Sat Dec 17 17:37:01 UTC 2016
This is an automated email from the git hooks/post-receive script.
js pushed a commit to tag PEVANS
in repository libparser-mgc-perl.
commit 79aa1b271436ee7b06bc0d1c61c142ca2d5d3fb7
Author: Paul Evans <leonerd at leonerd.org.uk>
Date: Thu Mar 17 17:52:48 2011 +0000
Import of PEVANS/Parser-MGC-0.06 from CPAN.
gitpan-cpan-distribution: Parser-MGC
gitpan-cpan-version: 0.06
gitpan-cpan-path: PEVANS/Parser-MGC-0.06.tar.gz
gitpan-cpan-author: PEVANS
gitpan-cpan-maturity: released
---
Changes | 10 ++
MANIFEST | 10 +-
META.yml | 4 +-
README | 116 +++++++++++++++++++++--
examples/eval-expr.pl | 41 +++++----
examples/parse-dict.pl | 54 +++++++++++
examples/parse-pod.pl | 39 ++++++++
examples/synopsis.pl | 9 +-
lib/Parser/MGC.pm | 215 ++++++++++++++++++++++++++++++++++++++-----
t/02expect.t | 6 +-
t/06substring.t | 27 ++++++
t/12token_string.t | 17 +++-
t/21scope_of.t | 26 +++++-
t/{24one_of.t => 24any_of.t} | 2 +-
t/30commit.t | 2 +-
t/31scope_level.t | 2 +-
t/90ex_dict.t | 32 +++++++
t/90ex_expr.t | 33 +++++++
t/90ex_pod.t | 36 ++++++++
t/90ex_synopsis.t | 21 +++++
t/98backcompat.t | 25 +++++
21 files changed, 666 insertions(+), 61 deletions(-)
diff --git a/Changes b/Changes
index 5b7312c..0259a77 100644
--- a/Changes
+++ b/Changes
@@ -1,5 +1,15 @@
Revision history for Parser-MGC
+0.06 CHANGES:
+ * Renamed ->one_of to ->any_of
+ * Added ->substring_before
+ * Allow ->scope_of to not take a start pattern
+ * Recognise the usual set of character escapes in ->token_string
+ * Added more example scripts to demonstrate:
+ + the use ->substring_before to parse POD-like notation
+ + accumulator variables instead of structural return
+ * Unit-test the example scripts
+
0.05 CHANGES:
* Added ->scope_level
* Added ->from_reader as a new potential source of string input
diff --git a/MANIFEST b/MANIFEST
index d2bcc68..33ae696 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -1,6 +1,8 @@
Build.PL
Changes
examples/eval-expr.pl
+examples/parse-dict.pl
+examples/parse-pod.pl
examples/synopsis.pl
lib/Parser/MGC.pm
LICENSE
@@ -14,6 +16,7 @@ t/02expect.t
t/03reader.t
t/04where.t
t/05comment.t
+t/06substring.t
t/10token_int.t
t/11token_float.t
t/12token_string.t
@@ -23,7 +26,12 @@ t/20maybe.t
t/21scope_of.t
t/22list_of.t
t/23sequence_of.t
-t/24one_of.t
+t/24any_of.t
t/30commit.t
t/31scope_level.t
+t/90ex_dict.t
+t/90ex_expr.t
+t/90ex_pod.t
+t/90ex_synopsis.t
+t/98backcompat.t
t/99pod.t
diff --git a/META.yml b/META.yml
index 4a88122..41e1e73 100644
--- a/META.yml
+++ b/META.yml
@@ -15,9 +15,9 @@ name: Parser-MGC
provides:
Parser::MGC:
file: lib/Parser/MGC.pm
- version: 0.05
+ version: 0.06
requires:
File::Slurp: 0
resources:
license: http://dev.perl.org/licenses/
-version: 0.05
+version: 0.06
diff --git a/README b/README
index 6b211da..efe5982 100644
--- a/README
+++ b/README
@@ -10,7 +10,7 @@ SYNOPSIS
my $self = shift;
$self->sequence_of( sub {
- $self->one_of(
+ $self->any_of(
sub { $self->token_int },
sub { $self->token_string },
sub { \$self->token_ident },
@@ -171,6 +171,20 @@ STRUCTURE-FORMING METHODS
$self->scope_of( "{", sub { $self->parse_statements }, "}" );
}
+ If the $start pattern is undefined, it is presumed the caller has
+ already checked for this. This is useful when the stop pattern needs to
+ be calculated based on the start pattern.
+
+ sub parse_bracketed
+ {
+ my $self = shift;
+
+ my $delim = $self->expect( qr/[\(\[\<\{]/ );
+ $delim =~ tr/([<{/)]>}/;
+
+ $self->enter_scope( undef, sub { $self->parse_body }, $delim );
+ }
+
$ret = $parser->list_of( $sep, $code )
Expects to find a list of instances of something parsed by $code,
separated by the $sep pattern. Returns an ARRAY ref containing a list of
@@ -202,7 +216,7 @@ STRUCTURE-FORMING METHODS
$self->sequence_of( sub { $self->parse_statement } );
}
- $ret = $parser->one_of( @codes )
+ $ret = $parser->any_of( @codes )
Expects that one of the given code references can parse something from
the input, returning what it returned. Each code reference may indicate
a failure to parse by calling the "fail" method.
@@ -214,18 +228,23 @@ STRUCTURE-FORMING METHODS
{
my $self = shift;
- $self->one_of(
+ $self->any_of(
sub { $self->parse_declaration; $self->expect(";") },
sub { $self->parse_expression; $self->expect(";") },
sub { $self->parse_block },
);
}
+ Note: This method used to be called "one_of", but was renamed for
+ clarity. Currently this method is provided also as an alias by the old
+ name. Code using the old name should be rewritten to "any_of" instead,
+ as this backward-compatibility alias may be removed in a later version.
+
$parser->commit
Calling this method will cancel the backtracking behaviour of the
- innermost "maybe" or "one_of" structure forming method. That is, if
+ innermost "maybe" or "any_of" structure forming method. That is, if
later code then calls "fail", the exception will be propagated out of
- "maybe", and no further code blocks will be attempted by "one_of".
+ "maybe", and no further code blocks will be attempted by "any_of".
Typically this will be called once the grammatical structure of an
alternation has been determined, ensuring that any further failures are
@@ -235,7 +254,7 @@ STRUCTURE-FORMING METHODS
{
my $self = shift;
- $self->one_of(
+ $self->any_of(
...
sub {
$self->scope_of( "{",
@@ -249,11 +268,39 @@ TOKEN PARSING METHODS
The following methods attempt to consume some part of the input string,
to be used as part of the parsing process.
- $parser->expect( $string )
- $parser->expect( qr/pattern/ )
+ $str = $parser->expect( $literal )
+ $str = $parser->expect( qr/pattern/ )
Expects to find a literal string or regexp pattern match, and consumes
it. This method returns the string that was captured.
+ $str = $parser->substring_before( $literal )
+ $str = $parser->substring_before( qr/pattern/ )
+ Expects to possibly find a literal string or regexp pattern match. If it
+ finds such, consume all the input text before but excluding this match,
+ and return it. If it fails to find a match before the end of the current
+ scope, consumes all the input text until the end of scope and return it.
+
+ This method does not consume the part of input that matches, only the
+ text before it. It is not considered a failure if the substring before
+ this match is empty. If a non-empty match is required, use the "fail"
+ method:
+
+ sub token_nonempty_part
+ {
+ my $self = shift;
+
+ my $str = $parser->substring_before( "," );
+ length $str or $self->fail( "Expected a string fragment before ," );
+
+ return $str;
+ }
+
+ Note that unlike most of the other token parsing methods, this method
+ does not consume either leading or trailing whitespace around the
+ substring. It is expected that this method would be used as part a
+ parser to read quoted strings, or similar cases where whitespace should
+ be preserved.
+
$int = $parser->token_int
Expects to find an integer in decimal, octal or hexadecimal notation,
and consumes it. Negative integers, preceeded by "-", are also
@@ -268,6 +315,24 @@ TOKEN PARSING METHODS
Expects to find a quoted string, and consumes it. The string should be
quoted using """ or "'" quote marks.
+ The content of the quoted string can contain character escapes similar
+ to those accepted by C or Perl. Specifically, the following forms are
+ recognised:
+
+ \a Bell ("alert")
+ \b Backspace
+ \e Escape
+ \f Form feed
+ \n Newline
+ \r Return
+ \t Horizontal Tab
+ \0, \012 Octal character
+ \x34, \x{5678} Hexadecimal character
+
+ C's "\v" for vertical tab is not supported as it is rarely used in
+ practice and it collides with Perl's "\v" regexp escape. Perl's "\c" for
+ forming other control characters is also not supported.
+
$ident = $parser->token_ident
Expects to find an identifier, and consumes it.
@@ -275,8 +340,41 @@ TOKEN PARSING METHODS
Expects to find a keyword, and consumes it. A keyword is defined as an
identifier which is exactly one of the literal values passed in.
+EXAMPLES
+ Accumulating Results Using Variables
+ Although the structure-forming methods all return a value, obtained from
+ their nested parsing code, it can sometimes be more convenient to use a
+ variable to accumulate a result in instead. For example, consider the
+ following parser method, designed to parse a set of "name: "value""
+ assignments, such as might be found in a configuration file, or
+ YAML/JSON-style mapping value.
+
+ sub parse_dict
+ {
+ my $self = shift;
+
+ my %ret;
+ $self->list_of( ",", sub {
+ my $key = $self->token_ident;
+ exists $ret{$key} and $self->fail( "Already have a mapping for '$key'" );
+
+ $self->expect( ":" );
+
+ $ret{$key} = $self->parse_value;
+ } );
+
+ return \%ret
+ }
+
+ Instead of using the return value from "list_of", this method
+ accumulates values in the %ret hash, eventually returning a reference to
+ it as its result. Because of this, it can perform some error checking
+ while it parses; namely, rejecting duplicate keys.
+
TODO
- * Unescaping of string constants; customisable
+ * Make unescaping of string constants more customisable. Possibly
+ consider instead a "parse_string_generic" using a loop over
+ "substring_before".
* Easy ability for subclasses to define more token types
diff --git a/examples/eval-expr.pl b/examples/eval-expr.pl
index 71c2ecb..ab9a1f2 100755
--- a/examples/eval-expr.pl
+++ b/examples/eval-expr.pl
@@ -3,6 +3,7 @@
use strict;
use warnings;
+package ExprParser;
use base qw( Parser::MGC );
sub parse
@@ -16,33 +17,37 @@ sub parse_term
{
my $self = shift;
- my $lhs = $self->parse_factor;
+ my $val = $self->parse_factor;
- $self->one_of(
- sub { $self->expect( "+" ); $self->commit; $lhs + $self->parse_term },
- sub { $self->expect( "-" ); $self->commit; $lhs - $self->parse_term },
- sub { $lhs }
+ 1 while $self->any_of(
+ sub { $self->expect( "+" ); $self->commit; $val += $self->parse_factor; 1 },
+ sub { $self->expect( "-" ); $self->commit; $val -= $self->parse_factor; 1 },
+ sub { 0 },
);
+
+ return $val;
}
sub parse_factor
{
my $self = shift;
- my $lhs = $self->parse_atom;
+ my $val = $self->parse_atom;
- $self->one_of(
- sub { $self->expect( "*" ); $self->commit; $lhs * $self->parse_term },
- sub { $self->expect( "/" ); $self->commit; $lhs / $self->parse_term },
- sub { $lhs }
+ 1 while $self->any_of(
+ sub { $self->expect( "*" ); $self->commit; $val *= $self->parse_atom; 1 },
+ sub { $self->expect( "/" ); $self->commit; $val /= $self->parse_atom; 1 },
+ sub { 0 },
);
+
+ return $val;
}
sub parse_atom
{
my $self = shift;
- $self->one_of(
+ $self->any_of(
sub { $self->scope_of( "(", sub { $self->commit; $self->parse }, ")" ) },
sub { $self->token_int },
);
@@ -50,11 +55,15 @@ sub parse_atom
use Data::Dump qw( pp );
-my $parser = __PACKAGE__->new;
+if( !caller ) {
+ my $parser = __PACKAGE__->new;
-while( defined( my $line = <STDIN> ) ) {
- my $ret = eval { $parser->from_string( $line ) };
- print $@ and next if $@;
+ while( defined( my $line = <STDIN> ) ) {
+ my $ret = eval { $parser->from_string( $line ) };
+ print $@ and next if $@;
- print pp( $ret ) . "\n";
+ print pp( $ret ) . "\n";
+ }
}
+
+1;
diff --git a/examples/parse-dict.pl b/examples/parse-dict.pl
new file mode 100644
index 0000000..178132d
--- /dev/null
+++ b/examples/parse-dict.pl
@@ -0,0 +1,54 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+package DictParser;
+use base qw( Parser::MGC );
+
+sub parse
+{
+ my $self = shift;
+
+ $self->any_of(
+ sub { $self->token_int },
+
+ sub { $self->token_string },
+
+ sub { $self->scope_of( "{",
+ sub { $self->commit; $self->parse_dict },
+ "}" );
+ },
+ );
+}
+
+sub parse_dict
+{
+ my $self = shift;
+
+ my %ret;
+ $self->list_of( ",", sub {
+ my $key = $self->token_ident;
+
+ $self->expect( ":" );
+
+ $ret{$key} = $self->parse;
+ } );
+
+ return \%ret
+}
+
+use Data::Dump qw( pp );
+
+if( !caller ) {
+ my $parser = __PACKAGE__->new;
+
+ while( defined( my $line = <STDIN> ) ) {
+ my $ret = eval { $parser->from_string( $line ) };
+ print $@ and next if $@;
+
+ print pp( $ret ) . "\n";
+ }
+}
+
+1;
diff --git a/examples/parse-pod.pl b/examples/parse-pod.pl
new file mode 100755
index 0000000..918a6c7
--- /dev/null
+++ b/examples/parse-pod.pl
@@ -0,0 +1,39 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+package PodParser;
+use base qw( Parser::MGC );
+
+sub parse
+{
+ my $self = shift;
+
+ $self->sequence_of(
+ sub { $self->any_of(
+
+ sub { my $tag = $self->expect( qr/[A-Z](?=<)/ );
+ $self->commit;
+ my $delim = $self->expect( qr/<+/ );
+ +{ $tag => $self->scope_of( undef, \&parse, ">" x length $delim ) }; },
+
+ sub { $self->substring_before( qr/[A-Z]</ ) },
+ ) },
+ );
+}
+
+use Data::Dump qw( pp );
+
+if( !caller ) {
+ my $parser = __PACKAGE__->new;
+
+ while( defined( my $line = <STDIN> ) ) {
+ my $ret = eval { $parser->from_string( $line ) };
+ print $@ and next if $@;
+
+ print pp( $ret ) . "\n";
+ }
+}
+
+1;
diff --git a/examples/synopsis.pl b/examples/synopsis.pl
old mode 100644
new mode 100755
index 4c1cbb4..51a8006
--- a/examples/synopsis.pl
+++ b/examples/synopsis.pl
@@ -11,7 +11,7 @@ sub parse
my $self = shift;
$self->sequence_of( sub {
- $self->one_of(
+ $self->any_of(
sub { $self->token_int },
sub { $self->token_string },
sub { \$self->token_ident },
@@ -23,4 +23,9 @@ sub parse
my $parser = LispParser->new;
use Data::Dump qw( pp );
-print pp( $parser->from_file( $ARGV[0] ) );
+
+if( !caller ) {
+ print pp( $parser->from_file( $ARGV[0] ) );
+}
+
+1;
diff --git a/lib/Parser/MGC.pm b/lib/Parser/MGC.pm
index 235ec8d..b58897f 100644
--- a/lib/Parser/MGC.pm
+++ b/lib/Parser/MGC.pm
@@ -8,7 +8,7 @@ package Parser::MGC;
use strict;
use warnings;
-our $VERSION = '0.05';
+our $VERSION = '0.06';
use Carp;
@@ -28,7 +28,7 @@ C<Parser::MGC> - build simple recursive-descent parsers
my $self = shift;
$self->sequence_of( sub {
- $self->one_of(
+ $self->any_of(
sub { $self->token_int },
sub { $self->token_string },
sub { \$self->token_ident },
@@ -283,8 +283,6 @@ sub fail
my $self = shift;
my ( $message ) = @_;
- my ( $lineno, $col, $line ) = $self->where;
-
die Parser::MGC::Failure->new( $message, $self->where );
}
@@ -298,16 +296,22 @@ sub at_eos
{
my $self = shift;
- $self->skip_ws;
-
my $pos = pos $self->{str};
- return 1 if defined $pos and $pos >= length $self->{str};
+ $self->skip_ws;
- return 0 unless defined $self->{endofscope};
+ my $at_eos;
+ if( pos( $self->{str} ) >= length $self->{str} ) {
+ $at_eos = 1;
+ }
+ elsif( defined $self->{endofscope} ) {
+ $at_eos = $self->{str} =~ m/\G$self->{endofscope}/;
+ }
+ else {
+ $at_eos = 0;
+ }
- # No /g so we won't actually alter pos()
- my $at_eos = $self->{str} =~ m/\G$self->{endofscope}/;
+ pos( $self->{str} ) = $pos;
return $at_eos;
}
@@ -393,6 +397,20 @@ failure if called at the end of a scope.
$self->scope_of( "{", sub { $self->parse_statements }, "}" );
}
+If the C<$start> pattern is undefined, it is presumed the caller has already
+checked for this. This is useful when the stop pattern needs to be calculated
+based on the start pattern.
+
+ sub parse_bracketed
+ {
+ my $self = shift;
+
+ my $delim = $self->expect( qr/[\(\[\<\{]/ );
+ $delim =~ tr/([<{/)]>}/;
+
+ $self->enter_scope( undef, sub { $self->parse_body }, $delim );
+ }
+
=cut
sub scope_of
@@ -402,7 +420,8 @@ sub scope_of
ref $stop or $stop = qr/\Q$stop/;
- $self->expect( $start );
+ $self->expect( $start ) if defined $start;
+
local $self->{endofscope} = $stop;
local $self->{scope_level} = $self->{scope_level} + 1;
@@ -441,7 +460,7 @@ sub list_of
my @ret;
while( !$self->at_eos ) {
- push @ret, scalar $code->( $self );
+ push @ret, $code->( $self );
$self->skip_ws;
$self->{str} =~ m/\G$sep/gc or last;
@@ -472,10 +491,16 @@ sub sequence_of
my $self = shift;
my ( $code ) = @_;
- return $self->list_of( "", $code );
+ my @ret;
+
+ while( !$self->at_eos ) {
+ push @ret, $code->( $self );
+ }
+
+ return \@ret;
}
-=head2 $ret = $parser->one_of( @codes )
+=head2 $ret = $parser->any_of( @codes )
Expects that one of the given code references can parse something from the
input, returning what it returned. Each code reference may indicate a failure
@@ -488,16 +513,21 @@ alternations of possible parse trees.
{
my $self = shift;
- $self->one_of(
+ $self->any_of(
sub { $self->parse_declaration; $self->expect(";") },
sub { $self->parse_expression; $self->expect(";") },
sub { $self->parse_block },
);
}
+Note: This method used to be called C<one_of>, but was renamed for clarity.
+Currently this method is provided also as an alias by the old name. Code
+using the old name should be rewritten to C<any_of> instead, as this
+backward-compatibility alias may be removed in a later version.
+
=cut
-sub one_of
+sub any_of
{
my $self = shift;
@@ -519,12 +549,14 @@ sub one_of
$self->fail( "Found nothing parseable" );
}
+*one_of = \&any_of;
+
=head2 $parser->commit
Calling this method will cancel the backtracking behaviour of the innermost
-C<maybe> or C<one_of> structure forming method. That is, if later code then
+C<maybe> or C<any_of> structure forming method. That is, if later code then
calls C<fail>, the exception will be propagated out of C<maybe>, and no
-further code blocks will be attempted by C<one_of>.
+further code blocks will be attempted by C<any_of>.
Typically this will be called once the grammatical structure of an
alternation has been determined, ensuring that any further failures are raised
@@ -534,7 +566,7 @@ as real exceptions, rather than by attempting other alternatives.
{
my $self = shift;
- $self->one_of(
+ $self->any_of(
...
sub {
$self->scope_of( "{",
@@ -593,9 +625,9 @@ sub skip_ws
}
}
-=head2 $parser->expect( $string )
+=head2 $str = $parser->expect( $literal )
-=head2 $parser->expect( qr/pattern/ )
+=head2 $str = $parser->expect( qr/pattern/ )
Expects to find a literal string or regexp pattern match, and consumes it.
This method returns the string that was captured.
@@ -616,6 +648,62 @@ sub expect
return $1;
}
+=head2 $str = $parser->substring_before( $literal )
+
+=head2 $str = $parser->substring_before( qr/pattern/ )
+
+Expects to possibly find a literal string or regexp pattern match. If it finds
+such, consume all the input text before but excluding this match, and return
+it. If it fails to find a match before the end of the current scope, consumes
+all the input text until the end of scope and return it.
+
+This method does not consume the part of input that matches, only the text
+before it. It is not considered a failure if the substring before this match
+is empty. If a non-empty match is required, use the C<fail> method:
+
+ sub token_nonempty_part
+ {
+ my $self = shift;
+
+ my $str = $parser->substring_before( "," );
+ length $str or $self->fail( "Expected a string fragment before ," );
+
+ return $str;
+ }
+
+Note that unlike most of the other token parsing methods, this method does not
+consume either leading or trailing whitespace around the substring. It is
+expected that this method would be used as part a parser to read quoted
+strings, or similar cases where whitespace should be preserved.
+
+=cut
+
+sub substring_before
+{
+ my $self = shift;
+ my ( $expect ) = @_;
+
+ ref $expect or $expect = qr/\Q$expect/;
+
+ my $endre = ( defined $self->{endofscope} ) ?
+ qr/$expect|$self->{endofscope}/ :
+ $expect;
+
+ # NO skip_ws
+
+ my $start = pos $self->{str};
+ my $end;
+ if( $self->{str} =~ m/\G(?s:.*?)($endre)/ ) {
+ $end = $-[1];
+ }
+ else {
+ $end = length $self->{str};
+ }
+
+ pos( $self->{str} ) = $end;
+ return substr( $self->{str}, $start, $end - $start );
+}
+
=head2 $int = $parser->token_int
Expects to find an integer in decimal, octal or hexadecimal notation, and
@@ -629,6 +717,7 @@ sub token_int
$self->fail( "Expected integer" ) if $self->at_eos;
+ $self->skip_ws;
$self->{str} =~ m/\G(-?)($self->{patterns}{int})/gc or
$self->fail( "Expected integer" );
@@ -654,6 +743,7 @@ sub token_float
$self->fail( "Expected float" ) if $self->at_eos;
+ $self->skip_ws;
$self->{str} =~ m/\G(-?(?:\d*\.\d+|\d+\.)(?:e-?\d+)?|-?\d+e-?\d+)/gci or
$self->fail( "Expected float" );
@@ -665,8 +755,35 @@ sub token_float
Expects to find a quoted string, and consumes it. The string should be quoted
using C<"> or C<'> quote marks.
+The content of the quoted string can contain character escapes similar to
+those accepted by C or Perl. Specifically, the following forms are recognised:
+
+ \a Bell ("alert")
+ \b Backspace
+ \e Escape
+ \f Form feed
+ \n Newline
+ \r Return
+ \t Horizontal Tab
+ \0, \012 Octal character
+ \x34, \x{5678} Hexadecimal character
+
+C's C<\v> for vertical tab is not supported as it is rarely used in practice
+and it collides with Perl's C<\v> regexp escape. Perl's C<\c> for forming other
+control characters is also not supported.
+
=cut
+my %escapes = (
+ a => "\a",
+ b => "\b",
+ e => "\e",
+ f => "\f",
+ n => "\n",
+ r => "\r",
+ t => "\t",
+);
+
sub token_string
{
my $self = shift;
@@ -675,17 +792,31 @@ sub token_string
my $pos = pos $self->{str};
+ $self->skip_ws;
$self->{str} =~ m/\G($self->{patterns}{string_delim})/gc or
$self->fail( "Expected string delimiter" );
my $delim = $1;
- $self->{str} =~ m/\G((?:\\.|[^\\])*?)$delim/gc or
- pos($self->{str}) = $pos, $self->fail( "Expected contents of string" );
+ $self->{str} =~ m/
+ \G(
+ (?:
+ \\[0-7]{1,3} # octal escape
+ |\\x[0-9A-F]{2} # 2-digit hex escape
+ |\\x\{[0-9A-F]+\} # {}-delimited hex escape
+ |\\. # symbolic escape
+ |[^\\$delim]+ # plain chunk
+ )*?
+ )$delim/gcix or
+ pos($self->{str}) = $pos, $self->fail( "Expected contents of string" );
my $string = $1;
- # TODO: Unescape stuff like \\ and \n and whatnot
+ $string =~ s<\\(?:([0-7]{1,3})|x([0-9A-F]{2})|x\{([0-9A-F]+)\}|(.))>
+ [defined $1 ? chr oct $1 :
+ defined $2 ? chr hex $2 :
+ defined $3 ? chr hex $3 :
+ exists $escapes{$4} ? $escapes{$4} : $4]egi;
return $string;
}
@@ -702,6 +833,7 @@ sub token_ident
$self->fail( "Expected identifier" ) if $self->at_eos;
+ $self->skip_ws;
$self->{str} =~ m/\G($self->{patterns}{ident})/gc or
$self->fail( "Expected identifier" );
@@ -764,13 +896,46 @@ sub STRING
# Provide fallback operators for cmp, eq, etc...
use overload fallback => 1;
+=head1 EXAMPLES
+
+=head2 Accumulating Results Using Variables
+
+Although the structure-forming methods all return a value, obtained from their
+nested parsing code, it can sometimes be more convenient to use a variable to
+accumulate a result in instead. For example, consider the following parser
+method, designed to parse a set of C<name: "value"> assignments, such as might
+be found in a configuration file, or YAML/JSON-style mapping value.
+
+ sub parse_dict
+ {
+ my $self = shift;
+
+ my %ret;
+ $self->list_of( ",", sub {
+ my $key = $self->token_ident;
+ exists $ret{$key} and $self->fail( "Already have a mapping for '$key'" );
+
+ $self->expect( ":" );
+
+ $ret{$key} = $self->parse_value;
+ } );
+
+ return \%ret
+ }
+
+Instead of using the return value from C<list_of>, this method accumulates
+values in the C<%ret> hash, eventually returning a reference to it as its
+result. Because of this, it can perform some error checking while it parses;
+namely, rejecting duplicate keys.
+
=head1 TODO
=over 4
=item *
-Unescaping of string constants; customisable
+Make unescaping of string constants more customisable. Possibly consider
+instead a C<parse_string_generic> using a loop over C<substring_before>.
=item *
diff --git a/t/02expect.t b/t/02expect.t
index a6382e2..6826c73 100644
--- a/t/02expect.t
+++ b/t/02expect.t
@@ -2,7 +2,7 @@
use strict;
-use Test::More tests => 3;
+use Test::More tests => 4;
package TestParser;
use base qw( Parser::MGC );
@@ -22,6 +22,10 @@ is_deeply( $parser->from_string( "hello world" ),
[ "hello", "world" ],
'"hello world"' );
+is_deeply( $parser->from_string( " hello world " ),
+ [ "hello", "world" ],
+ '" hello world "' );
+
# Perl 5.13.6 changed the regexp form
# Accept both old and new-style stringification
my $modifiers = (qr/foobar/ =~ /\Q(?^/) ? '^' : '-xism';
diff --git a/t/06substring.t b/t/06substring.t
new file mode 100644
index 0000000..8d76371
--- /dev/null
+++ b/t/06substring.t
@@ -0,0 +1,27 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+use Test::More tests => 2;
+
+package TestParser;
+use base qw( Parser::MGC );
+
+sub parse
+{
+ my $self = shift;
+
+ [ $self->substring_before( "!" ), $self->expect( "!" ) ];
+}
+
+package main;
+
+my $parser = TestParser->new;
+
+is_deeply( $parser->from_string( "Hello, world!" ),
+ [ "Hello, world", "!" ],
+ '"Hello, world!"' );
+
+is_deeply( $parser->from_string( "!" ),
+ [ "", "!" ],
+ '"Hello, world!"' );
diff --git a/t/12token_string.t b/t/12token_string.t
index f57b560..39c3eba 100644
--- a/t/12token_string.t
+++ b/t/12token_string.t
@@ -2,7 +2,7 @@
use strict;
-use Test::More tests => 7;
+use Test::More tests => 18;
package TestParser;
use base qw( Parser::MGC );
@@ -24,6 +24,21 @@ is( $parser->from_string( q["double"] ), "double", 'Double quoted string' );
is( $parser->from_string( q["foo 'bar'"] ), "foo 'bar'", 'Double quoted string containing single substr' );
is( $parser->from_string( q['foo "bar"'] ), 'foo "bar"', 'Single quoted string containing double substr' );
+is( $parser->from_string( q["tab \t"] ), "tab \t", '\t' );
+is( $parser->from_string( q["newline \n"] ), "newline \n", '\n' );
+is( $parser->from_string( q["return \r"] ), "return \r", '\r' );
+is( $parser->from_string( q["form feed \f"] ), "form feed \f", '\f' );
+is( $parser->from_string( q["backspace \b"] ), "backspace \b", '\b' );
+is( $parser->from_string( q["bell \a"] ), "bell \a", '\a' );
+is( $parser->from_string( q["escape \e"] ), "escape \e", '\e' );
+
+# ord('A') == 65 == 0101 == 0x41
+# TODO: This is ASCII dependent. If anyone on EBCDIC cares, do let me know...
+is( $parser->from_string( q["null \0"] ), "null \0", 'Octal null' );
+is( $parser->from_string( q["octal \101BC"] ), "octal ABC", 'Octal' );
+is( $parser->from_string( q["hex \x41BC"] ), "hex ABC", 'Hexadecimal' );
+is( $parser->from_string( q["unihex \x{263a}"] ), "unihex \x{263a}", 'Unicode hex' );
+
$parser = TestParser->new(
patterns => { string_delim => qr/"/ }
);
diff --git a/t/21scope_of.t b/t/21scope_of.t
index fcb7da9..1ebf552 100644
--- a/t/21scope_of.t
+++ b/t/21scope_of.t
@@ -2,7 +2,7 @@
use strict;
-use Test::More tests => 3;
+use Test::More tests => 6;
package TestParser;
use base qw( Parser::MGC );
@@ -18,6 +18,23 @@ sub parse
);
}
+package DynamicDelimParser;
+use base qw( Parser::MGC );
+
+sub parse
+{
+ my $self = shift;
+
+ my $delim = $self->expect( qr/[\(\[]/ );
+ $delim =~ tr/([/)]/;
+
+ $self->scope_of(
+ undef,
+ sub { return $self->token_int },
+ $delim,
+ );
+}
+
package main;
my $parser = TestParser->new;
@@ -26,3 +43,10 @@ is( $parser->from_string( "(123)" ), 123, '"(123)"' );
ok( !eval { $parser->from_string( "(abc)" ) }, '"(abc)"' );
ok( !eval { $parser->from_string( "456" ) }, '"456"' );
+
+$parser = DynamicDelimParser->new;
+
+is( $parser->from_string( "(45)" ), 45, '"(45)"' );
+is( $parser->from_string( "[45]" ), 45, '"[45]"' );
+
+ok( !eval { $parser->from_string( "(45]" ) }, '"(45]" fails' );
diff --git a/t/24one_of.t b/t/24any_of.t
similarity index 97%
rename from t/24one_of.t
rename to t/24any_of.t
index 665e291..aec0559 100644
--- a/t/24one_of.t
+++ b/t/24any_of.t
@@ -11,7 +11,7 @@ sub parse
{
my $self = shift;
- $self->one_of(
+ $self->any_of(
sub { [ int => $self->token_int ] },
sub { [ str => $self->token_string ] },
sub { [ ident => $self->token_ident ] },
diff --git a/t/30commit.t b/t/30commit.t
index b9080bb..77fe010 100644
--- a/t/30commit.t
+++ b/t/30commit.t
@@ -11,7 +11,7 @@ sub parse
{
my $self = shift;
- $self->one_of(
+ $self->any_of(
sub { $self->token_int },
sub {
$self->scope_of( "(",
diff --git a/t/31scope_level.t b/t/31scope_level.t
index 06e87ac..50e581f 100644
--- a/t/31scope_level.t
+++ b/t/31scope_level.t
@@ -13,7 +13,7 @@ sub parse
$self->sequence_of(
sub {
- $self->one_of(
+ $self->any_of(
sub { $self->expect( qr/[a-z]+/ ) . "/" . $self->scope_level },
sub { $self->scope_of( "(", \&parse, ")" ) },
);
diff --git a/t/90ex_dict.t b/t/90ex_dict.t
new file mode 100644
index 0000000..efdd78d
--- /dev/null
+++ b/t/90ex_dict.t
@@ -0,0 +1,32 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+use Test::More tests => 4;
+
+require "examples/parse-dict.pl";
+
+my $parser = DictParser->new;
+
+sub test
+{
+ my ( $str, $expect, $name ) = @_;
+
+ is_deeply( $parser->from_string( $str ), $expect, $name );
+}
+
+test q[123],
+ 123,
+ "Number";
+
+test q["Hello"],
+ "Hello",
+ "String";
+
+test q[{one: 1, two: 2}],
+ { one => 1, two => 2 },
+ "Flat dict";
+
+test q[{numbers: {three: 3, four: 4}}],
+ { numbers => { three => 3, four => 4 } },
+ "Nested dict";
diff --git a/t/90ex_expr.t b/t/90ex_expr.t
new file mode 100644
index 0000000..c3df649
--- /dev/null
+++ b/t/90ex_expr.t
@@ -0,0 +1,33 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+use Test::More;
+
+require "examples/eval-expr.pl";
+
+my $parser = ExprParser->new;
+
+while( <DATA> ) {
+ chomp;
+ my ( $str, $expect ) = split m/=/;
+
+ is( $parser->from_string( $str ), $expect, $str );
+}
+
+done_testing;
+
+__DATA__
+1+2=3
+ 1 + 2 =3
+1+2+3=6
+10-4=6
+10-2-2=6
+3*4=12
+3*4*5=60
+20/4=5
+20/5/2=2
+3+4*5=23
+4*5+3=23
+(3+4)*5=35
+4*(5+3)=32
diff --git a/t/90ex_pod.t b/t/90ex_pod.t
new file mode 100644
index 0000000..1dbb98f
--- /dev/null
+++ b/t/90ex_pod.t
@@ -0,0 +1,36 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+use Test::More tests => 5;
+
+require "examples/parse-pod.pl";
+
+my $parser = PodParser->new;
+
+sub test
+{
+ my ( $str, $expect, $name ) = @_;
+
+ is_deeply( $parser->from_string( $str ), $expect, $name );
+}
+
+test "Plain text",
+ [ "Plain text" ],
+ "plain";
+
+test "B<bold>",
+ [ { B => [ "bold" ] } ],
+ "B<>";
+
+test "Text with I<italic> text",
+ [ "Text with ", { I => [ "italic" ] }, " text" ],
+ "I<> surrounded";
+
+test "Nested B<I<tags>>",
+ [ "Nested ", { B => [ { I => [ "tags" ] } ] } ],
+ "Nested";
+
+test "Double C<< Class->method >> tags",
+ [ "Double ", { C => [ " Class->method " ] }, " tags" ],
+ "Double tags";
diff --git a/t/90ex_synopsis.t b/t/90ex_synopsis.t
new file mode 100644
index 0000000..097c4ca
--- /dev/null
+++ b/t/90ex_synopsis.t
@@ -0,0 +1,21 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+use Test::More tests => 4;
+
+require "examples/synopsis.pl";
+
+my $parser = LispParser->new;
+
+sub test
+{
+ my ( $str, $expect ) = @_;
+
+ is_deeply( $parser->from_string( $str ), [ $expect ], qq("$str") );
+}
+
+test "123", 123;
+test "'hello'", 'hello';
+test "(123 456)", [ 123, 456 ];
+test "(+ 1 (* 2 3))", [ \'+', 1, [ \'*', 2, 3 ] ];
diff --git a/t/98backcompat.t b/t/98backcompat.t
new file mode 100644
index 0000000..a4d9f1c
--- /dev/null
+++ b/t/98backcompat.t
@@ -0,0 +1,25 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+use Test::More tests => 2;
+
+package OneOfParser;
+use base qw( Parser::MGC );
+
+sub parse
+{
+ my $self = shift;
+
+ $self->one_of(
+ sub { [ int => $self->token_int ] },
+ sub { [ str => $self->token_string ] },
+ );
+}
+
+package main;
+
+my $parser = OneOfParser->new;
+
+is_deeply( $parser->from_string( "123" ), [ int => 123 ], 'one_of integer' );
+is_deeply( $parser->from_string( q["hi"] ), [ str => "hi" ], 'one_of string' );
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libparser-mgc-perl.git
More information about the Pkg-perl-cvs-commits
mailing list