r26152 - in /branches/upstream/libxml-parser-lite-tree-perl/current: ./ lib/ lib/XML/ lib/XML/Parser/ lib/XML/Parser/Lite/ t/

gregoa at users.alioth.debian.org gregoa at users.alioth.debian.org
Sat Oct 18 16:32:19 UTC 2008


Author: gregoa
Date: Sat Oct 18 16:32:16 2008
New Revision: 26152

URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=26152
Log:
[svn-upgrade] Integrating new upstream version, libxml-parser-lite-tree-perl (0.08)

Added:
    branches/upstream/libxml-parser-lite-tree-perl/current/META.yml
    branches/upstream/libxml-parser-lite-tree-perl/current/lib/
    branches/upstream/libxml-parser-lite-tree-perl/current/lib/XML/
    branches/upstream/libxml-parser-lite-tree-perl/current/lib/XML/Parser/
    branches/upstream/libxml-parser-lite-tree-perl/current/lib/XML/Parser/Lite/
    branches/upstream/libxml-parser-lite-tree-perl/current/lib/XML/Parser/Lite/Tree.pm
    branches/upstream/libxml-parser-lite-tree-perl/current/lib/XML/Parser/LiteCopy.pm
    branches/upstream/libxml-parser-lite-tree-perl/current/t/01_basic.t
    branches/upstream/libxml-parser-lite-tree-perl/current/t/02_options.t
    branches/upstream/libxml-parser-lite-tree-perl/current/t/03_comments.t
    branches/upstream/libxml-parser-lite-tree-perl/current/t/04_processing_instructions.t
    branches/upstream/libxml-parser-lite-tree-perl/current/t/05_doctypes.t
Removed:
    branches/upstream/libxml-parser-lite-tree-perl/current/Tree.pm
    branches/upstream/libxml-parser-lite-tree-perl/current/t/01.t
Modified:
    branches/upstream/libxml-parser-lite-tree-perl/current/MANIFEST
    branches/upstream/libxml-parser-lite-tree-perl/current/Makefile.PL
    branches/upstream/libxml-parser-lite-tree-perl/current/README

Modified: branches/upstream/libxml-parser-lite-tree-perl/current/MANIFEST
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libxml-parser-lite-tree-perl/current/MANIFEST?rev=26152&op=diff
==============================================================================
--- branches/upstream/libxml-parser-lite-tree-perl/current/MANIFEST (original)
+++ branches/upstream/libxml-parser-lite-tree-perl/current/MANIFEST Sat Oct 18 16:32:16 2008
@@ -1,5 +1,11 @@
 Makefile.PL
 MANIFEST
 README
-t/01.t
-Tree.pm
+t/01_basic.t
+t/02_options.t
+t/03_comments.t
+t/04_processing_instructions.t
+t/05_doctypes.t
+lib/XML/Parser/LiteCopy.pm
+lib/XML/Parser/Lite/Tree.pm
+META.yml                                 Module meta-data (added by MakeMaker)

Added: branches/upstream/libxml-parser-lite-tree-perl/current/META.yml
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libxml-parser-lite-tree-perl/current/META.yml?rev=26152&op=file
==============================================================================
--- branches/upstream/libxml-parser-lite-tree-perl/current/META.yml (added)
+++ branches/upstream/libxml-parser-lite-tree-perl/current/META.yml Sat Oct 18 16:32:16 2008
@@ -1,0 +1,11 @@
+# http://module-build.sourceforge.net/META-spec.html
+#XXXXXXX This is a prototype!!!  It will change in the future!!! XXXXX#
+name:         XML-Parser-Lite-Tree
+version:      0.08
+version_from: lib/XML/Parser/Lite/Tree.pm
+installdirs:  site
+requires:
+    Test::More:                    0
+
+distribution_type: module
+generated_by: ExtUtils::MakeMaker version 6.30

Modified: branches/upstream/libxml-parser-lite-tree-perl/current/Makefile.PL
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libxml-parser-lite-tree-perl/current/Makefile.PL?rev=26152&op=diff
==============================================================================
--- branches/upstream/libxml-parser-lite-tree-perl/current/Makefile.PL (original)
+++ branches/upstream/libxml-parser-lite-tree-perl/current/Makefile.PL Sat Oct 18 16:32:16 2008
@@ -1,10 +1,9 @@
-use ExtUtils::MakeMaker;
-
-WriteMakefile(
-	'NAME'		=> 'XML::Parser::Lite::Tree',
-	'VERSION_FROM'	=> 'Tree.pm',
-	'PREREQ_PM'	=> {
-				'XML::Parser::Lite'	=> 0,
-				'Test::Simple'		=> 0,
-			},
-);
+use ExtUtils::MakeMaker;
+
+WriteMakefile(
+	'NAME'		=> 'XML::Parser::Lite::Tree',
+	'VERSION_FROM'	=> 'lib/XML/Parser/Lite/Tree.pm',
+	'PREREQ_PM'	=> {
+		'Test::More'		=> 0,
+	},
+);

Modified: branches/upstream/libxml-parser-lite-tree-perl/current/README
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libxml-parser-lite-tree-perl/current/README?rev=26152&op=diff
==============================================================================
--- branches/upstream/libxml-parser-lite-tree-perl/current/README (original)
+++ branches/upstream/libxml-parser-lite-tree-perl/current/README Sat Oct 18 16:32:16 2008
@@ -18,11 +18,14 @@
 
 This module requires these other modules and libraries:
 
-  XML::Parser::Lite
-  Test::Simple
+  Test::More
 
 
 COPYRIGHT AND LICENCE
 
-Copyright (C) 2004 Cal Henderson <cal at iamcal.com>
+Copyright (C) 2004-2008 Cal Henderson <cal at iamcal.com>
 License: Perl Artistic License
+
+Contains XML::Parser::Lite:
+Copyright (C) 2000-2007 Paul Kulchenko. All rights reserved.
+Copyright (C) 2008- Martin Kutter. All rights reserved.

Added: branches/upstream/libxml-parser-lite-tree-perl/current/lib/XML/Parser/Lite/Tree.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libxml-parser-lite-tree-perl/current/lib/XML/Parser/Lite/Tree.pm?rev=26152&op=file
==============================================================================
--- branches/upstream/libxml-parser-lite-tree-perl/current/lib/XML/Parser/Lite/Tree.pm (added)
+++ branches/upstream/libxml-parser-lite-tree-perl/current/lib/XML/Parser/Lite/Tree.pm Sat Oct 18 16:32:16 2008
@@ -1,0 +1,401 @@
+package XML::Parser::Lite::Tree;
+
+use 5.006;
+use strict;
+use warnings;
+use XML::Parser::LiteCopy;
+
+our $VERSION = '0.08';
+
+use vars qw( $parser );
+
+sub instance {
+	return $parser if $parser;
+	$parser = __PACKAGE__->new;
+}
+
+sub new {
+	my $class = shift;
+	my $self = bless {}, $class;
+
+	my %opts = (ref $_[0]) ? ((ref $_[0] eq 'HASH') ? %{$_[0]} : () ) : @_;
+	$self->{opts} = \%opts;
+
+	$self->{__parser} = new XML::Parser::LiteCopy
+		Handlers => {
+			Start	=> sub { $self->_start_tag(@_); },
+			Char	=> sub { $self->_do_char(@_); },
+			End	=> sub { $self->_end_tag(@_); },
+			Comment	=> sub { $self->_do_comment(@_); },
+			XMLDecl	=> sub { $self->_do_xmldecl(@_); },
+			Doctype	=> sub { $self->_do_doctype(@_); },
+		};
+	$self->{process_ns} = $self->{opts}->{process_ns} || 0;
+	$self->{skip_white} = $self->{opts}->{skip_white} || 0;
+
+	return $self;
+}
+
+sub parse {
+	my ($self, $content) = @_;
+
+	my $root = {
+		'type' => 'root',
+		'children' => [],
+	};
+
+	$self->{tag_stack} = [$root];
+
+	$self->{__parser}->parse($content);
+
+	$self->cleanup($root);
+
+	if ($self->{skip_white}){
+		$self->strip_white($root);
+	}
+
+	if ($self->{process_ns}){
+		$self->{ns_stack} = {};
+		$self->mark_namespaces($root);
+	}
+
+	return $root;
+}
+
+sub _start_tag {
+	my $self = shift;
+	shift;
+
+	my $new_tag = {
+		'type' => 'element',
+		'name' => shift,
+		'attributes' => {},
+		'children' => [],
+	};
+
+	while (my $a_name = shift @_){
+		my $a_value = shift @_;
+		$new_tag->{attributes}->{$a_name} = $a_value;
+	}
+
+	push @{$self->{tag_stack}->[-1]->{children}}, $new_tag;
+	push @{$self->{tag_stack}}, $new_tag;
+}
+
+sub _do_char {
+	my $self = shift;
+	shift;
+
+	for my $content(@_){
+
+		my $new_tag = {
+			'type' => 'text',
+			'content' => $content,
+		};
+
+		push @{$self->{tag_stack}->[-1]->{children}}, $new_tag;
+	}
+}
+
+sub _end_tag {
+	my $self = shift;
+
+	pop @{$self->{tag_stack}};
+}
+
+sub _do_comment {
+	my $self = shift;
+	shift;
+
+	for my $content(@_){
+
+		my $new_tag = {
+			'type' => 'comment',
+			'content' => $content,
+		};
+
+		push @{$self->{tag_stack}->[-1]->{children}}, $new_tag;
+	}
+}
+
+sub _do_xmldecl {
+	my $self = shift;
+	shift;
+
+	push @{$self->{tag_stack}->[-1]->{children}}, {
+		'type' => 'pi',
+		'content' => shift,
+	};
+}
+
+sub _do_doctype {
+	my $self = shift;
+	shift;
+
+	push @{$self->{tag_stack}->[-1]->{children}}, {
+		'type' => 'dtd',
+		'content' => shift,
+	};
+}
+
+sub mark_namespaces {
+	my ($self, $obj) = @_;
+
+	my @ns_keys;
+
+	#
+	# mark
+	#
+
+	if ($obj->{type} eq 'element'){
+
+		#
+		# first, add any new NS's to the stack
+		#
+
+		my @keys = keys %{$obj->{attributes}};
+
+		for my $k(@keys){
+
+			if ($k =~ /^xmlns:(.*)$/){
+
+				push @{$self->{ns_stack}->{$1}}, $obj->{attributes}->{$k};
+				push @ns_keys, $1;
+				delete $obj->{attributes}->{$k};
+			}
+
+			if ($k eq 'xmlns'){
+
+				push @{$self->{ns_stack}->{__default__}}, $obj->{attributes}->{$k};
+				push @ns_keys, '__default__';
+				delete $obj->{attributes}->{$k};
+			}
+		}
+
+
+		#
+		# now - does this tag have a NS?
+		#
+
+		if ($obj->{name} =~ /^(.*?):(.*)$/){
+
+			$obj->{local_name} = $2;
+			$obj->{ns_key} = $1;
+			$obj->{ns} = $self->{ns_stack}->{$1}->[-1];
+		}else{
+			$obj->{local_name} = $obj->{name};
+			$obj->{ns} = $self->{ns_stack}->{__default__}->[-1];
+		}
+
+
+		#
+		# finally, add xpath-style namespace nodes
+		#
+
+		$obj->{namespaces} = {};
+
+		for my $key (keys %{$self->{ns_stack}}){
+
+			if (scalar @{$self->{ns_stack}->{$key}}){
+
+				my $uri = $self->{ns_stack}->{$key}->[-1];
+				$obj->{namespaces}->{$key} = $uri;
+			}
+		}
+	}
+
+
+	#
+	# descend
+	#
+
+	if ($obj->{type} eq 'root' || $obj->{type} eq 'element'){
+
+		for my $child (@{$obj->{children}}){
+
+			$self->mark_namespaces($child);
+		}
+	}
+
+
+	#
+	# pop from stack
+	#
+
+	for my $k (@ns_keys){
+		pop @{$self->{ns_stack}->{$k}};
+	}
+}
+
+sub strip_white {
+	my ($self, $obj) = @_;
+
+	if ($obj->{type} eq 'root' || $obj->{type} eq 'element'){
+
+		my $new_kids = [];
+
+		for my $child (@{$obj->{children}}){
+
+			if ($child->{type} eq 'text'){
+
+				if ($child->{content} =~ m/\S/){
+
+					push @{$new_kids}, $child;
+				}
+
+			}elsif ($child->{type} eq 'element'){
+
+				$self->strip_white($child);
+				push @{$new_kids}, $child;
+			}else{
+				push @{$new_kids}, $child;
+			}
+		}
+
+		$obj->{children} = $new_kids;
+	}
+}
+
+sub cleanup {
+	my ($self, $obj) = @_;
+
+	#
+	# cleanup PIs
+	#
+
+	if ($obj->{type} eq 'pi'){
+
+		if ($obj->{content} =~ m/^(\S+)\s+(.*)\?$/s){
+
+			delete $obj->{content};
+			$obj->{target} = $1;
+			$obj->{content} = $2;
+		}
+	}
+
+
+	#
+	# cleanup DTDs
+	#
+
+	if ($obj->{type} eq 'dtd'){
+
+		if ($obj->{content} =~ m/^(\S+)\s+(.*)$/s){
+
+			delete $obj->{content};
+			$obj->{name} = $1;
+			$obj->{content} = $2;
+		}
+	}
+
+
+	#
+	# recurse
+	#
+	
+	if ($obj->{type} eq 'root' || $obj->{type} eq 'element'){
+
+		for my $child (@{$obj->{children}}){
+
+			$self->cleanup($child);
+		}
+	}
+}
+
+
+1;
+__END__
+
+=head1 NAME
+
+XML::Parser::Lite::Tree - Lightweight XML tree builder
+
+=head1 SYNOPSIS
+
+  use XML::Parser::Lite::Tree;
+
+  my $tree_parser = XML::Parser::Lite::Tree::instance();
+  my $tree = $tree_parser->parse($xml_data);
+
+    OR
+
+  my $tree = XML::Parser::Lite::Tree::instance()->parse($xml_data);
+
+=head1 DESCRIPTION
+
+This is a singleton class for parsing XML into a tree structure. How does this
+differ from other XML tree generators? By using XML::Parser::Lite, which is a
+pure perl XML parser. Using this module you can tree-ify simple XML without
+having to compile any C.
+
+
+For example, the following XML:
+
+  <foo woo="yay"><bar a="b" c="d" />hoopla</foo>
+
+
+Parses into the following tree:
+
+          'children' => [
+                          {
+                            'children' => [
+                                            {
+                                              'children' => [],
+                                              'attributes' => {
+                                                                'a' => 'b',
+                                                                'c' => 'd'
+                                                              },
+                                              'type' => 'element',
+                                              'name' => 'bar'
+                                            },
+                                            {
+                                              'content' => 'hoopla',
+                                              'type' => 'text'
+                                            }
+                                          ],
+                            'attributes' => {
+                                              'woo' => 'yay'
+                                            },
+                            'type' => 'element',
+                            'name' => 'foo'
+                          }
+                        ],
+          'type' => 'root'
+        };
+
+
+Each node contains a C<type> key, one of C<root>, C<element> and C<text>. C<root> is the 
+document root, and only contains an array ref C<children>. C<element> represents a normal
+tag, and contains an array ref C<children>, a hash ref C<attributes> and a string C<name>.
+C<text> nodes contain only a C<content> string.
+
+
+=head1 METHODS
+
+=over 4
+
+=item C<instance()>
+
+Returns an instance of the tree parser.
+
+=item C<new( options... )>
+
+Creates a new parser. Valid options include C<process_ns> to process namespaces.
+
+=item C<parse($xml)>
+
+Parses the xml in C<$xml> and returns the tree as a hash ref.
+
+=back
+
+
+=head1 AUTHOR
+
+Copyright (C) 2004-2008, Cal Henderson, E<lt>cal at iamcal.comE<gt>
+
+
+=head1 SEE ALSO
+
+L<XML::Parser::Lite>.
+
+=cut

Added: branches/upstream/libxml-parser-lite-tree-perl/current/lib/XML/Parser/LiteCopy.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libxml-parser-lite-tree-perl/current/lib/XML/Parser/LiteCopy.pm?rev=26152&op=file
==============================================================================
--- branches/upstream/libxml-parser-lite-tree-perl/current/lib/XML/Parser/LiteCopy.pm (added)
+++ branches/upstream/libxml-parser-lite-tree-perl/current/lib/XML/Parser/LiteCopy.pm Sat Oct 18 16:32:16 2008
@@ -1,0 +1,370 @@
+# NOTE: This module comes from SOAP::Lite, which you probably don't
+# have, so it's repackaged here to avoid the huge dependancy tree.
+# also, the current version in CPAN doesn't run under older perls
+# so i've removed the 'use version' magic. And it's been renamed
+# so that search.cpan.org doesn't whine at me
+
+# ======================================================================
+#
+# Copyright (C) 2000-2007 Paul Kulchenko (paulclinger at yahoo.com)
+# Copyright (C) 2008 Martin Kutter (martin.kutter at fen-net.de)
+# SOAP::Lite is free software; you can redistribute it
+# and/or modify it under the same terms as Perl itself.
+#
+# $Id: Lite.pm 249 2008-05-05 20:35:05Z kutterma $
+#
+# ======================================================================
+
+package XML::Parser::LiteCopy;
+
+use strict;
+use vars qw($VERSION);
+$VERSION = '0.710.05';
+
+sub new {
+    my $class = shift;
+
+    return $class if ref $class;
+    my $self = bless {} => $class;
+
+    my %parameters = @_;
+    $self->setHandlers(); # clear first
+    $self->setHandlers(%{$parameters{Handlers} || {}});
+
+    return $self;
+}
+
+sub setHandlers {
+    my $self = shift;
+
+    # allow symbolic refs, avoid "subroutine redefined" warnings
+    no strict 'refs'; local $^W;
+    # clear all handlers if called without parameters
+    if (not @_) {
+        for (qw(Start End Char Final Init Comment Doctype XMLDecl)) {
+            *$_ = sub {}
+        }
+    }
+
+    # we could use each here, too...
+    while (@_) {
+        my($name, $func) = splice(@_, 0, 2);
+        *$name = defined $func
+            ? $func
+            : sub {}
+    }
+    return $self;
+}
+
+sub _regexp {
+    my $patch = shift || '';
+    my $package = __PACKAGE__;
+
+    # This parser is based on "shallow parser" http://www.cs.sfu.ca/~cameron/REX.html
+
+    # Robert D. Cameron "REX: XML Shallow Parsing with Regular Expressions",
+    # Technical Report TR 1998-17, School of Computing Science, Simon Fraser University, November, 1998.
+    # Copyright (c) 1998, Robert D. Cameron.
+    # The following code may be freely used and distributed provided that
+    # this copyright and citation notice remains intact and that modifications
+    # or additions are clearly identified.
+
+    # Modifications may be tracked on SOAP::Lite's SVN at
+    # https://soaplite.svn.sourceforge.net/svnroot/soaplite/
+    #
+    use re 'eval';
+    my $TextSE = "[^<]+";
+    my $UntilHyphen = "[^-]*-";
+    my $Until2Hyphens = "([^-]*)-(?:[^-]$[^-]*-)*-";
+    my $CommentCE = "$Until2Hyphens(?{${package}::comment(\$2)})>?";
+#    my $Until2Hyphens = "$UntilHyphen(?:[^-]$UntilHyphen)*-";
+#    my $CommentCE = "$Until2Hyphens>?";
+    my $UntilRSBs = "[^\\]]*](?:[^\\]]+])*]+";
+    my $CDATA_CE = "$UntilRSBs(?:[^\\]>]$UntilRSBs)*>";
+    my $S = "[ \\n\\t\\r]+";
+    my $NameStrt = "[A-Za-z_:]|[^\\x00-\\x7F]";
+    my $NameChar = "[A-Za-z0-9_:.-]|[^\\x00-\\x7F]";
+    my $Name = "(?:$NameStrt)(?:$NameChar)*";
+    my $QuoteSE = "\"[^\"]*\"|'[^']*'";
+    my $DT_IdentSE = "$Name(?:$S(?:$Name|$QuoteSE))*";
+#    my $DT_IdentSE = "$S$Name(?:$S(?:$Name|$QuoteSE))*";
+    my $MarkupDeclCE = "(?:[^\\]\"'><]+|$QuoteSE)*>";
+    my $S1 = "[\\n\\r\\t ]";
+    my $UntilQMs = "[^?]*\\?";
+    my $PI_Tail = "\\?>|$S1$UntilQMs(?:[^>?]$UntilQMs)*";
+    my $DT_ItemSE = "<(?:!(?:--$Until2Hyphens>|[^-]$MarkupDeclCE)|\\?$Name(?:$PI_Tail>))|%$Name;|$S";
+    my $DocTypeCE = "$S($DT_IdentSE(?:$S)?(?:\\[(?:$DT_ItemSE)*](?:$S)?)?)>(?{${package}::_doctype(\$3)})";
+#    my $PI_Tail = "\\?>|$S1$UntilQMs(?:[^>?]$UntilQMs)*>";
+#    my $DT_ItemSE = "<(?:!(?:--$Until2Hyphens>|[^-]$MarkupDeclCE)|\\?$Name(?:$PI_Tail))|%$Name;|$S";
+#    my $DocTypeCE = "$DT_IdentSE(?:$S)?(?:\\[(?:$DT_ItemSE)*](?:$S)?)?>?";
+    my $DeclCE = "--(?:$CommentCE)?|\\[CDATA\\[(?:$CDATA_CE)?|DOCTYPE(?:$DocTypeCE)?";
+#    my $PI_CE = "$Name(?:$PI_Tail)?";
+    my $PI_CE = "($Name(?:$PI_Tail))>(?{${package}::_xmldecl(\$5)})";
+    # these expressions were modified for backtracking and events
+#    my $EndTagCE = "($Name)(?{${package}::_end(\$2)})(?:$S)?>";
+    my $EndTagCE = "($Name)(?{${package}::_end(\$6)})(?:$S)?>";
+    my $AttValSE = "\"([^<\"]*)\"|'([^<']*)'";
+#    my $ElemTagCE = "($Name)(?:$S($Name)(?:$S)?=(?:$S)?(?:$AttValSE)(?{[\@{\$^R||[]},\$4=>defined\$5?\$5:\$6]}))*(?:$S)?(/)?>(?{${package}::_start( \$3,\@{\$^R||[]})})(?{\${7} and ${package}::_end(\$3)})";
+    my $ElemTagCE = "($Name)"
+        . "(?:$S($Name)(?:$S)?=(?:$S)?(?:$AttValSE)"
+        . "(?{[\@{\$^R||[]},\$8=>defined\$9?\$9:\$10]}))*(?:$S)?(/)?>"
+        . "(?{${package}::_start(\$7,\@{\$^R||[]})})(?{\$11 and ${package}::_end(\$7)})";
+
+    my $MarkupSPE = "<(?:!(?:$DeclCE)?|\\?(?:$PI_CE)?|/(?:$EndTagCE)?|(?:$ElemTagCE)?)";
+
+    # Next expression is under "black magic".
+    # Ideally it should be '($TextSE)(?{${package}::char(\$1)})|$MarkupSPE',
+    # but it doesn't work under Perl 5.005 and only magic with
+    # (?:....)?? solved the problem.
+    # I would appreciate if someone let me know what is the right thing to do
+    # and what's the reason for all this magic.
+    # Seems like a problem related to (?:....)? rather than to ?{} feature.
+    # Tests are in t/31-xmlparserlite.t if you decide to play with it.
+    #"(?{[]})(?:($TextSE)(?{${package}::_char(\$1)}))$patch|$MarkupSPE";
+    "(?:($TextSE)(?{${package}::_char(\$1)}))$patch|$MarkupSPE";
+}
+
+setHandlers();
+
+# Try 5.6 and 5.10 regex first
+my $REGEXP = _regexp('??');
+
+sub _parse_re {
+    use re "eval";
+    undef $^R;
+    1 while $_[0] =~ m{$REGEXP}go
+};
+
+# fixup regex if it does not work...
+{
+    if (not eval { _parse_re('<soap:foo xmlns:soap="foo">bar</soap:foo>'); 1; } ) {
+        $REGEXP = _regexp();
+        local $^W;
+        *_parse_re = sub {
+                use re "eval";
+                undef $^R;
+                1 while $_[0] =~ m{$REGEXP}go
+            };
+    }
+}
+
+sub parse {
+    _init();
+    _parse_re($_[1]);
+    _final();
+}
+
+my(@stack, $level);
+
+sub _init {
+    @stack = ();
+    $level = 0;
+    Init(__PACKAGE__, @_);
+}
+
+sub _final {
+    die "not properly closed tag '$stack[-1]'\n" if @stack;
+    die "no element found\n" unless $level;
+    Final(__PACKAGE__, @_)
+}
+
+sub _start {
+    die "multiple roots, wrong element '$_[0]'\n" if $level++ && !@stack;
+    push(@stack, $_[0]);
+    Start(__PACKAGE__, @_);
+}
+
+sub _char {
+    Char(__PACKAGE__, $_[0]), return if @stack;
+
+    # check for junk before or after element
+    # can't use split or regexp due to limitations in ?{} implementation,
+    # will iterate with loop, but we'll do it no more than two times, so
+    # it shouldn't affect performance
+    for (my $i=0; $i < length $_[0]; $i++) {
+        die "junk '$_[0]' @{[$level ? 'after' : 'before']} XML element\n"
+        if index("\n\r\t ", substr($_[0],$i,1)) < 0; # or should '< $[' be there
+    }
+}
+
+sub _end {
+    pop(@stack) eq $_[0] or die "mismatched tag '$_[0]'\n";
+    End(__PACKAGE__, $_[0]);
+}
+
+sub comment {
+    Comment(__PACKAGE__, $_[0]);
+}
+
+sub end {
+     pop(@stack) eq $_[0] or die "mismatched tag '$_[0]'\n";
+     End(__PACKAGE__, $_[0]);
+ }
+
+sub _doctype {
+    Doctype(__PACKAGE__, $_[0]);
+}
+
+sub _xmldecl {
+    XMLDecl(__PACKAGE__, $_[0]);
+}
+
+
+
+# ======================================================================
+1;
+
+__END__
+
+=head1 NAME
+
+XML::Parser::Lite - Lightweight regexp-based XML parser
+
+=head1 SYNOPSIS
+
+  use XML::Parser::Lite;
+
+  $p1 = new XML::Parser::Lite;
+  $p1->setHandlers(
+    Start => sub { shift; print "start: @_\n" },
+    Char => sub { shift; print "char: @_\n" },
+    End => sub { shift; print "end: @_\n" },
+  );
+  $p1->parse('<foo id="me">Hello World!</foo>');
+
+  $p2 = new XML::Parser::Lite
+    Handlers => {
+      Start => sub { shift; print "start: @_\n" },
+      Char => sub { shift; print "char: @_\n" },
+      End => sub { shift; print "end: @_\n" },
+    }
+  ;
+  $p2->parse('<foo id="me">Hello <bar>cruel</bar> World!</foo>');
+
+=head1 DESCRIPTION
+
+This Perl implements an XML parser with a interface similar to
+XML::Parser. Though not all callbacks are supported, you should be able to
+use it in the same way you use XML::Parser. Due to using experimantal regexp
+features it'll work only on Perl 5.6 and above and may behave differently on
+different platforms.
+
+Note that you cannot use regular expressions or split in callbacks. This is
+due to a limitation of perl's regular expression implementation (which is
+not re-entrant).
+
+=head1 SUBROUTINES/METHODS
+
+=head2 new
+
+Constructor.
+
+As (almost) all SOAP::Lite constructors, new() returns the object called on
+when called as object method. This means that the following effectifely is
+a no-op if $obj is a object:
+
+ $obj = $obj->new();
+
+New accepts a single named parameter, C<Handlers> with a hash ref as value:
+
+ my $parser = XML::Parser::Lite->new(
+    Handlers => {
+        Start => sub { shift; print "start: @_\n" },
+        Char => sub { shift; print "char: @_\n" },
+        End => sub { shift; print "end: @_\n" },
+    }
+ );
+
+The handlers given will be passed to setHandlers.
+
+=head2 setHandlers
+
+Sets (or resets) the parsing handlers. Accepts a hash with the handler names
+and handler code references as parameters. Passing C<undef> instead of a
+code reference replaces the handler by a no-op.
+
+The following handlers can be set:
+
+ Init
+ Start
+ Char
+ End
+ Final
+
+All other handlers are ignored.
+
+Calling setHandlers without parameters resets all handlers to no-ops.
+
+=head2 parse
+
+Parses the XML given. In contrast to L<XML::Parser|XML::Parser>'s parse
+method, parse() only parses strings.
+
+=head1 Handler methods
+
+=head2 Init
+
+Called before parsing starts. You should perform any necessary initializations
+in Init.
+
+=head2 Start
+
+Called at the start of each XML node. See L<XML::Parser> for details.
+
+=head2 Char
+
+Called for each character sequence. May be called multiple times for the
+characters contained in an XML node (even for every single character).
+Your implementation has to make sure that it captures all characters.
+
+=head2 End
+
+Called at the end of each XML node. See L<XML::Parser> for details
+
+=head2 Comment
+
+See L<XML::Parser> for details
+
+=head2 XMLDecl
+
+See L<XML::Parser> for details
+
+=head2 Doctype
+
+See L<XML::Parser> for details
+
+=head2 Final
+
+Called at the end of the parsing process. You should perform any neccessary
+cleanup here.
+
+=head1 SEE ALSO
+
+ XML::Parser
+
+=head1 COPYRIGHT
+
+Copyright (C) 2000-2007 Paul Kulchenko. All rights reserved.
+
+Copyright (C) 2008- Martin Kutter. All rights reserved.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+This parser is based on "shallow parser" http://www.cs.sfu.ca/~cameron/REX.html
+Copyright (c) 1998, Robert D. Cameron.
+
+=head1 AUTHOR
+
+Paul Kulchenko (paulclinger at yahoo.com)
+
+Martin Kutter (martin.kutter at fen-net.de)
+
+Additional handlers supplied by Adam Leggett.
+
+=cut
+
+
+
+
+

Added: branches/upstream/libxml-parser-lite-tree-perl/current/t/01_basic.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libxml-parser-lite-tree-perl/current/t/01_basic.t?rev=26152&op=file
==============================================================================
--- branches/upstream/libxml-parser-lite-tree-perl/current/t/01_basic.t (added)
+++ branches/upstream/libxml-parser-lite-tree-perl/current/t/01_basic.t Sat Oct 18 16:32:16 2008
@@ -1,0 +1,28 @@
+use Test::More tests => 15;
+
+use XML::Parser::Lite::Tree;
+my $x = XML::Parser::Lite::Tree->instance();
+ok( defined($x), "instance() returns something" );
+ok( ref $x eq 'XML::Parser::Lite::Tree', "instance returns the right object" );
+
+my $tree = $x->parse('<foo bar="baz">woo<yay />hoopla</foo>');
+
+ok( defined($tree), "parse() returns something" );
+ok( scalar @{$tree->{children}} == 1, "tree root contains a single root node" );
+
+my $root_node = pop @{$tree->{children}};
+
+ok( $root_node->{type} eq 'element', "root node is an element" );
+ok( $root_node->{name} eq 'foo', "root node has correct name" );
+ok( scalar keys %{$root_node->{attributes}} == 1, "correct attribute count" );
+ok( $root_node->{attributes}->{bar} eq 'baz', "correct attribute name and value" );
+ok( scalar @{$root_node->{children}} == 3, "correct child count" );
+
+ok( $root_node->{children}->[0]->{type} eq 'text', "child 1 type correct" );
+ok( $root_node->{children}->[0]->{content} eq 'woo', "child 1 content correct" );
+
+ok( $root_node->{children}->[1]->{type} eq 'element', "child 2 type correct" );
+ok( $root_node->{children}->[1]->{name} eq 'yay', "child 2 name correct" );
+
+ok( $root_node->{children}->[2]->{type} eq 'text', "child 3 type correct" );
+ok( $root_node->{children}->[2]->{content} eq 'hoopla', "child 3 content correct" );

Added: branches/upstream/libxml-parser-lite-tree-perl/current/t/02_options.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libxml-parser-lite-tree-perl/current/t/02_options.t?rev=26152&op=file
==============================================================================
--- branches/upstream/libxml-parser-lite-tree-perl/current/t/02_options.t (added)
+++ branches/upstream/libxml-parser-lite-tree-perl/current/t/02_options.t Sat Oct 18 16:32:16 2008
@@ -1,0 +1,97 @@
+use Test::More tests => 36;
+
+use XML::Parser::Lite::Tree;
+
+
+#
+# test the whitespace folding
+#
+
+my $parser = new XML::Parser::Lite::Tree(skip_white => 1);
+my $tree = $parser->parse("<foo>  <bar> <baz>woo</baz></bar>  </foo>");
+
+is(scalar @{&get_node($tree, ''     )->{children}}, 1, "one child of the root node");
+is(scalar @{&get_node($tree, '0'    )->{children}}, 1, "one child, level 2");
+is(scalar @{&get_node($tree, '0/0'  )->{children}}, 1, "one child, level 3");
+is(scalar @{&get_node($tree, '0/0/0')->{children}}, 1, "one child, level 4");
+
+is(&get_node($tree, '0'      )->{type}, 'element');
+is(&get_node($tree, '0/0'    )->{type}, 'element');
+is(&get_node($tree, '0/0/0'  )->{type}, 'element');
+is(&get_node($tree, '0/0/0/0')->{type}, 'text');
+
+is(&get_node($tree, '0'      )->{name}, 'foo');
+is(&get_node($tree, '0/0'    )->{name}, 'bar');
+is(&get_node($tree, '0/0/0'  )->{name}, 'baz');
+is(&get_node($tree, '0/0/0/0')->{content}, 'woo');
+
+
+#
+# test the namespace parsing
+#
+
+my $xml = q~
+	<aaa
+		xmlns="urn:default"
+		xmlns:foo="urn:foo"
+	>
+		<bbb />
+		<foo:ccc
+			xmlns="urn:override"
+		>
+			<ddd xmlns:bar="urn:bar" />
+		</foo:ccc>
+	</aaa>
+~;
+
+$parser = new XML::Parser::Lite::Tree(process_ns => 1, skip_white => 1);
+$tree = $parser->parse($xml);
+
+is(&get_node($tree, '0'    )->{ns}, 'urn:default');
+is(&get_node($tree, '0/0'  )->{ns}, 'urn:default');
+is(&get_node($tree, '0/1'  )->{ns}, 'urn:foo');
+is(&get_node($tree, '0/1/0')->{ns}, 'urn:override');
+
+is(&get_node($tree, '0'    )->{name}, 'aaa');
+is(&get_node($tree, '0/0'  )->{name}, 'bbb');
+is(&get_node($tree, '0/1'  )->{name}, 'foo:ccc');
+is(&get_node($tree, '0/1/0')->{name}, 'ddd');
+
+is(&get_node($tree, '0'    )->{local_name}, 'aaa');
+is(&get_node($tree, '0/0'  )->{local_name}, 'bbb');
+is(&get_node($tree, '0/1'  )->{local_name}, 'ccc');
+is(&get_node($tree, '0/1/0')->{local_name}, 'ddd');
+
+is(&get_node($tree, '0'    )->{namespaces}->{__default__}, 'urn:default');
+is(&get_node($tree, '0/0'  )->{namespaces}->{__default__}, 'urn:default');
+is(&get_node($tree, '0/1'  )->{namespaces}->{__default__}, 'urn:override');
+is(&get_node($tree, '0/1/0')->{namespaces}->{__default__}, 'urn:override');
+
+is(&get_node($tree, '0'    )->{namespaces}->{foo}, 'urn:foo');
+is(&get_node($tree, '0/0'  )->{namespaces}->{foo}, 'urn:foo');
+is(&get_node($tree, '0/1'  )->{namespaces}->{foo}, 'urn:foo');
+is(&get_node($tree, '0/1/0')->{namespaces}->{foo}, 'urn:foo');
+
+is(&get_node($tree, '0    ')->{namespaces}->{bar}, undef);
+is(&get_node($tree, '0/0  ')->{namespaces}->{bar}, undef);
+is(&get_node($tree, '0/1  ')->{namespaces}->{bar}, undef);
+is(&get_node($tree, '0/1/0')->{namespaces}->{bar}, 'urn:bar');
+
+
+
+
+#
+# a super-simple xpath-like function for finding a single given child
+#
+
+sub get_node {
+	my ($tree, $path) = @_;
+	my $node = $tree;
+	if (length $path){
+		my @refs = split /\//, $path;
+		for my $ref (@refs){
+			$node = $node->{children}->[$ref];
+		}
+	}
+	return $node;
+}

Added: branches/upstream/libxml-parser-lite-tree-perl/current/t/03_comments.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libxml-parser-lite-tree-perl/current/t/03_comments.t?rev=26152&op=file
==============================================================================
--- branches/upstream/libxml-parser-lite-tree-perl/current/t/03_comments.t (added)
+++ branches/upstream/libxml-parser-lite-tree-perl/current/t/03_comments.t Sat Oct 18 16:32:16 2008
@@ -1,0 +1,47 @@
+use Test::More tests => 9;
+
+use XML::Parser::Lite::Tree;
+
+
+#
+# test comment nodes
+#
+
+my $parser = new XML::Parser::Lite::Tree(skip_white => 1);
+my $tree = $parser->parse(q~
+	<foo>
+		<woo />
+		<!-- yay -->
+		<hoopla />
+	</foo>
+~);
+
+is(&get_node($tree, '0'  )->{type}, 'element');
+is(&get_node($tree, '0/0')->{type}, 'element');
+is(&get_node($tree, '0/1')->{type}, 'comment');
+is(&get_node($tree, '0/2')->{type}, 'element');
+
+is(&get_node($tree, '0'  )->{name}, 'foo');
+is(&get_node($tree, '0/0')->{name}, 'woo');
+is(&get_node($tree, '0/1')->{name}, undef);
+is(&get_node($tree, '0/2')->{name}, 'hoopla');
+
+is(&get_node($tree, '0/1')->{content}, ' yay ');
+
+
+
+#
+# a super-simple xpath-like function for finding a single given child
+#
+
+sub get_node {
+	my ($tree, $path) = @_;
+	my $node = $tree;
+	if (length $path){
+		my @refs = split /\//, $path;
+		for my $ref (@refs){
+			$node = $node->{children}->[$ref];
+		}
+	}
+	return $node;
+}

Added: branches/upstream/libxml-parser-lite-tree-perl/current/t/04_processing_instructions.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libxml-parser-lite-tree-perl/current/t/04_processing_instructions.t?rev=26152&op=file
==============================================================================
--- branches/upstream/libxml-parser-lite-tree-perl/current/t/04_processing_instructions.t (added)
+++ branches/upstream/libxml-parser-lite-tree-perl/current/t/04_processing_instructions.t Sat Oct 18 16:32:16 2008
@@ -1,0 +1,50 @@
+use Test::More tests => 9;
+
+use XML::Parser::Lite::Tree;
+
+
+#
+# test processing instructions
+#
+
+my $parser = new XML::Parser::Lite::Tree(skip_white => 1);
+my $tree = $parser->parse(q~
+	<?xml version="1.0" encoding="utf-8"?>
+	<foo>
+		<woo />
+		<hoopla />
+		<?php echo 'Hello world'; ?>
+	</foo>
+~);
+
+is(&get_node($tree, '0'  )->{type}, 'pi');
+is(&get_node($tree, '1'  )->{type}, 'element');
+is(&get_node($tree, '1/0')->{type}, 'element');
+is(&get_node($tree, '1/1')->{type}, 'element');
+is(&get_node($tree, '1/2')->{type}, 'pi');
+
+is(&get_node($tree, '0'  )->{target}, 'xml');
+is(&get_node($tree, '1/2')->{target}, 'php');
+
+like(&get_node($tree, '0'  )->{content}, qr/^version/);
+like(&get_node($tree, '1/2')->{content}, qr/^echo/);
+
+
+
+
+
+#
+# a super-simple xpath-like function for finding a single given child
+#
+
+sub get_node {
+	my ($tree, $path) = @_;
+	my $node = $tree;
+	if (length $path){
+		my @refs = split /\//, $path;
+		for my $ref (@refs){
+			$node = $node->{children}->[$ref];
+		}
+	}
+	return $node;
+}

Added: branches/upstream/libxml-parser-lite-tree-perl/current/t/05_doctypes.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libxml-parser-lite-tree-perl/current/t/05_doctypes.t?rev=26152&op=file
==============================================================================
--- branches/upstream/libxml-parser-lite-tree-perl/current/t/05_doctypes.t (added)
+++ branches/upstream/libxml-parser-lite-tree-perl/current/t/05_doctypes.t Sat Oct 18 16:32:16 2008
@@ -1,0 +1,41 @@
+use Test::More tests => 5;
+
+use XML::Parser::Lite::Tree;
+
+
+#
+# test processing instructions
+#
+
+my $parser = new XML::Parser::Lite::Tree(skip_white => 1);
+my $tree = $parser->parse(q~
+	<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+		"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+	<foo>
+		<bar />
+	</foo>
+~);
+
+is(&get_node($tree, '0'  )->{type}, 'dtd');
+is(&get_node($tree, '1'  )->{type}, 'element');
+is(&get_node($tree, '1/0')->{type}, 'element');
+
+is(&get_node($tree, '0'  )->{name}, 'html');
+like(&get_node($tree, '0')->{content}, qr/^PUBLIC/);
+
+
+#
+# a super-simple xpath-like function for finding a single given child
+#
+
+sub get_node {
+	my ($tree, $path) = @_;
+	my $node = $tree;
+	if (length $path){
+		my @refs = split /\//, $path;
+		for my $ref (@refs){
+			$node = $node->{children}->[$ref];
+		}
+	}
+	return $node;
+}




More information about the Pkg-perl-cvs-commits mailing list