[libhtml-scrubber-perl] 01/01: Imported from HTML-Scrubber-0.03.tar.gz.

Sat Nov 11 13:45:58 UTC 2017

This is an automated email from the git hooks/post-receive script.

fsfs pushed a commit to annotated tag HTML-Scrubber-0.03
in repository libhtml-scrubber-perl.

commit ee48eb29258d5b379a3449a2fa77a974a8c297a9
Author: D. H. <podmaster at cpan.org>
Date:   Mon Jul 21 14:57:16 2003 +0000

    Imported from HTML-Scrubber-0.03.tar.gz.
---
 Changes     |   6 +++
 MANIFEST    |   1 +
 META.yml    |  10 ++++
 README      |   2 +-
 Scrubber.pm | 147 ++++++++++++++++++++++++++++++++++++++++++--------------
 test.pl     | 156 +++++++++++++++++++++++++++++++-----------------------------
 6 files changed, 209 insertions(+), 113 deletions(-)

diff --git a/Changes b/Changes
index 4eb2d8c..c985e48 100755
--- a/Changes
+++ b/Changes
@@ -1,5 +1,11 @@
 Revision history for Perl extension HTML::Scrubber.
 
+0.03  Mon Jul 21 07:32:10 2003
+    - perltidy ;)
+    - closed http://rt.cpan.org/NoAuth/Bug.html?id=2969
+      now escape spurious >< in text
+    - updated test.pl
+
 0.02  Fri Apr 18 14:12:02 2003
     - finished TODO, settled on API
     - created a cpan worthy distribution and uploaded to CPAN
diff --git a/MANIFEST b/MANIFEST
index 34007ed..e26cca7 100755
--- a/MANIFEST
+++ b/MANIFEST
@@ -5,3 +5,4 @@ MANIFEST.SKIP
 README
 Scrubber.pm
 test.pl
+META.yml                                Module meta-data (added by MakeMaker)
diff --git a/META.yml b/META.yml
new file mode 100755
index 0000000..b9cfcbc
--- /dev/null
+++ b/META.yml
@@ -0,0 +1,10 @@
+#XXXXXXX This is a prototype!!!  It will change in the future!!! XXXXX#
+name:         HTML-Scrubber
+version:      0.03
+version_from: Scrubber.pm
+installdirs:  site
+requires:
+    HTML::Parser:                  3
+
+distribution_type: module
+generated_by: ExtUtils::MakeMaker version 6.10_06
diff --git a/README b/README
index 72c53d6..0a2de5a 100755
--- a/README
+++ b/README
@@ -1,4 +1,4 @@
-HTML/Scrubber version 0.02
+HTML/Scrubber version 0.03
 ==========================
 
 INSTALLATION
diff --git a/Scrubber.pm b/Scrubber.pm
index c2264c1..d58f28f 100755
--- a/Scrubber.pm
+++ b/Scrubber.pm
@@ -36,7 +36,9 @@ HTML::Scrubber - Perl extension for scrubbing/sanitizing html
 
 =head1 DESCRIPTION
 
-If you wanna "scrubbing"/"sanitize" html input, this be the modulehtml
+If you wanna "scrub" or "sanitize" html input
+in a reliable an flexible fashion,
+then this module is for you.
 
 I wasn't satisfied with HTML::Sanitizer because it is
 based on HTML::TreeBuilder,
@@ -61,7 +63,7 @@ use HTML::Entities;
 use vars qw[ $VERSION $_scrub $_scrub_fh ];
 use strict;
 
-$VERSION = '0.02';
+$VERSION = '0.03';
 
 # my my my my, these here to prevent foolishness like 
 # http://perlmonks.org/index.pl?node_id=251127#Stealing+Lexicals
@@ -71,8 +73,11 @@ $_scrub_fh = [\&_scrub_fh, "self, event, tagname, attr, text"];
 sub new {
     my $package = shift;
     my $p = HTML::Parser->new(
-        api_version => 3,
-        default_h => $_scrub,
+        api_version     => 3,
+        default_h       => $_scrub,
+        marked_sections => 0,
+        strict_comment  => 0,
+        unbroken_text   => 1,
     );
 
     my $self = {
@@ -125,6 +130,8 @@ sub allow {
 
     $self->{_rules}{$_}=1 for @_;
 
+    return unless $self->{_optimize}; # till I figure it out (huh)
+
     if( $self->{_p}{'*'} ){       # default allow
         $self->{_p}->report_tags();   # so clear it
     } else {
@@ -144,6 +151,8 @@ sub deny {
 
     $self->{_rules}{$_} = 0 for @_;
 
+    return unless $self->{_optimize}; # till I figure it out (huh)
+
     $self->{_p}->ignore_tags( # always ignore stuff we don't want
         grep {
             not $self->{_rules}{$_}
@@ -287,29 +296,68 @@ sub _scrub_fh {
     my( $p, $e, $t, $a, $text ) = @_;
     my $s = $p->{"\0_s"} ;
 
-    if ( $e eq 'start' ) {
-        if( exists $s->{_rules}->{$t} ) {  # is there a specific rule
-            if( ref $s->{_rules}->{$t} ) { # is it complicated?(not simple;)
-                print {$s->{_out}} $s->_validate($t, $t, $a);
-            } elsif( $s->{_rules}->{$t} ) {
-        # validate using default attribute rule
-                print {$s->{_out}} $s->_validate($t, '_', $a);
+    if ( $e eq 'start' )
+    {
+        if( exists $s->{_rules}->{$t} )  # is there a specific rule
+        {
+            if( ref $s->{_rules}->{$t} ) # is it complicated?(not simple;)
+            { 
+                print
+                    {$s->{_out}}
+                        $s->_validate($t, $t, $a);
+            }
+            elsif( $s->{_rules}->{$t} ) # validate using default attribute rule
+            {
+                print
+                    {$s->{_out}}
+                        $s->_validate($t, '_', $a);
             }
-        } elsif( $s->{_rules}->{'*'} ) { # default allow tags
-            print {$s->{_out}} $s->_validate($t, '_', $a);
         }
-    } elsif ( $e eq 'end' ) {
-        if( exists $s->{_rules}->{$t} ) {
-            print {$s->{_out}} "</$t>" if $s->{_rules}->{$t};
-        } elsif( $s->{_rules}->{'*'} ) {
+        elsif( $s->{_rules}->{'*'} ) # default allow tags
+        {
+            print
+                {$s->{_out}}
+                    $s->_validate($t, '_', $a);
+        }
+    }
+    elsif ( $e eq 'end' )
+    {    
+        if( exists $s->{_rules}->{$t} )
+        {
+            print
+                {$s->{_out}}
+                    "</$t>"
+                        if $s->{_rules}->{$t};
+                        
+        }
+        elsif( $s->{_rules}->{'*'} )
+        {
+        
             print {$s->{_out}} "</$t>";
         }
-    } elsif ( $e eq 'comment' ) {
-        print {$s->{_out}} $text if $s->{_comment};
-    } elsif ( $e eq 'process' ) {
-        print {$s->{_out}} $text if $s->{_process};
-    } elsif ( $e eq 'text' or $e eq 'default') {
-        print {$s->{_out}} $text;
+    }
+    elsif ( $e eq 'comment' )
+    {
+        print
+            {$s->{_out}}
+                $text
+                    if $s->{_comment};
+    }
+    elsif ( $e eq 'process' )
+    {
+        print
+            {$s->{_out}}
+                $text
+                    if $s->{_process};
+    }
+    elsif ( $e eq 'text' or $e eq 'default')
+    {
+        $text =~ s/</</g; #https://rt.cpan.org/Ticket/Attachment/8716/10332/scrubber.patch
+        $text =~ s/>/>/g;
+
+        print
+            {$s->{_out}}
+                $text;
     }
 }
 
@@ -322,30 +370,52 @@ sub _scrub {
     my( $p, $e, $t, $a, $text ) = @_;
     my $s = $p->{"\0_s"} ;
 
-    if ( $e eq 'start' ) {
-        if( exists $s->{_rules}->{$t} ) {  # is there a specific rule
-            if( ref $s->{_rules}->{$t} ) { # is it complicated?(not simple;)
+    if ( $e eq 'start' )
+    {
+        if( exists $s->{_rules}->{$t} )  # is there a specific rule
+        {  
+            if( ref $s->{_rules}->{$t} ) # is it complicated?(not simple;)
+            {
                 $s->{_r} .= $s->_validate($t, $t, $a);
-            } elsif( $s->{_rules}->{$t} ) {
-        # validate using default attribute rule
+            }
+            elsif( $s->{_rules}->{$t} )  # validate using default attribute rule
+            {
                 $s->{_r} .= $s->_validate($t, '_', $a);
             }
-        } elsif( $s->{_rules}->{'*'} ) { # default allow tags
+        }
+        elsif( $s->{_rules}->{'*'} )     # default allow tags
+        { 
             $s->{_r} .= $s->_validate($t, '_', $a);
         }
-    } elsif ( $e eq 'end' ) {
-        if( exists $s->{_rules}->{$t} ) {
+    }
+    elsif ( $e eq 'end' )
+    {
+        if( exists $s->{_rules}->{$t} )
+        {
             $s->{_r} .= "</$t>" if $s->{_rules}->{$t};
-        } elsif( $s->{_rules}->{'*'} ) {
+        }
+        elsif( $s->{_rules}->{'*'} )
+        {
             $s->{_r} .= "</$t>";
         }
-    } elsif ( $e eq 'comment' ) {
+    }
+    elsif ( $e eq 'comment' )
+    {
         $s->{_r} .= $text if $s->{_comment};
-    } elsif ( $e eq 'process' ) {
+    }
+    elsif ( $e eq 'process' )
+    {
         $s->{_r} .= $text if $s->{_process};
-    } elsif ( $e eq 'text' or $e eq 'default') {
+    }
+    elsif ( $e eq 'text' or $e eq 'default')
+    {
+        $text =~ s/</</g; #https://rt.cpan.org/Ticket/Attachment/8716/10332/scrubber.patch
+        $text =~ s/>/>/g;
+
         $s->{_r} .= $text;
-    } elsif ( $e eq 'start_document' ) {
+    }
+    elsif ( $e eq 'start_document' )
+    {
         $s->{_r} = "";
     }
 }
@@ -353,6 +423,11 @@ sub _scrub {
 1;
 
 #print sprintf q[ '%-12s => %s,], "$_'", $h{$_} for sort keys %h;# perl!
+#perl -ne"chomp;print $_;print qq'\t\t# test ', ++$a if /ok\(/;print $/" test.pl >test2.pl
+#perl -ne"chomp;print $_;if( /ok\(/ ){s/\#test \d+$//;print qq'\t\t# test ', ++$a }print $/" test.pl >test2.pl
+#perl -ne"chomp;if(/ok\(/){s/# test .*$//;print$_,qq'\t\t# test ',++$a}else{print$_}print$/" test.pl >test2.pl
+
+=cut
 
 =head1 How does it work?
 
diff --git a/test.pl b/test.pl
index b87d83f..bacf386 100755
--- a/test.pl
+++ b/test.pl
@@ -6,9 +6,9 @@
 # change 'tests => 1' to 'tests => last_test_to_print';
 
 use Test;
-BEGIN { plan tests => 70 };
+BEGIN { plan tests => 77 };
 use HTML::Scrubber;
-ok(1); # If we made it this far, we're ok.
+ok(1); # If we made it this far, we're ok.				# test 1
 
 #########################
 
@@ -17,8 +17,9 @@ ok(1); # If we made it this far, we're ok.
 
 
 my $html = q[
+    <script>//blah</script>
     <HR Align="left">
-    <B> bold
+    <B> bold <
         <U> underlined
             <I>
                 <A href='#"'>  LINK    </A>
@@ -30,115 +31,118 @@ my $html = q[
 
 my $scrubber = HTML::Scrubber->new();
 
-ok( $scrubber );
-ok( ! $scrubber->default() );
-ok( ! $scrubber->comment() );
-ok( ! $scrubber->process() );
-ok( ! $scrubber->allow( qw[ p b i u hr br ] ) );
+ok( $scrubber );				# test 2
+ok( ! $scrubber->default() );				# test 3
+ok( ! $scrubber->comment() );				# test 4
+ok( ! $scrubber->process() );				# test 5
+ok( ! $scrubber->allow( qw[ p b i u hr br ] ) );				# test 6
 
 $scrubber = $scrubber->scrub($html);
 
-ok( $scrubber );
-ok( $scrubber !~ /href/i );
-ok( $scrubber !~ /Align/i );
-ok( $scrubber !~ /\Q<!--\E/ );
+ok( $scrubber );				# test 7
+ok( $scrubber !~ /href/i );				# test 8
+ok( $scrubber !~ /Align/i );				# test 9
+ok( $scrubber !~ /\Q<!--\E/ );				# test 10
+ok( $scrubber =~ /bold </ );		# test 11
 
 $scrubber = HTML::Scrubber->new( deny => [ qw[ p b i u hr br ] ] );
 
-ok( $scrubber );
-ok( ! $scrubber->default() );
-ok( ! $scrubber->comment() );
-ok( ! $scrubber->process() );
+ok( $scrubber );				# test 12
+ok( ! $scrubber->default() );				# test 13
+ok( ! $scrubber->comment() );				# test 14
+ok( ! $scrubber->process() );				# test 15
 
 $scrubber = $scrubber->scrub($html);
 
-ok( $scrubber );
-ok( $scrubber !~ /[><]/ );
-ok( $scrubber !~ /href/i );
-ok( $scrubber !~ /Align/i );
-ok( $scrubber !~ /\Q<!--\E/ );
+ok( $scrubber );				# test 16
+ok( $scrubber !~ /[><]/ );				# test 17
+ok( $scrubber !~ /href/i );				# test 18
+ok( $scrubber !~ /Align/i );				# test 19
+ok( $scrubber !~ /\Q<!--\E/ );				# test 20
+ok( $scrubber =~ /bold </ );		# test 21
 
 $scrubber = HTML::Scrubber->new( default => [ 0 ] );
 
-ok( $scrubber );
-ok( ! $scrubber->default() );
-ok( ! $scrubber->comment() );
-ok( ! $scrubber->process() );
+ok( $scrubber );				# test 22
+ok( ! $scrubber->default() );				# test 23
+ok( ! $scrubber->comment() );				# test 24
+ok( ! $scrubber->process() );				# test 25
 
 $scrubber = $scrubber->scrub($html);
 
-ok( $scrubber );
-ok( $scrubber !~ /[><]/ );
-ok( $scrubber !~ /href/i );
-ok( $scrubber !~ /Align/i );
-ok( $scrubber !~ /\Q<!--\E/ );
+ok( $scrubber );				# test 26
+ok( $scrubber !~ /[><]/ );				# test 27
+ok( $scrubber !~ /href/i );				# test 28
+ok( $scrubber !~ /Align/i );				# test 29
+ok( $scrubber !~ /\Q<!--\E/ );				# test 30
+ok( $scrubber =~ /bold </ );		# test 31
 
 $scrubber = HTML::Scrubber->new( default => [ 1 ] );
 
-ok( $scrubber );
-ok( $scrubber->default() );
-ok( ! $scrubber->comment() );
-ok( ! $scrubber->process() );
+ok( $scrubber );				# test 32
+ok( $scrubber->default() );				# test 33
+ok( ! $scrubber->comment() );				# test 34
+ok( ! $scrubber->process() );				# test 35
 
 $scrubber = $scrubber->scrub($html);
 
-ok( $scrubber );
-ok( $scrubber =~ /[><]/ );
-ok( $scrubber !~ /href/i );
-ok( $scrubber !~ /Align/i );
-ok( $scrubber !~ /\Q<!--\E/ );
+ok( $scrubber );				# test 36
+ok( $scrubber =~ /[><]/ );				# test 37
+ok( $scrubber !~ /href/i );				# test 38
+ok( $scrubber !~ /Align/i );				# test 39
+ok( $scrubber !~ /\Q<!--\E/ );				# test 40
+ok( $scrubber =~ /bold </ );		# test 41
 
 $scrubber = HTML::Scrubber->new( default => [ 1 ] );
 
-ok( $scrubber );
-ok( $scrubber->default() );
-ok( ! $scrubber->comment() );
-ok( ! $scrubber->process() );
-ok( ! $scrubber->comment(1) );
+ok( $scrubber );				# test 42
+ok( $scrubber->default() );				# test 43
+ok( ! $scrubber->comment() );				# test 44
+ok( ! $scrubber->process() );				# test 45
+ok( ! $scrubber->comment(1) );				# test 46
 
 $scrubber = $scrubber->scrub($html);
 
-ok( $scrubber );
-ok( $scrubber =~ /[><]/ );
-ok( $scrubber !~ /href/i );
-ok( $scrubber !~ /Align/i );
-ok( $scrubber =~ /\Q<!--\E/ );
-
+ok( $scrubber );				# test 47
+ok( $scrubber =~ /[><]/ );				# test 48
+ok( $scrubber !~ /href/i );				# test 49
+ok( $scrubber !~ /Align/i );				# test 50
+ok( $scrubber =~ /\Q<!--\E/ );				# test 51
+ok( $scrubber =~ /bold </ );		# test 52
 
 
 $scrubber = HTML::Scrubber->new( default => [ 1 => { align => 1, '*' => 0 } ] );
 
-ok( $scrubber );
-ok( $scrubber->default() );
-ok( ! $scrubber->comment() );
-ok( ! $scrubber->process() );
-ok( ! $scrubber->comment(1) );
+ok( $scrubber );				# test 53
+ok( $scrubber->default() );				# test 54
+ok( ! $scrubber->comment() );				# test 55
+ok( ! $scrubber->process() );				# test 56
+ok( ! $scrubber->comment(1) );				# test 57
 
 $scrubber = $scrubber->scrub($html);
 
-ok( $scrubber );
-ok( $scrubber =~ /[><]/ );
-ok( $scrubber !~ /href/i );
-ok( $scrubber =~ /Align/i );
-ok( $scrubber =~ /\Q<!--\E/ );
-ok( $scrubber =~ /"left"/ );
+ok( $scrubber );				# test 58
+ok( $scrubber =~ /[><]/ );				# test 59
+ok( $scrubber !~ /href/i );				# test 60
+ok( $scrubber =~ /Align/i );				# test 61
+ok( $scrubber =~ /\Q<!--\E/ );				# test 62
+ok( $scrubber =~ /"left"/ );				# test 63
+ok( $scrubber =~ /bold </ );		# test 64
 
 $scrubber = HTML::Scrubber->new( default => [ 1 => { align => 0, '*' => 1 } ] );
 
-ok( $scrubber );
-ok( $scrubber->default() );
-ok( ! $scrubber->comment() );
-ok( ! $scrubber->process() );
-ok( ! $scrubber->comment(1) );
-
+ok( $scrubber );				# test 65
+ok( $scrubber->default() );				# test 66
+ok( ! $scrubber->comment() );				# test 67
+ok( ! $scrubber->process() );				# test 68
+ok( ! $scrubber->comment(1) );				# test 69
 $scrubber = $scrubber->scrub($html);
 
-ok( $scrubber );
-ok( $scrubber =~ /[><]/ );
-ok( $scrubber =~ /href/i );
-ok( $scrubber !~ /Align/i );
-ok( $scrubber =~ /\Q<!--\E/ );
-ok( $scrubber =~ /\Q&quot\E/ );
-ok( $scrubber =~ /\#/ );
-
-
+ok( $scrubber );				# test 70
+ok( $scrubber =~ /[><]/ );				# test 71
+ok( $scrubber =~ /href/i );				# test 72
+ok( $scrubber !~ /Align/i );				# test 73
+ok( $scrubber =~ /\Q<!--\E/ );				# test 74
+ok( $scrubber =~ /\Q&quot\E/ );				# test 75
+ok( $scrubber =~ /\#/ );				# test 76
+ok( $scrubber =~ /bold </ );		# test 77

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libhtml-scrubber-perl.git