[libhtml-scrubber-perl] 01/01: Imported from HTML-Scrubber-0.03.tar.gz.
Florian Schlichting
fsfs at moszumanska.debian.org
Sat Nov 11 13:45:58 UTC 2017
This is an automated email from the git hooks/post-receive script.
fsfs pushed a commit to annotated tag HTML-Scrubber-0.03
in repository libhtml-scrubber-perl.
commit ee48eb29258d5b379a3449a2fa77a974a8c297a9
Author: D. H. <podmaster at cpan.org>
Date: Mon Jul 21 14:57:16 2003 +0000
Imported from HTML-Scrubber-0.03.tar.gz.
---
Changes | 6 +++
MANIFEST | 1 +
META.yml | 10 ++++
README | 2 +-
Scrubber.pm | 147 ++++++++++++++++++++++++++++++++++++++++++--------------
test.pl | 156 +++++++++++++++++++++++++++++++-----------------------------
6 files changed, 209 insertions(+), 113 deletions(-)
diff --git a/Changes b/Changes
index 4eb2d8c..c985e48 100755
--- a/Changes
+++ b/Changes
@@ -1,5 +1,11 @@
Revision history for Perl extension HTML::Scrubber.
+0.03 Mon Jul 21 07:32:10 2003
+ - perltidy ;)
+ - closed http://rt.cpan.org/NoAuth/Bug.html?id=2969
+ now escape spurious >< in text
+ - updated test.pl
+
0.02 Fri Apr 18 14:12:02 2003
- finished TODO, settled on API
- created a cpan worthy distribution and uploaded to CPAN
diff --git a/MANIFEST b/MANIFEST
index 34007ed..e26cca7 100755
--- a/MANIFEST
+++ b/MANIFEST
@@ -5,3 +5,4 @@ MANIFEST.SKIP
README
Scrubber.pm
test.pl
+META.yml Module meta-data (added by MakeMaker)
diff --git a/META.yml b/META.yml
new file mode 100755
index 0000000..b9cfcbc
--- /dev/null
+++ b/META.yml
@@ -0,0 +1,10 @@
+#XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX#
+name: HTML-Scrubber
+version: 0.03
+version_from: Scrubber.pm
+installdirs: site
+requires:
+ HTML::Parser: 3
+
+distribution_type: module
+generated_by: ExtUtils::MakeMaker version 6.10_06
diff --git a/README b/README
index 72c53d6..0a2de5a 100755
--- a/README
+++ b/README
@@ -1,4 +1,4 @@
-HTML/Scrubber version 0.02
+HTML/Scrubber version 0.03
==========================
INSTALLATION
diff --git a/Scrubber.pm b/Scrubber.pm
index c2264c1..d58f28f 100755
--- a/Scrubber.pm
+++ b/Scrubber.pm
@@ -36,7 +36,9 @@ HTML::Scrubber - Perl extension for scrubbing/sanitizing html
=head1 DESCRIPTION
-If you wanna "scrubbing"/"sanitize" html input, this be the modulehtml
+If you wanna "scrub" or "sanitize" html input
+in a reliable an flexible fashion,
+then this module is for you.
I wasn't satisfied with HTML::Sanitizer because it is
based on HTML::TreeBuilder,
@@ -61,7 +63,7 @@ use HTML::Entities;
use vars qw[ $VERSION $_scrub $_scrub_fh ];
use strict;
-$VERSION = '0.02';
+$VERSION = '0.03';
# my my my my, these here to prevent foolishness like
# http://perlmonks.org/index.pl?node_id=251127#Stealing+Lexicals
@@ -71,8 +73,11 @@ $_scrub_fh = [\&_scrub_fh, "self, event, tagname, attr, text"];
sub new {
my $package = shift;
my $p = HTML::Parser->new(
- api_version => 3,
- default_h => $_scrub,
+ api_version => 3,
+ default_h => $_scrub,
+ marked_sections => 0,
+ strict_comment => 0,
+ unbroken_text => 1,
);
my $self = {
@@ -125,6 +130,8 @@ sub allow {
$self->{_rules}{$_}=1 for @_;
+ return unless $self->{_optimize}; # till I figure it out (huh)
+
if( $self->{_p}{'*'} ){ # default allow
$self->{_p}->report_tags(); # so clear it
} else {
@@ -144,6 +151,8 @@ sub deny {
$self->{_rules}{$_} = 0 for @_;
+ return unless $self->{_optimize}; # till I figure it out (huh)
+
$self->{_p}->ignore_tags( # always ignore stuff we don't want
grep {
not $self->{_rules}{$_}
@@ -287,29 +296,68 @@ sub _scrub_fh {
my( $p, $e, $t, $a, $text ) = @_;
my $s = $p->{"\0_s"} ;
- if ( $e eq 'start' ) {
- if( exists $s->{_rules}->{$t} ) { # is there a specific rule
- if( ref $s->{_rules}->{$t} ) { # is it complicated?(not simple;)
- print {$s->{_out}} $s->_validate($t, $t, $a);
- } elsif( $s->{_rules}->{$t} ) {
- # validate using default attribute rule
- print {$s->{_out}} $s->_validate($t, '_', $a);
+ if ( $e eq 'start' )
+ {
+ if( exists $s->{_rules}->{$t} ) # is there a specific rule
+ {
+ if( ref $s->{_rules}->{$t} ) # is it complicated?(not simple;)
+ {
+ print
+ {$s->{_out}}
+ $s->_validate($t, $t, $a);
+ }
+ elsif( $s->{_rules}->{$t} ) # validate using default attribute rule
+ {
+ print
+ {$s->{_out}}
+ $s->_validate($t, '_', $a);
}
- } elsif( $s->{_rules}->{'*'} ) { # default allow tags
- print {$s->{_out}} $s->_validate($t, '_', $a);
}
- } elsif ( $e eq 'end' ) {
- if( exists $s->{_rules}->{$t} ) {
- print {$s->{_out}} "</$t>" if $s->{_rules}->{$t};
- } elsif( $s->{_rules}->{'*'} ) {
+ elsif( $s->{_rules}->{'*'} ) # default allow tags
+ {
+ print
+ {$s->{_out}}
+ $s->_validate($t, '_', $a);
+ }
+ }
+ elsif ( $e eq 'end' )
+ {
+ if( exists $s->{_rules}->{$t} )
+ {
+ print
+ {$s->{_out}}
+ "</$t>"
+ if $s->{_rules}->{$t};
+
+ }
+ elsif( $s->{_rules}->{'*'} )
+ {
+
print {$s->{_out}} "</$t>";
}
- } elsif ( $e eq 'comment' ) {
- print {$s->{_out}} $text if $s->{_comment};
- } elsif ( $e eq 'process' ) {
- print {$s->{_out}} $text if $s->{_process};
- } elsif ( $e eq 'text' or $e eq 'default') {
- print {$s->{_out}} $text;
+ }
+ elsif ( $e eq 'comment' )
+ {
+ print
+ {$s->{_out}}
+ $text
+ if $s->{_comment};
+ }
+ elsif ( $e eq 'process' )
+ {
+ print
+ {$s->{_out}}
+ $text
+ if $s->{_process};
+ }
+ elsif ( $e eq 'text' or $e eq 'default')
+ {
+ $text =~ s/</</g; #https://rt.cpan.org/Ticket/Attachment/8716/10332/scrubber.patch
+ $text =~ s/>/>/g;
+
+ print
+ {$s->{_out}}
+ $text;
}
}
@@ -322,30 +370,52 @@ sub _scrub {
my( $p, $e, $t, $a, $text ) = @_;
my $s = $p->{"\0_s"} ;
- if ( $e eq 'start' ) {
- if( exists $s->{_rules}->{$t} ) { # is there a specific rule
- if( ref $s->{_rules}->{$t} ) { # is it complicated?(not simple;)
+ if ( $e eq 'start' )
+ {
+ if( exists $s->{_rules}->{$t} ) # is there a specific rule
+ {
+ if( ref $s->{_rules}->{$t} ) # is it complicated?(not simple;)
+ {
$s->{_r} .= $s->_validate($t, $t, $a);
- } elsif( $s->{_rules}->{$t} ) {
- # validate using default attribute rule
+ }
+ elsif( $s->{_rules}->{$t} ) # validate using default attribute rule
+ {
$s->{_r} .= $s->_validate($t, '_', $a);
}
- } elsif( $s->{_rules}->{'*'} ) { # default allow tags
+ }
+ elsif( $s->{_rules}->{'*'} ) # default allow tags
+ {
$s->{_r} .= $s->_validate($t, '_', $a);
}
- } elsif ( $e eq 'end' ) {
- if( exists $s->{_rules}->{$t} ) {
+ }
+ elsif ( $e eq 'end' )
+ {
+ if( exists $s->{_rules}->{$t} )
+ {
$s->{_r} .= "</$t>" if $s->{_rules}->{$t};
- } elsif( $s->{_rules}->{'*'} ) {
+ }
+ elsif( $s->{_rules}->{'*'} )
+ {
$s->{_r} .= "</$t>";
}
- } elsif ( $e eq 'comment' ) {
+ }
+ elsif ( $e eq 'comment' )
+ {
$s->{_r} .= $text if $s->{_comment};
- } elsif ( $e eq 'process' ) {
+ }
+ elsif ( $e eq 'process' )
+ {
$s->{_r} .= $text if $s->{_process};
- } elsif ( $e eq 'text' or $e eq 'default') {
+ }
+ elsif ( $e eq 'text' or $e eq 'default')
+ {
+ $text =~ s/</</g; #https://rt.cpan.org/Ticket/Attachment/8716/10332/scrubber.patch
+ $text =~ s/>/>/g;
+
$s->{_r} .= $text;
- } elsif ( $e eq 'start_document' ) {
+ }
+ elsif ( $e eq 'start_document' )
+ {
$s->{_r} = "";
}
}
@@ -353,6 +423,11 @@ sub _scrub {
1;
#print sprintf q[ '%-12s => %s,], "$_'", $h{$_} for sort keys %h;# perl!
+#perl -ne"chomp;print $_;print qq'\t\t# test ', ++$a if /ok\(/;print $/" test.pl >test2.pl
+#perl -ne"chomp;print $_;if( /ok\(/ ){s/\#test \d+$//;print qq'\t\t# test ', ++$a }print $/" test.pl >test2.pl
+#perl -ne"chomp;if(/ok\(/){s/# test .*$//;print$_,qq'\t\t# test ',++$a}else{print$_}print$/" test.pl >test2.pl
+
+=cut
=head1 How does it work?
diff --git a/test.pl b/test.pl
index b87d83f..bacf386 100755
--- a/test.pl
+++ b/test.pl
@@ -6,9 +6,9 @@
# change 'tests => 1' to 'tests => last_test_to_print';
use Test;
-BEGIN { plan tests => 70 };
+BEGIN { plan tests => 77 };
use HTML::Scrubber;
-ok(1); # If we made it this far, we're ok.
+ok(1); # If we made it this far, we're ok. # test 1
#########################
@@ -17,8 +17,9 @@ ok(1); # If we made it this far, we're ok.
my $html = q[
+ <script>//blah</script>
<HR Align="left">
- <B> bold
+ <B> bold <
<U> underlined
<I>
<A href='#"'> LINK </A>
@@ -30,115 +31,118 @@ my $html = q[
my $scrubber = HTML::Scrubber->new();
-ok( $scrubber );
-ok( ! $scrubber->default() );
-ok( ! $scrubber->comment() );
-ok( ! $scrubber->process() );
-ok( ! $scrubber->allow( qw[ p b i u hr br ] ) );
+ok( $scrubber ); # test 2
+ok( ! $scrubber->default() ); # test 3
+ok( ! $scrubber->comment() ); # test 4
+ok( ! $scrubber->process() ); # test 5
+ok( ! $scrubber->allow( qw[ p b i u hr br ] ) ); # test 6
$scrubber = $scrubber->scrub($html);
-ok( $scrubber );
-ok( $scrubber !~ /href/i );
-ok( $scrubber !~ /Align/i );
-ok( $scrubber !~ /\Q<!--\E/ );
+ok( $scrubber ); # test 7
+ok( $scrubber !~ /href/i ); # test 8
+ok( $scrubber !~ /Align/i ); # test 9
+ok( $scrubber !~ /\Q<!--\E/ ); # test 10
+ok( $scrubber =~ /bold </ ); # test 11
$scrubber = HTML::Scrubber->new( deny => [ qw[ p b i u hr br ] ] );
-ok( $scrubber );
-ok( ! $scrubber->default() );
-ok( ! $scrubber->comment() );
-ok( ! $scrubber->process() );
+ok( $scrubber ); # test 12
+ok( ! $scrubber->default() ); # test 13
+ok( ! $scrubber->comment() ); # test 14
+ok( ! $scrubber->process() ); # test 15
$scrubber = $scrubber->scrub($html);
-ok( $scrubber );
-ok( $scrubber !~ /[><]/ );
-ok( $scrubber !~ /href/i );
-ok( $scrubber !~ /Align/i );
-ok( $scrubber !~ /\Q<!--\E/ );
+ok( $scrubber ); # test 16
+ok( $scrubber !~ /[><]/ ); # test 17
+ok( $scrubber !~ /href/i ); # test 18
+ok( $scrubber !~ /Align/i ); # test 19
+ok( $scrubber !~ /\Q<!--\E/ ); # test 20
+ok( $scrubber =~ /bold </ ); # test 21
$scrubber = HTML::Scrubber->new( default => [ 0 ] );
-ok( $scrubber );
-ok( ! $scrubber->default() );
-ok( ! $scrubber->comment() );
-ok( ! $scrubber->process() );
+ok( $scrubber ); # test 22
+ok( ! $scrubber->default() ); # test 23
+ok( ! $scrubber->comment() ); # test 24
+ok( ! $scrubber->process() ); # test 25
$scrubber = $scrubber->scrub($html);
-ok( $scrubber );
-ok( $scrubber !~ /[><]/ );
-ok( $scrubber !~ /href/i );
-ok( $scrubber !~ /Align/i );
-ok( $scrubber !~ /\Q<!--\E/ );
+ok( $scrubber ); # test 26
+ok( $scrubber !~ /[><]/ ); # test 27
+ok( $scrubber !~ /href/i ); # test 28
+ok( $scrubber !~ /Align/i ); # test 29
+ok( $scrubber !~ /\Q<!--\E/ ); # test 30
+ok( $scrubber =~ /bold </ ); # test 31
$scrubber = HTML::Scrubber->new( default => [ 1 ] );
-ok( $scrubber );
-ok( $scrubber->default() );
-ok( ! $scrubber->comment() );
-ok( ! $scrubber->process() );
+ok( $scrubber ); # test 32
+ok( $scrubber->default() ); # test 33
+ok( ! $scrubber->comment() ); # test 34
+ok( ! $scrubber->process() ); # test 35
$scrubber = $scrubber->scrub($html);
-ok( $scrubber );
-ok( $scrubber =~ /[><]/ );
-ok( $scrubber !~ /href/i );
-ok( $scrubber !~ /Align/i );
-ok( $scrubber !~ /\Q<!--\E/ );
+ok( $scrubber ); # test 36
+ok( $scrubber =~ /[><]/ ); # test 37
+ok( $scrubber !~ /href/i ); # test 38
+ok( $scrubber !~ /Align/i ); # test 39
+ok( $scrubber !~ /\Q<!--\E/ ); # test 40
+ok( $scrubber =~ /bold </ ); # test 41
$scrubber = HTML::Scrubber->new( default => [ 1 ] );
-ok( $scrubber );
-ok( $scrubber->default() );
-ok( ! $scrubber->comment() );
-ok( ! $scrubber->process() );
-ok( ! $scrubber->comment(1) );
+ok( $scrubber ); # test 42
+ok( $scrubber->default() ); # test 43
+ok( ! $scrubber->comment() ); # test 44
+ok( ! $scrubber->process() ); # test 45
+ok( ! $scrubber->comment(1) ); # test 46
$scrubber = $scrubber->scrub($html);
-ok( $scrubber );
-ok( $scrubber =~ /[><]/ );
-ok( $scrubber !~ /href/i );
-ok( $scrubber !~ /Align/i );
-ok( $scrubber =~ /\Q<!--\E/ );
-
+ok( $scrubber ); # test 47
+ok( $scrubber =~ /[><]/ ); # test 48
+ok( $scrubber !~ /href/i ); # test 49
+ok( $scrubber !~ /Align/i ); # test 50
+ok( $scrubber =~ /\Q<!--\E/ ); # test 51
+ok( $scrubber =~ /bold </ ); # test 52
$scrubber = HTML::Scrubber->new( default => [ 1 => { align => 1, '*' => 0 } ] );
-ok( $scrubber );
-ok( $scrubber->default() );
-ok( ! $scrubber->comment() );
-ok( ! $scrubber->process() );
-ok( ! $scrubber->comment(1) );
+ok( $scrubber ); # test 53
+ok( $scrubber->default() ); # test 54
+ok( ! $scrubber->comment() ); # test 55
+ok( ! $scrubber->process() ); # test 56
+ok( ! $scrubber->comment(1) ); # test 57
$scrubber = $scrubber->scrub($html);
-ok( $scrubber );
-ok( $scrubber =~ /[><]/ );
-ok( $scrubber !~ /href/i );
-ok( $scrubber =~ /Align/i );
-ok( $scrubber =~ /\Q<!--\E/ );
-ok( $scrubber =~ /"left"/ );
+ok( $scrubber ); # test 58
+ok( $scrubber =~ /[><]/ ); # test 59
+ok( $scrubber !~ /href/i ); # test 60
+ok( $scrubber =~ /Align/i ); # test 61
+ok( $scrubber =~ /\Q<!--\E/ ); # test 62
+ok( $scrubber =~ /"left"/ ); # test 63
+ok( $scrubber =~ /bold </ ); # test 64
$scrubber = HTML::Scrubber->new( default => [ 1 => { align => 0, '*' => 1 } ] );
-ok( $scrubber );
-ok( $scrubber->default() );
-ok( ! $scrubber->comment() );
-ok( ! $scrubber->process() );
-ok( ! $scrubber->comment(1) );
-
+ok( $scrubber ); # test 65
+ok( $scrubber->default() ); # test 66
+ok( ! $scrubber->comment() ); # test 67
+ok( ! $scrubber->process() ); # test 68
+ok( ! $scrubber->comment(1) ); # test 69
$scrubber = $scrubber->scrub($html);
-ok( $scrubber );
-ok( $scrubber =~ /[><]/ );
-ok( $scrubber =~ /href/i );
-ok( $scrubber !~ /Align/i );
-ok( $scrubber =~ /\Q<!--\E/ );
-ok( $scrubber =~ /\Q"\E/ );
-ok( $scrubber =~ /\#/ );
-
-
+ok( $scrubber ); # test 70
+ok( $scrubber =~ /[><]/ ); # test 71
+ok( $scrubber =~ /href/i ); # test 72
+ok( $scrubber !~ /Align/i ); # test 73
+ok( $scrubber =~ /\Q<!--\E/ ); # test 74
+ok( $scrubber =~ /\Q"\E/ ); # test 75
+ok( $scrubber =~ /\#/ ); # test 76
+ok( $scrubber =~ /bold </ ); # test 77
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libhtml-scrubber-perl.git
More information about the Pkg-perl-cvs-commits
mailing list