[liburi-find-delimited-perl] 01/05: [svn-inject] Installing original source of liburi-find-delimited-perl
dom at earth.li
dom at earth.li
Sun Aug 16 18:29:07 UTC 2015
This is an automated email from the git hooks/post-receive script.
dom pushed a commit to branch master
in repository liburi-find-delimited-perl.
commit 69cee4352ed0f869d23e34894e70f04d49f48f36
Author: Dominic Hargreaves <dom at earth.li>
Date: Tue Feb 26 00:06:33 2008 +0000
[svn-inject] Installing original source of liburi-find-delimited-perl
---
Changes | 6 ++
MANIFEST | 6 ++
Makefile.PL | 8 ++
README | 107 +++++++++++++++++++++++
lib/URI/Find/Delimited.pm | 218 ++++++++++++++++++++++++++++++++++++++++++++++
t/delimited.t | 120 +++++++++++++++++++++++++
6 files changed, 465 insertions(+)
diff --git a/Changes b/Changes
new file mode 100644
index 0000000..c74c0af
--- /dev/null
+++ b/Changes
@@ -0,0 +1,6 @@
+0.02 24 March 2003
+ Bugfix (CPAN RT #2245) - turned on URI::URL::strict to stop it
+ assuming that any old thing followed by a colon is a scheme.
+
+0.01 18 February 2003
+ Initial release.
diff --git a/MANIFEST b/MANIFEST
new file mode 100644
index 0000000..bcf040c
--- /dev/null
+++ b/MANIFEST
@@ -0,0 +1,6 @@
+Changes
+MANIFEST
+Makefile.PL
+README
+lib/URI/Find/Delimited.pm
+t/delimited.t
diff --git a/Makefile.PL b/Makefile.PL
new file mode 100644
index 0000000..a9f686f
--- /dev/null
+++ b/Makefile.PL
@@ -0,0 +1,8 @@
+use ExtUtils::MakeMaker;
+
+WriteMakefile( NAME => "URI::Find::Delimited",
+ VERSION_FROM => "lib/URI/Find/Delimited.pm",
+ PREREQ_PM => { 'Test::More' => 0,
+ 'URI::Find' => 0
+ }
+ );
diff --git a/README b/README
new file mode 100644
index 0000000..13e1f6c
--- /dev/null
+++ b/README
@@ -0,0 +1,107 @@
+NAME
+ URI::Find::Delimited - Find URIs which may be wrapped in enclosing
+ delimiters.
+
+DESCRIPTION
+ Works like URI::Find, but is prepared for URIs in your text to be
+ wrapped in a pair of delimiters and optionally have a title. This will
+ be useful for processing text that already has some minimal markup in
+ it, like bulletin board posts or wiki text.
+
+SYNOPSIS
+ my $finder = URI::Find::Delimited->new;
+ my $text = "This is a [http://the.earth.li/ titled link].";
+ $finder->find(\$text);
+ print $text;
+
+METHODS
+ new
+ my $finder = URI::Find::Delimited->new(
+ callback => \&callback,
+ delimiter_re => [ '\[', '\]' ],
+ ignore_quoted => 1 # defaults to 0
+ );
+
+ All arguments are optional; defaults are provided (see below).
+
+ Creates a new URI::Find::Delimited object. This object works
+ similarly to a URI::Find object, but as well as just looking for
+ URIs it is also aware of the concept of a wrapped, titled URI. These
+ look something like
+
+ [http://foo.com/ the foo website]
+
+ where:
+
+ * "[" is the opening delimiter
+ * "]" is the closing delimiter
+ * "http://foo.com/" is the URI
+ * "the foo website" is the title
+ * the URI and title are separated by spaces and/or tabs
+
+ The URI::Find::Delimited object will extract each of these parts
+ separately and pass them to your callback.
+
+ callback
+ "callback" is a function which is called on each URI found. It
+ is passed five arguments: the opening delimiter (if found), the
+ closing delimiter (if found), the URI, the title (if found), and
+ any whitespace found between the URI and title.
+
+ The return value of the callback will replace the original URI
+ in the text.
+
+ If you do not supply your own callback, the object will create a
+ default one which will put your URIs in 'a href' tags using the
+ URI for the target and the title for the link text. If no title
+ is provided for a URI then the URI itself will be used as the
+ title. If the delimiters aren't balanced (eg if the opening one
+ is present but no closing one is found) then the URI is treated
+ as not being wrapped.
+
+ Note: the default callback will not remove the delimiters from
+ the text. It should be simple enough to write your own callback
+ to remove them, based on the one in the source, if that's what
+ you want. In fact there's an example in this distribution, in
+ "t/delimited.t".
+
+ delimiter_re
+ The "delimiter_re" parameter is optional. If you do supply it
+ then it should be a ref to an array containing two regexes. It
+ defaults to using single square brackets as the delimiters.
+
+ Don't use capturing groupings "( )" in your delimiters or things
+ will break. Use non-capturing "(?: )" instead.
+
+ ignore_quoted
+ If the "ignore_quoted" parameter is supplied and set to a true
+ value, then any URIs immediately preceded with a double-quote
+ character will not be matched, ie your callback will not be
+ executed for them and they'll be treated just as normal text.
+
+ This is kinda lame but it's in here because I need to be able to
+ ignore things like
+
+ <img src="http://foo.com/bar.gif">
+
+ A better implementation may happen at some point.
+
+SEE ALSO
+ URI::Find.
+
+AUTHOR
+ Kake Pugh (kake at earth.li).
+
+COPYRIGHT
+ Copyright (C) 2003 Kake Pugh. All Rights Reserved.
+
+ This module is free software; you can redistribute it and/or modify
+ it under the same terms as Perl itself.
+
+CREDITS
+ Tim Bagot helped me stop faffing over the name, by pointing out that
+ RFC 2396 Appendix E uses "delimited". Dave Hinton helped me fix the
+ regex to make it work for delimited URIs with no title. Nick Cleaton
+ helped me make "ignore_quoted" work. Some of the code was taken from
+ URI::Find.
+
diff --git a/lib/URI/Find/Delimited.pm b/lib/URI/Find/Delimited.pm
new file mode 100644
index 0000000..07853b2
--- /dev/null
+++ b/lib/URI/Find/Delimited.pm
@@ -0,0 +1,218 @@
+package URI::Find::Delimited;
+
+use strict;
+
+use vars qw( $VERSION );
+$VERSION = '0.02';
+
+use base qw(URI::Find);
+
+# For 5.005_03 compatibility (copied from URI::Find::Schemeless)
+use URI::Find ();
+
+=head1 NAME
+
+URI::Find::Delimited - Find URIs which may be wrapped in enclosing delimiters.
+
+=head1 DESCRIPTION
+
+Works like L<URI::Find>, but is prepared for URIs in your text to be
+wrapped in a pair of delimiters and optionally have a title. This will
+be useful for processing text that already has some minimal markup in
+it, like bulletin board posts or wiki text.
+
+=head1 SYNOPSIS
+
+ my $finder = URI::Find::Delimited->new;
+ my $text = "This is a [http://the.earth.li/ titled link].";
+ $finder->find(\$text);
+ print $text;
+
+=head1 METHODS
+
+=over 4
+
+=item B<new>
+
+ my $finder = URI::Find::Delimited->new(
+ callback => \&callback,
+ delimiter_re => [ '\[', '\]' ],
+ ignore_quoted => 1 # defaults to 0
+ );
+
+All arguments are optional; defaults are provided (see below).
+
+Creates a new URI::Find::Delimited object. This object works similarly
+to a L<URI::Find> object, but as well as just looking for URIs it is also
+aware of the concept of a wrapped, titled URI. These look something like
+
+ [http://foo.com/ the foo website]
+
+where:
+
+=over 4
+
+=item * C<[> is the opening delimiter
+
+=item * C<]> is the closing delimiter
+
+=item * C<http://foo.com/> is the URI
+
+=item * C<the foo website> is the title
+
+=item * the URI and title are separated by spaces and/or tabs
+
+=back
+
+The URI::Find::Delimited object will extract each of these parts
+separately and pass them to your callback.
+
+=over 4
+
+=item B<callback>
+
+C<callback> is a function which is called on each URI found. It is
+passed five arguments: the opening delimiter (if found), the closing
+delimiter (if found), the URI, the title (if found), and any
+whitespace found between the URI and title.
+
+The return value of the callback will replace the original URI in the
+text.
+
+If you do not supply your own callback, the object will create a
+default one which will put your URIs in 'a href' tags using the URI
+for the target and the title for the link text. If no title is
+provided for a URI then the URI itself will be used as the title. If
+the delimiters aren't balanced (eg if the opening one is present but
+no closing one is found) then the URI is treated as not being wrapped.
+
+Note: the default callback will not remove the delimiters from the
+text. It should be simple enough to write your own callback to remove
+them, based on the one in the source, if that's what you want. In fact
+there's an example in this distribution, in C<t/delimited.t>.
+
+=item B<delimiter_re>
+
+The C<delimiter_re> parameter is optional. If you do supply it then it
+should be a ref to an array containing two regexes. It defaults to
+using single square brackets as the delimiters.
+
+Don't use capturing groupings C<( )> in your delimiters or things
+will break. Use non-capturing C<(?: )> instead.
+
+=item B<ignore_quoted>
+
+If the C<ignore_quoted> parameter is supplied and set to a true value,
+then any URIs immediately preceded with a double-quote character will
+not be matched, ie your callback will not be executed for them and
+they'll be treated just as normal text.
+
+This is kinda lame but it's in here because I need to be able to
+ignore things like
+
+ <img src="http://foo.com/bar.gif">
+
+A better implementation may happen at some point.
+
+=back
+
+=cut
+
+sub new {
+ my ($class, %args) = @_;
+
+ my ( $callback, $delimiter_re, $ignore_quoted ) =
+ @args{ qw( callback delimiter_re ignore_quoted ) };
+
+ unless (defined $callback) {
+ $callback = sub {
+ my ($open, $close, $uri, $title, $whitespace) = @_;
+ if ( $open && $close ) {
+ $title ||= $uri;
+ qq|$open<a href="$uri">$title</a>$close|;
+ } else {
+ qq|$open<a href="$uri">$uri</a>$whitespace$title$close|;
+ }
+ };
+ }
+ $delimiter_re ||= [ '\[', '\]' ];
+
+ my $self = bless { callback => $callback,
+ delimiter_re => $delimiter_re,
+ ignore_quoted => $ignore_quoted
+ }, $class;
+ return $self;
+}
+
+sub find {
+ my($self, $r_text) = @_;
+
+ my $urlsfound = 0;
+
+ URI::URL::strict(1); # Don't assume any old thing followed by : is a scheme
+
+ my $uri_re = $self->uri_re;
+ my $prefix_re = $self->{ignore_quoted} ? '(?<!["a-zA-Z])' : '';
+ my $open_re = $self->{delimiter_re}[0];
+ my $close_re = $self->{delimiter_re}[1];
+
+ # Note we only allow spaces and tabs, not all whitespace, between a URI
+ # and its title. Also we disallow newlines *in* the title. These are
+ # both to avoid the bug where $uri1\n$uri2 leads to $uri2 being considered
+ # as part of the title, and thus not wrapped.
+ $$r_text =~ s{$prefix_re # maybe don't match things preceded by a "
+ (?:
+ ($open_re) # opening delimiter
+ ($uri_re) # the URI itself
+ ([ \t]*) # optional whitespace between URI and title
+ ((?<=[ \t])[^\n$close_re]+)? #title if there was whitespace
+ ($close_re) # closing delimiter
+ |
+ ($uri_re) # just the URI itself
+ )
+ }{
+ my ($open, $uri_match, $whitespace, $title, $close, $just_uri) =
+ ($1, $2, $3, $4, $5, $6);
+ $uri_match = $just_uri if $just_uri;
+ foreach ( $open, $whitespace, $title, $close ) {
+ $_ ||= "";
+ }
+ my $orig_text = qq|$open$uri_match$whitespace$title$close|;
+
+ if( my $uri = $self->_is_uri( \$uri_match ) ) { # if not a false alarm
+ $urlsfound++;
+ $self->{callback}->($open,$close,$uri_match,$title,$whitespace);
+ } else {
+ $orig_text;
+ }
+ }egx;
+
+ return $urlsfound;
+}
+
+=head1 SEE ALSO
+
+L<URI::Find>.
+
+=head1 AUTHOR
+
+Kake Pugh (kake at earth.li).
+
+=head1 COPYRIGHT
+
+ Copyright (C) 2003 Kake Pugh. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=head1 CREDITS
+
+Tim Bagot helped me stop faffing over the name, by pointing out that
+RFC 2396 Appendix E uses "delimited". Dave Hinton helped me fix the
+regex to make it work for delimited URIs with no title. Nick Cleaton
+helped me make C<ignore_quoted> work. Some of the code was taken from
+L<URI::Find>.
+
+=cut
+
+1;
diff --git a/t/delimited.t b/t/delimited.t
new file mode 100644
index 0000000..a7f34f8
--- /dev/null
+++ b/t/delimited.t
@@ -0,0 +1,120 @@
+use strict;
+local $^W = 1;
+
+use Test::More tests => 18;
+
+use_ok( "URI::Find::Delimited" );
+
+my $finder = URI::Find::Delimited->new;
+
+my $text = "This contains no URIs";
+$finder->find(\$text);
+is( $text, qq|This contains no URIs|, "left alone if no URIs" );
+
+$text = "http://the.earth.li/ foo bar";
+$finder->find(\$text);
+like( $text, qr|<a href="http://the.earth.li/">http://the.earth.li/</a>|,
+ "URIs at very start of line are picked up" );
+is( $text, qq|<a href="http://the.earth.li/">http://the.earth.li/</a> foo bar|,
+ "...and don't pick up trailing stuff as a title" );
+
+$text = "foo bar http://the.earth.li/";
+$finder->find(\$text);
+is( $text, qq|foo bar <a href="http://the.earth.li/">http://the.earth.li/</a>|,
+ "URIs at very end of line are picked up" );
+
+$text = "This is a sentence containing http://the.earth.li/";
+$finder->find(\$text);
+is( $text, qq|This is a sentence containing <a href="http://the.earth.li/">http://the.earth.li/</a>|,
+ "URI used as title if no title or delimiters" );
+#print "# $text\n";
+
+$text = "[http://use.perl.org/]";
+$finder->find(\$text);
+is( $text, qq|[<a href="http://use.perl.org/">http://use.perl.org/</a>]|,
+ "delimited URIs are found even if no title" );
+
+$text = "This has a [http://the.earth.li/ usemod link]";
+$finder->find(\$text);
+is( $text, qq|This has a [<a href="http://the.earth.li/">usemod link</a>]|,
+ "title found and used" );
+#print "# $text\n";
+
+$text = "This has a [http://the.earth.li/ broken usemod link";
+$finder->find(\$text);
+is( $text, qq|This has a [<a href="http://the.earth.li/">http://the.earth.li/</a> broken usemod link|,
+ "title ignored when final square bracket missing" );
+#print "# $text\n";
+
+$text = "This has a http://the.earth.li/ broken usemod link]";
+$finder->find(\$text);
+is( $text, qq|This has a <a href="http://the.earth.li/">http://the.earth.li/</a> broken usemod link]|,
+ "title ignored when first square bracket missing" );
+#print "# $text\n";
+
+$text = <<EOT;
+http://the.earth.li/
+http://www.pubs.com/
+EOT
+$finder->find(\$text);
+like( $text, qr|<a href="http://www.pubs.com/">http://www.pubs.com/</a>|,
+ "untitled URI following another untitled URI gets picked up correctly" );
+
+$text = <<EOT;
+http://the.earth.li/
+[http://www.pubs.com/ foo]
+EOT
+$finder->find(\$text);
+like( $text, qr|<a href="http://www.pubs.com/">foo</a>|,
+ "titled URI following untitled URI gets picked up correctly" );
+
+# Test alternative callbacks.
+$finder = URI::Find::Delimited->new(
+ callback => sub {
+ my ($open, $close, $uri, $title, $whitespace) = @_;
+ if ( $open && $close ) {
+ $title ||= $uri;
+ qq|<a href="$uri">$title</a>|;
+ } else {
+ qq|<a href="$uri">$uri</a>$whitespace$title|;
+ }
+ }
+);
+$text = "This has a [http://the.earth.li/ usemod link]";
+$finder->find(\$text);
+is( $text, qq|This has a <a href="http://the.earth.li/">usemod link</a>|,
+ "can override callback" );
+
+# Test alternative delimiters.
+$finder = URI::Find::Delimited->new( delimiter_re => [ '\{', '\}' ] );
+$text = qq|A {http://the.earth.li/ titled link}|;
+$finder->find(\$text);
+is( $text, qq|A {<a href="http://the.earth.li/">titled link</a>}|,
+ "can overrride the delimiters" );
+
+# Test ignoring quoted URIs.
+$finder = URI::Find::Delimited->new;
+$text = qq|This has a <a href="http://the.earth.li/">link already embedded|;
+$finder->find(\$text);
+is( $text, qq|This has a <a href="<a href="http://the.earth.li/">http://the.earth.li/</a>">link already embedded|,
+ "URIs in existing links picked up by default" );
+
+$finder = URI::Find::Delimited->new( ignore_quoted => 0 );
+$text = qq|This has a <a href="http://the.earth.li/">link already embedded|;
+$finder->find(\$text);
+is( $text, qq|This has a <a href="<a href="http://the.earth.li/">http://the.earth.li/</a>">link already embedded|,
+ "...and when ignore_quoted is false" );
+
+$finder = URI::Find::Delimited->new( ignore_quoted => 1 );
+$text = qq|This has a <a href="http://the.earth.li/">link already embedded|;
+$finder->find(\$text);
+is( $text, qq|This has a <a href="http://the.earth.li/">link already embedded|,
+ "...but not when ignore_quoted is true" );
+
+# Bug CPAN RT #2245
+$finder = URI::Find::Delimited->new;
+$text = qq|style:font|;
+$finder->find(\$text);
+is( $text, "style:font",
+ "random things with colons in not automatically assumed to be URIs" );
+
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/liburi-find-delimited-perl.git
More information about the Pkg-perl-cvs-commits
mailing list