[liburi-find-delimited-perl] 01/05: [svn-inject] Installing original source of liburi-find-delimited-perl

dom at earth.li dom at earth.li
Sun Aug 16 18:29:07 UTC 2015


This is an automated email from the git hooks/post-receive script.

dom pushed a commit to branch master
in repository liburi-find-delimited-perl.

commit 69cee4352ed0f869d23e34894e70f04d49f48f36
Author: Dominic Hargreaves <dom at earth.li>
Date:   Tue Feb 26 00:06:33 2008 +0000

    [svn-inject] Installing original source of liburi-find-delimited-perl
---
 Changes                   |   6 ++
 MANIFEST                  |   6 ++
 Makefile.PL               |   8 ++
 README                    | 107 +++++++++++++++++++++++
 lib/URI/Find/Delimited.pm | 218 ++++++++++++++++++++++++++++++++++++++++++++++
 t/delimited.t             | 120 +++++++++++++++++++++++++
 6 files changed, 465 insertions(+)

diff --git a/Changes b/Changes
new file mode 100644
index 0000000..c74c0af
--- /dev/null
+++ b/Changes
@@ -0,0 +1,6 @@
+0.02    24 March 2003
+        Bugfix (CPAN RT #2245) - turned on URI::URL::strict to stop it
+          assuming that any old thing followed by a colon is a scheme.
+
+0.01    18 February 2003
+        Initial release.
diff --git a/MANIFEST b/MANIFEST
new file mode 100644
index 0000000..bcf040c
--- /dev/null
+++ b/MANIFEST
@@ -0,0 +1,6 @@
+Changes
+MANIFEST
+Makefile.PL
+README
+lib/URI/Find/Delimited.pm
+t/delimited.t
diff --git a/Makefile.PL b/Makefile.PL
new file mode 100644
index 0000000..a9f686f
--- /dev/null
+++ b/Makefile.PL
@@ -0,0 +1,8 @@
+use ExtUtils::MakeMaker;
+
+WriteMakefile( NAME         => "URI::Find::Delimited",
+               VERSION_FROM => "lib/URI/Find/Delimited.pm",
+               PREREQ_PM    => { 'Test::More'       => 0,
+                                 'URI::Find'        => 0
+                               }
+             );
diff --git a/README b/README
new file mode 100644
index 0000000..13e1f6c
--- /dev/null
+++ b/README
@@ -0,0 +1,107 @@
+NAME
+    URI::Find::Delimited - Find URIs which may be wrapped in enclosing
+    delimiters.
+
+DESCRIPTION
+    Works like URI::Find, but is prepared for URIs in your text to be
+    wrapped in a pair of delimiters and optionally have a title. This will
+    be useful for processing text that already has some minimal markup in
+    it, like bulletin board posts or wiki text.
+
+SYNOPSIS
+      my $finder = URI::Find::Delimited->new;
+      my $text = "This is a [http://the.earth.li/ titled link].";
+      $finder->find(\$text);
+      print $text;
+
+METHODS
+    new
+          my $finder = URI::Find::Delimited->new(
+              callback      => \&callback,
+              delimiter_re  => [ '\[', '\]' ],
+              ignore_quoted => 1               # defaults to 0
+          );
+
+        All arguments are optional; defaults are provided (see below).
+
+        Creates a new URI::Find::Delimited object. This object works
+        similarly to a URI::Find object, but as well as just looking for
+        URIs it is also aware of the concept of a wrapped, titled URI. These
+        look something like
+
+          [http://foo.com/ the foo website]
+
+        where:
+
+        * "[" is the opening delimiter
+        * "]" is the closing delimiter
+        * "http://foo.com/" is the URI
+        * "the foo website" is the title
+        * the URI and title are separated by spaces and/or tabs
+
+        The URI::Find::Delimited object will extract each of these parts
+        separately and pass them to your callback.
+
+        callback
+            "callback" is a function which is called on each URI found. It
+            is passed five arguments: the opening delimiter (if found), the
+            closing delimiter (if found), the URI, the title (if found), and
+            any whitespace found between the URI and title.
+
+            The return value of the callback will replace the original URI
+            in the text.
+
+            If you do not supply your own callback, the object will create a
+            default one which will put your URIs in 'a href' tags using the
+            URI for the target and the title for the link text. If no title
+            is provided for a URI then the URI itself will be used as the
+            title. If the delimiters aren't balanced (eg if the opening one
+            is present but no closing one is found) then the URI is treated
+            as not being wrapped.
+
+            Note: the default callback will not remove the delimiters from
+            the text. It should be simple enough to write your own callback
+            to remove them, based on the one in the source, if that's what
+            you want. In fact there's an example in this distribution, in
+            "t/delimited.t".
+
+        delimiter_re
+            The "delimiter_re" parameter is optional. If you do supply it
+            then it should be a ref to an array containing two regexes. It
+            defaults to using single square brackets as the delimiters.
+
+            Don't use capturing groupings "( )" in your delimiters or things
+            will break. Use non-capturing "(?: )" instead.
+
+        ignore_quoted
+            If the "ignore_quoted" parameter is supplied and set to a true
+            value, then any URIs immediately preceded with a double-quote
+            character will not be matched, ie your callback will not be
+            executed for them and they'll be treated just as normal text.
+
+            This is kinda lame but it's in here because I need to be able to
+            ignore things like
+
+              <img src="http://foo.com/bar.gif">
+
+            A better implementation may happen at some point.
+
+SEE ALSO
+        URI::Find.
+
+AUTHOR
+        Kake Pugh (kake at earth.li).
+
+COPYRIGHT
+             Copyright (C) 2003 Kake Pugh.  All Rights Reserved.
+
+        This module is free software; you can redistribute it and/or modify
+        it under the same terms as Perl itself.
+
+CREDITS
+        Tim Bagot helped me stop faffing over the name, by pointing out that
+        RFC 2396 Appendix E uses "delimited". Dave Hinton helped me fix the
+        regex to make it work for delimited URIs with no title. Nick Cleaton
+        helped me make "ignore_quoted" work. Some of the code was taken from
+        URI::Find.
+
diff --git a/lib/URI/Find/Delimited.pm b/lib/URI/Find/Delimited.pm
new file mode 100644
index 0000000..07853b2
--- /dev/null
+++ b/lib/URI/Find/Delimited.pm
@@ -0,0 +1,218 @@
+package URI::Find::Delimited;
+
+use strict;
+
+use vars qw( $VERSION );
+$VERSION = '0.02';
+
+use base qw(URI::Find);
+
+# For 5.005_03 compatibility (copied from URI::Find::Schemeless)
+use URI::Find ();
+
+=head1 NAME
+
+URI::Find::Delimited - Find URIs which may be wrapped in enclosing delimiters.
+
+=head1 DESCRIPTION
+
+Works like L<URI::Find>, but is prepared for URIs in your text to be
+wrapped in a pair of delimiters and optionally have a title. This will
+be useful for processing text that already has some minimal markup in
+it, like bulletin board posts or wiki text.
+
+=head1 SYNOPSIS
+
+  my $finder = URI::Find::Delimited->new;
+  my $text = "This is a [http://the.earth.li/ titled link].";
+  $finder->find(\$text);
+  print $text;
+
+=head1 METHODS
+
+=over 4
+
+=item B<new>
+
+  my $finder = URI::Find::Delimited->new(
+      callback      => \&callback,
+      delimiter_re  => [ '\[', '\]' ],
+      ignore_quoted => 1               # defaults to 0
+  );
+
+All arguments are optional; defaults are provided (see below).
+
+Creates a new URI::Find::Delimited object. This object works similarly
+to a L<URI::Find> object, but as well as just looking for URIs it is also
+aware of the concept of a wrapped, titled URI.  These look something like
+
+  [http://foo.com/ the foo website]
+
+where:
+
+=over 4
+
+=item * C<[> is the opening delimiter
+
+=item * C<]> is the closing delimiter
+
+=item * C<http://foo.com/> is the URI
+
+=item * C<the foo website> is the title
+
+=item * the URI and title are separated by spaces and/or tabs
+
+=back
+
+The URI::Find::Delimited object will extract each of these parts
+separately and pass them to your callback.
+
+=over 4
+
+=item B<callback>
+
+C<callback> is a function which is called on each URI found. It is
+passed five arguments: the opening delimiter (if found), the closing
+delimiter (if found), the URI, the title (if found), and any
+whitespace found between the URI and title.
+
+The return value of the callback will replace the original URI in the
+text.
+
+If you do not supply your own callback, the object will create a
+default one which will put your URIs in 'a href' tags using the URI
+for the target and the title for the link text. If no title is
+provided for a URI then the URI itself will be used as the title. If
+the delimiters aren't balanced (eg if the opening one is present but
+no closing one is found) then the URI is treated as not being wrapped. 
+
+Note: the default callback will not remove the delimiters from the
+text. It should be simple enough to write your own callback to remove
+them, based on the one in the source, if that's what you want.  In fact
+there's an example in this distribution, in C<t/delimited.t>.
+
+=item B<delimiter_re>
+
+The C<delimiter_re> parameter is optional. If you do supply it then it
+should be a ref to an array containing two regexes.  It defaults to
+using single square brackets as the delimiters.
+
+Don't use capturing groupings C<( )> in your delimiters or things
+will break. Use non-capturing C<(?: )> instead.
+
+=item B<ignore_quoted>
+
+If the C<ignore_quoted> parameter is supplied and set to a true value,
+then any URIs immediately preceded with a double-quote character will
+not be matched, ie your callback will not be executed for them and
+they'll be treated just as normal text.
+
+This is kinda lame but it's in here because I need to be able to
+ignore things like
+
+  <img src="http://foo.com/bar.gif">
+
+A better implementation may happen at some point.
+
+=back
+
+=cut
+
+sub new {
+    my ($class, %args) = @_;
+
+    my ( $callback, $delimiter_re, $ignore_quoted ) =
+                        @args{ qw( callback delimiter_re ignore_quoted ) };
+
+    unless (defined $callback) {
+        $callback = sub {
+            my ($open, $close, $uri, $title, $whitespace) = @_;
+            if ( $open && $close ) {
+                $title ||= $uri;
+ 	        qq|$open<a href="$uri">$title</a>$close|;
+	    } else {
+                qq|$open<a href="$uri">$uri</a>$whitespace$title$close|;
+            }
+        };
+    }
+    $delimiter_re ||= [ '\[', '\]' ];
+
+    my $self = bless { callback      => $callback,
+		       delimiter_re  => $delimiter_re,
+		       ignore_quoted => $ignore_quoted
+		     }, $class;
+    return $self;
+}
+
+sub find {
+    my($self, $r_text) = @_;
+
+    my $urlsfound = 0;
+
+    URI::URL::strict(1); # Don't assume any old thing followed by : is a scheme
+
+    my $uri_re    = $self->uri_re;
+    my $prefix_re = $self->{ignore_quoted} ? '(?<!["a-zA-Z])' : '';
+    my $open_re   = $self->{delimiter_re}[0];
+    my $close_re  = $self->{delimiter_re}[1];
+
+    # Note we only allow spaces and tabs, not all whitespace, between a URI
+    # and its title.  Also we disallow newlines *in* the title.  These are
+    # both to avoid the bug where $uri1\n$uri2 leads to $uri2 being considered
+    # as part of the title, and thus not wrapped.
+    $$r_text =~ s{$prefix_re     # maybe don't match things preceded by a "
+		  (?:
+		    ($open_re)   # opening delimiter
+                    ($uri_re)    # the URI itself
+		    ([ \t]*)     # optional whitespace between URI and title
+		    ((?<=[ \t])[^\n$close_re]+)? #title if there was whitespace
+                    ($close_re)  # closing delimiter
+	          |
+                      ($uri_re)  # just the URI itself
+                  )
+                 }{
+        my ($open, $uri_match, $whitespace, $title, $close, $just_uri) =
+              ($1,         $2,          $3,     $4,     $5,        $6);
+        $uri_match = $just_uri if $just_uri;
+        foreach ( $open, $whitespace, $title, $close ) {
+            $_ ||= "";
+	}
+        my $orig_text = qq|$open$uri_match$whitespace$title$close|;
+
+        if( my $uri = $self->_is_uri( \$uri_match ) ) { # if not a false alarm
+            $urlsfound++;
+            $self->{callback}->($open,$close,$uri_match,$title,$whitespace);
+	} else {
+            $orig_text;
+        }
+    }egx;
+
+    return $urlsfound;
+}
+
+=head1 SEE ALSO
+
+L<URI::Find>.
+
+=head1 AUTHOR
+
+Kake Pugh (kake at earth.li).
+
+=head1 COPYRIGHT
+
+     Copyright (C) 2003 Kake Pugh.  All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=head1 CREDITS
+
+Tim Bagot helped me stop faffing over the name, by pointing out that
+RFC 2396 Appendix E uses "delimited". Dave Hinton helped me fix the
+regex to make it work for delimited URIs with no title. Nick Cleaton
+helped me make C<ignore_quoted> work. Some of the code was taken from
+L<URI::Find>.
+
+=cut
+
+1;
diff --git a/t/delimited.t b/t/delimited.t
new file mode 100644
index 0000000..a7f34f8
--- /dev/null
+++ b/t/delimited.t
@@ -0,0 +1,120 @@
+use strict;
+local $^W = 1;
+
+use Test::More tests => 18;
+
+use_ok( "URI::Find::Delimited" );
+
+my $finder = URI::Find::Delimited->new;
+
+my $text = "This contains no URIs";
+$finder->find(\$text);
+is( $text, qq|This contains no URIs|, "left alone if no URIs" );
+
+$text = "http://the.earth.li/ foo bar";
+$finder->find(\$text);
+like( $text, qr|<a href="http://the.earth.li/">http://the.earth.li/</a>|,
+    "URIs at very start of line are picked up" );
+is( $text, qq|<a href="http://the.earth.li/">http://the.earth.li/</a> foo bar|,
+    "...and don't pick up trailing stuff as a title" );
+
+$text = "foo bar http://the.earth.li/";
+$finder->find(\$text);
+is( $text, qq|foo bar <a href="http://the.earth.li/">http://the.earth.li/</a>|,
+    "URIs at very end of line are picked up" );
+
+$text = "This is a sentence containing http://the.earth.li/";
+$finder->find(\$text);
+is( $text, qq|This is a sentence containing <a href="http://the.earth.li/">http://the.earth.li/</a>|,
+    "URI used as title if no title or delimiters" );
+#print "# $text\n";
+
+$text = "[http://use.perl.org/]";
+$finder->find(\$text);
+is( $text, qq|[<a href="http://use.perl.org/">http://use.perl.org/</a>]|,
+    "delimited URIs are found even if no title" );
+
+$text = "This has a [http://the.earth.li/ usemod link]";
+$finder->find(\$text);
+is( $text, qq|This has a [<a href="http://the.earth.li/">usemod link</a>]|,
+    "title found and used" );
+#print "# $text\n";
+
+$text = "This has a [http://the.earth.li/ broken usemod link";
+$finder->find(\$text);
+is( $text, qq|This has a [<a href="http://the.earth.li/">http://the.earth.li/</a> broken usemod link|,
+    "title ignored when final square bracket missing" );
+#print "# $text\n";
+
+$text = "This has a http://the.earth.li/ broken usemod link]";
+$finder->find(\$text);
+is( $text, qq|This has a <a href="http://the.earth.li/">http://the.earth.li/</a> broken usemod link]|,
+    "title ignored when first square bracket missing" );
+#print "# $text\n";
+
+$text = <<EOT;
+http://the.earth.li/
+http://www.pubs.com/
+EOT
+$finder->find(\$text);
+like( $text, qr|<a href="http://www.pubs.com/">http://www.pubs.com/</a>|,
+      "untitled URI following another untitled URI gets picked up correctly" );
+
+$text = <<EOT;
+http://the.earth.li/
+[http://www.pubs.com/ foo]
+EOT
+$finder->find(\$text);
+like( $text, qr|<a href="http://www.pubs.com/">foo</a>|,
+      "titled URI following untitled URI gets picked up correctly" );
+
+# Test alternative callbacks.
+$finder = URI::Find::Delimited->new(
+    callback => sub {
+        my ($open, $close, $uri, $title, $whitespace) = @_;
+	if ( $open && $close ) {
+	    $title ||= $uri;
+	    qq|<a href="$uri">$title</a>|;
+	} else {
+	    qq|<a href="$uri">$uri</a>$whitespace$title|;
+	}
+    }
+);
+$text = "This has a [http://the.earth.li/ usemod link]";
+$finder->find(\$text);
+is( $text, qq|This has a <a href="http://the.earth.li/">usemod link</a>|,
+    "can override callback" );
+
+# Test alternative delimiters.
+$finder = URI::Find::Delimited->new( delimiter_re => [ '\{', '\}' ] );
+$text = qq|A {http://the.earth.li/ titled link}|;
+$finder->find(\$text);
+is( $text, qq|A {<a href="http://the.earth.li/">titled link</a>}|,
+    "can overrride the delimiters" );
+
+# Test ignoring quoted URIs.
+$finder = URI::Find::Delimited->new;
+$text = qq|This has a <a href="http://the.earth.li/">link already embedded|;
+$finder->find(\$text);
+is( $text, qq|This has a <a href="<a href="http://the.earth.li/">http://the.earth.li/</a>">link already embedded|,
+    "URIs in existing links picked up by default" );
+
+$finder = URI::Find::Delimited->new( ignore_quoted => 0 );
+$text = qq|This has a <a href="http://the.earth.li/">link already embedded|;
+$finder->find(\$text);
+is( $text, qq|This has a <a href="<a href="http://the.earth.li/">http://the.earth.li/</a>">link already embedded|,
+    "...and when ignore_quoted is false" );
+
+$finder = URI::Find::Delimited->new( ignore_quoted => 1 );
+$text = qq|This has a <a href="http://the.earth.li/">link already embedded|;
+$finder->find(\$text);
+is( $text, qq|This has a <a href="http://the.earth.li/">link already embedded|,
+    "...but not when ignore_quoted is true" );
+
+# Bug CPAN RT #2245
+$finder = URI::Find::Delimited->new;
+$text = qq|style:font|;
+$finder->find(\$text);
+is( $text, "style:font",
+    "random things with colons in not automatically assumed to be URIs" );
+

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/liburi-find-delimited-perl.git



More information about the Pkg-perl-cvs-commits mailing list