r40686 - in /branches/upstream/libtext-affixes-perl: ./ current/ current/t/
mogaal-guest at users.alioth.debian.org
mogaal-guest at users.alioth.debian.org
Fri Jul 24 21:31:00 UTC 2009
Author: mogaal-guest
Date: Fri Jul 24 21:30:53 2009
New Revision: 40686
URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=40686
Log:
[svn-inject] Installing original source of libtext-affixes-perl
Added:
branches/upstream/libtext-affixes-perl/
branches/upstream/libtext-affixes-perl/current/
branches/upstream/libtext-affixes-perl/current/Affixes.pm
branches/upstream/libtext-affixes-perl/current/Changes
branches/upstream/libtext-affixes-perl/current/MANIFEST
branches/upstream/libtext-affixes-perl/current/META.yml
branches/upstream/libtext-affixes-perl/current/Makefile.PL
branches/upstream/libtext-affixes-perl/current/README
branches/upstream/libtext-affixes-perl/current/t/
branches/upstream/libtext-affixes-perl/current/t/01-prefixes.t
branches/upstream/libtext-affixes-perl/current/t/02-suffixes.t
branches/upstream/libtext-affixes-perl/current/t/03-extreme_cases.t
branches/upstream/libtext-affixes-perl/current/t/pod-coverage.t
branches/upstream/libtext-affixes-perl/current/t/pod.t
Added: branches/upstream/libtext-affixes-perl/current/Affixes.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libtext-affixes-perl/current/Affixes.pm?rev=40686&op=file
==============================================================================
--- branches/upstream/libtext-affixes-perl/current/Affixes.pm (added)
+++ branches/upstream/libtext-affixes-perl/current/Affixes.pm Fri Jul 24 21:30:53 2009
@@ -1,0 +1,253 @@
+package Text::Affixes;
+
+use 5.006;
+use strict;
+use warnings;
+
+require Exporter;
+use AutoLoader qw(AUTOLOAD);
+
+our @ISA = qw(Exporter);
+
+our %EXPORT_TAGS = ( 'all' => [ qw(
+
+) ] );
+
+our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
+
+our @EXPORT = qw(
+ get_prefixes
+ get_suffixes
+);
+
+our $VERSION = '0.07';
+
+=head1 NAME
+
+Text::Affixes - Prefixes and suffixes analisys of text
+
+=head1 SYNOPSIS
+
+ use Text::Affixes;
+ my $text = "Hello, world. Hello, big world.";
+ my $prefixes = get_prefixes($text);
+
+ # $prefixes now holds
+ # {
+ # 3 => {
+ # 'Hel' => 2,
+ # 'wor' => 2,
+ # }
+ # }
+
+ # or
+
+ $prefixes = get_prefixes({min => 1, max => 2},$text);
+
+ # $prefixes now holds
+ # {
+ # 1 => {
+ # 'H' => 2,
+ # 'w' => 2,
+ # 'b' => 1,
+ # },
+ # 2 => {
+ # 'He' => 2,
+ # 'wo' => 2,
+ # 'bi' => 1,
+ # }
+ # }
+
+ # the use for get_suffixes is similar
+
+=head1 DESCRIPTION
+
+Provides methods for prefixe and suffix analisys of text.
+
+=head1 METHODS
+
+=head2 get_prefixes
+
+Extracts prefixes from text. You can specify the minimum and maximum
+number of characters of prefixes you want.
+
+Returns a reference to a hash, where the specified limits are mapped
+in hashes; each of those hashes maps every prefix in the text into the
+number of times it was found.
+
+By default, both minimum and maximum limits are 3. If the minimum
+limit is greater than the lower one, an empty hash is returned.
+
+A prefix is considered to be a sequence of word characters (\w) in
+the beginning of a word (that is, after a word boundary) that does not
+reach the end of the word ("regular expressionly", a prefix is the $1
+of /\b(\w+)\w/).
+
+ # extracting prefixes of size 3
+ $prefixes = get_prefixes( $text );
+
+ # extracting prefixes of sizes 2 and 3
+ $prefixes = get_prefixes( {min => 2}, $text );
+
+ # extracting prefixes of sizes 3 and 4
+ $prefixes = get_prefixes( {max => 4}, $text );
+
+ # extracting prefixes of sizes 2, 3 and 4
+ $prefixes = get_prefixes( {min => 2, max=> 4}, $text);
+
+=cut
+
+sub get_prefixes {
+ return _get_elements(1, at _);
+}
+
+=head2 get_suffixes
+
+The get_suffixes function is similar to the get_prefixes one. You
+should read the documentation for that one and than come back to this
+point.
+
+A suffix is considered to be a sequence of word characters (\w) in
+the end of a word (that is, before a word boundary) that does not start
+at the beginning of the word ("regular expressionly" speaking, a
+prefix is the $1 of /\w(\w+)\b/).
+
+ # extracting suffixes of size 3
+ $suffixes = get_suffixes( $text );
+
+ # extracting suffixes of sizes 2 and 3
+ $suffixes = get_suffixes( {min => 2}, $text );
+
+ # extracting suffixes of sizes 3 and 4
+ $suffixes = get_suffixes( {max => 4}, $text );
+
+ # extracting suffixes of sizes 2, 3 and 4
+ $suffixes = get_suffixes( {min => 2, max=> 4}, $text);
+
+=cut
+
+sub get_suffixes {
+ return _get_elements(0, at _);
+}
+
+sub _get_elements {
+ my $task = shift;
+
+=head1 OPTIONS
+
+Apart from deciding on a minimum and maximum size for prefixes or suffixes, you
+can also decide on some configuration options.
+
+=cut
+
+ # configuration
+ my %conf = ( min => 3,
+ max => 3,
+ exclude_numbers => 1,
+ lowercase => 0,
+ );
+ if (ref $_[0] eq 'HASH') {
+ %conf = (%conf, %{+shift});
+ }
+ return {} if $conf{max} < $conf{min};
+
+ # get the elements
+ my %elements;
+ my $text = shift || return undef;
+ $conf{min} = 1 if $conf{min} < 1;
+ for ($conf{min} .. $conf{max}) {
+
+ my $regex = $task ? qr/\b(\w{$_})\w/ : # prefixes
+ qr/\w(\w{$_})\b/ ; # suffixes
+
+ while ($text =~ /$regex/g) {
+ $elements{$_}{$1}++;
+ }
+
+ }
+
+=head2 exclude_numbers
+
+Set to 0 if you consider numbers as part of words. Default value is 1.
+
+ # this
+ get_suffixes( {min => 1, max => 1, exclude_numbers => 0}, "Hello, but w8" );
+
+ # returns this:
+ {
+ 1 => {
+ 'o' => 1,
+ 't' => 1,
+ '8' => 1
+ }
+ }
+
+=cut
+
+ # exclude elements containing numbers
+ if ($conf{exclude_numbers}) {
+ for my $s (keys %elements) {
+ for (keys %{$elements{$s}}) {
+ delete ${$elements{$s}}{$_} if /\d/;
+ }
+ }
+ }
+
+=head2 lowercase
+
+Set to 1 to extract all prefixes in lowercase mode. Default value is 0.
+
+ATTENTION: This does not mean that prefixes with uppercased characters won't be
+extracted. It means they will be extracted after being lowercased.
+
+ # this...
+ get_prefixes( {min => 2, max => 2, lowercase => 1}, "Hello, hello");
+
+ # returns this:
+ {
+ 2 => {
+ 'he' => 2
+ }
+ }
+
+=cut
+
+ # elements containing uppercased characters become lowercased ones
+ if ($conf{lowercase}) {
+ for my $s (keys %elements) {
+ for (keys %{$elements{$s}}) {
+ if (/[A-Z]/) {
+ ${$elements{$s}}{lc $_} +=
+ ${$elements{$s}}{$_};
+ delete ${$elements{$s}}{$_};
+ }
+ }
+ }
+ }
+
+ return \%elements;
+}
+
+1;
+__END__
+
+=head1 TO DO
+
+=over 6
+
+=item * Make it more efficient (use C for that)
+
+=back
+
+=head1 AUTHOR
+
+Jose Castro, C<< <cog at cpan.org> >>
+
+=head1 COPYRIGHT & LICENSE
+
+Copyright 2004 Jose Castro, All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=cut
Added: branches/upstream/libtext-affixes-perl/current/Changes
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libtext-affixes-perl/current/Changes?rev=40686&op=file
==============================================================================
--- branches/upstream/libtext-affixes-perl/current/Changes (added)
+++ branches/upstream/libtext-affixes-perl/current/Changes Fri Jul 24 21:30:53 2009
@@ -1,0 +1,32 @@
+Revision history for Text-Affixes
+
+0.07 Sat Nov 19 19:22:00 2005
+ - increased test coverage
+
+0.06 Fri Nov 05 00:17:14 2004
+ - added parameter "exclude_numbers" to the configuration
+ - added parameter "lowercase" to the configuration
+ - code for extracting prefixes and suffixes now resides in the same
+ private function
+
+0.05 Mon Nov 01 15:18:00 2004
+ - Perl required version is now 5.006
+ - changes in the documentation
+ - added pod.t and pod-coverage.t to the tests directory
+ - when minimum size is bigger than maximum size, a reference to an
+ empty hash is automatically returned
+
+0.04 Tue Oct 26 19:20:00 2004
+ - corrected grammar (it's "suffix", not "sufix"; duh!)
+
+0.03 Tue Oct 26 00:00:10 2004
+ - minor changes in the documentation
+ - added tests
+
+0.02 Sat Oct 23 17:04:20 2004
+ - added README
+
+0.01 Sat Oct 23 17:04:20 2004
+ - original version; created by h2xs 1.22 with options
+ -Xan Text::Affixes
+
Added: branches/upstream/libtext-affixes-perl/current/MANIFEST
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libtext-affixes-perl/current/MANIFEST?rev=40686&op=file
==============================================================================
--- branches/upstream/libtext-affixes-perl/current/MANIFEST (added)
+++ branches/upstream/libtext-affixes-perl/current/MANIFEST Fri Jul 24 21:30:53 2009
@@ -1,0 +1,11 @@
+Affixes.pm
+Changes
+Makefile.PL
+MANIFEST
+README
+t/01-prefixes.t
+t/02-suffixes.t
+t/03-extreme_cases.t
+t/pod.t
+t/pod-coverage.t
+META.yml Module meta-data (added by MakeMaker)
Added: branches/upstream/libtext-affixes-perl/current/META.yml
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libtext-affixes-perl/current/META.yml?rev=40686&op=file
==============================================================================
--- branches/upstream/libtext-affixes-perl/current/META.yml (added)
+++ branches/upstream/libtext-affixes-perl/current/META.yml Fri Jul 24 21:30:53 2009
@@ -1,0 +1,10 @@
+# http://module-build.sourceforge.net/META-spec.html
+#XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX#
+name: Text-Affixes
+version: 0.07
+version_from: Affixes.pm
+installdirs: site
+requires:
+
+distribution_type: module
+generated_by: ExtUtils::MakeMaker version 6.17
Added: branches/upstream/libtext-affixes-perl/current/Makefile.PL
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libtext-affixes-perl/current/Makefile.PL?rev=40686&op=file
==============================================================================
--- branches/upstream/libtext-affixes-perl/current/Makefile.PL (added)
+++ branches/upstream/libtext-affixes-perl/current/Makefile.PL Fri Jul 24 21:30:53 2009
@@ -1,0 +1,13 @@
+use 5.006;
+use ExtUtils::MakeMaker;
+# See lib/ExtUtils/MakeMaker.pm for details of how to influence
+# the contents of the Makefile that is written.
+WriteMakefile(
+ 'NAME' => 'Text::Affixes',
+ 'VERSION_FROM' => 'Affixes.pm', # finds $VERSION
+ 'PREREQ_PM' => {}, # e.g., Module::Name => 1.1
+ ($] >= 5.005 ? ## Add these new keywords supported since 5.005
+ (ABSTRACT_FROM => 'Affixes.pm', # retrieve abstract from module
+ AUTHOR => 'Jose Castro <cog at cpan.org>',
+ ) : ()),
+);
Added: branches/upstream/libtext-affixes-perl/current/README
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libtext-affixes-perl/current/README?rev=40686&op=file
==============================================================================
--- branches/upstream/libtext-affixes-perl/current/README (added)
+++ branches/upstream/libtext-affixes-perl/current/README Fri Jul 24 21:30:53 2009
@@ -1,0 +1,156 @@
+Affixes.pm version 0.07
+=======================
+
+=head1 NAME
+
+Text::Affixes - Prefixes and suffixes analisys of text
+
+=head1 SYNOPSIS
+
+ use Text::Affixes;
+ my $text = "Hello, world. Hello, big world.";
+ my $prefixes = get_prefixes($text);
+
+ # $prefixes now holds
+ # {
+ # 3 => {
+ # 'Hel' => 2,
+ # 'wor' => 2,
+ # }
+ # }
+
+ # or
+
+ $prefixes = get_prefixes({min => 1, max => 2},$text);
+
+ # $prefixes now holds
+ # {
+ # 1 => {
+ # 'H' => 2,
+ # 'w' => 2,
+ # 'b' => 1,
+ # },
+ # 2 => {
+ # 'He' => 2,
+ # 'wo' => 2,
+ # 'bi' => 1,
+ # }
+ # }
+
+ # the use for get_suffixes is similar
+
+=head1 DESCRIPTION
+
+Provides methods for prefixe and suffix analisys of text.
+
+=head1 METHODS
+
+=head2 get_prefixes
+
+Extracts prefixes from text. You can specify the minimum and maximum
+number of characters of prefixes you want.
+
+Returns a reference to a hash, where the specified limits are mapped
+in hashes; each of those hashes maps every prefix in the text into the
+number of times it was found.
+
+By default, both minimum and maximum limits are 3. If the minimum
+limit is greater than the lower one, an empty hash is returned.
+
+A prefix is considered to be a sequence of word characters (\w) in
+the beginning of a word (that is, after a word boundary) that does not
+reach the end of the word ("regular expressionly", a prefix is the $1
+of /\b(\w+)\w/).
+
+ # extracting prefixes of size 3
+ $prefixes = get_prefixes( $text );
+
+ # extracting prefixes of sizes 2 and 3
+ $prefixes = get_prefixes( {min => 2}, $text );
+
+ # extracting prefixes of sizes 3 and 4
+ $prefixes = get_prefixes( {max => 4}, $text );
+
+ # extracting prefixes of sizes 2, 3 and 4
+ $prefixes = get_prefixes( {min => 2, max=> 4}, $text);
+
+=head2 get_suffixes
+
+The get_suffixes function is similar to the get_prefixes one. You
+should read the documentation for that one and than come back to this
+point.
+
+A suffix is considered to be a sequence of word characters (\w) in
+the end of a word (that is, before a word boundary) that does not start
+at the beginning of the word ("regular expressionly" speaking, a
+prefix is the $1 of /\w(\w+)\b/).
+
+ # extracting suffixes of size 3
+ $suffixes = get_suffixes( $text );
+
+ # extracting suffixes of sizes 2 and 3
+ $suffixes = get_suffixes( {min => 2}, $text );
+
+ # extracting suffixes of sizes 3 and 4
+ $suffixes = get_suffixes( {max => 4}, $text );
+
+ # extracting suffixes of sizes 2, 3 and 4
+ $suffixes = get_suffixes( {min => 2, max=> 4}, $text);
+
+=head1 OPTIONS
+
+Apart from deciding on a minimum and maximum size for prefixes or suffixes, you
+can also decide on some configuration options.
+
+=head2 exclude_numbers
+
+Set to 0 if you consider numbers as part of words. Default value is 1.
+
+ # this
+ get_suffixes( {min => 1, max => 1, exclude_numbers => 0}, "Hello, but w8" );
+
+ # returns this:
+ {
+ 1 => {
+ 'o' => 1,
+ 't' => 1,
+ '8' => 1
+ }
+ }
+
+=head2 lowercase
+
+Set to 1 to extract all prefixes in lowercase mode. Default value is 0.
+
+ATTENTION: This does not mean that prefixes with uppercased characters won't be
+extracted. It means they will be extracted after being lowercased.
+
+ # this...
+ get_prefixes( {min => 2, max => 2, lowercase => 1}, "Hello, hello");
+
+ # returns this:
+ {
+ 2 => {
+ 'he' => 2
+ }
+ }
+
+=head1 TO DO
+
+=over 6
+
+=item * Make it more efficient (use C for that)
+
+=back
+
+=head1 AUTHOR
+
+Jose Castro, C<< <cog at cpan.org> >>
+
+=head1 COPYRIGHT & LICENSE
+
+Copyright 2004 Jose Castro, All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
Added: branches/upstream/libtext-affixes-perl/current/t/01-prefixes.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libtext-affixes-perl/current/t/01-prefixes.t?rev=40686&op=file
==============================================================================
--- branches/upstream/libtext-affixes-perl/current/t/01-prefixes.t (added)
+++ branches/upstream/libtext-affixes-perl/current/t/01-prefixes.t Fri Jul 24 21:30:53 2009
@@ -1,0 +1,76 @@
+use Test::More tests => 8;
+BEGIN { use_ok('Text::Affixes') };
+
+my $text = "Hello, world. Hello, big world.";
+is_deeply(
+ get_prefixes($text),
+ {
+ 3 => {
+ 'Hel' => 2,
+ 'wor' => 2,
+ }
+ }
+);
+
+is_deeply(
+ get_prefixes({min => 1, max => 2},$text),
+ {
+ 1 => {
+ 'H' => 2,
+ 'w' => 2,
+ 'b' => 1,
+ },
+ 2 => {
+ 'He' => 2,
+ 'wo' => 2,
+ 'bi' => 1,
+ }
+ }
+);
+
+$text = "Hello1, 2world";
+
+is_deeply( get_prefixes({min => 2, max => 2}, $text),
+ {
+ 2 => {
+ 'He' => 1,
+ }
+ }
+);
+
+is_deeply( get_prefixes({min => 2, max => 2, exclude_numbers => 0}, $text),
+ {
+ 2 => {
+ 'He' => 1,
+ '2w' => 1,
+ }
+ }
+);
+
+is_deeply( get_prefixes({min => 2, max => 2, lowercase => 1}, $text),
+ {
+ 2 => {
+ 'he' => 1,
+ }
+ }
+);
+
+$text = "Hello, hello";
+is_deeply( get_prefixes({min => 2, max => 2, lowercase => 1}, $text),
+ {
+ 2 => {
+ 'he' => 2,
+ }
+ }
+);
+
+is_deeply( get_prefixes({min => 2, max => 2, lowercase => 0}, $text),
+ {
+ 2 => {
+ 'He' => 1,
+ 'he' => 1,
+ }
+ }
+);
+
+
Added: branches/upstream/libtext-affixes-perl/current/t/02-suffixes.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libtext-affixes-perl/current/t/02-suffixes.t?rev=40686&op=file
==============================================================================
--- branches/upstream/libtext-affixes-perl/current/t/02-suffixes.t (added)
+++ branches/upstream/libtext-affixes-perl/current/t/02-suffixes.t Fri Jul 24 21:30:53 2009
@@ -1,0 +1,72 @@
+use Test::More tests => 8;
+BEGIN { use_ok('Text::Affixes') };
+
+my $text = "Hello, world. Hello, big world.";
+is_deeply(
+ get_suffixes($text),
+ {
+ 3 => {
+ 'llo' => 2,
+ 'rld' => 2,
+ }
+ });
+
+is_deeply(
+ get_suffixes({min => 1, max => 2},$text),
+ {
+ 1 => {
+ 'o' => 2,
+ 'd' => 2,
+ 'g' => 1,
+ },
+ 2 => {
+ 'lo' => 2,
+ 'ld' => 2,
+ 'ig' => 1,
+ }
+ });
+
+$text = "Hello1, 2worlD";
+
+is_deeply( get_suffixes({min => 2, max => 2}, $text),
+ {
+ 2 => {
+ 'lD' => 1,
+ }
+ }
+);
+
+is_deeply( get_suffixes({min => 2, max => 2, exclude_numbers => 0}, $text),
+ {
+ 2 => {
+ 'lD' => 1,
+ 'o1' => 1,
+ }
+ }
+);
+
+is_deeply( get_suffixes({min => 2, max => 2, lowercase => 1}, $text),
+ {
+ 2 => {
+ 'ld' => 1,
+ }
+ }
+);
+
+$text = "Hello, hellO";
+is_deeply( get_suffixes({min => 2, max => 2, lowercase => 1}, $text),
+ {
+ 2 => {
+ 'lo' => 2,
+ }
+ }
+);
+
+is_deeply( get_suffixes({min => 2, max => 2, lowercase => 0}, $text),
+ {
+ 2 => {
+ 'lo' => 1,
+ 'lO' => 1,
+ }
+ }
+);
Added: branches/upstream/libtext-affixes-perl/current/t/03-extreme_cases.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libtext-affixes-perl/current/t/03-extreme_cases.t?rev=40686&op=file
==============================================================================
--- branches/upstream/libtext-affixes-perl/current/t/03-extreme_cases.t (added)
+++ branches/upstream/libtext-affixes-perl/current/t/03-extreme_cases.t Fri Jul 24 21:30:53 2009
@@ -1,0 +1,28 @@
+use Test::More tests => 5;
+BEGIN { use_ok('Text::Affixes') };
+
+my $text = "Hello, world. Hello, big world.";
+my $prefixes = get_prefixes( {}, $text);
+
+is_deeply( $prefixes ,
+ # $prefixes now holds
+ {
+ 3 => {
+ 'Hel' => 2,
+ 'wor' => 2,
+ }
+ });
+
+$prefixes = get_prefixes({min => 0, max => 0},$text);
+
+is_deeply( $prefixes ,
+ # $prefixes now holds
+ { });
+
+$prefixes = get_prefixes({min => 4, max => 3},$text);
+
+is_deeply( $prefixes ,
+ # $prefixes now holds
+ { });
+
+is_deeply( get_prefixes(), undef );
Added: branches/upstream/libtext-affixes-perl/current/t/pod-coverage.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libtext-affixes-perl/current/t/pod-coverage.t?rev=40686&op=file
==============================================================================
--- branches/upstream/libtext-affixes-perl/current/t/pod-coverage.t (added)
+++ branches/upstream/libtext-affixes-perl/current/t/pod-coverage.t Fri Jul 24 21:30:53 2009
@@ -1,0 +1,6 @@
+#!perl -T
+
+use Test::More;
+eval "use Test::Pod::Coverage 1.04";
+plan skip_all => "Test::Pod::Coverage 1.04 required for testing POD coverage" if $@;
+all_pod_coverage_ok();
Added: branches/upstream/libtext-affixes-perl/current/t/pod.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libtext-affixes-perl/current/t/pod.t?rev=40686&op=file
==============================================================================
--- branches/upstream/libtext-affixes-perl/current/t/pod.t (added)
+++ branches/upstream/libtext-affixes-perl/current/t/pod.t Fri Jul 24 21:30:53 2009
@@ -1,0 +1,6 @@
+#!perl -T
+
+use Test::More;
+eval "use Test::Pod 1.14";
+plan skip_all => "Test::Pod 1.14 required for testing POD" if $@;
+all_pod_files_ok();
More information about the Pkg-perl-cvs-commits
mailing list