r40686 - in /branches/upstream/libtext-affixes-perl: ./ current/ current/t/

mogaal-guest at users.alioth.debian.org mogaal-guest at users.alioth.debian.org
Fri Jul 24 21:31:00 UTC 2009


Author: mogaal-guest
Date: Fri Jul 24 21:30:53 2009
New Revision: 40686

URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=40686
Log:
[svn-inject] Installing original source of libtext-affixes-perl

Added:
    branches/upstream/libtext-affixes-perl/
    branches/upstream/libtext-affixes-perl/current/
    branches/upstream/libtext-affixes-perl/current/Affixes.pm
    branches/upstream/libtext-affixes-perl/current/Changes
    branches/upstream/libtext-affixes-perl/current/MANIFEST
    branches/upstream/libtext-affixes-perl/current/META.yml
    branches/upstream/libtext-affixes-perl/current/Makefile.PL
    branches/upstream/libtext-affixes-perl/current/README
    branches/upstream/libtext-affixes-perl/current/t/
    branches/upstream/libtext-affixes-perl/current/t/01-prefixes.t
    branches/upstream/libtext-affixes-perl/current/t/02-suffixes.t
    branches/upstream/libtext-affixes-perl/current/t/03-extreme_cases.t
    branches/upstream/libtext-affixes-perl/current/t/pod-coverage.t
    branches/upstream/libtext-affixes-perl/current/t/pod.t

Added: branches/upstream/libtext-affixes-perl/current/Affixes.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libtext-affixes-perl/current/Affixes.pm?rev=40686&op=file
==============================================================================
--- branches/upstream/libtext-affixes-perl/current/Affixes.pm (added)
+++ branches/upstream/libtext-affixes-perl/current/Affixes.pm Fri Jul 24 21:30:53 2009
@@ -1,0 +1,253 @@
+package Text::Affixes;
+
+use 5.006;
+use strict;
+use warnings;
+
+require Exporter;
+use AutoLoader qw(AUTOLOAD);
+
+our @ISA = qw(Exporter);
+
+our %EXPORT_TAGS = ( 'all' => [ qw(
+	
+) ] );
+
+our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
+
+our @EXPORT = qw(
+	get_prefixes
+	get_suffixes
+);
+
+our $VERSION = '0.07';
+
+=head1 NAME
+
+Text::Affixes - Prefixes and suffixes analisys of text
+
+=head1 SYNOPSIS
+
+  use Text::Affixes;
+  my $text = "Hello, world. Hello, big world.";
+  my $prefixes = get_prefixes($text);
+
+  # $prefixes now holds
+  # {
+  # 	3 => {
+  # 		'Hel' => 2,
+  # 		'wor' => 2,
+  # 	}
+  # }
+
+  # or
+
+  $prefixes = get_prefixes({min => 1, max => 2},$text);
+
+  # $prefixes now holds
+  # {
+  # 	1 => {
+  # 		'H' => 2,
+  # 		'w' => 2,
+  # 		'b' => 1,
+  # 	},
+  # 	2 => {
+  # 		'He' => 2,
+  # 		'wo' => 2,
+  # 		'bi' => 1,
+  # 	}
+  # }
+
+  # the use for get_suffixes is similar
+
+=head1 DESCRIPTION
+
+Provides methods for prefixe and suffix analisys of text.
+
+=head1 METHODS
+
+=head2 get_prefixes
+
+Extracts prefixes from text. You can specify the minimum and maximum
+number of characters of prefixes you want.
+
+Returns a reference to a hash, where the specified limits are mapped
+in hashes; each of those hashes maps every prefix in the text into the
+number of times it was found.
+
+By default, both minimum and maximum limits are 3. If the minimum
+limit is greater than the lower one, an empty hash is returned.
+
+A prefix is considered to be a sequence of word characters (\w) in
+the beginning of a word (that is, after a word boundary) that does not
+reach the end of the word ("regular expressionly", a prefix is the $1
+of /\b(\w+)\w/).
+
+  # extracting prefixes of size 3
+  $prefixes = get_prefixes( $text );
+
+  # extracting prefixes of sizes 2 and 3
+  $prefixes = get_prefixes( {min => 2}, $text );
+
+  # extracting prefixes of sizes 3 and 4
+  $prefixes = get_prefixes( {max => 4}, $text );
+
+  # extracting prefixes of sizes 2, 3 and 4
+  $prefixes = get_prefixes( {min => 2, max=> 4}, $text);
+
+=cut
+
+sub get_prefixes {
+	return _get_elements(1, at _);
+}
+
+=head2 get_suffixes
+
+The get_suffixes function is similar to the get_prefixes one. You
+should read the documentation for that one and than come back to this
+point.
+
+A suffix is considered to be a sequence of word characters (\w) in
+the end of a word (that is, before a word boundary) that does not start
+at the beginning of the word ("regular expressionly" speaking, a
+prefix is the $1 of /\w(\w+)\b/).
+
+  # extracting suffixes of size 3
+  $suffixes = get_suffixes( $text );
+
+  # extracting suffixes of sizes 2 and 3
+  $suffixes = get_suffixes( {min => 2}, $text );
+
+  # extracting suffixes of sizes 3 and 4
+  $suffixes = get_suffixes( {max => 4}, $text );
+
+  # extracting suffixes of sizes 2, 3 and 4
+  $suffixes = get_suffixes( {min => 2, max=> 4}, $text);
+
+=cut
+
+sub get_suffixes {
+	return _get_elements(0, at _);
+}
+
+sub _get_elements {
+	my $task = shift;
+
+=head1 OPTIONS
+
+Apart from deciding on a minimum and maximum size for prefixes or suffixes, you
+can also decide on some configuration options.
+
+=cut
+
+	# configuration
+	my %conf = (	min             => 3,
+			max             => 3,
+			exclude_numbers => 1,
+			lowercase       => 0,
+		);
+	if (ref $_[0] eq 'HASH') {
+		%conf = (%conf, %{+shift});
+	}
+	return {} if $conf{max} < $conf{min};
+
+	# get the elements
+	my %elements;
+	my $text = shift || return undef;
+	$conf{min} = 1 if $conf{min} < 1;
+	for ($conf{min} .. $conf{max}) {
+
+		my $regex = $task ? qr/\b(\w{$_})\w/ :	# prefixes
+                                    qr/\w(\w{$_})\b/ ;	# suffixes
+
+		while ($text =~ /$regex/g) {
+			$elements{$_}{$1}++;
+		}
+
+	}
+
+=head2 exclude_numbers
+
+Set to 0 if you consider numbers as part of words. Default value is 1.
+
+  # this
+  get_suffixes( {min => 1, max => 1, exclude_numbers => 0}, "Hello, but w8" );
+
+  # returns this:
+    {
+      1 => {
+             'o' => 1,
+             't' => 1,
+             '8' => 1
+           }
+    }
+
+=cut
+
+	# exclude elements containing numbers
+	if ($conf{exclude_numbers}) {
+		for my $s (keys %elements) {
+			for (keys %{$elements{$s}}) {
+				delete ${$elements{$s}}{$_} if /\d/;
+			}
+		}
+	}
+
+=head2 lowercase
+
+Set to 1 to extract all prefixes in lowercase mode. Default value is 0.
+
+ATTENTION: This does not mean that prefixes with uppercased characters won't be
+extracted. It means they will be extracted after being lowercased.
+
+  # this...
+  get_prefixes( {min => 2, max => 2, lowercase => 1}, "Hello, hello");
+
+  # returns this:
+    {
+      2 => {
+             'he' => 2
+           }
+    }
+
+=cut
+
+	# elements containing uppercased characters become lowercased ones
+	if ($conf{lowercase}) {
+		for my $s (keys %elements) {
+			for (keys %{$elements{$s}}) {
+				if (/[A-Z]/) {
+					${$elements{$s}}{lc $_} +=
+						${$elements{$s}}{$_};
+					delete ${$elements{$s}}{$_};
+				}
+			}
+		}
+	}
+
+	return \%elements;
+}
+
+1;
+__END__
+
+=head1 TO DO
+
+=over 6
+
+=item * Make it more efficient (use C for that)
+
+=back
+
+=head1 AUTHOR
+
+Jose Castro, C<< <cog at cpan.org> >>
+
+=head1 COPYRIGHT & LICENSE
+
+Copyright 2004 Jose Castro, All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=cut

Added: branches/upstream/libtext-affixes-perl/current/Changes
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libtext-affixes-perl/current/Changes?rev=40686&op=file
==============================================================================
--- branches/upstream/libtext-affixes-perl/current/Changes (added)
+++ branches/upstream/libtext-affixes-perl/current/Changes Fri Jul 24 21:30:53 2009
@@ -1,0 +1,32 @@
+Revision history for Text-Affixes
+
+0.07  Sat Nov 19 19:22:00 2005
+	- increased test coverage
+
+0.06  Fri Nov 05 00:17:14 2004
+	- added parameter "exclude_numbers" to the configuration
+	- added parameter "lowercase" to the configuration
+	- code for extracting prefixes and suffixes now resides in the same
+	  private function
+
+0.05  Mon Nov 01 15:18:00 2004
+	- Perl required version is now 5.006
+	- changes in the documentation
+	- added pod.t and pod-coverage.t to the tests directory
+	- when minimum size is bigger than maximum size, a reference to an
+	  empty hash is automatically returned
+
+0.04  Tue Oct 26 19:20:00 2004
+	- corrected grammar (it's "suffix", not "sufix"; duh!)
+
+0.03  Tue Oct 26 00:00:10 2004
+	- minor changes in the documentation
+	- added tests
+
+0.02  Sat Oct 23 17:04:20 2004
+	- added README
+
+0.01  Sat Oct 23 17:04:20 2004
+	- original version; created by h2xs 1.22 with options
+		-Xan Text::Affixes
+

Added: branches/upstream/libtext-affixes-perl/current/MANIFEST
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libtext-affixes-perl/current/MANIFEST?rev=40686&op=file
==============================================================================
--- branches/upstream/libtext-affixes-perl/current/MANIFEST (added)
+++ branches/upstream/libtext-affixes-perl/current/MANIFEST Fri Jul 24 21:30:53 2009
@@ -1,0 +1,11 @@
+Affixes.pm
+Changes
+Makefile.PL
+MANIFEST
+README
+t/01-prefixes.t
+t/02-suffixes.t
+t/03-extreme_cases.t
+t/pod.t
+t/pod-coverage.t
+META.yml                                 Module meta-data (added by MakeMaker)

Added: branches/upstream/libtext-affixes-perl/current/META.yml
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libtext-affixes-perl/current/META.yml?rev=40686&op=file
==============================================================================
--- branches/upstream/libtext-affixes-perl/current/META.yml (added)
+++ branches/upstream/libtext-affixes-perl/current/META.yml Fri Jul 24 21:30:53 2009
@@ -1,0 +1,10 @@
+# http://module-build.sourceforge.net/META-spec.html
+#XXXXXXX This is a prototype!!!  It will change in the future!!! XXXXX#
+name:         Text-Affixes
+version:      0.07
+version_from: Affixes.pm
+installdirs:  site
+requires:
+
+distribution_type: module
+generated_by: ExtUtils::MakeMaker version 6.17

Added: branches/upstream/libtext-affixes-perl/current/Makefile.PL
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libtext-affixes-perl/current/Makefile.PL?rev=40686&op=file
==============================================================================
--- branches/upstream/libtext-affixes-perl/current/Makefile.PL (added)
+++ branches/upstream/libtext-affixes-perl/current/Makefile.PL Fri Jul 24 21:30:53 2009
@@ -1,0 +1,13 @@
+use 5.006;
+use ExtUtils::MakeMaker;
+# See lib/ExtUtils/MakeMaker.pm for details of how to influence
+# the contents of the Makefile that is written.
+WriteMakefile(
+    'NAME'		=> 'Text::Affixes',
+    'VERSION_FROM'	=> 'Affixes.pm', # finds $VERSION
+    'PREREQ_PM'		=> {}, # e.g., Module::Name => 1.1
+    ($] >= 5.005 ?    ## Add these new keywords supported since 5.005
+      (ABSTRACT_FROM => 'Affixes.pm', # retrieve abstract from module
+       AUTHOR              => 'Jose Castro <cog at cpan.org>',
+       ) : ()),
+);

Added: branches/upstream/libtext-affixes-perl/current/README
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libtext-affixes-perl/current/README?rev=40686&op=file
==============================================================================
--- branches/upstream/libtext-affixes-perl/current/README (added)
+++ branches/upstream/libtext-affixes-perl/current/README Fri Jul 24 21:30:53 2009
@@ -1,0 +1,156 @@
+Affixes.pm version 0.07
+=======================
+
+=head1 NAME
+
+Text::Affixes - Prefixes and suffixes analisys of text
+
+=head1 SYNOPSIS
+
+  use Text::Affixes;
+  my $text = "Hello, world. Hello, big world.";
+  my $prefixes = get_prefixes($text);
+
+  # $prefixes now holds
+  # {
+  # 	3 => {
+  # 		'Hel' => 2,
+  # 		'wor' => 2,
+  # 	}
+  # }
+
+  # or
+
+  $prefixes = get_prefixes({min => 1, max => 2},$text);
+
+  # $prefixes now holds
+  # {
+  # 	1 => {
+  # 		'H' => 2,
+  # 		'w' => 2,
+  # 		'b' => 1,
+  # 	},
+  # 	2 => {
+  # 		'He' => 2,
+  # 		'wo' => 2,
+  # 		'bi' => 1,
+  # 	}
+  # }
+
+  # the use for get_suffixes is similar
+
+=head1 DESCRIPTION
+
+Provides methods for prefixe and suffix analisys of text.
+
+=head1 METHODS
+
+=head2 get_prefixes
+
+Extracts prefixes from text. You can specify the minimum and maximum
+number of characters of prefixes you want.
+
+Returns a reference to a hash, where the specified limits are mapped
+in hashes; each of those hashes maps every prefix in the text into the
+number of times it was found.
+
+By default, both minimum and maximum limits are 3. If the minimum
+limit is greater than the lower one, an empty hash is returned.
+
+A prefix is considered to be a sequence of word characters (\w) in
+the beginning of a word (that is, after a word boundary) that does not
+reach the end of the word ("regular expressionly", a prefix is the $1
+of /\b(\w+)\w/).
+
+  # extracting prefixes of size 3
+  $prefixes = get_prefixes( $text );
+
+  # extracting prefixes of sizes 2 and 3
+  $prefixes = get_prefixes( {min => 2}, $text );
+
+  # extracting prefixes of sizes 3 and 4
+  $prefixes = get_prefixes( {max => 4}, $text );
+
+  # extracting prefixes of sizes 2, 3 and 4
+  $prefixes = get_prefixes( {min => 2, max=> 4}, $text);
+
+=head2 get_suffixes
+
+The get_suffixes function is similar to the get_prefixes one. You
+should read the documentation for that one and than come back to this
+point.
+
+A suffix is considered to be a sequence of word characters (\w) in
+the end of a word (that is, before a word boundary) that does not start
+at the beginning of the word ("regular expressionly" speaking, a
+prefix is the $1 of /\w(\w+)\b/).
+
+  # extracting suffixes of size 3
+  $suffixes = get_suffixes( $text );
+
+  # extracting suffixes of sizes 2 and 3
+  $suffixes = get_suffixes( {min => 2}, $text );
+
+  # extracting suffixes of sizes 3 and 4
+  $suffixes = get_suffixes( {max => 4}, $text );
+
+  # extracting suffixes of sizes 2, 3 and 4
+  $suffixes = get_suffixes( {min => 2, max=> 4}, $text);
+
+=head1 OPTIONS
+
+Apart from deciding on a minimum and maximum size for prefixes or suffixes, you
+can also decide on some configuration options.
+
+=head2 exclude_numbers
+
+Set to 0 if you consider numbers as part of words. Default value is 1.
+
+  # this
+  get_suffixes( {min => 1, max => 1, exclude_numbers => 0}, "Hello, but w8" );
+
+  # returns this:
+    {
+      1 => {
+             'o' => 1,
+             't' => 1,
+             '8' => 1
+           }
+    }
+
+=head2 lowercase
+
+Set to 1 to extract all prefixes in lowercase mode. Default value is 0.
+
+ATTENTION: This does not mean that prefixes with uppercased characters won't be
+extracted. It means they will be extracted after being lowercased.
+
+  # this...
+  get_prefixes( {min => 2, max => 2, lowercase => 1}, "Hello, hello");
+
+  # returns this:
+    {
+      2 => {
+             'he' => 2
+           }
+    }
+
+=head1 TO DO
+
+=over 6
+
+=item * Make it more efficient (use C for that)
+
+=back
+
+=head1 AUTHOR
+
+Jose Castro, C<< <cog at cpan.org> >>
+
+=head1 COPYRIGHT & LICENSE
+
+Copyright 2004 Jose Castro, All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+

Added: branches/upstream/libtext-affixes-perl/current/t/01-prefixes.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libtext-affixes-perl/current/t/01-prefixes.t?rev=40686&op=file
==============================================================================
--- branches/upstream/libtext-affixes-perl/current/t/01-prefixes.t (added)
+++ branches/upstream/libtext-affixes-perl/current/t/01-prefixes.t Fri Jul 24 21:30:53 2009
@@ -1,0 +1,76 @@
+use Test::More tests => 8;
+BEGIN { use_ok('Text::Affixes') };
+
+my $text = "Hello, world. Hello, big world.";
+is_deeply(
+  get_prefixes($text),
+  {
+      3 => {
+              'Hel' => 2,
+              'wor' => 2,
+      }
+  }
+);
+
+is_deeply(
+  get_prefixes({min => 1, max => 2},$text),
+  {
+      1 => {
+              'H' => 2,
+              'w' => 2,
+              'b' => 1,
+      },
+      2 => {
+              'He' => 2,
+              'wo' => 2,
+              'bi' => 1,
+      }
+  }
+);
+
+$text = "Hello1, 2world";
+
+is_deeply( get_prefixes({min => 2, max => 2}, $text),
+  {
+	2 => {
+		'He' => 1,
+	}
+  }
+);
+
+is_deeply( get_prefixes({min => 2, max => 2, exclude_numbers => 0}, $text),
+  {
+	2 => {
+		'He' => 1,
+		'2w' => 1,
+	}
+  }
+);
+
+is_deeply( get_prefixes({min => 2, max => 2, lowercase => 1}, $text),
+  {
+	2 => {
+		'he' => 1,
+	}
+  }
+);
+
+$text = "Hello, hello";
+is_deeply( get_prefixes({min => 2, max => 2, lowercase => 1}, $text),
+  {
+	2 => {
+		'he' => 2,
+	}
+  }
+);
+
+is_deeply( get_prefixes({min => 2, max => 2, lowercase => 0}, $text),
+  {
+	2 => {
+		'He' => 1,
+		'he' => 1,
+	}
+  }
+);
+
+

Added: branches/upstream/libtext-affixes-perl/current/t/02-suffixes.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libtext-affixes-perl/current/t/02-suffixes.t?rev=40686&op=file
==============================================================================
--- branches/upstream/libtext-affixes-perl/current/t/02-suffixes.t (added)
+++ branches/upstream/libtext-affixes-perl/current/t/02-suffixes.t Fri Jul 24 21:30:53 2009
@@ -1,0 +1,72 @@
+use Test::More tests => 8;
+BEGIN { use_ok('Text::Affixes') };
+
+my $text = "Hello, world. Hello, big world.";
+is_deeply(
+  get_suffixes($text),
+  {
+      3 => {
+              'llo' => 2,
+              'rld' => 2,
+      }
+  });
+
+is_deeply(
+  get_suffixes({min => 1, max => 2},$text),
+  {
+      1 => {
+              'o' => 2,
+              'd' => 2,
+              'g' => 1,
+      },
+      2 => {
+              'lo' => 2,
+              'ld' => 2,
+              'ig' => 1,
+      }
+  });
+
+$text = "Hello1, 2worlD";
+
+is_deeply( get_suffixes({min => 2, max => 2}, $text),
+  {
+	2 => {
+		'lD' => 1,
+	}
+  }
+);
+
+is_deeply( get_suffixes({min => 2, max => 2, exclude_numbers => 0}, $text),
+  {
+	2 => {
+		'lD' => 1,
+		'o1' => 1,
+	}
+  }
+);
+
+is_deeply( get_suffixes({min => 2, max => 2, lowercase => 1}, $text),
+  {
+	2 => {
+		'ld' => 1,
+	}
+  }
+);
+
+$text = "Hello, hellO";
+is_deeply( get_suffixes({min => 2, max => 2, lowercase => 1}, $text),
+  {
+	2 => {
+		'lo' => 2,
+	}
+  }
+);
+
+is_deeply( get_suffixes({min => 2, max => 2, lowercase => 0}, $text),
+  {
+	2 => {
+		'lo' => 1,
+		'lO' => 1,
+	}
+  }
+);

Added: branches/upstream/libtext-affixes-perl/current/t/03-extreme_cases.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libtext-affixes-perl/current/t/03-extreme_cases.t?rev=40686&op=file
==============================================================================
--- branches/upstream/libtext-affixes-perl/current/t/03-extreme_cases.t (added)
+++ branches/upstream/libtext-affixes-perl/current/t/03-extreme_cases.t Fri Jul 24 21:30:53 2009
@@ -1,0 +1,28 @@
+use Test::More tests => 5;
+BEGIN { use_ok('Text::Affixes') };
+
+my $text = "Hello, world. Hello, big world.";
+my $prefixes = get_prefixes( {}, $text);
+
+is_deeply( $prefixes ,
+  # $prefixes now holds
+  {
+      3 => {
+              'Hel' => 2,
+              'wor' => 2,
+      }
+  });
+
+$prefixes = get_prefixes({min => 0, max => 0},$text);
+
+is_deeply( $prefixes ,
+  # $prefixes now holds
+  { });
+
+$prefixes = get_prefixes({min => 4, max => 3},$text);
+
+is_deeply( $prefixes ,
+  # $prefixes now holds
+  { });
+
+is_deeply( get_prefixes(), undef );

Added: branches/upstream/libtext-affixes-perl/current/t/pod-coverage.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libtext-affixes-perl/current/t/pod-coverage.t?rev=40686&op=file
==============================================================================
--- branches/upstream/libtext-affixes-perl/current/t/pod-coverage.t (added)
+++ branches/upstream/libtext-affixes-perl/current/t/pod-coverage.t Fri Jul 24 21:30:53 2009
@@ -1,0 +1,6 @@
+#!perl -T
+
+use Test::More;
+eval "use Test::Pod::Coverage 1.04";
+plan skip_all => "Test::Pod::Coverage 1.04 required for testing POD coverage" if $@;
+all_pod_coverage_ok();

Added: branches/upstream/libtext-affixes-perl/current/t/pod.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libtext-affixes-perl/current/t/pod.t?rev=40686&op=file
==============================================================================
--- branches/upstream/libtext-affixes-perl/current/t/pod.t (added)
+++ branches/upstream/libtext-affixes-perl/current/t/pod.t Fri Jul 24 21:30:53 2009
@@ -1,0 +1,6 @@
+#!perl -T
+
+use Test::More;
+eval "use Test::Pod 1.14";
+plan skip_all => "Test::Pod 1.14 required for testing POD" if $@;
+all_pod_files_ok();




More information about the Pkg-perl-cvs-commits mailing list