r67639 - in /branches/upstream/liblingua-en-nameparse-perl/current: Changes MANIFEST META.yml lib/Lingua/EN/NameGrammar.pm lib/Lingua/EN/NameParse.pm lib/Lingua/EN/NameParse/ lib/Lingua/EN/NameParse/Grammar.pm t/rules.t

periapt-guest at users.alioth.debian.org periapt-guest at users.alioth.debian.org
Fri Jan 21 22:16:49 UTC 2011


Author: periapt-guest
Date: Fri Jan 21 22:16:37 2011
New Revision: 67639

URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=67639
Log:
[svn-upgrade] new version liblingua-en-nameparse-perl (1.29)

Added:
    branches/upstream/liblingua-en-nameparse-perl/current/lib/Lingua/EN/NameParse/
    branches/upstream/liblingua-en-nameparse-perl/current/lib/Lingua/EN/NameParse/Grammar.pm
Removed:
    branches/upstream/liblingua-en-nameparse-perl/current/lib/Lingua/EN/NameGrammar.pm
Modified:
    branches/upstream/liblingua-en-nameparse-perl/current/Changes
    branches/upstream/liblingua-en-nameparse-perl/current/MANIFEST
    branches/upstream/liblingua-en-nameparse-perl/current/META.yml
    branches/upstream/liblingua-en-nameparse-perl/current/lib/Lingua/EN/NameParse.pm
    branches/upstream/liblingua-en-nameparse-perl/current/t/rules.t

Modified: branches/upstream/liblingua-en-nameparse-perl/current/Changes
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/liblingua-en-nameparse-perl/current/Changes?rev=67639&op=diff
==============================================================================
--- branches/upstream/liblingua-en-nameparse-perl/current/Changes (original)
+++ branches/upstream/liblingua-en-nameparse-perl/current/Changes Fri Jan 21 22:16:37 2011
@@ -1,4 +1,11 @@
 Revision history for Perl CPAN module Lingua::En::NameParse
+
+1.29 23 Jan 2011
+   Corrected documentation of case_components module, thanks to John Hansen
+   Removed invalid space after /Pilot Officer/ in extended titles grammar, thanks to John Hansen
+   Added the 'Mr_J_Adam_Smith' name type, thanks to John Hansen
+   Added the 'John' name type, thanks to Graham Seamen
+   Moved NameGrammar.pm to Lingua::EN::NameParse::Grammar name space
 
 1.28 3 Jan 2011
    Added more extended titles including Pilot Officer, Count, Duke, Dutchess, Marquess (thanks to Hugh Myers)

Modified: branches/upstream/liblingua-en-nameparse-perl/current/MANIFEST
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/liblingua-en-nameparse-perl/current/MANIFEST?rev=67639&op=diff
==============================================================================
--- branches/upstream/liblingua-en-nameparse-perl/current/MANIFEST (original)
+++ branches/upstream/liblingua-en-nameparse-perl/current/MANIFEST Fri Jan 21 22:16:37 2011
@@ -4,7 +4,7 @@
 Makefile.PL
 examples/demo.pl
 lib/Lingua/EN/NameParse.pm
-lib/Lingua/EN/NameGrammar.pm
+lib/Lingua/EN/NameParse/Grammar.pm
 surname_prefs.txt
 t/main.t
 t/rules.t

Modified: branches/upstream/liblingua-en-nameparse-perl/current/META.yml
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/liblingua-en-nameparse-perl/current/META.yml?rev=67639&op=diff
==============================================================================
--- branches/upstream/liblingua-en-nameparse-perl/current/META.yml (original)
+++ branches/upstream/liblingua-en-nameparse-perl/current/META.yml Fri Jan 21 22:16:37 2011
@@ -1,14 +1,22 @@
 --- #YAML:1.0
-name:                Lingua-EN-NameParse
-version:             1.28
-abstract:            Manipulate peoples names, titles and initials
-license:             perl
-author:              
+name:               Lingua-EN-NameParse
+version:            1.29
+abstract:           Manipulate peoples names, titles and initials
+author:
     - Kim Ryan
-generated_by:        ExtUtils::MakeMaker version 6.42_01
-distribution_type:   module
-requires:     
-    Parse::RecDescent:             0
+license:            perl
+distribution_type:  module
+configure_requires:
+    ExtUtils::MakeMaker:  0
+build_requires:
+    ExtUtils::MakeMaker:  0
+requires:
+    Parse::RecDescent:  0
+no_index:
+    directory:
+        - t
+        - inc
+generated_by:       ExtUtils::MakeMaker version 6.56
 meta-spec:
-    url:     http://module-build.sourceforge.net/META-spec-v1.3.html
-    version: 1.3
+    url:      http://module-build.sourceforge.net/META-spec-v1.4.html
+    version:  1.4

Modified: branches/upstream/liblingua-en-nameparse-perl/current/lib/Lingua/EN/NameParse.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/liblingua-en-nameparse-perl/current/lib/Lingua/EN/NameParse.pm?rev=67639&op=diff
==============================================================================
--- branches/upstream/liblingua-en-nameparse-perl/current/lib/Lingua/EN/NameParse.pm (original)
+++ branches/upstream/liblingua-en-nameparse-perl/current/lib/Lingua/EN/NameParse.pm Fri Jan 21 22:16:37 2011
@@ -107,12 +107,13 @@
     J_Adam_Smith
     John_Smith
     A_Smith
-
+    John
 
 Precursors and suffixes are only applied to the following formats:
 
     Mr_John_Adam_Smith
     Mr_John_A_Smith
+    Mr_J_Adam_Smith    
     Mr_John_Smith
     Mr_John_Smith
     Mr_A_Smith
@@ -274,7 +275,7 @@
 
 =head2 case_components
 
-   %my_name = $name->components;
+   %my_name = $name->case_components;
    $cased_surname = $my_name{surname_1};
 
 
@@ -391,22 +392,24 @@
 =item type
 
 The type of format a name is in, as one of the following strings:
-
-   Mr_A_Smith_&_Ms_B_Jones
-   Mr_&_Ms_A_&_B_Smith
-   Mr_A_&_Ms_B_Smith
-   Mr_&_Ms_A_Smith
-   Mr_A_&_B_Smith
-   Mr_John_Adam_Smith
-   Mr_John_A_Smith
-   Mr_John_Smith
-   Mr_A_Smith
-   John_Adam_Smith
-   John_A_Smith
-   J_Adam_Smith
-   John_Smith
-   A_Smith
-   unknown
+   
+    Mr_A_Smith_&_Ms_B_Jones
+    Mr_&_Ms_A_&_B_Smith
+    Mr_A_&_Ms_B_Smith
+    Mr_&_Ms_A_Smith
+    Mr_A_&_B_Smith
+    Mr_John_Adam_Smith
+    Mr_John_A_Smith
+    Mr_J_Adam_Smith
+    Mr_John_Smith
+    Mr_A_Smith
+    John_Adam_Smith
+    John_A_Smith
+    J_Adam_Smith
+    John_Smith
+    A_Smith
+    John
+    unknown
 
 
 =item non_matching
@@ -523,7 +526,7 @@
 
 =head1 COPYRIGHT AND LICENSE
 
-Copyright (c) 2008 Kim Ryan. All rights reserved.
+Copyright (c) 2011 Kim Ryan. All rights reserved.
 
 This library is free software; you can redistribute it and/or modify
 it under the same terms as Perl itself, either Perl version 5.8.4 or,
@@ -539,13 +542,13 @@
 use strict;
 use warnings;
 
-use Lingua::EN::NameGrammar;
+use Lingua::EN::NameParse::Grammar;
 use Parse::RecDescent;
 
 use Exporter;
 use vars qw (@ISA @EXPORT_OK);
 
-our $VERSION   = '1.28';
+our $VERSION = '1.29';
 @ISA       = qw(Exporter);
 @EXPORT_OK = qw(&clean &case_surname);
 
@@ -568,17 +571,10 @@
     my $current_key;
     foreach my $current_key (keys %args)
     {
-        if ( $current_key eq 'salutation' or $current_key eq 'sal_default' )
-        {
-            $name->{$current_key} = &_case_word($args{$current_key});
-        }
-        else
-        {
-            $name->{$current_key} = $args{$current_key};
-        }
-    }
-    
-    my $grammar = &Lingua::EN::NameGrammar::_create($name);
+        $name->{$current_key} = $args{$current_key};
+    }
+    
+    my $grammar = &Lingua::EN::NameParse::Grammar::_create($name);
     $name->{parse} = new Parse::RecDescent($grammar);
     
     return ($name);
@@ -728,7 +724,8 @@
     'John_A_Smith'            => ['precursor','given_name_1','initials_1','surname_1','suffix'],
     'J_Adam_Smith'            => ['precursor','initials_1','middle_name','surname_1','suffix'],
     'John_Smith'              => ['precursor','given_name_1','surname_1','suffix'],
-    'A_Smith'                 => ['precursor','initials_1','surname_1','suffix']
+    'A_Smith'                 => ['precursor','initials_1','surname_1','suffix'],
+    'John'                    => ['given_name_1']
 );
 
 # only include names with a single surname
@@ -1065,7 +1062,12 @@
                     push(@greeting,$sal_default);
                 }
             }            
-        }                
+        }
+        else
+        {
+            warn "Invalid sal_type : ", $sal_type;
+            push(@greeting,$sal_default);
+        }
     }
     return(join(' ', at greeting));
 }
@@ -1119,16 +1121,24 @@
 # PRIVATE METHODS
 
 #-------------------------------------------------------------------------------
-# Check that common reserved word (as found in company names) do not appear
+
 sub _pre_parse
 {
     my $name = shift;
-    
+    # Check that common reserved word (as found in company names) do not appear
     if ( $name->{input_string} =~ 
          /\bPty\.? Ltd\.?$|\bLtd\.?$|\bPLC$|Association|Department|National|Society/i )
     {
         $name->{error} = 1;
         $name->{properties}{non_matching} = $name->{input_string};
+    }
+    
+    # For the case of a single name such as 'Voltaire' we need to add a trailing space
+    # to the input string. This is because the grammar tree expects a terminator (the space)
+    # optionally followed by other productions or non matching text
+   if ( $name->{input_string} =~ /^[A-Z]{2,}(\-)?[A-Z]{0,}$/i )
+    {
+        $name->{input_string} .= ' ';
     }
     return($name);
 
@@ -1141,6 +1151,9 @@
 {
     my $name = shift;
     
+    # $::RD_TRACE  = 1;  # for debugging RecDescent output
+    # Use Parse::RecDescent to do the parsing. 'full_name' is a label for the complete grammar tree
+    # defined in Lingua::EN::NameParse::Grammar
     my $parsed_name = $name->{parse}->full_name($name->{input_string});
     
     # Place components into a separate hash, so they can be easily returned

Added: branches/upstream/liblingua-en-nameparse-perl/current/lib/Lingua/EN/NameParse/Grammar.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/liblingua-en-nameparse-perl/current/lib/Lingua/EN/NameParse/Grammar.pm?rev=67639&op=file
==============================================================================
--- branches/upstream/liblingua-en-nameparse-perl/current/lib/Lingua/EN/NameParse/Grammar.pm (added)
+++ branches/upstream/liblingua-en-nameparse-perl/current/lib/Lingua/EN/NameParse/Grammar.pm Fri Jan 21 22:16:37 2011
@@ -1,0 +1,763 @@
+=head1 NAME
+
+Lingua::EN::NameGrammar - grammar tree for Lingua::EN::NameParse
+
+=head1 SYNOPSIS
+
+Internal functions called from NameParse.pm module
+
+=head1 DESCRIPTION
+
+Grammar tree of personal name syntax for Lingua::EN::NameParse module.
+
+The grammar defined here is for use with the Parse::RecDescent module.
+Note that parsing is done depth first, meaning match the shortest string first.
+To avoid premature matches, when one rule is a sub set of another longer rule,
+it must appear after the longer rule. See the Parse::RecDescent documentation
+for more details.
+
+
+=head1 AUTHOR
+
+NameParse::Grammar was written by Kim Ryan <kimryan at cpan dot org>.
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (c) 2011 Kim Ryan. All rights reserved.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself, either Perl version 5.8.4 or,
+at your option, any later version of Perl 5 you may have available.
+
+
+
+
+=cut
+#------------------------------------------------------------------------------
+
+package Lingua::EN::NameParse::Grammar;
+use strict;
+use warnings;
+
+our $VERSION = '1.29';
+
+
+# Rules that define valid orderings of a names components
+
+my $rules_start = q{ full_name : };
+
+my $rules_joint_names =
+q{
+
+   # A (?) refers to an optional component, occurring 0 or more times.
+   # Optional items are returned as an array, which for our case will
+   # always consist of one element, when they exist.
+
+   title given_name surname conjunction title given_name surname non_matching(?)
+   {
+      # block of code to define actions upon successful completion of a
+      # 'production' or rule
+
+      # Two separate people
+      $return =
+      {
+         # Parse::RecDescent lets you return a single scalar, which we use as
+         # an anonymous hash reference
+         title_1       => $item[1],
+         given_name_1  => $item[2],
+         surname_1     => $item[3],
+         conjunction_1 => $item[4],
+         title_2       => $item[5],
+         given_name_2  => $item[6],
+         surname_2     => $item[7],
+         non_matching  => $item[8][0],
+         number        => 2,
+         type          => 'Mr_John_Smith_&_Ms_Mary_Jones'
+      }
+   }
+   |
+
+
+   title initials surname conjunction title initials surname non_matching(?)
+   {
+      $return =
+      {
+         title_1       => $item[1],
+         initials_1    => $item[2],
+         surname_1     => $item[3],
+         conjunction_1 => $item[4],
+         title_2       => $item[5],
+         initials_2    => $item[6],
+         surname_2     => $item[7],
+         non_matching  => $item[8][0],
+         number        => 2,
+         type          => 'Mr_A_Smith_&_Ms_B_Jones'
+      }
+   }
+   |
+
+   title initials conjunction initials surname non_matching(?)
+   {
+      # Two related people, shared title, separate initials,
+      # shared surname. Example, father and son, sisters
+      $return =
+      {
+         title_1       => $item[1],
+         initials_1    => $item[2],
+         conjunction_1 => $item[3],
+         initials_2    => $item[4],
+         surname_1     => $item[5],
+         non_matching  => $item[6][0],
+         number        => 2,
+         type          => 'Mr_A_&_B_Smith'
+      }
+   }
+   |
+
+   title conjunction title initials conjunction initials surname non_matching(?)
+   {
+      # Two related people, own initials, shared surname
+
+      $return =
+      {
+         title_1       => $item[1],
+         conjunction_1 => $item[2],
+         title_2       => $item[3],
+         initials_1    => $item[4],
+         conjunction_2 => $item[5],
+         initials_2    => $item[6],
+         surname_1     => $item[7],
+         non_matching  => $item[8][0],
+         number        => 2,
+         type          => 'Mr_&_Ms_A_&_B_Smith'
+      }
+   }
+   |
+
+   title initials conjunction title initials surname non_matching(?)
+   {
+      # Two related people, own initials, shared surname
+      $return =
+      {
+         title_1       => $item[1],
+         initials_1    => $item[2],
+         conjunction_1 => $item[3],
+         title_2       => $item[4],
+         initials_2    => $item[5],
+         surname_1     => $item[6],
+         non_matching  => $item[7][0],
+         number        => 2,
+         type          => 'Mr_A_&_Ms_B_Smith'
+      }
+   }
+   |
+
+   title conjunction title initials surname non_matching(?)
+   {
+      # Two related people, shared initials, shared surname
+      $return =
+      {
+         title_1       => $item[1],
+         conjunction_1 => $item[2],
+         title_2       => $item[3],
+         initials_1    => $item[4],
+         surname_1     => $item[5],
+         non_matching  => $item[6][0],
+         number        => 2,
+         type          => 'Mr_&_Ms_A_Smith'
+      }
+   }
+   |
+
+   given_name surname conjunction  given_name surname non_matching(?)
+   {
+      $return =
+      {
+         given_name_1  => $item[1],
+         surname_1     => $item[2],
+         conjunction_1 => $item[3],
+         given_name_2  => $item[4],
+         surname_2     => $item[5],
+         non_matching  => $item[6][0],
+         number        => 2,
+         type          => 'John_Smith_&_Mary_Jones'
+      }
+   }
+   |
+
+   initials surname conjunction  initials surname non_matching(?)
+   {
+      $return =
+      {
+         initials_1    => $item[1],
+         surname_1     => $item[2],
+         conjunction_1 => $item[3],
+         initials_2    => $item[4],
+         surname_2     => $item[5],
+         non_matching  => $item[6][0],
+         number        => 2,
+         type          => 'A_Smith_&_B_Jones'
+      }
+   }
+   |
+
+   given_name conjunction given_name surname non_matching(?)
+   {
+      $return =
+      {
+         given_name_1  => $item[1],
+         conjunction_1 => $item[2],
+         given_name_2  => $item[3],
+         surname_2     => $item[4],
+         non_matching  => $item[5][0],
+         number        => 2,
+         type          => 'John_&_Mary_Smith'
+      }
+   }
+   |
+
+};
+
+my $rules_single_names =
+q{
+
+    precursor(?) title given_name middle_name surname suffix(?) non_matching(?)
+    {
+       $return =
+       {
+          precursor     => $item[1][0],
+          title_1       => $item[2],
+          given_name_1  => $item[3],
+          middle_name   => $item[4],
+          surname_1     => $item[5],
+          suffix        => $item[6][0],
+          non_matching  => $item[7][0],
+          number        => 1,
+          type          => 'Mr_John_Adam_Smith'
+       }
+    }
+    |
+
+   precursor(?) title given_name single_initial surname suffix(?) non_matching(?)
+   {
+      $return =
+      {
+         precursor     => $item[1][0],
+         title_1       => $item[2],
+         given_name_1  => $item[3],
+         initials_1    => $item[4],
+         surname_1     => $item[5],
+         suffix        => $item[6][0],
+         non_matching  => $item[7][0],
+         number        => 1,
+         type          => 'Mr_John_A_Smith'
+      }
+   }
+   |
+   
+    precursor(?) title single_initial middle_name surname suffix(?) non_matching(?)
+    {
+       $return =
+       {
+          precursor     => $item[1][0],
+          title_1       => $item[2],
+          initials_1    => $item[3],
+          middle_name   => $item[4],
+          surname_1     => $item[5],
+          suffix        => $item[6][0],
+          non_matching  => $item[7][0],
+          number        => 1,
+          type          => 'Mr_J_Adam_Smith'
+       }
+    }
+    |   
+   
+
+
+   precursor(?) title given_name surname suffix(?) non_matching(?)
+   {
+      $return =
+      {
+         precursor     => $item[1][0],
+         title_1       => $item[2],
+         given_name_1  => $item[3],
+         surname_1     => $item[4],
+         suffix        => $item[5][0],
+         non_matching  => $item[6][0],
+         number        => 1,
+         type          => 'Mr_John_Smith'
+      }
+   }
+   |
+
+   precursor(?) title initials surname suffix(?) non_matching(?)
+   {
+      $return =
+      {
+         precursor     => $item[1][0],
+         title_1       => $item[2],
+         initials_1    => $item[3],
+         surname_1     => $item[4],
+         suffix        => $item[5][0],
+         non_matching  => $item[6][0],
+         number        => 1,
+         type          => 'Mr_A_Smith'
+      }
+   }
+   |
+
+   precursor(?)  given_name_min_2 middle_name surname suffix(?) non_matching(?)
+   {
+      $return =
+      {
+         precursor     => $item[1][0],
+         given_name_1  => $item[2],
+         middle_name   => $item[3],
+         surname_1     => $item[4],
+         suffix        => $item[5][0],
+         non_matching  => $item[6][0],
+         number        => 1,
+         type          => 'John_Adam_Smith'
+      }
+   }
+   |
+
+   precursor(?) given_name_min_2 single_initial surname suffix(?) non_matching(?)
+   {
+      $return =
+      {
+         precursor     => $item[1][0],
+         given_name_1  => $item[2],
+         initials_1    => $item[3],
+         surname_1     => $item[4],
+         suffix        => $item[5][0],
+         non_matching  => $item[6][0],
+         number        => 1,
+         type          => 'John_A_Smith'
+      }
+   }
+   |
+   
+   precursor(?) single_initial middle_name surname suffix(?) non_matching(?)
+   {
+      $return =
+      {
+         precursor     => $item[1][0],
+         initials_1    => $item[2],
+         middle_name   => $item[3],
+         surname_1     => $item[4],
+         suffix        => $item[5][0],
+         non_matching  => $item[6][0],
+         number        => 1,
+         type          => 'J_Adam_Smith'
+      }
+   }
+   |   
+
+   precursor(?) given_name surname suffix(?) non_matching(?)
+   {
+      $return =
+      {
+         precursor     => $item[1][0],
+         given_name_1  => $item[2],
+         surname_1     => $item[3],
+         suffix        => $item[4][0],
+         non_matching  => $item[5][0],
+         number        => 1,
+         type          => 'John_Smith'
+      }
+   }
+   |
+
+   precursor(?) initials surname suffix(?) non_matching(?)
+   {
+      $return =
+      {
+         precursor     => $item[1][0],
+         initials_1    => $item[2],
+         surname_1     => $item[3],
+         suffix        => $item[4][0],
+         non_matching  => $item[5][0],
+         number        => 1,
+         type          => 'A_Smith'
+      }
+   }
+   |
+   
+   given_name non_matching(?)
+   {
+      $return =
+      {
+         given_name_1  => $item[1],
+         non_matching  => $item[2][0],
+         number        => 1,
+         type          => 'John'
+      }
+   }
+   |
+
+   non_matching(?)
+   {
+      $return =
+      {
+         non_matching  => $item[1][0],
+         number        => 0,
+         type          => 'unknown'
+      }
+   }
+};
+
+#------------------------------------------------------------------------------
+# Individual components that a name can be composed from. Components are
+# expressed as literals or Perl regular expressions.
+
+my $precursors =
+q
+{
+    precursor : 
+
+    /Estate Of (The Late )?/i |
+    /His (Excellency|Honou?r) /i |
+    /Her (Excellency|Honou?r) /i |
+    /The Right Honou?rable /i |
+    /The Honou?rable /i |
+    /Right Honou?rable /i |
+    /The Rt\.? Hon\.? /i |
+    /The Hon\.? /i |
+    /Rt\.? Hon\.? /i
+
+};
+
+my $titles =
+q{
+
+   title :
+
+   /Mr\.? /i           |
+   /Ms\.? /i           |
+   /M\/s\.? /i         |
+   /Mrs\.? /i          |
+   /Miss\.? /i         |
+
+   /Dr\.? /i           |
+   /Sir /i             |
+   /Dame /i            
+
+};
+   
+my $extended_titles =
+q{
+                       |
+   /Messrs /i          |   # plural or Mr
+   /Mme\.? /i          |   # Madame
+   /Mister /i          |
+   /Mast(\.|er)? /i    |
+   /Ms?gr\.? /i        |   # Monsignor
+   /Count /i           |
+   /Countess /i        |   
+   /Duke /i            |
+   /Duchess /i         |
+   /Lord /i            |
+   /Lady /i            |
+   /Marquess i/        |
+
+   /Madam(e)? /i       |
+
+   # Medical
+   /Doctor /i          |
+   /Sister /i          |
+   /Matron /i          |
+
+   # Legal
+   /Judge /i           |
+   /Justice /i         |
+
+   # Police
+   /Det\.? /i          |
+   /Insp\.? /i         |
+
+   # Military
+   /Brig(adier)? /i       |
+   /Captain /i            |
+   /Capt\.? /i            |
+   /Colonel /i            |
+   /Col\.? /i             |
+   /Commander /i          |
+   /Commodore /i          |
+   /Cdr\.? /i             |   # Commander, Commodore
+   /Field Marshall /i     |
+   /Fl\.? Off\.? /i       |
+   /Flight Officer /i     |
+   /Flt Lt /i             |
+   /Flight Lieutenant /i  |
+   /Gen(\.|eral)? /i      |
+   /Gen\. /i              |
+   /Pte\. /i              |
+   /Private /i            |
+   /Sgt\.? /i             |
+   /Sargent /i            |
+   /Air Commander /i      |
+   /Air Commodore /i      |
+   /Air Marshall /i       |
+   /Lieutenant Colonel /i |
+   /Lt\.? Col\.? /i       |
+   /Lt\.? Gen\.? /i       |
+   /Lt\.? Cdr\.? /i       |
+   /Lieutenant /i         |
+   /(Lt|Leut|Lieut)\.? /i |
+   /Major General /i      |
+   /Maj\.? Gen\.?/i       |
+   /Major /i              |
+   /Maj\.? /i             |
+   /Pilot Officer /i      |
+
+
+   # Religious
+   /Rabbi /i              |
+   /Bishop /i             |
+   /Brother /i            |
+   /Chaplain /i           |
+   /Father /i             |
+   /Pastor /i             |
+   /Mother Superior /i    |
+   /Mother /i             |
+   /Most Rever[e|a]nd /i  |
+   /Very Rever[e|a]nd /i  |
+   /Rever[e|a]nd /i       |
+   /Mt\.? Revd\.? /i      |
+   /V\.? Revd?\.? /i      |
+   /Revd?\.? /i           |
+
+
+   # Other
+   /Prof(\.|essor)? /i    |
+   /Ald(\.|erman)? /i
+};
+
+my $conjunction = q{ conjunction : /And |& /i };
+
+# Used in the John_A_Smith and J_Adam_Smith name types. Although this 
+# duplicates $initials_1, it is needed because this type of initial must 
+# always be one character long, regardless of the length of initials set 
+# by the user in the 'new' method.
+my $single_initial = q{ single_initial: /[A-Z]\.? /i };
+
+# Define given name combinations, specifying the minimum number of letters.
+# The correct pair of rules is determined by the 'initials' key in the hash
+# passed to the 'new' method.
+
+# Examples are Jo, Jo-Anne, D'Artagnan, O'Shaugnessy La'Keishia, T-Bone
+my $given_name_min_2 =
+q{
+    given_name: /[A-Z]{2,} /i | /[A-Z]{2,}\-[A-Z]{2,} /i | /[A-Z]{1,}\'[A-Z]{2,} /i | /T\-Bone /i
+};
+
+# Joe, Jo-Anne ...
+my $given_name_min_3 =
+q{
+    given_name: /[A-Z]{3,} /i | /[A-Z]{2,}\-[A-Z]{2,} /i | /[A-Z]{1,}\'[A-Z]{2,} /i | /T\-Bone /i
+};
+
+
+# John ...
+my $given_name_min_4 =
+q{
+    given_name: /[A-Z]{4,} /i | /[A-Z]{2,}\-[A-Z]{2,} /i | /[A-Z]{1,}\'[A-Z]{3,} /i | /T\-Bone /i
+};
+
+# For use with John_Adam_Smith and John_A_Smith name types
+my $fixed_length_given_name =
+q{
+    given_name_min_2 : /[A-Z]{2,} /i | /[A-Z]{2,}\-[A-Z]{2,} /i | /[A-Z]{1,}\'[A-Z]{2,} /i
+};
+
+
+# Define initials combinations specifying the minimum and maximum letters.
+# Order from most complex to simplest,  to avoid premature matching.
+
+# 'A' 'A.'
+my $initials_1 = q{ initials: /[A-Z]\.? /i };
+
+# 'A. B.' 'A.B.' 'AB' 'A B'
+
+my $initials_2 =
+q{
+   initials:  /([A-Z]\. ){1,2}/i | /([A-Z]\.){1,2} /i | /([A-Z] ){1,2}/i | /([A-Z]){1,2} /i
+};
+
+# 'A. B. C. '  'A.B.C' 'ABC' 'A B C'
+my $initials_3 =
+q{
+   initials: /([A-Z]\. ){1,3}/i |  /([A-Z]\.){1,3} /i | /([A-Z] ){1,3}/i | /([A-Z]){1,3} /i
+};
+
+
+# Jo, Jo-Anne, La'Keishia, D'Artagnan, O'Shaugnessy 
+my $middle_name =
+q{
+   middle_name: 
+   
+   # Dont grab surname prefix too early. For example, John Van Dam could be
+   # interpreted as middle name of Van and Surname of Dam. So exclude prefixs
+   # from middle names
+   ...!prefix /[A-Z]{2,} /i | /[A-Z]{2,}\-[A-Z]{2,} /i | /[A-Z]{1,}\'[A-Z]{2,} /i
+   {
+      $return = $item[2];
+   }
+};
+
+
+my $full_surname =
+q{
+   # Use look-ahead to avoid ambiguity between surname and suffix. For example,
+   # John Smith Snr, would detect Snr as the surname and Smith as the middle name
+   surname : ...!suffix sub_surname second_name(?)
+   {
+      if ( $item[2] and $item[3][0] )
+      {
+         $return = "$item[2]$item[3][0]";
+      }
+      else
+      {
+         $return = $item[2];
+      }
+   }
+
+   sub_surname : prefix(?) name
+   {
+      # To prevent warnings when compiling with the -w switch,
+      # do not return uninitialized variables.
+      if ( $item[1][0] )
+      {
+         $return = "$item[1][0]$item[2]";
+      }
+      else
+      {
+         $return = $item[2];
+      }
+   }
+
+   second_name : '-' sub_surname
+   {
+      if ( $item[1] and $item[2] )
+      {
+         $return = "$item[1]$item[2]";
+      }
+   }
+
+   # Patronymic, place name and other surname prefixes
+   prefix:
+
+      /[A|E]l /i         |   # Arabic, Greek,
+      /Ap /i             |   # Welsh
+      /Ben /i            |   # Hebrew
+
+      /Dell([a|e])? /i   |   # ITALIAN
+      /Dalle /i          |
+      /D[a|e]ll'/i       |
+      /Dela /i           |
+      /Del /i            |
+      /De (La |Los )?/i  |
+      /D[a|i|u] /i       |
+      /L[a|e|o] /i       |
+
+      /[D|L|O]'/i        |   # Italian, Irish or French
+      /St\.? /i          |   # abbreviation for Saint
+      /San /i            |   # Spanish
+
+      /Den /i            |   # DUTCH
+      /Von (Der )?/i     |
+      /Van (De(n|r)? )?/i
+
+   # space needed for any following text
+   name: /[A-Z]{2,} ?/i
+
+};
+
+my $suffix =
+q{
+   suffix:
+
+      # word boundaries are used to stop partial matches from surnames such as 
+      # the "VI" in "VINCE"
+
+      /Esq(\.|uire)?\b ?/i |
+      /Sn?r\.?\b ?/i | # Senior
+      /Jn?r\.?\b ?/i | # Junior
+      /PhD\.?\b ?/i  | 
+      /MD\.?\b ?/i   | 
+      /LLB\.?\b ?/i  | 
+
+
+      /XI{1,3}\b ?/i | # 11th, 12th, 13th
+      /X\b ?/i       | # 10th
+      /IV\b ?/i      | # 4th
+      /VI{1,3}\b ?/i | # 6th, 7th, 8th
+      /V\b ?/i       | # 5th
+      /IX\b ?/i      | # 9th
+      /I{1,3}\b ?/i    # 1st, 2nd, 3rd
+};
+
+# Two or more characters. This is set to 2 as a work around for the problem
+# with detecting suffixes like Snr. and Jnr. The dot here gets picked up
+# as non matching.
+
+my $non_matching = q{ non_matching: /.{2,}/ };
+
+
+#-------------------------------------------------------------------------------
+# Assemble correct combination for grammar tree.
+
+sub _create
+{
+   my $name = shift;
+
+   my $grammar = $rules_start;
+
+   if ( $name->{joint_names} )
+   {
+       $grammar .= $rules_joint_names;
+   }    
+   $grammar .= $rules_single_names . $precursors . $titles;
+
+    if ( $name->{extended_titles} )
+    {
+        $grammar .= $extended_titles;
+    }
+
+   $grammar .= $conjunction;
+
+   $grammar .= $single_initial;
+
+   $name->{initials} > 3 and $name->{initials} = 3;
+   $name->{initials} < 1 and $name->{initials} = 1;
+
+   # Define limit of when a string is treated as an initial, or
+   # a given name. For example, if initials are set to 2, MR TO SMITH
+   # will have initials of T & O and no given name, but MR TOM SMITH will
+   # have no initials, and a given name of Tom.
+
+   if ( $name->{initials} == 1 )
+   {
+      $grammar .= $given_name_min_2 . $initials_1;
+   }
+   elsif ( $name->{initials} == 2 )
+   {
+      $grammar .= $given_name_min_3 . $initials_2;
+   }
+   elsif ( $name->{initials} == 3 )
+   {
+      $grammar .= $given_name_min_4 . $initials_3;
+   }
+   
+   $grammar .= $fixed_length_given_name
+             . $middle_name
+             . $full_surname
+             . $suffix
+             . $non_matching
+             ;
+
+   return($grammar);
+}
+#-------------------------------------------------------------------------------
+1;

Modified: branches/upstream/liblingua-en-nameparse-perl/current/t/rules.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/liblingua-en-nameparse-perl/current/t/rules.t?rev=67639&op=diff
==============================================================================
--- branches/upstream/liblingua-en-nameparse-perl/current/t/rules.t (original)
+++ branches/upstream/liblingua-en-nameparse-perl/current/t/rules.t Fri Jan 21 22:16:37 2011
@@ -4,7 +4,7 @@
 #------------------------------------------------------------------------------
 
 use strict;
-use Test::Simple tests => 18;
+use Test::Simple tests => 20;
 use Lingua::EN::NameParse;
 
 my %args =
@@ -67,6 +67,11 @@
 %props = $name->properties;
 ok( $props{type} eq 'Mr_John_Adam_Smith', 'Mr_John_Adam_Smith format');
 
+$input = "MR J FITZGERALD KENNEDY";
+$name->parse($input);
+%props = $name->properties;
+ok( $props{type} eq 'Mr_J_Adam_Smith', 'Mr_J_Adam_Smith format');
+
 $input = "MR JOHN F KENNEDY";
 $name->parse($input);
 %props = $name->properties;
@@ -107,4 +112,9 @@
 %props = $name->properties;
 ok( $props{type} eq 'A_Smith', 'A_Smith format');
 
+$input = "Voltaire";
+$name->parse($input);
+%props = $name->properties;
+ok( $props{type} eq 'John', 'John format');
 
+




More information about the Pkg-perl-cvs-commits mailing list