r5581 - in /packages/libmarc-charset-perl/branches/upstream/current: Changes META.yml lib/MARC/Charset.pm lib/MARC/Charset/Code.pm lib/MARC/Charset/Compiler.pm t/marc8_to_utf8.t t/space.t

dmn at users.alioth.debian.org dmn at users.alioth.debian.org
Thu Jun 7 22:30:51 UTC 2007


Author: dmn
Date: Thu Jun  7 22:30:51 2007
New Revision: 5581

URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=5581
Log:
[svn-upgrade] Integrating new upstream version, libmarc-charset-perl (0.97)

Removed:
    packages/libmarc-charset-perl/branches/upstream/current/t/marc8_to_utf8.t
Modified:
    packages/libmarc-charset-perl/branches/upstream/current/Changes
    packages/libmarc-charset-perl/branches/upstream/current/META.yml
    packages/libmarc-charset-perl/branches/upstream/current/lib/MARC/Charset.pm
    packages/libmarc-charset-perl/branches/upstream/current/lib/MARC/Charset/Code.pm
    packages/libmarc-charset-perl/branches/upstream/current/lib/MARC/Charset/Compiler.pm
    packages/libmarc-charset-perl/branches/upstream/current/t/space.t

Modified: packages/libmarc-charset-perl/branches/upstream/current/Changes
URL: http://svn.debian.org/wsvn/pkg-perl/packages/libmarc-charset-perl/branches/upstream/current/Changes?rev=5581&op=diff
==============================================================================
--- packages/libmarc-charset-perl/branches/upstream/current/Changes (original)
+++ packages/libmarc-charset-perl/branches/upstream/current/Changes Thu Jun  7 22:30:51 2007
@@ -1,4 +1,14 @@
 Revision history for MARC::Charset
+
+0.97 Sun May 20 13:48:31 EDT 2007
+     - added t/null.t
+     - fixed Charset::Compiler to use the <alt> element when <ucs> is not 
+       defined. Previous versions of MARC::Charset would convert valid MARC8
+       to null when it encountered a mapping that lacked a UCS value.
+       Many thanks to Michael O'Connor.
+     - allow carriage returns and line feeds to pass unmolested, much the
+       same as spaces today.  Apparently, UNIMARC records embed these
+       formatting characters on a regular basis.
 
 0.96 Wed Mar 14 01:24:48 EDT 2007
      - added ignore_errors() to skip MARC8 -> UTF8 snafus

Modified: packages/libmarc-charset-perl/branches/upstream/current/META.yml
URL: http://svn.debian.org/wsvn/pkg-perl/packages/libmarc-charset-perl/branches/upstream/current/META.yml?rev=5581&op=diff
==============================================================================
--- packages/libmarc-charset-perl/branches/upstream/current/META.yml (original)
+++ packages/libmarc-charset-perl/branches/upstream/current/META.yml Thu Jun  7 22:30:51 2007
@@ -1,7 +1,7 @@
 # http://module-build.sourceforge.net/META-spec.html
 #XXXXXXX This is a prototype!!!  It will change in the future!!! XXXXX#
 name:         MARC-Charset
-version:      0.96
+version:      0.97
 version_from: lib/MARC/Charset.pm
 installdirs:  site
 requires:
@@ -11,4 +11,4 @@
     XML::SAX:                      0
 
 distribution_type: module
-generated_by: ExtUtils::MakeMaker version 6.17
+generated_by: ExtUtils::MakeMaker version 6.30

Modified: packages/libmarc-charset-perl/branches/upstream/current/lib/MARC/Charset.pm
URL: http://svn.debian.org/wsvn/pkg-perl/packages/libmarc-charset-perl/branches/upstream/current/lib/MARC/Charset.pm?rev=5581&op=diff
==============================================================================
--- packages/libmarc-charset-perl/branches/upstream/current/lib/MARC/Charset.pm (original)
+++ packages/libmarc-charset-perl/branches/upstream/current/lib/MARC/Charset.pm Thu Jun  7 22:30:51 2007
@@ -1,6 +1,6 @@
 package MARC::Charset;
 
-our $VERSION = '0.96';
+our $VERSION = '0.97';
 use strict;
 use warnings;
 
@@ -9,6 +9,7 @@
 
 use Unicode::Normalize;
 use Encode 'decode';
+use charnames ':full';
 use MARC::Charset::Table;
 use MARC::Charset::Constants qw(:all);
 
@@ -153,10 +154,10 @@
     my $combining = '';
     CHAR_LOOP: while ($index < $length) 
     {
-        # spaces just get added on unmolested
-        if (substr($marc8, $index, 1) eq ' ')
-        {
-            $utf8 .= ' ';
+        # whitespace, line feeds and carriage returns just get added on unmolested
+        if (substr($marc8, $index, 1) =~ m/(\s+|\x0A+|\x0D+)/so)
+        {
+            $utf8 .= $1;
             $index += 1;
             next CHAR_LOOP;
         }

Modified: packages/libmarc-charset-perl/branches/upstream/current/lib/MARC/Charset/Code.pm
URL: http://svn.debian.org/wsvn/pkg-perl/packages/libmarc-charset-perl/branches/upstream/current/lib/MARC/Charset/Code.pm?rev=5581&op=diff
==============================================================================
--- packages/libmarc-charset-perl/branches/upstream/current/lib/MARC/Charset/Code.pm (original)
+++ packages/libmarc-charset-perl/branches/upstream/current/lib/MARC/Charset/Code.pm Thu Jun  7 22:30:51 2007
@@ -8,7 +8,7 @@
 use MARC::Charset::Constants qw(:all);
 
 MARC::Charset::Code
-    ->mk_accessors(qw(marc ucs name charset is_combining));
+    ->mk_accessors(qw(marc ucs name charset is_combining alt));
 
 =head1 NAME
 

Modified: packages/libmarc-charset-perl/branches/upstream/current/lib/MARC/Charset/Compiler.pm
URL: http://svn.debian.org/wsvn/pkg-perl/packages/libmarc-charset-perl/branches/upstream/current/lib/MARC/Charset/Compiler.pm?rev=5581&op=diff
==============================================================================
--- packages/libmarc-charset-perl/branches/upstream/current/lib/MARC/Charset/Compiler.pm (original)
+++ packages/libmarc-charset-perl/branches/upstream/current/lib/MARC/Charset/Compiler.pm Thu Jun  7 22:30:51 2007
@@ -94,6 +94,12 @@
     # if we're ending a code element
     if ($code and $name eq 'code')
     {
+        # if there is no ucs code, use what's in alt
+        $code->ucs($code->alt()) unless $code->ucs;
+
+        # can't process a code point that lacks a unicode representation
+        die("invalid code: " . $code->to_string()) unless $code->ucs;
+        
         # set the charset code
         $code->charset($self->{current_charset});
 
@@ -109,7 +115,7 @@
     }
    
     # add these elements
-    elsif ($code and $name =~ /marc|ucs|is_combining/)
+    elsif ($code and $name =~ /^(marc|ucs|is_combining|alt)$/)
     {
         $code->$name($self->text());
     }

Modified: packages/libmarc-charset-perl/branches/upstream/current/t/space.t
URL: http://svn.debian.org/wsvn/pkg-perl/packages/libmarc-charset-perl/branches/upstream/current/t/space.t?rev=5581&op=diff
==============================================================================
--- packages/libmarc-charset-perl/branches/upstream/current/t/space.t (original)
+++ packages/libmarc-charset-perl/branches/upstream/current/t/space.t Thu Jun  7 22:30:51 2007
@@ -1,4 +1,4 @@
-use Test::More tests => 3;
+use Test::More tests => 4;
 use strict;
 use warnings;
 
@@ -7,6 +7,7 @@
 
 is('foo bar', marc8_to_utf8('foo bar'), 'one space');
 is('foo  bar', marc8_to_utf8('foo  bar'), 'two spaces');
+is("a\r \x{0A}b \x{0D}c\n", marc8_to_utf8("a\r \x{0A}b \x{0D}c\n"), 'spaces with newlines and carriage returns');
 
 my $test = 
     'a   ' . 
@@ -18,4 +19,3 @@
 my $expected = 'a   ' . chr(0x0396) . '   b';
 is(marc8_to_utf8($test), $expected, 'spacing with escape');
 
-




More information about the Pkg-perl-cvs-commits mailing list