r77048 - in /branches/upstream/libmarc-charset-perl/current: Changes MANIFEST META.yml lib/MARC/Charset.pm t/iii.t t/marc8_to_utf8.t t/null.t t/o-stroke.t

ansgar at users.alioth.debian.org ansgar at users.alioth.debian.org
Mon Jul 4 08:43:20 UTC 2011


Author: ansgar
Date: Mon Jul  4 08:43:11 2011
New Revision: 77048

URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=77048
Log:
[svn-upgrade] new version libmarc-charset-perl (1.32)

Added:
    branches/upstream/libmarc-charset-perl/current/t/iii.t
    branches/upstream/libmarc-charset-perl/current/t/marc8_to_utf8.t
    branches/upstream/libmarc-charset-perl/current/t/null.t
    branches/upstream/libmarc-charset-perl/current/t/o-stroke.t
Modified:
    branches/upstream/libmarc-charset-perl/current/Changes
    branches/upstream/libmarc-charset-perl/current/MANIFEST
    branches/upstream/libmarc-charset-perl/current/META.yml
    branches/upstream/libmarc-charset-perl/current/lib/MARC/Charset.pm

Modified: branches/upstream/libmarc-charset-perl/current/Changes
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libmarc-charset-perl/current/Changes?rev=77048&op=diff
==============================================================================
--- branches/upstream/libmarc-charset-perl/current/Changes (original)
+++ branches/upstream/libmarc-charset-perl/current/Changes Mon Jul  4 08:43:11 2011
@@ -1,4 +1,7 @@
 Revision history for MARC::Charset
+
+1.32 Thu Jun 30 16:38:32 EDT 2011
+    - make sure utf8 flag set in output of marc8_to_utf8
 
 1.31 Thu Sep 30 10:53:00 EDT 2010
     - minor revision to get v1.3 Changes into the CPAN distro :-)

Modified: branches/upstream/libmarc-charset-perl/current/MANIFEST
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libmarc-charset-perl/current/MANIFEST?rev=77048&op=diff
==============================================================================
--- branches/upstream/libmarc-charset-perl/current/MANIFEST (original)
+++ branches/upstream/libmarc-charset-perl/current/MANIFEST Mon Jul  4 08:43:11 2011
@@ -1,8 +1,8 @@
 bin/compile_table
 bin/print_table
 Changes
+etc/additional-iii-characters.xml
 etc/codetables.xml
-etc/additional-iii-characters.xml
 lib/MARC/Charset.pm
 lib/MARC/Charset/Code.pm
 lib/MARC/Charset/Compiler.pm
@@ -28,8 +28,12 @@
 t/hebrew2.marc
 t/hebrew3.marc
 t/hebrew4.marc
+t/iii.t
 t/load.t
+t/marc8_to_utf8.t
 t/no_escape.t
+t/null.t
+t/o-stroke.t
 t/pod.t
 t/space.t
 t/table.t

Modified: branches/upstream/libmarc-charset-perl/current/META.yml
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libmarc-charset-perl/current/META.yml?rev=77048&op=diff
==============================================================================
--- branches/upstream/libmarc-charset-perl/current/META.yml (original)
+++ branches/upstream/libmarc-charset-perl/current/META.yml Mon Jul  4 08:43:11 2011
@@ -1,6 +1,6 @@
 --- #YAML:1.0
 name:               MARC-Charset
-version:            1.31
+version:            1.32
 abstract:           convert MARC-8 encoded strings to UTF-8
 author:
     - Ed Summers <ehs at pobox.com>

Modified: branches/upstream/libmarc-charset-perl/current/lib/MARC/Charset.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libmarc-charset-perl/current/lib/MARC/Charset.pm?rev=77048&op=diff
==============================================================================
--- branches/upstream/libmarc-charset-perl/current/lib/MARC/Charset.pm (original)
+++ branches/upstream/libmarc-charset-perl/current/lib/MARC/Charset.pm Mon Jul  4 08:43:11 2011
@@ -1,6 +1,6 @@
 package MARC::Charset;
 
-our $VERSION = '1.31';
+our $VERSION = '1.32';
 use strict;
 use warnings;
 
@@ -52,7 +52,7 @@
 =head2 ignore_errors()
 
 Tells MARC::Charset whether or not to ignore all encoding errors, and
-returns the current setting.  This is helepfuli if you have records that
+returns the current setting.  This is helpful if you have records that
 contain both MARC8 and UNICODE characters.
 
     my $ignore = MARC::Charset->ignore_errors();
@@ -227,6 +227,7 @@
 
     # return the utf8
     reset_charsets();
+    utf8::upgrade($utf8);
     return $utf8;
 }
 

Added: branches/upstream/libmarc-charset-perl/current/t/iii.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libmarc-charset-perl/current/t/iii.t?rev=77048&op=file
==============================================================================
--- branches/upstream/libmarc-charset-perl/current/t/iii.t (added)
+++ branches/upstream/libmarc-charset-perl/current/t/iii.t Mon Jul  4 08:43:11 2011
@@ -1,0 +1,30 @@
+use strict;
+use warnings;
+
+use Test::More tests => 1;
+use MARC::Charset qw(marc8_to_utf8);
+use MARC::Charset::Constants qw(:all);
+
+my $marc8 = 
+    'a ' . 
+    ESCAPE . MULTI_G0_A . CJK .          # escape to CJK for G0
+    chr(0x21) . chr(0x20) . chr(0x3d) .  # horizontal ellipsis
+    chr(0x21) . chr(0x20) . chr(0x40) .  # left double quotation mark
+    chr(0x7f) . chr(0x20) . chr(0x14) .  # em dash
+    chr(0x7f) . chr(0x20) . chr(0x19) .  # right single quotation mark
+    chr(0x7f) . chr(0x20) . chr(0x20) .  # right double quotation mark
+    chr(0x7f) . chr(0x21) . chr(0x22) .  # trade mark sign
+    ESCAPE . SINGLE_G0_A . BASIC_LATIN . # back to latin
+    ' z';
+   
+my $expected = 'a '. 
+               chr(0x2026) .
+               chr(0x201c) .
+               chr(0x2014) .
+               chr(0x2019) .
+               chr(0x201d) .
+               chr(0x2122) .
+               ' z';
+is($expected, marc8_to_utf8($marc8), 'III non-standard');
+
+

Added: branches/upstream/libmarc-charset-perl/current/t/marc8_to_utf8.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libmarc-charset-perl/current/t/marc8_to_utf8.t?rev=77048&op=file
==============================================================================
--- branches/upstream/libmarc-charset-perl/current/t/marc8_to_utf8.t (added)
+++ branches/upstream/libmarc-charset-perl/current/t/marc8_to_utf8.t Mon Jul  4 08:43:11 2011
@@ -1,0 +1,14 @@
+use Test::More qw(no_plan);
+use Unicode::Normalize;
+use strict;
+use warnings;
+
+sub entityize {
+	my $stuff = NFC(shift());
+	$stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
+	return $stuff;
+}
+
+use MARC::Charset qw(marc8_to_utf8 utf8_to_marc8);
+is( entityize(marc8_to_utf8('fotografâias')), 'fotograf&#xED;as' , 'marc8_to_utf8');
+

Added: branches/upstream/libmarc-charset-perl/current/t/null.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libmarc-charset-perl/current/t/null.t?rev=77048&op=file
==============================================================================
--- branches/upstream/libmarc-charset-perl/current/t/null.t (added)
+++ branches/upstream/libmarc-charset-perl/current/t/null.t Mon Jul  4 08:43:11 2011
@@ -1,0 +1,21 @@
+use strict;
+use warnings;
+use Test::More tests => 2;
+
+use MARC::Charset qw(marc8_to_utf8);
+
+# Once upon a time MARC::Charset::Compiler did not know that some code
+# points in the lc mapping table lack ucs values and use alt values
+# instead; these caused nulls to get sprinkled into MARC::Charset output.
+# Now MARC::Charset::Compiler should use the alt value when available.
+
+unlike 
+  marc8_to_utf8("\xEB\x70\xEC\x75"), 
+  qr/\x00/, 
+  'no nulls';
+unlike 
+  marc8_to_utf8("\x31\x20\x1f\x61\x44\x6f\x6e\xeb\x74\xec\x73\x6f\x76\x61\x2c\x20\x44\x61\x72\xa7\xeb\x69\xec\x61\x2e\x1e"),
+  qr/\x00/, 
+  'no nulls';
+
+

Added: branches/upstream/libmarc-charset-perl/current/t/o-stroke.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libmarc-charset-perl/current/t/o-stroke.t?rev=77048&op=file
==============================================================================
--- branches/upstream/libmarc-charset-perl/current/t/o-stroke.t (added)
+++ branches/upstream/libmarc-charset-perl/current/t/o-stroke.t Mon Jul  4 08:43:11 2011
@@ -1,0 +1,13 @@
+use strict;
+use warnings;
+
+use MARC::Charset qw/marc8_to_utf8/;
+use Test::More tests => 2;
+
+use utf8;
+
+my $marc8_ostroke = "\xB2";
+my $utf8_ostroke  = marc8_to_utf8($marc8_ostroke);
+
+ok(utf8::is_utf8($utf8_ostroke), 'UTF8 flag set after converting LATIN SMALL LETTER O WITH STROKE to UTF8');
+is($utf8_ostroke, 'ø', 'successful conversion of LATIN SMALL LETTER O WITH STROKE');




More information about the Pkg-perl-cvs-commits mailing list