r77048 - in /branches/upstream/libmarc-charset-perl/current: Changes MANIFEST META.yml lib/MARC/Charset.pm t/iii.t t/marc8_to_utf8.t t/null.t t/o-stroke.t
ansgar at users.alioth.debian.org
ansgar at users.alioth.debian.org
Mon Jul 4 08:43:20 UTC 2011
Author: ansgar
Date: Mon Jul 4 08:43:11 2011
New Revision: 77048
URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=77048
Log:
[svn-upgrade] new version libmarc-charset-perl (1.32)
Added:
branches/upstream/libmarc-charset-perl/current/t/iii.t
branches/upstream/libmarc-charset-perl/current/t/marc8_to_utf8.t
branches/upstream/libmarc-charset-perl/current/t/null.t
branches/upstream/libmarc-charset-perl/current/t/o-stroke.t
Modified:
branches/upstream/libmarc-charset-perl/current/Changes
branches/upstream/libmarc-charset-perl/current/MANIFEST
branches/upstream/libmarc-charset-perl/current/META.yml
branches/upstream/libmarc-charset-perl/current/lib/MARC/Charset.pm
Modified: branches/upstream/libmarc-charset-perl/current/Changes
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libmarc-charset-perl/current/Changes?rev=77048&op=diff
==============================================================================
--- branches/upstream/libmarc-charset-perl/current/Changes (original)
+++ branches/upstream/libmarc-charset-perl/current/Changes Mon Jul 4 08:43:11 2011
@@ -1,4 +1,7 @@
Revision history for MARC::Charset
+
+1.32 Thu Jun 30 16:38:32 EDT 2011
+ - make sure utf8 flag set in output of marc8_to_utf8
1.31 Thu Sep 30 10:53:00 EDT 2010
- minor revision to get v1.3 Changes into the CPAN distro :-)
Modified: branches/upstream/libmarc-charset-perl/current/MANIFEST
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libmarc-charset-perl/current/MANIFEST?rev=77048&op=diff
==============================================================================
--- branches/upstream/libmarc-charset-perl/current/MANIFEST (original)
+++ branches/upstream/libmarc-charset-perl/current/MANIFEST Mon Jul 4 08:43:11 2011
@@ -1,8 +1,8 @@
bin/compile_table
bin/print_table
Changes
+etc/additional-iii-characters.xml
etc/codetables.xml
-etc/additional-iii-characters.xml
lib/MARC/Charset.pm
lib/MARC/Charset/Code.pm
lib/MARC/Charset/Compiler.pm
@@ -28,8 +28,12 @@
t/hebrew2.marc
t/hebrew3.marc
t/hebrew4.marc
+t/iii.t
t/load.t
+t/marc8_to_utf8.t
t/no_escape.t
+t/null.t
+t/o-stroke.t
t/pod.t
t/space.t
t/table.t
Modified: branches/upstream/libmarc-charset-perl/current/META.yml
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libmarc-charset-perl/current/META.yml?rev=77048&op=diff
==============================================================================
--- branches/upstream/libmarc-charset-perl/current/META.yml (original)
+++ branches/upstream/libmarc-charset-perl/current/META.yml Mon Jul 4 08:43:11 2011
@@ -1,6 +1,6 @@
--- #YAML:1.0
name: MARC-Charset
-version: 1.31
+version: 1.32
abstract: convert MARC-8 encoded strings to UTF-8
author:
- Ed Summers <ehs at pobox.com>
Modified: branches/upstream/libmarc-charset-perl/current/lib/MARC/Charset.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libmarc-charset-perl/current/lib/MARC/Charset.pm?rev=77048&op=diff
==============================================================================
--- branches/upstream/libmarc-charset-perl/current/lib/MARC/Charset.pm (original)
+++ branches/upstream/libmarc-charset-perl/current/lib/MARC/Charset.pm Mon Jul 4 08:43:11 2011
@@ -1,6 +1,6 @@
package MARC::Charset;
-our $VERSION = '1.31';
+our $VERSION = '1.32';
use strict;
use warnings;
@@ -52,7 +52,7 @@
=head2 ignore_errors()
Tells MARC::Charset whether or not to ignore all encoding errors, and
-returns the current setting. This is helepfuli if you have records that
+returns the current setting. This is helpful if you have records that
contain both MARC8 and UNICODE characters.
my $ignore = MARC::Charset->ignore_errors();
@@ -227,6 +227,7 @@
# return the utf8
reset_charsets();
+ utf8::upgrade($utf8);
return $utf8;
}
Added: branches/upstream/libmarc-charset-perl/current/t/iii.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libmarc-charset-perl/current/t/iii.t?rev=77048&op=file
==============================================================================
--- branches/upstream/libmarc-charset-perl/current/t/iii.t (added)
+++ branches/upstream/libmarc-charset-perl/current/t/iii.t Mon Jul 4 08:43:11 2011
@@ -1,0 +1,30 @@
+use strict;
+use warnings;
+
+use Test::More tests => 1;
+use MARC::Charset qw(marc8_to_utf8);
+use MARC::Charset::Constants qw(:all);
+
+my $marc8 =
+ 'a ' .
+ ESCAPE . MULTI_G0_A . CJK . # escape to CJK for G0
+ chr(0x21) . chr(0x20) . chr(0x3d) . # horizontal ellipsis
+ chr(0x21) . chr(0x20) . chr(0x40) . # left double quotation mark
+ chr(0x7f) . chr(0x20) . chr(0x14) . # em dash
+ chr(0x7f) . chr(0x20) . chr(0x19) . # right single quotation mark
+ chr(0x7f) . chr(0x20) . chr(0x20) . # right double quotation mark
+ chr(0x7f) . chr(0x21) . chr(0x22) . # trade mark sign
+ ESCAPE . SINGLE_G0_A . BASIC_LATIN . # back to latin
+ ' z';
+
+my $expected = 'a '.
+ chr(0x2026) .
+ chr(0x201c) .
+ chr(0x2014) .
+ chr(0x2019) .
+ chr(0x201d) .
+ chr(0x2122) .
+ ' z';
+is($expected, marc8_to_utf8($marc8), 'III non-standard');
+
+
Added: branches/upstream/libmarc-charset-perl/current/t/marc8_to_utf8.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libmarc-charset-perl/current/t/marc8_to_utf8.t?rev=77048&op=file
==============================================================================
--- branches/upstream/libmarc-charset-perl/current/t/marc8_to_utf8.t (added)
+++ branches/upstream/libmarc-charset-perl/current/t/marc8_to_utf8.t Mon Jul 4 08:43:11 2011
@@ -1,0 +1,14 @@
+use Test::More qw(no_plan);
+use Unicode::Normalize;
+use strict;
+use warnings;
+
+sub entityize {
+ my $stuff = NFC(shift());
+ $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
+ return $stuff;
+}
+
+use MARC::Charset qw(marc8_to_utf8 utf8_to_marc8);
+is( entityize(marc8_to_utf8('fotografâias')), 'fotograf&#xED;as' , 'marc8_to_utf8');
+
Added: branches/upstream/libmarc-charset-perl/current/t/null.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libmarc-charset-perl/current/t/null.t?rev=77048&op=file
==============================================================================
--- branches/upstream/libmarc-charset-perl/current/t/null.t (added)
+++ branches/upstream/libmarc-charset-perl/current/t/null.t Mon Jul 4 08:43:11 2011
@@ -1,0 +1,21 @@
+use strict;
+use warnings;
+use Test::More tests => 2;
+
+use MARC::Charset qw(marc8_to_utf8);
+
+# once upon a time MARC::Charset::Compiler did not know that there were
+# code points in the lc mapping table that lacked ucs values and used
+# alt instead...these caused nulls to get sprinkled in MARC::Charset output
+# now MARC::Charset::Compiler should use the alt value when available
+
+unlike
+ marc8_to_utf8("\xEB\x70\xEC\x75"),
+ qr/\x00/,
+ 'no nulls';
+unlike
+ marc8_to_utf8("\x31\x20\x1f\x61\x44\x6f\x6e\xeb\x74\xec\x73\x6f\x76\x61\x2c\x20\x44\x61\x72\xa7\xeb\x69\xec\x61\x2e\x1e"),
+ qr/\x00/,
+ 'no nulls';
+
+
Added: branches/upstream/libmarc-charset-perl/current/t/o-stroke.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libmarc-charset-perl/current/t/o-stroke.t?rev=77048&op=file
==============================================================================
--- branches/upstream/libmarc-charset-perl/current/t/o-stroke.t (added)
+++ branches/upstream/libmarc-charset-perl/current/t/o-stroke.t Mon Jul 4 08:43:11 2011
@@ -1,0 +1,13 @@
+use strict;
+use warnings;
+
+use MARC::Charset qw/marc8_to_utf8/;
+use Test::More tests => 2;
+
+use utf8;
+
+my $marc8_ostroke = "\xB2";
+my $utf8_ostroke = marc8_to_utf8($marc8_ostroke);
+
+ok(utf8::is_utf8($utf8_ostroke), 'UTF8 flag set after converting LATIN SMALL LETTER O WITH STROKE to UTF8');
+is($utf8_ostroke, 'ø', 'successful conversion of LATIN SMALL LETTER O WITH STROKE');
More information about the Pkg-perl-cvs-commits
mailing list