r2077 - in packages/libunicode-maputf8-perl/trunk: . debian
lib/Unicode t
Niko Tyni
ntyni-guest at costa.debian.org
Thu Feb 2 19:37:20 UTC 2006
Author: ntyni-guest
Date: 2006-02-02 19:37:19 +0000 (Thu, 02 Feb 2006)
New Revision: 2077
Added:
packages/libunicode-maputf8-perl/trunk/Build.PL
packages/libunicode-maputf8-perl/trunk/Changes
packages/libunicode-maputf8-perl/trunk/META.yml
packages/libunicode-maputf8-perl/trunk/lib/Unicode/MapUTF8.pod
packages/libunicode-maputf8-perl/trunk/pod/
packages/libunicode-maputf8-perl/trunk/t/97_distribution.t
packages/libunicode-maputf8-perl/trunk/t/98_pod_coverage.t
packages/libunicode-maputf8-perl/trunk/t/99_pod.t
Modified:
packages/libunicode-maputf8-perl/trunk/MANIFEST
packages/libunicode-maputf8-perl/trunk/Makefile.PL
packages/libunicode-maputf8-perl/trunk/README
packages/libunicode-maputf8-perl/trunk/debian/changelog
packages/libunicode-maputf8-perl/trunk/lib/Unicode/MapUTF8.pm
packages/libunicode-maputf8-perl/trunk/t/01_unicode_maputf8.t
Log:
svn-upgrade to 1.11
Copied: packages/libunicode-maputf8-perl/trunk/Build.PL (from rev 2076, packages/libunicode-maputf8-perl/branches/upstream/current/Build.PL)
Copied: packages/libunicode-maputf8-perl/trunk/Changes (from rev 2076, packages/libunicode-maputf8-perl/branches/upstream/current/Changes)
Modified: packages/libunicode-maputf8-perl/trunk/MANIFEST
===================================================================
--- packages/libunicode-maputf8-perl/trunk/MANIFEST 2006-02-02 19:35:52 UTC (rev 2076)
+++ packages/libunicode-maputf8-perl/trunk/MANIFEST 2006-02-02 19:37:19 UTC (rev 2077)
@@ -1,3 +1,5 @@
+Build.PL
+Changes
MANIFEST
Makefile.PL
README
@@ -2,2 +4,10 @@
lib/Unicode/MapUTF8.pm
+lib/Unicode/MapUTF8.pod
+pod/MapUTF8.ja_JP.utf8.pod
+pod/MapUTF8.ja_JP.eucjp.pod
+pod/MapUTF8.en.pod
t/01_unicode_maputf8.t
+t/97_distribution.t
+t/98_pod_coverage.t
+t/99_pod.t
+META.yml
Copied: packages/libunicode-maputf8-perl/trunk/META.yml (from rev 2076, packages/libunicode-maputf8-perl/branches/upstream/current/META.yml)
Modified: packages/libunicode-maputf8-perl/trunk/Makefile.PL
===================================================================
--- packages/libunicode-maputf8-perl/trunk/Makefile.PL 2006-02-02 19:35:52 UTC (rev 2076)
+++ packages/libunicode-maputf8-perl/trunk/Makefile.PL 2006-02-02 19:37:19 UTC (rev 2077)
@@ -1,16 +1,45 @@
use ExtUtils::MakeMaker;
+use File::Spec;
+use File::Copy qw (copy);
+
# See lib/ExtUtils/MakeMaker.pm for details of how to influence
# the contents of the Makefile that is written.
+
+my $lang = defined($ENV{'LANG'}) ? $ENV{'LANG'} : 'en';
+my $target_pod = File::Spec->catfile('lib','Unicode','MapUTF8.pod');
+if ($lang =~ m/^(ja|ja_JP|ja_JP.utf-8|ja_JP.utf8|ja.utf8|ja.utf-8)$/i) {
+ $source_pod = File::Spec->catfile('pod','MapUTF8.ja_JP.utf8.pod');
+ copy ($source_pod, $target_pod);
+
+} elsif ($lang =~ m/^(ja_JP.eucjp|ja_JP.euc|ja_euc|ja_eucjp)$/i) {
+ $source_pod = File::Spec->catfile('pod','MapUTF8.ja_JP.eucjp.pod');
+ copy ($source_pod, $target_pod);
+} else {
+ $source_pod = File::Spec->catfile('pod','MapUTF8.en.pod');
+ copy ($source_pod, $target_pod);
+}
+
WriteMakefile(
- 'NAME' => 'Unicode::MapUTF8',
- 'VERSION_FROM' => 'lib/Unicode/MapUTF8.pm',
- 'linkext' => { 'LINKTYPE' => '' }, # no link needed
- 'dist' => { 'COMPRESS' => 'gzip -9f', 'SUFFIX' => 'gz',
- 'ZIP' => '/usr/bin/zip','ZIPFLAGS'=>'-rl'},
-'PREREQ_PM' => {
- 'Unicode::Map' => 0,
- 'Unicode::String' => 0,
- 'Unicode::Map8' => 0,
- 'Jcode' => 0,
- },
+ 'NAME' => 'Unicode::MapUTF8',
+ 'VERSION_FROM' => 'lib/Unicode/MapUTF8.pm',
+ 'linkext' => { LINKTYPE=>'' }, # no link needed
+ 'dist' => {'COMPRESS'=>'gzip -9f', 'SUFFIX' => 'gz',
+ 'ZIP'=>'/usr/bin/zip','ZIPFLAGS'=>'-rl'},
+ 'PREREQ_PM' => {
+ 'Carp' => 0,
+ 'File::Copy' => 0,
+ 'Unicode::Map' => 0,
+ 'Unicode::String' => 0,
+ 'Unicode::Map8' => 0,
+ 'Jcode' => 0,
+ },
+ ($] >= 5.005 ? ## Add these new keywords supported since 5.005
+ (ABSTRACT_FROM => 'lib/Unicode/MapUTF8.pod', # retrieve abstract from module
+ AUTHOR => 'Benjamin Franz <snowhare at nihongo.org>') : ()),
+
+ ($] >= 5.800 ? ## Add these new keywords supported since 5.8
+ (NO_META => 1) : ()),
+
+ PL_FILES => {},
+
);
Modified: packages/libunicode-maputf8-perl/trunk/README
===================================================================
--- packages/libunicode-maputf8-perl/trunk/README 2006-02-02 19:35:52 UTC (rev 2076)
+++ packages/libunicode-maputf8-perl/trunk/README 2006-02-02 19:37:19 UTC (rev 2077)
@@ -1,37 +1,5 @@
Unicode::MapUTF8 - Conversions to and from arbitrary character sets and UTF8
-1.08 2000.11.06 Added 'utf8_charset_alias' function to allow for runtime
- setting of character set aliases. Added several alternate
- names for 'sjis' (shiftjis, shift-jis, shift_jis, s-jis,
- and s_jis).
-
- Corrected 'croak' messages for 'from_utf8' functions to
- appropriate function name.
-
- Corrected fatal problem in jcode-unicode internals. Problem
- and fix found by Brian Wisti <wbrian2 at uswest.net>.
-
-1.07 2000.11.01 Added 'croak' to use Carp declaration to fix error
- messages. Problem and fix found by <wbrian2 at uswest.net>.
-
-1.06 2000.10.30 Fix to handle change in stringification of overloaded
- objects between Perl 5.005 and 5.6.
- Problem noticed by Brian Wisti <wbrian2 at uswest.net>.
-
-1.05 2000.10.23 Error in conversions from UTF8 to multibyte encodings corrected
-
-1.04 2000.10.23 Additional diagnostic error messages added for
- internal errors
-
-1.03 2000.10.22 Bug fix for load time Unicode::Map encoding
- detection
-
-1.02 2000.10.22 Bug fix to 'from_utf8' method and load time
- detection of Unicode::Map8 supported character
- set encodings
-
-1.01 2000.10.02 Initial public release
-
Provides an adapter layer between core routines for converting
to and from UTF8 and other encodings. In essence, a way to give
multiple existing Unicode modules a single common interface so
@@ -41,6 +9,10 @@
Unicode::Map and Jcode modules in a standardized and simple
API.
+Mainly intended for use with Perl 5.6 and 5.0 since starting
+with Perl 5.8 the Encode modules are the preferred way of
+handling character set encodings.
+
To install:
perl Makefile.PL
@@ -48,3 +20,11 @@
make test
make install
+Alternatively, if you have Module::Build installed,
+
+perl Build.PL
+./Build
+./Build test
+./Build install
+
+See 'perldoc Unicode::MapUTF8' for the documentation.
Modified: packages/libunicode-maputf8-perl/trunk/debian/changelog
===================================================================
--- packages/libunicode-maputf8-perl/trunk/debian/changelog 2006-02-02 19:35:52 UTC (rev 2076)
+++ packages/libunicode-maputf8-perl/trunk/debian/changelog 2006-02-02 19:37:19 UTC (rev 2077)
@@ -1,3 +1,9 @@
+libunicode-maputf8-perl (1.11-1) unstable; urgency=low
+
+ * New upstream release
+
+ -- Niko Tyni <ntyni at iki.fi> Thu, 2 Feb 2006 21:36:01 +0200
+
libunicode-maputf8-perl (1.09-6) unstable; urgency=low
* Adopting package.
Modified: packages/libunicode-maputf8-perl/trunk/lib/Unicode/MapUTF8.pm
===================================================================
--- packages/libunicode-maputf8-perl/trunk/lib/Unicode/MapUTF8.pm 2006-02-02 19:35:52 UTC (rev 2076)
+++ packages/libunicode-maputf8-perl/trunk/lib/Unicode/MapUTF8.pm 2006-02-02 19:37:19 UTC (rev 2077)
@@ -2,7 +2,6 @@
use strict;
use Carp qw(confess croak carp);
-use Exporter;
use Unicode::String;
use Unicode::Map;
use Unicode::Map8;
@@ -11,12 +10,13 @@
use vars qw ($VERSION @EXPORT @EXPORT_OK @EXPORT_TAGS @ISA);
use subs qw (utf8_supported_charset to_utf8 from_utf8 utf8_charset_alias _init_charsets);
+require Exporter;
BEGIN {
@ISA = qw(Exporter);
@EXPORT = qw ();
@EXPORT_OK = qw (utf8_supported_charset to_utf8 from_utf8 utf8_charset_alias);
@EXPORT_TAGS = qw ();
- $VERSION = "1.09";
+ $VERSION = "1.11";
}
############################
@@ -26,170 +26,8 @@
my $_Charset_Aliases;
_init_charsets;
-=head1 NAME
+##############
-Unicode::MapUTF8 - Conversions to and from arbitrary character sets and UTF8
-
-=head1 SYNOPSIS
-
- use Unicode::MapUTF8 qw(to_utf8 from_utf8 utf8_supported_charset);
-
- # Convert a string in 'ISO-8859-1' to 'UTF8'
- my $output = to_utf8({ -string => 'An example', -charset => 'ISO-8859-1' });
-
- # Convert a string in 'UTF8' encoding to encoding 'ISO-8859-1'
- my $other = from_utf8({ -string => 'Other text', -charset => 'ISO-8859-1' });
-
- # List available character set encodings
- my @character_sets = utf8_supported_charset;
-
- # Add a character set alias
- utf8_charset_alias({ 'ms-japanese' => 'sjis' });
-
- # Convert between two arbitrary (but largely compatible) charset encodings
- # (SJIS to EUC-JP)
- my $utf8_string = to_utf8({ -string =>$sjis_string, -charset => 'sjis'});
- my $euc_jp_string = from_utf8({ -string => $utf8_string, -charset => 'euc-jp' })
-
- # Verify that a specific character set is supported
- if (utf8_supported_charset('ISO-8859-1') {
- # Yes
- }
-
-=head1 DESCRIPTION
-
-Provides an adapter layer between core routines for converting
-to and from UTF8 and other encodings. In essence, a way to give multiple
-existing Unicode modules a single common interface so you don't have to know
-the underlaying implementations to do simple UTF8 to-from other character set
-encoding conversions. As such, it wraps the Unicode::String, Unicode::Map8,
-Unicode::Map and Jcode modules in a standardized and simple API.
-
-This also provides general character set conversion operation based on UTF8 - it is
-possible to convert between any two compatible and supported character sets
-via a simple two step chaining of conversions.
-
-As with most things Perlish - if you give it a few big chunks of text to chew on
-instead of lots of small ones it will handle many more characters per second.
-
-By design, it can be easily extended to encompass any new charset encoding
-conversion modules that arrive on the scene.
-
-=head1 CHANGES
-
-1.09 2001.08.22 - Fixed multiple typo occurances of 'uft'
- where 'utf' was meant in code. Problem affected
- utf16 and utf7 encodings. Problem found
- by devon smith <devon at taller.PSCL.cwru.edu>
-
-1.08 2000.11.06 - Added 'utf8_charset_alias' function to
- allow for runtime setting of character
- set aliases. Added several alternate
- names for 'sjis' (shiftjis, shift-jis,
- shift_jis, s-jis, and s_jis).
-
- Corrected 'croak' messages for
- 'from_utf8' functions to appropriate
- function name.
-
- Tightened up initialization encapsulation
-
- Corrected fatal problem in jcode from
- unicode internals. Problem and fix
- found by Brian Wisti <wbrian2 at uswest.net>.
-
-1.07 2000.11.01 - Added 'croak' to use Carp declaration to
- fix error messages. Problem and fix
- found by Brian Wisti
- <wbrian2 at uswest.net>.
-
-1.06 2000.10.30 - Fix to handle change in stringification
- of overloaded objects between Perl 5.005
- and 5.6. Problem noticed by Brian Wisti
- <wbrian2 at uswest.net>.
-
-1.05 2000.10.23 - Error in conversions from UTF8 to
- multibyte encodings corrected
-
-1.04 2000.10.23 - Additional diagnostic messages added
- for internal error conditions
-
-1.03 2000.10.22 - Bug fix for load time autodetction of
- Unicode::Map8 encodings
-
-1.02 2000.10.22 - Added load time autodetection of
- Unicode::Map8 supported character set
- encodings.
-
- Fixed internal calling error for some
- character sets with 'from_utf8'. Thanks
- goes to Ilia Lobsanov
- <ilia at lobsanov.com> for reporting this
- problem.
-
-1.01 2000.10.02 - Fixed handling of empty strings and
- added more identification for error
- messages.
-
-1.00 2000.09.29 - Pre-release version
-
-=head1 FUNCTIONS
-
-=cut
-
-######################################################################
-
-=over 4
-
-=item utf8_charset_alias({ $alias => $charset });
-
-Used for runtime assignment of character set aliases.
-
-Called with no parameters, returns a hash of defined aliases and the character sets
-they map to.
-
-Example:
-
- my $aliases = utf8_charset_alias;
- my @alias_names = keys %$aliases;
-
-If called with ONE parameter, returns the name of the 'real' charset
-if the alias is defined. Returns undef if it is not found in the aliases.
-
-Example:
-
- if (! utf8_charset_alias('VISCII')) {
- # No alias for this
- }
-
-If called with a list of 'alias' => 'charset' pairs, defines those aliases for use.
-
-Example:
-
- utf8_charset_alias({ 'japanese' => 'sjis', 'japan' => 'sjis' });
-
-Note: It will croak if a passed pair does not map to a character set
-defined in the predefined set of character encoding. It is NOT
-allowed to alias something to another alias.
-
-Multiple character set aliases can be set with a single call.
-
-To clear an alias, pass a character set mapping of undef.
-
-Example:
-
- utf8_charset_alias({ 'japanese' => undef });
-
-While an alias is set, the 'utf8_supported_charset' function
-will return the alias as if it were a predefined charset.
-
-Overriding a base defined character encoding with an alias
-will generate a warning message to STDERR.
-
-=back
-
-=cut
-
sub utf8_charset_alias {
if ($#_ == -1) {
my $aliases = {};
@@ -238,36 +76,8 @@
}
}
-######################################################################
+####
-=over 4
-
-=item utf8_supported_charset($charset_name);
-
-
-Returns true if the named charset is supported (including
-user defiend aliases).
-
-Returns false if it is not.
-
-Example:
-
- if (! utf8_supported_charset('VISCII')) {
- # No support yet
- }
-
-If called in a list context with no parameters, it will return
-a list of all supported character set names (including user
-defined aliases).
-
-Example:
-
- my @charsets = utf8_supported_charset;
-
-=back
-
-=cut
-
sub utf8_supported_charset {
if ($#_ == -1 && wantarray) {
my %all_charsets = (%$_Supported_Charsets, %$_Charset_Aliases);
@@ -284,19 +94,8 @@
return 0;
}
-######################################################################
+####
-=over 4
-
-=item to_utf8({ -string => $string, -charset => $source_charset });
-
-
-Returns the string converted to UTF8 from the specified source charset.
-
-=back
-
-=cut
-
sub to_utf8 {
my @parm_list = @_;
my $parms = {};
@@ -339,18 +138,8 @@
}
}
-######################################################################
+####
-=over 4
-
-=item from_utf8({ -string => $string, -charset => $target_charset});
-
-Returns the string converted from UTF8 to the specified target charset.
-
-=back
-
-=cut
-
sub from_utf8 {
my @parm_list = @_;
my $parms;
@@ -604,7 +393,7 @@
$target_charset = lc ($target_charset);
my $final;
- if ($target_charset eq 'iso-2022-jp') {
+ if ($target_charset =~ m/^iso[-_]2022[-_]jp$/) {
$final = $j->iso_2022_jp;
} elsif ($target_charset eq 'sjis') {
$final = $j->sjis;
@@ -631,17 +420,17 @@
$source_charset = lc ($source_charset);
my $final;
- if ($source_charset eq 'iso-2022-jp') {
- my $j = Jcode->new($string,$source_charset);
+ if ($source_charset =~ m/^iso[-_]2022[-_]jp$/) {
+ my $j = Jcode->new($string,'jis')->h2z;
$final = $j->utf8;
} elsif ($source_charset =~m/^(s[-_]?jis|shift[-_]?jis)$/) {
- my $j = Jcode->new($string,$source_charset);
+ my $j = Jcode->new($string,'sjis');
$final = $j->utf8;
} elsif ($source_charset eq 'euc-jp') {
- my $j = Jcode->new($string,$source_charset);
+ my $j = Jcode->new($string,'euc');
$final = $j->utf8;
} elsif ($source_charset eq 'jis') {
- my $j = Jcode->new($string,$source_charset);
+ my $j = Jcode->new($string,'jis');
$final = $j->utf8;
} else {
croak( '[' . localtime(time) . '] ' . __PACKAGE__ . "::_jcode_to_utf8() - charset '$source_charset' is not supported\n");
@@ -672,6 +461,7 @@
'shift-jis' => 'jcode',
'shift_jis' => 'jcode',
'iso-2022-jp' => 'jcode',
+ 'iso_2022_jp' => 'jcode',
'jis' => 'jcode',
'euc-jp' => 'jcode',
};
@@ -747,29 +537,4 @@
######################################################################
-=head1 VERSION
-
-1.09 2001.08.22
-
-=head1 COPYRIGHT
-
-Copyright September, 2000 Benjamin Franz. All rights reserved.
-
-This software is free software. You can redistribute it
-and/or modify it under the same terms as Perl itself.
-
-=head1 AUTHOR
-
-Benjamin Franz <snowhare at nihongo.org>
-
-=head1 TODO
-
-Regression tests for Jcode, 2-byte encodings and encoding aliases
-
-=head1 SEE ALSO
-
-Unicode::String Unicode::Map8 Unicode::Map Jcode
-
-=cut
-
1;
Copied: packages/libunicode-maputf8-perl/trunk/lib/Unicode/MapUTF8.pod (from rev 2076, packages/libunicode-maputf8-perl/branches/upstream/current/lib/Unicode/MapUTF8.pod)
Copied: packages/libunicode-maputf8-perl/trunk/pod (from rev 2076, packages/libunicode-maputf8-perl/branches/upstream/current/pod)
Modified: packages/libunicode-maputf8-perl/trunk/t/01_unicode_maputf8.t
===================================================================
--- packages/libunicode-maputf8-perl/trunk/t/01_unicode_maputf8.t 2006-02-02 19:35:52 UTC (rev 2076)
+++ packages/libunicode-maputf8-perl/trunk/t/01_unicode_maputf8.t 2006-02-02 19:37:19 UTC (rev 2077)
@@ -2,6 +2,7 @@
use strict;
use lib ('./blib','../blib','../lib','./lib');
+#use bytes;
use Unicode::MapUTF8 qw(utf8_supported_charset to_utf8 from_utf8 utf8_charset_alias);
# General info for writing test modules:
@@ -13,11 +14,12 @@
my @do_tests=(1..5);
my $test_subs = {
- 1 => { -code => \&test1, -desc => ' eight-bit ' },
- 2 => { -code => \&test2, -desc => ' unicode ' },
- 3 => { -code => \&test3, -desc => ' multi-byte ' },
- 4 => { -code => \&test4, -desc => ' jcode ' },
- 5 => { -code => \&test5, -desc => ' charset aliases ' },
+ 1 => { -code => \&test1, -desc => ' eight-bit ' },
+ 2 => { -code => \&test2, -desc => ' unicode ' },
+ 3 => { -code => \&test3, -desc => ' multi-byte ' },
+ 4 => { -code => \&test4, -desc => ' jcode ' },
+ 5 => { -code => \&test5, -desc => ' charset aliases ' },
+# 6 => { -code => \&big5_with_embedded_ascii, -desc => ' big5 embedded ascii ' },
};
my @charsets = utf8_supported_charset;
@@ -182,6 +184,38 @@
}
########################################
+# Test Big5 with embedded ASCII #
+########################################
+sub big5_with_embedded_ascii {
+ my $charset = 'big5';
+
+ my @errors = ();
+ {
+ my $source_string = "\xa5\x40\xa5\x41\x30";
+ my $utf8_string = to_utf8({ -charset => "ucs2", -string => "\x4e\x16\x4e\x15\x00\x30"});
+ my $result = test_general({ -charset => $charset,
+ -source => $source_string,
+ -utf8 => $utf8_string,
+ });
+ push(@errors,$result) if ($result ne '');
+ }
+
+ {
+ my $source_string = "\xa5\x40\xa5\x41\x30\xa5\x30\x41\xa5\x40";
+ my $utf8_string = to_utf8({ -charset => "ucs2", -string => "\x4e\x16\x4e\x15\x00\x30\x00\x41\x4e\x16"});
+ my $result = test_general({ -charset => $charset,
+ -source => $source_string,
+ -utf8 => $utf8_string,
+ });
+ push(@errors,$result) if ($result ne '');
+ }
+ if (0 < @errors) {
+ return join('',@errors);
+ }
+ return '';
+}
+
+########################################
# Generalized test framework #
########################################
@@ -195,16 +229,16 @@
my $result_string = to_utf8({ -string => $source_string,
-charset => $source_charset });
if ($utf8_string ne $result_string) {
- die ('(line ' . __LINE__ . ") conversion from '$source_charset' to UTF8 resulted in unexpected output. Expected '" . hexout($utf8_string) . "' but got '" . hexout($result_string) . "'\n");
+ die ('(line ' . __LINE__ . ") conversion from '$source_charset' to UTF8 resulted in unexpected output.\nExpected '" . hexout($utf8_string) . "' but got '" . hexout($result_string) . "'\n");
}
};
- if ($@) { return "Failed to convert UTF8 text to $source_charset: $@" }
+ if ($@) { return "Failed to convert UTF8 text to $source_charset:\n$@" }
eval {
my $result_string = from_utf8({ '-string' => $utf8_string,
'-charset' => $source_charset,
});
if ($source_string ne $result_string) {
- die ("conversion from UTF8 to '$source_charset' resulted in unexpected output. Expected '" . hexout($source_string) . "' but got '" . hexout($result_string) . "'\n");
+ die ("conversion from UTF8 to '$source_charset' resulted in unexpected output.\nExpected '" . hexout($source_string) . "' but got '" . hexout($result_string) . "'\n");
}
};
if ($@) { return "Failed to convert '$source_charset' text to UTF8: $@" }
Copied: packages/libunicode-maputf8-perl/trunk/t/97_distribution.t (from rev 2076, packages/libunicode-maputf8-perl/branches/upstream/current/t/97_distribution.t)
Copied: packages/libunicode-maputf8-perl/trunk/t/98_pod_coverage.t (from rev 2076, packages/libunicode-maputf8-perl/branches/upstream/current/t/98_pod_coverage.t)
Copied: packages/libunicode-maputf8-perl/trunk/t/99_pod.t (from rev 2076, packages/libunicode-maputf8-perl/branches/upstream/current/t/99_pod.t)
More information about the Pkg-perl-cvs-commits
mailing list