r153 - in packages: . libunicode-maputf8-perl libunicode-maputf8-perl/branches libunicode-maputf8-perl/branches/upstream libunicode-maputf8-perl/branches/upstream/current libunicode-maputf8-perl/branches/upstream/current/lib libunicode-maputf8-perl/branches/upstream/current/lib/Unicode libunicode-maputf8-perl/branches/upstream/current/t

Thu, 10 Jun 2004 08:33:24 -0600

Author: hoeve-guest
Date: 2004-06-10 08:33:17 -0600 (Thu, 10 Jun 2004)
New Revision: 153

Added:
   packages/libunicode-maputf8-perl/
   packages/libunicode-maputf8-perl/branches/
   packages/libunicode-maputf8-perl/branches/upstream/
   packages/libunicode-maputf8-perl/branches/upstream/current/
   packages/libunicode-maputf8-perl/branches/upstream/current/MANIFEST
   packages/libunicode-maputf8-perl/branches/upstream/current/Makefile.PL
   packages/libunicode-maputf8-perl/branches/upstream/current/README
   packages/libunicode-maputf8-perl/branches/upstream/current/lib/
   packages/libunicode-maputf8-perl/branches/upstream/current/lib/Unicode/
   packages/libunicode-maputf8-perl/branches/upstream/current/lib/Unicode/MapUTF8.pm
   packages/libunicode-maputf8-perl/branches/upstream/current/t/
   packages/libunicode-maputf8-perl/branches/upstream/current/t/01_unicode_maputf8.t
   packages/libunicode-maputf8-perl/tags/
Log:
[svn-inject] Installing original source of libunicode-maputf8-perl

Added: packages/libunicode-maputf8-perl/branches/upstream/current/MANIFEST
===================================================================

--- packages/libunicode-maputf8-perl/branches/upstream/current/MANIFEST	2004-06-10 14:32:11 UTC (rev 152)
+++ packages/libunicode-maputf8-perl/branches/upstream/current/MANIFEST	2004-06-10 14:33:17 UTC (rev 153)
@@ -0,0 +1,5 @@
+MANIFEST
+Makefile.PL
+README
+lib/Unicode/MapUTF8.pm
+t/01_unicode_maputf8.t

Added: packages/libunicode-maputf8-perl/branches/upstream/current/Makefile.PL
===================================================================
--- packages/libunicode-maputf8-perl/branches/upstream/current/Makefile.PL	2004-06-10 14:32:11 UTC (rev 152)
+++ packages/libunicode-maputf8-perl/branches/upstream/current/Makefile.PL	2004-06-10 14:33:17 UTC (rev 153)
@@ -0,0 +1,16 @@
+use ExtUtils::MakeMaker;
+# See lib/ExtUtils/MakeMaker.pm for details of how to influence
+# the contents of the Makefile that is written.
+WriteMakefile(
+    'NAME'       => 'Unicode::MapUTF8',
+  'VERSION_FROM' => 'lib/Unicode/MapUTF8.pm',
+    'linkext'    => { 'LINKTYPE' => '' },    # no link needed
+    'dist'       => { 'COMPRESS' => 'gzip -9f',     'SUFFIX' => 'gz',
+                           'ZIP' => '/usr/bin/zip','ZIPFLAGS'=>'-rl'},
+'PREREQ_PM'      => {
+                'Unicode::Map'     => 0,
+                'Unicode::String'  => 0,
+                'Unicode::Map8'    => 0,
+                'Jcode'            => 0,
+                        },
+);

Added: packages/libunicode-maputf8-perl/branches/upstream/current/README
===================================================================
--- packages/libunicode-maputf8-perl/branches/upstream/current/README	2004-06-10 14:32:11 UTC (rev 152)
+++ packages/libunicode-maputf8-perl/branches/upstream/current/README	2004-06-10 14:33:17 UTC (rev 153)
@@ -0,0 +1,50 @@
+Unicode::MapUTF8 - Conversions to and from arbitrary character sets and UTF8
+
+1.08 2000.11.06 Added 'utf8_charset_alias' function to allow for runtime
+                setting of character set aliases. Added several alternate
+                names for 'sjis' (shiftjis, shift-jis, shift_jis, s-jis,
+                and s_jis). 
+
+                Corrected 'croak' messages for 'from_utf8' functions to
+                appropriate function name.
+
+                Corrected fatal problem in jcode-unicode internals. Problem 
+                and fix found by Brian Wisti <wbrian2@uswest.net>.
+
+1.07 2000.11.01 Added 'croak' to use Carp declaration to fix error
+                messages. Problem and fix found by <wbrian2@uswest.net>.
+
+1.06 2000.10.30 Fix to handle change in stringification of overloaded
+                objects between Perl 5.005 and 5.6.  
+                Problem noticed by Brian Wisti <wbrian2@uswest.net>.
+
+1.05 2000.10.23 Error in conversions from UTF8 to multibyte encodings corrected
+
+1.04 2000.10.23 Additional diagnostic error messages added for
+                internal errors
+
+1.03 2000.10.22 Bug fix for load time Unicode::Map encoding
+                detection
+
+1.02 2000.10.22 Bug fix to 'from_utf8' method and load time
+                detection of Unicode::Map8 supported character
+                set encodings
+
+1.01 2000.10.02 Initial public release
+
+Provides an adapter layer between core routines for converting
+to and from UTF8 and other encodings. In essence, a way to give 
+multiple existing Unicode modules a single common interface so 
+you don't have to know the underlaying implementations to do 
+simple UTF8 to-from other character set string conversions. 
+As such, it wraps the Unicode::String, Unicode::Map8,
+Unicode::Map and Jcode modules in a standardized and simple 
+API.
+
+To install:
+
+perl Makefile.PL
+make
+make test
+make install
+

Added: packages/libunicode-maputf8-perl/branches/upstream/current/lib/Unicode/MapUTF8.pm
===================================================================
--- packages/libunicode-maputf8-perl/branches/upstream/current/lib/Unicode/MapUTF8.pm	2004-06-10 14:32:11 UTC (rev 152)
+++ packages/libunicode-maputf8-perl/branches/upstream/current/lib/Unicode/MapUTF8.pm	2004-06-10 14:33:17 UTC (rev 153)
@@ -0,0 +1,775 @@
+package Unicode::MapUTF8;
+
+use strict;
+use Carp qw(confess croak carp);
+use Exporter;
+use Unicode::String;
+use Unicode::Map;
+use Unicode::Map8;
+use Jcode;
+
+use vars qw ($VERSION @EXPORT @EXPORT_OK @EXPORT_TAGS @ISA);
+use subs qw (utf8_supported_charset to_utf8 from_utf8 utf8_charset_alias _init_charsets);
+
+BEGIN {
+    @ISA         = qw(Exporter);
+    @EXPORT      = qw ();
+    @EXPORT_OK   = qw (utf8_supported_charset to_utf8 from_utf8 utf8_charset_alias);
+    @EXPORT_TAGS = qw ();
+    $VERSION     = "1.09";
+}
+
+############################
+# File level package globals (class variables)
+my $_Supported_Charsets;
+my $_Charset_Names;
+my $_Charset_Aliases;
+_init_charsets;
+
+=head1 NAME
+
+Unicode::MapUTF8 - Conversions to and from arbitrary character sets and UTF8
+
+=head1 SYNOPSIS
+
+ use Unicode::MapUTF8 qw(to_utf8 from_utf8 utf8_supported_charset);
+
+ # Convert a string in 'ISO-8859-1' to 'UTF8'
+ my $output = to_utf8({ -string => 'An example', -charset => 'ISO-8859-1' });
+
+ # Convert a string in 'UTF8' encoding to encoding 'ISO-8859-1'
+ my $other  = from_utf8({ -string => 'Other text', -charset => 'ISO-8859-1' });
+
+ # List available character set encodings
+ my @character_sets = utf8_supported_charset;
+
+ # Add a character set alias
+ utf8_charset_alias({ 'ms-japanese' => 'sjis' });
+
+ # Convert between two arbitrary (but largely compatible) charset encodings
+ # (SJIS to EUC-JP)
+ my $utf8_string   = to_utf8({ -string =>$sjis_string, -charset => 'sjis'});
+ my $euc_jp_string = from_utf8({ -string => $utf8_string, -charset => 'euc-jp' })
+
+ # Verify that a specific character set is supported
+ if (utf8_supported_charset('ISO-8859-1') {
+     # Yes
+ }
+
+=head1 DESCRIPTION
+
+Provides an adapter layer between core routines for converting
+to and from UTF8 and other encodings. In essence, a way to give multiple
+existing Unicode modules a single common interface so you don't have to know
+the underlaying implementations to do simple UTF8 to-from other character set
+encoding conversions. As such, it wraps the Unicode::String, Unicode::Map8,
+Unicode::Map and Jcode modules in a standardized and simple API.
+
+This also provides general character set conversion operation based on UTF8 - it is
+possible to convert between any two compatible and supported character sets
+via a simple two step chaining of conversions.
+
+As with most things Perlish - if you give it a few big chunks of text to chew on
+instead of lots of small ones it will handle many more characters per second.
+
+By design, it can be easily extended to encompass any new charset encoding
+conversion modules that arrive on the scene.
+
+=head1 CHANGES
+
+1.09 2001.08.22 - Fixed multiple typo occurances of 'uft'
+                  where 'utf' was meant in code. Problem affected
+                  utf16 and utf7 encodings. Problem found
+                  by devon smith <devon@taller.PSCL.cwru.edu>
+
+1.08 2000.11.06 - Added 'utf8_charset_alias' function to
+                  allow for runtime setting of character
+                  set aliases. Added several alternate
+                  names for 'sjis' (shiftjis, shift-jis,
+                  shift_jis, s-jis, and s_jis).
+
+                  Corrected 'croak' messages for
+                  'from_utf8' functions to appropriate
+                  function name.
+
+                  Tightened up initialization encapsulation
+
+                  Corrected fatal problem in jcode from
+                  unicode internals. Problem and fix
+                  found by Brian Wisti <wbrian2@uswest.net>.
+
+1.07 2000.11.01 - Added 'croak' to use Carp declaration to
+                  fix error messages.  Problem and fix
+                  found by Brian Wisti
+                  <wbrian2@uswest.net>.
+
+1.06 2000.10.30 - Fix to handle change in stringification
+                  of overloaded objects between Perl 5.005
+                  and 5.6. Problem noticed by Brian Wisti
+                  <wbrian2@uswest.net>.
+
+1.05 2000.10.23 - Error in conversions from UTF8 to
+                  multibyte encodings corrected
+
+1.04 2000.10.23 - Additional diagnostic messages added
+                  for internal error conditions
+
+1.03 2000.10.22 - Bug fix for load time autodetction of
+                  Unicode::Map8 encodings
+
+1.02 2000.10.22 - Added load time autodetection of
+                  Unicode::Map8 supported character set
+                  encodings.
+
+                  Fixed internal calling error for some
+                  character sets with 'from_utf8'. Thanks
+                  goes to Ilia Lobsanov
+                  <ilia@lobsanov.com> for reporting this
+                  problem.
+
+1.01 2000.10.02 - Fixed handling of empty strings and
+                  added more identification for error
+                  messages.
+
+1.00 2000.09.29 - Pre-release version
+
+=head1 FUNCTIONS
+
+=cut
+
+######################################################################
+
+=over 4
+
+=item utf8_charset_alias({ $alias => $charset });
+
+Used for runtime assignment of character set aliases.
+
+Called with no parameters, returns a hash of defined aliases and the character sets
+they map to.
+
+Example:
+
+  my $aliases     = utf8_charset_alias;
+  my @alias_names = keys %$aliases;
+
+If called with ONE parameter, returns the name of the 'real' charset
+if the alias is defined. Returns undef if it is not found in the aliases.
+
+Example:
+
+    if (! utf8_charset_alias('VISCII')) {
+        # No alias for this
+    }
+
+If called with a list of 'alias' => 'charset' pairs, defines those aliases for use.
+
+Example:
+
+    utf8_charset_alias({ 'japanese' => 'sjis', 'japan' => 'sjis' });
+
+Note: It will croak if a passed pair does not map to a character set
+defined in the predefined set of character encoding. It is NOT
+allowed to alias something to another alias.
+
+Multiple character set aliases can be set with a single call.
+
+To clear an alias, pass a character set mapping of undef.
+
+Example:
+
+    utf8_charset_alias({ 'japanese' => undef });
+
+While an alias is set, the 'utf8_supported_charset' function
+will return the alias as if it were a predefined charset.
+
+Overriding a base defined character encoding with an alias
+will generate a warning message to STDERR.
+
+=back
+
+=cut
+
+sub utf8_charset_alias {
+    if ($#_ == -1) {
+        my $aliases = {};
+        %$aliases   =  %$_Charset_Aliases;
+        return $aliases;
+    }
+    my $parms;
+    my @parms_list = @_;
+    if (($#parms_list == 0) && (ref ($parms_list[0]) eq 'HASH')) {
+        _set_utf8_charset_alias($parms_list[0]);
+        return;
+    } elsif (($#parms_list > 0) && (($#parms_list % 2) == 1)) {
+        _set_utf8_charset_alias({ @parms_list });
+        return;
+    } elsif ($#parms_list == 0) {
+        my $lc_charset = lc($parms_list[0]);
+        my $result     = $_Charset_Aliases->{$lc_charset};
+        return $result;
+    }
+    croak( '[' . localtime(time) . '] ' . __PACKAGE__ . "::utf8_charset_alias() - invalid parameters passed\n");
+}
+
+######################################################################
+# Sets (or clears ;-) ) a runtime character set alias.
+
+sub _set_utf8_charset_alias {
+    my ($parms) = @_;
+    my @alias_names = keys %$parms;
+    foreach my $alias (@alias_names) {
+        my $lc_alias = lc ($alias);
+        my $charset  = $parms->{$alias};
+        if (! defined $charset) {
+            if (exists ($_Charset_Aliases->{$lc_alias})) {
+                delete $_Charset_Aliases->{$lc_alias};
+            }
+            next;
+        }
+        my $lc_charset = lc ($charset);
+        if (! exists ($_Charset_Names->{$lc_charset})) {
+            croak( '[' . localtime(time) . '] ' . __PACKAGE__ . "::utf8_charset_alias() - attempted to set alias '$alias' to point to unknown charset encoding of '$charset'\n");
+        }
+        if (exists ($_Charset_Names->{$lc_alias})) {
+            carp('[' . localtime(time) . '] [warning] ' . __PACKAGE__ . "::utf8_charset_alias() - Aliased base defined charset name '$alias' to '$charset'.");
+        }
+        $_Charset_Aliases->{$lc_alias} = $lc_charset;
+    }
+}
+
+######################################################################
+
+=over 4
+
+=item utf8_supported_charset($charset_name);
+
+
+Returns true if the named charset is supported (including
+user defiend aliases).
+
+Returns false if it is not.
+
+Example:
+
+    if (! utf8_supported_charset('VISCII')) {
+        # No support yet
+    }
+
+If called in a list context with no parameters, it will return
+a list of all supported character set names (including user
+defined aliases).
+
+Example:
+
+    my @charsets = utf8_supported_charset;
+
+=back
+
+=cut
+
+sub utf8_supported_charset {
+    if ($#_ == -1 && wantarray) {
+        my %all_charsets = (%$_Supported_Charsets, %$_Charset_Aliases);
+        my @charsets     = sort keys %all_charsets;
+        return @charsets;
+    }
+    my $charset = shift;
+    if (not defined $charset) {
+        croak( '[' . localtime(time) . '] ' . __PACKAGE__ . "::utf8_supported_charset() - no character set specified\n");
+    }
+    my $lc_charset = lc($charset);
+    return 1 if (exists ($_Charset_Names->{$lc_charset}));
+    return 1 if (exists ($_Charset_Aliases->{$lc_charset}));
+    return 0;
+}
+
+######################################################################
+
+=over 4
+
+=item to_utf8({ -string => $string, -charset => $source_charset });
+
+
+Returns the string converted to UTF8 from the specified source charset.
+
+=back
+
+=cut
+
+sub to_utf8 {
+    my @parm_list = @_;
+    my $parms  = {};
+    if (($#parm_list > 0) && (($#parm_list % 2) == 1)) {
+        $parms = { @parm_list };
+    } elsif ($#parm_list == 0) {
+        $parms = $parm_list[0];
+        if (! ref($parms)) {
+            croak( '[' . localtime(time) . '] ' . __PACKAGE__ . "::to_utf8() - invalid parameters passed\n");
+        }
+    } else {
+        croak( '[' . localtime(time) . '] ' . __PACKAGE__ . "::to_utf8() - bad parameters passed\n");
+    }
+
+    if (! (exists $parms->{-string})) {
+        croak( '[' . localtime(time) . '] ' . __PACKAGE__ . "::to_utf8() - missing '-string' parameter\n");
+    }
+    my $string  = $parms->{-string};
+    my $charset = $parms->{-charset};
+
+    if (! defined ($charset)) {
+        croak( '[' . localtime(time) . '] ' . __PACKAGE__ . "::to_utf8() - missing '-charset' parameter value\n");
+    }
+    my $lc_charset    = lc ($charset);
+    my $alias_charset = $_Charset_Aliases->{$lc_charset};
+    my $true_charset  = defined($alias_charset) ? $_Charset_Names->{$alias_charset} : $_Charset_Names->{$lc_charset};
+    if (! defined $true_charset) {
+        croak( '[' . localtime(time) . '] ' . __PACKAGE__ . "::to_utf8() - character set '$charset' is not supported\n");
+    }
+
+    $string = '' if (! defined ($string));
+
+    my $converter = $_Supported_Charsets->{$true_charset};
+    if    ($converter eq 'map8')       { return _unicode_map8_to_utf8   ($string,$true_charset); }
+    if    ($converter eq 'unicode-map'){ return _unicode_map_to_utf8    ($string,$true_charset); }
+    elsif ($converter eq 'string')     { return _unicode_string_to_utf8 ($string,$true_charset); }
+    elsif ($converter eq 'jcode')      { return _jcode_to_utf8          ($string,$true_charset); }
+    else {
+        croak(  '[' . localtime(time) . '] ' . __PACKAGE__ . "::to_utf8() - charset '$charset' is not supported\n");
+    }
+}
+
+######################################################################
+
+=over 4
+
+=item from_utf8({ -string => $string, -charset => $target_charset});
+
+Returns the string converted from UTF8 to the specified target charset.
+
+=back
+
+=cut
+
+sub from_utf8 {
+    my @parm_list = @_;
+    my $parms;
+    if (($#parm_list > 0) && (($#parm_list % 2) == 1)) {
+        $parms = { @parm_list };
+    } elsif ($#parm_list == 0) {
+        $parms = $parm_list[0];
+        if (! ref($parms)) {
+            croak( '[' . localtime(time) . '] ' . __PACKAGE__ . "::from_utf8() - invalid parameters passed\n");
+        }
+    } else {
+        croak( '[' . localtime(time) . '] ' . __PACKAGE__ . "::from_utf8() - bad parameters passed\n");
+    }
+
+    if (! (exists $parms->{-string})) {
+    ; croak( '[' . localtime(time) . '] ' . __PACKAGE__ . "::from_utf8() - missing '-string' parameter\n");
+    }
+
+    my $string  = $parms->{-string};
+    my $charset = $parms->{-charset};
+
+    if (! defined ($charset)) {
+        croak( '[' . localtime(time) . '] ' . __PACKAGE__ . "::from_utf8() - missing '-charset' parameter value\n");
+    }
+    my $lc_charset    = lc ($charset);
+    my $alias_charset = $_Charset_Aliases->{$lc_charset};
+    my $true_charset  = defined($alias_charset) ? $_Charset_Names->{$alias_charset} : $_Charset_Names->{$lc_charset};
+    if (! defined $true_charset) {
+        croak( '[' . localtime(time) . '] ' . __PACKAGE__ . "::from_utf8() - character set '$charset' is not supported\n");
+    }
+
+    $string = '' if (! defined ($string));
+
+    my $converter = $_Supported_Charsets->{$true_charset};
+    my $result;
+    if    ($converter eq 'map8')        { $result = _unicode_map8_from_utf8   ($string,$true_charset); }
+    elsif ($converter eq 'unicode-map') { $result = _unicode_map_from_utf8    ($string,$true_charset); }
+    elsif ($converter eq 'string')      { $result = _unicode_string_from_utf8 ($string,$true_charset); }
+    elsif ($converter eq 'jcode')       { $result = _jcode_from_utf8          ($string,$true_charset); }
+    else {
+        croak(  '[' . localtime(time) . '] ' . __PACKAGE__ . "::from_utf8() - charset '$charset' is not supported\n");
+    }
+    return $result;
+}
+
+######################################################################
+#
+# _unicode_map_from_utf8($string,$target_charset);
+#
+# Returns the string converted from UTF8 to the specified target multibyte charset.
+#
+
+sub _unicode_map_from_utf8 {
+    my ($string,$target_charset) = @_;
+
+    if (! defined $target_charset) {
+        croak( '[' . localtime(time) . '] ' . __PACKAGE__ . '::_unicode_map_from_utf8() - (line ' . __LINE__ . ") No target character set specified\n");
+    }
+
+    my $ucs2   = from_utf8 ({ -string => $string, -charset => 'ucs2' });
+    my $target = Unicode::Map->new($target_charset);
+    if (! defined $target) {
+        confess( '[' . localtime(time) . '] ' . __PACKAGE__ . '::_unicode_map_from_utf8() - (line ' . __LINE__ . ") failed to instantate Unicode::Map object for charset '$target_charset': $!\n");
+    }
+    my $result = $target->from_unicode($ucs2);
+    return $result;
+}
+
+######################################################################
+#
+# _unicode_map_to_utf8($string,$source_charset);
+#
+# Returns the string converted the specified target multibyte charset to UTF8.
+#
+sub _unicode_map_to_utf8 {
+    my ($string,$source_charset) = @_;
+
+    if (! defined $source_charset) {
+        croak( '[' . localtime(time) . '] ' . __PACKAGE__ . '::_unicode_map_to_utf8() - (line ' . __LINE__ . ") No source character set specified\n");
+    }
+
+    my $source = Unicode::Map->new($source_charset);
+    if (! defined $source) {
+        confess('[' . localtime(time) . '] ' . __PACKAGE__ . "::_unicode_map_to_utf8() - (line " . __LINE__ . ") failed to instantate a Unicode::Map object: $!\n");
+    }
+    my $ucs2   = $source->to_unicode($string);
+    my $result = to_utf8({ -string => $ucs2, -charset => 'ucs2' });
+    return $result;
+}
+
+######################################################################
+#
+# _unicode_map8_from_utf8($string,$target_charset);
+#
+# Returns the string converted from UTF8 to the specified target 8bit charset.
+#
+
+sub _unicode_map8_from_utf8 {
+    my ($string,$target_charset) = @_;
+
+    if (! defined $target_charset) {
+        croak( '[' . localtime(time) . '] ' . __PACKAGE__ . '::_unicode_map8_from_utf8() - (line ' . __LINE__ . ") No target character set specified\n");
+    }
+
+    my $u = Unicode::String::utf8($string);
+    if (! defined $u) {
+        confess( '[' . localtime(time) . '] ' . __PACKAGE__ . "::_unicode_map8_from_utf8() - (line " . __LINE__ . ") failed to instantate Unicode::String::utf8 object: $!\n");
+    }
+    my $ordering = $u->ord;
+    $u->byteswap if (defined($ordering) && ($ordering == 0xFFFE));
+    my $ucs2_string = $u->ucs2;
+
+    my $target = Unicode::Map8->new($target_charset);
+    if (! defined $target) {
+        confess( '[' . localtime(time) . '] ' . __PACKAGE__ . "::_unicode_map8_from_utf8() - (line " . __LINE__ . ") ailed to instantate Unicode::Map8 object for character set '$target_charset':  $!\n");
+    }
+    my $result = $target->to8($ucs2_string);
+
+    return $result;
+}
+
+######################################################################
+#
+# _unicode_map8_to_utf8($string,$source_charset);
+#
+# Returns the string converted the specified target 8bit charset to UTF8.
+#
+#
+
+sub _unicode_map8_to_utf8 {
+    my ($string,$source_charset) = @_;
+
+    my $source = Unicode::Map8->new($source_charset);
+    if (! defined $source) {
+        confess('[' . localtime(time) . '] ' . __PACKAGE__ . "::_unicode_map8_to_utf8() - (line " . __LINE__ . ") failed to instantate a Unicode::Map8 object for character set '$source_charset': $!\n");
+    }
+
+    my $ucs2_string = $source->tou($string);
+    if (! defined $ucs2_string) {
+            confess('[' . localtime(time) . '] ' . __PACKAGE__ . "::_unicode_map8_to_utf8() - (line " . __LINE__ . ") failed to instantate a Unicode::String::utf16 object: $!\n");
+    }
+    my $utf8_string = $ucs2_string->utf8;
+
+    return $utf8_string;
+}
+
+######################################################################
+#
+# _unicode_string_from_utf8($string,$target_charset);
+#
+# Returns the string converted from UTF8 to the specified unicode encoding.
+#
+
+sub _unicode_string_from_utf8 {
+    my ($string,$target_charset) = @_;
+
+    $target_charset = lc ($target_charset);
+    my $final;
+    if ($target_charset eq 'utf8') {
+        $final = $string;
+    } elsif ($target_charset eq 'ucs2') {
+        my $u = Unicode::String::utf8($string);
+        my $ordering = $u->ord;
+        $u->byteswap if (defined($ordering) && ($ordering == 0xFFFE));
+        $final = $u->ucs2;
+    } elsif ($target_charset eq 'ucs4') {
+        my $u = Unicode::String::utf8($string);
+        my $ordering = $u->ord;
+        $u->byteswap if (defined($ordering) && ($ordering == 0xFFFE));
+        $final = $u->ucs4;
+    } elsif ($target_charset eq 'utf16') {
+        my $u = Unicode::String::utf8($string);
+        my $ordering = $u->ord;
+        $u->byteswap if (defined($ordering) && ($ordering == 0xFFFE));
+        $final = $u->utf16;
+    } elsif ($target_charset eq 'utf7') {
+        my $u = Unicode::String::utf8($string);
+        my $ordering = $u->ord;
+        $u->byteswap if (defined($ordering) && ($ordering == 0xFFFE));
+        $final = $u->utf7;
+    } else {
+        croak(  '[' . localtime(time) . '] ' . __PACKAGE__ . "::_unicode_string_from_utf8() - charset '$target_charset' is not supported\n");
+    }
+    return $final;
+}
+
+######################################################################
+#
+# _unicode_string_to_utf8($string,$source_charset);
+#
+# Returns the string converted the specified unicode encoding to UTF8.
+#
+
+sub _unicode_string_to_utf8 {
+    my ($string,$source_charset) = @_;
+
+    $source_charset = lc ($source_charset);
+    my $final;
+    if    ($source_charset eq 'utf8') {
+        $final = $string;
+    } elsif ($source_charset eq 'ucs2') {
+        my $u = Unicode::String::utf16($string);
+        if (! defined $u) {
+            confess('[' . localtime(time) . '] ' . __PACKAGE__ . "::_unicode_string_to_utf8() - (line " . __LINE__ . ") failed to instantate a Unicode::String::utf16 object: $!\n");
+        }
+        my $ordering = $u->ord;
+        $u->byteswap if (defined($ordering) && ($ordering == 0xFFFE));
+        $final = $u->utf8;
+    } elsif ($source_charset eq 'ucs4') {
+        my $u = Unicode::String::ucs4($string);
+        if (! defined $u) {
+            confess('[' . localtime(time) . '] ' . __PACKAGE__ . "::_unicode_string_to_utf8() - (line " . __LINE__ . ") failed to instantate a Unicode::String::ucs4 object: $!\n");
+        }
+        my $ordering = $u->ord;
+        $u->byteswap if (defined($ordering) && ($ordering == 0xFFFE));
+        $final = $u->utf8;
+    } elsif ($source_charset eq 'utf16') {
+        my $u = Unicode::String::utf16($string);
+        if (! defined $u) {
+            confess('[' . localtime(time) . '] ' . __PACKAGE__ . "::_unicode_string_to_utf8() - (line " . __LINE__ . ") failed to instantate a Unicode::String::utf16 object: $!\n");
+        }
+        my $ordering = $u->ord;
+        $u->byteswap if (defined($ordering) && ($ordering == 0xFFFE));
+        $final = $u->utf8;
+    } elsif ($source_charset eq 'utf7') {
+        my $u = Unicode::String::utf7($string);
+        if (! defined $u) {
+            confess('[' . localtime(time) . '] ' . __PACKAGE__ . "::_unicode_string_to_utf8() - (line " . __LINE__ . ") failed to instantate a Unicode::String::utf7 object: $!\n");
+        }
+        my $ordering = $u->ord;
+        $u->byteswap if (defined($ordering) && ($ordering == 0xFFFE));
+        $final = $u->utf8;
+    } else {
+        croak(  '[' . localtime(time) . '] ' . __PACKAGE__ . ":: _unicode_string_to_utf8() - charset '$source_charset' is not supported\n");
+    }
+
+    return $final;
+}
+
+######################################################################
+#
+# _jcode_from_utf8($string,$target_charset);
+#
+# Returns the string converted from UTF8 to the specified Jcode encoding.
+#
+
+sub _jcode_from_utf8 {
+    my ($string,$target_charset) = @_;
+
+    my $j = Jcode->new($string,'utf8');
+
+    $target_charset = lc ($target_charset);
+    my $final;
+    if    ($target_charset eq 'iso-2022-jp') {
+        $final = $j->iso_2022_jp;
+    } elsif ($target_charset eq 'sjis') {
+        $final = $j->sjis;
+    } elsif ($target_charset eq 'euc-jp') {
+        $final = $j->euc;
+    } elsif ($target_charset eq 'jis') {
+        $final = $j->jis;
+    } else {
+        croak(  '[' . localtime(time) . '] ' . __PACKAGE__ . "::_jcode_from_utf8() - charset '$target_charset' is not supported\n");
+    }
+    return $final;
+}
+
+######################################################################
+#
+# _jcode_to_utf8($string,$source_charset);
+#
+# Returns the string converted from the specified Jcode encoding to UTF8.
+#
+
+sub _jcode_to_utf8 {
+    my ($string,$source_charset) = @_;
+
+    $source_charset = lc ($source_charset);
+
+    my $final;
+    if    ($source_charset eq 'iso-2022-jp') {
+        my $j  = Jcode->new($string,$source_charset);
+        $final = $j->utf8;
+    } elsif ($source_charset =~m/^(s[-_]?jis|shift[-_]?jis)$/) {
+        my $j  = Jcode->new($string,$source_charset);
+        $final = $j->utf8;
+    } elsif ($source_charset eq 'euc-jp') {
+        my $j  = Jcode->new($string,$source_charset);
+        $final = $j->utf8;
+    } elsif ($source_charset eq 'jis') {
+        my $j  = Jcode->new($string,$source_charset);
+        $final = $j->utf8;
+    } else {
+        croak(  '[' . localtime(time) . '] ' . __PACKAGE__ . "::_jcode_to_utf8() - charset '$source_charset' is not supported\n");
+    }
+
+    return $final;
+}
+
+#######################################################################
+#
+# Character set handlers maps
+#
+
+sub _init_charsets {
+
+    $_Charset_Aliases    = {};
+
+    $_Supported_Charsets = {
+        'utf8'                    => 'string',
+        'ucs2'                    => 'string',
+        'ucs4'                    => 'string',
+        'utf7'                    => 'string',
+        'utf16'                   => 'string',
+        'sjis'                    => 'jcode',
+        's-jis'                   => 'jcode',
+        's_jis'                   => 'jcode',
+        'shiftjis'                => 'jcode',
+        'shift-jis'               => 'jcode',
+        'shift_jis'               => 'jcode',
+        'iso-2022-jp'             => 'jcode',
+        'jis'                     => 'jcode',
+        'euc-jp'                  => 'jcode',
+    };
+    $_Charset_Names = { map { lc ($_) => $_ } keys %$_Supported_Charsets };
+
+    # All the Unicode::Map8 charsets
+    {
+        my @map_ids = &_list_unicode_map8_charsets;
+        foreach my $id (@map_ids) {
+            my $lc_id = lc ($id);
+            next if (exists ($_Charset_Names->{$lc_id}));
+            $_Supported_Charsets->{$id} = 'map8';
+            $_Charset_Names->{$lc_id}    = $id;
+        }
+    }
+    $_Charset_Names = { map { lc ($_) => $_ } keys %$_Supported_Charsets };
+
+    # Add any charsets not already listed from Unicode::Map
+    {
+        my $unicode_map = Unicode::Map->new;
+        my @map_ids     = $unicode_map->ids;
+        foreach my $id (@map_ids) {
+            my $lc_id = lc ($id);
+            next if (exists ($_Charset_Names->{$lc_id}));
+            $_Supported_Charsets->{$id} = 'unicode-map';
+            $_Charset_Names->{$lc_id}    = $id;
+        }
+    }
+}
+
+######################################################################
+#
+# Code taken and modified from the 'usr/bin/umap' code distributed
+# with Unicode::Map8. It wouldn't be necessary if Unicode::Map8
+# had a direct method for this....
+#
+
+sub _list_unicode_map8_charsets {
+    my %set = (
+           ucs4 => {},
+           ucs2 => {utf16 => 1},
+           utf7 => {},
+           utf8 => {},
+          );
+    if (opendir(DIR, $Unicode::Map8::MAPS_DIR)) {
+        my @files = grep(!/^\.\.?$/,readdir(DIR));
+        foreach my $f (@files) {
+            next unless -f "$Unicode::Map8::MAPS_DIR/$f";
+            $f =~ s/\.(?:bin|txt)$//;
+            my $supported =
+            $set{$f} = {} if Unicode::Map8->new($f);
+        }
+    }
+
+    my $avoid_warning = keys %Unicode::Map8::ALIASES;
+    while ( my($alias, $charset) = each %Unicode::Map8::ALIASES) {
+        if (exists $set{$charset}) {
+            $set{$charset}{$alias} = 1;
+        }
+    }
+
+    my %merged_set = ();
+    foreach my $encoding (keys %set) {
+        $merged_set{$encoding} = 1;
+        my $set_item = $set{$encoding};
+        while (my ($key,$value) = each (%$set_item)) {
+                $merged_set{$key} = $value;
+        }
+    }
+    my @final_charsets = sort keys %merged_set;
+    return @final_charsets;
+}
+
+######################################################################
+
+=head1 VERSION
+
+1.09 2001.08.22
+
+=head1 COPYRIGHT
+
+Copyright September, 2000 Benjamin Franz. All rights reserved.
+
+This software is free software.  You can redistribute it
+and/or modify it under the same terms as Perl itself.
+
+=head1 AUTHOR
+
+Benjamin Franz <snowhare@nihongo.org>
+
+=head1 TODO
+
+Regression tests for Jcode, 2-byte encodings and encoding aliases
+
+=head1 SEE ALSO
+
+Unicode::String Unicode::Map8 Unicode::Map Jcode
+
+=cut
+
+1;

Added: packages/libunicode-maputf8-perl/branches/upstream/current/t/01_unicode_maputf8.t
===================================================================
--- packages/libunicode-maputf8-perl/branches/upstream/current/t/01_unicode_maputf8.t	2004-06-10 14:32:11 UTC (rev 152)
+++ packages/libunicode-maputf8-perl/branches/upstream/current/t/01_unicode_maputf8.t	2004-06-10 14:33:17 UTC (rev 153)
@@ -0,0 +1,231 @@
+#!/usr/bin/perl -w
+
+use strict;
+use lib ('./blib','../blib','../lib','./lib');
+use Unicode::MapUTF8 qw(utf8_supported_charset to_utf8 from_utf8 utf8_charset_alias);
+
+# General info for writing test modules: 
+#
+# When running as 'make test' the default
+# working directory is the one _above_ the 
+# 't/' directory. 
+
+my @do_tests=(1..5);
+
+my $test_subs = { 
+       1 => { -code => \&test1, -desc => ' eight-bit                 ' },
+       2 => { -code => \&test2, -desc => ' unicode                   ' },
+       3 => { -code => \&test3, -desc => ' multi-byte                ' },
+       4 => { -code => \&test4, -desc => ' jcode                     ' },
+       5 => { -code => \&test5, -desc => ' charset aliases           ' },
+};
+
+my @charsets = utf8_supported_charset;
+
+print $do_tests[0],'..',$do_tests[$#do_tests],"\n";
+print STDERR "\n";
+my $n_failures = 0;
+foreach my $test (@do_tests) {
+	my $sub  = $test_subs->{$test}->{-code};
+	my $desc = $test_subs->{$test}->{-desc};
+	my $failure = '';
+	eval { $failure = &$sub; };
+	if ($@) {
+		$failure = $@;
+	}
+	if ($failure ne '') {
+		chomp $failure;
+		print "not ok $test\n";
+		print STDERR "    $desc - $failure\n";
+		$n_failures++;
+	} else {
+		print "ok $test\n";
+		print STDERR "    $desc - ok\n";
+
+	}
+}
+print "END\n";
+exit;
+
+########################################
+# Eight bit conversions                #
+########################################
+sub test1 {
+    my $charset       = 'ISO-8859-1';
+    my $source_string = 'Hello World';
+    my $utf8_string   = 'Hello World';
+    my $result = test_general({ -charset => $charset,
+                                 -source => $source_string,
+                                   -utf8 => $utf8_string,
+                             });
+    return $result if ($result ne '');
+
+    $source_string = '';
+    $utf8_string    = '';
+    $result = test_general({ -charset => $charset,
+                              -source => $source_string,
+                                -utf8 => $utf8_string,
+                             });
+    return $result if ($result ne '');
+
+    return '';
+}
+
+########################################
+# Unicode conversions                  #
+########################################
+
+sub test2 {
+    my $charset       = 'UCS2';
+    my $source_string = "\x00H\x00e\x00l\x00l\x00o\x00 \x00W\x00o\x00r\x00l\x00d";
+    my $utf8_string   = 'Hello World';
+    my $result = test_general({ -charset => $charset,
+                                 -source => $source_string,
+                                   -utf8 => $utf8_string,
+                             });
+    return $result if ($result ne '');
+
+    $source_string = '';
+    $utf8_string    = '';
+    $result = test_general({ -charset => $charset,
+                              -source => $source_string,
+                                -utf8 => $utf8_string,
+            });
+    return $result if ($result ne '');
+
+    return '';
+}
+
+########################################
+# Multibyte conversions                #
+########################################
+sub test3 {
+    return '';
+}
+
+########################################
+# Japanese (Jcode) conversions         #
+########################################
+sub test4 {
+    my $charset       = 'euc-jp';
+    my $source_string = "Hello World";
+    my $utf8_string   = 'Hello World';
+    my $result = test_general({ -charset => $charset,
+                                 -source => $source_string,
+                                   -utf8 => $utf8_string,
+                             });
+    return $result if ($result ne '');
+
+    $source_string = '';
+    $utf8_string   = '';
+    $result = test_general({ -charset => $charset,
+                              -source => $source_string,
+                                -utf8 => $utf8_string,
+            });
+    return $result if ($result ne '');
+    
+    return '';
+}
+
+########################################
+# Charset aliases                      #
+########################################
+sub test5 {
+    my $charset='ISO-8859-1';
+    my $alias  ='latin-1_sort_of';
+    eval {
+        utf8_charset_alias({ $alias => $charset });
+    };
+	if ($@) { return "$@" }
+    eval {
+        my $aliased = utf8_charset_alias($alias);
+        if ((! defined $aliased) || (lc($charset) ne lc($aliased))) {
+            die("Alias crosscheck for '$alias' -> '$charset' returned a *different* charset of '$aliased'");
+        }
+    };
+	if ($@) { return "Failed to alias character set '$charset' to '$alias': $@" }
+
+    $charset       = $alias;
+    my $source_string = 'Hello World';
+    my $utf8_string   = 'Hello World';
+    my $result = test_general({ -charset => $charset,
+                                 -source => $source_string,
+                                   -utf8 => $utf8_string,
+                             });
+    return $result if ($result ne '');
+
+    $source_string = '';
+    $utf8_string    = '';
+    $result = test_general({ -charset => $charset,
+                              -source => $source_string,
+                                -utf8 => $utf8_string,
+                             });
+    return $result if ($result ne '');
+
+    eval {
+        utf8_charset_alias({ $alias => undef });
+    };
+	if ($@) { return "$@" }
+
+    $source_string = 'Hello World';
+    $utf8_string   = 'Hello World';
+    eval { my $result = test_general({ -charset => $charset,
+                                 -source => $source_string,
+                                   -utf8 => $utf8_string,
+                             });
+    };
+    if (! defined $@) {
+        return "Failed to catch use of non-aliased charset";
+    }
+
+    return '';
+}
+
+########################################
+# Generalized test framework           #
+########################################
+
+sub test_general {
+    my ($parms) = shift;
+    my $source_charset = $parms->{-charset};
+    my $source_string  = $parms->{-source};
+    my $utf8_string    = $parms->{-utf8};
+
+	eval { 
+        my $result_string = to_utf8({ -string => $source_string, 
+                                     -charset => $source_charset });
+        if ($utf8_string ne $result_string) {
+           die ('(line ' . __LINE__ . ") conversion from '$source_charset' to UTF8 resulted in unexpected output. Expected '" . hexout($utf8_string) . "' but got '" . hexout($result_string) . "'\n");
+        }
+    };
+	if ($@) { return "Failed to convert UTF8 text to $source_charset: $@" }
+	eval { 
+        my $result_string = from_utf8({ '-string' => $utf8_string, 
+                                       '-charset' => $source_charset,
+                                       }); 
+        if ($source_string ne $result_string) {
+           die ("conversion from UTF8 to '$source_charset' resulted in unexpected output. Expected '" . hexout($source_string) . "' but got '" . hexout($result_string) . "'\n");
+        }
+    };
+	if ($@) { return "Failed to convert '$source_charset' text to UTF8: $@" }
+
+
+	eval { 
+           my $result_string = from_utf8({ -string => $source_string, 
+                                          -charset => $source_charset,
+                                          }); 
+           if ($source_string ne to_utf8({ -string => $result_string, 
+                                          -charset => $source_charset })) {
+                die ("input and output strings differed");
+           }     
+    };
+	if ($@) { return "Round trip conversion of '$source_charset' to UTF8 failed: $@" }
+
+    return '';
+}
+
+sub hexout {
+    my ($string) = @_;
+    $string =~ s/([\x00-\xff])/unpack("H",$1).unpack("h",$1)/egos;
+    return $string;
+}


Property changes on: packages/libunicode-maputf8-perl/branches/upstream/current/t/01_unicode_maputf8.t
___________________________________________________________________
Name: svn:executable
   +