r18120 - in /branches/upstream/libmime-charset-perl/current: Changes Charset.pm Charset/JA_JP.pod Charset/_Compat.pm MANIFEST META.yml README t/03info.t t/03ooinfo.t t/04alias.t

Sat Mar 29 17:07:20 UTC 2008

Author: gregoa-guest
Date: Sat Mar 29 17:07:19 2008
New Revision: 18120

URL: http://svn.debian.org/wsvn/?sc=1&rev=18120
Log:
[svn-upgrade] Integrating new upstream version, libmime-charset-perl (1.003)

Added:
    branches/upstream/libmime-charset-perl/current/t/04alias.t
Modified:
    branches/upstream/libmime-charset-perl/current/Changes
    branches/upstream/libmime-charset-perl/current/Charset.pm
    branches/upstream/libmime-charset-perl/current/Charset/JA_JP.pod
    branches/upstream/libmime-charset-perl/current/Charset/_Compat.pm
    branches/upstream/libmime-charset-perl/current/MANIFEST
    branches/upstream/libmime-charset-perl/current/META.yml
    branches/upstream/libmime-charset-perl/current/README
    branches/upstream/libmime-charset-perl/current/t/03info.t
    branches/upstream/libmime-charset-perl/current/t/03ooinfo.t

Modified: branches/upstream/libmime-charset-perl/current/Changes
URL: http://svn.debian.org/wsvn/branches/upstream/libmime-charset-perl/current/Changes?rev=18120&op=diff
==============================================================================

--- branches/upstream/libmime-charset-perl/current/Changes (original)
+++ branches/upstream/libmime-charset-perl/current/Changes Sat Mar 29 17:07:19 2008
@@ -1,3 +1,12 @@
+2008-03-29	Hatuka*nezumi - IKEDA Soji	<hatuka at nezumi.nu>
+	* Release 1.003.
+	* Added ESTI GSM 03.38 which won't be used for MIME
+	  messages (experimental).
+	* _Compat.pm: resolve_alias(): real aliasing taken from
+	  Encode module.
+	* Numeric values are allowed for ``Replacement'' options.
+	* Added tests for aliases (some MIME preferred names only).
+
 2008-03-20	Hatuka*nezumi - IKEDA Soji	<hatuka at nezumi.nu>
 	* Release 1.002.
 	* New method undecode().

Modified: branches/upstream/libmime-charset-perl/current/Charset.pm
URL: http://svn.debian.org/wsvn/branches/upstream/libmime-charset-perl/current/Charset.pm?rev=18120&op=diff
==============================================================================
--- branches/upstream/libmime-charset-perl/current/Charset.pm (original)
+++ branches/upstream/libmime-charset-perl/current/Charset.pm Sat Mar 29 17:07:19 2008
@@ -44,7 +44,7 @@
     MIME::Charset::default("iso-8859-1");
     MIME::Charset::fallback("us-ascii");
 
-Non-OOP functions (may be deprecated in near future):
+Non-OO functions (may be deprecated in near future):
 
     use MIME::Charset qw(:info);
 
@@ -121,7 +121,7 @@
     }
 }
 
-$VERSION = '1.002';
+$VERSION = '1.003';
 
 ######## Private Attributes ########
 
@@ -155,6 +155,7 @@
 		'ISO-2022-JP' =>	['B',	undef,	undef],
 		'KOI8-R' =>		['B',	'B',	undef],
 		'UTF-8' =>		['S',	'B',	undef],
+		'GSM03.38' =>		[undef,	undef,	undef], # not for MIME
 		# We're making this one up to represent raw unencoded 8bit
 		'8BIT' =>		[undef,	'B',	'ISO-8859-1'],
 		);
@@ -172,9 +173,10 @@
 		       "KS_C_5601" =>		"KS_C_5601-1987",
 		       "SHIFTJIS" =>		"SHIFT_JIS",
 		       "SHIFTJISX0213" =>	"SHIFT_JISX0213",
-		       "UNICODE-1-1-UTF-7" =>	"UTF-7",
+		       "UNICODE-1-1-UTF-7" =>	"UTF-7", # RFC 1642 (obs.)
 		       "UTF8" =>		"UTF-8",
-		       "UTF-8-STRICT" =>	"UTF-8",
+		       "UTF-8-STRICT" =>	"UTF-8", # Perl internal use
+		       "GSM0338" =>		"GSM03.38",
 		       );
 
 # Some vendors encode characters beyond standardized mappings using extended
@@ -200,7 +202,7 @@
 				     ['iso-2022-jp-ms', 'Encode::ISO2022JPMS'],
 				     ['iso-2022-jp-1'], # Encode::JP (note*)
 				    ],
-		    'SHIFT_JIS'  => [['cp932'], ],      # Encode::JP (note*)
+		    'SHIFT_JIS'  => [['cp932'], ],      # Encode::JP
 		    'EUC-KR'     => [['cp949'], ],      # Encode::KR
 		    'BIG5'       => [
 				     # ['big5plus',     'Encode::HanExtra'],
@@ -217,13 +219,14 @@
 		    'ISO-2022-JP-3' => [['iso-2022-jp-3', 'Encode::JIS2K'], ],
 		    'SHIFT_JISX0213'=> [['shiftjisx0213', 'Encode::JIS2K'], ],
 		    'EUC-TW'        => [['euc-tw',      'Encode::HanExtra'], ],
+		    'GSM03.38'      => [['gsm0338'], ],	# Encode::GSM0338
 		},
 );
 
-# note*: This encoder is not UCM-based.
-
-# ISO-2022-* escape sequnces to detect charset from unencoded data.
-my @ISO2022_SEQ = (# escape seq	possible charset
+# ISO-2022-* escape sequences etc. to detect charset from unencoded data.
+my @ESCAPE_SEQS = ( 
+		# ISO-2022-* sequences
+		   # escape seq, possible charset
 		   # Following sequences are commonly used.
 		   ["\033\$\@",	"ISO-2022-JP"],	# RFC 1468
 		   ["\033\$B",	"ISO-2022-JP"],	# ditto
@@ -238,13 +241,28 @@
 		   ["\033\$*H",	"ISO-2022-CN"], # ditto
 		   # Other sequences will be used with appropriate charset
 		   # parameters, or hardly used.
-		   );
 
 		   # note*: This RFC defines ISO-2022-JP-1, superset of 
 		   # ISO-2022-JP.  But that charset name is rarely used.
 		   # OTOH many of encoders for ISO-2022-JP recognize this
 		   # sequence so that comatibility with EUC-JP will be
 		   # guaranteed.
+
+		# Singlebyte 7-bit sequences
+		   # escape seq, possible charset
+		   ["\033e",	"GSM03.38"],	# ESTI GSM 03.38 (note*)
+		   ["\033\012",	"GSM03.38"],	# ditto
+		   ["\033<",	"GSM03.38"],	# ditto
+		   ["\033/",	"GSM03.38"],	# ditto
+		   ["\033>",	"GSM03.38"],	# ditto
+		   ["\033\024",	"GSM03.38"],	# ditto
+		   ["\033(",	"GSM03.38"],	# ditto
+		   ["\033\@",	"GSM03.38"],	# ditto
+		   ["\033)",	"GSM03.38"],	# ditto
+		   ["\033=",	"GSM03.38"],	# ditto
+
+		   # note*: This is not used for MIME message.
+		  );
 
 ######## Public Configuration Attributes ########
 
@@ -407,6 +425,11 @@
 Get L<"Encode::Encoding"> object to encode Unicode string using compatible
 charset recommended to be used for messages on Internet.
 
+If optional CHARSET is specified, replace encoder (and output charset
+name) of $charset object with those of CHARSET, therefore,
+$charset object will be a converter between original charset and
+new CHARSET.
+
 =cut
 
 sub encoder($$;) {
@@ -480,7 +503,7 @@
 
 =item Replacement => REPLACEMENT
 
-Specifies error handling scheme.  See L<"ERROR HANDLING">.
+Specifies error handling scheme.  See L<"Error Handling">.
 
 =item Detect7bit => YESNO
 
@@ -656,7 +679,7 @@
 
 =item Replacement => REPLACEMENT
 
-Specifies error handling scheme.  See L<"ERROR HANDLING">.
+Specifies error handling scheme.  See L<"Error Handling">.
 
 =item Detect7bit => YESNO
 
@@ -727,8 +750,8 @@
     my $charset = shift;
     my $s = shift;
     my %params = @_;
-    my $replacement = uc($params{'Replacement'}) || $Config->{Replacement};
-    my $detect7bit = uc($params{'Detect7bit'}) || $Config->{Detect7bit};
+    my $replacement = uc($params{'Replacement'} || $Config->{Replacement});
+    my $detect7bit = uc($params{'Detect7bit'} || $Config->{Detect7bit});
 
     unless ($charset and $charset->{InputCharset}) {
 	if ($s =~ $NONASCIIRE) {
@@ -741,78 +764,54 @@
     }
 
     # Unknown charset.
-    return ($s, $charset)
-	unless $charset->{Decoder};
+    unless ($charset->{Decoder}) {
+	croak "unknown charset ``$charset->{InputCharset}''"
+	    if is_utf8($s) or $s =~ /[^\x00-\xFF]/;
+	return ($s, $charset);
+    }
+
+    my $check = ($replacement and $replacement =~ /^\d+$/)?
+	$replacement:
+    {
+	'CROAK' => FB_CROAK(),
+	'STRICT' => FB_CROAK(),
+	'FALLBACK' => FB_CROAK(), # special
+	'PERLQQ' => FB_PERLQQ(),
+	'HTMLCREF' => FB_HTMLCREF(),
+	'XMLCREF' => FB_XMLCREF(),
+    }->{$replacement || ""} || 0;
 
     # Encode data by output charset if required.  If failed, fallback to
     # fallback charset.
     my $encoded;
-
-    if (is_utf8($s) or $s =~ /[^\x00-\xFF]/) {
-	if ($replacement =~ /^(?:CROAK|STRICT|FALLBACK)$/) {
+    if (is_utf8($s) or $s =~ /[^\x00-\xFF]/ or
+	$charset->{InputCharset} ne $charset->{OutputCharset}) {
+	if ($check & 0x1) { # CROAK or FALLBACK
 	    eval {
 		$encoded = $s;
 		$encoded = $charset->encode($encoded, FB_CROAK());
 	    };
 	    if ($@) {
 		if ($replacement eq "FALLBACK" and $FALLBACK_CHARSET) {
-		    $charset = MIME::Charset->new($FALLBACK_CHARSET);
-		    # croak unknown charset
-		    croak "Unknown charset: $FALLBACK_CHARSET"
-			unless $charset->{Decoder};
-		    # No charset transformation.
-		    $charset->{OutputCharset} = $charset->{InputCharset};
-		    $charset->{Encoder} = $charset->{Decoder};
-
-		    $encoded = $s;
-		    $encoded = $charset->encode($encoded);
-		} else {
-		    $@ =~ s/ at .+$//;
-		    croak $@;
-		}
-	    }
-	} elsif ($replacement eq "PERLQQ") {
-	    $encoded = $charset->encode($s, FB_PERLQQ());
-	} elsif ($replacement eq "HTMLCREF") {
-	    $encoded = $charset->encode($s, FB_HTMLCREF());
-	} elsif ($replacement eq "XMLCREF") {
-	    $encoded = $charset->encode($s, FB_XMLCREF());
-	} else {
-	    $encoded = $charset->encode($s);
-	}
-    } elsif ($charset->{InputCharset} ne $charset->{OutputCharset}) {
-	$encoded = $s;
-	if ($replacement =~ /^(?:CROAK|STRICT|FALLBACK)$/) {
-	    eval {
-		$encoded = $charset->encode($encoded, FB_CROAK());
-	    };
-	    if ($@) {
-		if ($replacement eq "FALLBACK" and $FALLBACK_CHARSET) {
 		    my $cset = MIME::Charset->new($FALLBACK_CHARSET);
 		    # croak unknown charset
-		    croak "Unknown charset: $FALLBACK_CHARSET"
-			unless $cset->{Decoder};
-		    # No charset transformations.
-		    $charset->{OutputCharset} = $cset->{OutputCharset} =
-			$cset->{InputCharset};
-		    $charset->{Encoder} = $cset->{Encoder} = $cset->{Decoder};
+		    croak "unknown charset ``$FALLBACK_CHARSET''"
+			unless $charset->{Decoder};
+		    # charset transformation
+		    $charset->encoder($cset);
 		    $encoded = $s;
-		    $encoded = $charset->encode($encoded);
+		    $encoded = $charset->encode($encoded, 0);
+		    $cset->encoder($cset);
 		    $charset = $cset;
 		} else {
 		    $@ =~ s/ at .+$//;
 		    croak $@;
 		}
 	    }
-        } elsif ($replacement eq "PERLQQ") {
-            $encoded = $charset->encode($encoded, FB_PERLQQ());
-        } elsif ($replacement eq "HTMLCREF") {
-            $encoded = $charset->encode($encoded, FB_HTMLCREF());
-        } elsif ($replacement eq "XMLCREF") {
-            $encoded = $charset->encode($encoded, FB_XMLCREF());
-        } else {
-            $encoded = $charset->encode($encoded);
-        }
+	} else {
+	    $encoded = $s;
+	    $encoded = $charset->encode($encoded, $check);
+	}
     } else {
         $encoded = $s;
     }
@@ -825,8 +824,8 @@
     my $s = shift;
     return $DEFAULT_CHARSET unless $s;
 
-    # Try to detect ISO-2022-* escape sequences.
-    foreach (@ISO2022_SEQ) {
+    # Try to detect 7-bit escape sequences.
+    foreach (@ESCAPE_SEQS) {
 	my ($seq, $cset) = @$_;
 	if (index($s, $seq) >= 0) {
             my $decoder = MIME::Charset->new($cset);
@@ -842,7 +841,7 @@
 	}
     }
 
-    # How about HZ, VIQR, ...?
+    # How about HZ, VIQR, UTF-7, ...?
 
     return $DEFAULT_CHARSET;
 }
@@ -850,7 +849,7 @@
 =item $charset->undecode(STRING [,CHECK])
 
 Encode Unicode string STRING to byte string by input charset of $charset.
-This is a equivalent to C<$charset->decoder->encode()>.
+This is equivalent to C<$charset-E<gt>decoder-E<gt>encode()>.
 
 B<Note>:
 When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
@@ -1042,6 +1041,11 @@
 Use C<FB_PERLQQ>, C<FB_HTMLCREF> or C<FB_XMLCREF>
 scheme defined by L<Encode> module.
 
+=item numeric values
+
+Numeric values are also allowed.
+For more details see L<Encode/Handling Malformed Data>.
+
 =back
 
 If error handling scheme is not specified or unknown scheme is specified,

Modified: branches/upstream/libmime-charset-perl/current/Charset/JA_JP.pod
URL: http://svn.debian.org/wsvn/branches/upstream/libmime-charset-perl/current/Charset/JA_JP.pod?rev=18120&op=diff
==============================================================================
--- branches/upstream/libmime-charset-perl/current/Charset/JA_JP.pod (original)
+++ branches/upstream/libmime-charset-perl/current/Charset/JA_JP.pod Sat Mar 29 17:07:19 2008
@@ -41,7 +41,7 @@
     MIME::Charset::default("iso-8859-1");
     MIME::Charset::fallback("us-ascii");
 
-éžOOPé–¢æ•° (è¿‘ã„å°†æ¥ã«å»ƒæ¢):
+éžOOé–¢æ•° (è¿‘ã„å°†æ¥ã«å»ƒæ¢):
 
     use MIME::Charset qw(:info);
 
@@ -79,9 +79,7 @@
 B<キャラクタセット> とは、MIME での ``character set'' のことで、
 オクテットの列を文字の列に変換する方法を指す。
 これは、ISO/IEC における ``符号化文字集合'' (CCS) と
-``文字符合化法'' (CES) の両方の概念を包含する
-(この定義は不正確かもしれません。
-より正確な定義をご存じの方ご指南ください)。
+``文字符合化法'' (CES) の両方の概念を包含する。
 
 B<エンコーディング> とは、MIME でのそれのことで、
 メッセージ本体やメッセージヘッダ本体を印字可能な
@@ -150,6 +148,11 @@
 インターネット上の MIME
 メッセージで使うことを推奨される互換キャラクタセットでエンコードするのに使う
 L<"Encode::Encoding"> オブジェクトを返す。
+
+CHARSET 引数を指定した場合、$charset オブジェクトのエンコーダ
+(および出力キャラクタセット名) を、CHARSET のそれに置き換える。
+つまり、$charset オブジェクトは元のキャラクタセットから新たな
+CHARSET への変換器となる。
 
 =cut
 
@@ -305,7 +308,7 @@
 
 Unicode 文字列 string を、
 $charset の入力キャラクタセットを使って文字列に変換する。
-これは C<$charset->decoder->encode()> と同等である。
+これは C<$charset-E<gt>decoder-E<gt>encode()> と同等である。
 
 B<NOTE>:
 Unicode/マルチバイト対応が有効になっていないとき (L<"USE_ENCODE"> 参照) は、
@@ -441,6 +444,11 @@
 C<FB_PERLQQ>、C<FB_HTMLCREF>、C<FB_XMLCREF>
 の方式を使う。
 
+=item 数値
+
+数値を指定することもできる。
+詳細は L<Encode/Handling Malformed Data> を見てほしい。
+
 =back
 
 エラー処理法が指定されないか、上記以外のエラー処理法が指定されたときは、

Modified: branches/upstream/libmime-charset-perl/current/Charset/_Compat.pm
URL: http://svn.debian.org/wsvn/branches/upstream/libmime-charset-perl/current/Charset/_Compat.pm?rev=18120&op=diff
==============================================================================
--- branches/upstream/libmime-charset-perl/current/Charset/_Compat.pm (original)
+++ branches/upstream/libmime-charset-perl/current/Charset/_Compat.pm Sat Mar 29 17:07:19 2008
@@ -3,21 +3,22 @@
 use 5.004;
 
 use strict;
+use Carp qw(croak);
 
 use vars qw($VERSION);
 
-$VERSION = "0.04";
+$VERSION = "1.003";
 
-sub FB_CROAK { 1; }
-sub FB_PERLQQ { 100; }
-sub FB_HTMLCREF { 200; }
-sub FB_XMLCREF { 400; }
+sub FB_CROAK { 0x1; }
+sub FB_PERLQQ { 0x100; }
+sub FB_HTMLCREF { 0x200; }
+sub FB_XMLCREF { 0x400; }
 sub encode { $_[1]; }
 sub decode { $_[1]; }
 sub from_to {
     if ((lc($_[2]) eq "us-ascii" or lc($_[1]) eq "us-ascii") and
 	$_[0] =~ s/[^\x01-\x7e]/?/g and $_[3] == 1) {
-	die "Non-ASCII characters";
+	croak "Non-ASCII characters";
     }
     $_[0];
 }
@@ -27,9 +28,77 @@
     if ($cset eq "8bit" or $cset !~ /\S/) {
 	return undef;
     } else {
+	# Taken from Encode-2.24.
+	my %Winlatin2cp = (
+	   'latin1'     => 1252,
+	   'latin2'     => 1250,
+	   'cyrillic'   => 1251,
+	   'greek'      => 1253,
+	   'turkish'    => 1254,
+	   'hebrew'     => 1255,
+	   'arabic'     => 1256,
+	   'baltic'     => 1257,
+	   'vietnamese' => 1258,
+	);
+	my @Latin2iso = ( 0, 1, 2, 3, 4, 9, 10, 13, 14, 15, 16 );
+	$cset =~ s/^(\S+)[\s_]+(.*)$/$1-$2/i;
+	$cset =~ s/^UTF-8$/utf8/i;
+	$cset =~ s/^.*\bhk(?:scs)?[-_]?big5$/big5-hkscs/i;
+	$cset =~ s/^.*\bbig5-?hk(?:scs)?$/big5-hkscs/i;
+	$cset =~ s/^.*\btca[-_]?big5$/big5-eten/i;
+	$cset =~ s/^.*\bbig5-?et(?:en)?$/big5-eten/i;
+	$cset =~ s/^.*\bbig-?5$/big5-eten/i;
+	$cset =~ s/^.*\bks_c_5601-1987$/cp949/i;
+	$cset =~ s/^.*(?:x-)?windows-949$/cp949/i;
+	$cset =~ s/^.*(?:x-)?uhc$/cp949/i;
+	$cset =~ s/^.*\bkr.*euc$/euc-kr/i;
+	$cset =~ s/^.*\beuc.*kr$/euc-kr/i;
+	$cset =~ s/^.*\bsjis$/shiftjis/i;
+	$cset =~ s/^.*\bshift.*jis$/shiftjis/i;
+	$cset =~ s/^.*\bujis$/euc-jp/i;
+	$cset =~ s/^.*\bjp.*euc$/euc-jp/i;
+	$cset =~ s/^.*\beuc.*jp$/euc-jp/i;
+	$cset =~ s/^.*\bjis$/7bit-jis/i;
+	$cset =~ s/^.*\bGB[-_ ]?2312(?!-?raw).*$/euc-cn/i;
+	$cset =~ s/^gbk$/cp936/i;
+	$cset =~ s/^.*\bcn.*euc$/euc-cn/i;
+	$cset =~ s/^.*\beuc.*cn$/euc-cn/i;
+	$cset =~ s/^.*\bkoi8[-\s_]*([ru])$/koi8-$1/i;
+	$cset =~ s/^mac_(.*)$/mac$1/i;
+	$cset =~ s/^.*\b(?:cp|ibm|ms|windows)[-_ ]?(\d{2,4})$/cp$1/i;
+	$cset =~ s/^tis620$/iso-8859-11/i;
+	$cset =~ s/^thai$/iso-8859-11/i;
+	$cset =~ s/^hebrew$/iso-8859-8/i;
+	$cset =~ s/^greek$/iso-8859-7/i;
+	$cset =~ s/^arabic$/iso-8859-6/i;
+	$cset =~ s/^cyrillic$/iso-8859-5/i;
+	$cset =~ s/^ascii$/US-ascii/i;
+	if ($cset =~ /^.*\bwin(latin[12]|cyrillic|baltic|greek|turkish|
+			    hebrew|arabic|baltic|vietnamese)$/ix) {
+	    $cset = "cp" . $Winlatin2cp{lc($1)};
+	}
+	if ($cset =~ /^.*\b(?:iso[-_]?)?latin[-_]?(\d+)$/i) {
+	    $cset = defined $Latin2iso[$1] ? "iso-8859-$Latin2iso[$1]" : undef;
+	}
+	$cset =~ s/^(.+)\@euro$/$1/i;
+	$cset =~ s/^.*\bANSI[-_]?X3\.4[-_]?1968$/ascii/i;
+	$cset =~ s/^.*\b(?:hp-)?(arabic|greek|hebrew|kana|roman|thai|turkish)8$/${1}8/i;
+	$cset =~ s/^.*\biso8859(\d+)$/iso-8859-$1/i;
+	$cset =~ s/^.*\biso[-_]?(\d+)[-_](\d+)$/iso-$1-$2/i;
+	$cset =~ s/^.*\bISO[-_]?646[-_]?US$/ascii/i;
+	$cset =~ s/^C$/ascii/i;
+	$cset =~ s/^(?:US-?)ascii$/ascii/i;
+	$cset =~ s/^UTF(16|32)$/UTF-$1/i;
+	$cset =~ s/^UTF(16|32)-?LE$/UTF-$1LE/i;
+	$cset =~ s/^UTF(16|32)-?BE$/UTF-$1BE/i;
+	$cset =~ s/^iso-10646-1$/UCS-2BE/i;
+	$cset =~ s/^UCS-?4-?(BE|LE)?$/uc("UTF-32$1")/ie;
+	$cset =~ s/^UCS-?2-?(BE)?$/UCS-2BE/i;
+	$cset =~ s/^UCS-?2-?LE$/UCS-2LE/i;
+	$cset =~ s/^UTF-?7$/UTF-7/i;
+	$cset =~ s/^(.*)$/\L$1/;
 	return $cset;
     }
 }
 
 1;
-

Modified: branches/upstream/libmime-charset-perl/current/MANIFEST
URL: http://svn.debian.org/wsvn/branches/upstream/libmime-charset-perl/current/MANIFEST?rev=18120&op=diff
==============================================================================
--- branches/upstream/libmime-charset-perl/current/MANIFEST (original)
+++ branches/upstream/libmime-charset-perl/current/MANIFEST Sat Mar 29 17:07:19 2008
@@ -15,3 +15,4 @@
 t/02ooenclen.t
 t/03info.t
 t/03ooinfo.t
+t/04alias.t

Modified: branches/upstream/libmime-charset-perl/current/META.yml
URL: http://svn.debian.org/wsvn/branches/upstream/libmime-charset-perl/current/META.yml?rev=18120&op=diff
==============================================================================
--- branches/upstream/libmime-charset-perl/current/META.yml (original)
+++ branches/upstream/libmime-charset-perl/current/META.yml Sat Mar 29 17:07:19 2008
@@ -1,7 +1,7 @@
 # http://module-build.sourceforge.net/META-spec.html
 #XXXXXXX This is a prototype!!!  It will change in the future!!! XXXXX#
 name:         MIME-Charset
-version:      1.002
+version:      1.003
 version_from: Charset.pm
 installdirs:  site
 requires:

Modified: branches/upstream/libmime-charset-perl/current/README
URL: http://svn.debian.org/wsvn/branches/upstream/libmime-charset-perl/current/README?rev=18120&op=diff
==============================================================================
--- branches/upstream/libmime-charset-perl/current/README (original)
+++ branches/upstream/libmime-charset-perl/current/README Sat Mar 29 17:07:19 2008
@@ -38,7 +38,7 @@
         MIME::Charset::default("iso-8859-1");
         MIME::Charset::fallback("us-ascii");
 
-    Non-OOP functions (may be deprecated in near future):
+    Non-OO functions (may be deprecated in near future):
 
         use MIME::Charset qw(:info);
 
@@ -113,6 +113,11 @@
         Get "Encode::Encoding" object to encode Unicode string using
         compatible charset recommended to be used for messages on Internet.
 
+        If optional CHARSET is specified, replace encoder (and output
+        charset name) of $charset object with those of CHARSET, therefore,
+        $charset object will be a converter between original charset and new
+        CHARSET.
+
     $charset->header_encoding
     header_encoding CHARSET
         Get recommended encoding scheme of CHARSET for message header.
@@ -142,7 +147,7 @@
         will not be performed. So these options do not have any effects.
 
         Replacement => REPLACEMENT
-            Specifies error handling scheme. See "ERROR HANDLING".
+            Specifies error handling scheme. See "Error Handling".
 
         Detect7bit => YESNO
             Try auto-detecting 7-bit charset when CHARSET is not given.
@@ -190,7 +195,7 @@
         will not be performed. So these options do not have any effects.
 
         Replacement => REPLACEMENT
-            Specifies error handling scheme. See "ERROR HANDLING".
+            Specifies error handling scheme. See "Error Handling".
 
         Detect7bit => YESNO
             Try auto-detecting 7-bit charset when CHARSET is not given.
@@ -208,7 +213,7 @@
 
     $charset->undecode(STRING [,CHECK])
     Encode Unicode string STRING to byte string by input charset of
-    $charset. This is a equivalent to "$charset-"decoder->encode()>.
+    $charset. This is equivalent to "$charset->decoder->encode()".
 
     Note: When Unicode/multibyte support is disabled (see "USE_ENCODE"),
     this function will die.
@@ -311,6 +316,10 @@
         Use "FB_PERLQQ", "FB_HTMLCREF" or "FB_XMLCREF" scheme defined by
         Encode module.
 
+    numeric values
+        Numeric values are also allowed. For more details see "Handling
+        Malformed Data" in Encode.
+
     If error handling scheme is not specified or unknown scheme is
     specified, "DEFAULT" will be assumed.
 

Modified: branches/upstream/libmime-charset-perl/current/t/03info.t
URL: http://svn.debian.org/wsvn/branches/upstream/libmime-charset-perl/current/t/03info.t?rev=18120&op=diff
==============================================================================
--- branches/upstream/libmime-charset-perl/current/t/03info.t (original)
+++ branches/upstream/libmime-charset-perl/current/t/03info.t Sat Mar 29 17:07:19 2008
@@ -5,15 +5,13 @@
 
 use MIME::Charset qw(:info);
 
-ok(body_encoding("iso-8859-2") eq "Q");
+ok(body_encoding("iso-8859-2"), "Q", body_encoding("iso-8859-2"));
+ok(canonical_charset("ANSI X3.4-1968"), "US-ASCII",
+   canonical_charset("ANSI X3.4-1968"));
+ok(header_encoding("utf-8"), "S", header_encoding("utf-8"));
 if (MIME::Charset::USE_ENCODE) {
-    ok(canonical_charset("ANSI X3.4-1968") eq "US-ASCII");
+    ok(output_charset("shift_jis"), "ISO-2022-JP",
+       output_charset("shift_jis"));
 } else {
-    ok(canonical_charset("ascii") eq "US-ASCII");
+    ok(output_charset("shift_jis"), "SHIFT_JIS", output_charset("shift_jis"));
 }
-ok(header_encoding("utf-8") eq "S");
-if (MIME::Charset::USE_ENCODE) {
-    ok(output_charset("shift_jis") eq "ISO-2022-JP");
-} else {
-    ok(output_charset("shift_jis") eq "SHIFT_JIS");
-}

Modified: branches/upstream/libmime-charset-perl/current/t/03ooinfo.t
URL: http://svn.debian.org/wsvn/branches/upstream/libmime-charset-perl/current/t/03ooinfo.t?rev=18120&op=diff
==============================================================================
--- branches/upstream/libmime-charset-perl/current/t/03ooinfo.t (original)
+++ branches/upstream/libmime-charset-perl/current/t/03ooinfo.t Sat Mar 29 17:07:19 2008
@@ -7,19 +7,14 @@
 
 my $obj;
 $obj = MIME::Charset->new("iso-8859-2");
-ok($obj->body_encoding eq "Q");
-if (MIME::Charset::USE_ENCODE) {
-    $obj = MIME::Charset->new("ANSI X3.4-1968");
-    ok($obj->canonical_charset eq "US-ASCII");
-} else {
-    $obj = MIME::Charset->new("ascii");
-    ok($obj->canonical_charset eq "US-ASCII");
-}
-$obj = MIME::Charset->new("utf-9");
-ok($obj->header_encoding eq "S");
+ok($obj->body_encoding, "Q", $obj->body_encoding);
+$obj = MIME::Charset->new("ANSI X3.4-1968");
+ok($obj->canonical_charset, "US-ASCII", $obj->canonical_charset);
+$obj = MIME::Charset->new("utf-8");
+ok($obj->header_encoding, "S", $obj->header_encoding);
 $obj = MIME::Charset->new("shift_jis");
 if (MIME::Charset::USE_ENCODE) {
-    ok($obj->output_charset eq "ISO-2022-JP");
+    ok($obj->output_charset, "ISO-2022-JP", $obj->output_charset);
 } else {
-    ok($obj->output_charset eq "SHIFT_JIS");
+    ok($obj->output_charset, "SHIFT_JIS", $obj->output_charset);
 }

Added: branches/upstream/libmime-charset-perl/current/t/04alias.t
URL: http://svn.debian.org/wsvn/branches/upstream/libmime-charset-perl/current/t/04alias.t?rev=18120&op=file
==============================================================================
--- branches/upstream/libmime-charset-perl/current/t/04alias.t (added)
+++ branches/upstream/libmime-charset-perl/current/t/04alias.t Sat Mar 29 17:07:19 2008
@@ -1,0 +1,21 @@
+use strict;
+use Test;
+
+BEGIN { plan tests => 25 }
+
+my @names = qw(
+	    US-ASCII
+	    ISO-8859-1 ISO-8859-2 ISO-8859-3 ISO-8859-4 ISO-8859-5
+	    ISO-8859-6 ISO-8859-7 ISO-8859-8 ISO-8859-9 ISO-8859-10
+	    SHIFT_JIS EUC-JP ISO-2022-KR EUC-KR ISO-2022-JP ISO-2022-JP-2
+	    ISO-8859-6-I ISO-8859-6-E ISO-8859-8-E ISO-8859-8-I
+	    GB2312 BIG5 KOI8-R
+	    UTF-8
+	   );
+
+use MIME::Charset qw(:info);
+
+foreach my $name (@names) {
+    my $aliased = MIME::Charset->new($name)->as_string;
+    ok($aliased, $name, $aliased);
+}