r18513 - in /branches/upstream/libmime-charset-perl/current: Changes Charset.pm Charset/JA_JP.pod META.yml README t/01encode.t t/01ooencode.t t/04alias.t

gregoa-guest at users.alioth.debian.org gregoa-guest at users.alioth.debian.org
Sat Apr 12 19:12:19 UTC 2008


Author: gregoa-guest
Date: Sat Apr 12 19:12:18 2008
New Revision: 18513

URL: http://svn.debian.org/wsvn/?sc=1&rev=18513
Log:
[svn-upgrade] Integrating new upstream version, libmime-charset-perl (1.006)

Modified:
    branches/upstream/libmime-charset-perl/current/Changes
    branches/upstream/libmime-charset-perl/current/Charset.pm
    branches/upstream/libmime-charset-perl/current/Charset/JA_JP.pod
    branches/upstream/libmime-charset-perl/current/META.yml
    branches/upstream/libmime-charset-perl/current/README
    branches/upstream/libmime-charset-perl/current/t/01encode.t
    branches/upstream/libmime-charset-perl/current/t/01ooencode.t
    branches/upstream/libmime-charset-perl/current/t/04alias.t

Modified: branches/upstream/libmime-charset-perl/current/Changes
URL: http://svn.debian.org/wsvn/branches/upstream/libmime-charset-perl/current/Changes?rev=18513&op=diff
==============================================================================
--- branches/upstream/libmime-charset-perl/current/Changes (original)
+++ branches/upstream/libmime-charset-perl/current/Changes Sat Apr 12 19:12:18 2008
@@ -1,3 +1,9 @@
+2008-04-12	Hatuka*nezumi - IKEDA Soji	<hatuka at nezumi.nu>
+	* Release 1.006.
+	* Workarounds for ``US-ASCII transformation'' charsets
+	  i.e. HZ-GB-2312 (RFC1842) and UTF-7 (RFC 2152).
+	* Added tests.
+
 2008-04-07	Hatuka*nezumi - IKEDA Soji	<hatuka at nezumi.nu>
 	* Release 1.005.
 	* _enclen_Q(): Restrict characters in encoded-word

Modified: branches/upstream/libmime-charset-perl/current/Charset.pm
URL: http://svn.debian.org/wsvn/branches/upstream/libmime-charset-perl/current/Charset.pm?rev=18513&op=diff
==============================================================================
--- branches/upstream/libmime-charset-perl/current/Charset.pm (original)
+++ branches/upstream/libmime-charset-perl/current/Charset.pm Sat Apr 12 19:12:18 2008
@@ -121,7 +121,7 @@
     }
 }
 
-$VERSION = '1.005';
+$VERSION = '1.006';
 
 ######## Private Attributes ########
 
@@ -155,6 +155,8 @@
 		'ISO-2022-JP' =>	['B',	undef,	undef],
 		'KOI8-R' =>		['B',	'B',	undef],
 		'UTF-8' =>		['S',	'B',	undef],
+		'HZ-GB-2312' =>		['B',	undef,	undef],
+		'UTF-7' =>		['Q',	undef,	undef],
 		'GSM03.38' =>		[undef,	undef,	undef], # not for MIME
 		# We're making this one up to represent raw unencoded 8bit
 		'8BIT' =>		[undef,	'B',	'ISO-8859-1'],
@@ -176,6 +178,7 @@
 		       "UNICODE-1-1-UTF-7" =>	"UTF-7", # RFC 1642 (obs.)
 		       "UTF8" =>		"UTF-8",
 		       "UTF-8-STRICT" =>	"UTF-8", # Perl internal use
+		       "HZ" =>			"HZ-GB-2312", # RFC 1842
 		       "GSM0338" =>		"GSM03.38",
 		       );
 
@@ -219,6 +222,7 @@
 		    'ISO-2022-JP-3' => [['iso-2022-jp-3', 'Encode::JIS2K'], ],
 		    'SHIFT_JISX0213'=> [['shiftjisx0213', 'Encode::JIS2K'], ],
 		    'EUC-TW'        => [['euc-tw',      'Encode::HanExtra'], ],
+		    'HZ-GB-2312'    => [['hz'], ],	# Encode::CN
 		    'GSM03.38'      => [['gsm0338'], ],	# Encode::GSM0338
 		},
 );
@@ -284,7 +288,11 @@
 }x;
 
 my $ISO2022RE = qr{
-    ^ISO-2022-
+    ISO-2022-.+
+}ix;
+
+my $ASCIITRANSRE = qr{
+    HZ-GB-2312 | UTF-7
 }ix;
 
 
@@ -321,6 +329,7 @@
     my %params = @_;
     my $mapping = uc($params{'Mapping'} || $Config->{Mapping});
 
+    $charset = "HZ" if $charset =~ /\bhz.?gb.?2312$/i; # workaround
     $charset = resolve_alias($charset) || $charset;
     $charset = $CHARSET_ALIASES{uc($charset)} || uc($charset);
     my ($henc, $benc, $outcset);
@@ -456,8 +465,8 @@
 	$charset = __PACKAGE__->new($charset) unless ref $charset;
 	$self->{OutputCharset} = $charset->{InputCharset};
 	$self->{Encoder} = $charset->{Decoder};
-	#XXX$self->{BodyEncoding} = $charset->{BodyEncoding};
-	#XXX$self->{HeaderEncoding} = $charset->{HeaderEncoding};
+	$self->{BodyEncoding} = $charset->{BodyEncoding};
+	$self->{HeaderEncoding} = $charset->{HeaderEncoding};
     }
     $self->{Encoder};
 }
@@ -518,14 +527,14 @@
 
 =over 4
 
-=item Replacement => REPLACEMENT
-
-Specifies error handling scheme.  See L<"Error Handling">.
-
 =item Detect7bit => YESNO
 
 Try auto-detecting 7-bit charset when CHARSET is not given.
 Default is C<"YES">.
+
+=item Replacement => REPLACEMENT
+
+Specifies error handling scheme.  See L<"Error Handling">.
 
 =back
 
@@ -549,24 +558,18 @@
 	$text = $self;
 	$self = __PACKAGE__->new(shift);
     }
-    my ($encoded, $charset) = &_text_encode($self, $text, @_);
+    my ($encoded, $charset) = $self->_text_encode($text, @_);
     return ($encoded, undef, 'BASE64')
 	unless $charset and $charset->{InputCharset};
     my $cset = $charset->{OutputCharset};
 
     # Determine transfer-encoding.
-    my $enc;
-    if ($encoded !~ /$NONASCIIRE/) {
-	$cset = "US-ASCII";
-	$enc = undef;
-    } else {
-	$enc = $charset->{BodyEncoding};
-    }
+    my $enc = $charset->{BodyEncoding};
 
     if (!$enc and $encoded !~ /\x00/) {	# Eliminate hostile NUL character.
         if ($encoded =~ $NON7BITRE) {	# String contains 8bit char(s).
             $enc = '8BIT';
-	} elsif ($cset =~ $ISO2022RE) {	# ISO-2022-* outputs are 7BIT.
+	} elsif ($cset =~ /^($ISO2022RE|$ASCIITRANSRE)$/) {	# 7BIT.
             $enc = '7BIT';
         } else {			# Pure ASCII.
             $enc = '7BIT';
@@ -688,14 +691,14 @@
 
 =over 4
 
-=item Replacement => REPLACEMENT
-
-Specifies error handling scheme.  See L<"Error Handling">.
-
 =item Detect7bit => YESNO
 
 Try auto-detecting 7-bit charset when CHARSET is not given.
 Default is C<"YES">.
+
+=item Replacement => REPLACEMENT
+
+Specifies error handling scheme.  See L<"Error Handling">.
 
 =back
 
@@ -722,22 +725,16 @@
 	$text = $self;
 	$self = __PACKAGE__->new(shift);
     }
-    my ($encoded, $charset) = &_text_encode($self, $text, @_);
+    my ($encoded, $charset) = $self->_text_encode($text, @_);
     return ($encoded, '8BIT', undef)
 	unless $charset and $charset->{InputCharset};
     my $cset = $charset->{OutputCharset};
 
     # Determine encoding scheme.
-    my $enc;
-    if ($encoded !~ /$NONASCIIRE/) {
-	$cset = "US-ASCII";
-	$enc = undef;
-    } else {
-	$enc = $charset->{HeaderEncoding};
-    }
+    my $enc = $charset->{HeaderEncoding};
 
     if (!$enc and $encoded !~ $NON7BITRE) {
-	unless ($cset =~ $ISO2022RE) {	# ISO-2022-* outputs are 7BIT.
+	unless ($cset =~ /^($ISO2022RE|$ASCIITRANSRE)$/) {	# 7BIT.
             $cset = 'US-ASCII';
         }
     } elsif ($enc eq 'S') {
@@ -758,24 +755,28 @@
     my %params = @_;
     my $replacement = uc($params{'Replacement'} || $Config->{Replacement});
     my $detect7bit = uc($params{'Detect7bit'} || $Config->{Detect7bit});
-
-    unless ($charset and $charset->{InputCharset}) {
+    my $encoding = $params{'Encoding'} ||
+	(exists $params{'Encoding'}? undef: 'A'); # undocumented
+
+    if (!$encoding or $encoding ne 'A') { # no 7-bit auto-detection
+	$detect7bit = 'NO';
+    }
+    unless ($charset->{InputCharset}) {
 	if ($s =~ $NON7BITRE) {
 	    return ($s, undef);
 	} elsif ($detect7bit ne "NO") {
 	    $charset = __PACKAGE__->new(&_detect_7bit_charset($s));
 	} else {
-	    $charset = __PACKAGE__->new($DEFAULT_CHARSET);
+	    $charset = __PACKAGE__->new($DEFAULT_CHARSET,
+					Mapping => 'STANDARD');
 	} 
     }
-
-    # Unknown charset.
-    unless ($charset->{Decoder}) {
-	croak "unknown charset ``$charset->{InputCharset}''"
-	    if is_utf8($s) or $s =~ /[^\x00-\xFF]/;
-	return ($s, $charset);
-    }
-
+    if (!$encoding or $encoding ne 'A') { # no conversion
+	$charset = $charset->dup;
+	$charset->encoder($charset);
+	$charset->{HeaderEncoding} = $encoding;
+	$charset->{BodyEncoding} = $encoding;
+    }
     my $check = ($replacement and $replacement =~ /^\d+$/)?
 	$replacement:
     {
@@ -791,7 +792,7 @@
     # fallback charset.
     my $encoded;
     if (is_utf8($s) or $s =~ /[^\x00-\xFF]/ or
-	$charset->{InputCharset} ne $charset->{OutputCharset}) {
+	($charset->{InputCharset} || "") ne ($charset->{OutputCharset} || "")) {
 	if ($check & 0x1) { # CROAK or FALLBACK
 	    eval {
 		$encoded = $s;
@@ -799,7 +800,8 @@
 	    };
 	    if ($@) {
 		if ($replacement eq "FALLBACK" and $FALLBACK_CHARSET) {
-		    my $cset = __PACKAGE__->new($FALLBACK_CHARSET);
+		    my $cset = __PACKAGE__->new($FALLBACK_CHARSET,
+						Mapping => 'STANDARD');
 		    # croak unknown charset
 		    croak "unknown charset ``$FALLBACK_CHARSET''"
 			unless $charset->{Decoder};
@@ -822,6 +824,27 @@
         $encoded = $s;
     }
 
+    if ($encoded !~ /$NONASCIIRE/) { # maybe ASCII
+	# check ``ASCII transformation'' charsets
+	if ($charset->{OutputCharset} =~ /^($ASCIITRANSRE)$/ and
+	    $encoded =~ /[+~]/) {
+	    my $u = $encoded;
+	    if ($charset->encoder) {
+		$u = $charset->encoder->decode($encoded); # dec. by output
+	    } elsif (!USE_ENCODE) { # workaround for pre-Encode environment
+		$u = "x$u";
+	    } else { # NOTREACHED
+		croak __PACKAGE__.": bug in _text_encode.  Report developer.";
+	    }
+	    $charset->encoder(__PACKAGE__->new($DEFAULT_CHARSET,
+					       Mapping => 'STANDARD'))
+		if $u eq $encoded;
+	} elsif ($charset->{OutputCharset} ne "US-ASCII") {
+	    $charset->encoder(__PACKAGE__->new($DEFAULT_CHARSET,
+					       Mapping => 'STANDARD'));
+	}
+    }
+
     return ($encoded, $charset);
 }
 

Modified: branches/upstream/libmime-charset-perl/current/Charset/JA_JP.pod
URL: http://svn.debian.org/wsvn/branches/upstream/libmime-charset-perl/current/Charset/JA_JP.pod?rev=18513&op=diff
==============================================================================
--- branches/upstream/libmime-charset-perl/current/Charset/JA_JP.pod (original)
+++ branches/upstream/libmime-charset-perl/current/Charset/JA_JP.pod Sat Apr 12 19:12:18 2008
@@ -206,14 +206,14 @@
 
 =over 4
 
-=item Replacement => REPLACEMENT
-
-エラー処理法の指定。L<"エラー処理"> 参照。
-
 =item Detect7bit => YESNO
 
 CHARSET がないとき、7ビットのキャラクタセットを自動認識しようとする。
 既定は C<"YES">。
+
+=item Replacement => REPLACEMENT
+
+エラー処理法の指定。L<"エラー処理"> 参照。
 
 =back
 
@@ -283,14 +283,14 @@
 
 =over 4
 
-=item Replacement => REPLACEMENT
-
-エラー処理法の指定。L<"エラー処理"> 参照。
-
 =item Detect7bit => YESNO
 
 CHARSET がないとき、7ビットのキャラクタセットを自動認識しようとする。
 既定は C<"YES">。
+
+=item Replacement => REPLACEMENT
+
+エラー処理法の指定。L<"エラー処理"> 参照。
 
 =back
 
@@ -424,7 +424,7 @@
 =item C<"DEFAULT">
 
 不正な文字を置き換え文字で置き換える。
-UCM に基づくエンコーダを持つキャラクタセットでは <subchar> を使う。
+UCM に基づくエンコーダを持つキャラクタセットでは <subchar> を使うことがある。
 
 =item C<"FALLBACK">
 

Modified: branches/upstream/libmime-charset-perl/current/META.yml
URL: http://svn.debian.org/wsvn/branches/upstream/libmime-charset-perl/current/META.yml?rev=18513&op=diff
==============================================================================
--- branches/upstream/libmime-charset-perl/current/META.yml (original)
+++ branches/upstream/libmime-charset-perl/current/META.yml Sat Apr 12 19:12:18 2008
@@ -1,7 +1,7 @@
 # http://module-build.sourceforge.net/META-spec.html
 #XXXXXXX This is a prototype!!!  It will change in the future!!! XXXXX#
 name:         MIME-Charset
-version:      1.005
+version:      1.006
 version_from: Charset.pm
 installdirs:  site
 requires:

Modified: branches/upstream/libmime-charset-perl/current/README
URL: http://svn.debian.org/wsvn/branches/upstream/libmime-charset-perl/current/README?rev=18513&op=diff
==============================================================================
--- branches/upstream/libmime-charset-perl/current/README (original)
+++ branches/upstream/libmime-charset-perl/current/README Sat Apr 12 19:12:18 2008
@@ -149,12 +149,12 @@
         Unicode/multibyte support is disabled (see "USE_ENCODE"), conversion
         will not be performed. So these options do not have any effects.
 
-        Replacement => REPLACEMENT
-            Specifies error handling scheme. See "Error Handling".
-
         Detect7bit => YESNO
             Try auto-detecting 7-bit charset when CHARSET is not given.
             Default is "YES".
+
+        Replacement => REPLACEMENT
+            Specifies error handling scheme. See "Error Handling".
 
         3-item list of (*converted string*, *charset for output*,
         *transfer-encoding*) will be returned. *Transfer-encoding* will be
@@ -197,12 +197,12 @@
         Unicode/multibyte support is disabled (see "USE_ENCODE"), conversion
         will not be performed. So these options do not have any effects.
 
-        Replacement => REPLACEMENT
-            Specifies error handling scheme. See "Error Handling".
-
         Detect7bit => YESNO
             Try auto-detecting 7-bit charset when CHARSET is not given.
             Default is "YES".
+
+        Replacement => REPLACEMENT
+            Specifies error handling scheme. See "Error Handling".
 
         3-item list of (*converted string*, *charset for output*, *encoding
         scheme*) will be returned. *Encoding scheme* will be either "B", "Q"

Modified: branches/upstream/libmime-charset-perl/current/t/01encode.t
URL: http://svn.debian.org/wsvn/branches/upstream/libmime-charset-perl/current/t/01encode.t?rev=18513&op=diff
==============================================================================
--- branches/upstream/libmime-charset-perl/current/t/01encode.t (original)
+++ branches/upstream/libmime-charset-perl/current/t/01encode.t Sat Apr 12 19:12:18 2008
@@ -1,7 +1,7 @@
 use strict;
 use Test;
 
-BEGIN { plan tests => 12 }
+BEGIN { plan tests => 18 }
 
 use MIME::Charset qw(:trans);
 
@@ -14,47 +14,59 @@
 ($converted, $charset, $encoding) = body_encode($src, "euc-jp");
 if (MIME::Charset::USE_ENCODE) {
     ok($converted eq $dst);
-    ok($charset eq "ISO-2022-JP");
-    ok($encoding eq "7BIT");
+    ok($charset, "ISO-2022-JP", $charset);
+    ok($encoding, "7BIT", $encoding);
 } else {
     ok($converted eq $src);
-    ok($charset eq "EUC-JP");
-    ok($encoding eq "8BIT");
+    ok($charset, "EUC-JP", $charset);
+    ok($encoding, "8BIT", $encoding);
 }
 
 # test get encodings for body with auto-detection of 7-bit
 ($converted, $charset, $encoding) = body_encode($dst);
 if (MIME::Charset::USE_ENCODE) {
     ok($converted eq $dst);
-    ok($charset eq "ISO-2022-JP");
-    ok($encoding eq "7BIT");
+    ok($charset, "ISO-2022-JP", $charset);
+    ok($encoding, "7BIT", $encoding);
 } else {
     ok($converted eq $dst);
-    ok($charset eq "US-ASCII");
-    ok($encoding eq "7BIT");
+    ok($charset, "US-ASCII", $charset);
+    ok($encoding, "7BIT", $encoding);
 }
 
 # test get encodings for header
 ($converted, $charset, $encoding) = header_encode($src, "euc-jp");
 if (MIME::Charset::USE_ENCODE) {
     ok($converted eq $dst);
-    ok($charset eq "ISO-2022-JP");
-    ok($encoding eq "B");
+    ok($charset, "ISO-2022-JP", $charset);
+    ok($encoding, "B", $encoding);
 } else {
     ok($converted eq $src);
-    ok($charset eq "EUC-JP");
-    ok($encoding eq "B");
+    ok($charset, "EUC-JP", $charset);
+    ok($encoding, "B", $encoding);
 }
 
 # test get encodings for header with auto-detection of 7-bit
 ($converted, $charset, $encoding) = header_encode($dst);
 if (MIME::Charset::USE_ENCODE) {
     ok($converted eq $dst);
-    ok($charset eq "ISO-2022-JP");
-    ok($encoding eq "B");
+    ok($charset, "ISO-2022-JP", $charset);
+    ok($encoding, "B", $encoding);
 } else {
     ok($converted eq $dst);
-    ok($charset eq "US-ASCII");
-    ok(!defined $encoding);
+    ok($charset, "US-ASCII", $charset);
+    ok($encoding, undef, $encoding);
 }
 
+$src = "~{<:Ky2;S{#,NpJ)l6HK!#~}~";
+($converted, $charset, $encoding) = header_encode($src, "hz-gb-2312");
+ok($converted eq $src);
+ok($charset, "HZ-GB-2312", $charset);
+ok($encoding, "B", $encoding);
+
+$src = "This doesn't contain non-ASCII.";
+($converted, $charset, $encoding) = header_encode($src, "hz-gb-2312");
+ok($converted eq $src);
+ok($charset, "US-ASCII", $charset);
+ok($encoding, undef, $encoding);
+

Modified: branches/upstream/libmime-charset-perl/current/t/01ooencode.t
URL: http://svn.debian.org/wsvn/branches/upstream/libmime-charset-perl/current/t/01ooencode.t?rev=18513&op=diff
==============================================================================
--- branches/upstream/libmime-charset-perl/current/t/01ooencode.t (original)
+++ branches/upstream/libmime-charset-perl/current/t/01ooencode.t Sat Apr 12 19:12:18 2008
@@ -1,7 +1,7 @@
 use strict;
 use Test;
 
-BEGIN { plan tests => 12 }
+BEGIN { plan tests => 18 }
 
 use MIME::Charset qw(:trans);
 
@@ -16,47 +16,61 @@
 ($converted, $charset, $encoding) = $obj->body_encode($src);
 if (MIME::Charset::USE_ENCODE) {
     ok($converted eq $dst);
-    ok($charset eq "ISO-2022-JP");
-    ok($encoding eq "7BIT");
+    ok($charset, "ISO-2022-JP", $charset);
+    ok($encoding, "7BIT", $encoding);
 } else {
     ok($converted eq $src);
-    ok($charset eq "EUC-JP");
-    ok($encoding eq "8BIT");
+    ok($charset, "EUC-JP", $charset);
+    ok($encoding, "8BIT", $encoding);
 }
 
 # test get encodings for body with auto-detection of 7-bit
 ($converted, $charset, $encoding) = $null->body_encode($dst);
 if (MIME::Charset::USE_ENCODE) {
     ok($converted eq $dst);
-    ok($charset eq "ISO-2022-JP");
-    ok($encoding eq "7BIT");
+    ok($charset, "ISO-2022-JP", $charset);
+    ok($encoding, "7BIT", $encoding);
 } else {
     ok($converted eq $dst);
-    ok($charset eq "US-ASCII");
-    ok($encoding eq "7BIT");
+    ok($charset, "US-ASCII", $charset);
+    ok($encoding, "7BIT", $encoding);
 }
 
 # test get encodings for header
 ($converted, $charset, $encoding) = $obj->header_encode($src);
 if (MIME::Charset::USE_ENCODE) {
     ok($converted eq $dst);
-    ok($charset eq "ISO-2022-JP");
-    ok($encoding eq "B");
+    ok($charset, "ISO-2022-JP", $charset);
+    ok($encoding, "B", $encoding);
 } else {
     ok($converted eq $src);
-    ok($charset eq "EUC-JP");
-    ok($encoding eq "B");
+    ok($charset, "EUC-JP", $charset);
+    ok($encoding, "B", $encoding);
 }
 
 # test get encodings for header with auto-detection of 7-bit
 ($converted, $charset, $encoding) = $null->header_encode($dst);
 if (MIME::Charset::USE_ENCODE) {
     ok($converted eq $dst);
-    ok($charset eq "ISO-2022-JP");
-    ok($encoding eq "B");
+    ok($charset, "ISO-2022-JP", $charset);
+    ok($encoding, "B", $encoding);
 } else {
     ok($converted eq $dst);
-    ok($charset eq "US-ASCII");
-    ok(!defined $encoding);
+    ok($charset, "US-ASCII", $charset);
+    ok($encoding, undef, $encoding);
 }
 
+$obj = MIME::Charset->new("hz-gb-2312");
+$src = "~{<:Ky2;S{#,NpJ)l6HK!#~}~";
+
+($converted, $charset, $encoding) = $obj->header_encode($src);
+ok($converted eq $src);
+ok($charset, "HZ-GB-2312", $charset);
+ok($encoding, "B", $encoding);
+
+$src = "This doesn't contain non-ASCII.";
+($converted, $charset, $encoding) = $obj->header_encode($src);
+ok($converted eq $src);
+ok($charset, "US-ASCII", $charset);
+ok($encoding, undef, $encoding);
+

Modified: branches/upstream/libmime-charset-perl/current/t/04alias.t
URL: http://svn.debian.org/wsvn/branches/upstream/libmime-charset-perl/current/t/04alias.t?rev=18513&op=diff
==============================================================================
--- branches/upstream/libmime-charset-perl/current/t/04alias.t (original)
+++ branches/upstream/libmime-charset-perl/current/t/04alias.t Sat Apr 12 19:12:18 2008
@@ -1,7 +1,7 @@
 use strict;
 use Test;
 
-BEGIN { plan tests => 25 }
+BEGIN { plan tests => 26 }
 
 my @names = qw(
 	    US-ASCII
@@ -11,6 +11,7 @@
 	    ISO-8859-6-I ISO-8859-6-E ISO-8859-8-E ISO-8859-8-I
 	    GB2312 BIG5 KOI8-R
 	    UTF-8
+	    HZ-GB-2312
 	   );
 
 use MIME::Charset qw(:info);




More information about the Pkg-perl-cvs-commits mailing list