r18513 - in /branches/upstream/libmime-charset-perl/current: Changes Charset.pm Charset/JA_JP.pod META.yml README t/01encode.t t/01ooencode.t t/04alias.t
gregoa-guest at users.alioth.debian.org
gregoa-guest at users.alioth.debian.org
Sat Apr 12 19:12:19 UTC 2008
Author: gregoa-guest
Date: Sat Apr 12 19:12:18 2008
New Revision: 18513
URL: http://svn.debian.org/wsvn/?sc=1&rev=18513
Log:
[svn-upgrade] Integrating new upstream version, libmime-charset-perl (1.006)
Modified:
branches/upstream/libmime-charset-perl/current/Changes
branches/upstream/libmime-charset-perl/current/Charset.pm
branches/upstream/libmime-charset-perl/current/Charset/JA_JP.pod
branches/upstream/libmime-charset-perl/current/META.yml
branches/upstream/libmime-charset-perl/current/README
branches/upstream/libmime-charset-perl/current/t/01encode.t
branches/upstream/libmime-charset-perl/current/t/01ooencode.t
branches/upstream/libmime-charset-perl/current/t/04alias.t
Modified: branches/upstream/libmime-charset-perl/current/Changes
URL: http://svn.debian.org/wsvn/branches/upstream/libmime-charset-perl/current/Changes?rev=18513&op=diff
==============================================================================
--- branches/upstream/libmime-charset-perl/current/Changes (original)
+++ branches/upstream/libmime-charset-perl/current/Changes Sat Apr 12 19:12:18 2008
@@ -1,3 +1,9 @@
+2008-04-12 Hatuka*nezumi - IKEDA Soji <hatuka at nezumi.nu>
+ * Release 1.006.
+ * Workarounds for ``US-ASCII transformation'' charsets
+ i.e. HZ-GB-2312 (RFC1842) and UTF-7 (RFC 2152).
+ * Added tests.
+
2008-04-07 Hatuka*nezumi - IKEDA Soji <hatuka at nezumi.nu>
* Release 1.005.
* _enclen_Q(): Restrict characters in encoded-word
Modified: branches/upstream/libmime-charset-perl/current/Charset.pm
URL: http://svn.debian.org/wsvn/branches/upstream/libmime-charset-perl/current/Charset.pm?rev=18513&op=diff
==============================================================================
--- branches/upstream/libmime-charset-perl/current/Charset.pm (original)
+++ branches/upstream/libmime-charset-perl/current/Charset.pm Sat Apr 12 19:12:18 2008
@@ -121,7 +121,7 @@
}
}
-$VERSION = '1.005';
+$VERSION = '1.006';
######## Private Attributes ########
@@ -155,6 +155,8 @@
'ISO-2022-JP' => ['B', undef, undef],
'KOI8-R' => ['B', 'B', undef],
'UTF-8' => ['S', 'B', undef],
+ 'HZ-GB-2312' => ['B', undef, undef],
+ 'UTF-7' => ['Q', undef, undef],
'GSM03.38' => [undef, undef, undef], # not for MIME
# We're making this one up to represent raw unencoded 8bit
'8BIT' => [undef, 'B', 'ISO-8859-1'],
@@ -176,6 +178,7 @@
"UNICODE-1-1-UTF-7" => "UTF-7", # RFC 1642 (obs.)
"UTF8" => "UTF-8",
"UTF-8-STRICT" => "UTF-8", # Perl internal use
+ "HZ" => "HZ-GB-2312", # RFC 1842
"GSM0338" => "GSM03.38",
);
@@ -219,6 +222,7 @@
'ISO-2022-JP-3' => [['iso-2022-jp-3', 'Encode::JIS2K'], ],
'SHIFT_JISX0213'=> [['shiftjisx0213', 'Encode::JIS2K'], ],
'EUC-TW' => [['euc-tw', 'Encode::HanExtra'], ],
+ 'HZ-GB-2312' => [['hz'], ], # Encode::CN
'GSM03.38' => [['gsm0338'], ], # Encode::GSM0338
},
);
@@ -284,7 +288,11 @@
}x;
my $ISO2022RE = qr{
- ^ISO-2022-
+ ISO-2022-.+
+}ix;
+
+my $ASCIITRANSRE = qr{
+ HZ-GB-2312 | UTF-7
}ix;
@@ -321,6 +329,7 @@
my %params = @_;
my $mapping = uc($params{'Mapping'} || $Config->{Mapping});
+ $charset = "HZ" if $charset =~ /\bhz.?gb.?2312$/i; # workaround
$charset = resolve_alias($charset) || $charset;
$charset = $CHARSET_ALIASES{uc($charset)} || uc($charset);
my ($henc, $benc, $outcset);
@@ -456,8 +465,8 @@
$charset = __PACKAGE__->new($charset) unless ref $charset;
$self->{OutputCharset} = $charset->{InputCharset};
$self->{Encoder} = $charset->{Decoder};
- #XXX$self->{BodyEncoding} = $charset->{BodyEncoding};
- #XXX$self->{HeaderEncoding} = $charset->{HeaderEncoding};
+ $self->{BodyEncoding} = $charset->{BodyEncoding};
+ $self->{HeaderEncoding} = $charset->{HeaderEncoding};
}
$self->{Encoder};
}
@@ -518,14 +527,14 @@
=over 4
-=item Replacement => REPLACEMENT
-
-Specifies error handling scheme. See L<"Error Handling">.
-
=item Detect7bit => YESNO
Try auto-detecting 7-bit charset when CHARSET is not given.
Default is C<"YES">.
+
+=item Replacement => REPLACEMENT
+
+Specifies error handling scheme. See L<"Error Handling">.
=back
@@ -549,24 +558,18 @@
$text = $self;
$self = __PACKAGE__->new(shift);
}
- my ($encoded, $charset) = &_text_encode($self, $text, @_);
+ my ($encoded, $charset) = $self->_text_encode($text, @_);
return ($encoded, undef, 'BASE64')
unless $charset and $charset->{InputCharset};
my $cset = $charset->{OutputCharset};
# Determine transfer-encoding.
- my $enc;
- if ($encoded !~ /$NONASCIIRE/) {
- $cset = "US-ASCII";
- $enc = undef;
- } else {
- $enc = $charset->{BodyEncoding};
- }
+ my $enc = $charset->{BodyEncoding};
if (!$enc and $encoded !~ /\x00/) { # Eliminate hostile NUL character.
if ($encoded =~ $NON7BITRE) { # String contains 8bit char(s).
$enc = '8BIT';
- } elsif ($cset =~ $ISO2022RE) { # ISO-2022-* outputs are 7BIT.
+ } elsif ($cset =~ /^($ISO2022RE|$ASCIITRANSRE)$/) { # 7BIT.
$enc = '7BIT';
} else { # Pure ASCII.
$enc = '7BIT';
@@ -688,14 +691,14 @@
=over 4
-=item Replacement => REPLACEMENT
-
-Specifies error handling scheme. See L<"Error Handling">.
-
=item Detect7bit => YESNO
Try auto-detecting 7-bit charset when CHARSET is not given.
Default is C<"YES">.
+
+=item Replacement => REPLACEMENT
+
+Specifies error handling scheme. See L<"Error Handling">.
=back
@@ -722,22 +725,16 @@
$text = $self;
$self = __PACKAGE__->new(shift);
}
- my ($encoded, $charset) = &_text_encode($self, $text, @_);
+ my ($encoded, $charset) = $self->_text_encode($text, @_);
return ($encoded, '8BIT', undef)
unless $charset and $charset->{InputCharset};
my $cset = $charset->{OutputCharset};
# Determine encoding scheme.
- my $enc;
- if ($encoded !~ /$NONASCIIRE/) {
- $cset = "US-ASCII";
- $enc = undef;
- } else {
- $enc = $charset->{HeaderEncoding};
- }
+ my $enc = $charset->{HeaderEncoding};
if (!$enc and $encoded !~ $NON7BITRE) {
- unless ($cset =~ $ISO2022RE) { # ISO-2022-* outputs are 7BIT.
+ unless ($cset =~ /^($ISO2022RE|$ASCIITRANSRE)$/) { # 7BIT.
$cset = 'US-ASCII';
}
} elsif ($enc eq 'S') {
@@ -758,24 +755,28 @@
my %params = @_;
my $replacement = uc($params{'Replacement'} || $Config->{Replacement});
my $detect7bit = uc($params{'Detect7bit'} || $Config->{Detect7bit});
-
- unless ($charset and $charset->{InputCharset}) {
+ my $encoding = $params{'Encoding'} ||
+ (exists $params{'Encoding'}? undef: 'A'); # undocumented
+
+ if (!$encoding or $encoding ne 'A') { # no 7-bit auto-detection
+ $detect7bit = 'NO';
+ }
+ unless ($charset->{InputCharset}) {
if ($s =~ $NON7BITRE) {
return ($s, undef);
} elsif ($detect7bit ne "NO") {
$charset = __PACKAGE__->new(&_detect_7bit_charset($s));
} else {
- $charset = __PACKAGE__->new($DEFAULT_CHARSET);
+ $charset = __PACKAGE__->new($DEFAULT_CHARSET,
+ Mapping => 'STANDARD');
}
}
-
- # Unknown charset.
- unless ($charset->{Decoder}) {
- croak "unknown charset ``$charset->{InputCharset}''"
- if is_utf8($s) or $s =~ /[^\x00-\xFF]/;
- return ($s, $charset);
- }
-
+ if (!$encoding or $encoding ne 'A') { # no conversion
+ $charset = $charset->dup;
+ $charset->encoder($charset);
+ $charset->{HeaderEncoding} = $encoding;
+ $charset->{BodyEncoding} = $encoding;
+ }
my $check = ($replacement and $replacement =~ /^\d+$/)?
$replacement:
{
@@ -791,7 +792,7 @@
# fallback charset.
my $encoded;
if (is_utf8($s) or $s =~ /[^\x00-\xFF]/ or
- $charset->{InputCharset} ne $charset->{OutputCharset}) {
+ ($charset->{InputCharset} || "") ne ($charset->{OutputCharset} || "")) {
if ($check & 0x1) { # CROAK or FALLBACK
eval {
$encoded = $s;
@@ -799,7 +800,8 @@
};
if ($@) {
if ($replacement eq "FALLBACK" and $FALLBACK_CHARSET) {
- my $cset = __PACKAGE__->new($FALLBACK_CHARSET);
+ my $cset = __PACKAGE__->new($FALLBACK_CHARSET,
+ Mapping => 'STANDARD');
# croak unknown charset
croak "unknown charset ``$FALLBACK_CHARSET''"
unless $charset->{Decoder};
@@ -822,6 +824,27 @@
$encoded = $s;
}
+ if ($encoded !~ /$NONASCIIRE/) { # maybe ASCII
+ # check ``ASCII transformation'' charsets
+ if ($charset->{OutputCharset} =~ /^($ASCIITRANSRE)$/ and
+ $encoded =~ /[+~]/) {
+ my $u = $encoded;
+ if ($charset->encoder) {
+ $u = $charset->encoder->decode($encoded); # dec. by output
+ } elsif (!USE_ENCODE) { # workaround for pre-Encode environment
+ $u = "x$u";
+ } else { # NOTREACHED
+ croak __PACKAGE__.": bug in _text_encode. Report developer.";
+ }
+ $charset->encoder(__PACKAGE__->new($DEFAULT_CHARSET,
+ Mapping => 'STANDARD'))
+ if $u eq $encoded;
+ } elsif ($charset->{OutputCharset} ne "US-ASCII") {
+ $charset->encoder(__PACKAGE__->new($DEFAULT_CHARSET,
+ Mapping => 'STANDARD'));
+ }
+ }
+
return ($encoded, $charset);
}
Modified: branches/upstream/libmime-charset-perl/current/Charset/JA_JP.pod
URL: http://svn.debian.org/wsvn/branches/upstream/libmime-charset-perl/current/Charset/JA_JP.pod?rev=18513&op=diff
==============================================================================
--- branches/upstream/libmime-charset-perl/current/Charset/JA_JP.pod (original)
+++ branches/upstream/libmime-charset-perl/current/Charset/JA_JP.pod Sat Apr 12 19:12:18 2008
@@ -206,14 +206,14 @@
=over 4
-=item Replacement => REPLACEMENT
-
-エラー処理法の指定。L<"エラー処理"> 参照。
-
=item Detect7bit => YESNO
CHARSET がないとき、7ビットのキャラクタセットを自動認識しようとする。
既定は C<"YES">。
+
+=item Replacement => REPLACEMENT
+
+エラー処理法の指定。L<"エラー処理"> 参照。
=back
@@ -283,14 +283,14 @@
=over 4
-=item Replacement => REPLACEMENT
-
-エラー処理法の指定。L<"エラー処理"> 参照。
-
=item Detect7bit => YESNO
CHARSET がないとき、7ビットのキャラクタセットを自動認識しようとする。
既定は C<"YES">。
+
+=item Replacement => REPLACEMENT
+
+エラー処理法の指定。L<"エラー処理"> 参照。
=back
@@ -424,7 +424,7 @@
=item C<"DEFAULT">
不正な文字を置き換え文字で置き換える。
-UCM に基づくエンコーダを持つキャラクタセットでは <subchar> を使う。
+UCM に基づくエンコーダを持つキャラクタセットでは <subchar> を使うことがある。
=item C<"FALLBACK">
Modified: branches/upstream/libmime-charset-perl/current/META.yml
URL: http://svn.debian.org/wsvn/branches/upstream/libmime-charset-perl/current/META.yml?rev=18513&op=diff
==============================================================================
--- branches/upstream/libmime-charset-perl/current/META.yml (original)
+++ branches/upstream/libmime-charset-perl/current/META.yml Sat Apr 12 19:12:18 2008
@@ -1,7 +1,7 @@
# http://module-build.sourceforge.net/META-spec.html
#XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX#
name: MIME-Charset
-version: 1.005
+version: 1.006
version_from: Charset.pm
installdirs: site
requires:
Modified: branches/upstream/libmime-charset-perl/current/README
URL: http://svn.debian.org/wsvn/branches/upstream/libmime-charset-perl/current/README?rev=18513&op=diff
==============================================================================
--- branches/upstream/libmime-charset-perl/current/README (original)
+++ branches/upstream/libmime-charset-perl/current/README Sat Apr 12 19:12:18 2008
@@ -149,12 +149,12 @@
Unicode/multibyte support is disabled (see "USE_ENCODE"), conversion
will not be performed. So these options do not have any effects.
- Replacement => REPLACEMENT
- Specifies error handling scheme. See "Error Handling".
-
Detect7bit => YESNO
Try auto-detecting 7-bit charset when CHARSET is not given.
Default is "YES".
+
+ Replacement => REPLACEMENT
+ Specifies error handling scheme. See "Error Handling".
3-item list of (*converted string*, *charset for output*,
*transfer-encoding*) will be returned. *Transfer-encoding* will be
@@ -197,12 +197,12 @@
Unicode/multibyte support is disabled (see "USE_ENCODE"), conversion
will not be performed. So these options do not have any effects.
- Replacement => REPLACEMENT
- Specifies error handling scheme. See "Error Handling".
-
Detect7bit => YESNO
Try auto-detecting 7-bit charset when CHARSET is not given.
Default is "YES".
+
+ Replacement => REPLACEMENT
+ Specifies error handling scheme. See "Error Handling".
3-item list of (*converted string*, *charset for output*, *encoding
scheme*) will be returned. *Encoding scheme* will be either "B", "Q"
Modified: branches/upstream/libmime-charset-perl/current/t/01encode.t
URL: http://svn.debian.org/wsvn/branches/upstream/libmime-charset-perl/current/t/01encode.t?rev=18513&op=diff
==============================================================================
--- branches/upstream/libmime-charset-perl/current/t/01encode.t (original)
+++ branches/upstream/libmime-charset-perl/current/t/01encode.t Sat Apr 12 19:12:18 2008
@@ -1,7 +1,7 @@
use strict;
use Test;
-BEGIN { plan tests => 12 }
+BEGIN { plan tests => 18 }
use MIME::Charset qw(:trans);
@@ -14,47 +14,59 @@
($converted, $charset, $encoding) = body_encode($src, "euc-jp");
if (MIME::Charset::USE_ENCODE) {
ok($converted eq $dst);
- ok($charset eq "ISO-2022-JP");
- ok($encoding eq "7BIT");
+ ok($charset, "ISO-2022-JP", $charset);
+ ok($encoding, "7BIT", $encoding);
} else {
ok($converted eq $src);
- ok($charset eq "EUC-JP");
- ok($encoding eq "8BIT");
+ ok($charset, "EUC-JP", $charset);
+ ok($encoding, "8BIT", $encoding);
}
# test get encodings for body with auto-detection of 7-bit
($converted, $charset, $encoding) = body_encode($dst);
if (MIME::Charset::USE_ENCODE) {
ok($converted eq $dst);
- ok($charset eq "ISO-2022-JP");
- ok($encoding eq "7BIT");
+ ok($charset, "ISO-2022-JP", $charset);
+ ok($encoding, "7BIT", $encoding);
} else {
ok($converted eq $dst);
- ok($charset eq "US-ASCII");
- ok($encoding eq "7BIT");
+ ok($charset, "US-ASCII", $charset);
+ ok($encoding, "7BIT", $encoding);
}
# test get encodings for header
($converted, $charset, $encoding) = header_encode($src, "euc-jp");
if (MIME::Charset::USE_ENCODE) {
ok($converted eq $dst);
- ok($charset eq "ISO-2022-JP");
- ok($encoding eq "B");
+ ok($charset, "ISO-2022-JP", $charset);
+ ok($encoding, "B", $encoding);
} else {
ok($converted eq $src);
- ok($charset eq "EUC-JP");
- ok($encoding eq "B");
+ ok($charset, "EUC-JP", $charset);
+ ok($encoding, "B", $encoding);
}
# test get encodings for header with auto-detection of 7-bit
($converted, $charset, $encoding) = header_encode($dst);
if (MIME::Charset::USE_ENCODE) {
ok($converted eq $dst);
- ok($charset eq "ISO-2022-JP");
- ok($encoding eq "B");
+ ok($charset, "ISO-2022-JP", $charset);
+ ok($encoding, "B", $encoding);
} else {
ok($converted eq $dst);
- ok($charset eq "US-ASCII");
- ok(!defined $encoding);
+ ok($charset, "US-ASCII", $charset);
+ ok($encoding, undef, $encoding);
}
+$src = "~{<:Ky2;S{#,NpJ)l6HK!#~}~";
+($converted, $charset, $encoding) = header_encode($src, "hz-gb-2312");
+ok($converted eq $src);
+ok($charset, "HZ-GB-2312", $charset);
+ok($encoding, "B", $encoding);
+
+$src = "This doesn't contain non-ASCII.";
+($converted, $charset, $encoding) = header_encode($src, "hz-gb-2312");
+ok($converted eq $src);
+ok($charset, "US-ASCII", $charset);
+ok($encoding, undef, $encoding);
+
Modified: branches/upstream/libmime-charset-perl/current/t/01ooencode.t
URL: http://svn.debian.org/wsvn/branches/upstream/libmime-charset-perl/current/t/01ooencode.t?rev=18513&op=diff
==============================================================================
--- branches/upstream/libmime-charset-perl/current/t/01ooencode.t (original)
+++ branches/upstream/libmime-charset-perl/current/t/01ooencode.t Sat Apr 12 19:12:18 2008
@@ -1,7 +1,7 @@
use strict;
use Test;
-BEGIN { plan tests => 12 }
+BEGIN { plan tests => 18 }
use MIME::Charset qw(:trans);
@@ -16,47 +16,61 @@
($converted, $charset, $encoding) = $obj->body_encode($src);
if (MIME::Charset::USE_ENCODE) {
ok($converted eq $dst);
- ok($charset eq "ISO-2022-JP");
- ok($encoding eq "7BIT");
+ ok($charset, "ISO-2022-JP", $charset);
+ ok($encoding, "7BIT", $encoding);
} else {
ok($converted eq $src);
- ok($charset eq "EUC-JP");
- ok($encoding eq "8BIT");
+ ok($charset, "EUC-JP", $charset);
+ ok($encoding, "8BIT", $encoding);
}
# test get encodings for body with auto-detection of 7-bit
($converted, $charset, $encoding) = $null->body_encode($dst);
if (MIME::Charset::USE_ENCODE) {
ok($converted eq $dst);
- ok($charset eq "ISO-2022-JP");
- ok($encoding eq "7BIT");
+ ok($charset, "ISO-2022-JP", $charset);
+ ok($encoding, "7BIT", $encoding);
} else {
ok($converted eq $dst);
- ok($charset eq "US-ASCII");
- ok($encoding eq "7BIT");
+ ok($charset, "US-ASCII", $charset);
+ ok($encoding, "7BIT", $encoding);
}
# test get encodings for header
($converted, $charset, $encoding) = $obj->header_encode($src);
if (MIME::Charset::USE_ENCODE) {
ok($converted eq $dst);
- ok($charset eq "ISO-2022-JP");
- ok($encoding eq "B");
+ ok($charset, "ISO-2022-JP", $charset);
+ ok($encoding, "B", $encoding);
} else {
ok($converted eq $src);
- ok($charset eq "EUC-JP");
- ok($encoding eq "B");
+ ok($charset, "EUC-JP", $charset);
+ ok($encoding, "B", $encoding);
}
# test get encodings for header with auto-detection of 7-bit
($converted, $charset, $encoding) = $null->header_encode($dst);
if (MIME::Charset::USE_ENCODE) {
ok($converted eq $dst);
- ok($charset eq "ISO-2022-JP");
- ok($encoding eq "B");
+ ok($charset, "ISO-2022-JP", $charset);
+ ok($encoding, "B", $encoding);
} else {
ok($converted eq $dst);
- ok($charset eq "US-ASCII");
- ok(!defined $encoding);
+ ok($charset, "US-ASCII", $charset);
+ ok($encoding, undef, $encoding);
}
+$obj = MIME::Charset->new("hz-gb-2312");
+$src = "~{<:Ky2;S{#,NpJ)l6HK!#~}~";
+
+($converted, $charset, $encoding) = $obj->header_encode($src);
+ok($converted eq $src);
+ok($charset, "HZ-GB-2312", $charset);
+ok($encoding, "B", $encoding);
+
+$src = "This doesn't contain non-ASCII.";
+($converted, $charset, $encoding) = $obj->header_encode($src);
+ok($converted eq $src);
+ok($charset, "US-ASCII", $charset);
+ok($encoding, undef, $encoding);
+
Modified: branches/upstream/libmime-charset-perl/current/t/04alias.t
URL: http://svn.debian.org/wsvn/branches/upstream/libmime-charset-perl/current/t/04alias.t?rev=18513&op=diff
==============================================================================
--- branches/upstream/libmime-charset-perl/current/t/04alias.t (original)
+++ branches/upstream/libmime-charset-perl/current/t/04alias.t Sat Apr 12 19:12:18 2008
@@ -1,7 +1,7 @@
use strict;
use Test;
-BEGIN { plan tests => 25 }
+BEGIN { plan tests => 26 }
my @names = qw(
US-ASCII
@@ -11,6 +11,7 @@
ISO-8859-6-I ISO-8859-6-E ISO-8859-8-E ISO-8859-8-I
GB2312 BIG5 KOI8-R
UTF-8
+ HZ-GB-2312
);
use MIME::Charset qw(:info);
More information about the Pkg-perl-cvs-commits
mailing list