[libencode-perl] 01/05: Imported Upstream version 2.72

dom at earth.li dom at earth.li
Sun Mar 29 17:46:06 UTC 2015


This is an automated email from the git hooks/post-receive script.

dom pushed a commit to branch experimental
in repository libencode-perl.

commit 47e67306957211b9f3bb4d5d4e0d7606e3265302
Author: Dominic Hargreaves <dom at earth.li>
Date:   Sun Mar 29 18:18:47 2015 +0100

    Imported Upstream version 2.72
---
 Changes                   |  79 +++++++-
 Encode.pm                 |  12 +-
 Encode.xs                 |  19 +-
 MANIFEST                  |   1 +
 META.json                 |   4 +-
 META.yml                  |   4 +-
 Makefile.PL               |   3 +-
 bin/enc2xs                |  40 ++--
 bin/encguess              | 145 ++++++++++++++
 encoding.pm               | 468 ++++++++++++++++++++++++----------------------
 lib/Encode/Alias.pm       |   6 +-
 lib/Encode/MIME/Header.pm |   4 +-
 t/Aliases.t               |   2 +
 t/taint.t                 |  27 ++-
 14 files changed, 545 insertions(+), 269 deletions(-)

diff --git a/Changes b/Changes
index 3567397..1622af0 100644
--- a/Changes
+++ b/Changes
@@ -1,15 +1,88 @@
 # Revision history for Perl extension Encode.
 #
-# $Id: Changes,v 2.64 2014/10/29 15:37:54 dankogai Exp dankogai $
+# $Id: Changes,v 2.72 2015/03/14 02:44:39 dankogai Exp dankogai $
 #
-$Revision: 2.64 $ $Date: 2014/10/29 15:37:54 $
+$Revision: 2.72 $ $Date: 2015/03/14 02:44:39 $
+! encoding.pm
+  Copied from bleadperl to be in sync with it again.
+  http://www.nntp.perl.org/group/perl.perl5.porters/2015/03/msg226576.html
+
+2.71 2015/03/12 00:03:52
+! encoding.pm
+  Pulled: Don't fail 'no encoding' on EBCDIC
+  https://github.com/dankogai/p5-encode/pull/38
+! lib/Encode/Alias.pm t/Aliases.t
+  Add cp65000 => UTF-7 and cp65001 => utf-8-strict
+  https://github.com/dankogai/p5-encode/issues/37
+! encoding.pm
+  Sync w/ bleadperl
+  https://github.com/dankogai/p5-encode/pull/36
+! bin/encguess
+  Pulled: show encguess example per #33
+  https://github.com/dankogai/p5-encode/pull/34
+
+2.70 2015/02/05 10:53:00
+! Makefile.PL
+  add bin/encguess to EXE_FILES
+
+2.69 2015/02/05 10:35:11
+! bin/encguess
+  Refactored so that
+  * does not depend on non-core module (File::Slurp in particular)
+  * PODified document
+  * -s "encA encB" to -s encA,encB which is more shell-friendly
+  * and more
+! MANIFEST
++ bin/encguess
+  Pulled: Added CLI wrapper for Encode::Guess
+  https://github.com/dankogai/p5-encode/pull/32 
+! Unicode/Unicode.pm
+  Pulled: Bump $VERSION in module changed since Encode-2.60
+  https://github.com/dankogai/p5-encode/pull/31
+
+2.68 2015/01/22 10:17:32
+! Pulled: Fix C++ build on Windows with VC++
+  https://github.com/dankogai/p5-encode/pull/30
+  https://rt.cpan.org/Public/Bug/Display.html?id=82897
+! lib/Encode/MIME/Header.pm t/taint.t
+  Pulled: maintain taint flag when encoding MIME on old perl
+  https://github.com/dankogai/p5-encode/pull/29
+! Encode.pm
+  POD fixes
+  https://github.com/dankogai/p5-encode/pull/27
+! bin/enc2xs
+  Addressed: RT#101345: [PATCH] reduce compiler warnings and stderr noise
+  enc2xs no longer emits verbose messages to STDERR 
+  unless -v switch or $ENV{ENC2XS_VERBOSE} is set.
+  https://rt.cpan.org/Public/Bug/Display.html?id=101345
+
+2.67 2014/12/04 20:13:00
+! t/taint.t
+  Now skips nonexistent methods like Encode::Detect->encode() should
+  that be installed.  This resolves RT#100105.
+  https://rt.cpan.org/Ticket/Display.html?id=100105
+
+2.66 2014/12/02 23:30:34
+! bin/enc2xs
+  Resolved RT#100656: enc2xs -C fails if URL::Encode::XS is installed
+  https://rt.cpan.org/Ticket/Display.html?id=100656
+
+2.65 2014/11/27 14:08:33
+! Changes Encode.xs bin/enc2xs
+  Applied 3 patches from jhi:
+    0001-For-C-don-t-use-the-array-size-in-forward-declaratiotion
+    0002-Unused-variables
+    0003-1-needs-casting-to-STRLEN
+  Message-Id: <54753674.6070909 at iki.fi>
+
+2.64 2014/10/29 15:37:54
 ! t/utf8warnings.t MANIFEST
   Retouch pull #26 so it works with perl < 5.14
 ! Encode.pm
 + t/utf8warnings.t
   Pulled: Catch and re-issue utf8 warnings at a higher level
   https://github.com/dankogai/p5-encode/pull/26
-+ Encode.xs
+! Encode.xs
   Pulled: Validate continuations in the incremental UTF-X decoder
   https://github.com/dankogai/p5-encode/pull/25
 
diff --git a/Encode.pm b/Encode.pm
index 820d6f7..3bb1097 100644
--- a/Encode.pm
+++ b/Encode.pm
@@ -1,10 +1,10 @@
 #
-# $Id: Encode.pm,v 2.64 2014/10/29 15:37:54 dankogai Exp dankogai $
+# $Id: Encode.pm,v 2.72 2015/03/14 02:43:24 dankogai Exp $
 #
 package Encode;
 use strict;
 use warnings;
-our $VERSION = sprintf "%d.%02d", q$Revision: 2.64 $ =~ /(\d+)/g;
+our $VERSION = sprintf "%d.%02d", q$Revision: 2.72 $ =~ /(\d+)/g;
 use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
 use XSLoader ();
 XSLoader::load( __PACKAGE__, $VERSION );
@@ -483,7 +483,7 @@ If the $string is C<undef>, then C<undef> is returned.
 
 This function returns the string that results from decoding the scalar
 value I<OCTETS>, assumed to be a sequence of octets in I<ENCODING>, into
-Perl's internal form.  The returns the resulting string.  As with encode(),
+Perl's internal form.  As with encode(),
 I<ENCODING> can be either a canonical name or an alias. For encoding names
 and aliases, see L</"Defining Aliases">; for I<CHECK>, see L</"Handling
 Malformed Data">.
@@ -573,7 +573,7 @@ Also note that:
 
   from_to($octets, $from, $to, $check);
 
-is equivalent t:o
+is equivalent to:
 
   $octets = encode($to, decode($from, $octets), $check);
 
@@ -700,7 +700,7 @@ In the first version above, you let the appropriate encoding layer
 handle the conversion.  In the second, you explicitly translate
 from one encoding to the other.
 
-Unfortunately, it may be that encodings are C<PerlIO>-savvy.  You can check
+Unfortunately, it may be that encodings are not C<PerlIO>-savvy.  You can check
 to see whether your encoding is supported by C<PerlIO> by invoking the
 C<perlio_ok> method on it:
 
@@ -836,7 +836,7 @@ Acts like C<FB_PERLQQ> but U+I<XXXX> is used instead of C<\x{I<XXXX>}>.
 
 Even the fallback for C<decode> must return octets, which are
 then decoded with the character encoding that C<decode> accepts. So for
-example if you wish to decode octests as UTF-8, and use ISO-8859-15 as
+example if you wish to decode octets as UTF-8, and use ISO-8859-15 as
 a fallback for bytes that are not valid UTF-8, you could write
 
     $str = decode 'UTF-8', $octets, sub {
diff --git a/Encode.xs b/Encode.xs
index 32be9b8..73f64a8 100644
--- a/Encode.xs
+++ b/Encode.xs
@@ -1,5 +1,5 @@
 /*
- $Id: Encode.xs,v 2.31 2014/10/29 15:37:54 dankogai Exp dankogai $
+ $Id: Encode.xs,v 2.33 2015/01/22 10:17:32 dankogai Exp $
  */
 
 #define PERL_NO_GET_CONTEXT
@@ -7,6 +7,7 @@
 #include "perl.h"
 #include "XSUB.h"
 #include "encode.h"
+#include "def_t.h"
 
 # define PERLIO_MODNAME  "PerlIO::encoding"
 # define PERLIO_FILENAME "PerlIO/encoding.pm"
@@ -21,6 +22,8 @@
 
 #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {		\
 			Perl_croak_nocontext("panic_unimplemented");	\
+                        PERL_UNUSED_VAR(sv); \
+                        PERL_UNUSED_VAR(encoding); \
              return (y)0; /* fool picky compilers */ \
                          }
 /**/
@@ -67,6 +70,10 @@ void
 call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig)
 {
     /* Exists for breakpointing */
+    PERL_UNUSED_VAR(routine);
+    PERL_UNUSED_VAR(done);
+    PERL_UNUSED_VAR(dest);
+    PERL_UNUSED_VAR(orig);
 }
 
 
@@ -363,11 +370,11 @@ process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv,
         if (strict && uv > PERL_UNICODE_MAX)
         ulen = (STRLEN) -1;
 #endif
-            if (ulen == -1) {
+            if (ulen == (STRLEN) -1) {
                 if (strict) {
                     uv = utf8n_to_uvuni(s, e - s, &ulen,
                                         UTF8_CHECK_ONLY | UTF8_ALLOW_NONSTRICT);
-                    if (ulen == -1)
+                    if (ulen == (STRLEN) -1)
                         goto malformed_byte;
                     goto malformed;
                 }
@@ -507,7 +514,6 @@ PREINIT:
     U8 *s;
     U8 *e;
     SV *dst;
-    bool renewed = 0;
     int check;
 CODE:
 {
@@ -568,6 +574,7 @@ Method_renew(obj)
 SV *	obj
 CODE:
 {
+    PERL_UNUSED_VAR(obj);
     XSRETURN(1);
 }
 
@@ -576,6 +583,7 @@ Method_renewed(obj)
 SV *    obj
 CODE:
     RETVAL = 0;
+    PERL_UNUSED_VAR(obj);
 OUTPUT:
     RETVAL
 
@@ -677,6 +685,7 @@ SV *	obj
 CODE:
 {
     /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */
+    PERL_UNUSED_VAR(obj);
     ST(0) = &PL_sv_no;
     XSRETURN(1);
 }
@@ -689,6 +698,7 @@ CODE:
     /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */
     /* require_pv(PERLIO_FILENAME); */
 
+    PERL_UNUSED_VAR(obj);
     eval_pv("require PerlIO::encoding", 0);
     SPAGAIN;
 
@@ -993,6 +1003,5 @@ OUTPUT:
 
 BOOT:
 {
-#include "def_t.h"
 #include "def_t.exh"
 }
diff --git a/MANIFEST b/MANIFEST
index ffa08d6..ce0c9c9 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -31,6 +31,7 @@ Unicode/Makefile.PL	Encode extension
 Unicode/Unicode.pm	Encode extension
 Unicode/Unicode.xs	Encode extension
 bin/enc2xs	Encode module generator
+bin/encguess	Guess the encoding of file(s)
 bin/piconv	iconv by perl
 bin/ucm2table	Table Generator for testing
 bin/ucmlint	A UCM Lint utility
diff --git a/META.json b/META.json
index 7915165..73e1379 100644
--- a/META.json
+++ b/META.json
@@ -4,7 +4,7 @@
       "unknown"
    ],
    "dynamic_config" : 1,
-   "generated_by" : "ExtUtils::MakeMaker version 6.98, CPAN::Meta::Converter version 2.142690",
+   "generated_by" : "ExtUtils::MakeMaker version 7.02, CPAN::Meta::Converter version 2.143240",
    "license" : [
       "perl_5"
    ],
@@ -43,5 +43,5 @@
          "url" : "https://github.com/dankogai/p5-encode"
       }
    },
-   "version" : "2.64"
+   "version" : "2.72"
 }
diff --git a/META.yml b/META.yml
index 9a26208..5649a84 100644
--- a/META.yml
+++ b/META.yml
@@ -7,7 +7,7 @@ build_requires:
 configure_requires:
   ExtUtils::MakeMaker: '0'
 dynamic_config: 1
-generated_by: 'ExtUtils::MakeMaker version 6.98, CPAN::Meta::Converter version 2.142690'
+generated_by: 'ExtUtils::MakeMaker version 7.02, CPAN::Meta::Converter version 2.143240'
 license: perl
 meta-spec:
   url: http://module-build.sourceforge.net/META-spec-v1.4.html
@@ -22,4 +22,4 @@ requires:
   parent: '0.221'
 resources:
   repository: https://github.com/dankogai/p5-encode
-version: '2.64'
+version: '2.72'
diff --git a/Makefile.PL b/Makefile.PL
index 18d5915..7da9329 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -1,5 +1,5 @@
 #
-# $Id: Makefile.PL,v 2.12 2013/09/14 07:51:59 dankogai Exp $
+# $Id: Makefile.PL,v 2.13 2015/02/05 10:53:00 dankogai Exp $
 #
 use 5.007003;
 use strict;
@@ -23,6 +23,7 @@ my %tables =
 
 my @exe_files = qw(bin/enc2xs
            bin/piconv
+           bin/encguess
            );
 my @more_exe_files = qw(
             unidump
diff --git a/bin/enc2xs b/bin/enc2xs
index 5e9f04a..19f2b2b 100755
--- a/bin/enc2xs
+++ b/bin/enc2xs
@@ -10,7 +10,7 @@ use warnings;
 use Getopt::Std;
 use Config;
 my @orig_ARGV = @ARGV;
-our $VERSION  = do { my @r = (q$Revision: 2.14 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION  = do { my @r = (q$Revision: 2.17 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
 
 # These may get re-ordered.
 # RAW is a do_now as inserted by &enter
@@ -133,10 +133,18 @@ my %opt;
 # -o <output> to specify the output file name (else it's the first arg)
 # -f <inlist> to give a file with a list of input files (else use the args)
 # -n <name> to name the encoding (else use the basename of the input file.
-getopts('CM:SQqOo:f:n:',\%opt);
+getopts('CM:SQqOo:f:n:v',\%opt);
 
 $opt{M} and make_makefile_pl($opt{M}, @ARGV);
 $opt{C} and make_configlocal_pm($opt{C}, @ARGV);
+$opt{v} ||= $ENV{ENC2XS_VERBOSE};
+
+sub verbose {    # print @_ to STDERR, but only when -v / $ENV{ENC2XS_VERBOSE} turned $opt{v} on
+    $opt{v} and print STDERR @_;
+}
+sub verbosef {    # printf @_ (format, then args) to STDERR, but only when $opt{v} is set
+    $opt{v} and printf STDERR @_;
+}
 
 # This really should go first, else the die here causes empty (non-erroneous)
 # output files to be written.
@@ -252,7 +260,7 @@ foreach my $enc (sort cmp_name @encfiles)
 
 if ($doC)
  {
-  print STDERR "Writing compiled form\n";
+  verbose "Writing compiled form\n";
   foreach my $name (sort cmp_name keys %encoding)
    {
     my ($e2u,$u2e,$erep,$min_el,$max_el) = @{$encoding{$name}};
@@ -272,8 +280,9 @@ if ($doC)
     # push(@{$encoding{$name}},outstring(\*C,$e2u->{Cname}.'_def',$erep));
    }
   my $cpp = ($Config{d_cplusplus} || '') eq 'define';
-  my $exta = $cpp ? 'extern "C" ' : "static";
-  my $extb = $cpp ? 'extern "C" ' : "";
+  my $ext  = $cpp ? 'extern "C"' : "extern";
+  my $exta = $cpp ? 'extern "C"' : "static";
+  my $extb = $cpp ? 'extern "C"' : "";
   foreach my $enc (sort cmp_name keys %encoding)
    {
     # my ($e2u,$u2e,$rep,$min_el,$max_el,$rsym) = @{$encoding{$enc}};
@@ -300,7 +309,7 @@ if ($doC)
    {
     my $sym = "${enc}_encoding";
     $sym =~ s/\W+/_/g;
-    print H "extern encode_t $sym;\n";
+    print H "${ext} encode_t $sym;\n";
     print D " Encode_XSEncoding(aTHX_ &$sym);\n";
    }
 
@@ -347,10 +356,10 @@ END
 
   my $perc_saved    = $saved/($strings + $saved) * 100;
   my $perc_subsaved = $subsave/($strings + $subsave) * 100;
-  printf STDERR "%d bytes in string tables\n",$strings;
-  printf STDERR "%d bytes (%.3g%%) saved spotting duplicates\n",
+  verbosef "%d bytes in string tables\n",$strings;
+  verbosef "%d bytes (%.3g%%) saved spotting duplicates\n",
     $saved, $perc_saved              if $saved;
-  printf STDERR "%d bytes (%.3g%%) saved using substrings\n",
+  verbosef "%d bytes (%.3g%%) saved using substrings\n",
     $subsave, $perc_subsaved         if $subsave;
  }
 elsif ($doEnc)
@@ -701,7 +710,12 @@ sub addstrings
    my $cpp = ($Config{d_cplusplus} || '') eq 'define';
    my $var = $^O eq 'MacOS' || $cpp ? 'extern' : 'static';
    my $const = $cpp ? '' : 'const';
-   print $fh "$var $const encpage_t $name\[",scalar(@{$a->{'Entries'}}),"];\n";
+   my $ccflags = $Config{ccflags};
+   if (defined $Config{ccwarnflags}) {
+       $ccflags .= " " . $Config{ccwarnflags};
+   }
+   my $count = $ccflags =~ /-Wc\+\+-compat/ ? '' : scalar(@{$a->{'Entries'}});
+   print $fh "$var $const encpage_t $name\[$count];\n";
   }
  $a->{'DoneStrings'} = 1;
  foreach my $b (@{$a->{'Entries'}})
@@ -1001,9 +1015,9 @@ sub make_configlocal_pm {
 	$mod =~ s/.*\bEncode\b/Encode/o;
 	$mod =~ s/\.pm\z//o;
 	$mod =~ s,/,::,og;
-	warn qq{ require $mod;\n};
 	eval qq{ require $mod; };
-	$@ and die "Can't require $mod: $@\n";
+        return if $@;
+        warn qq{ require $mod;\n};
 	for my $enc ( Encode->encodings() ) {
 	    no warnings;
 	    $in_core{$enc}                   and next;
@@ -1017,7 +1031,7 @@ sub make_configlocal_pm {
         $_ModLines .=
           qq(\$Encode::ExtModule{'$enc'} = "$LocalMod{$enc}";\n);
     }
-    warn $_ModLines;
+    warn $_ModLines if $_ModLines;
     $_LocalVer = _mkversion();
     $_E2X      = find_e2x();
     $_Inc      = $INC{"Encode.pm"};
diff --git a/bin/encguess b/bin/encguess
new file mode 100755
index 0000000..5d7ac80
--- /dev/null
+++ b/bin/encguess
@@ -0,0 +1,145 @@
+#!./perl
+use 5.008001;
+use strict;
+use warnings;
+use Encode;
+use Getopt::Std;
+use Carp;
+use Encode::Guess;
+$Getopt::Std::STANDARD_HELP_VERSION = 1;
+# Switches: -h usage, -u hide unidentified files, -S list encodings, -s LIST of suspects.
+my %opt;
+getopts( "huSs:", \%opt );
+my @suspect_list;
+list_valid_suspects() and exit if $opt{S};
+@suspect_list = split /[:,]/, $opt{s} if $opt{s};    # either ':' or ',' separates suspects
+HELP_MESSAGE() if $opt{h};
+HELP_MESSAGE() unless @ARGV;
+do_guess($_) for @ARGV;
+
+# Slurp the named file as raw bytes and return its content; croaks if open fails.
+sub read_file {
+    my ($path) = @_;
+    open my $in, '<:raw', $path or croak "$path:$!";
+    my $bytes = do { local $/; <$in> };    # undef $/ => read the whole file at once
+    close $in;
+    return $bytes;
+}
+
+# Guess the encoding of one file and print "<file>\t<result>\n", where
+# <result> is the guessed encoding's mime_name(), or "unknown" when
+# guess_encoding() did not return an object.  With -u, unidentified
+# files are skipped without output.  Always returns 1.
+sub do_guess {
+    my ($file) = @_;
+    my $guess = guess_encoding( read_file($file), @suspect_list );
+    my $identified = ref($guess);
+
+    # -u: say nothing about files we could not identify.
+    return 1 if !$identified && $opt{u};
+
+    print "$file\t";
+    my @result = $identified ? $guess->mime_name() : "unknown";
+    print @result, "\n";
+    return 1;
+}
+
+# Print every encoding name reported by Encode->encodings(":all"), one per line; returns 1.
+sub list_valid_suspects {
+    print join( "\n", Encode->encodings(":all") ), "\n";
+    return 1;
+}
+
+sub HELP_MESSAGE {    # show usage by exec'ing pod2usage on this script; dies if exec fails
+    exec( 'pod2usage', $0 ) or die "pod2usage: $!";
+}
+__END__
+=head1 NAME
+
+encguess - guess character encodings of files
+
+=head1 VERSION
+
+$Id: encguess,v 0.1 2015/02/05 10:34:19 dankogai Exp $
+
+=head1 SYNOPSIS
+
+  encguess [switches] filename...
+
+=head2 SWITCHES
+
+=over 2
+
+=item -h
+
+show this message and exit.
+
+=item -s
+
+specify a list of "suspect encoding types" to test, 
+separated by either C<:> or C<,>
+
+=item -S
+
+output a list of all acceptable encoding types that can be used with
+the -s param
+
+=item -u
+
+suppress display of unidentified types
+
+=back
+
+=head2 EXAMPLES:
+
+=over 2
+
+=item *
+
+Guess encoding of a file named C<test.txt>, using only the default
+suspect types.
+
+   encguess test.txt
+
+=item *
+
+Guess the encoding type of a file named C<test.txt>, using the suspect
+types C<euc-jp,shiftjis,7bit-jis>.
+
+   encguess -s euc-jp,shiftjis,7bit-jis test.txt
+   encguess -s euc-jp:shiftjis:7bit-jis test.txt
+
+=item *
+
+Guess the encoding type of several files, do not display results for
+unidentified files.
+
+   encguess -us euc-jp,shiftjis,7bit-jis test*.txt
+
+=back
+
+=head1 DESCRIPTION
+
+The encoding identification is done by checking one encoding type at a
+time until all but the right type are eliminated. The set of encoding
+types to try is defined by the -s parameter and defaults to ascii,
+utf8 and UTF-16/32 with BOM. This can be overridden by passing one or
+more encoding types via the -s parameter. If you need to pass in
+multiple suspect encoding types, separate them with either C<:> or
+C<,> (for example C<-s euc-jp,shiftjis>).
+
+=head1 SEE ALSO
+
+L<Encode::Guess>, L<Encode::Detect>
+
+=head1 LICENSE AND COPYRIGHT
+
+Copyright 2015 Michael LaGrasta and Dan Kogai.
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the Artistic License (2.0). You may obtain a
+copy of the full license at:
+
+L<http://www.perlfoundation.org/artistic_license_2_0>
+
+=cut
diff --git a/encoding.pm b/encoding.pm
index c0bff08..fde410d 100644
--- a/encoding.pm
+++ b/encoding.pm
@@ -1,6 +1,6 @@
-# $Id: encoding.pm,v 2.12 2013/04/26 18:30:46 dankogai Exp $
+# $Id: encoding.pm,v 2.14 2015/03/14 02:44:39 dankogai Exp dankogai $
 package encoding;
-our $VERSION = sprintf "%d.%02d", q$Revision: 2.12 $ =~ /(\d+)/g;
+our $VERSION = sprintf "%d.%02d", q$Revision: 2.14 $ =~ /(\d+)/g;
 
 use Encode;
 use strict;
@@ -131,11 +131,23 @@ sub import {
     $name = $enc->name;    # canonize
     unless ( $arg{Filter} ) {
         DEBUG and warn "_exception($name) = ", _exception($name);
-        _exception($name) or ${^ENCODING} = $enc;
+        if (! _exception($name)) {
+            if (!$^V || $^V lt v5.21.7) {
+                ${^ENCODING} = $enc;
+            }
+            else {
+                # Starting with 5.21.7, this pragma uses a shadow variable
+                # designed explicitly for it, ${^E_NCODING}, to enforce
+                # lexical scope; instead of ${^ENCODING}.
+                $^H{'encoding'} = 1;
+                ${^E_NCODING} = $enc;
+            }
+        }
         $HAS_PERLIO or return 1;
     }
     else {
         defined( ${^ENCODING} ) and undef ${^ENCODING};
+        undef ${^E_NCODING} if $^V && $^V ge v5.21.7;
 
         # implicitly 'use utf8'
         require utf8;      # to fetch $utf8::hint_bits;
@@ -185,6 +197,7 @@ sub import {
 sub unimport {
     no warnings;
     undef ${^ENCODING};
+    undef ${^E_NCODING} if $^V && $^V ge v5.21.7;
     if ($HAS_PERLIO) {
         binmode( STDIN,  ":raw" );
         binmode( STDOUT, ":raw" );
@@ -205,20 +218,12 @@ __END__
 
 =head1 NAME
 
-encoding - allows you to write your script in non-ascii or non-utf8
+encoding - allows you to write your script in non-ASCII and non-UTF-8
 
 =head1 WARNING
 
-This module is deprecated under perl 5.18.  It uses a mechanism provided by
-perl that is deprecated under 5.18 and higher, and may be removed in a
-future version.
-
-The easiest and the best alternative is to write your script in UTF-8
-and declear:
-
-  use utf8; # not use encoding ':utf8';
-
-See L<perluniintro> and L<utf8> for details.
+This module has been deprecated since perl v5.18.  See L</DESCRIPTION> and
+L</BUGS>.
 
 =head1 SYNOPSIS
 
@@ -235,68 +240,84 @@ See L<perluniintro> and L<utf8> for details.
   # A simple euc-cn => utf-8 converter
   use encoding "euc-cn", STDOUT => "utf8";  while(<>){print};
 
-  # "no encoding;" supported (but not scoped!)
+  # "no encoding;" supported
   no encoding;
 
   # an alternate way, Filter
   use encoding "euc-jp", Filter=>1;
   # now you can use kanji identifiers -- in euc-jp!
 
-  # switch on locale -
-  # note that this probably means that unless you have a complete control
-  # over the environments the application is ever going to be run, you should
-  # NOT use the feature of encoding pragma allowing you to write your script
-  # in any recognized encoding because changing locale settings will wreck
-  # the script; you can of course still use the other features of the pragma.
+  # encode based on the current locale - specialized purposes only;
+  # fraught with danger!!
   use encoding ':locale';
 
-=head1 ABSTRACT
+=head1 DESCRIPTION
 
-Let's start with a bit of history: Perl 5.6.0 introduced Unicode
-support.  You could apply C<substr()> and regexes even to complex CJK
-characters -- so long as the script was written in UTF-8.  But back
-then, text editors that supported UTF-8 were still rare and many users
-instead chose to write scripts in legacy encodings, giving up a whole
-new feature of Perl 5.6.
+This pragma is used to enable a Perl script to be written in encodings that
+aren't strictly ASCII nor UTF-8.  It translates all or portions of the Perl
+program script from a given encoding into UTF-8, and changes the PerlIO layers
+of C<STDIN> and C<STDOUT> to the encoding specified.
 
-Rewind to the future: starting from perl 5.8.0 with the B<encoding>
-pragma, you can write your script in any encoding you like (so long
-as the C<Encode> module supports it) and still enjoy Unicode support.
-This pragma achieves that by doing the following:
+This pragma dates from the days when UTF-8-enabled editors were uncommon.  But
+that was long ago, and the need for it is greatly diminished.  That, coupled
+with the fact that it doesn't work with threads, along with other problems,
+(see L</BUGS>) have led to its being deprecated.  It is planned to remove this
+pragma in a future Perl version.  New code should be written in UTF-8, and the
+C<use utf8> pragma used instead (see L<perluniintro> and L<utf8> for details).
+Old code should be converted to UTF-8, via something like the recipe in the
+L</SYNOPSIS> (though this simple approach may require manual adjustments
+afterwards).
 
-=over
+The only legitimate use of this pragma is almost certainly just one per file,
+near the top, with file scope, as the file is likely going to only be written
+in one encoding.  Further restrictions apply in Perls before v5.22 (see
+L</Prior to Perl v5.22>).
 
-=item *
+There are two basic modes of operation (plus turning it off):
 
-Internally converts all literals (C<q//,qq//,qr//,qw///, qx//>) from
-the encoding specified to utf8.  In Perl 5.8.1 and later, literals in
-C<tr///> and C<DATA> pseudo-filehandle are also converted.
+=over 4
 
-=item *
+=item C<use encoding ['I<ENCNAME>'] ;>
 
-Changing PerlIO layers of C<STDIN> and C<STDOUT> to the encoding
- specified.
+This is the normal operation.  It translates various literals encountered in
+the Perl source file from the encoding I<ENCNAME> into UTF-8, and similarly
+converts character code points.  This is used when the script is a combination
+of ASCII (for the variable names and punctuation, I<etc>), but the literal
+data is in the specified encoding.
 
-=back
+I<ENCNAME> is optional.  If omitted, the encoding specified in the environment
+variable L<C<PERL_ENCODING>|perlrun/PERL_ENCODING> is used.  If this isn't
+set, or the resolved-to encoding is not known to C<L<Encode>>, the error
+C<Unknown encoding 'I<ENCNAME>'> will be thrown.
 
-=head2 Literal Conversions
+Starting in Perl v5.8.6 (C<Encode> version 2.0.1), I<ENCNAME> may be the
+name C<:locale>.  This is for very specialized applications, and is documented
+in L</The C<:locale> sub-pragma> below.
 
-You can write code in EUC-JP as follows:
+The literals that are converted are C<q//, qq//, qr//, qw///, qx//>, and
+starting in v5.8.1, C<tr///>.  Operations that do conversions include C<chr>,
+C<ord>, C<utf8::upgrade> (but not C<utf8::downgrade>), and C<chomp>.
+
+Also starting in v5.8.1, the C<DATA> pseudo-filehandle is translated from the
+encoding into UTF-8.
+
+For example, you can write code in EUC-JP as follows:
 
   my $Rakuda = "\xF1\xD1\xF1\xCC"; # Camel in Kanji
                #<-char-><-char->   # 4 octets
   s/\bCamel\b/$Rakuda/;
 
 And with C<use encoding "euc-jp"> in effect, it is the same thing as
-the code in UTF-8:
+that code in UTF-8:
 
   my $Rakuda = "\x{99F1}\x{99DD}"; # two Unicode Characters
   s/\bCamel\b/$Rakuda/;
 
-=head2 PerlIO layers for C<STD(IN|OUT)>
+See L</EXAMPLE> below for a more complete example.
 
-The B<encoding> pragma also modifies the filehandle layers of
-STDIN and STDOUT to the specified encoding.  Therefore,
+Unless C<${^UNICODE}> (available starting in v5.8.2) exists and is non-zero, the
+PerlIO layers of C<STDIN> and C<STDOUT> are set to "C<:encoding(I<ENCNAME>)>".
+Therefore,
 
   use encoding "euc-jp";
   my $message = "Camel is the symbol of perl.\n";
@@ -304,183 +325,145 @@ STDIN and STDOUT to the specified encoding.  Therefore,
   $message =~ s/\bCamel\b/$Rakuda/;
   print $message;
 
-Will print "\xF1\xD1\xF1\xCC is the symbol of perl.\n",
-not "\x{99F1}\x{99DD} is the symbol of perl.\n".
-
-You can override this by giving extra arguments; see below.
-
-=head2 Implicit upgrading for byte strings
-
-By default, if strings operating under byte semantics and strings
-with Unicode character data are concatenated, the new string will
-be created by decoding the byte strings as I<ISO 8859-1 (Latin-1)>.
-
-The B<encoding> pragma changes this to use the specified encoding
-instead.  For example:
+will print
 
-    use encoding 'utf8';
-    my $string = chr(20000); # a Unicode string
-    utf8::encode($string);   # now it's a UTF-8 encoded byte string
-    # concatenate with another Unicode string
-    print length($string . chr(20000));
+ "\xF1\xD1\xF1\xCC is the symbol of perl.\n"
 
-Will print C<2>, because C<$string> is upgraded as UTF-8.  Without
-C<use encoding 'utf8';>, it will print C<4> instead, since C<$string>
-is three octets when interpreted as Latin-1.
+not
 
-=head2 Side effects
+ "\x{99F1}\x{99DD} is the symbol of perl.\n"
 
-If the C<encoding> pragma is in scope then the lengths returned are
-calculated from the length of C<$/> in Unicode characters, which is not
-always the same as the length of C<$/> in the native encoding.
+You can override this by giving extra arguments; see below.
 
-This pragma affects utf8::upgrade, but not utf8::downgrade.
+Note that C<STDERR> WILL NOT be changed, regardless.
 
-=head1 FEATURES THAT REQUIRE 5.8.1
+Also note that non-STD file handles remain unaffected.  Use C<use
+open> or C<binmode> to change the layers of those.
 
-Some of the features offered by this pragma requires perl 5.8.1.  Most
-of these are done by Inaba Hiroto.  Any other features and changes
-are good for 5.8.0.
+=item C<use encoding I<ENCNAME> Filter=E<gt>1;>
 
-=over
+This operates as above, but the C<Filter> argument with a non-zero
+value causes the entire script, and not just literals, to be translated from
+the encoding into UTF-8.  This allows identifiers in the source to be in that
+encoding as well.  (Problems may occur if the encoding is not a superset of
+ASCII; imagine all your semi-colons being translated into something
+different.)  One can use this form to make
 
-=item "NON-EUC" doublebyte encodings
+ ${"\x{4eba}"}++
 
-Because perl needs to parse script before applying this pragma, such
-encodings as Shift_JIS and Big-5 that may contain '\' (BACKSLASH;
-\x5c) in the second byte fails because the second byte may
-accidentally escape the quoting character that follows.  Perl 5.8.1
-or later fixes this problem.
+work.  (This is equivalent to C<$I<human>++>, where I<human> is a single Han
+ideograph).
 
-=item tr//
+This effectively means that your source code behaves as if it were written in
+UTF-8 with C<'use utf8'> in effect.  So even if your editor only supports
+Shift_JIS, for example, you can still try examples in Chapter 15 of
+C<Programming Perl, 3rd Ed.>.
 
-C<tr//> was overlooked by Perl 5 porters when they released perl 5.8.0
-See the section below for details.
+This option is significantly slower than the other one.
 
-=item DATA pseudo-filehandle
+=item C<no encoding;>
 
-Another feature that was overlooked was C<DATA>.
+Unsets the script encoding. The layers of C<STDIN>, C<STDOUT> are
+reset to "C<:raw>" (the default unprocessed raw stream of bytes).
 
 =back
 
-=head1 USAGE
+=head1 OPTIONS
 
-=over 4
+=head2 Setting C<STDIN> and/or C<STDOUT> individually
 
-=item use encoding [I<ENCNAME>] ;
+The encodings of C<STDIN> and C<STDOUT> are individually settable by parameters to
+the pragma:
 
-Sets the script encoding to I<ENCNAME>.  And unless ${^UNICODE}
-exists and non-zero, PerlIO layers of STDIN and STDOUT are set to
-":encoding(I<ENCNAME>)".
+ use encoding 'euc-tw', STDIN => 'greek'  ...;
 
-Note that STDERR WILL NOT be changed.
+In this case, you cannot omit the first I<ENCNAME>.  C<< STDIN => undef >>
+turns the I/O transcoding completely off for that filehandle.
 
-Also note that non-STD file handles remain unaffected.  Use C<use
-open> or C<binmode> to change layers of those.
+When C<${^UNICODE}> (available starting in v5.8.2) exists and is non-zero,
+these options will be completely ignored.  See L<perlvar/C<${^UNICODE}>> and
+L<"C<-C>" in perlrun|perlrun/-C [numberE<sol>list]> for details.
 
-If no encoding is specified, the environment variable L<PERL_ENCODING>
-is consulted.  If no encoding can be found, the error C<Unknown encoding
-'I<ENCNAME>'> will be thrown.
+=head2 The C<:locale> sub-pragma
 
-=item use encoding I<ENCNAME> [ STDIN =E<gt> I<ENCNAME_IN> ...] ;
+Starting in v5.8.6, the encoding name may be C<:locale>.  This means that the
+encoding is taken from the current locale, and not hard-coded by the pragma.
+Since a script really can only be encoded in exactly one encoding, this option
+is dangerous.  It makes sense only if the script itself is written in ASCII,
+and all the possible locales that will be in use when the script is executed
+are supersets of ASCII.  That means that the script itself doesn't get
+changed, but the I/O handles have the specified encoding added, and the
+operations like C<chr> and C<ord> use that encoding.
 
-You can also individually set encodings of STDIN and STDOUT via the
-C<< STDIN => I<ENCNAME> >> form.  In this case, you cannot omit the
-first I<ENCNAME>.  C<< STDIN => undef >> turns the IO transcoding
-completely off.
+The logic of finding which locale C<:locale> uses is as follows:
 
-When ${^UNICODE} exists and non-zero, these options will completely
-ignored.  ${^UNICODE} is a variable introduced in perl 5.8.1.  See
-L<perlrun> see L<perlvar/"${^UNICODE}"> and L<perlrun/"-C"> for
-details (perl 5.8.1 and later).
+=over 4
 
-=item use encoding I<ENCNAME> Filter=E<gt>1;
+=item 1.
 
-This turns the encoding pragma into a source filter.  While the
-default approach just decodes interpolated literals (in qq() and
-qr()), this will apply a source filter to the entire source code.  See
-L</"The Filter Option"> below for details.
+If the platform supports the C<langinfo(CODESET)> interface, the codeset
+returned is used as the default encoding for the open pragma.
 
-=item no encoding;
+=item 2.
 
-Unsets the script encoding. The layers of STDIN, STDOUT are
-reset to ":raw" (the default unprocessed raw stream of bytes).
+If 1. didn't work but we are under the locale pragma, the environment
+variables C<LC_ALL> and C<LANG> (in that order) are matched for encodings
+(the part after "C<.>", if any), and if any is found, that is used
+as the default encoding for the open pragma.
 
-=back
+=item 3.
 
-=head1 The Filter Option
+If 1. and 2. didn't work, the environment variables C<LC_ALL> and C<LANG>
+(in that order) are matched for anything looking like UTF-8, and if
+any is found, C<:utf8> is used as the default encoding for the open
+pragma.
 
-The magic of C<use encoding> is not applied to the names of
-identifiers.  In order to make C<${"\x{4eba}"}++> ($human++, where human
-is a single Han ideograph) work, you still need to write your script
-in UTF-8 -- or use a source filter.  That's what 'Filter=>1' does.
+=back
 
-What does this mean?  Your source code behaves as if it is written in
-UTF-8 with 'use utf8' in effect.  So even if your editor only supports
-Shift_JIS, for example, you can still try examples in Chapter 15 of
-C<Programming Perl, 3rd Ed.>.  For instance, you can use UTF-8
-identifiers.
+If your locale environment variables (C<LC_ALL>, C<LC_CTYPE>, C<LANG>)
+contain the strings 'UTF-8' or 'UTF8' (case-insensitive matching),
+the default encoding of your C<STDIN>, C<STDOUT>, and C<STDERR>, and of
+B<any subsequent file open>, is UTF-8.
 
-This option is significantly slower and (as of this writing) non-ASCII
-identifiers are not very stable WITHOUT this option and with the
-source code written in UTF-8.
+=head1 CAVEATS
 
-=head2 Filter-related changes at Encode version 1.87
+=head2 SIDE EFFECTS
 
 =over
 
 =item *
 
-The Filter option now sets STDIN and STDOUT like non-filter options.
-And C<< STDIN=>I<ENCODING> >> and C<< STDOUT=>I<ENCODING> >> work like
-non-filter version.
+If the C<encoding> pragma is in scope then the lengths returned by C<chomp>
+are calculated from the length of C<$/> in Unicode characters, which is not
+always the same as the length of C<$/> in the native encoding.
 
 =item *
 
-C<use utf8> is implicitly declared so you no longer have to C<use
-utf8> to C<${"\x{4eba}"}++>.
-
-=back
-
-=head1 CAVEATS
-
-=head2 NOT SCOPED
-
-The pragma is a per script, not a per block lexical.  Only the last
-C<use encoding> or C<no encoding> matters, and it affects
-B<the whole script>.  However, the <no encoding> pragma is supported and
-B<use encoding> can appear as many times as you want in a given script.
-The multiple use of this pragma is discouraged.
-
-By the same reason, the use this pragma inside modules is also
-discouraged (though not as strongly discouraged as the case above.
-See below).
-
-If you still have to write a module with this pragma, be very careful
-of the load order.  See the codes below;
+Without this pragma, if strings operating under byte semantics and strings
+with Unicode character data are concatenated, the new string will
+be created by decoding the byte strings as I<ISO 8859-1 (Latin-1)>.
 
-  # called module
-  package Module_IN_BAR;
-  use encoding "bar";
-  # stuff in "bar" encoding here
-  1;
+The B<encoding> pragma changes this to use the specified encoding
+instead.  For example:
 
-  # caller script
-  use encoding "foo"
-  use Module_IN_BAR;
-  # surprise! use encoding "bar" is in effect.
+    use encoding 'utf8';
+    my $string = chr(20000); # a Unicode string
+    utf8::encode($string);   # now it's a UTF-8 encoded byte string
+    # concatenate with another Unicode string
+    print length($string . chr(20000));
 
-The best way to avoid this oddity is to use this pragma RIGHT AFTER
-other modules are loaded.  i.e.
+Will print C<2>, because C<$string> is upgraded as UTF-8.  Without
+C<use encoding 'utf8';>, it will print C<4> instead, since C<$string>
+is three octets when interpreted as Latin-1.
 
-  use Module_IN_BAR;
-  use encoding "foo";
+=back
 
 =head2 DO NOT MIX MULTIPLE ENCODINGS
 
 Notice that only literals (string or regular expression) having only
 legacy code points are affected: if you mix data like this
 
+    \x{100}\xDF
     \xDF\x{100}
 
 the data is assumed to be in (Latin 1 and) Unicode, not in your native
@@ -509,10 +492,70 @@ resort to \x{....} just to spell your name in a native encoding.
 So feel free to put your strings in your encoding in quotes and
 regexes.
 
-=head2 tr/// with ranges
+=head2 Prior to Perl v5.22
+
+The pragma was a per script, not a per block lexical.  Only the last
+C<use encoding> or C<no encoding> mattered, and it affected
+B<the whole script>.  However, the C<no encoding> pragma was supported and
+C<use encoding> could appear as many times as you wanted in a given script
+(though only the last was effective).
+
+Since the scope wasn't lexical, other modules' uses of C<chr>, C<ord>, I<etc.>
+were affected.  This led to spooky, incorrect action at a distance that was
+hard to debug.
+
+This means you would have to be very careful of the load order:
+
+  # called module
+  package Module_IN_BAR;
+  use encoding "bar";
+  # stuff in "bar" encoding here
+  1;
+
+  # caller script
+  use encoding "foo";
+  use Module_IN_BAR;
+  # surprise! use encoding "bar" is in effect.
+
+The best way to avoid this oddity is to use this pragma RIGHT AFTER
+other modules are loaded.  i.e.
+
+  use Module_IN_BAR;
+  use encoding "foo";
+
+=head2 Prior to Encode version 1.87
+
+=over
+
+=item *
+
+C<STDIN> and C<STDOUT> were not set under the filter option.
+And C<< STDIN=>I<ENCODING> >> and C<< STDOUT=>I<ENCODING> >> didn't work as
+they do in the non-filter version.
+
+=item *
+
+C<use utf8> wasn't implicitly declared so you had to C<use utf8> to do
+
+ ${"\x{4eba}"}++
+
+=back
+
+=head2 Prior to Perl v5.8.1
+
+=over
+
+=item "NON-EUC" doublebyte encodings
+
+Because perl needs to parse the script before applying this pragma, such
+encodings as Shift_JIS and Big-5 that may contain C<'\'> (BACKSLASH;
+C<\x5c>) in the second byte fail because the second byte may
+accidentally escape the quoting character that follows.
+
+=item C<tr///>
 
 The B<encoding> pragma works by decoding string literals in
-C<q//,qq//,qr//,qw///, qx//> and so forth.  In perl 5.8.0, this
+C<q//,qq//,qr//,qw///, qx//> and so forth.  In perl v5.8.0, this
 does not apply to C<tr///>.  Therefore,
 
   use encoding 'euc-jp';
@@ -537,25 +580,21 @@ Does not work as
 
 =back
 
-This counterintuitive behavior has been fixed in perl 5.8.1.
+This counterintuitive behavior has been fixed in perl v5.8.1.
 
-=head3 workaround to tr///;
-
-In perl 5.8.0, you can work around as follows;
+In perl v5.8.0, you can work around this as follows:
 
   use encoding 'euc-jp';
   #  ....
   eval qq{ \$kana =~ tr/\xA4\xA1-\xA4\xF3/\xA5\xA1-\xA5\xF3/ };
 
 Note the C<tr//> expression is surrounded by C<qq{}>.  The idea behind
-is the same as classic idiom that makes C<tr///> 'interpolate'.
+this is the same as the classic idiom that makes C<tr///> 'interpolate':
 
    tr/$from/$to/;            # wrong!
    eval qq{ tr/$from/$to/ }; # workaround.
 
-Nevertheless, in case of B<encoding> pragma even C<q//> is affected so
-C<tr///> not being decoded was obviously against the will of Perl5
-Porters so it has been fixed in Perl 5.8.1 or later.
+=back
 
 =head1 EXAMPLE - Greekperl
 
@@ -590,10 +629,24 @@ Porters so it has been fixed in Perl 5.8.1 or later.
 
     print "zetta\n" if unpack("C", (pack("C", 0xdf))) == 0xdf;
 
-=head1 KNOWN PROBLEMS
+=head1 BUGS
 
 =over
 
+=item Thread safety
+
+C<use encoding ...> is not thread-safe (i.e., do not use in threaded
+applications).
+
+=item Can't be used by more than one module in a single program.
+
+Only one encoding is allowed.  If you combine modules in a program that have
+different encodings, only one will be actually used.
+
+=item Other modules using C<STDIN> and C<STDOUT> get the encoded stream
+
+They may be expecting something completely different.
+
 =item literals in regex that are longer than 127 bytes
 
 For native multibyte encodings (either fixed or variable length),
@@ -603,13 +656,11 @@ recoding errors for regular expression literals longer than 127 bytes.
 =item EBCDIC
 
 The encoding pragma is not supported on EBCDIC platforms.
-(Porters who are willing and able to remove this limitation are
-welcome.)
 
-=item format
+=item C<format>
 
-This pragma doesn't work well with format because PerlIO does not
-get along very well with it.  When format contains non-ascii
+This pragma doesn't work well with C<format> because PerlIO does not
+get along very well with it.  When C<format> contains non-ASCII
 characters it prints funny or gets "wide character warnings".
 To understand it, try the code below.
 
@@ -628,56 +679,19 @@ To understand it, try the code below.
 Without binmode this happens to work but without binmode, print()
 fails instead of write().
 
-At any rate, the very use of format is questionable when it comes to
+At any rate, the very use of C<format> is questionable when it comes to
 unicode characters since you have to consider such things as character
 width (i.e. double-width for ideographs) and directions (i.e. BIDI for
 Arabic and Hebrew).
 
-=item Thread safety
-
-C<use encoding ...> is not thread-safe (i.e., do not use in threaded
-applications).
+=item See also L</CAVEATS>
 
 =back
 
-=head2 The Logic of :locale
-
-The logic of C<:locale> is as follows:
-
-=over 4
-
-=item 1.
-
-If the platform supports the langinfo(CODESET) interface, the codeset
-returned is used as the default encoding for the open pragma.
-
-=item 2.
-
-If 1. didn't work but we are under the locale pragma, the environment
-variables LC_ALL and LANG (in that order) are matched for encodings
-(the part after C<.>, if any), and if any found, that is used
-as the default encoding for the open pragma.
-
-=item 3.
-
-If 1. and 2. didn't work, the environment variables LC_ALL and LANG
-(in that order) are matched for anything looking like UTF-8, and if
-any found, C<:utf8> is used as the default encoding for the open
-pragma.
-
-=back
-
-If your locale environment variables (LC_ALL, LC_CTYPE, LANG)
-contain the strings 'UTF-8' or 'UTF8' (case-insensitive matching),
-the default encoding of your STDIN, STDOUT, and STDERR, and of
-B<any subsequent file open>, is UTF-8.
-
 =head1 HISTORY
 
-This pragma first appeared in Perl 5.8.0.  For features that require
-5.8.1 and better, see above.
-
-The C<:locale> subpragma was implemented in 2.01, or Perl 5.8.6.
+This pragma first appeared in Perl v5.8.0.  It has been enhanced in later
+releases as specified above.
 
 =head1 SEE ALSO
 
diff --git a/lib/Encode/Alias.pm b/lib/Encode/Alias.pm
index 28d3cad..c0945be 100644
--- a/lib/Encode/Alias.pm
+++ b/lib/Encode/Alias.pm
@@ -2,7 +2,7 @@ package Encode::Alias;
 use strict;
 use warnings;
 no warnings 'redefine';
-our $VERSION = do { my @r = ( q$Revision: 2.18 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.19 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
 use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
 
 use Exporter 'import';
@@ -256,6 +256,10 @@ sub init_aliases {
         define_alias( qr/\bhk(?:scs)?[-_]?big5$/i => '"big5-hkscs"' );
     }
 
+    # https://github.com/dankogai/p5-encode/issues/37
+    define_alias(qr/cp65000/i => '"UTF-7"');
+    define_alias(qr/cp65001/i => '"utf-8-strict"');
+
     # utf8 is blessed :)
     define_alias( qr/\bUTF-8$/i => '"utf-8-strict"' );
 
diff --git a/lib/Encode/MIME/Header.pm b/lib/Encode/MIME/Header.pm
index 090a177..b970b62 100644
--- a/lib/Encode/MIME/Header.pm
+++ b/lib/Encode/MIME/Header.pm
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 no warnings 'redefine';
 
-our $VERSION = do { my @r = ( q$Revision: 2.15 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.16 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
 use Encode qw(find_encoding encode_utf8 decode_utf8);
 use MIME::Base64;
 use Carp;
@@ -139,7 +139,7 @@ sub encode($$;$) {
         push @line, join( "\n " => @subline );
     }
     $_[1] = '' if $chk;
-    return join( "\n", @line );
+    return (substr($str, 0, 0) . join( "\n", @line ));
 }
 
 use constant HEAD   => '=?UTF-8?';
diff --git a/t/Aliases.t b/t/Aliases.t
index d7a72d2..2fc14cc 100644
--- a/t/Aliases.t
+++ b/t/Aliases.t
@@ -81,6 +81,8 @@ sub init_a2c{
         'jis0208-raw'   => $ON_EBCDIC ? '' : 'jis0208-raw',
         'jis0212-raw'   => $ON_EBCDIC ? '' : 'jis0212-raw',
         'ksc5601-raw'   => $ON_EBCDIC ? '' : 'ksc5601-raw',
+        'cp65000' => 'UTF-7',
+        'cp65001' => 'utf-8-strict',
        );
 
     for my $i (1..11,13..16){
diff --git a/t/taint.t b/t/taint.t
index ade8b07..2446dd7 100644
--- a/t/taint.t
+++ b/t/taint.t
@@ -3,13 +3,26 @@ use strict;
 use Encode qw(encode decode);
 use Scalar::Util qw(tainted);
 use Test::More;
-
-my $str = "abc" . substr($ENV{PATH},0,0); # tainted string
+my $taint = substr($ENV{PATH},0,0);
+my $str = "dan\x{5f3e}" . $taint;                 # tainted string to encode
+my $bin = encode('UTF-8', $str);                  # tainted binary to decode
 my @names = Encode->encodings(':all');
 plan tests => 2 * @names;
-for my $name (@names){
-    my $e = encode($name, $str);
-    ok tainted($e), "encode $name";
-    my $d = decode($name, $e);
-    ok tainted($d), "decode $name";
+for my $name (@names) {
+    my ($d, $e, $s);
+    eval {
+        $e = encode($name, $str);
+    };
+  SKIP: {
+      skip $@, 1 if $@;
+      ok tainted($e), "encode $name";
+    }
+    $bin = $e.$taint if $e;
+    eval {
+        $d = decode($name, $bin);
+    };
+  SKIP: {
+      skip $@, 1 if $@;
+      ok tainted($d), "decode $name";
+    }
 }

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libencode-perl.git



More information about the Pkg-perl-cvs-commits mailing list