[libfile-bom-perl] 01/03: Imported Upstream version 0.15

gregor herrmann gregoa at debian.org
Tue Dec 15 17:32:42 UTC 2015


This is an automated email from the git hooks/post-receive script.

gregoa pushed a commit to branch master
in repository libfile-bom-perl.

commit c5d8e2d90b9217a9a1b9d161fc6cbe24528e6a7c
Author: gregor herrmann <gregoa at debian.org>
Date:   Tue Dec 15 18:31:50 2015 +0100

    Imported Upstream version 0.15
---
 Changes         |   3 +
 MANIFEST        |   1 +
 META.json       |  50 ++++
 META.yml        |  53 ++--
 Makefile.PL     |  66 ++---
 README          | 746 +++++++++++++++++++++++++++++---------------------------
 lib/File/BOM.pm |   4 +-
 7 files changed, 502 insertions(+), 421 deletions(-)

diff --git a/Changes b/Changes
index 3151854..5356f46 100644
--- a/Changes
+++ b/Changes
@@ -1,5 +1,8 @@
 File::BOM changes document
 
+0.15 - Mon Dec 14 2015
+    - Fix spelling errors spotted by debian team. Thanks to Gregor Herrmann.
+
 0.14 - Wed Oct 4 2006
     - More workarounds for platforms with odd read() behaviour.
 
diff --git a/MANIFEST b/MANIFEST
index 3fa107e..dbdfb84 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -15,3 +15,4 @@ t/99..shutdown.t
 t/data/broken_bom.txt
 t/lib/Test/Framework.pm
 TODO
+META.json
diff --git a/META.json b/META.json
new file mode 100644
index 0000000..d319f74
--- /dev/null
+++ b/META.json
@@ -0,0 +1,50 @@
+{
+   "abstract" : "Utilities for handling Byte Order Marks",
+   "author" : [
+      "Matt Lawrence E<lt>mattlaw at cpan.orgE<gt>"
+   ],
+   "dynamic_config" : 1,
+   "generated_by" : "Module::Build version 0.4212",
+   "license" : [
+      "perl_5"
+   ],
+   "meta-spec" : {
+      "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
+      "version" : "2"
+   },
+   "name" : "File-BOM",
+   "prereqs" : {
+      "build" : {
+         "requires" : {
+            "Module::Build" : "0.20",
+            "Test::Exception" : "0.20",
+            "Test::More" : "0.10"
+         }
+      },
+      "configure" : {
+         "requires" : {
+            "Module::Build" : "0.42"
+         }
+      },
+      "runtime" : {
+         "requires" : {
+            "Encode" : "1.99",
+            "Readonly" : "0.06",
+            "perl" : "v5.8.3"
+         }
+      }
+   },
+   "provides" : {
+      "File::BOM" : {
+         "file" : "lib/File/BOM.pm",
+         "version" : "0.15"
+      }
+   },
+   "release_status" : "stable",
+   "resources" : {
+      "license" : [
+         "http://dev.perl.org/licenses/"
+      ]
+   },
+   "version" : "0.15"
+}
diff --git a/META.yml b/META.yml
index 7ff3e7d..1e4304b 100644
--- a/META.yml
+++ b/META.yml
@@ -1,25 +1,28 @@
----
-name: File-BOM
-version: 0.14
-author:
-  - 'Matt Lawrence E<lt>mattlaw at cpan.orgE<gt>'
-abstract: Utilities for handling Byte Order Marks
-license: perl
-resources:
-  license: http://dev.perl.org/licenses/
-requires:
-  Encode: 1.99
-  Readonly: 0.06
-  perl: 5.8.3
-build_requires:
-  Module::Build: 0.20
-  Test::Exception: 0.20
-  Test::More: 0.10
-provides:
-  File::BOM:
-    file: lib/File/BOM.pm
-    version: 0.14
-generated_by: Module::Build version 0.28
-meta-spec:
-  url: http://module-build.sourceforge.net/META-spec-v1.2.html
-  version: 1.2
+---
+abstract: 'Utilities for handling Byte Order Marks'
+author:
+  - 'Matt Lawrence E<lt>mattlaw at cpan.orgE<gt>'
+build_requires:
+  Module::Build: '0.20'
+  Test::Exception: '0.20'
+  Test::More: '0.10'
+configure_requires:
+  Module::Build: '0.42'
+dynamic_config: 1
+generated_by: 'Module::Build version 0.4212, CPAN::Meta::Converter version 2.150001'
+license: perl
+meta-spec:
+  url: http://module-build.sourceforge.net/META-spec-v1.4.html
+  version: '1.4'
+name: File-BOM
+provides:
+  File::BOM:
+    file: lib/File/BOM.pm
+    version: '0.15'
+requires:
+  Encode: '1.99'
+  Readonly: '0.06'
+  perl: v5.8.3
+resources:
+  license: http://dev.perl.org/licenses/
+version: '0.15'
diff --git a/Makefile.PL b/Makefile.PL
index 192903a..d7086b7 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -1,31 +1,35 @@
-# Note: this file was auto-generated by Module::Build::Compat version 0.03
-    
-    unless (eval "use Module::Build::Compat 0.02; 1" ) {
-      print "This module requires Module::Build to install itself.\n";
-      
-      require ExtUtils::MakeMaker;
-      my $yn = ExtUtils::MakeMaker::prompt
-	('  Install Module::Build now from CPAN?', 'y');
-      
-      unless ($yn =~ /^y/i) {
-	die " *** Cannot install without Module::Build.  Exiting ...\n";
-      }
-      
-      require Cwd;
-      require File::Spec;
-      require CPAN;
-      
-      # Save this 'cause CPAN will chdir all over the place.
-      my $cwd = Cwd::cwd();
-      
-      CPAN::Shell->install('Module::Build::Compat');
-      CPAN::Shell->expand("Module", "Module::Build::Compat")->uptodate
-	or die "Couldn't install Module::Build, giving up.\n";
-      
-      chdir $cwd or die "Cannot chdir() back to $cwd: $!";
-    }
-    eval "use Module::Build::Compat 0.02; 1" or die $@;
-    
-    Module::Build::Compat->run_build_pl(args => \@ARGV);
-    require Module::Build;
-    Module::Build::Compat->write_makefile(build_class => 'Module::Build');
+# Note: this file was auto-generated by Module::Build::Compat version 0.4212
+require 5.008003;
+
+    unless (eval "use Module::Build::Compat 0.02; 1" ) {
+      print "This module requires Module::Build to install itself.\n";
+
+      require ExtUtils::MakeMaker;
+      my $yn = ExtUtils::MakeMaker::prompt
+	('  Install Module::Build now from CPAN?', 'y');
+
+      unless ($yn =~ /^y/i) {
+	die " *** Cannot install without Module::Build.  Exiting ...\n";
+      }
+
+      require Cwd;
+      require File::Spec;
+      require CPAN;
+
+      # Save this 'cause CPAN will chdir all over the place.
+      my $cwd = Cwd::cwd();
+
+      CPAN::Shell->install('Module::Build::Compat');
+      CPAN::Shell->expand("Module", "Module::Build::Compat")->uptodate
+	or die "Couldn't install Module::Build, giving up.\n";
+
+      chdir $cwd or die "Cannot chdir() back to $cwd: $!";
+    }
+    eval "use Module::Build::Compat 0.02; 1" or die $@;
+    
+    Module::Build::Compat->run_build_pl(args => \@ARGV);
+    my $build_script = 'Build';
+    $build_script .= '.com' if $^O eq 'VMS';
+    exit(0) unless(-e $build_script); # cpantesters convention
+    require Module::Build;
+    Module::Build::Compat->write_makefile(build_class => 'Module::Build');
diff --git a/README b/README
index bc66c7a..50f4d8a 100644
--- a/README
+++ b/README
@@ -1,363 +1,383 @@
-NAME
-    File::BOM - Utilities for handling Byte Order Marks
-
-SYNOPSIS
-        use File::BOM qw( :all )
-
-  high-level functions
-        # read a file with encoding from the BOM:
-        open_bom(FH, $file)
-        open_bom(FH, $file, ':utf8') # the same but with a default encoding
-
-        # get encoding too
-        $encoding = open_bom(FH, $file, ':utf8');
-
-        # open a potentially unseekable file:
-        ($encoding, $spillage) = open_bom(FH, $file, ':utf8');
-
-        # change encoding of an open handle according to BOM
-        $encoding = defuse(*HANDLE);
-        ($encoding, $spillage) = defuse(*HANDLE);
-
-        # Decode a string according to leading BOM:
-        $unicode = decode_from_bom($string_with_bom);
-    
-        # Decode a string and get the encoding:
-        ($unicode, $encoding) = decode_from_bom($string_with_bom)
-
-  PerlIO::via interface
-        # Read the Right Thing from a unicode file with BOM:
-        open(HANDLE, '<:via(File::BOM)', $filename)
-
-        # Writing little-endian UTF-16 file with BOM:
-        open(HANDLE, '>:encoding(UTF-16LE):via(File::BOM)', $filename)
-
-  lower-level functions
-        # read BOM encoding from a filehandle:
-        $encoding = get_encoding_from_filehandle(FH)
-
-        # Get encoding even if FH is unseekable:
-        ($encoding, $spillage) = get_encoding_from_filehandle(FH);
-
-        # Get encoding from a known unseekable handle:
-        ($encdoing, $spillage) = get_encoding_from_stream(FH);
-
-        # get encoding and BOM length from BOM at start of string:
-        ($encoding, $offset) = get_encoding_from_bom($string);
-
-  variables
-        # print a BOM for a known encoding
-        print FH $enc2bom{$encoding};
-
-        # get an encoding from a known BOM
-        $enc = $bom2enc{$bom}
-
-DESCRIPTION
-    This module provides functions for handling unicode byte order marks,
-    which are to be found at the beginning of some files and streams.
-
-    For details about what a byte order mark is, see
-    <http://www.unicode.org/unicode/faq/utf_bom.html#BOM>
-
-    The intention of File::BOM is for files with BOMs to be readable as
-    seamlessly as possible, regardless of the encoding used. To that end,
-    several different interfaces are available, as shown in the synopsis
-    above.
-
-EXPORTS
-    Nothing by default.
-
-  symbols
-    * open_bom()
-    * defuse()
-    * decode_from_bom()
-    * get_encoding_from_filehandle()
-    * get_encoding_from_stream()
-    * get_encoding_from_bom()
-    * %bom2enc
-    * %enc2bom
-
-  tags
-    * :all
-        All of the above
-
-    * :subs
-        subroutines only
-
-    * :vars
-        just %bom2enc and %enc2bom
-
-VARIABLES
-  %bom2enc
-    Maps Byte Order marks to their encodings.
-
-    The keys of this hash are strings which represent the BOMs, the values
-    are their encodings, in a format which is understood by Encode
-
-    The encodings represented in this hash are: UTF-8, UTF-16BE, UTF-16LE,
-    UTF-32BE and UTF-32LE
-
-  %enc2bom
-    A reverse-lookup hash for bom2enc, with a few aliases used in Encode,
-    namely utf8, iso-10646-1 and UCS-2.
-
-    Note that UTF-16, UTF-32 and UCS-4 are not included in this hash. Mainly
-    because Encode::encode automatically puts BOMs on output. See
-    Encode::Unicode
-
-FUNCTIONS
-  open_bom
-        $encoding = open_bom(HANDLE, $filename, $default_mode)
-
-        ($encoding, $spill) = open_bom(HANDLE, $filename, $default_mode)
-
-    opens HANDLE for reading on $filename, setting the mode to the
-    appropriate encoding for the BOM stored in the file.
-
-    On failure, a fatal error is raised, see the DIAGNOSTICS section for
-    details on how to catch these. This is in order to allow the return
-    value(s) to be used for other purposes.
-
-    If the file doesn't contain a BOM, $default_mode is used instead. Hence:
-
-        open_bom(FH, 'my_file.txt', ':utf8')
-
-    Opens my_file.txt for reading in an appropriate encoding found from the
-    BOM in that file, or as a UTF-8 file if none is found.
-
-    In the absense of a $default_mode argument, the following 2 calls should
-    be equivalent:
-
-        open_bom(FH, 'no_bom.txt');
-
-        open(FH, '<', 'no_bom.txt');
-
-    If an undefined value is passed as the handle, a symbol will be
-    generated for it like open() does:
-
-        # create filehandle on the fly
-        $enc = open_bom(my $fh, $filename, ':utf8');
-        $line = <$fh>;
-
-    The filehandle will be cued up to read after the BOM. Unseekable files
-    (e.g. fifos) will cause croaking, unless called in list context to catch
-    spillage from the handle. Any spillage will be automatically decoded
-    from the encoding, if found.
-
-        e.g.
-
-        # croak if my_socket is unseekable
-        open_bom(FH, 'my_socket');
-
-        # keep spillage if my_socket is unseekable
-        ($encoding, $spillage) = open_bom(FH, 'my_socket');
-
-        # discard any spillage from open_bom
-        ($encoding) = open_bom(FH, 'my_socket');
-
-  defuse
-        $enc = defuse(FH);
-
-        ($enc, $spill) = defuse(FH);
-
-    FH should be a filehandle opened for reading, it will have the relevant
-    encoding layer pushed onto it be binmode if a BOM is found. Spillage
-    should be Unicode, not bytes.
-
-    Any uncaptured spillage will be silently lost. If the handle is
-    unseekable, use list context to avoid data loss.
-
-    If no BOM is found, the mode will be unaffected.
-
-  decode_from_bom
-        $unicode_string = decode_from_bom($string, $default, $check)
-
-        ($unicode_string, $encoding) = decode_from_bom($string, $default, $check)
-
-    Reads a BOM from the beginning of $string, decodes $string (minus the
-    BOM) and returns it to you as a perl unicode string.
-
-    if $string doesn't have a BOM, $default is used instead.
-
-    $check, if supplied, is passed to Encode::decode as the third argument.
-
-    If there's no BOM and no default, the original string is returned and
-    encoding is ''.
-
-    See Encode
-
-  get_encoding_from_filehandle
-        $encoding = get_encoding_from_filehandle(HANDLE)
-
-        ($encoding, $spillage) = get_encoding_from_filehandle(HANDLE)
-
-    Returns the encoding found in the given filehandle.
-
-    The handle should be opened in a non-unicode way (e.g. mode '<:bytes')
-    so that the BOM can be read in its natural state.
-
-    After calling, the handle will be set to read at a point after the BOM
-    (or at the beginning of the file if no BOM was found)
-
-    If called in scalar context, unseekable handles cause a croak().
-
-    If called in list context, unseekable handles will be read byte-by-byte
-    and any spillage will be returned. See get_encoding_from_stream()
-
-  get_encoding_from_stream
-        ($encoding, $spillage) = get_encoding_from_stream(*FH);
-
-    Read a BOM from an unrewindable source. This means reading the stream
-    one byte at a time until either a BOM is found or every possible BOM is
-    ruled out. Any non-BOM bytes read from the handle will be returned in
-    $spillage.
-
-    If a BOM is found and the spillage contains a partial character (judging
-    by the expected character width for the encoding) more bytes will be
-    read from the handle to ensure that a complete character is returned.
-
-    Spillage is always in bytes, not characters.
-
-    This function is less efficient than get_encoding_from_filehandle, but
-    should work just as well on a seekable handle as on an unseekable one.
-
-  get_encoding_from_bom
-        ($encoding, $offset) = get_encoding_from_bom($string)
-
-    Returns the encoding and length in bytes of the BOM in $string.
-
-    If there is no BOM, an empty string is returned and $offset is zero.
-
-    To get the data from the string, the following should work:
-
-        use Encode;
-
-        my($encoding, $offset) = get_encoding_from_bom($string);
-
-        if ($encoding) {
-            $string = decode($encoding, substr($string, $offset))
-        }
-
-PerlIO::via interface
-    File::BOM can be used as a PerlIO::via interface.
-
-        open(HANDLE, '<:via(File::BOM)', 'my_file.txt');
-
-        open(HANDLE, '>:encoding(UTF-16LE):via(File::BOM)', 'out_file.txt)
-        print "foo\n"; # BOM is written to file here
-
-    This method is less prone to errors on non-seekable files as spillage is
-    incorporated into an internal buffer, but it doesn't give you any
-    information about the encoding being used, or indeed whether or not a
-    BOM was present.
-
-    There are a few known problems with this interface, especially
-    surrounding seek() and tell(), please see the BUGS section for more
-    details about this.
-
-  Reading
-    The via(File::BOM) layer must be added before the handle is read from,
-    otherwise any BOM will be missed. If there is no BOM, no decoding will
-    be done.
-
-    Because of a limitation in PerlIO::via, read() always works on bytes,
-    not characters. BOM decoding will still be done but output will be bytes
-    of UTF-8.
-
-        open(BOM, '<:via(File::BOM)', $file)
-        $bytes_read = read(BOM, $buffer, $length);
-        $unicode = decode('UTF-8', $buffer, Encode::FB_QUIET);
-
-        # Now $unicode is valid unicode and $buffer contains any left-over bytes
-
-  Writing
-    Add the via(File::BOM) layer on top of a unicode encoding layer to print
-    a BOM at the start of the output file. This needs to be done before any
-    data is written. The BOM is written as part of the first print command
-    on the handle, so if you don't print anything to the handle, you won't
-    get a BOM.
-
-    There is a "Wide character in print" warning generated when the
-    via(File::BOM) layer doesn't receive utf8 on writing. This glitch was
-    resolved in perl version 5.8.7, but if your perl version is older than
-    that, you'll need to make sure that the via(File::BOM) layer receives
-    utf8 like this:
-
-        # This works OK
-        open(FH, '>:encoding(UTF-16LE):via(File::BOM):utf8', $filename)
-
-        # This generates warnings with older perls
-        open(FH, '>:encoding(UTF-16LE):via(File::BOM)', $filename)
-
-  Seeking
-    Seeking with SEEK_SET results in an offset equal to the length of any
-    detected BOM being applied to the position parameter. Thus:
-
-        # Seek to end of BOM (not start of file!)
-        seek(FILE_BOM_HANDLE, 0, SEEK_SET)
-
-  Telling
-    In order to work correctly with seek(), tell() also returns a postion
-    adjusted by the length of the BOM.
-
-SEE ALSO
-    * Encode
-    * Encode::Unicode
-    * <http://www.unicode.org/unicode/faq/utf_bom.html#BOM>
-
-DIAGNOSTICS
-    The following exceptions are raised via croak()
-
-    * Couldn't read '<filename>': $!
-        open_bom() couldn't open the given file for reading
-
-    * Couldn't set binmode of handle opened on '<filename>' to '<mode>': $!
-        open_bom() couldn't set the binmode of the handle
-
-    * No string
-        decode_from_bom called on an undefined value
-
-    * Unseekable handle: $!
-        get_encoding_from_filehandle() or open_bom() called on an unseekable
-        file or handle in scalar context.
-
-    * Couldn't read from handle: $!
-        _get_encoding_seekable() couldn't read the handle. This function is
-        called from get_encoding_from_filehandle(), defuse() and open_bom()
-
-    * Couldn't reset read position: $!
-        _get_encoding_seekable couldn't seek to the position after the BOM.
-
-    * Couldn't read byte: $!
-        get_encoding_from_stream couldn't read from the handle. This
-        function is called from get_encoding_from_filehandle() and
-        open_bom() when the handle or file is unseekable.
-
-BUGS
-    Older versions of PerlIO::via have a few problems with writing, see
-    above.
-
-    The current version of PerlIO::via has limitations with regard to seek
-    and tell, currently only line-wise seek and tell are supported by this
-    module. If read() is used to read partial lines, tell() will still give
-    the position of the end of the last line read.
-
-    Under windows, tell() seems to return erroneously when reading files
-    with unix line endings.
-
-    Under windows, warnings may be generated when using the PerlIO::via
-    interface to read UTF-16LE and UTF-32LE encoded files. This seems to be
-    a bug in the relevant encoding(...) layers.
-
-AUTHOR
-    Matt Lawrence <mattlaw at cpan.org>
-
-    With thanks to Mark Fowler and Steve Purkis for additional tests and
-    advice.
-
-COPYRIGHT
-    Copyright 2005 Matt Lawrence, All Rights Reserved.
-
-    This program is free software; you can redistribute it and/or modify it
-    under the same terms as Perl itself.
-
+NAME
+    File::BOM - Utilities for handling Byte Order Marks
+
+SYNOPSIS
+        use File::BOM qw( :all )
+
+  high-level functions
+        # read a file with encoding from the BOM:
+        open_bom(FH, $file)
+        open_bom(FH, $file, ':utf8') # the same but with a default encoding
+
+        # get encoding too
+        $encoding = open_bom(FH, $file, ':utf8');
+
+        # open a potentially unseekable file:
+        ($encoding, $spillage) = open_bom(FH, $file, ':utf8');
+
+        # change encoding of an open handle according to BOM
+        $encoding = defuse(*HANDLE);
+        ($encoding, $spillage) = defuse(*HANDLE);
+
+        # Decode a string according to leading BOM:
+        $unicode = decode_from_bom($string_with_bom);
+    
+        # Decode a string and get the encoding:
+        ($unicode, $encoding) = decode_from_bom($string_with_bom)
+
+  PerlIO::via interface
+        # Read the Right Thing from a unicode file with BOM:
+        open(HANDLE, '<:via(File::BOM)', $filename)
+
+        # Writing little-endian UTF-16 file with BOM:
+        open(HANDLE, '>:encoding(UTF-16LE):via(File::BOM)', $filename)
+
+  lower-level functions
+        # read BOM encoding from a filehandle:
+        $encoding = get_encoding_from_filehandle(FH)
+
+        # Get encoding even if FH is unseekable:
+        ($encoding, $spillage) = get_encoding_from_filehandle(FH);
+
+        # Get encoding from a known unseekable handle:
+        ($encoding, $spillage) = get_encoding_from_stream(FH);
+
+        # get encoding and BOM length from BOM at start of string:
+        ($encoding, $offset) = get_encoding_from_bom($string);
+
+  variables
+        # print a BOM for a known encoding
+        print FH $enc2bom{$encoding};
+
+        # get an encoding from a known BOM
+        $enc = $bom2enc{$bom}
+
+DESCRIPTION
+    This module provides functions for handling unicode byte order marks,
+    which are to be found at the beginning of some files and streams.
+
+    For details about what a byte order mark is, see
+    <http://www.unicode.org/unicode/faq/utf_bom.html#BOM>
+
+    The intention of File::BOM is for files with BOMs to be readable as
+    seamlessly as possible, regardless of the encoding used. To that end,
+    several different interfaces are available, as shown in the synopsis
+    above.
+
+EXPORTS
+    Nothing by default.
+
+  symbols
+    *   open_bom()
+
+    *   defuse()
+
+    *   decode_from_bom()
+
+    *   get_encoding_from_filehandle()
+
+    *   get_encoding_from_stream()
+
+    *   get_encoding_from_bom()
+
+    *   %bom2enc
+
+    *   %enc2bom
+
+  tags
+    *   :all
+
+        All of the above
+
+    *   :subs
+
+        subroutines only
+
+    *   :vars
+
+        just %bom2enc and %enc2bom
+
+VARIABLES
+  %bom2enc
+    Maps Byte Order marks to their encodings.
+
+    The keys of this hash are strings which represent the BOMs, the values
+    are their encodings, in a format which is understood by Encode
+
+    The encodings represented in this hash are: UTF-8, UTF-16BE, UTF-16LE,
+    UTF-32BE and UTF-32LE
+
+  %enc2bom
+    A reverse-lookup hash for bom2enc, with a few aliases used in Encode,
+    namely utf8, iso-10646-1 and UCS-2.
+
+    Note that UTF-16, UTF-32 and UCS-4 are not included in this hash. Mainly
+    because Encode::encode automatically puts BOMs on output. See
+    Encode::Unicode
+
+FUNCTIONS
+  open_bom
+        $encoding = open_bom(HANDLE, $filename, $default_mode)
+
+        ($encoding, $spill) = open_bom(HANDLE, $filename, $default_mode)
+
+    opens HANDLE for reading on $filename, setting the mode to the
+    appropriate encoding for the BOM stored in the file.
+
+    On failure, a fatal error is raised, see the DIAGNOSTICS section for
+    details on how to catch these. This is in order to allow the return
+    value(s) to be used for other purposes.
+
+    If the file doesn't contain a BOM, $default_mode is used instead. Hence:
+
+        open_bom(FH, 'my_file.txt', ':utf8')
+
+    Opens my_file.txt for reading in an appropriate encoding found from the
+    BOM in that file, or as a UTF-8 file if none is found.
+
+    In the absence of a $default_mode argument, the following 2 calls should
+    be equivalent:
+
+        open_bom(FH, 'no_bom.txt');
+
+        open(FH, '<', 'no_bom.txt');
+
+    If an undefined value is passed as the handle, a symbol will be
+    generated for it like open() does:
+
+        # create filehandle on the fly
+        $enc = open_bom(my $fh, $filename, ':utf8');
+        $line = <$fh>;
+
+    The filehandle will be cued up to read after the BOM. Unseekable files
+    (e.g. fifos) will cause croaking, unless called in list context to catch
+    spillage from the handle. Any spillage will be automatically decoded
+    from the encoding, if found.
+
+        e.g.
+
+        # croak if my_socket is unseekable
+        open_bom(FH, 'my_socket');
+
+        # keep spillage if my_socket is unseekable
+        ($encoding, $spillage) = open_bom(FH, 'my_socket');
+
+        # discard any spillage from open_bom
+        ($encoding) = open_bom(FH, 'my_socket');
+
+  defuse
+        $enc = defuse(FH);
+
+        ($enc, $spill) = defuse(FH);
+
+    FH should be a filehandle opened for reading, it will have the relevant
+    encoding layer pushed onto it by binmode if a BOM is found. Spillage
+    should be Unicode, not bytes.
+
+    Any uncaptured spillage will be silently lost. If the handle is
+    unseekable, use list context to avoid data loss.
+
+    If no BOM is found, the mode will be unaffected.
+
+  decode_from_bom
+        $unicode_string = decode_from_bom($string, $default, $check)
+
+        ($unicode_string, $encoding) = decode_from_bom($string, $default, $check)
+
+    Reads a BOM from the beginning of $string, decodes $string (minus the
+    BOM) and returns it to you as a perl unicode string.
+
+    If $string doesn't have a BOM, $default is used instead.
+
+    $check, if supplied, is passed to Encode::decode as the third argument.
+
+    If there's no BOM and no default, the original string is returned and
+    encoding is ''.
+
+    See Encode
+
+  get_encoding_from_filehandle
+        $encoding = get_encoding_from_filehandle(HANDLE)
+
+        ($encoding, $spillage) = get_encoding_from_filehandle(HANDLE)
+
+    Returns the encoding found in the given filehandle.
+
+    The handle should be opened in a non-unicode way (e.g. mode '<:bytes')
+    so that the BOM can be read in its natural state.
+
+    After calling, the handle will be set to read at a point after the BOM
+    (or at the beginning of the file if no BOM was found)
+
+    If called in scalar context, unseekable handles cause a croak().
+
+    If called in list context, unseekable handles will be read byte-by-byte
+    and any spillage will be returned. See get_encoding_from_stream()
+
+  get_encoding_from_stream
+        ($encoding, $spillage) = get_encoding_from_stream(*FH);
+
+    Read a BOM from an unrewindable source. This means reading the stream
+    one byte at a time until either a BOM is found or every possible BOM is
+    ruled out. Any non-BOM bytes read from the handle will be returned in
+    $spillage.
+
+    If a BOM is found and the spillage contains a partial character (judging
+    by the expected character width for the encoding) more bytes will be
+    read from the handle to ensure that a complete character is returned.
+
+    Spillage is always in bytes, not characters.
+
+    This function is less efficient than get_encoding_from_filehandle, but
+    should work just as well on a seekable handle as on an unseekable one.
+
+  get_encoding_from_bom
+        ($encoding, $offset) = get_encoding_from_bom($string)
+
+    Returns the encoding and length in bytes of the BOM in $string.
+
+    If there is no BOM, an empty string is returned and $offset is zero.
+
+    To get the data from the string, the following should work:
+
+        use Encode;
+
+        my($encoding, $offset) = get_encoding_from_bom($string);
+
+        if ($encoding) {
+            $string = decode($encoding, substr($string, $offset))
+        }
+
+PerlIO::via interface
+    File::BOM can be used as a PerlIO::via interface.
+
+        open(HANDLE, '<:via(File::BOM)', 'my_file.txt');
+
+        open(HANDLE, '>:encoding(UTF-16LE):via(File::BOM)', 'out_file.txt');
+        print HANDLE "foo\n"; # BOM is written to file here
+
+    This method is less prone to errors on non-seekable files as spillage is
+    incorporated into an internal buffer, but it doesn't give you any
+    information about the encoding being used, or indeed whether or not a
+    BOM was present.
+
+    There are a few known problems with this interface, especially
+    surrounding seek() and tell(), please see the BUGS section for more
+    details about this.
+
+  Reading
+    The via(File::BOM) layer must be added before the handle is read from,
+    otherwise any BOM will be missed. If there is no BOM, no decoding will
+    be done.
+
+    Because of a limitation in PerlIO::via, read() always works on bytes,
+    not characters. BOM decoding will still be done but output will be bytes
+    of UTF-8.
+
+        open(BOM, '<:via(File::BOM)', $file)
+        $bytes_read = read(BOM, $buffer, $length);
+        $unicode = decode('UTF-8', $buffer, Encode::FB_QUIET);
+
+        # Now $unicode is valid unicode and $buffer contains any left-over bytes
+
+  Writing
+    Add the via(File::BOM) layer on top of a unicode encoding layer to print
+    a BOM at the start of the output file. This needs to be done before any
+    data is written. The BOM is written as part of the first print command
+    on the handle, so if you don't print anything to the handle, you won't
+    get a BOM.
+
+    There is a "Wide character in print" warning generated when the
+    via(File::BOM) layer doesn't receive utf8 on writing. This glitch was
+    resolved in perl version 5.8.7, but if your perl version is older than
+    that, you'll need to make sure that the via(File::BOM) layer receives
+    utf8 like this:
+
+        # This works OK
+        open(FH, '>:encoding(UTF-16LE):via(File::BOM):utf8', $filename)
+
+        # This generates warnings with older perls
+        open(FH, '>:encoding(UTF-16LE):via(File::BOM)', $filename)
+
+  Seeking
+    Seeking with SEEK_SET results in an offset equal to the length of any
+    detected BOM being applied to the position parameter. Thus:
+
+        # Seek to end of BOM (not start of file!)
+        seek(FILE_BOM_HANDLE, 0, SEEK_SET)
+
+  Telling
+    In order to work correctly with seek(), tell() also returns a position
+    adjusted by the length of the BOM.
+
+SEE ALSO
+    *   Encode
+
+    *   Encode::Unicode
+
+    *   <http://www.unicode.org/unicode/faq/utf_bom.html#BOM>
+
+DIAGNOSTICS
+    The following exceptions are raised via croak()
+
+    *   Couldn't read '<filename>': $!
+
+        open_bom() couldn't open the given file for reading
+
+    *   Couldn't set binmode of handle opened on '<filename>' to '<mode>':
+        $!
+
+        open_bom() couldn't set the binmode of the handle
+
+    *   No string
+
+        decode_from_bom called on an undefined value
+
+    *   Unseekable handle: $!
+
+        get_encoding_from_filehandle() or open_bom() called on an unseekable
+        file or handle in scalar context.
+
+    *   Couldn't read from handle: $!
+
+        _get_encoding_seekable() couldn't read the handle. This function is
+        called from get_encoding_from_filehandle(), defuse() and open_bom()
+
+    *   Couldn't reset read position: $!
+
+        _get_encoding_seekable couldn't seek to the position after the BOM.
+
+    *   Couldn't read byte: $!
+
+        get_encoding_from_stream couldn't read from the handle. This
+        function is called from get_encoding_from_filehandle() and
+        open_bom() when the handle or file is unseekable.
+
+BUGS
+    Older versions of PerlIO::via have a few problems with writing, see
+    above.
+
+    The current version of PerlIO::via has limitations with regard to seek
+    and tell, currently only line-wise seek and tell are supported by this
+    module. If read() is used to read partial lines, tell() will still give
+    the position of the end of the last line read.
+
+    Under windows, tell() seems to return erroneously when reading files
+    with unix line endings.
+
+    Under windows, warnings may be generated when using the PerlIO::via
+    interface to read UTF-16LE and UTF-32LE encoded files. This seems to be
+    a bug in the relevant encoding(...) layers.
+
+AUTHOR
+    Matt Lawrence <mattlaw at cpan.org>
+
+    With thanks to Mark Fowler and Steve Purkis for additional tests and
+    advice.
+
+COPYRIGHT
+    Copyright 2005 Matt Lawrence, All Rights Reserved.
+
+    This program is free software; you can redistribute it and/or modify it
+    under the same terms as Perl itself.
+
diff --git a/lib/File/BOM.pm b/lib/File/BOM.pm
index 18d2c9b..05b071e 100644
--- a/lib/File/BOM.pm
+++ b/lib/File/BOM.pm
@@ -101,7 +101,7 @@ my @subs = qw(
 
 my @vars = qw( %bom2enc %enc2bom );
 
-our $VERSION = '0.14';
+our $VERSION = '0.15';
 
 our @EXPORT = ();
 our @EXPORT_OK = ( @subs, @vars );
@@ -234,7 +234,7 @@ If the file doesn't contain a BOM, $default_mode is used instead. Hence:
 Opens my_file.txt for reading in an appropriate encoding found from the BOM in
 that file, or as a UTF-8 file if none is found.
 
-In the absense of a $default_mode argument, the following 2 calls should be equivalent:
+In the absence of a $default_mode argument, the following 2 calls should be equivalent:
 
     open_bom(FH, 'no_bom.txt');
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libfile-bom-perl.git



More information about the Pkg-perl-cvs-commits mailing list