[libcbor-xs-perl] 02/07: Imported Upstream version 1.50

Wed May 25 22:14:57 UTC 2016

This is an automated email from the git hooks/post-receive script.

nickm-guest pushed a commit to branch master
in repository libcbor-xs-perl.

commit 439f260710caad832debfb0a9635630a385fa829
Author: Nick Morrott <knowledgejunkie at gmail.com>
Date:   Wed May 25 19:37:27 2016 +0100

    Imported Upstream version 1.50
---
 Changes       |  22 +++++++
 MANIFEST      |   1 +
 META.json     |   5 +-
 META.yml      |   5 +-
 Makefile.PL   |   3 +
 README        | 114 +++++++++++++++++++++++++++++-----
 XS.pm         | 192 ++++++++++++++++++++++++++++++++++++++++------------------
 XS.xs         |  52 +++++++++++++---
 t/53_bignum.t |  25 +++++++-
 t/58_hv.t     |  70 +++++++++++++++++++++
 10 files changed, 398 insertions(+), 91 deletions(-)

diff --git a/Changes b/Changes
index c621b57..65274cd 100644
--- a/Changes
+++ b/Changes
@@ -5,6 +5,28 @@ TODO: document encode_cbor_sharing?
 TODO: weaken cyclic structures?
 TODO: allowed_classes or so?
 
+1.5  Wed Apr 27 11:38:39 CEST 2016
+        - Math::BigFloat madness workaround, see
+          http://blog.schmorp.de/2016-04-23-mathbigfloat-maintainer-fail.html
+          (bugreport by zdm at softvisio.net).
+        - add text_keys and text_strings options to force CBOR text encoding
+          for perl hash keys or all strings, as a result of discussions
+          with Fredrik Ljunggren.
+        - implement support for arbitrary-exponent numbers (see
+          http://peteroupc.github.io/CBOR/bigfrac.html, tags 264 and 265)
+          for both en- and decoding.
+        - implement support for rational numbers (see
+          http://peteroupc.github.io/CBOR/rational.html, tag 30) for both
+          en- and decoding.
+        - the above effectively implements all registered CBOR extensions
+          in a sensible manner.
+        - remove some weird dead code that was duplicated (%FILTER).
+        - add t/58_hv.t, which tests hashes and the new text_* flags.
+          hashes apparently were not encoded at all in any of the existing
+          tests.
+        - document Math::BigFloat base-2 performance/crash issues.
+        - use stability canary.
+
 1.41 Thu 25 Feb 15:22:03 CET 2016
 	- avoid perl panics on nested FREEZE/THAW calls (testcase by
           Victor Efimov).
diff --git a/MANIFEST b/MANIFEST
index d2ba32e..44caa1a 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -16,6 +16,7 @@ t/54_sharing.t
 t/55_utf8.t
 t/56_filter.t
 t/57_incr.t
+t/58_hv.t
 t/99_binary.t
 META.yml                                 Module YAML meta-data (added by MakeMaker)
 META.json                                Module JSON meta-data (added by MakeMaker)
diff --git a/META.json b/META.json
index 6f0bce4..28d10a3 100644
--- a/META.json
+++ b/META.json
@@ -27,7 +27,8 @@
       },
       "configure" : {
          "requires" : {
-            "ExtUtils::MakeMaker" : "0"
+            "Canary::Stability" : "0",
+            "ExtUtils::MakeMaker" : "6.52"
          }
       },
       "runtime" : {
@@ -38,5 +39,5 @@
       }
    },
    "release_status" : "stable",
-   "version" : 1.41
+   "version" : 1.5
 }
diff --git a/META.yml b/META.yml
index bba02ce..6a8cc94 100644
--- a/META.yml
+++ b/META.yml
@@ -5,7 +5,8 @@ author:
 build_requires:
   ExtUtils::MakeMaker: '0'
 configure_requires:
-  ExtUtils::MakeMaker: '0'
+  Canary::Stability: '0'
+  ExtUtils::MakeMaker: '6.52'
 dynamic_config: 1
 generated_by: 'ExtUtils::MakeMaker version 7.1, CPAN::Meta::Converter version 2.150001'
 license: unknown
@@ -20,4 +21,4 @@ no_index:
 requires:
   Types::Serialiser: '0'
   common::sense: '0'
-version: 1.41
+version: 1.5
diff --git a/Makefile.PL b/Makefile.PL
index 31f6ed9..0cdcfe0 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -1,6 +1,8 @@
 use 5.010001; # for utf-8, and Time::Piece
 use ExtUtils::MakeMaker;
 
+use Canary::Stability CBOR::XS => 1, 5.010001;
+
 WriteMakefile(
     dist	 => {
 	             PREOP	=> 'pod2text XS.pm | tee README >$(DISTVNAME)/README; chmod -R u=rwX,go=rX . ;',
@@ -13,5 +15,6 @@ WriteMakefile(
        common::sense     => 0,
        Types::Serialiser => 0,
     },
+    CONFIGURE_REQUIRES => { ExtUtils::MakeMaker => 6.52, Canary::Stability => 0 },
 );
 
diff --git a/README b/README
index e35943c..45cb122 100644
--- a/README
+++ b/README
@@ -207,6 +207,49 @@ OBJECT-ORIENTED INTERFACE
         This option does not affect "decode" in any way - string references
         will always be decoded properly if present.
 
+    $cbor = $cbor->text_keys ([$enable])
+    $enabled = $cbor->get_text_keys
+        If $enabled is true (or missing), then "encode" will encode all perl
+        hash keys as CBOR text strings/UTF-8 string, upgrading them as
+        needed.
+
+        If $enable is false (the default), then "encode" will encode hash
+        keys normally - upgraded perl strings (strings internally encoded as
+        UTF-8) as CBOR text strings, and downgraded perl strings as CBOR
+        byte strings.
+
+        This option does not affect "decode" in any way.
+
+        This option is useful for interoperability with CBOR decoders that
+        don't treat byte strings as a form of text. It is especially useful
+        as Perl gives very little control over hash keys.
+
+        Enabling this option can be slow, as all downgraded hash keys that
+        are encoded need to be scanned and converted to UTF-8.
+
+    $cbor = $cbor->text_strings ([$enable])
+    $enabled = $cbor->get_text_strings
+        This option works similar to "text_keys", above, but works on all
+        strings (including hash keys), so "text_keys" has no further effect
+        after enabling "text_strings".
+
+        If $enabled is true (or missing), then "encode" will encode all perl
+        strings as CBOR text strings/UTF-8 strings, upgrading them as
+        needed.
+
+        If $enable is false (the default), then "encode" will encode strings
+        normally (but see "text_keys") - upgraded perl strings (strings
+        internally encoded as UTF-8) as CBOR text strings, and downgraded
+        perl strings as CBOR byte strings.
+
+        This option does not affect "decode" in any way.
+
+        This option has similar advantages and disadvantages as "text_keys".
+        In addition, this option effectively removes the ability to encode
+        byte strings, which might break some "FREEZE" and "TO_CBOR" methods
+        that rely on this, such as bignum encoding, so this option is mainly
+        useful for very simple data.
+
     $cbor = $cbor->validate_utf8 ([$enable])
     $enabled = $cbor->get_validate_utf8
         If $enable is true (or missing), then "decode" will validate that
@@ -219,7 +262,7 @@ OBJECT-ORIENTED INTERFACE
 
         If $enable is false (the default), then "decode" will blindly accept
         UTF-8 data, marking them as valid UTF-8 in the resulting data
-        structure regardless of whether thats true or not.
+        structure regardless of whether that's true or not.
 
         Perl isn't too happy about corrupted UTF-8 in strings, but should
         generally not crash or do similarly evil things. Extensions might be
@@ -411,7 +454,7 @@ MAPPING
         Perl hash references become CBOR maps. As there is no inherent
         ordering in hash keys (or CBOR maps), they will usually be encoded
         in a pseudo-random order. This order can be different each time a
-        hahs is encoded.
+        hash is encoded.
 
         Currently, tied hashes will use the indefinite-length format, while
         normal hashes will use the fixed-length format.
@@ -470,15 +513,18 @@ MAPPING
            $x .= "";    # another, more awkward way to stringify
            print $x;    # perl does it for you, too, quite often
 
-        You can force whether a string ie encoded as byte or text string by
-        using "utf8::upgrade" and "utf8::downgrade"):
+        You can force whether a string is encoded as byte or text string by
+        using "utf8::upgrade" and "utf8::downgrade" (if "text_strings" is
+        disabled):
 
           utf8::upgrade $x;   # encode $x as text string
           utf8::downgrade $x; # encode $x as byte string
 
         Perl doesn't define what operations up- and downgrade strings, so if
         the difference between byte and text is important, you should up- or
-        downgrade your string as late as possible before encoding.
+        downgrade your string as late as possible before encoding. You can
+        also force the use of CBOR text strings by using "text_keys" or
+        "text_strings".
 
         You can force the type to be a CBOR number by numifying it:
 
@@ -583,7 +629,6 @@ MAPPING
 
        sub URI::THAW {
           my ($class, $serialiser, $uri) = @_;
-
           $class->new ($uri)
        }
 
@@ -689,7 +734,7 @@ TAG HANDLING AND EXTENSIONS
 
   ENFORCED TAGS
     These tags are always handled when decoding, and their handling cannot
-    be overriden by the user.
+    be overridden by the user.
 
     26 (perl-object, <http://cbor.schmorp.de/perl-object>)
         These tags are automatically created (and decoded) for serialisable
@@ -722,8 +767,8 @@ TAG HANDLING AND EXTENSIONS
 
     22098 (indirection, <http://cbor.schmorp.de/indirection>)
         This tag is automatically generated when a reference are encountered
-        (with the exception of hash and array refernces). It is converted to
-        a reference when decoding.
+        (with the exception of hash and array references). It is converted
+        to a reference when decoding.
 
     55799 (self-describe CBOR, RFC 7049)
         This value is not generated on encoding (unless explicitly requested
@@ -731,8 +776,8 @@ TAG HANDLING AND EXTENSIONS
 
   NON-ENFORCED TAGS
     These tags have default filters provided when decoding. Their handling
-    can be overriden by changing the %CBOR::XS::FILTER entry for the tag, or
-    by providing a custom "filter" callback when decoding.
+    can be overridden by changing the %CBOR::XS::FILTER entry for the tag,
+    or by providing a custom "filter" callback when decoding.
 
     When they result in decoding into a specific Perl class, the module
     usually provides a corresponding "TO_CBOR" method as well.
@@ -757,15 +802,23 @@ TAG HANDLING AND EXTENSIONS
         "Math::BigInt::TO_CBOR" method encodes "small" bigints into normal
         CBOR integers, and others into positive/negative CBOR bignums.
 
-    4, 5 (decimal fraction/bigfloat)
+    4, 5, 264, 265 (decimal fraction/bigfloat)
         Both decimal fractions and bigfloats are decoded into Math::BigFloat
         objects. The corresponding "Math::BigFloat::TO_CBOR" method *always*
-        encodes into a decimal fraction.
+        encodes into a decimal fraction (either tag 4 or 264).
 
-        CBOR cannot represent bigfloats with *very* large exponents -
-        conversion of such big float objects is undefined.
+        NaN and infinities are not encoded properly, as they cannot be
+        represented in CBOR.
 
-        Also, NaN and infinities are not encoded properly.
+        See "BIGNUM SECURITY CONSIDERATIONS" for more info.
+
+    30 (rational numbers)
+        These tags are decoded into Math::BigRat objects. The corresponding
+        "Math::BigRat::TO_CBOR" method encodes rational numbers with
+        denominator 1 via their numerator only, i.e., they become normal
+        integers or "bignums".
+
+        See "BIGNUM SECURITY CONSIDERATIONS" for more info.
 
     21, 22, 23 (expected later JSON conversion)
         CBOR::XS is not a CBOR-to-JSON converter, and will simply ignore
@@ -822,6 +875,35 @@ SECURITY CONSIDERATIONS
     information you might want to make sure that exceptions thrown by
     CBOR::XS will not end up in front of untrusted eyes.
 
+BIGNUM SECURITY CONSIDERATIONS
+    CBOR::XS provides a "TO_CBOR" method for both Math::BigInt and
+    Math::BigFloat that tries to encode the number in the simplest possible
+    way, that is, either a CBOR integer, a CBOR bigint/decimal fraction (tag
+    4) or an arbitrary-exponent decimal fraction (tag 264). Rational numbers
+    (Math::BigRat, tag 30) can also contain bignums as members.
+
+    CBOR::XS will also understand base-2 bigfloat or arbitrary-exponent
+    bigfloats (tags 5 and 265), but it will never generate these on its own.
+
+    Using the built-in Math::BigInt::Calc support, encoding and decoding
+    decimal fractions is generally fast. Decoding bigints can be slow for
+    very big numbers (tens of thousands of digits, something that could
+    potentially be caught by limiting the size of CBOR texts), and decoding
+    bigfloats or arbitrary-exponent bigfloats can be *extremely* slow
+    (minutes, decades) for large exponents (roughly 40 bit and longer).
+
+    Additionally, Math::BigInt can take advantage of other bignum libraries,
+    such as Math::GMP, which cannot handle big floats with large exponents,
+    and might simply abort or crash your program, due to their code quality.
+
+    This can be a concern if you want to parse untrusted CBOR. If it is, you
+    might want to disable decoding of tag 2 (bigint) and 3 (negative bigint)
+    types. You should also disable types 5 and 265, as these can be slow
+    even without bigints.
+
+    Disabling bigints will also partially or fully disable types that rely
+    on them, e.g. rational numbers that use bignums.
+
 CBOR IMPLEMENTATION NOTES
     This section contains some random implementation notes. They do not
     describe guaranteed behaviour, but merely behaviour as-is implemented
diff --git a/XS.pm b/XS.pm
index ffcf9f3..6877316 100644
--- a/XS.pm
+++ b/XS.pm
@@ -66,7 +66,7 @@ package CBOR::XS;
 
 use common::sense;
 
-our $VERSION = 1.41;
+our $VERSION = 1.5;
 our @ISA = qw(Exporter);
 
 our @EXPORT = qw(encode_cbor decode_cbor);
@@ -249,6 +249,50 @@ the standard CBOR way.
 This option does not affect C<decode> in any way - string references will
 always be decoded properly if present.
 
+=item $cbor = $cbor->text_keys ([$enable])
+
+=item $enabled = $cbor->get_text_keys
+
+If C<$enabled> is true (or missing), then C<encode> will encode all
+perl hash keys as CBOR text strings/UTF-8 string, upgrading them as needed.
+
+If C<$enable> is false (the default), then C<encode> will encode hash keys
+normally - upgraded perl strings (strings internally encoded as UTF-8) as
+CBOR text strings, and downgraded perl strings as CBOR byte strings.
+
+This option does not affect C<decode> in any way.
+
+This option is useful for interoperability with CBOR decoders that don't
+treat byte strings as a form of text. It is especially useful as Perl
+gives very little control over hash keys.
+
+Enabling this option can be slow, as all downgraded hash keys that are
+encoded need to be scanned and converted to UTF-8.
+
+=item $cbor = $cbor->text_strings ([$enable])
+
+=item $enabled = $cbor->get_text_strings
+
+This option works similar to C<text_keys>, above, but works on all strings
+(including hash keys), so C<text_keys> has no further effect after
+enabling C<text_strings>.
+
+If C<$enabled> is true (or missing), then C<encode> will encode all perl
+strings as CBOR text strings/UTF-8 strings, upgrading them as needed.
+
+If C<$enable> is false (the default), then C<encode> will encode strings
+normally (but see C<text_keys>) - upgraded perl strings (strings
+internally encoded as UTF-8) as CBOR text strings, and downgraded perl
+strings as CBOR byte strings.
+
+This option does not affect C<decode> in any way.
+
+This option has similar advantages and disadvantages as C<text_keys>. In
+addition, this option effectively removes the ability to encode byte
+strings, which might break some C<FREEZE> and C<TO_CBOR> methods that rely
+on this, such as bignum encoding, so this option is mainly useful for very
+simple data.
+
 =item $cbor = $cbor->validate_utf8 ([$enable])
 
 =item $enabled = $cbor->get_validate_utf8
@@ -263,7 +307,7 @@ of the official UTF-8.
 
 If C<$enable> is false (the default), then C<decode> will blindly accept
 UTF-8 data, marking them as valid UTF-8 in the resulting data structure
-regardless of whether thats true or not.
+regardless of whether that's true or not.
 
 Perl isn't too happy about corrupted UTF-8 in strings, but should
 generally not crash or do similarly evil things. Extensions might be not
@@ -483,7 +527,7 @@ is meant by a perl value.
 
 Perl hash references become CBOR maps. As there is no inherent ordering in
 hash keys (or CBOR maps), they will usually be encoded in a pseudo-random
-order. This order can be different each time a hahs is encoded.
+order. This order can be different each time a hash is encoded.
 
 Currently, tied hashes will use the indefinite-length format, while normal
 hashes will use the fixed-length format.
@@ -546,15 +590,16 @@ You can force the type to be a CBOR string by stringifying it:
    $x .= "";    # another, more awkward way to stringify
    print $x;    # perl does it for you, too, quite often
 
-You can force whether a string ie encoded as byte or text string by using
-C<utf8::upgrade> and C<utf8::downgrade>):
+You can force whether a string is encoded as byte or text string by using
+C<utf8::upgrade> and C<utf8::downgrade> (if C<text_strings> is disabled):
 
   utf8::upgrade $x;   # encode $x as text string
   utf8::downgrade $x; # encode $x as byte string
 
 Perl doesn't define what operations up- and downgrade strings, so if the
 difference between byte and text is important, you should up- or downgrade
-your string as late as possible before encoding.
+your string as late as possible before encoding. You can also force the
+use of CBOR text strings by using C<text_keys> or C<text_strings>.
 
 You can force the type to be a CBOR number by numifying it:
 
@@ -665,7 +710,6 @@ would be a possible implementation:
 
    sub URI::THAW {
       my ($class, $serialiser, $uri) = @_;
-
       $class->new ($uri)
    }
 
@@ -806,7 +850,7 @@ additional tags (such as base64url).
 =head2 ENFORCED TAGS
 
 These tags are always handled when decoding, and their handling cannot be
-overriden by the user.
+overridden by the user.
 
 =over 4
 
@@ -844,7 +888,7 @@ encoded, however, when C<pack_strings> is enabled.
 =item 22098 (indirection, L<http://cbor.schmorp.de/indirection>)
 
 This tag is automatically generated when a reference are encountered (with
-the exception of hash and array refernces). It is converted to a reference
+the exception of hash and array references). It is converted to a reference
 when decoding.
 
 =item 55799 (self-describe CBOR, RFC 7049)
@@ -857,7 +901,7 @@ the user), and is simply ignored when decoding.
 =head2 NON-ENFORCED TAGS
 
 These tags have default filters provided when decoding. Their handling can
-be overriden by changing the C<%CBOR::XS::FILTER> entry for the tag, or by
+be overridden by changing the C<%CBOR::XS::FILTER> entry for the tag, or by
 providing a custom C<filter> callback when decoding.
 
 When they result in decoding into a specific Perl class, the module
@@ -885,16 +929,25 @@ These tags are decoded into L<Math::BigInt> objects. The corresponding
 C<Math::BigInt::TO_CBOR> method encodes "small" bigints into normal CBOR
 integers, and others into positive/negative CBOR bignums.
 
-=item 4, 5 (decimal fraction/bigfloat)
+=item 4, 5, 264, 265 (decimal fraction/bigfloat)
 
 Both decimal fractions and bigfloats are decoded into L<Math::BigFloat>
 objects. The corresponding C<Math::BigFloat::TO_CBOR> method I<always>
-encodes into a decimal fraction.
+encodes into a decimal fraction (either tag 4 or 264).
 
-CBOR cannot represent bigfloats with I<very> large exponents - conversion
-of such big float objects is undefined.
+NaN and infinities are not encoded properly, as they cannot be represented
+in CBOR.
 
-Also, NaN and infinities are not encoded properly.
+See L<BIGNUM SECURITY CONSIDERATIONS> for more info.
+
+=item 30 (rational numbers)
+
+These tags are decoded into L<Math::BigRat> objects. The corresponding
+C<Math::BigRat::TO_CBOR> method encodes rational numbers with denominator
+C<1> via their numerator only, i.e., they become normal integers or
+C<bignums>.
+
+See L<BIGNUM SECURITY CONSIDERATIONS> for more info.
 
 =item 21, 22, 23 (expected later JSON conversion)
 
@@ -910,48 +963,6 @@ C<URI::TO_CBOR> method again results in a CBOR URI value.
 
 =cut
 
-our %FILTER = (
-   # 0 # rfc4287 datetime, utf-8
-   # 1 # unix timestamp, any
-
-   2 => sub { # pos bigint
-      require Math::BigInt;
-      Math::BigInt->new ("0x" . unpack "H*", pop)
-   },
-
-   3 => sub { # neg bigint
-      require Math::BigInt;
-      -Math::BigInt->new ("0x" . unpack "H*", pop)
-   },
-
-   4 => sub { # decimal fraction, array
-      require Math::BigFloat;
-      Math::BigFloat->new ($_[1][1] . "E" . $_[1][0])
-   },
-
-   5 => sub { # bigfloat, array
-      require Math::BigFloat;
-      scalar Math::BigFloat->new ($_[1][1])->blsft ($_[1][0], 2)
-   },
-
-   21 => sub { pop }, # expected conversion to base64url encoding
-   22 => sub { pop }, # expected conversion to base64 encoding
-   23 => sub { pop }, # expected conversion to base16 encoding
-
-   # 24 # embedded cbor, byte string
-
-   32 => sub {
-      require URI;
-      URI->new (pop)
-   },
-
-   # 33 # base64url rfc4648, utf-8
-   # 34 # base64 rfc46484, utf-8
-   # 35 # regex pcre/ecma262, utf-8
-   # 36 # mime message rfc2045, utf-8
-);
-
-
 =head1 CBOR and JSON
 
 CBOR is supposed to implement a superset of the JSON data model, and is,
@@ -1002,6 +1013,39 @@ structures in its error messages, so when you serialise sensitive
 information you might want to make sure that exceptions thrown by CBOR::XS
 will not end up in front of untrusted eyes.
 
+
+=head1 BIGNUM SECURITY CONSIDERATIONS
+
+CBOR::XS provides a C<TO_CBOR> method for both L<Math::BigInt> and
+L<Math::BigFloat> that tries to encode the number in the simplest possible
+way, that is, either a CBOR integer, a CBOR bigint/decimal fraction (tag
+4) or an arbitrary-exponent decimal fraction (tag 264). Rational numbers
+(L<Math::BigRat>, tag 30) can also contain bignums as members.
+
+CBOR::XS will also understand base-2 bigfloat or arbitrary-exponent
+bigfloats (tags 5 and 265), but it will never generate these on its own.
+
+Using the built-in L<Math::BigInt::Calc> support, encoding and decoding
+decimal fractions is generally fast. Decoding bigints can be slow for very
+big numbers (tens of thousands of digits, something that could potentially
+be caught by limiting the size of CBOR texts), and decoding bigfloats or
+arbitrary-exponent bigfloats can be I<extremely> slow (minutes, decades)
+for large exponents (roughly 40 bit and longer).
+
+Additionally, L<Math::BigInt> can take advantage of other bignum
+libraries, such as L<Math::GMP>, which cannot handle big floats with large
+exponents, and might simply abort or crash your program, due to their code
+quality.
+
+This can be a concern if you want to parse untrusted CBOR. If it is, you
+might want to disable decoding of tag 2 (bigint) and 3 (negative bigint)
+types. You should also disable types 5 and 265, as these can be slow even
+without bigints.
+
+Disabling bigints will also partially or fully disable types that rely on
+them, e.g. rational numbers that use bignums.
+
+
 =head1 CBOR IMPLEMENTATION NOTES
 
 This section contains some random implementation notes. They do not
@@ -1096,9 +1140,24 @@ our %FILTER = (
       Math::BigFloat->new ($_[1][1] . "E" . $_[1][0])
    },
 
+   264 => sub { # decimal fraction with arbitrary exponent
+      require Math::BigFloat;
+      Math::BigFloat->new ($_[1][1] . "E" . $_[1][0])
+   },
+
    5 => sub { # bigfloat, array
       require Math::BigFloat;
-      scalar Math::BigFloat->new ($_[1][1])->blsft ($_[1][0], 2)
+      scalar Math::BigFloat->new ($_[1][1]) * Math::BigFloat->new (2)->bpow ($_[1][0])
+   },
+
+   265 => sub { # bigfloat with arbitrary exponent
+      require Math::BigFloat;
+      scalar Math::BigFloat->new ($_[1][1]) * Math::BigFloat->new (2)->bpow ($_[1][0])
+   },
+
+   30 => sub { # rational number
+      require Math::BigRat;
+      Math::BigRat->new ("$_[1][0]/$_[1][1]") # separate parameters only work in recent versons
    },
 
    21 => sub { pop }, # expected conversion to base64url encoding
@@ -1129,7 +1188,7 @@ sub URI::TO_CBOR {
 }
 
 sub Math::BigInt::TO_CBOR {
-   if ($_[0] >= -2147483648 && $_[0] <= 2147483647) {
+   if (-2147483648 <= $_[0] && $_[0] <= 2147483647) {
       $_[0]->numify
    } else {
       my $hex = substr $_[0]->as_hex, 2;
@@ -1140,7 +1199,20 @@ sub Math::BigInt::TO_CBOR {
 
 sub Math::BigFloat::TO_CBOR {
    my ($m, $e) = $_[0]->parts;
-   tag 4, [$e->numify, $m]
+
+   -9223372036854775808 <= $e && $e <= 18446744073709551615
+      ? tag 4, [$e->numify, $m]
+      : tag 264, [$e, $m]
+}
+
+sub Math::BigRat::TO_CBOR {
+   my ($n, $d) = $_[0]->parts;
+
+   # older versions of BigRat need *1, as they not always return numbers
+
+   $d*1 == 1
+      ? $n*1
+      : tag 30, [$n*1, $d*1]
 }
 
 sub Time::Piece::TO_CBOR {
diff --git a/XS.xs b/XS.xs
index 1611124..9e20259 100644
--- a/XS.xs
+++ b/XS.xs
@@ -102,7 +102,9 @@ enum cbor_tag
 #define F_ALLOW_SHARING   0x00000004UL
 #define F_ALLOW_CYCLES    0x00000008UL
 #define F_PACK_STRINGS    0x00000010UL
-#define F_VALIDATE_UTF8   0x00000020UL
+#define F_TEXT_KEYS       0x00000020UL
+#define F_TEXT_STRINGS    0x00000040UL
+#define F_VALIDATE_UTF8   0x00000080UL
 
 #define INIT_SIZE   32 // initial scalar size to be allocated
 
@@ -278,17 +280,47 @@ encode_tag (enc_t *enc, UV tag)
   encode_uint (enc, MAJOR_TAG, tag);
 }
 
+// exceptional (hopefully) slow path for byte strings that need to be utf8-encoded
+ecb_noinline static void
+encode_str_utf8 (enc_t *enc, int utf8, char *str, STRLEN len)
+{
+  STRLEN ulen = len;
+  U8 *p, *pend = (U8 *)str + len;
+
+  for (p = (U8 *)str; p < pend; ++p)
+    ulen += *p >> 7; // count set high bits
+
+  encode_uint (enc, MAJOR_TEXT, ulen);
+
+  need (enc, ulen);
+  for (p = (U8 *)str; p < pend; ++p)
+    if (*p < 0x80)
+      *enc->cur++ = *p;
+    else
+      {
+        *enc->cur++ = 0xc0 + (*p >> 6);
+        *enc->cur++ = 0x80 + (*p & 63);
+      }
+}
+
 ecb_inline void
-encode_str (enc_t *enc, int utf8, char *str, STRLEN len)
+encode_str (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
 {
+  if (ecb_expect_false (upgrade_utf8))
+    if (!utf8)
+      {
+        encode_str_utf8 (enc, utf8, str, len);
+        return;
+      }
+
   encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len);
   need (enc, len);
   memcpy (enc->cur, str, len);
   enc->cur += len;
 }
 
-static void
-encode_strref (enc_t *enc, int utf8, char *str, STRLEN len)
+ecb_inline void
+encode_strref (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
 {
   if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS))
     {
@@ -309,7 +341,7 @@ encode_strref (enc_t *enc, int utf8, char *str, STRLEN len)
         }
     }
 
-  encode_str (enc, utf8, str, len);
+  encode_str (enc, upgrade_utf8, utf8, str, len);
 }
 
 static void encode_sv (enc_t *enc, SV *sv);
@@ -365,7 +397,7 @@ encode_hv (enc_t *enc, HV *hv)
       if (HeKLEN (he) == HEf_SVKEY)
         encode_sv (enc, HeSVKEY (he));
       else
-        encode_strref (enc, HeKUTF8 (he), HeKEY (he), HeKLEN (he));
+        encode_strref (enc, enc->cbor.flags & (F_TEXT_KEYS | F_TEXT_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he));
 
       encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
     }
@@ -493,7 +525,7 @@ encode_rv (enc_t *enc, SV *sv)
 
           encode_tag (enc, CBOR_TAG_PERL_OBJECT);
           encode_uint (enc, MAJOR_ARRAY, count + 1);
-          encode_strref (enc, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
+          encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
 
           while (count)
             encode_sv (enc, SP[1 - count--]);
@@ -562,7 +594,7 @@ encode_sv (enc_t *enc, SV *sv)
     {
       STRLEN len;
       char *str = SvPV (sv, len);
-      encode_strref (enc, SvUTF8 (sv), str, len);
+      encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len);
     }
   else if (SvNOKp (sv))
     encode_nv (enc, sv);
@@ -1392,6 +1424,8 @@ void shrink (CBOR *self, int enable = 1)
         allow_sharing   = F_ALLOW_SHARING
         allow_cycles    = F_ALLOW_CYCLES
         pack_strings    = F_PACK_STRINGS
+        text_keys       = F_TEXT_KEYS
+        text_strings    = F_TEXT_STRINGS
         validate_utf8   = F_VALIDATE_UTF8
 	PPCODE:
 {
@@ -1410,6 +1444,8 @@ void get_shrink (CBOR *self)
         get_allow_sharing   = F_ALLOW_SHARING
         get_allow_cycles    = F_ALLOW_CYCLES
         get_pack_strings    = F_PACK_STRINGS
+        get_text_keys       = F_TEXT_KEYS
+        get_text_strings    = F_TEXT_STRINGS
         get_validate_utf8   = F_VALIDATE_UTF8
 	PPCODE:
         XPUSHs (boolSV (self->flags & ix));
diff --git a/t/53_bignum.t b/t/53_bignum.t
index daaf36f..d31bc0e 100644
--- a/t/53_bignum.t
+++ b/t/53_bignum.t
@@ -1,8 +1,10 @@
-BEGIN { $| = 1; print "1..100\n"; }
+BEGIN { $| = 1; print "1..105\n"; }
 BEGIN { $^W = 0 } # hate
 
 use CBOR::XS;
 
+use Math::BigInt only => "FastCalc"; # needed for representation stability
+
 print "ok 1\n";
 
 my $t = decode_cbor pack "H*", "82c48221196ab3c5822003";
@@ -14,7 +16,7 @@ $t = encode_cbor $t;
 
 print $t eq (pack "H*", "82c48221196ab3c482200f") ? "" : "not ", "ok 4 # ", (unpack "H*", $t), "\n";
 
-# Math::BigFloat is loaded by now...
+# Math::BigFloat must be loaded by now...
 
 for (5..99) {
    my $n = Math::BigFloat->new ((int rand 1e9) . "." . (int rand 1e9) . "e" . ((int rand 1e8) - 0.5e8));
@@ -26,5 +28,22 @@ for (5..99) {
    print $n != $m ? "not " : "ok $_ # $n eq $m\n";
 }
 
-print "ok 100\n";
+$t = encode_cbor CBOR::XS::tag 264, [Math::BigInt->new ("99999999999999999998"), Math::BigInt->new ("799999999999999999998")];
+$t = decode_cbor $t;
+print "799999999999999999998e+99999999999999999998" eq $t->bsstr ? "" : "not ", "ok 100\n";
+
+$t = encode_cbor $t;
+print "d9010882c249056bc75e2d63100000c2492b5e3af16b187ffffe" eq (unpack "H*", $t) ? "" : "not ", "ok 101\n";
+
+$t = encode_cbor CBOR::XS::tag 30, [4, 2];
+$t = decode_cbor $t;
+print $t eq 2 ? "" : "not ", "ok 102 # $t\n";
+
+$t = encode_cbor $t;
+print "02" eq (unpack "H*", $t) ? "" : "not ", "ok 103\n";
+
+$t = encode_cbor decode_cbor encode_cbor CBOR::XS::tag 30, [Math::BigInt->new (5), 2];
+print "d81e820502" eq (unpack "H*", $t) ? "" : "not ", "ok 104\n";
+
+print "ok 105\n";
 
diff --git a/t/58_hv.t b/t/58_hv.t
new file mode 100644
index 0000000..1fd44c0
--- /dev/null
+++ b/t/58_hv.t
@@ -0,0 +1,70 @@
+BEGIN { $| = 1; print "1..21\n"; }
+
+# none of the other tests serialise hv's, gross
+# also checks text_keys/text_strings
+
+use CBOR::XS;
+
+print "ok 1\n";
+
+$enc = encode_cbor {};
+print $enc ne "\xa0" ? "not " : "", "ok 2\n";
+
+$enc = encode_cbor { 5 => 6 };
+print $enc ne (pack "H*", "a1413506") ? "not " : "", "ok 3\n";
+
+$enc = encode_cbor { "" => \my $dummy };
+print $enc ne (pack "H*", "a140d95652f6") ? "not " : "", "ok 4\n";
+
+$enc = encode_cbor { undef() => \my $dummy };
+print $enc ne (pack "H*", "a140d95652f6") ? "not " : "", "ok 5\n";
+
+$enc = encode_cbor { "abc" => "def" };
+print $enc ne (pack "H*", "a14361626343646566") ? "not " : "", "ok 6\n";
+
+$enc = encode_cbor { "abc" => "def", "geh" => "ijk" };
+print $enc !~ /^\xa2/ ? "not " : "", "ok 7\n";
+print 17 ne length $enc ? "not " : "", "ok 8\n";
+
+$enc = encode_cbor { "\x{7f}" => undef };
+print $enc ne (pack "H*", "a1417ff6") ? "not " : "", "ok 9\n";
+
+$dec = decode_cbor pack "H*", "a1417ff6";
+print +(keys %$dec)[0] ne "\x{7f}" ? "not " : "", "ok 10\n";
+
+$enc = encode_cbor { "\x{100}" => undef };
+print $enc ne (pack "H*", "a162c480f6") ? "not " : "", "ok 11\n";
+
+$dec = decode_cbor pack "H*", "a162c480f6";
+print +(keys %$dec)[0] ne "\x{100}" ? "not " : "", "ok 12\n";
+
+$enc = encode_cbor { "\x{8f}" => undef };
+print $enc ne (pack "H*", "a1418ff6") ? "not " : "", "ok 13\n";
+
+$text_strings = CBOR::XS->new->text_strings;
+
+$enc = $text_strings->encode ({ "\x{7f}" => "\x{3f}" });
+print $enc ne (pack "H*", "a1617f613f") ? "not " : "", "ok 14\n";
+
+$enc = $text_strings->encode ({ "\x{8f}" => "\x{c7}" });
+print $enc ne (pack "H*", "a162c28f62c387") ? "not " : "", "ok 15\n";
+
+$enc = $text_strings->encode ({ "\x{8f}gix\x{ff}x" => "a\x{80}b\x{fe}y" });
+print $enc ne (pack "H*", "a168c28f676978c3bf786761c28062c3be79") ? "not " : "", "ok 16\n";
+
+$dec = decode_cbor pack "H*", "a168c28f676978c3bf78f6";
+print +(keys %$dec)[0] ne "\x{8f}gix\x{ff}x" ? "not " : "", "ok 17\n";
+
+$text_keys = CBOR::XS->new->text_keys;
+
+$enc = $text_keys->encode ({ "\x{7f}" => "\x{3f}" });
+print $enc ne (pack "H*", "a1617f413f") ? "not " : "", "ok 18\n";
+
+$enc = $text_keys->encode ({ "\x{8f}" => "\x{c7}" });
+print $enc ne (pack "H*", "a162c28f41c7") ? "not " : "", "ok 19\n";
+
+$enc = $text_keys->encode ({ "\x{8f}gix\x{ff}x" => "a\x{80}b\x{fe}y" });
+print $enc ne (pack "H*", "a168c28f676978c3bf7845618062fe79") ? "not " : "", "ok 20\n";
+
+print "ok 21\n";
+

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libcbor-xs-perl.git