[python-hdf5storage] 26/152: Added utility to convert strings to str, and changed other string conversion utilities to use it.
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Feb 29 08:24:31 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to annotated tag 0.1
in repository python-hdf5storage.
commit 2089de71330f62336886826d338114288f28be70
Author: Freja Nordsiek <fnordsie at gmail.com>
Date: Sat Jan 25 12:39:44 2014 -0500
Added utility to convert strings to str, and changed other string conversion utilities to use it.
---
hdf5storage/utilities.py | 105 +++++++++++++++++++++++++++++++++--------------
1 file changed, 74 insertions(+), 31 deletions(-)
diff --git a/hdf5storage/utilities.py b/hdf5storage/utilities.py
index 47075d5..d9ba425 100644
--- a/hdf5storage/utilities.py
+++ b/hdf5storage/utilities.py
@@ -31,6 +31,58 @@ import numpy as np
import h5py
+def decode_to_str(data):
+ """ Decodes data to the Python str type.
+
+ Decodes `data` to a Python str if possible. If it can't be decoded, it
+ is returned as is. Unsigned integers, Python ``bytes``, and Numpy
+ strings (``numpy.str_`` and ``numpy.string_``) can be decoded.
+
+ Parameters
+ ----------
+ data : some type
+ Data to decode into a ``str`` string.
+
+ Returns
+ -------
+ str or data
+ If `data` can be decoded into a ``str``, the decoded version is
+ returned. Otherwise, `data` is returned unchanged.
+
+ See Also
+ --------
+ decode_to_numpy_unicode
+ decode_to_numpy_ascii
+
+ """
+ # How the conversion is done depends on the exact underlying
+ # type. Numpy types are handled separately. For uint types, it is
+ # assumed to be stored as ASCII, UTF-16, or UTF-32 depending on the
+ # size when converting to an str. numpy.string_ is just like
+ # converting a bytes. numpy.unicode has to be encoded into bytes
+ # before it can be decoded back into an str. bytes is decoded
+ # assuming it is in ASCII. Otherwise, data has to be returned as is.
+
+ if isinstance(data, (np.ndarray, np.uint8, np.uint16, np.uint32,
+ np.string_, np.unicode)):
+ if data.dtype.name == 'uint8':
+ return data.data.tobytes().decode(encoding='ASCII')
+ elif data.dtype.name == 'uint16':
+ return data.data.tobytes().decode(encoding='UTF-16')
+ elif data.dtype.name == 'uint32':
+ return data.data.tobytes().decode(encoding='UTF-32')
+ elif data.dtype.name.startswith('bytes'):
+ return data.decode(encoding='ASCII')
+ else:
+ return data.encode(encoding='UTF-32').decode( \
+ encoding='UTF-32', errors='replace')
+
+ if isinstance(data, bytes):
+ return data.decode(encoding='ASCII')
+ else:
+ return data
+
+
def decode_to_numpy_unicode(data):
""" Decodes data to Numpy unicode string (str_).
@@ -52,25 +104,21 @@ def decode_to_numpy_unicode(data):
See Also
--------
+ decode_to_str
decode_to_numpy_ascii
numpy.str_
"""
- # How the conversion is done depends on the exact underlying
- # type. For uint types, it is assumed to be stored as ASCII, UTF-16,
- # or UTF-32 depending on the size.
- if isinstance(data, (np.ndarray, np.uint8, np.uint16, np.uint32)):
- if data.dtype.name == 'uint8':
- data = data.data.tobytes().decode(encoding='ASCII')
- elif data.dtype.name == 'uint16':
- data = data.data.tobytes().decode(encoding='UTF-16')
- elif data.dtype.name == 'uint32':
- data = data.data.tobytes().decode(encoding='UTF-32')
-
+ # Convert first to a Python str if it isn't already an np.unicode.
+ if not isinstance(data, np.unicode) \
+ and not (isinstance(data, np.ndarray) \
+ and data.dtype.name.startswith('str')):
+ data = decode_to_str(data)
+
+ # If it is an str, then we can wrap it in unicode. Otherwise, we
+ # have to return it as is.
if isinstance(data, str):
return np.unicode(data)
- elif isinstance(data, (bytes, np.string_)):
- return np.unicode(data.decode())
else:
return data
@@ -96,27 +144,22 @@ def decode_to_numpy_ascii(data):
See Also
--------
+ decode_to_str
decode_to_numpy_unicode
- numpy.str_
+ numpy.string_
"""
- # How the conversion is done depends on the exact underlying
- # type. For uint types, it is assumed to be stored as ASCII, UTF-16,
- # or UTF-32 depending on the size when converting to an str. Then,
- # conversions from str, bytes, and numpy.unicode can be done.
-
- if isinstance(data, (np.ndarray, np.uint8, np.uint16, np.uint32)):
- if data.dtype.name == 'uint8':
- data = data.data.tobytes().decode(encoding='ASCII')
- elif data.dtype.name == 'uint16':
- data = data.data.tobytes().decode(encoding='UTF-16')
- elif data.dtype.name == 'uint32':
- data = data.data.tobytes().decode(encoding='UTF-32')
-
- if isinstance(data, bytes):
- return np.string_(data)
- elif isinstance(data, (str, np.unicode)):
- return np.string_(data.encode(encoding='ascii',
+ # Convert first to a Python str if it isn't already an np.string_.
+ if not isinstance(data, np.string) \
+ and not (isinstance(data, np.ndarray) \
+ and data.dtype.name.startswith('bytes')):
+ data = decode_to_str(data)
+
+ # If it is an str, then we can wrap it in string_ while performing a
+ # conversion to ASCII. Otherwise, we
+ # have to return it as is.
+ if isinstance(data, str):
+ return np.string_(data.encode(encoding='ASCII',
errors='replace'))
else:
return data
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/python-hdf5storage.git
More information about the debian-science-commits
mailing list