[pandas] 01/03: ENH: Add dateutil timezone support (GH4688)

Andreas Tille tille at debian.org
Wed Dec 28 15:20:14 UTC 2016


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to annotated tag v0.13.0_ahl1
in repository pandas.

commit d5c0b20bfa4b094ed058b2a4f15a2aad986a853d
Author: prossahl <pross at ahl.com>
Date:   Thu Aug 15 16:15:40 2013 +0100

    ENH: Add dateutil timezone support (GH4688)
---
 doc/source/timeseries.rst              |   3 +-
 pandas/tseries/tests/test_timezones.py | 208 +++++++++++++++++++++++++++++++++
 pandas/tslib.pyx                       | 109 ++++++++++++-----
 3 files changed, 287 insertions(+), 33 deletions(-)

diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst
index cd12cc6..c37b2bb 100644
--- a/doc/source/timeseries.rst
+++ b/doc/source/timeseries.rst
@@ -1048,7 +1048,8 @@ Time Zone Handling
 ------------------
 
 Using ``pytz``, pandas provides rich support for working with timestamps in
-different time zones. By default, pandas objects are time zone unaware:
+different time zones (pandas can also use time zones from the ``dateutil`` library).
+By default, pandas objects are time zone unaware:
 
 .. ipython:: python
 
diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py
index 083de95..09f4cbe 100644
--- a/pandas/tseries/tests/test_timezones.py
+++ b/pandas/tseries/tests/test_timezones.py
@@ -3,6 +3,7 @@ from datetime import datetime, time, timedelta, tzinfo, date
 import sys
 import os
 import unittest
+import itertools
 import nose
 
 import numpy as np
@@ -12,6 +13,7 @@ from pandas import (Index, Series, TimeSeries, DataFrame, isnull,
                     date_range, Timestamp)
 
 from pandas import DatetimeIndex, Int64Index, to_datetime
+from pandas import tslib
 
 from pandas.core.daterange import DateRange
 import pandas.core.datetools as datetools
@@ -39,11 +41,22 @@ def _skip_if_no_pytz():
     except ImportError:
         raise nose.SkipTest("pytz not installed")
 
+def _skip_if_no_dateutil():
+    try:
+        import dateutil
+    except ImportError:
+        raise nose.SkipTest
+
 try:
     import pytz
 except ImportError:
     pass
 
+try:
+    import dateutil
+except ImportError:
+    pass
+
 
 class FixedOffset(tzinfo):
     """Fixed offset in minutes east from UTC."""
@@ -958,6 +971,201 @@ class TestTimeZones(unittest.TestCase):
         offset = dates + offsets.Hour(5)
         self.assertEqual(dates[0] + offsets.Hour(5), offset[0])
 
+class TestPytzDateutilTimeZones(unittest.TestCase):
+    _multiprocess_can_split_ = True
+    FINANCIAL_TIMEZONE_NAMES = (
+        'Africa/Johannesburg',
+        'America/New_York', 'America/Chicago', 'America/Los_Angeles',
+        'Asia/Bangkok', 'Asia/Hong_Kong', 'Asia/Shanghai', 'Asia/Tokyo',
+        'Australia/Sydney',
+        'Europe/Berlin', 'Europe/London', 'Europe/Zurich',
+        'GMT', 'UTC',
+    )
+
+    def setUp(self):
+        _skip_if_no_pytz()
+        _skip_if_no_dateutil()
+        
+    def _gen_financial_timezone_pairs(self):
+        for pair in itertools.permutations(self.FINANCIAL_TIMEZONE_NAMES, 2):
+            yield pair
+            
+    def _assert_two_values_same_attributes(self, a, b, attrs):
+        for attr in attrs:
+            tm.assert_attr_equal(attr, a, b)
+            
+    def _assert_two_timestamp_values_same(self, a, b):
+        self._assert_two_values_same_attributes(a, b, \
+                    ('year', 'month', 'day', 'hour', 'minute', 'second', 'microsecond', 'nanosecond'))
+
+    def _assert_two_datetime_values_same(self, a, b):
+        self._assert_two_values_same_attributes(a, b, \
+                    ('year', 'month', 'day', 'hour', 'minute', 'second', 'microsecond'))
+        
+    def _clear_tslib_cache(self):
+        tslib.trans_cache = {}
+        tslib.utc_offset_cache = {}
+
+    def test_timestamp_tz_as_str(self):
+        """TestPytzDateutilTimeZones: Single date with default time zone, pytz and dateutil."""
+        ts = Timestamp('3/11/2012 04:00', tz='US/Eastern')
+        exp_pytz = Timestamp('3/11/2012 04:00', tz=pytz.timezone('US/Eastern'))
+        exp_du = Timestamp('3/11/2012 04:00', tz=dateutil.tz.gettz('US/Eastern'))
+        self.assertEquals(ts, exp_pytz)
+        self._assert_two_timestamp_values_same(ts, exp_pytz)
+        self.assertEquals(ts, exp_du)
+        self._assert_two_timestamp_values_same(ts, exp_du)
+        
+    def test_timestamp_tz_conversion(self):
+        """TestPytzDateutilTimeZones: Single date time zone conversion with pytz and dateutil."""
+        ts_base = Timestamp('3/11/2012 04:00', tz='US/Eastern')
+        ts_pytz = ts_base.astimezone(pytz.timezone('Europe/Moscow'))
+        ts_du = ts_base.astimezone(dateutil.tz.gettz('Europe/Moscow'))
+        self._assert_two_timestamp_values_same(ts_pytz, ts_du)
+
+    def test_eastern_london_large_year_range_jan_june(self):
+        """TestPytzDateutilTimeZones: Matches Eastern->London->Eastern Jan and Jun 1st for 1970-2049."""
+        for yr, mo in itertools.product(range(1970, 2050), (1, 6)):
+            # US->Europe
+            ts_base = Timestamp(datetime(yr, mo, 1, 12, 0), tz=pytz.timezone('US/Eastern'))
+            ts_pytz = ts_base.astimezone(pytz.timezone('Europe/London'))
+            ts_du = ts_base.astimezone(dateutil.tz.gettz('Europe/London'))
+            self._assert_two_timestamp_values_same(ts_pytz, ts_du)
+            # Europe->US
+            ts_base = Timestamp(datetime(yr, mo, 1, 12, 0), tz=pytz.timezone('Europe/London'))
+            ts_pytz = ts_base.astimezone(pytz.timezone('US/Eastern'))
+            ts_du = ts_base.astimezone(dateutil.tz.gettz('US/Eastern'))
+            self._assert_two_timestamp_values_same(ts_pytz, ts_du)
+
+    def test_eastern_london_every_day_2012_2013(self):
+        """TestPytzDateutilTimeZones: Matches for Eastern->London->Eastern daily for two years (one a leap year)."""
+        # 2012 is a leap year
+        for yr, mo, dy in itertools.product((2012, 2013), range(1, 13), range(1, 32)):
+            # US->Europe
+            try:
+                ts_base = Timestamp(datetime(yr, mo, dy, 12, 0), tz=pytz.timezone('US/Eastern'))
+            except ValueError:
+                continue
+            ts_pytz = ts_base.astimezone(pytz.timezone('Europe/London'))
+            ts_du = ts_base.astimezone(dateutil.tz.gettz('Europe/London'))
+            self._assert_two_timestamp_values_same(ts_pytz, ts_du)
+            # Europe->US
+            ts_base = Timestamp(datetime(yr, mo, dy, 12, 0), tz=pytz.timezone('Europe/London'))
+            ts_pytz = ts_base.astimezone(pytz.timezone('US/Eastern'))
+            ts_du = ts_base.astimezone(dateutil.tz.gettz('US/Eastern'))
+            self._assert_two_timestamp_values_same(ts_pytz, ts_du)
+            
+    def test_common_financial_timezones(self):
+        """TestPytzDateutilTimeZones: Permutations of time zones for major financial centres, midday, first day of each month, 2013."""
+        self._clear_tslib_cache()
+        for mo in range(1, 12):
+            for tz_from, tz_to in self._gen_financial_timezone_pairs():
+                ts_base = Timestamp(datetime(2013, mo, 1, 12, 0), tz=tz_from)
+                ts_pytz = ts_base.astimezone(pytz.timezone(tz_to))
+                ts_du = ts_base.astimezone(dateutil.tz.gettz(tz_to))
+                self._assert_two_timestamp_values_same(ts_pytz, ts_du)
+            
+    def test_common_financial_timezones_dateutil_loaded_first(self):
+        """TestPytzDateutilTimeZones: Permutations of time zones for major financial centres, midday, first day of each month, 2013. dateutil timezones loaded first"""
+        self._clear_tslib_cache()
+        for mo in range(1, 12):
+            for tz_from, tz_to in self._gen_financial_timezone_pairs():
+                ts_base = Timestamp(datetime(2013, mo, 1, 12, 0), tz=tz_from)
+                ts_du = ts_base.astimezone(dateutil.tz.gettz(tz_to))
+                ts_pytz = ts_base.astimezone(pytz.timezone(tz_to))
+                self._assert_two_timestamp_values_same(ts_pytz, ts_du)
+            
+    def test_conflict_dst_start_US_Eastern(self):
+        """TestPytzDateutilTimeZones: Demonstrate that libraries disagree about start of DST, US/Eastern 2012."""
+        #   tstamp 2012-03-11 02:00:00
+        #    pytz: 2012-03-11 02:00:00-05:00  UTC offset -1 day, 19:00:00  UTC time: 07:00:00
+        #dateutil: 2012-03-11 02:00:00-04:00  UTC offset -1 day, 20:00:00  UTC time: 06:00:00
+        tstamp = datetime(2012, 3, 11, 2, 0)
+        tz_name = 'US/Eastern'
+        ts_pytz = pytz.timezone(tz_name).localize(tstamp)
+        ts_du = tstamp.replace(tzinfo=dateutil.tz.gettz(tz_name))
+        self.assertEqual(str(ts_pytz), '2012-03-11 02:00:00-05:00')
+        self.assertEqual(str(ts_du),   '2012-03-11 02:00:00-04:00')
+        self._assert_two_datetime_values_same(ts_pytz, ts_du)
+        self.assertNotEqual(ts_pytz.utcoffset(), ts_du.utcoffset())
+        self.assertNotEqual(
+            str(ts_pytz.astimezone(pytz.timezone('UTC'))),
+            str(ts_du.astimezone(dateutil.tz.tzutc())),
+        )
+
+    def test_conflict_dst_start_UK(self):
+        """TestPytzDateutilTimeZones: Demonstrate that libraries disagree about start of DST, Europe/London 2013."""
+        #   tstamp 2013-03-31 01:00:00
+        #    pytz: 2013-03-31 01:00:00+00:00  UTC offset 0:00:00  UTC time: 01:00:00
+        #dateutil: 2013-03-31 01:00:00+01:00  UTC offset 1:00:00  UTC time: 00:00:00
+        tstamp = datetime(2013, 3, 31, 1, 0)
+        tz_name = 'Europe/London'
+        ts_pytz = pytz.timezone(tz_name).localize(tstamp)
+        ts_du = tstamp.replace(tzinfo=dateutil.tz.gettz(tz_name))
+        self.assertEqual(str(ts_pytz), '2013-03-31 01:00:00+00:00')
+        self.assertEqual(str(ts_du),   '2013-03-31 01:00:00+01:00')
+        self._assert_two_datetime_values_same(ts_pytz, ts_du)
+        self.assertNotEqual(ts_pytz.utcoffset(), ts_du.utcoffset())
+        self.assertNotEqual(
+            str(ts_pytz.astimezone(pytz.timezone('UTC'))),
+            str(ts_du.astimezone(dateutil.tz.tzutc())),
+        )
+
+    def test_date_range_us_pacific_weekly(self):
+        """TestPytzDateutilTimeZones: Test a date_range weekly US/Pacific through 2012."""
+        range_pytz = date_range('2012-01-01 12:00', periods=52, freq='W', tz=pytz.timezone('US/Pacific'))
+        range_du = date_range('2012-01-01 12:00', periods=52, freq='W', tz=dateutil.tz.gettz('US/Pacific'))
+        for a, b in zip(range_pytz, range_du):
+            self.assertEquals(a, b)
+
+    def test_series_us_eastern(self):
+        """TestPytzDateutilTimeZones: Test a Series with a timestamp index, US/Eastern Time across start DST 2012."""
+        rng = date_range('3/9/2012 12:00', periods=5, freq='D')
+        ts = Series(np.random.randn(len(rng)), rng)
+        # Localize to UTC and convert to Eastern time with default timezone library 
+        ts_utc = ts.tz_localize('UTC')
+        ser_std = ts_utc.tz_convert('US/Eastern')
+        # Convert to Eastern time specifically with pytz
+        ser_pytz = ts_utc.tz_convert(pytz.timezone('US/Eastern'))
+        # Now with dateutil
+        ser_du = ts_utc.tz_convert(dateutil.tz.gettz('US/Eastern'))
+        # Check the indices, first as Timestamps
+        for s, p, d in zip(ser_std.index, ser_pytz.index, ser_du.index):
+            self.assertEquals(s, p)
+            self.assertEquals(s, d)
+            self.assertEquals(p, d)
+        # assert_series_equal(ser_pytz, ser_du) fails as ser_pytz.tz != ser_du.tz
+        self.assertTrue(np.array_equal(ser_du.index.asi8, ser_pytz.index.asi8))
+        self.assertNotEqual(ser_pytz.index.tz, ser_du.index.tz)
+        
+    def test_series_subtract_pytz_dateutil(self):
+        """TestPytzDateutilTimeZones: Create two series of Timestamps 15:00 US/Pacific from pytz and 12:00 US/Eastern from dateutil and subtract them.""" 
+        dr_pytz = date_range('2012-06-15 12:00', periods=5, freq='D').tz_localize(pytz.timezone('US/Pacific'))
+        dr_du   = date_range('2012-06-15 15:00', periods=5, freq='D').tz_localize(dateutil.tz.gettz('US/Eastern'))
+        ts_pytz = Series(dr_pytz, range(5))
+        ts_du = Series(dr_du, range(5))
+        diff = ts_pytz - ts_du
+        # Should be 0 hours apart
+        exp = Series(np.zeros((5,), dtype='m8[ns]'), range(5))
+        self.assertEquals(diff.dtype, np.dtype('m8[ns]'))
+        tm.assert_series_equal(diff, exp)
+        # Check reverse
+        diff = ts_du - ts_pytz
+        self.assertEquals(diff.dtype, np.dtype('m8[ns]'))
+        tm.assert_series_equal(diff, exp)
+        
+    def test_common_financial_timezones_timedelta_zero(self):
+        """TestPytzDateutilTimeZones: Time zones for major financial centres in pytz and dateutil subtract to zero."""
+        self._clear_tslib_cache()
+        for mo in range(1, 12):
+            for tz_from, tz_to in self._gen_financial_timezone_pairs():
+                ts_base = Timestamp(datetime(2013, mo, 1, 12, 0), tz=tz_from)
+                ts_pytz = ts_base.astimezone(pytz.timezone(tz_to))
+                ts_du = ts_base.astimezone(dateutil.tz.gettz(tz_to))
+                diff = ts_pytz - ts_du
+                self.assertTrue(isinstance(diff, timedelta))
+                self.assertEqual(diff, timedelta(0), 'From: %s to: %s' % (tz_from, tz_to))
+
 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                    exit=False)
diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx
index d959562..8dfc68b 100644
--- a/pandas/tslib.pyx
+++ b/pandas/tslib.pyx
@@ -113,17 +113,19 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None):
 
     return result
 
-from dateutil.tz import tzlocal
+#from dateutil.tz import tzlocal
+import dateutil.tz
 
 def _is_tzlocal(tz):
-    return isinstance(tz, tzlocal)
+    return isinstance(tz, dateutil.tz.tzlocal)
 
 def _is_fixed_offset(tz):
-    try:
-        tz._transition_info
-        return False
-    except AttributeError:
-        return True
+    if _treat_tz_as_dateutil(tz):
+        return len(tz._trans_idx) == 0 and len(tz._trans_list) == 0
+    elif _treat_tz_as_pytz(tz):
+        return len(tz._transition_info) == 0 and len(tz._utc_transition_times) == 0
+    return True
+        
 
 # Python front end to C extension type _Timestamp
 # This serves as the box for datetime64
@@ -788,6 +790,7 @@ cdef convert_to_tsobject(object ts, object tz, object unit):
     return obj
 
 cdef inline void _localize_tso(_TSObject obj, object tz):
+
     if _is_utc(tz):
         obj.tzinfo = tz
     elif _is_tzlocal(tz):
@@ -804,23 +807,34 @@ cdef inline void _localize_tso(_TSObject obj, object tz):
         deltas = _get_deltas(tz)
         pos = trans.searchsorted(obj.value, side='right') - 1
 
-        # statictzinfo
-        if not hasattr(tz, '_transition_info'):
-            pandas_datetime_to_datetimestruct(obj.value + deltas[0],
-                                              PANDAS_FR_ns, &obj.dts)
+
+        # static/pytz/dateutil specific code
+        if _is_fixed_offset(tz):
+            # statictzinfo
+            if len(deltas) > 0:
+                pandas_datetime_to_datetimestruct(obj.value + deltas[0],
+                                                  PANDAS_FR_ns, &obj.dts)
+            else:
+                pandas_datetime_to_datetimestruct(obj.value, PANDAS_FR_ns, &obj.dts)        
             obj.tzinfo = tz
-        else:
+        elif _treat_tz_as_pytz(tz):
             inf = tz._transition_info[pos]
             pandas_datetime_to_datetimestruct(obj.value + deltas[pos],
                                               PANDAS_FR_ns, &obj.dts)
             obj.tzinfo = tz._tzinfos[inf]
+        elif _treat_tz_as_dateutil(tz):
+            pandas_datetime_to_datetimestruct(obj.value + deltas[pos],
+                                              PANDAS_FR_ns, &obj.dts)
+            obj.tzinfo = tz
+        else:
+            obj.tzinfo = tz
 
 
 def get_timezone(tz):
     return _get_zone(tz)
 
 cdef inline bint _is_utc(object tz):
-    return tz is UTC or isinstance(tz, _du_utc)
+    return tz is UTC or isinstance(tz, dateutil.tz.tzutc)
 
 cdef inline object _get_zone(object tz):
     if _is_utc(tz):
@@ -1493,8 +1507,8 @@ def i8_to_pydt(int64_t i8, object tzinfo = None):
 #----------------------------------------------------------------------
 # time zone conversion helpers
 
+#    from dateutil.tz import tzutc as _du_utc
 try:
-    from dateutil.tz import tzutc as _du_utc
     import pytz
     UTC = pytz.utc
     have_pytz = True
@@ -1625,22 +1639,48 @@ def tz_convert_single(int64_t val, object tz1, object tz2):
     offset = deltas[pos]
     return utc_date + offset
 
-
+# Timezone data caches, key is the pytz name, example: 'Europe/London'.
 trans_cache = {}
 utc_offset_cache = {}
 
+# Create mapping of dateutil names to cached keys (pytz names)
+# Example: {'/usr/share/zoneinfo/US/Eastern' : 'US/Eastern'}
+def __create_dateutil_to_pytz_names():
+    d = {}
+    for tz_name in pytz.all_timezones:
+        du = dateutil.tz.gettz(tz_name)
+        if du is not None:
+            d[du._filename] = tz_name
+    return d
+
+dateutil_to_pytz_names = __create_dateutil_to_pytz_names()
+
+cpdef bint _treat_tz_as_pytz(object tz):
+    return hasattr(tz, '_utc_transition_times') and hasattr(tz, '_transition_info')
+
+cpdef bint _treat_tz_as_dateutil(object tz):
+    return hasattr(tz, '_trans_list') and hasattr(tz, '_trans_idx')
+
+def _tz_cache_key(tz):
+    """Return the key (example 'Europe/London') in the cache for the timezone info object or None if unknown."""
+    if isinstance(tz, pytz.tzinfo.BaseTzInfo):
+        return str(tz)
+    elif isinstance(tz, dateutil.tz.tzfile):
+        try:
+            return dateutil_to_pytz_names[tz._filename]
+        except KeyError:
+            pass
+
 def _get_transitions(tz):
     """
     Get UTC times of DST transitions
     """
-    try:
-        # tzoffset not hashable in Python 3
-        hash(tz)
-    except TypeError:
+    cache_key = _tz_cache_key(tz)
+    if cache_key is None:
         return np.array([NPY_NAT + 1], dtype=np.int64)
 
-    if tz not in trans_cache:
-        if hasattr(tz, '_utc_transition_times'):
+    if cache_key not in trans_cache:
+        if _treat_tz_as_pytz(tz):
             arr = np.array(tz._utc_transition_times, dtype='M8[ns]')
             arr = arr.view('i8')
             try:
@@ -1648,31 +1688,36 @@ def _get_transitions(tz):
                     arr[0] = NPY_NAT + 1
             except Exception:
                 pass
+        elif _treat_tz_as_dateutil(tz):
+            arr = np.array(tz._trans_list, dtype='M8[s]').astype('M8[ns]')
+            arr = arr.view('i8')
         else:
             arr = np.array([NPY_NAT + 1], dtype=np.int64)
-        trans_cache[tz] = arr
-    return trans_cache[tz]
+        trans_cache[cache_key] = arr
+    return trans_cache[cache_key]
 
 def _get_deltas(tz):
     """
     Get UTC offsets in microseconds corresponding to DST transitions
     """
-    try:
-        # tzoffset not hashable in Python 3
-        hash(tz)
-    except TypeError:
+    cache_key = _tz_cache_key(tz)
+    if cache_key is None:
         num = int(total_seconds(_get_utcoffset(tz, None))) * 1000000000
         return np.array([num], dtype=np.int64)
 
-    if tz not in utc_offset_cache:
-        if hasattr(tz, '_utc_transition_times'):
-            utc_offset_cache[tz] = _unbox_utcoffsets(tz._transition_info)
+    if cache_key not in utc_offset_cache:
+        if _treat_tz_as_pytz(tz):
+            utc_offset_cache[cache_key] = _unbox_utcoffsets(tz._transition_info)
+        elif _treat_tz_as_dateutil(tz):
+            arr = np.array([v.offset for v in tz._trans_idx], dtype='i8')
+            arr *= 1000000000
+            utc_offset_cache[cache_key] = arr
         else:
             # static tzinfo
             num = int(total_seconds(_get_utcoffset(tz, None))) * 1000000000
-            utc_offset_cache[tz] = np.array([num], dtype=np.int64)
+            utc_offset_cache[cache_key] = np.array([num], dtype=np.int64)
 
-    return utc_offset_cache[tz]
+    return utc_offset_cache[cache_key]
 
 cdef double total_seconds(object td): # Python 2.6 compat
     return ((td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) //

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/pandas.git



More information about the debian-science-commits mailing list