[pandas] 01/03: ENH: Add dateutil timezone support (GH4688)
Andreas Tille
tille at debian.org
Wed Dec 28 15:20:14 UTC 2016
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to annotated tag v0.13.0_ahl1
in repository pandas.
commit d5c0b20bfa4b094ed058b2a4f15a2aad986a853d
Author: prossahl <pross at ahl.com>
Date: Thu Aug 15 16:15:40 2013 +0100
ENH: Add dateutil timezone support (GH4688)
---
doc/source/timeseries.rst | 3 +-
pandas/tseries/tests/test_timezones.py | 208 +++++++++++++++++++++++++++++++++
pandas/tslib.pyx | 109 ++++++++++++-----
3 files changed, 287 insertions(+), 33 deletions(-)
diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst
index cd12cc6..c37b2bb 100644
--- a/doc/source/timeseries.rst
+++ b/doc/source/timeseries.rst
@@ -1048,7 +1048,8 @@ Time Zone Handling
------------------
Using ``pytz``, pandas provides rich support for working with timestamps in
-different time zones. By default, pandas objects are time zone unaware:
+different time zones (pandas can also use timezones from the ``dateutil`` library).
+By default, pandas objects are time zone unaware:
.. ipython:: python
diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py
index 083de95..09f4cbe 100644
--- a/pandas/tseries/tests/test_timezones.py
+++ b/pandas/tseries/tests/test_timezones.py
@@ -3,6 +3,7 @@ from datetime import datetime, time, timedelta, tzinfo, date
import sys
import os
import unittest
+import itertools
import nose
import numpy as np
@@ -12,6 +13,7 @@ from pandas import (Index, Series, TimeSeries, DataFrame, isnull,
date_range, Timestamp)
from pandas import DatetimeIndex, Int64Index, to_datetime
+from pandas import tslib
from pandas.core.daterange import DateRange
import pandas.core.datetools as datetools
@@ -39,11 +41,22 @@ def _skip_if_no_pytz():
except ImportError:
raise nose.SkipTest("pytz not installed")
+def _skip_if_no_dateutil():
+ try:
+ import dateutil
+ except ImportError:
+ raise nose.SkipTest
+
try:
import pytz
except ImportError:
pass
+try:
+ import dateutil
+except ImportError:
+ pass
+
class FixedOffset(tzinfo):
"""Fixed offset in minutes east from UTC."""
@@ -958,6 +971,201 @@ class TestTimeZones(unittest.TestCase):
offset = dates + offsets.Hour(5)
self.assertEqual(dates[0] + offsets.Hour(5), offset[0])
+class TestPytzDateutilTimeZones(unittest.TestCase):
+ _multiprocess_can_split_ = True
+ FINANCIAL_TIMEZONE_NAMES = (
+ 'Africa/Johannesburg',
+ 'America/New_York', 'America/Chicago', 'America/Los_Angeles',
+ 'Asia/Bangkok', 'Asia/Hong_Kong', 'Asia/Shanghai', 'Asia/Tokyo',
+ 'Australia/Sydney',
+ 'Europe/Berlin', 'Europe/London', 'Europe/Zurich',
+ 'GMT', 'UTC',
+ )
+
+ def setUp(self):
+ _skip_if_no_pytz()
+ _skip_if_no_dateutil()
+
+ def _gen_financial_timezone_pairs(self):
+ for pair in itertools.permutations(self.FINANCIAL_TIMEZONE_NAMES, 2):
+ yield pair
+
+ def _assert_two_values_same_attributes(self, a, b, attrs):
+ for attr in attrs:
+ tm.assert_attr_equal(attr, a, b)
+
+ def _assert_two_timestamp_values_same(self, a, b):
+ self._assert_two_values_same_attributes(a, b, \
+ ('year', 'month', 'day', 'hour', 'minute', 'second', 'microsecond', 'nanosecond'))
+
+ def _assert_two_datetime_values_same(self, a, b):
+ self._assert_two_values_same_attributes(a, b, \
+ ('year', 'month', 'day', 'hour', 'minute', 'second', 'microsecond'))
+
+ def _clear_tslib_cache(self):
+ tslib.trans_cache = {}
+ tslib.utc_offset_cache = {}
+
+ def test_timestamp_tz_as_str(self):
+ """TestPytzDateutilTimeZones: Single date with default time zone, pytz and dateutil."""
+ ts = Timestamp('3/11/2012 04:00', tz='US/Eastern')
+ exp_pytz = Timestamp('3/11/2012 04:00', tz=pytz.timezone('US/Eastern'))
+ exp_du = Timestamp('3/11/2012 04:00', tz=dateutil.tz.gettz('US/Eastern'))
+ self.assertEquals(ts, exp_pytz)
+ self._assert_two_timestamp_values_same(ts, exp_pytz)
+ self.assertEquals(ts, exp_du)
+ self._assert_two_timestamp_values_same(ts, exp_du)
+
+ def test_timestamp_tz_conversion(self):
+ """TestPytzDateutilTimeZones: Single date time zone conversion with pytz and dateutil."""
+ ts_base = Timestamp('3/11/2012 04:00', tz='US/Eastern')
+ ts_pytz = ts_base.astimezone(pytz.timezone('Europe/Moscow'))
+ ts_du = ts_base.astimezone(dateutil.tz.gettz('Europe/Moscow'))
+ self._assert_two_timestamp_values_same(ts_pytz, ts_du)
+
+ def test_eastern_london_large_year_range_jan_june(self):
+ """TestPytzDateutilTimeZones: Matches Eastern->London->Eastern Jan and Jun 1st for 1970-2049."""
+ for yr, mo in itertools.product(range(1970, 2050), (1, 6)):
+ # US->Europe
+ ts_base = Timestamp(datetime(yr, mo, 1, 12, 0), tz=pytz.timezone('US/Eastern'))
+ ts_pytz = ts_base.astimezone(pytz.timezone('Europe/London'))
+ ts_du = ts_base.astimezone(dateutil.tz.gettz('Europe/London'))
+ self._assert_two_timestamp_values_same(ts_pytz, ts_du)
+ # Europe->US
+ ts_base = Timestamp(datetime(yr, mo, 1, 12, 0), tz=pytz.timezone('Europe/London'))
+ ts_pytz = ts_base.astimezone(pytz.timezone('US/Eastern'))
+ ts_du = ts_base.astimezone(dateutil.tz.gettz('US/Eastern'))
+ self._assert_two_timestamp_values_same(ts_pytz, ts_du)
+
+ def test_eastern_london_every_day_2012_2013(self):
+ """TestPytzDateutilTimeZones: Matches for Eastern->London->Eastern daily for two years (one a leap year)."""
+ # 2012 is a leap year
+ for yr, mo, dy in itertools.product((2012, 2013), range(1, 13), range(1, 32)):
+ # US->Europe
+ try:
+ ts_base = Timestamp(datetime(yr, mo, dy, 12, 0), tz=pytz.timezone('US/Eastern'))
+ except ValueError:
+ continue
+ ts_pytz = ts_base.astimezone(pytz.timezone('Europe/London'))
+ ts_du = ts_base.astimezone(dateutil.tz.gettz('Europe/London'))
+ self._assert_two_timestamp_values_same(ts_pytz, ts_du)
+ # Europe->US
+ ts_base = Timestamp(datetime(yr, mo, dy, 12, 0), tz=pytz.timezone('Europe/London'))
+ ts_pytz = ts_base.astimezone(pytz.timezone('US/Eastern'))
+ ts_du = ts_base.astimezone(dateutil.tz.gettz('US/Eastern'))
+ self._assert_two_timestamp_values_same(ts_pytz, ts_du)
+
+ def test_common_financial_timezones(self):
+ """TestPytzDateutilTimeZones: Permutations of time zones for major financial centres, midday, first day of each month, 2013."""
+ self._clear_tslib_cache()
+ for mo in range(1, 12):
+ for tz_from, tz_to in self._gen_financial_timezone_pairs():
+ ts_base = Timestamp(datetime(2013, mo, 1, 12, 0), tz=tz_from)
+ ts_pytz = ts_base.astimezone(pytz.timezone(tz_to))
+ ts_du = ts_base.astimezone(dateutil.tz.gettz(tz_to))
+ self._assert_two_timestamp_values_same(ts_pytz, ts_du)
+
+ def test_common_financial_timezones_dateutil_loaded_first(self):
+ """TestPytzDateutilTimeZones: Permutations of time zones for major financial centres, midday, first day of each month, 2013. dateutil timezones loaded first"""
+ self._clear_tslib_cache()
+ for mo in range(1, 12):
+ for tz_from, tz_to in self._gen_financial_timezone_pairs():
+ ts_base = Timestamp(datetime(2013, mo, 1, 12, 0), tz=tz_from)
+ ts_du = ts_base.astimezone(dateutil.tz.gettz(tz_to))
+ ts_pytz = ts_base.astimezone(pytz.timezone(tz_to))
+ self._assert_two_timestamp_values_same(ts_pytz, ts_du)
+
+ def test_conflict_dst_start_US_Eastern(self):
+ """TestPytzDateutilTimeZones: Demonstrate that libraries disagree about start of DST, US/Eastern 2012."""
+ # tstamp 2012-03-11 02:00:00
+ # pytz: 2012-03-11 02:00:00-05:00 UTC offset -1 day, 19:00:00 UTC time: 07:00:00
+ #dateutil: 2012-03-11 02:00:00-04:00 UTC offset -1 day, 20:00:00 UTC time: 06:00:00
+ tstamp = datetime(2012, 3, 11, 2, 0)
+ tz_name = 'US/Eastern'
+ ts_pytz = pytz.timezone(tz_name).localize(tstamp)
+ ts_du = tstamp.replace(tzinfo=dateutil.tz.gettz(tz_name))
+ self.assertEqual(str(ts_pytz), '2012-03-11 02:00:00-05:00')
+ self.assertEqual(str(ts_du), '2012-03-11 02:00:00-04:00')
+ self._assert_two_datetime_values_same(ts_pytz, ts_du)
+ self.assertNotEqual(ts_pytz.utcoffset(), ts_du.utcoffset())
+ self.assertNotEqual(
+ str(ts_pytz.astimezone(pytz.timezone('UTC'))),
+ str(ts_du.astimezone(dateutil.tz.tzutc())),
+ )
+
+ def test_conflict_dst_start_UK(self):
+ """TestPytzDateutilTimeZones: Demonstrate that libraries disagree about start of DST, Europe/London 2013."""
+ # tstamp 2013-03-31 01:00:00
+ # pytz: 2013-03-31 01:00:00+00:00 UTC offset 0:00:00 UTC time: 01:00:00
+ #dateutil: 2013-03-31 01:00:00+01:00 UTC offset 1:00:00 UTC time: 00:00:00
+ tstamp = datetime(2013, 3, 31, 1, 0)
+ tz_name = 'Europe/London'
+ ts_pytz = pytz.timezone(tz_name).localize(tstamp)
+ ts_du = tstamp.replace(tzinfo=dateutil.tz.gettz(tz_name))
+ self.assertEqual(str(ts_pytz), '2013-03-31 01:00:00+00:00')
+ self.assertEqual(str(ts_du), '2013-03-31 01:00:00+01:00')
+ self._assert_two_datetime_values_same(ts_pytz, ts_du)
+ self.assertNotEqual(ts_pytz.utcoffset(), ts_du.utcoffset())
+ self.assertNotEqual(
+ str(ts_pytz.astimezone(pytz.timezone('UTC'))),
+ str(ts_du.astimezone(dateutil.tz.tzutc())),
+ )
+
+ def test_date_range_us_pacific_weekly(self):
+ """TestPytzDateutilTimeZones: Test a date_range weekly US/Pacific through 2012."""
+ range_pytz = date_range('2012-01-01 12:00', periods=52, freq='W', tz=pytz.timezone('US/Pacific'))
+ range_du = date_range('2012-01-01 12:00', periods=52, freq='W', tz=dateutil.tz.gettz('US/Pacific'))
+ for a, b in zip(range_pytz, range_du):
+ self.assertEquals(a, b)
+
+ def test_series_us_eastern(self):
+ """TestPytzDateutilTimeZones: Test a Series with a timestamp index, US/Eastern Time across start DST 2012."""
+ rng = date_range('3/9/2012 12:00', periods=5, freq='D')
+ ts = Series(np.random.randn(len(rng)), rng)
+ # Localize to UTC and convert to Eastern time with default timezone library
+ ts_utc = ts.tz_localize('UTC')
+ ser_std = ts_utc.tz_convert('US/Eastern')
+ # Convert to Eastern time specifically with pytz
+ ser_pytz = ts_utc.tz_convert(pytz.timezone('US/Eastern'))
+ # Now with dateutil
+ ser_du = ts_utc.tz_convert(dateutil.tz.gettz('US/Eastern'))
+ # Check the indices, firstly Timestamps
+ for s, p, d in zip(ser_std.index, ser_pytz.index, ser_du.index):
+ self.assertEquals(s, p)
+ self.assertEquals(s, d)
+ self.assertEquals(p, d)
+ # assert_series_equal(ser_pytz, ser_du) fails as ser_pytz.tz != ser_du.tz
+ self.assertTrue(np.array_equal(ser_du.index.asi8, ser_pytz.index.asi8))
+ self.assertNotEqual(ser_pytz.index.tz, ser_du.index.tz)
+
+ def test_series_subtract_pytz_dateutil(self):
+ """TestPytzDateutilTimeZones: Create two series of Timestamps 15:00 US/Pacific from pytz and 12:00 US/Eastern from dateutil and subtract them."""
+ dr_pytz = date_range('2012-06-15 12:00', periods=5, freq='D').tz_localize(pytz.timezone('US/Pacific'))
+ dr_du = date_range('2012-06-15 15:00', periods=5, freq='D').tz_localize(dateutil.tz.gettz('US/Eastern'))
+ ts_pytz = Series(dr_pytz, range(5))
+ ts_du = Series(dr_du, range(5))
+ diff = ts_pytz - ts_du
+ # Should be 0 hours apart
+ exp = Series(np.zeros((5,), dtype='m8[ns]'), range(5))
+ self.assertEquals(diff.dtype, np.dtype('m8[ns]'))
+ tm.assert_series_equal(diff, exp)
+ # Check reverse
+ diff = ts_du - ts_pytz
+ self.assertEquals(diff.dtype, np.dtype('m8[ns]'))
+ tm.assert_series_equal(diff, exp)
+
+ def test_common_financial_timezones_timedelta_zero(self):
+ """TestPytzDateutilTimeZones: Time zones for major financial centres in pytz and dateutil subtract to zero."""
+ self._clear_tslib_cache()
+ for mo in range(1, 12):
+ for tz_from, tz_to in self._gen_financial_timezone_pairs():
+ ts_base = Timestamp(datetime(2013, mo, 1, 12, 0), tz=tz_from)
+ ts_pytz = ts_base.astimezone(pytz.timezone(tz_to))
+ ts_du = ts_base.astimezone(dateutil.tz.gettz(tz_to))
+ diff = ts_pytz - ts_du
+ self.assertTrue(isinstance(diff, timedelta))
+ self.assertEqual(diff, timedelta(0), 'From: %s to: %s' % (tz_from, tz_to))
+
if __name__ == '__main__':
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
exit=False)
diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx
index d959562..8dfc68b 100644
--- a/pandas/tslib.pyx
+++ b/pandas/tslib.pyx
@@ -113,17 +113,19 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None):
return result
-from dateutil.tz import tzlocal
+#from dateutil.tz import tzlocal
+import dateutil.tz
def _is_tzlocal(tz):
- return isinstance(tz, tzlocal)
+ return isinstance(tz, dateutil.tz.tzlocal)
def _is_fixed_offset(tz):
- try:
- tz._transition_info
- return False
- except AttributeError:
- return True
+ if _treat_tz_as_dateutil(tz):
+ return len(tz._trans_idx) == 0 and len(tz._trans_list) == 0
+ elif _treat_tz_as_pytz(tz):
+ return len(tz._transition_info) == 0 and len(tz._utc_transition_times) == 0
+ return True
+
# Python front end to C extension type _Timestamp
# This serves as the box for datetime64
@@ -788,6 +790,7 @@ cdef convert_to_tsobject(object ts, object tz, object unit):
return obj
cdef inline void _localize_tso(_TSObject obj, object tz):
+
if _is_utc(tz):
obj.tzinfo = tz
elif _is_tzlocal(tz):
@@ -804,23 +807,34 @@ cdef inline void _localize_tso(_TSObject obj, object tz):
deltas = _get_deltas(tz)
pos = trans.searchsorted(obj.value, side='right') - 1
- # statictzinfo
- if not hasattr(tz, '_transition_info'):
- pandas_datetime_to_datetimestruct(obj.value + deltas[0],
- PANDAS_FR_ns, &obj.dts)
+
+ # static/pytz/dateutil specific code
+ if _is_fixed_offset(tz):
+ # statictzinfo
+ if len(deltas) > 0:
+ pandas_datetime_to_datetimestruct(obj.value + deltas[0],
+ PANDAS_FR_ns, &obj.dts)
+ else:
+ pandas_datetime_to_datetimestruct(obj.value, PANDAS_FR_ns, &obj.dts)
obj.tzinfo = tz
- else:
+ elif _treat_tz_as_pytz(tz):
inf = tz._transition_info[pos]
pandas_datetime_to_datetimestruct(obj.value + deltas[pos],
PANDAS_FR_ns, &obj.dts)
obj.tzinfo = tz._tzinfos[inf]
+ elif _treat_tz_as_dateutil(tz):
+ pandas_datetime_to_datetimestruct(obj.value + deltas[pos],
+ PANDAS_FR_ns, &obj.dts)
+ obj.tzinfo = tz
+ else:
+ obj.tzinfo = tz
def get_timezone(tz):
return _get_zone(tz)
cdef inline bint _is_utc(object tz):
- return tz is UTC or isinstance(tz, _du_utc)
+ return tz is UTC or isinstance(tz, dateutil.tz.tzutc)
cdef inline object _get_zone(object tz):
if _is_utc(tz):
@@ -1493,8 +1507,8 @@ def i8_to_pydt(int64_t i8, object tzinfo = None):
#----------------------------------------------------------------------
# time zone conversion helpers
+# from dateutil.tz import tzutc as _du_utc
try:
- from dateutil.tz import tzutc as _du_utc
import pytz
UTC = pytz.utc
have_pytz = True
@@ -1625,22 +1639,48 @@ def tz_convert_single(int64_t val, object tz1, object tz2):
offset = deltas[pos]
return utc_date + offset
-
+# Timezone data caches, key is the pytz name, example: 'Europe/London'.
trans_cache = {}
utc_offset_cache = {}
+# Create mapping of dateutil names to cached keys (pytz names)
+# Example: {'/usr/share/zoneinfo/US/Eastern' : 'US/Eastern'}
+def __create_dateutil_to_pytz_names():
+ d = {}
+ for tz_name in pytz.all_timezones:
+ du = dateutil.tz.gettz(tz_name)
+ if du is not None:
+ d[du._filename] = tz_name
+ return d
+
+dateutil_to_pytz_names = __create_dateutil_to_pytz_names()
+
+cpdef bint _treat_tz_as_pytz(object tz):
+ return hasattr(tz, '_utc_transition_times') and hasattr(tz, '_transition_info')
+
+cpdef bint _treat_tz_as_dateutil(object tz):
+ return hasattr(tz, '_trans_list') and hasattr(tz, '_trans_idx')
+
+def _tz_cache_key(tz):
+ """Return the key (example 'Europe/London') in the cache for the timezone info object or None if unknown."""
+ if isinstance(tz, pytz.tzinfo.BaseTzInfo):
+ return str(tz)
+ elif isinstance(tz, dateutil.tz.tzfile):
+ try:
+ return dateutil_to_pytz_names[tz._filename]
+ except KeyError:
+ pass
+
def _get_transitions(tz):
"""
Get UTC times of DST transitions
"""
- try:
- # tzoffset not hashable in Python 3
- hash(tz)
- except TypeError:
+ cache_key = _tz_cache_key(tz)
+ if cache_key is None:
return np.array([NPY_NAT + 1], dtype=np.int64)
- if tz not in trans_cache:
- if hasattr(tz, '_utc_transition_times'):
+ if cache_key not in trans_cache:
+ if _treat_tz_as_pytz(tz):
arr = np.array(tz._utc_transition_times, dtype='M8[ns]')
arr = arr.view('i8')
try:
@@ -1648,31 +1688,36 @@ def _get_transitions(tz):
arr[0] = NPY_NAT + 1
except Exception:
pass
+ elif _treat_tz_as_dateutil(tz):
+ arr = np.array(tz._trans_list, dtype='M8[s]').astype('M8[ns]')
+ arr = arr.view('i8')
else:
arr = np.array([NPY_NAT + 1], dtype=np.int64)
- trans_cache[tz] = arr
- return trans_cache[tz]
+ trans_cache[cache_key] = arr
+ return trans_cache[cache_key]
def _get_deltas(tz):
"""
Get UTC offsets in microseconds corresponding to DST transitions
"""
- try:
- # tzoffset not hashable in Python 3
- hash(tz)
- except TypeError:
+ cache_key = _tz_cache_key(tz)
+ if cache_key is None:
num = int(total_seconds(_get_utcoffset(tz, None))) * 1000000000
return np.array([num], dtype=np.int64)
- if tz not in utc_offset_cache:
- if hasattr(tz, '_utc_transition_times'):
- utc_offset_cache[tz] = _unbox_utcoffsets(tz._transition_info)
+ if cache_key not in utc_offset_cache:
+ if _treat_tz_as_pytz(tz):
+ utc_offset_cache[cache_key] = _unbox_utcoffsets(tz._transition_info)
+ elif _treat_tz_as_dateutil(tz):
+ arr = np.array([v.offset for v in tz._trans_idx], dtype='i8')
+ arr *= 1000000000
+ utc_offset_cache[cache_key] = arr
else:
# static tzinfo
num = int(total_seconds(_get_utcoffset(tz, None))) * 1000000000
- utc_offset_cache[tz] = np.array([num], dtype=np.int64)
+ utc_offset_cache[cache_key] = np.array([num], dtype=np.int64)
- return utc_offset_cache[tz]
+ return utc_offset_cache[cache_key]
cdef double total_seconds(object td): # Python 2.6 compat
return ((td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) //
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/pandas.git
More information about the debian-science-commits
mailing list