From fa24af91a156587e7f8d1aab27a45644b59c7e49 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 10 Apr 2018 19:29:54 -0700 Subject: [PATCH] API/BUG: Enforce "normalized" pytz timezones for DatetimeIndex (#20510) --- doc/source/whatsnew/v0.23.0.txt | 3 ++ pandas/_libs/tslibs/timestamps.pyx | 6 +++ pandas/_libs/tslibs/timezones.pyx | 38 +++++++++++++++++++ pandas/core/indexes/datetimelike.py | 2 +- pandas/core/indexes/datetimes.py | 28 ++++++++------ pandas/tests/frame/test_alter_axes.py | 4 +- .../indexes/datetimes/test_construction.py | 28 ++++++++++++++ .../tests/scalar/timestamp/test_timestamp.py | 7 ++++ pandas/tests/test_resample.py | 12 ++++++ 9 files changed, 114 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index a6c92bf9faf9b..daa7f937cca9d 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -770,6 +770,8 @@ Datetimelike API Changes - :func:`pandas.merge` provides a more informative error message when trying to merge on timezone-aware and timezone-naive columns (:issue:`15800`) - For :class:`DatetimeIndex` and :class:`TimedeltaIndex` with ``freq=None``, addition or subtraction of integer-dtyped array or ``Index`` will raise ``NullFrequencyError`` instead of ``TypeError`` (:issue:`19895`) - :class:`Timestamp` constructor now accepts a `nanosecond` keyword or positional argument (:issue:`18898`) +- :class:`DatetimeIndex` will now raise an ``AttributeError`` when the ``tz`` attribute is set after instantiation (:issue:`3746`) +- :class:`DatetimeIndex` with a ``pytz`` timezone will now return a consistent ``pytz`` timezone (:issue:`18595`) .. 
_whatsnew_0230.api.other: @@ -1127,6 +1129,7 @@ Groupby/Resample/Rolling - Bug in :func:`DataFrame.resample().aggregate` not raising a ``KeyError`` when aggregating a non-existent column (:issue:`16766`, :issue:`19566`) - Fixed a performance regression for ``GroupBy.nth`` and ``GroupBy.last`` with some object columns (:issue:`19283`) - Bug in :func:`DataFrameGroupBy.cumsum` and :func:`DataFrameGroupBy.cumprod` when ``skipna`` was passed (:issue:`19806`) +- Bug in :func:`DataFrame.resample` that dropped timezone information (:issue:`13238`) Sparse ^^^^^^ diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 9818d53e386bd..ba5ebdab82ddc 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -700,6 +700,12 @@ class Timestamp(_Timestamp): """ return self.tzinfo + @tz.setter + def tz(self, value): + # GH 3746: Prevent localizing or converting the index by setting tz + raise AttributeError("Cannot directly set timezone. 
Use tz_localize() " + "or tz_convert() as appropriate") + def __setstate__(self, state): self.value = state[0] self.freq = state[1] diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 215ae9ce087ee..74fadbdb64763 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -314,3 +314,41 @@ cpdef bint tz_compare(object start, object end): """ # GH 18523 return get_timezone(start) == get_timezone(end) + + +cpdef tz_standardize(object tz): + """ + If the passed tz is a pytz timezone object, "normalize" it to a + consistent version + + Parameters + ---------- + tz : tz object + + Returns + ------- + tz object + + Examples + -------- + >>> tz + <DstTzInfo 'US/Pacific' PST-1 day, 16:00:00 STD> + + >>> tz_standardize(tz) + <DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD> + + >>> tz + <DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD> + + >>> tz_standardize(tz) + <DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD> + + >>> tz + dateutil.tz.tz.tzutc + + >>> tz_standardize(tz) + dateutil.tz.tz.tzutc + """ + if treat_tz_as_pytz(tz): + return pytz.timezone(str(tz)) + return tz diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index b906ea0f4784c..95e1f8438c704 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -1005,7 +1005,7 @@ def shift(self, n, freq=None): result = self + offset if hasattr(self, 'tz'): - result.tz = self.tz + result._tz = self.tz return result diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 75f4ec4f0d341..88ea3511d4ee3 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -511,13 +511,7 @@ def _generate(cls, start, end, periods, name, offset, 'different timezones') inferred_tz = timezones.maybe_get_tz(inferred_tz) - - # these may need to be localized tz = timezones.maybe_get_tz(tz) - if tz is not None: - date = start or end - if date.tzinfo is not None and hasattr(tz, 'localize'): - tz = tz.localize(date.replace(tzinfo=None)).tzinfo if tz is not None and inferred_tz is not None: if not timezones.tz_compare(inferred_tz, tz): @@ -654,7 
+648,8 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, result._data = values result.name = name result.offset = freq - result.tz = timezones.maybe_get_tz(tz) + result._tz = timezones.maybe_get_tz(tz) + result._tz = timezones.tz_standardize(result._tz) result._reset_identity() return result @@ -684,6 +679,17 @@ def _values(self): else: return self.values + @property + def tz(self): + # GH 18595 + return self._tz + + @tz.setter + def tz(self, value): + # GH 3746: Prevent localizing or converting the index by setting tz + raise AttributeError("Cannot directly set timezone. Use tz_localize() " + "or tz_convert() as appropriate") + @property def tzinfo(self): """ @@ -754,7 +760,7 @@ def _cached_range(cls, start=None, end=None, periods=None, offset=None, cachedRange = DatetimeIndex._simple_new(arr) cachedRange.offset = offset - cachedRange.tz = None + cachedRange = cachedRange.tz_localize(None) cachedRange.name = None drc[offset] = cachedRange else: @@ -831,7 +837,7 @@ def __setstate__(self, state): self.name = own_state[0] self.offset = own_state[1] - self.tz = own_state[2] + self._tz = timezones.tz_standardize(own_state[2]) # provide numpy < 1.7 compat if nd_state[2] == 'M8[us]': @@ -1175,7 +1181,7 @@ def union(self, other): else: result = Index.union(this, other) if isinstance(result, DatetimeIndex): - result.tz = this.tz + result._tz = timezones.tz_standardize(this.tz) if (result.freq is None and (this.freq is not None or other.freq is not None)): result.offset = to_offset(result.inferred_freq) @@ -1223,7 +1229,7 @@ def union_many(self, others): tz = this.tz this = Index.union(this, other) if isinstance(this, DatetimeIndex): - this.tz = tz + this._tz = timezones.tz_standardize(tz) if this.freq is None: this.offset = to_offset(this.inferred_freq) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 3e0ba26c20eb0..785bb128512fc 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ 
b/pandas/tests/frame/test_alter_axes.py @@ -249,8 +249,8 @@ def test_set_index_cast_datetimeindex(self): # convert to utc df['C'] = i.to_series().reset_index(drop=True) result = df['C'] - comp = pd.DatetimeIndex(expected.values).copy() - comp.tz = None + comp = pd.DatetimeIndex(expected.values) + comp = comp.tz_localize(None) tm.assert_numpy_array_equal(result.values, comp.values) # list of datetimes with a tz diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 176f5bd0c1a2a..97e01478c736b 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -441,6 +441,34 @@ def test_000constructor_resolution(self): assert idx.nanosecond[0] == t1.nanosecond + def test_disallow_setting_tz(self): + # GH 3746 + dti = DatetimeIndex(['2010'], tz='UTC') + with pytest.raises(AttributeError): + dti.tz = pytz.timezone('US/Pacific') + + @pytest.mark.parametrize('tz', [ + None, 'America/Los_Angeles', pytz.timezone('America/Los_Angeles'), + Timestamp('2000', tz='America/Los_Angeles').tz]) + def test_constructor_start_end_with_tz(self, tz): + # GH 18595 + start = Timestamp('2013-01-01 06:00:00', tz='America/Los_Angeles') + end = Timestamp('2013-01-02 06:00:00', tz='America/Los_Angeles') + result = DatetimeIndex(freq='D', start=start, end=end, tz=tz) + expected = DatetimeIndex(['2013-01-01 06:00:00', + '2013-01-02 06:00:00'], + tz='America/Los_Angeles') + tm.assert_index_equal(result, expected) + # Especially assert that the timezone is consistent for pytz + assert pytz.timezone('America/Los_Angeles') is result.tz + + @pytest.mark.parametrize('tz', ['US/Pacific', 'US/Eastern', 'Asia/Tokyo']) + def test_constructor_with_non_normalized_pytz(self, tz): + # GH 18595 + non_norm_tz = Timestamp('2010', tz=tz).tz + result = DatetimeIndex(['2010'], tz=non_norm_tz) + assert pytz.timezone(tz) is result.tz + class TestTimeSeries(object): diff --git 
a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index cde5baf47c18e..55ed7e6cfa8db 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -521,6 +521,13 @@ def test_today(self): assert (abs(ts_from_string_tz.tz_localize(None) - ts_from_method_tz.tz_localize(None)) < delta) + @pytest.mark.parametrize('tz', [None, pytz.timezone('US/Pacific')]) + def test_disallow_setting_tz(self, tz): + # GH 3746 + ts = Timestamp('2010') + with pytest.raises(AttributeError): + ts.tz = tz + class TestTimestamp(object): diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 896002d007a69..2180e38e24e6c 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -2532,6 +2532,18 @@ def test_with_local_timezone_pytz(self): expected = Series(1, index=expected_index) assert_series_equal(result, expected) + def test_resample_with_pytz(self): + # GH 13238 + s = Series(2, index=pd.date_range('2017-01-01', periods=48, freq="H", + tz="US/Eastern")) + result = s.resample("D").mean() + expected = Series(2, index=pd.DatetimeIndex(['2017-01-01', + '2017-01-02'], + tz="US/Eastern")) + assert_series_equal(result, expected) + # Especially assert that the timezone is LMT for pytz + assert result.index.tz == pytz.timezone('US/Eastern') + def test_with_local_timezone_dateutil(self): # see gh-5430 local_timezone = 'dateutil/America/Los_Angeles'