From 0a11de70a658f3f585b00ff8a982ae653651d6f0 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 19 Feb 2018 09:50:06 -0800 Subject: [PATCH] BUG: DataFrame.diff(axis=0) with DatetimeTZ data add whatsnew clarify comment Add additional tests --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/algorithms.py | 7 ++++++- pandas/core/internals.py | 5 +++++ pandas/tests/frame/test_timeseries.py | 16 ++++++++++++++++ 4 files changed, 28 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 11c49995372f5a..ca8e4f108333cf 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -732,6 +732,7 @@ Timezones - Bug in the :class:`DataFrame` constructor, where tz-aware Datetimeindex and a given column name will result in an empty ``DataFrame`` (:issue:`19157`) - Bug in :func:`Timestamp.tz_localize` where localizing a timestamp near the minimum or maximum valid values could overflow and return a timestamp with an incorrect nanosecond value (:issue:`12677`) - Bug when iterating over :class:`DatetimeIndex` that was localized with fixed timezone offset that rounded nanosecond precision to microseconds (:issue:`19603`) +- Bug in :func:`DataFrame.diff` that raised an ``IndexError`` with tz-aware values (:issue:`18578`) Offsets ^^^^^^^ diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index c754c063fce8e5..1bd725b556c7a8 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1499,7 +1499,12 @@ def diff(arr, n, axis=0): is_timedelta = False if needs_i8_conversion(arr): dtype = np.float64 - arr = arr.view('i8') + if is_datetime64tz_dtype(arr): + # Block data is usually a 2D array + # except DatetimeTZBlock which is a 1D array (DatetimeIndex) + arr = arr.view('i8').reshape(1, arr.shape[0]) + else: + arr = arr.view('i8') na = iNaT is_timedelta = True diff --git a/pandas/core/internals.py b/pandas/core/internals.py index dd5feefc49fe31..dbdaf948bff789 100644 --- 
a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1269,6 +1269,11 @@ def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None): def diff(self, n, axis=1, mgr=None): """ return block for the diff of the values """ + if isinstance(self, DatetimeTZBlock) and axis == 0: + # This method will iterate (self.apply) over each DatetimeTZBlock + # but axis=0 will need to gather data from all DatetimeTZBlocks + # in the manager in order for diff to work correctly. + raise NotImplementedError new_values = algos.diff(self.values, n, axis=axis) return [self.make_block(values=new_values)] diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 25dd285e883a0c..c20757452d13d9 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -57,6 +57,22 @@ def test_diff(self): 1), 'z': pd.Series(1)}).astype('float64') assert_frame_equal(result, expected) + @pytest.mark.parametrize('axis', [0, 1]) + @pytest.mark.parametrize('tz', [None, 'UTC']) + @pytest.mark.xfail(raises=NotImplementedError) + def test_diff_datetime(self, axis, tz): + # GH 18578 + df = DataFrame({0: date_range('2010', freq='D', periods=2, tz=tz), + 1: date_range('2010', freq='D', periods=2, tz=tz)}) + result = df.diff(axis=axis) # xfails for axis=1 and tz='UTC' + if axis == 1: + expected = DataFrame({0: pd.TimedeltaIndex(['NaT', 'NaT']), + 1: pd.TimedeltaIndex(['0 days', '0 days'])}) + else: + expected = DataFrame({0: pd.TimedeltaIndex(['NaT', '1 days']), + 1: pd.TimedeltaIndex(['NaT', '1 days'])}) + assert_frame_equal(result, expected) + def test_diff_timedelta(self): # GH 4533 df = DataFrame(dict(time=[Timestamp('20130101 9:01'),