Skip to content

Commit

Permalink
BUG: DataFrame.diff(axis=0) with DatetimeTZ data
Browse files Browse the repository at this point in the history
add whatsnew

clarify comment

Add additional tests
  • Loading branch information
mroeschke committed Feb 19, 2018
1 parent 718d067 commit 0a11de7
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 1 deletion.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,7 @@ Timezones
- Bug in the :class:`DataFrame` constructor, where tz-aware Datetimeindex and a given column name will result in an empty ``DataFrame`` (:issue:`19157`)
- Bug in :func:`Timestamp.tz_localize` where localizing a timestamp near the minimum or maximum valid values could overflow and return a timestamp with an incorrect nanosecond value (:issue:`12677`)
- Bug when iterating over :class:`DatetimeIndex` that was localized with fixed timezone offset that rounded nanosecond precision to microseconds (:issue:`19603`)
- Bug in :func:`DataFrame.diff` that raised an ``IndexError`` with tz-aware values (:issue:`18578`)

Offsets
^^^^^^^
Expand Down
7 changes: 6 additions & 1 deletion pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -1499,7 +1499,12 @@ def diff(arr, n, axis=0):
is_timedelta = False
if needs_i8_conversion(arr):
dtype = np.float64
arr = arr.view('i8')
if is_datetime64tz_dtype(arr):
# Block data is usually a 2D array
# except DatetimeTZBlock which is a 1D array (DatetimeIndex)
arr = arr.view('i8').reshape(1, arr.shape[0])
else:
arr = arr.view('i8')
na = iNaT
is_timedelta = True

Expand Down
5 changes: 5 additions & 0 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -1269,6 +1269,11 @@ def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None):

def diff(self, n, axis=1, mgr=None):
    """
    Return a one-element list holding a new block of differenced values.

    Parameters
    ----------
    n : forwarded unchanged to ``algos.diff``
    axis : forwarded to ``algos.diff`` as ``axis`` (default 1)
    mgr : not used by this method

    Raises
    ------
    NotImplementedError
        If this block is a ``DatetimeTZBlock`` and ``axis`` is 0.
    """
    tz_block_on_axis_0 = isinstance(self, DatetimeTZBlock) and axis == 0
    if tz_block_on_axis_0:
        # This method is applied (via self.apply) to one DatetimeTZBlock
        # at a time, whereas an axis=0 diff has to combine the data of
        # every DatetimeTZBlock in the manager to give a correct result.
        raise NotImplementedError
    differenced = algos.diff(self.values, n, axis=axis)
    return [self.make_block(values=differenced)]

Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/frame/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,22 @@ def test_diff(self):
1), 'z': pd.Series(1)}).astype('float64')
assert_frame_equal(result, expected)

@pytest.mark.parametrize('axis', [0, 1])
@pytest.mark.parametrize('tz', [None, 'UTC'])
@pytest.mark.xfail(raises=NotImplementedError)
def test_diff_datetime(self, axis, tz):
    """Check DataFrame.diff on two identical two-row datetime columns."""
    # GH 18578
    frame = DataFrame({
        label: date_range('2010', freq='D', periods=2, tz=tz)
        for label in (0, 1)
    })

    # the axis=1, tz='UTC' combination is the one expected to xfail
    result = frame.diff(axis=axis)

    # Expected timedelta strings per column: along axis=1 the first column
    # has nothing to subtract from; along axis=0 the first row does not.
    deltas = (
        {0: ['NaT', 'NaT'], 1: ['0 days', '0 days']}
        if axis == 1
        else {0: ['NaT', '1 days'], 1: ['NaT', '1 days']}
    )
    expected = DataFrame({
        label: pd.TimedeltaIndex(values)
        for label, values in deltas.items()
    })
    assert_frame_equal(result, expected)

def test_diff_timedelta(self):
# GH 4533
df = DataFrame(dict(time=[Timestamp('20130101 9:01'),
Expand Down

0 comments on commit 0a11de7

Please sign in to comment.