Skip to content

Commit

Permalink
BUG: DataFrame.diff(axis=0) with DatetimeTZ data
Browse files Browse the repository at this point in the history
add whatsnew

clarify comment

Add additional tests

move diff into its own function in DatetimeTZBlock

Use correct placement

fix failing test
  • Loading branch information
mroeschke committed Feb 24, 2018
1 parent e97be6f commit 1eb30ca
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 1 deletion.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -788,6 +788,7 @@ Timezones
- Bug in the :class:`DataFrame` constructor, where passing a tz-aware :class:`DatetimeIndex` together with a column name resulted in an empty ``DataFrame`` (:issue:`19157`)
- Bug in :func:`Timestamp.tz_localize` where localizing a timestamp near the minimum or maximum valid values could overflow and return a timestamp with an incorrect nanosecond value (:issue:`12677`)
- Bug when iterating over :class:`DatetimeIndex` that was localized with fixed timezone offset that rounded nanosecond precision to microseconds (:issue:`19603`)
- Bug in :func:`DataFrame.diff` that raised an ``IndexError`` with tz-aware values (:issue:`18578`)

Offsets
^^^^^^^
Expand Down
13 changes: 12 additions & 1 deletion pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -2886,7 +2886,6 @@ def shift(self, periods, axis=0, mgr=None):

# think about moving this to the DatetimeIndex. This is a non-freq
# (number of periods) shift ###

N = len(self)
indexer = np.zeros(N, dtype=int)
if periods > 0:
Expand All @@ -2905,6 +2904,18 @@ def shift(self, periods, axis=0, mgr=None):
return [self.make_block_same_class(new_values,
placement=self.mgr_locs)]

def diff(self, n, axis=0, mgr=None):
    """
    1st discrete difference.

    Subtracts a copy of this block's values shifted by ``n`` periods from
    the block's own values and wraps the result in a ``TimeDeltaBlock``.

    Parameters
    ----------
    n : int
        Number of periods to shift by before subtracting; forwarded
        unchanged to ``self.shift``.
    axis : int, default 0
        Axis to difference along; forwarded to ``self.shift``.
        ``axis == 0`` is not supported.
    mgr : default None
        Not used by this method.
        NOTE(review): presumably kept so the signature mirrors ``shift``
        and the method can be invoked via apply -- confirm against caller.

    Returns
    -------
    list
        A single-element list holding the new ``TimeDeltaBlock``.

    Raises
    ------
    NotImplementedError
        If ``axis == 0``.
    """
    if axis == 0:
        # Cannot currently calculate diff across multiple blocks since this
        # function is invoked via apply
        raise NotImplementedError(
            "diff with axis=0 is not supported for this block: the "
            "difference cannot be calculated across multiple blocks")

    # ``shift`` returns a list of blocks; only the first one is used here.
    # ``.asi8`` yields the integer representation of the difference so it
    # can be reshaped and re-typed below.
    new_values = (self.values - self.shift(n, axis=axis)[0].values).asi8

    # Reshape the new_values like how algos.diff does for timedelta data
    new_values = new_values.reshape(1, len(new_values))
    new_values = new_values.astype('timedelta64[ns]')
    return [TimeDeltaBlock(new_values, placement=self.mgr_locs.indexer)]

def concat_same_type(self, to_concat, placement=None):
"""
Concatenate list of single blocks of the same type.
Expand Down
23 changes: 23 additions & 0 deletions pandas/tests/frame/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,29 @@ def test_diff(self):
1), 'z': pd.Series(1)}).astype('float64')
assert_frame_equal(result, expected)

@pytest.mark.parametrize('axis', [0, 1])
@pytest.mark.parametrize('tz', [None, 'UTC'])
def test_diff_datetime(self, axis, tz):
    """
    Check ``DataFrame.diff`` on two identical datetime columns.

    Covers every combination of ``axis`` in (0, 1) and ``tz`` in
    (None, 'UTC'): tz-aware data with ``axis=1`` is expected to raise
    ``NotImplementedError``; every other combination is compared against
    an explicit timedelta frame.
    """
    # GH 18578
    df = DataFrame({0: date_range('2010', freq='D', periods=2, tz=tz),
                    1: date_range('2010', freq='D', periods=2, tz=tz)})

    if axis == 1 and tz is not None:
        # Differencing tz-aware columns against each other is unsupported.
        with pytest.raises(NotImplementedError):
            df.diff(axis=axis)
        return

    if axis == 1:
        # Both columns are identical, so the column-wise difference is zero;
        # the first column has nothing to be compared against.
        expected = DataFrame({0: pd.TimedeltaIndex(['NaT', 'NaT']),
                              1: pd.TimedeltaIndex(['0 days',
                                                    '0 days'])})
    else:
        # Daily frequency: consecutive rows differ by exactly one day.
        expected = DataFrame({0: pd.TimedeltaIndex(['NaT', '1 days']),
                              1: pd.TimedeltaIndex(['NaT', '1 days'])})

    result = df.diff(axis=axis)
    assert_frame_equal(result, expected)

def test_diff_timedelta(self):
# GH 4533
df = DataFrame(dict(time=[Timestamp('20130101 9:01'),
Expand Down

0 comments on commit 1eb30ca

Please sign in to comment.