Skip to content

Commit

Permalink
Series.describe returns first and last for tz-aware datetimes (#21332)
Browse files Browse the repository at this point in the history
* Series.describe returns first and last for tz-aware datetimes

GH issue 21328

* parameterize tests

* parameterize names

* use tz_naive_fixture and fix top

* add tz describe test for df

* add bugfix to whatsnew

* fix formatting in whatsnew and add issue number to tests

* final bugfix
  • Loading branch information
louispotok authored and mroeschke committed Jul 6, 2018
1 parent 30eb48c commit 620abc4
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 5 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,7 @@ Timezones
- Bug in :class:`Series` constructor which would coerce tz-aware and tz-naive :class:`Timestamp` objects to tz-aware (:issue:`13051`)
- Bug in :class:`Index` with ``datetime64[ns, tz]`` dtype that did not localize integer data correctly (:issue:`20964`)
- Bug in :class:`DatetimeIndex` where constructing with an integer and tz would not localize correctly (:issue:`12619`)
- Fixed bug where :meth:`DataFrame.describe` and :meth:`Series.describe` on tz-aware datetimes did not show the ``first`` and ``last`` results (:issue:`21328`)

Offsets
^^^^^^^
Expand Down
11 changes: 6 additions & 5 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
is_bool_dtype,
is_categorical_dtype,
is_numeric_dtype,
is_datetime64_dtype,
is_datetime64_any_dtype,
is_timedelta64_dtype,
is_datetime64tz_dtype,
is_list_like,
Expand Down Expand Up @@ -8531,12 +8531,13 @@ def describe_categorical_1d(data):
if result[1] > 0:
top, freq = objcounts.index[0], objcounts.iloc[0]

if is_datetime64_dtype(data):
if is_datetime64_any_dtype(data):
tz = data.dt.tz
asint = data.dropna().values.view('i8')
names += ['top', 'freq', 'first', 'last']
result += [tslib.Timestamp(top), freq,
tslib.Timestamp(asint.min()),
tslib.Timestamp(asint.max())]
result += [tslib.Timestamp(top, tz=tz), freq,
tslib.Timestamp(asint.min(), tz=tz),
tslib.Timestamp(asint.max(), tz=tz)]
else:
names += ['top', 'freq']
result += [top, freq]
Expand Down
22 changes: 22 additions & 0 deletions pandas/tests/frame/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,28 @@ def test_describe_timedelta_values(self):
"max 5 days 00:00:00 0 days 05:00:00")
assert repr(res) == exp_repr

def test_describe_tz_values(self, tz_naive_fixture):
    # GH 21328 (fixed in PR 21332): DataFrame.describe(include='all')
    # must report 'first' and 'last' for a datetime column, carrying the
    # column's timezone (or none, when the fixture yields a naive tz).
    tz = tz_naive_fixture
    s1 = Series(range(5))
    start = Timestamp(2018, 1, 1)
    end = Timestamp(2018, 1, 5)
    s2 = Series(date_range(start, end, tz=tz))
    df = DataFrame({'s1': s1, 's2': s2})

    index = ['count', 'unique', 'top', 'freq', 'first', 'last',
             'mean', 'std', 'min', '25%', '50%', '75%', 'max']

    # Numeric column: only count and the mean/std/quantile rows are filled;
    # the categorical-style rows (unique/top/freq/first/last) are NaN.
    s1_expected = [5, np.nan, np.nan, np.nan, np.nan, np.nan,
                   2, 1.581139, 0, 1, 2, 3, 4]

    # Datetime column: the categorical-style rows are filled and the
    # numeric summary rows are NaN. All five values are unique, so 'top'
    # is taken from value_counts() rather than hard-coded.
    s2_expected = [5, 5, s2.value_counts().index[0], 1,
                   start.tz_localize(tz), end.tz_localize(tz),
                   np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]

    expected = DataFrame({'s1': s1_expected, 's2': s2_expected},
                         index=index)
    res = df.describe(include='all')
    tm.assert_frame_equal(res, expected)

def test_reduce_mixed_frame(self):
# GH 6806
df = DataFrame({
Expand Down
17 changes: 17 additions & 0 deletions pandas/tests/series/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,23 @@ def test_describe(self):
index=['count', 'unique', 'top', 'freq'])
tm.assert_series_equal(result, expected)

def test_describe_with_tz(self, tz_naive_fixture):
    # GH 21328 (fixed in PR 21332): Series.describe on datetimes must
    # report 'first' and 'last' with the Series' timezone preserved.
    tz = tz_naive_fixture
    # Use the string form so the Series name is a plain label rather than
    # the raw fixture value (which may be None or a tzinfo object).
    name = str(tz_naive_fixture)
    start = Timestamp(2018, 1, 1)
    end = Timestamp(2018, 1, 5)
    s = Series(date_range(start, end, tz=tz), name=name)
    result = s.describe()

    # All five values are unique, so 'top' is taken from value_counts()
    # rather than hard-coded.
    expected = Series(
        [5, 5, s.value_counts().index[0], 1,
         start.tz_localize(tz), end.tz_localize(tz)],
        name=name,
        index=['count', 'unique', 'top', 'freq', 'first', 'last']
    )
    tm.assert_series_equal(result, expected)

def test_argsort(self):
self._check_accum_op('argsort', check_dtype=False)
argsorted = self.ts.argsort()
Expand Down

0 comments on commit 620abc4

Please sign in to comment.