Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG GH23282 calling min on series of NaT returns NaT #23289

Merged
merged 12 commits into from
Oct 28, 2018
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1020,6 +1020,7 @@ Datetimelike
- Bug in :func:`to_datetime` with an :class:`Index` argument that would drop the ``name`` from the result (:issue:`21697`)
- Bug in :class:`PeriodIndex` where adding or subtracting a :class:`timedelta` or :class:`Tick` object produced incorrect results (:issue:`22988`)
- Bug in :func:`date_range` when decrementing a start date to a past end date by a negative frequency (:issue:`23270`)
- Bug in :meth:`Series.min` which would return ``NaN`` instead of ``NaT`` when called on a series of ``NaT`` (:issue:`23282`)
- Bug in :func:`DataFrame.combine` with datetimelike values raising a TypeError (:issue:`23079`)

Timedelta
Expand Down
38 changes: 23 additions & 15 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None,
elif is_float_dtype(dtype):
dtype_max = np.float64

return values, mask, dtype, dtype_max
return values, mask, dtype, dtype_max, fill_value


def _isfinite(values):
Expand All @@ -266,16 +266,21 @@ def _view_if_needed(values):
return values


def _wrap_results(result, dtype):
def _wrap_results(result, dtype, fill_value=None):
""" wrap our results if needed """

if is_datetime64_dtype(dtype):
if not isinstance(result, np.ndarray):
assert not isna(fill_value), "Expected non-null fill_value"
if result == fill_value:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it assumed that fill_value is not NA? If not, this will be wrong, since fill_value will never equal fill_value.

Should we assert that it's not NA?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so this can't be for an i8 type by definition. but yes we can assert it.

result = np.nan
result = tslibs.Timestamp(result)
else:
result = result.view(dtype)
elif is_timedelta64_dtype(dtype):
if not isinstance(result, np.ndarray):
if result == fill_value:
result = np.nan

# raise if we have a timedelta64[ns] which is too large
if np.fabs(result) > _int64_max:
Expand Down Expand Up @@ -346,8 +351,8 @@ def nanany(values, axis=None, skipna=True, mask=None):
>>> nanops.nanany(s)
False
"""
values, mask, dtype, _ = _get_values(values, skipna, False, copy=skipna,
mask=mask)
values, mask, dtype, _, _ = _get_values(values, skipna, False, copy=skipna,
mask=mask)
return values.any(axis)


Expand Down Expand Up @@ -379,8 +384,8 @@ def nanall(values, axis=None, skipna=True, mask=None):
>>> nanops.nanall(s)
False
"""
values, mask, dtype, _ = _get_values(values, skipna, True, copy=skipna,
mask=mask)
values, mask, dtype, _, _ = _get_values(values, skipna, True, copy=skipna,
mask=mask)
return values.all(axis)


Expand Down Expand Up @@ -409,7 +414,8 @@ def nansum(values, axis=None, skipna=True, min_count=0, mask=None):
>>> nanops.nansum(s)
3.0
"""
values, mask, dtype, dtype_max = _get_values(values, skipna, 0, mask=mask)
values, mask, dtype, dtype_max, _ = _get_values(values,
skipna, 0, mask=mask)
dtype_sum = dtype_max
if is_float_dtype(dtype):
dtype_sum = dtype
Expand Down Expand Up @@ -448,7 +454,8 @@ def nanmean(values, axis=None, skipna=True, mask=None):
>>> nanops.nanmean(s)
1.5
"""
values, mask, dtype, dtype_max = _get_values(values, skipna, 0, mask=mask)
values, mask, dtype, dtype_max, _ = _get_values(
values, skipna, 0, mask=mask)
dtype_sum = dtype_max
dtype_count = np.float64
if is_integer_dtype(dtype) or is_timedelta64_dtype(dtype):
Expand Down Expand Up @@ -501,7 +508,7 @@ def get_median(x):
return np.nan
return np.nanmedian(x[mask])

values, mask, dtype, dtype_max = _get_values(values, skipna, mask=mask)
values, mask, dtype, dtype_max, _ = _get_values(values, skipna, mask=mask)
if not is_float_dtype(values):
values = values.astype('f8')
values[mask] = np.nan
Expand Down Expand Up @@ -705,7 +712,8 @@ def nansem(values, axis=None, skipna=True, ddof=1, mask=None):
def _nanminmax(meth, fill_value_typ):
@bottleneck_switch()
def reduction(values, axis=None, skipna=True, mask=None):
values, mask, dtype, dtype_max = _get_values(

values, mask, dtype, dtype_max, fill_value = _get_values(
values, skipna, fill_value_typ=fill_value_typ, mask=mask)

if ((axis is not None and values.shape[axis] == 0) or
Expand All @@ -719,7 +727,7 @@ def reduction(values, axis=None, skipna=True, mask=None):
else:
result = getattr(values, meth)(axis)

result = _wrap_results(result, dtype)
result = _wrap_results(result, dtype, fill_value)
return _maybe_null_out(result, axis, mask)

reduction.__name__ = 'nan' + meth
Expand Down Expand Up @@ -753,8 +761,8 @@ def nanargmax(values, axis=None, skipna=True, mask=None):
>>> nanops.nanargmax(s)
4
"""
values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='-inf',
mask=mask)
values, mask, dtype, _, _ = _get_values(
values, skipna, fill_value_typ='-inf', mask=mask)
result = values.argmax(axis)
result = _maybe_arg_null_out(result, axis, mask, skipna)
return result
Expand Down Expand Up @@ -783,8 +791,8 @@ def nanargmin(values, axis=None, skipna=True, mask=None):
>>> nanops.nanargmin(s)
0
"""
values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='+inf',
mask=mask)
values, mask, dtype, _, _ = _get_values(
values, skipna, fill_value_typ='+inf', mask=mask)
result = values.argmin(axis)
result = _maybe_arg_null_out(result, axis, mask, skipna)
return result
Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/series/test_datetime_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,3 +510,21 @@ def test_dt_timetz_accessor(self, tz_naive_fixture):
time(22, 14, tzinfo=tz)])
result = s.dt.timetz
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize('nat', [
pd.Series([pd.NaT, pd.NaT]),
pd.Series([pd.NaT, pd.Timedelta('nat')]),
pd.Series([pd.Timedelta('nat'), pd.Timedelta('nat')])])
def test_minmax_nat_series(self, nat):
# GH 23282
assert nat.min() is pd.NaT
assert nat.max() is pd.NaT

@pytest.mark.parametrize('nat', [
# GH 23282
pd.DataFrame([pd.NaT, pd.NaT]),
pd.DataFrame([pd.NaT, pd.Timedelta('nat')]),
pd.DataFrame([pd.Timedelta('nat'), pd.Timedelta('nat')])])
TomAugspurger marked this conversation as resolved.
Show resolved Hide resolved
def test_minmax_nat_dataframe(self, nat):
assert nat.min()[0] is pd.NaT
assert nat.max()[0] is pd.NaT