From 914ed71e14c513851e9b39a6af21f435b75b3ca5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=A2=9C=E5=8F=91=E6=89=8D=EF=BC=88Yan=20Facai=EF=BC=89?= Date: Fri, 8 Apr 2016 17:56:46 +0800 Subject: [PATCH 1/4] add test function --- .gitignore | 1 + pandas/tests/frame/test_timeseries.py | 24 ++++++++++++++++++++++++ pandas/tests/test_groupby.py | 26 ++++++++++++++++++++++++++ 3 files changed, 51 insertions(+) diff --git a/.gitignore b/.gitignore index d987bab6fd5d7..19f1cc804dca0 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,7 @@ .vagrant .noseids .ipynb_checkpoints +.tags # Compiled source # ################### diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index b9baae6cbeda7..4916d81b18c22 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -341,3 +341,27 @@ def test_first_last_valid(self): empty = DataFrame() self.assertIsNone(empty.last_valid_index()) self.assertIsNone(empty.first_valid_index()) + + def test_operation_on_NaT(self): + # Both NaT and Timestamp are in DataFrame. + df = pd.DataFrame({'foo': [pd.NaT, pd.NaT, + pd.Timestamp('2012-05-01')]}) + + res = df.min() + exp = pd.Series([pd.Timestamp('2012-05-01')], index=["foo"]) + tm.assert_series_equal(res, exp) + + res = df.max() + exp = pd.Series([pd.Timestamp('2012-05-01')], index=["foo"]) + tm.assert_series_equal(res, exp) + + # GH12941, only NaTs are in DataFrame. 
+ df = pd.DataFrame({'foo': [pd.NaT, pd.NaT]}) + + res = df.min() + exp = pd.Series([pd.NaT], index=["foo"]) + tm.assert_series_equal(res, exp) + + res = df.max() + exp = pd.Series([pd.NaT], index=["foo"]) + tm.assert_series_equal(res, exp) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 268dcfc5744c1..cc588d891b398 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -720,6 +720,32 @@ def test_agg_period_index(self): grouped = df.groupby(df.index.month) list(grouped) + def test_agg_dict_parameter_cast_result_dtypes(self): + # GH 12821 + + df = DataFrame( + {'class': ['A', 'A', 'B', 'B', 'C', 'C', 'D', 'D'], + 'time': date_range('1/1/2011', periods=8, freq='H')}) + df.loc[[0, 1, 2, 5], 'time'] = None + + # test for `first` function + exp = df.loc[[0, 3, 4, 6]].set_index('class') + grouped = df.groupby('class') + assert_frame_equal(grouped.first(), exp) + assert_frame_equal(grouped.agg('first'), exp) + assert_frame_equal(grouped.agg({'time': 'first'}), exp) + assert_series_equal(grouped.time.first(), exp['time']) + assert_series_equal(grouped.time.agg('first'), exp['time']) + + # test for `last` function + exp = df.loc[[0, 3, 4, 7]].set_index('class') + grouped = df.groupby('class') + assert_frame_equal(grouped.last(), exp) + assert_frame_equal(grouped.agg('last'), exp) + assert_frame_equal(grouped.agg({'time': 'last'}), exp) + assert_series_equal(grouped.time.last(), exp['time']) + assert_series_equal(grouped.time.agg('last'), exp['time']) + def test_agg_must_agg(self): grouped = self.df.groupby('A')['C'] self.assertRaises(Exception, grouped.agg, lambda x: x.describe()) From a949cee9d2aac812c4155c174a0360780b0cce39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=A2=9C=E5=8F=91=E6=89=8D=EF=BC=88Yan=20Facai=EF=BC=89?= Date: Thu, 12 May 2016 22:51:00 +0800 Subject: [PATCH 2/4] BUG: fix GH12821, agg() function on groupby dataframe changes dtype of datetime64[ns] column to float64 --- pandas/types/cast.py | 3 ++- 1 
file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/types/cast.py b/pandas/types/cast.py index f4cb476672ec7..e37b418664ba3 100644 --- a/pandas/types/cast.py +++ b/pandas/types/cast.py @@ -122,7 +122,8 @@ def trans(x): # noqa return new_result # a datetimelike - elif dtype.kind in ['M', 'm'] and result.dtype.kind in ['i']: + # GH12821, iNaT is casted to float + elif dtype.kind in ['M', 'm'] and result.dtype.kind in ['i', 'f']: try: result = result.astype(dtype) except: From 8d17eedf5db53d4d1f8ad7fbc77b329902b1610f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=A2=9C=E5=8F=91=E6=89=8D=EF=BC=88Yan=20Facai=EF=BC=89?= Date: Sun, 29 May 2016 15:30:47 +0800 Subject: [PATCH 3/4] BUG: fix GH12941, Operations on NaT returning float instead of datetime64[ns] --- pandas/core/nanops.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 7b89373dda7ba..2199daf549824 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -17,6 +17,7 @@ is_integer, is_complex, is_float_dtype, is_complex_dtype, is_integer_dtype, is_bool_dtype, is_object_dtype, + is_numeric_dtype, is_datetime64_dtype, is_timedelta64_dtype, is_datetime_or_timedelta_dtype, is_int_or_datetime_dtype, is_any_int_dtype) @@ -638,11 +639,15 @@ def _maybe_null_out(result, axis, mask): if axis is not None and getattr(result, 'ndim', False): null_mask = (mask.shape[axis] - mask.sum(axis)) == 0 if np.any(null_mask): - if np.iscomplexobj(result): - result = result.astype('c16') + if is_numeric_dtype(result): + if np.iscomplexobj(result): + result = result.astype('c16') + else: + result = result.astype('f8') + result[null_mask] = np.nan else: - result = result.astype('f8') - result[null_mask] = np.nan + # GH12941, use None to auto cast null + result[null_mask] = None elif result is not tslib.NaT: null_mask = mask.size - mask.sum() if null_mask == 0: From 607a170563bdc56a3df3d064325b75b5a446bd9f Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?=E9=A2=9C=E5=8F=91=E6=89=8D=EF=BC=88Yan=20Facai=EF=BC=89?= Date: Fri, 13 May 2016 06:37:58 +0800 Subject: [PATCH 4/4] add whatsnew entry --- doc/source/whatsnew/v0.19.0.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 581daab5cea58..e977526da40bd 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -910,5 +910,11 @@ Bug Fixes - Bug in ``Index.union`` returns an incorrect result with a named empty index (:issue:`13432`) - Bugs in ``Index.difference`` and ``DataFrame.join`` raise in Python3 when using mixed-integer indexes (:issue:`13432`, :issue:`12814`) - Bug in ``.to_excel()`` when DataFrame contains a MultiIndex which contains a label with a NaN value (:issue:`13511`) + +- Bug in ``agg()`` on a groupby DataFrame changing the dtype of a ``datetime64[ns]`` column to ``float64`` (:issue:`12821`) + +- Bug in operations on ``NaT`` returning ``float`` instead of ``datetime64[ns]`` (:issue:`12941`) + - Bug in ``pd.read_csv`` in Python 2.x with non-UTF8 encoded, multi-character separated data (:issue:`3404`) + - Bug in ``Index`` raises ``KeyError`` displaying incorrect column when column is not in the df and columns contains duplicate values (:issue:`13822`)