diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 2be7194af34b0..27c8a66575747 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -1157,3 +1157,4 @@ Bug Fixes - Bug in ``Index`` dtype may not applied properly (:issue:`11017`) - Bug in ``io.gbq`` when testing for minimum google api client version (:issue:`10652`) - Bug in ``DataFrame`` construction from nested ``dict`` with ``timedelta`` keys (:issue:`11129`) +- Bug in ``.fillna`` that may raise ``TypeError`` when data contains datetime dtype (:issue:`7095`, :issue:`11153`) diff --git a/pandas/core/dtypes.py b/pandas/core/dtypes.py index 68d72fdd80554..ce345738d9efc 100644 --- a/pandas/core/dtypes.py +++ b/pandas/core/dtypes.py @@ -181,7 +181,7 @@ def construct_from_string(cls, string): def __unicode__(self): # format the tz - return "datetime64[{unit}, {tz}]".format(unit=self.unit,tz=self.tz) + return "datetime64[{unit}, {tz}]".format(unit=self.unit, tz=self.tz) @property def name(self): diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 94eccad8e0185..97b54d4ef6ebe 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1947,20 +1947,42 @@ def _try_fill(self, value): def fillna(self, value, limit=None, inplace=False, downcast=None): - # straight putmask here - values = self.values if inplace else self.values.copy() mask = isnull(self.values) value = self._try_fill(value) + if limit is not None: if self.ndim > 2: raise NotImplementedError("number of dimensions for 'fillna' " "is currently limited to 2") mask[mask.cumsum(self.ndim-1)>limit]=False - np.putmask(values, mask, value) - return [self if inplace else - self.make_block(values, - fastpath=True)] + if mask.any(): + try: + return self._fillna_mask(mask, value, inplace=inplace) + except TypeError: + pass + # _fillna_mask raises TypeError when it fails + # cannot perform inplace op because of object coercion + values = self.get_values(dtype=object) + 
np.putmask(values, mask, value) + return [self.make_block(values, fastpath=True)] + else: + return [self if inplace else self.copy()] + + def _fillna_mask(self, mask, value, inplace=False): + if getattr(value, 'tzinfo', None) is None: + # Series comes to this path + values = self.values + if not inplace: + values = values.copy() + try: + np.putmask(values, mask, value) + return [self if inplace else + self.make_block(values, fastpath=True)] + except (ValueError, TypeError): + # scalar causes ValueError, and array causes TypeError + pass + raise TypeError def to_native_types(self, slicer=None, na_rep=None, date_format=None, quoting=None, **kwargs): @@ -2033,6 +2055,29 @@ def get_values(self, dtype=None): .reshape(self.values.shape) return self.values + def _fillna_mask(self, mask, value, inplace=False): + # cannot perform inplace op for internal DatetimeIndex + my_tz = tslib.get_timezone(self.values.tz) + value_tz = tslib.get_timezone(getattr(value, 'tzinfo', None)) + + if (my_tz == value_tz or self.dtype == getattr(value, 'dtype', None)): + if my_tz == value_tz: + # hack for PY2.6 / numpy 1.7.1. 
+ # Other versions can directly use self.values.putmask + # -------------------------------------- + try: + value = value.asm8 + except AttributeError: + value = tslib.Timestamp(value).asm8 + ### ------------------------------------ + + try: + values = self.values.putmask(mask, value) + return [self.make_block(values, fastpath=True)] + except ValueError: + pass + raise TypeError + def _slice(self, slicer): """ return a slice of my values """ if isinstance(slicer, tuple): diff --git a/pandas/tests/test_dtypes.py b/pandas/tests/test_dtypes.py index 54a49de582e56..e6df9c894c219 100644 --- a/pandas/tests/test_dtypes.py +++ b/pandas/tests/test_dtypes.py @@ -137,6 +137,20 @@ def test_basic(self): self.assertFalse(is_datetimetz(np.dtype('float64'))) self.assertFalse(is_datetimetz(1.0)) + def test_dst(self): + + dr1 = date_range('2013-01-01', periods=3, tz='US/Eastern') + s1 = Series(dr1, name='A') + self.assertTrue(is_datetimetz(s1)) + + dr2 = date_range('2013-08-01', periods=3, tz='US/Eastern') + s2 = Series(dr2, name='A') + self.assertTrue(is_datetimetz(s2)) + self.assertEqual(s1.dtype, s2.dtype) + + + + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index c1185f9455284..533f8df1d4599 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -8740,6 +8740,34 @@ def test_fillna_dtype_conversion(self): result = df.fillna(v) assert_frame_equal(result, expected) + def test_fillna_datetime_columns(self): + # GH 7095 + df = pd.DataFrame({'A': [-1, -2, np.nan], + 'B': date_range('20130101', periods=3), + 'C': ['foo', 'bar', None], + 'D': ['foo2', 'bar2', None]}, + index=date_range('20130110', periods=3)) + result = df.fillna('?') + expected = pd.DataFrame({'A': [-1, -2, '?'], + 'B': date_range('20130101', periods=3), + 'C': ['foo', 'bar', '?'], + 'D': ['foo2', 'bar2', '?']}, + index=date_range('20130110', periods=3)) + 
self.assert_frame_equal(result, expected) + + df = pd.DataFrame({'A': [-1, -2, np.nan], + 'B': [pd.Timestamp('2013-01-01'), pd.Timestamp('2013-01-02'), pd.NaT], + 'C': ['foo', 'bar', None], + 'D': ['foo2', 'bar2', None]}, + index=date_range('20130110', periods=3)) + result = df.fillna('?') + expected = pd.DataFrame({'A': [-1, -2, '?'], + 'B': [pd.Timestamp('2013-01-01'), pd.Timestamp('2013-01-02'), '?'], + 'C': ['foo', 'bar', '?'], + 'D': ['foo2', 'bar2', '?']}, + index=date_range('20130110', periods=3)) + self.assert_frame_equal(result, expected) + def test_ffill(self): self.tsframe['A'][:5] = nan self.tsframe['A'][-5:] = nan diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 2060b31511ead..a6d7e63656d68 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -3937,6 +3937,89 @@ def test_datetime64_fillna(self): result = s.fillna(method='backfill') assert_series_equal(result, expected) + def test_datetime64_tz_fillna(self): + for tz in ['US/Eastern', 'Asia/Tokyo']: + # DatetimeBlock + s = Series([Timestamp('2011-01-01 10:00'), pd.NaT, + Timestamp('2011-01-03 10:00'), pd.NaT]) + result = s.fillna(pd.Timestamp('2011-01-02 10:00')) + expected = Series([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00'), + Timestamp('2011-01-03 10:00'), Timestamp('2011-01-02 10:00')]) + self.assert_series_equal(expected, result) + + result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz)) + expected = Series([Timestamp('2011-01-01 10:00'), + Timestamp('2011-01-02 10:00', tz=tz), + Timestamp('2011-01-03 10:00'), + Timestamp('2011-01-02 10:00', tz=tz)]) + self.assert_series_equal(expected, result) + + result = s.fillna('AAA') + expected = Series([Timestamp('2011-01-01 10:00'), 'AAA', + Timestamp('2011-01-03 10:00'), 'AAA'], dtype=object) + self.assert_series_equal(expected, result) + + result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz), + 3: pd.Timestamp('2011-01-04 10:00')}) + expected = 
Series([Timestamp('2011-01-01 10:00'), + Timestamp('2011-01-02 10:00', tz=tz), + Timestamp('2011-01-03 10:00'), + Timestamp('2011-01-04 10:00')]) + self.assert_series_equal(expected, result) + + result = s.fillna({1: pd.Timestamp('2011-01-02 10:00'), + 3: pd.Timestamp('2011-01-04 10:00')}) + expected = Series([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00'), + Timestamp('2011-01-03 10:00'), Timestamp('2011-01-04 10:00')]) + self.assert_series_equal(expected, result) + + # DatetimeBlockTZ + idx = pd.DatetimeIndex(['2011-01-01 10:00', pd.NaT, + '2011-01-03 10:00', pd.NaT], tz=tz) + s = pd.Series(idx) + result = s.fillna(pd.Timestamp('2011-01-02 10:00')) + expected = Series([Timestamp('2011-01-01 10:00', tz=tz), + Timestamp('2011-01-02 10:00'), + Timestamp('2011-01-03 10:00', tz=tz), + Timestamp('2011-01-02 10:00')]) + self.assert_series_equal(expected, result) + + result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz)) + idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-02 10:00', + '2011-01-03 10:00', '2011-01-02 10:00'], + tz=tz) + expected = Series(idx) + self.assert_series_equal(expected, result) + + result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz).to_pydatetime()) + idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-02 10:00', + '2011-01-03 10:00', '2011-01-02 10:00'], + tz=tz) + expected = Series(idx) + self.assert_series_equal(expected, result) + + result = s.fillna('AAA') + expected = Series([Timestamp('2011-01-01 10:00', tz=tz), 'AAA', + Timestamp('2011-01-03 10:00', tz=tz), 'AAA'], + dtype=object) + self.assert_series_equal(expected, result) + + result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz), + 3: pd.Timestamp('2011-01-04 10:00')}) + expected = Series([Timestamp('2011-01-01 10:00', tz=tz), + Timestamp('2011-01-02 10:00', tz=tz), + Timestamp('2011-01-03 10:00', tz=tz), + Timestamp('2011-01-04 10:00')]) + self.assert_series_equal(expected, result) + + result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz), + 
3: pd.Timestamp('2011-01-04 10:00', tz=tz)}) + expected = Series([Timestamp('2011-01-01 10:00', tz=tz), + Timestamp('2011-01-02 10:00', tz=tz), + Timestamp('2011-01-03 10:00', tz=tz), + Timestamp('2011-01-04 10:00', tz=tz)]) + self.assert_series_equal(expected, result) + def test_fillna_int(self): s = Series(np.random.randint(-100, 100, 50)) s.fillna(method='ffill', inplace=True) @@ -5022,6 +5105,29 @@ def test_dropna_empty(self): # invalid axis self.assertRaises(ValueError, s.dropna, axis=1) + + def test_datetime64_tz_dropna(self): + # DatetimeBlock + s = Series([Timestamp('2011-01-01 10:00'), pd.NaT, + Timestamp('2011-01-03 10:00'), pd.NaT]) + result = s.dropna() + expected = Series([Timestamp('2011-01-01 10:00'), + Timestamp('2011-01-03 10:00')], index=[0, 2]) + self.assert_series_equal(result, expected) + + # DatetimeBlockTZ + idx = pd.DatetimeIndex(['2011-01-01 10:00', pd.NaT, + '2011-01-03 10:00', pd.NaT], + tz='Asia/Tokyo') + s = pd.Series(idx) + self.assertEqual(s.dtype, 'datetime64[ns, Asia/Tokyo]') + result = s.dropna() + expected = Series([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + Timestamp('2011-01-03 10:00', tz='Asia/Tokyo')], + index=[0, 2]) + self.assertEqual(result.dtype, 'datetime64[ns, Asia/Tokyo]') + self.assert_series_equal(result, expected) + def test_axis_alias(self): s = Series([1, 2, np.nan]) assert_series_equal(s.dropna(axis='rows'), s.dropna(axis='index'))