Skip to content

Commit

Permalink
BUG: DatetimeTZBlock.fillna raises TypeError
Browse files Browse the repository at this point in the history
  • Loading branch information
sinhrks committed Sep 20, 2015
1 parent d8182e1 commit a7c705a
Show file tree
Hide file tree
Showing 6 changed files with 201 additions and 7 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.17.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1157,3 +1157,4 @@ Bug Fixes
- Bug where ``Index`` dtype may not be applied properly (:issue:`11017`)
- Bug in ``io.gbq`` when testing for minimum google api client version (:issue:`10652`)
- Bug in ``DataFrame`` construction from nested ``dict`` with ``timedelta`` keys (:issue:`11129`)
- Bug in ``.fillna`` which may raise ``TypeError`` when the data contains a datetime dtype (:issue:`7095`, :issue:`11153`)
2 changes: 1 addition & 1 deletion pandas/core/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def construct_from_string(cls, string):

def __unicode__(self):
# Render this dtype as the text "datetime64[<unit>, <tz>]", filling the
# placeholders from self.unit and self.tz.
# format the tz
# NOTE(review): two return statements appear here because this is a diff
# rendering ("1 addition & 1 deletion"); the first one, without the space
# after the comma, looks like the removed line and the second one its
# replacement -- confirm against the commit before relying on either.
return "datetime64[{unit}, {tz}]".format(unit=self.unit,tz=self.tz)
return "datetime64[{unit}, {tz}]".format(unit=self.unit, tz=self.tz)

@property
def name(self):
Expand Down
57 changes: 51 additions & 6 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -1947,20 +1947,42 @@ def _try_fill(self, value):
def fillna(self, value, limit=None,
inplace=False, downcast=None):
# Fill the positions where isnull(self.values) is true with `value` and
# return a one-element list holding the resulting block.
# `downcast` is accepted but is not referenced in the lines shown here.
# NOTE(review): this span is a diff rendering ("51 additions & 6
# deletions"), so pre-change and post-change lines are interleaved; the six
# "straight putmask" lines look like the removed ones -- confirm against
# the commit.

# straight putmask here
values = self.values if inplace else self.values.copy()
mask = isnull(self.values)
# presumably converts the fill value to the block's own representation;
# _try_fill is not fully visible here -- confirm
value = self._try_fill(value)

if limit is not None:
if self.ndim > 2:
raise NotImplementedError("number of dimensions for 'fillna' "
"is currently limited to 2")
# clear mask entries once the running count of True values along the last
# axis (self.ndim-1) exceeds `limit`
mask[mask.cumsum(self.ndim-1)>limit]=False

np.putmask(values, mask, value)
return [self if inplace else
self.make_block(values,
fastpath=True)]
if mask.any():
try:
# try the block-specific fill first; it signals failure with TypeError
return self._fillna_mask(mask, value, inplace=inplace)
except TypeError:
pass
# _fillna_mask raises TypeError when it fails
# cannot perform inplace op because of object coercion
values = self.get_values(dtype=object)
np.putmask(values, mask, value)
return [self.make_block(values, fastpath=True)]
else:
# nothing is masked: hand back this block, or a copy when not inplace
return [self if inplace else self.copy()]

def _fillna_mask(self, mask, value, inplace=False):
    """Fill the masked positions of ``self.values`` with ``value``.

    The fill is only attempted when ``value`` has no ``tzinfo`` attribute
    or that attribute is ``None``.

    Returns a one-element list: ``self`` when ``inplace`` is true,
    otherwise a new block made from a filled copy of ``self.values``.

    Raises ``TypeError`` whenever the fill is not carried out, i.e. when
    ``value`` carries a ``tzinfo`` or when the attempt raised
    ``ValueError`` or ``TypeError``.
    """
    if getattr(value, 'tzinfo', None) is not None:
        raise TypeError

    # Series comes to this path
    target = self.values if inplace else self.values.copy()
    try:
        np.putmask(target, mask, value)
        if inplace:
            return [self]
        return [self.make_block(target, fastpath=True)]
    except (ValueError, TypeError):
        # scalar causes ValueError, and array causes TypeError
        pass
    raise TypeError

def to_native_types(self, slicer=None, na_rep=None, date_format=None,
quoting=None, **kwargs):
Expand Down Expand Up @@ -2033,6 +2055,29 @@ def get_values(self, dtype=None):
.reshape(self.values.shape)
return self.values

def _fillna_mask(self, mask, value, inplace=False):
    """Fill the masked positions with ``value`` on the tz-aware values.

    The fill is attempted only when the timezone of ``value`` equals the
    timezone of ``self.values``, or when ``value`` has a ``dtype`` equal
    to ``self.dtype``.

    ``inplace`` is accepted but not used: the result of
    ``self.values.putmask`` is always wrapped in a new block.

    Returns a one-element list holding the new block. Raises
    ``TypeError`` when the fill is not attempted or when the attempt
    raised ``ValueError``.
    """
    # cannot perform inplace op for internal DatetimeIndex
    block_tz = tslib.get_timezone(self.values.tz)
    fill_tz = tslib.get_timezone(getattr(value, 'tzinfo', None))
    same_tz = block_tz == fill_tz

    if not (same_tz or self.dtype == getattr(value, 'dtype', None)):
        raise TypeError

    if same_tz:
        # hack for PY2.6 / numpy 1.7.1.
        # Other versions can directly use self.values.putmask
        try:
            value = value.asm8
        except AttributeError:
            value = tslib.Timestamp(value).asm8

    try:
        filled = self.values.putmask(mask, value)
        return [self.make_block(filled, fastpath=True)]
    except ValueError:
        pass
    raise TypeError

def _slice(self, slicer):
""" return a slice of my values """
if isinstance(slicer, tuple):
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,20 @@ def test_basic(self):
self.assertFalse(is_datetimetz(np.dtype('float64')))
self.assertFalse(is_datetimetz(1.0))

def test_dst(self):
    """Check that two tz-aware Series in 'US/Eastern' share one dtype.

    One Series starts in January and the other in August; the test name
    suggests these were chosen to sit in standard and daylight time.
    """
    winter = Series(date_range('2013-01-01', periods=3, tz='US/Eastern'),
                    name='A')
    self.assertTrue(is_datetimetz(winter))

    summer = Series(date_range('2013-08-01', periods=3, tz='US/Eastern'),
                    name='A')
    self.assertTrue(is_datetimetz(summer))
    self.assertEqual(winter.dtype, summer.dtype)




# Run this module's tests through nose when the file is executed as a script.
if __name__ == '__main__':
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
exit=False)
28 changes: 28 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -8740,6 +8740,34 @@ def test_fillna_dtype_conversion(self):
result = df.fillna(v)
assert_frame_equal(result, expected)

def test_fillna_datetime_columns(self):
    """Check ``fillna('?')`` on a frame that includes a datetime column.

    Two frames are exercised: one whose datetime column has no missing
    entry and one whose datetime column ends with ``NaT``.
    """
    # GH 7095
    def frame(a_values, b_values, filler):
        # columns C and D differ between input and expectation only in
        # their last element
        return pd.DataFrame({'A': a_values,
                             'B': b_values,
                             'C': ['foo', 'bar', filler],
                             'D': ['foo2', 'bar2', filler]},
                            index=date_range('20130110', periods=3))

    # datetime column without missing values
    source = frame([-1, -2, np.nan],
                   date_range('20130101', periods=3), None)
    filled = source.fillna('?')
    wanted = frame([-1, -2, '?'],
                   date_range('20130101', periods=3), '?')
    self.assert_frame_equal(filled, wanted)

    # datetime column ending with NaT
    source = frame([-1, -2, np.nan],
                   [pd.Timestamp('2013-01-01'),
                    pd.Timestamp('2013-01-02'), pd.NaT],
                   None)
    filled = source.fillna('?')
    wanted = frame([-1, -2, '?'],
                   [pd.Timestamp('2013-01-01'),
                    pd.Timestamp('2013-01-02'), '?'],
                   '?')
    self.assert_frame_equal(filled, wanted)

def test_ffill(self):
self.tsframe['A'][:5] = nan
self.tsframe['A'][-5:] = nan
Expand Down
106 changes: 106 additions & 0 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3937,6 +3937,89 @@ def test_datetime64_fillna(self):
result = s.fillna(method='backfill')
assert_series_equal(result, expected)

def test_datetime64_tz_fillna(self):
# Exercise Series.fillna on a naive datetime Series and on a tz-aware one,
# once per timezone listed below, with naive, tz-aware, string and dict
# fill values.
for tz in ['US/Eastern', 'Asia/Tokyo']:
# DatetimeBlock
s = Series([Timestamp('2011-01-01 10:00'), pd.NaT,
Timestamp('2011-01-03 10:00'), pd.NaT])
# naive fill value into naive data
result = s.fillna(pd.Timestamp('2011-01-02 10:00'))
expected = Series([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00'),
Timestamp('2011-01-03 10:00'), Timestamp('2011-01-02 10:00')])
self.assert_series_equal(expected, result)

# tz-aware fill value into naive data: expected mixes naive and aware stamps
result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz))
expected = Series([Timestamp('2011-01-01 10:00'),
Timestamp('2011-01-02 10:00', tz=tz),
Timestamp('2011-01-03 10:00'),
Timestamp('2011-01-02 10:00', tz=tz)])
self.assert_series_equal(expected, result)

# string fill value: expected is built with dtype=object
result = s.fillna('AAA')
expected = Series([Timestamp('2011-01-01 10:00'), 'AAA',
Timestamp('2011-01-03 10:00'), 'AAA'], dtype=object)
self.assert_series_equal(expected, result)

# dict fill value mixing a tz-aware and a naive stamp
result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz),
3: pd.Timestamp('2011-01-04 10:00')})
expected = Series([Timestamp('2011-01-01 10:00'),
Timestamp('2011-01-02 10:00', tz=tz),
Timestamp('2011-01-03 10:00'),
Timestamp('2011-01-04 10:00')])
self.assert_series_equal(expected, result)

# dict fill value with naive stamps only
result = s.fillna({1: pd.Timestamp('2011-01-02 10:00'),
3: pd.Timestamp('2011-01-04 10:00')})
expected = Series([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00'),
Timestamp('2011-01-03 10:00'), Timestamp('2011-01-04 10:00')])
self.assert_series_equal(expected, result)

# DatetimeTZBlock
idx = pd.DatetimeIndex(['2011-01-01 10:00', pd.NaT,
'2011-01-03 10:00', pd.NaT], tz=tz)
s = pd.Series(idx)
# naive fill value into tz-aware data: expected mixes aware and naive stamps
result = s.fillna(pd.Timestamp('2011-01-02 10:00'))
expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
Timestamp('2011-01-02 10:00'),
Timestamp('2011-01-03 10:00', tz=tz),
Timestamp('2011-01-02 10:00')])
self.assert_series_equal(expected, result)

# fill value in the same timezone: expected is built from a tz-aware DatetimeIndex
result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz))
idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-02 10:00',
'2011-01-03 10:00', '2011-01-02 10:00'],
tz=tz)
expected = Series(idx)
self.assert_series_equal(expected, result)

# same fill value, passed as the result of to_pydatetime()
result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz).to_pydatetime())
idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-02 10:00',
'2011-01-03 10:00', '2011-01-02 10:00'],
tz=tz)
expected = Series(idx)
self.assert_series_equal(expected, result)

# string fill value: expected is built with dtype=object
result = s.fillna('AAA')
expected = Series([Timestamp('2011-01-01 10:00', tz=tz), 'AAA',
Timestamp('2011-01-03 10:00', tz=tz), 'AAA'],
dtype=object)
self.assert_series_equal(expected, result)

# dict fill value mixing a tz-aware and a naive stamp
result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz),
3: pd.Timestamp('2011-01-04 10:00')})
expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
Timestamp('2011-01-02 10:00', tz=tz),
Timestamp('2011-01-03 10:00', tz=tz),
Timestamp('2011-01-04 10:00')])
self.assert_series_equal(expected, result)

# dict fill value with stamps in the same timezone only
result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz),
3: pd.Timestamp('2011-01-04 10:00', tz=tz)})
expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
Timestamp('2011-01-02 10:00', tz=tz),
Timestamp('2011-01-03 10:00', tz=tz),
Timestamp('2011-01-04 10:00', tz=tz)])
self.assert_series_equal(expected, result)

def test_fillna_int(self):
s = Series(np.random.randint(-100, 100, 50))
s.fillna(method='ffill', inplace=True)
Expand Down Expand Up @@ -5022,6 +5105,29 @@ def test_dropna_empty(self):
# invalid axis
self.assertRaises(ValueError, s.dropna, axis=1)


def test_datetime64_tz_dropna(self):
    """Check ``dropna`` on a naive and on a tz-aware datetime Series.

    In both cases the ``NaT`` entries at positions 1 and 3 must be
    dropped, leaving index labels 0 and 2; the tz-aware Series must also
    keep its ``datetime64[ns, Asia/Tokyo]`` dtype.
    """
    # DatetimeBlock
    naive = Series([Timestamp('2011-01-01 10:00'), pd.NaT,
                    Timestamp('2011-01-03 10:00'), pd.NaT])
    dropped = naive.dropna()
    wanted = Series([Timestamp('2011-01-01 10:00'),
                     Timestamp('2011-01-03 10:00')], index=[0, 2])
    self.assert_series_equal(dropped, wanted)

    # DatetimeTZBlock
    aware = pd.Series(pd.DatetimeIndex(['2011-01-01 10:00', pd.NaT,
                                        '2011-01-03 10:00', pd.NaT],
                                       tz='Asia/Tokyo'))
    self.assertEqual(aware.dtype, 'datetime64[ns, Asia/Tokyo]')
    dropped = aware.dropna()
    wanted = Series([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
                     Timestamp('2011-01-03 10:00', tz='Asia/Tokyo')],
                    index=[0, 2])
    self.assertEqual(dropped.dtype, 'datetime64[ns, Asia/Tokyo]')
    self.assert_series_equal(dropped, wanted)

def test_axis_alias(self):
s = Series([1, 2, np.nan])
assert_series_equal(s.dropna(axis='rows'), s.dropna(axis='index'))
Expand Down

0 comments on commit a7c705a

Please sign in to comment.