Skip to content

Commit

Permalink
BUG: DatetimeTZBlock can't assign values near dst boundary
Browse files Browse the repository at this point in the history
  • Loading branch information
sinhrks committed Sep 10, 2016
1 parent 289cd6d commit 95aa6b5
Show file tree
Hide file tree
Showing 6 changed files with 158 additions and 42 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.19.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1559,7 +1559,7 @@ Bug Fixes
- Bug in ``Series`` flexible arithmetic methods (like ``.add()``) raises ``ValueError`` when ``axis=None`` (:issue:`13894`)
- Bug in ``DataFrame.to_csv()`` with ``MultiIndex`` columns in which a stray empty line was added (:issue:`6618`)
- Bug in ``DatetimeIndex``, ``TimedeltaIndex`` and ``PeriodIndex.equals()`` may return ``True`` when input isn't ``Index`` but contains the same values (:issue:`13107`)

- Bug in assignment to timezone-aware datetime data, which could fail when the values included datetimes near a DST boundary (:issue:`14146`)

- Bug in ``Index`` raises ``KeyError`` displaying incorrect column when column is not in the df and columns contains duplicate values (:issue:`13822`)
- Bug in ``Period`` and ``PeriodIndex`` creating wrong dates when frequency has combined offset aliases (:issue:`13874`)
Expand Down
19 changes: 12 additions & 7 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -1487,7 +1487,10 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0,
-------
a new block(s), the result of the putmask
"""
new_values = self.values if inplace else self.values.copy()

# use block's copy logic.
# .values may be an Index which does shallow copy by default
new_values = self.values if inplace else self.copy().values
new_values, _, new, _ = self._try_coerce_args(new_values, new)

if isinstance(new, np.ndarray) and len(new) == len(mask):
Expand Down Expand Up @@ -2314,7 +2317,7 @@ def __init__(self, values, placement, ndim=2, **kwargs):
if dtype is not None:
if isinstance(dtype, compat.string_types):
dtype = DatetimeTZDtype.construct_from_string(dtype)
values = values.tz_localize('UTC').tz_convert(dtype.tz)
values = values._shallow_copy(tz=dtype.tz)

if values.tz is None:
raise ValueError("cannot create a DatetimeTZBlock without a tz")
Expand Down Expand Up @@ -2381,12 +2384,14 @@ def _try_coerce_args(self, values, other):
base-type values, values mask, base-type other, other mask
"""
values_mask = _block_shape(isnull(values), ndim=self.ndim)
values = _block_shape(values.tz_localize(None).asi8, ndim=self.ndim)
# asi8 is a view, needs copy
values = _block_shape(values.asi8, ndim=self.ndim)
other_mask = False

if isinstance(other, ABCSeries):
other = self._holder(other)
other_mask = isnull(other)

if isinstance(other, bool):
raise TypeError
elif is_null_datelike_scalar(other):
Expand All @@ -2395,7 +2400,7 @@ def _try_coerce_args(self, values, other):
elif isinstance(other, self._holder):
if other.tz != self.values.tz:
raise ValueError("incompatible or non tz-aware value")
other = other.tz_localize(None).asi8
other = other.asi8
other_mask = isnull(other)
elif isinstance(other, (np.datetime64, datetime, date)):
other = lib.Timestamp(other)
Expand All @@ -2405,7 +2410,7 @@ def _try_coerce_args(self, values, other):
if tz is None or str(tz) != str(self.values.tz):
raise ValueError("incompatible or non tz-aware value")
other_mask = isnull(other)
other = other.tz_localize(None).value
other = other.value

return values, values_mask, other, other_mask

Expand All @@ -2415,12 +2420,12 @@ def _try_coerce_result(self, result):
if result.dtype.kind in ['i', 'f', 'O']:
result = result.astype('M8[ns]')
elif isinstance(result, (np.integer, np.float, np.datetime64)):
result = lib.Timestamp(result).tz_localize(self.values.tz)
result = lib.Timestamp(result, tz=self.values.tz)
if isinstance(result, np.ndarray):
# allow passing of > 1dim if its trivial
if result.ndim > 1:
result = result.reshape(len(result))
result = self._holder(result).tz_localize(self.values.tz)
result = self.values._shallow_copy(result)

return result

Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/indexing/test_coercion.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ def test_setitem_series_datetime64tz(self):
# datetime64 + int -> object
# ToDo: The result must be object
exp = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
pd.Timestamp(1).tz_localize(tz),
pd.Timestamp(1, tz=tz),
pd.Timestamp('2011-01-03', tz=tz),
pd.Timestamp('2011-01-04', tz=tz)])
self._assert_setitem_series_conversion(obj, 1, exp,
Expand Down Expand Up @@ -1038,7 +1038,7 @@ def test_fillna_series_datetime64tz(self):
# datetime64tz + int => datetime64tz
# ToDo: must be object
exp = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
pd.Timestamp(1).tz_localize(tz=tz),
pd.Timestamp(1, tz=tz),
pd.Timestamp('2011-01-03', tz=tz),
pd.Timestamp('2011-01-04', tz=tz)])
self._assert_fillna_conversion(obj, 1, exp,
Expand Down
83 changes: 83 additions & 0 deletions pandas/tests/series/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,89 @@ def test_ix_getitem_iterator(self):
result = self.series.ix[idx]
assert_series_equal(result, self.series[:10])

def test_setitem_with_tz(self):
    """Check scalar and vector assignment into a tz-aware Series.

    For each time zone, a value (or a Series of values) carrying the same
    tz is assigned through ``[]``, ``.loc`` and ``.iloc``. Every
    assignment starts from a fresh copy of ``orig`` so that no case
    depends on state left behind by a previous one.
    """
    for tz in ['US/Eastern', 'UTC', 'Asia/Tokyo']:
        orig = pd.Series(pd.date_range('2016-01-01', freq='H', periods=3,
                                       tz=tz))
        self.assertEqual(orig.dtype, 'datetime64[ns, {0}]'.format(tz))

        # scalar
        exp = pd.Series([pd.Timestamp('2016-01-01 00:00', tz=tz),
                         pd.Timestamp('2011-01-01 00:00', tz=tz),
                         pd.Timestamp('2016-01-01 02:00', tz=tz)])

        s = orig.copy()
        s[1] = pd.Timestamp('2011-01-01', tz=tz)
        tm.assert_series_equal(s, exp)

        s = orig.copy()
        s.loc[1] = pd.Timestamp('2011-01-01', tz=tz)
        tm.assert_series_equal(s, exp)

        s = orig.copy()
        s.iloc[1] = pd.Timestamp('2011-01-01', tz=tz)
        tm.assert_series_equal(s, exp)

        # vector
        vals = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
                          pd.Timestamp('2012-01-01', tz=tz)], index=[1, 2])
        self.assertEqual(vals.dtype, 'datetime64[ns, {0}]'.format(tz))

        exp = pd.Series([pd.Timestamp('2016-01-01 00:00', tz=tz),
                         pd.Timestamp('2011-01-01 00:00', tz=tz),
                         pd.Timestamp('2012-01-01 00:00', tz=tz)])

        # start from a clean copy; reusing the Series mutated by the
        # scalar cases would hide a failure to write position 1
        s = orig.copy()
        s[[1, 2]] = vals
        tm.assert_series_equal(s, exp)

        s = orig.copy()
        s.loc[[1, 2]] = vals
        tm.assert_series_equal(s, exp)

        s = orig.copy()
        s.iloc[[1, 2]] = vals
        tm.assert_series_equal(s, exp)

def test_setitem_with_tz_dst(self):
    """Check assignment into a tz-aware Series spanning a DST transition.

    ``orig`` is an hourly US/Eastern range starting at midnight on
    2016-11-06, the day of the fall-back transition. Scalar and vector
    values carrying the same tz are assigned through ``[]``, ``.loc``
    and ``.iloc``; elements that are not assigned must stay exactly as
    they were in ``orig``.
    """
    # GH 14146
    tz = 'US/Eastern'
    orig = pd.Series(pd.date_range('2016-11-06', freq='H', periods=3,
                                   tz=tz))
    self.assertEqual(orig.dtype, 'datetime64[ns, {0}]'.format(tz))

    # Capture the untouched endpoints before any assignment. The wall
    # clock value of the last element depends on how date_range steps
    # across the repeated 01:00 hour, so take it from orig instead of
    # hard-coding it.
    first = orig.iloc[0]
    last = orig.iloc[2]
    self.assertEqual(first, pd.Timestamp('2016-11-06 00:00', tz=tz))

    # scalar
    exp = pd.Series([first,
                     pd.Timestamp('2011-01-01 00:00', tz=tz),
                     last])

    s = orig.copy()
    s[1] = pd.Timestamp('2011-01-01', tz=tz)
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s.loc[1] = pd.Timestamp('2011-01-01', tz=tz)
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s.iloc[1] = pd.Timestamp('2011-01-01', tz=tz)
    tm.assert_series_equal(s, exp)

    # vector
    vals = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
                      pd.Timestamp('2012-01-01', tz=tz)], index=[1, 2])
    self.assertEqual(vals.dtype, 'datetime64[ns, {0}]'.format(tz))

    exp = pd.Series([first,
                     pd.Timestamp('2011-01-01 00:00', tz=tz),
                     pd.Timestamp('2012-01-01 00:00', tz=tz)])

    # start from a clean copy; reusing the Series mutated by the scalar
    # cases would hide a failure to write position 1
    s = orig.copy()
    s[[1, 2]] = vals
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s.loc[[1, 2]] = vals
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s.iloc[[1, 2]] = vals
    tm.assert_series_equal(s, exp)

def test_where(self):
s = Series(np.random.randn(5))
cond = s > 0
Expand Down
8 changes: 5 additions & 3 deletions pandas/tests/series/test_misc_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,6 @@ def test_copy(self):
self.assertTrue(np.isnan(s2[0]))
self.assertFalse(np.isnan(s[0]))
else:

# we DID modify the original Series
self.assertTrue(np.isnan(s2[0]))
self.assertTrue(np.isnan(s[0]))
Expand All @@ -252,6 +251,7 @@ def test_copy(self):
expected2 = Series([Timestamp('1999/01/01', tz='UTC')])

for deep in [None, False, True]:

s = Series([Timestamp('2012/01/01', tz='UTC')])

if deep is None:
Expand All @@ -263,11 +263,13 @@ def test_copy(self):

# default deep is True
if deep is None or deep is True:
assert_series_equal(s, expected)
# Did not modify original Series
assert_series_equal(s2, expected2)
assert_series_equal(s, expected)
else:
assert_series_equal(s, expected2)
# we DID modify the original Series
assert_series_equal(s2, expected2)
assert_series_equal(s, expected2)

def test_axis_alias(self):
s = Series([1, 2, np.nan])
Expand Down
84 changes: 55 additions & 29 deletions pandas/tests/series/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,99 +130,125 @@ def test_datetime64_fillna(self):
def test_datetime64_tz_fillna(self):
for tz in ['US/Eastern', 'Asia/Tokyo']:
# DatetimeBlock
s = Series([Timestamp('2011-01-01 10:00'), pd.NaT, Timestamp(
'2011-01-03 10:00'), pd.NaT])
s = Series([Timestamp('2011-01-01 10:00'), pd.NaT,
Timestamp('2011-01-03 10:00'), pd.NaT])
null_loc = pd.Series([False, True, False, True])

result = s.fillna(pd.Timestamp('2011-01-02 10:00'))
expected = Series([Timestamp('2011-01-01 10:00'), Timestamp(
'2011-01-02 10:00'), Timestamp('2011-01-03 10:00'), Timestamp(
'2011-01-02 10:00')])
expected = Series([Timestamp('2011-01-01 10:00'),
Timestamp('2011-01-02 10:00'),
Timestamp('2011-01-03 10:00'),
Timestamp('2011-01-02 10:00')])
self.assert_series_equal(expected, result)
# check s is not changed
self.assert_series_equal(pd.isnull(s), null_loc)

result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz))
expected = Series([Timestamp('2011-01-01 10:00'), Timestamp(
'2011-01-02 10:00', tz=tz), Timestamp('2011-01-03 10:00'),
Timestamp('2011-01-02 10:00', tz=tz)])
expected = Series([Timestamp('2011-01-01 10:00'),
Timestamp('2011-01-02 10:00', tz=tz),
Timestamp('2011-01-03 10:00'),
Timestamp('2011-01-02 10:00', tz=tz)])
self.assert_series_equal(expected, result)
self.assert_series_equal(pd.isnull(s), null_loc)

result = s.fillna('AAA')
expected = Series([Timestamp('2011-01-01 10:00'), 'AAA',
Timestamp('2011-01-03 10:00'), 'AAA'],
dtype=object)
self.assert_series_equal(expected, result)
self.assert_series_equal(pd.isnull(s), null_loc)

result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz),
3: pd.Timestamp('2011-01-04 10:00')})
expected = Series([Timestamp('2011-01-01 10:00'), Timestamp(
'2011-01-02 10:00', tz=tz), Timestamp('2011-01-03 10:00'),
Timestamp('2011-01-04 10:00')])
expected = Series([Timestamp('2011-01-01 10:00'),
Timestamp('2011-01-02 10:00', tz=tz),
Timestamp('2011-01-03 10:00'),
Timestamp('2011-01-04 10:00')])
self.assert_series_equal(expected, result)
self.assert_series_equal(pd.isnull(s), null_loc)

result = s.fillna({1: pd.Timestamp('2011-01-02 10:00'),
3: pd.Timestamp('2011-01-04 10:00')})
expected = Series([Timestamp('2011-01-01 10:00'), Timestamp(
'2011-01-02 10:00'), Timestamp('2011-01-03 10:00'), Timestamp(
'2011-01-04 10:00')])
expected = Series([Timestamp('2011-01-01 10:00'),
Timestamp('2011-01-02 10:00'),
Timestamp('2011-01-03 10:00'),
Timestamp('2011-01-04 10:00')])
self.assert_series_equal(expected, result)
self.assert_series_equal(pd.isnull(s), null_loc)

# DatetimeBlockTZ
idx = pd.DatetimeIndex(['2011-01-01 10:00', pd.NaT,
'2011-01-03 10:00', pd.NaT], tz=tz)
s = pd.Series(idx)
self.assertEqual(s.dtype, 'datetime64[ns, {0}]'.format(tz))
self.assert_series_equal(pd.isnull(s), null_loc)

result = s.fillna(pd.Timestamp('2011-01-02 10:00'))
expected = Series([Timestamp('2011-01-01 10:00', tz=tz), Timestamp(
'2011-01-02 10:00'), Timestamp('2011-01-03 10:00', tz=tz),
Timestamp('2011-01-02 10:00')])
expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
Timestamp('2011-01-02 10:00'),
Timestamp('2011-01-03 10:00', tz=tz),
Timestamp('2011-01-02 10:00')])
self.assert_series_equal(expected, result)
self.assert_series_equal(pd.isnull(s), null_loc)

result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz))
idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-02 10:00',
'2011-01-03 10:00', '2011-01-02 10:00'],
tz=tz)
expected = Series(idx)
self.assert_series_equal(expected, result)
self.assert_series_equal(pd.isnull(s), null_loc)

result = s.fillna(pd.Timestamp(
'2011-01-02 10:00', tz=tz).to_pydatetime())
result = s.fillna(pd.Timestamp('2011-01-02 10:00',
tz=tz).to_pydatetime())
idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-02 10:00',
'2011-01-03 10:00', '2011-01-02 10:00'],
tz=tz)
expected = Series(idx)
self.assert_series_equal(expected, result)
self.assert_series_equal(pd.isnull(s), null_loc)

result = s.fillna('AAA')
expected = Series([Timestamp('2011-01-01 10:00', tz=tz), 'AAA',
Timestamp('2011-01-03 10:00', tz=tz), 'AAA'],
dtype=object)
self.assert_series_equal(expected, result)
self.assert_series_equal(pd.isnull(s), null_loc)

result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz),
3: pd.Timestamp('2011-01-04 10:00')})
expected = Series([Timestamp('2011-01-01 10:00', tz=tz), Timestamp(
'2011-01-02 10:00', tz=tz), Timestamp(
'2011-01-03 10:00', tz=tz), Timestamp('2011-01-04 10:00')])
expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
Timestamp('2011-01-02 10:00', tz=tz),
Timestamp('2011-01-03 10:00', tz=tz),
Timestamp('2011-01-04 10:00')])
self.assert_series_equal(expected, result)
self.assert_series_equal(pd.isnull(s), null_loc)

result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz),
3: pd.Timestamp('2011-01-04 10:00', tz=tz)})
expected = Series([Timestamp('2011-01-01 10:00', tz=tz), Timestamp(
'2011-01-02 10:00', tz=tz), Timestamp(
'2011-01-03 10:00', tz=tz), Timestamp('2011-01-04 10:00',
tz=tz)])
expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
Timestamp('2011-01-02 10:00', tz=tz),
Timestamp('2011-01-03 10:00', tz=tz),
Timestamp('2011-01-04 10:00', tz=tz)])
self.assert_series_equal(expected, result)
self.assert_series_equal(pd.isnull(s), null_loc)

# filling with a naive/other zone, coerce to object
result = s.fillna(Timestamp('20130101'))
expected = Series([Timestamp('2011-01-01 10:00', tz=tz), Timestamp(
'2013-01-01'), Timestamp('2011-01-03 10:00', tz=tz), Timestamp(
'2013-01-01')])
expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
Timestamp('2013-01-01'),
Timestamp('2011-01-03 10:00', tz=tz),
Timestamp('2013-01-01')])
self.assert_series_equal(expected, result)
self.assert_series_equal(pd.isnull(s), null_loc)

result = s.fillna(Timestamp('20130101', tz='US/Pacific'))
expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
Timestamp('2013-01-01', tz='US/Pacific'),
Timestamp('2011-01-03 10:00', tz=tz),
Timestamp('2013-01-01', tz='US/Pacific')])
self.assert_series_equal(expected, result)
self.assert_series_equal(pd.isnull(s), null_loc)

def test_fillna_int(self):
s = Series(np.random.randint(-100, 100, 50))
Expand Down

0 comments on commit 95aa6b5

Please sign in to comment.