Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: DatetimeTZBlock can't assign values near dst boundary #14146

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.19.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1559,7 +1559,7 @@ Bug Fixes
- Bug in ``Series`` flexible arithmetic methods (like ``.add()``) raises ``ValueError`` when ``axis=None`` (:issue:`13894`)
- Bug in ``DataFrame.to_csv()`` with ``MultiIndex`` columns in which a stray empty line was added (:issue:`6618`)
- Bug in ``DatetimeIndex``, ``TimedeltaIndex`` and ``PeriodIndex.equals()`` may return ``True`` when input isn't ``Index`` but contains the same values (:issue:`13107`)

- Bug in assignment against datetime with timezone may not work if it contains datetime near DST boundary (:issue:`14146`)

- Bug in ``Index`` raises ``KeyError`` displaying incorrect column when column is not in the df and columns contains duplicate values (:issue:`13822`)
- Bug in ``Period`` and ``PeriodIndex`` creating wrong dates when frequency has combined offset aliases (:issue:`13874`)
Expand Down
19 changes: 12 additions & 7 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -1487,7 +1487,10 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0,
-------
a new block(s), the result of the putmask
"""
new_values = self.values if inplace else self.values.copy()

# use block's copy logic.
# .values may be an Index which does shallow copy by default
new_values = self.values if inplace else self.copy().values
new_values, _, new, _ = self._try_coerce_args(new_values, new)

if isinstance(new, np.ndarray) and len(new) == len(mask):
Expand Down Expand Up @@ -2314,7 +2317,7 @@ def __init__(self, values, placement, ndim=2, **kwargs):
if dtype is not None:
if isinstance(dtype, compat.string_types):
dtype = DatetimeTZDtype.construct_from_string(dtype)
values = values.tz_localize('UTC').tz_convert(dtype.tz)
values = values._shallow_copy(tz=dtype.tz)

if values.tz is None:
raise ValueError("cannot create a DatetimeTZBlock without a tz")
Expand Down Expand Up @@ -2381,12 +2384,14 @@ def _try_coerce_args(self, values, other):
base-type values, values mask, base-type other, other mask
"""
values_mask = _block_shape(isnull(values), ndim=self.ndim)
values = _block_shape(values.tz_localize(None).asi8, ndim=self.ndim)
# asi8 is a view, needs copy
values = _block_shape(values.asi8, ndim=self.ndim)
other_mask = False

if isinstance(other, ABCSeries):
other = self._holder(other)
other_mask = isnull(other)

if isinstance(other, bool):
raise TypeError
elif is_null_datelike_scalar(other):
Expand All @@ -2395,7 +2400,7 @@ def _try_coerce_args(self, values, other):
elif isinstance(other, self._holder):
if other.tz != self.values.tz:
raise ValueError("incompatible or non tz-aware value")
other = other.tz_localize(None).asi8
other = other.asi8
other_mask = isnull(other)
elif isinstance(other, (np.datetime64, datetime, date)):
other = lib.Timestamp(other)
Expand All @@ -2405,7 +2410,7 @@ def _try_coerce_args(self, values, other):
if tz is None or str(tz) != str(self.values.tz):
raise ValueError("incompatible or non tz-aware value")
other_mask = isnull(other)
other = other.tz_localize(None).value
other = other.value

return values, values_mask, other, other_mask

Expand All @@ -2415,12 +2420,12 @@ def _try_coerce_result(self, result):
if result.dtype.kind in ['i', 'f', 'O']:
result = result.astype('M8[ns]')
elif isinstance(result, (np.integer, np.float, np.datetime64)):
result = lib.Timestamp(result).tz_localize(self.values.tz)
result = lib.Timestamp(result, tz=self.values.tz)
if isinstance(result, np.ndarray):
# allow passing of > 1dim if its trivial
if result.ndim > 1:
result = result.reshape(len(result))
result = self._holder(result).tz_localize(self.values.tz)
result = self.values._shallow_copy(result)

return result

Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/indexing/test_coercion.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ def test_setitem_series_datetime64tz(self):
# datetime64 + int -> object
# ToDo: The result must be object
exp = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
pd.Timestamp(1).tz_localize(tz),
pd.Timestamp(1, tz=tz),
pd.Timestamp('2011-01-03', tz=tz),
pd.Timestamp('2011-01-04', tz=tz)])
self._assert_setitem_series_conversion(obj, 1, exp,
Expand Down Expand Up @@ -1038,7 +1038,7 @@ def test_fillna_series_datetime64tz(self):
# datetime64tz + int => datetime64tz
# ToDo: must be object
exp = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
pd.Timestamp(1).tz_localize(tz=tz),
pd.Timestamp(1, tz=tz),
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both cases are incorrect. Being fixed by #14145.

pd.Timestamp('2011-01-03', tz=tz),
pd.Timestamp('2011-01-04', tz=tz)])
self._assert_fillna_conversion(obj, 1, exp,
Expand Down
83 changes: 83 additions & 0 deletions pandas/tests/series/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,89 @@ def test_ix_getitem_iterator(self):
result = self.series.ix[idx]
assert_series_equal(result, self.series[:10])

def test_setitem_with_tz(self):
for tz in ['US/Eastern', 'UTC', 'Asia/Tokyo']:
orig = pd.Series(pd.date_range('2016-01-01', freq='H', periods=3,
tz=tz))
self.assertEqual(orig.dtype, 'datetime64[ns, {0}]'.format(tz))

# scalar
s = orig.copy()
s[1] = pd.Timestamp('2011-01-01', tz=tz)
exp = pd.Series([pd.Timestamp('2016-01-01 00:00', tz=tz),
pd.Timestamp('2011-01-01 00:00', tz=tz),
pd.Timestamp('2016-01-01 02:00', tz=tz)])
tm.assert_series_equal(s, exp)

s = orig.copy()
s.loc[1] = pd.Timestamp('2011-01-01', tz=tz)
tm.assert_series_equal(s, exp)

s = orig.copy()
s.iloc[1] = pd.Timestamp('2011-01-01', tz=tz)
tm.assert_series_equal(s, exp)

# vector
vals = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
pd.Timestamp('2012-01-01', tz=tz)], index=[1, 2])
self.assertEqual(vals.dtype, 'datetime64[ns, {0}]'.format(tz))

s[[1, 2]] = vals
exp = pd.Series([pd.Timestamp('2016-01-01 00:00', tz=tz),
pd.Timestamp('2011-01-01 00:00', tz=tz),
pd.Timestamp('2012-01-01 00:00', tz=tz)])
tm.assert_series_equal(s, exp)

s = orig.copy()
s.loc[[1, 2]] = vals
tm.assert_series_equal(s, exp)

s = orig.copy()
s.iloc[[1, 2]] = vals
tm.assert_series_equal(s, exp)

def test_setitem_with_tz_dst(self):
# GH XXX
tz = 'US/Eastern'
orig = pd.Series(pd.date_range('2016-11-06', freq='H', periods=3,
tz=tz))
self.assertEqual(orig.dtype, 'datetime64[ns, {0}]'.format(tz))

# scalar
s = orig.copy()
s[1] = pd.Timestamp('2011-01-01', tz=tz)
exp = pd.Series([pd.Timestamp('2016-11-06 00:00', tz=tz),
pd.Timestamp('2011-01-01 00:00', tz=tz),
pd.Timestamp('2016-11-06 02:00', tz=tz)])
tm.assert_series_equal(s, exp)

s = orig.copy()
s.loc[1] = pd.Timestamp('2011-01-01', tz=tz)
tm.assert_series_equal(s, exp)

s = orig.copy()
s.iloc[1] = pd.Timestamp('2011-01-01', tz=tz)
tm.assert_series_equal(s, exp)

# vector
vals = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
pd.Timestamp('2012-01-01', tz=tz)], index=[1, 2])
self.assertEqual(vals.dtype, 'datetime64[ns, {0}]'.format(tz))

s[[1, 2]] = vals
exp = pd.Series([pd.Timestamp('2016-11-06 00:00', tz=tz),
pd.Timestamp('2011-01-01 00:00', tz=tz),
pd.Timestamp('2012-01-01 00:00', tz=tz)])
tm.assert_series_equal(s, exp)

s = orig.copy()
s.loc[[1, 2]] = vals
tm.assert_series_equal(s, exp)

s = orig.copy()
s.iloc[[1, 2]] = vals
tm.assert_series_equal(s, exp)

def test_where(self):
s = Series(np.random.randn(5))
cond = s > 0
Expand Down
8 changes: 5 additions & 3 deletions pandas/tests/series/test_misc_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,6 @@ def test_copy(self):
self.assertTrue(np.isnan(s2[0]))
self.assertFalse(np.isnan(s[0]))
else:

# we DID modify the original Series
self.assertTrue(np.isnan(s2[0]))
self.assertTrue(np.isnan(s[0]))
Expand All @@ -252,6 +251,7 @@ def test_copy(self):
expected2 = Series([Timestamp('1999/01/01', tz='UTC')])

for deep in [None, False, True]:

s = Series([Timestamp('2012/01/01', tz='UTC')])

if deep is None:
Expand All @@ -263,11 +263,13 @@ def test_copy(self):

# default deep is True
if deep is None or deep is True:
assert_series_equal(s, expected)
# Did not modify original Series
assert_series_equal(s2, expected2)
assert_series_equal(s, expected)
else:
assert_series_equal(s, expected2)
# we DID modify the original Series
assert_series_equal(s2, expected2)
assert_series_equal(s, expected2)

def test_axis_alias(self):
s = Series([1, 2, np.nan])
Expand Down
84 changes: 55 additions & 29 deletions pandas/tests/series/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,99 +130,125 @@ def test_datetime64_fillna(self):
def test_datetime64_tz_fillna(self):
for tz in ['US/Eastern', 'Asia/Tokyo']:
# DatetimeBlock
s = Series([Timestamp('2011-01-01 10:00'), pd.NaT, Timestamp(
'2011-01-03 10:00'), pd.NaT])
s = Series([Timestamp('2011-01-01 10:00'), pd.NaT,
Timestamp('2011-01-03 10:00'), pd.NaT])
null_loc = pd.Series([False, True, False, True])

result = s.fillna(pd.Timestamp('2011-01-02 10:00'))
expected = Series([Timestamp('2011-01-01 10:00'), Timestamp(
'2011-01-02 10:00'), Timestamp('2011-01-03 10:00'), Timestamp(
'2011-01-02 10:00')])
expected = Series([Timestamp('2011-01-01 10:00'),
Timestamp('2011-01-02 10:00'),
Timestamp('2011-01-03 10:00'),
Timestamp('2011-01-02 10:00')])
self.assert_series_equal(expected, result)
# check s is not changed
self.assert_series_equal(pd.isnull(s), null_loc)

result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz))
expected = Series([Timestamp('2011-01-01 10:00'), Timestamp(
'2011-01-02 10:00', tz=tz), Timestamp('2011-01-03 10:00'),
Timestamp('2011-01-02 10:00', tz=tz)])
expected = Series([Timestamp('2011-01-01 10:00'),
Timestamp('2011-01-02 10:00', tz=tz),
Timestamp('2011-01-03 10:00'),
Timestamp('2011-01-02 10:00', tz=tz)])
self.assert_series_equal(expected, result)
self.assert_series_equal(pd.isnull(s), null_loc)

result = s.fillna('AAA')
expected = Series([Timestamp('2011-01-01 10:00'), 'AAA',
Timestamp('2011-01-03 10:00'), 'AAA'],
dtype=object)
self.assert_series_equal(expected, result)
self.assert_series_equal(pd.isnull(s), null_loc)

result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz),
3: pd.Timestamp('2011-01-04 10:00')})
expected = Series([Timestamp('2011-01-01 10:00'), Timestamp(
'2011-01-02 10:00', tz=tz), Timestamp('2011-01-03 10:00'),
Timestamp('2011-01-04 10:00')])
expected = Series([Timestamp('2011-01-01 10:00'),
Timestamp('2011-01-02 10:00', tz=tz),
Timestamp('2011-01-03 10:00'),
Timestamp('2011-01-04 10:00')])
self.assert_series_equal(expected, result)
self.assert_series_equal(pd.isnull(s), null_loc)

result = s.fillna({1: pd.Timestamp('2011-01-02 10:00'),
3: pd.Timestamp('2011-01-04 10:00')})
expected = Series([Timestamp('2011-01-01 10:00'), Timestamp(
'2011-01-02 10:00'), Timestamp('2011-01-03 10:00'), Timestamp(
'2011-01-04 10:00')])
expected = Series([Timestamp('2011-01-01 10:00'),
Timestamp('2011-01-02 10:00'),
Timestamp('2011-01-03 10:00'),
Timestamp('2011-01-04 10:00')])
self.assert_series_equal(expected, result)
self.assert_series_equal(pd.isnull(s), null_loc)

# DatetimeBlockTZ
idx = pd.DatetimeIndex(['2011-01-01 10:00', pd.NaT,
'2011-01-03 10:00', pd.NaT], tz=tz)
s = pd.Series(idx)
self.assertEqual(s.dtype, 'datetime64[ns, {0}]'.format(tz))
self.assert_series_equal(pd.isnull(s), null_loc)

result = s.fillna(pd.Timestamp('2011-01-02 10:00'))
expected = Series([Timestamp('2011-01-01 10:00', tz=tz), Timestamp(
'2011-01-02 10:00'), Timestamp('2011-01-03 10:00', tz=tz),
Timestamp('2011-01-02 10:00')])
expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
Timestamp('2011-01-02 10:00'),
Timestamp('2011-01-03 10:00', tz=tz),
Timestamp('2011-01-02 10:00')])
self.assert_series_equal(expected, result)
self.assert_series_equal(pd.isnull(s), null_loc)

result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz))
idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-02 10:00',
'2011-01-03 10:00', '2011-01-02 10:00'],
tz=tz)
expected = Series(idx)
self.assert_series_equal(expected, result)
self.assert_series_equal(pd.isnull(s), null_loc)

result = s.fillna(pd.Timestamp(
'2011-01-02 10:00', tz=tz).to_pydatetime())
result = s.fillna(pd.Timestamp('2011-01-02 10:00',
tz=tz).to_pydatetime())
idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-02 10:00',
'2011-01-03 10:00', '2011-01-02 10:00'],
tz=tz)
expected = Series(idx)
self.assert_series_equal(expected, result)
self.assert_series_equal(pd.isnull(s), null_loc)

result = s.fillna('AAA')
expected = Series([Timestamp('2011-01-01 10:00', tz=tz), 'AAA',
Timestamp('2011-01-03 10:00', tz=tz), 'AAA'],
dtype=object)
self.assert_series_equal(expected, result)
self.assert_series_equal(pd.isnull(s), null_loc)

result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz),
3: pd.Timestamp('2011-01-04 10:00')})
expected = Series([Timestamp('2011-01-01 10:00', tz=tz), Timestamp(
'2011-01-02 10:00', tz=tz), Timestamp(
'2011-01-03 10:00', tz=tz), Timestamp('2011-01-04 10:00')])
expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
Timestamp('2011-01-02 10:00', tz=tz),
Timestamp('2011-01-03 10:00', tz=tz),
Timestamp('2011-01-04 10:00')])
self.assert_series_equal(expected, result)
self.assert_series_equal(pd.isnull(s), null_loc)

result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz),
3: pd.Timestamp('2011-01-04 10:00', tz=tz)})
expected = Series([Timestamp('2011-01-01 10:00', tz=tz), Timestamp(
'2011-01-02 10:00', tz=tz), Timestamp(
'2011-01-03 10:00', tz=tz), Timestamp('2011-01-04 10:00',
tz=tz)])
expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
Timestamp('2011-01-02 10:00', tz=tz),
Timestamp('2011-01-03 10:00', tz=tz),
Timestamp('2011-01-04 10:00', tz=tz)])
self.assert_series_equal(expected, result)
self.assert_series_equal(pd.isnull(s), null_loc)

# filling with a naive/other zone, coerce to object
result = s.fillna(Timestamp('20130101'))
expected = Series([Timestamp('2011-01-01 10:00', tz=tz), Timestamp(
'2013-01-01'), Timestamp('2011-01-03 10:00', tz=tz), Timestamp(
'2013-01-01')])
expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
Timestamp('2013-01-01'),
Timestamp('2011-01-03 10:00', tz=tz),
Timestamp('2013-01-01')])
self.assert_series_equal(expected, result)
self.assert_series_equal(pd.isnull(s), null_loc)

result = s.fillna(Timestamp('20130101', tz='US/Pacific'))
expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
Timestamp('2013-01-01', tz='US/Pacific'),
Timestamp('2011-01-03 10:00', tz=tz),
Timestamp('2013-01-01', tz='US/Pacific')])
self.assert_series_equal(expected, result)
self.assert_series_equal(pd.isnull(s), null_loc)

def test_fillna_int(self):
s = Series(np.random.randint(-100, 100, 50))
Expand Down