Skip to content

Commit

Permalink
DEPR: DTI int64 values and tz interpreted as UTC (pandas-dev#30115)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and proost committed Dec 19, 2019
1 parent 9a48d36 commit beb5e68
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 79 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
- Removed the previously deprecated "by" keyword from :meth:`DataFrame.sort_index`, use :meth:`DataFrame.sort_values` instead (:issue:`10726`)
- Removed support for nested renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`DataFrameGroupBy.aggregate`, :meth:`SeriesGroupBy.aggregate`, :meth:`Rolling.aggregate` (:issue:`18529`)
- Passing ``datetime64`` data to :class:`TimedeltaIndex` or ``timedelta64`` data to ``DatetimeIndex`` now raises ``TypeError`` (:issue:`23539`, :issue:`23937`)
- Passing ``int64`` values together with a timezone to :class:`DatetimeIndex` now interprets the values as nanosecond timestamps in UTC, not as wall times in the given timezone (:issue:`24559`)
- A tuple passed to :meth:`DataFrame.groupby` is now exclusively treated as a single key (:issue:`18314`)
- Addition and subtraction of ``int`` or integer-arrays is no longer allowed in :class:`Timestamp`, :class:`DatetimeIndex`, :class:`TimedeltaIndex`, use ``obj + n * obj.freq`` instead of ``obj + n`` (:issue:`22535`)
- Removed :meth:`Series.from_array` (:issue:`18258`)
Expand Down
35 changes: 0 additions & 35 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,21 +55,6 @@
from pandas.tseries.offsets import Day, Tick

_midnight = time(0, 0)
# TODO(GH-24559): Remove warning, int_as_wall_time parameter.
_i8_message = """
Passing integer-dtype data and a timezone to DatetimeIndex. Integer values
will be interpreted differently in a future version of pandas. Previously,
these were viewed as datetime64[ns] values representing the wall time
*in the specified timezone*. In the future, these will be viewed as
datetime64[ns] values representing the wall time *in UTC*. This is similar
to a nanosecond-precision UNIX epoch. To accept the future behavior, use
pd.to_datetime(integer_data, utc=True).tz_convert(tz)
To keep the previous behavior, use
pd.to_datetime(integer_data).tz_localize(tz)
"""


def tz_to_dtype(tz):
Expand Down Expand Up @@ -422,7 +407,6 @@ def _from_sequence(
dayfirst=False,
yearfirst=False,
ambiguous="raise",
int_as_wall_time=False,
):

freq, freq_infer = dtl.maybe_infer_freq(freq)
Expand All @@ -435,7 +419,6 @@ def _from_sequence(
dayfirst=dayfirst,
yearfirst=yearfirst,
ambiguous=ambiguous,
int_as_wall_time=int_as_wall_time,
)

freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq, freq_infer)
Expand Down Expand Up @@ -1811,7 +1794,6 @@ def sequence_to_dt64ns(
dayfirst=False,
yearfirst=False,
ambiguous="raise",
int_as_wall_time=False,
):
"""
Parameters
Expand All @@ -1824,13 +1806,6 @@ def sequence_to_dt64ns(
yearfirst : bool, default False
ambiguous : str, bool, or arraylike, default 'raise'
See pandas._libs.tslibs.conversion.tz_localize_to_utc.
int_as_wall_time : bool, default False
Whether to treat ints as wall time in specified timezone, or as
nanosecond-precision UNIX epoch (wall time in UTC).
This is used in DatetimeIndex.__init__ to deprecate the wall-time
behaviour.
.. versionadded:: 0.24.0
Returns
-------
Expand Down Expand Up @@ -1891,10 +1866,6 @@ def sequence_to_dt64ns(
data, dayfirst=dayfirst, yearfirst=yearfirst
)
tz = maybe_infer_tz(tz, inferred_tz)
# When a sequence of timestamp objects is passed, we always
# want to treat the (now i8-valued) data as UTC timestamps,
# not wall times.
int_as_wall_time = False

# `data` may have originally been a Categorical[datetime64[ns, tz]],
# so we need to handle these types.
Expand Down Expand Up @@ -1928,12 +1899,6 @@ def sequence_to_dt64ns(

if data.dtype != _INT64_DTYPE:
data = data.astype(np.int64, copy=False)
if int_as_wall_time and tz is not None and not timezones.is_utc(tz):
warnings.warn(_i8_message, FutureWarning, stacklevel=4)
data = conversion.tz_localize_to_utc(
data.view("i8"), tz, ambiguous=ambiguous
)
data = data.view(_NS_DTYPE)
result = data.view(_NS_DTYPE)

if copy:
Expand Down
1 change: 0 additions & 1 deletion pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,6 @@ def __new__(
dayfirst=dayfirst,
yearfirst=yearfirst,
ambiguous=ambiguous,
int_as_wall_time=True,
)

subarr = cls._simple_new(dtarr, name=name, freq=dtarr.freq, tz=dtarr.tz)
Expand Down
36 changes: 12 additions & 24 deletions pandas/tests/indexes/datetimes/test_construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,15 +122,11 @@ def test_construction_with_alt_tz_localize(self, kwargs, tz_aware_fixture):
i = pd.date_range("20130101", periods=5, freq="H", tz=tz)
kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()}

if str(tz) in ("UTC", "tzutc()", "UTC+00:00"):
warn = None
else:
warn = FutureWarning

with tm.assert_produces_warning(warn, check_stacklevel=False):
result = DatetimeIndex(i.tz_localize(None).asi8, **kwargs)
expected = DatetimeIndex(i, **kwargs)
tm.assert_index_equal(result, expected)
if "tz" in kwargs:
result = DatetimeIndex(i.asi8, tz="UTC").tz_convert(kwargs["tz"])

expected = DatetimeIndex(i, **kwargs)
tm.assert_index_equal(result, expected)

# localize into the provided tz
i2 = DatetimeIndex(i.tz_localize(None).asi8, tz="UTC")
Expand Down Expand Up @@ -485,11 +481,13 @@ def test_construction_with_ndarray(self):
expected = DatetimeIndex(["2013-10-07", "2013-10-08", "2013-10-09"], freq="B")
tm.assert_index_equal(result, expected)

def test_integer_values_and_tz_deprecated(self):
def test_integer_values_and_tz_interpreted_as_utc(self):
# GH-24559
values = np.array([946684800000000000])
with tm.assert_produces_warning(FutureWarning):
result = DatetimeIndex(values, tz="US/Central")
val = np.datetime64("2000-01-01 00:00:00", "ns")
values = np.array([val.view("i8")])

result = DatetimeIndex(values).tz_localize("US/Central")

expected = pd.DatetimeIndex(["2000-01-01T00:00:00"], tz="US/Central")
tm.assert_index_equal(result, expected)

Expand Down Expand Up @@ -718,17 +716,7 @@ def test_constructor_timestamp_near_dst(self):
@pytest.mark.parametrize("box", [np.array, partial(np.array, dtype=object), list])
@pytest.mark.parametrize(
"tz, dtype",
[
pytest.param(
"US/Pacific",
"datetime64[ns, US/Pacific]",
marks=[
pytest.mark.xfail(),
pytest.mark.filterwarnings("ignore:\\n Passing:FutureWarning"),
],
),
[None, "datetime64[ns]"],
],
[("US/Pacific", "datetime64[ns, US/Pacific]"), (None, "datetime64[ns]")],
)
def test_constructor_with_int_tz(self, klass, box, tz, dtype):
# GH 20997, 20964
Expand Down
5 changes: 2 additions & 3 deletions pandas/tests/indexes/multi/test_integrity.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,8 @@ def test_values_multiindex_datetimeindex():
# Test to ensure we hit the boxing / nobox part of MI.values
ints = np.arange(10 ** 18, 10 ** 18 + 5)
naive = pd.DatetimeIndex(ints)
# TODO(GH-24559): Remove the FutureWarning
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
aware = pd.DatetimeIndex(ints, tz="US/Central")

aware = pd.DatetimeIndex(ints, tz="US/Central")

idx = pd.MultiIndex.from_arrays([naive, aware])
result = idx.values
Expand Down
29 changes: 13 additions & 16 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,9 +453,9 @@ def test_constructor_dtypes_to_timedelta(self, cast_index, vals):
index = Index(vals)
assert isinstance(index, TimedeltaIndex)

@pytest.mark.parametrize("attr, utc", [["values", False], ["asi8", True]])
@pytest.mark.parametrize("attr", ["values", "asi8"])
@pytest.mark.parametrize("klass", [pd.Index, pd.DatetimeIndex])
def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, utc, klass):
def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, klass):
# Test constructing with a datetimetz dtype
# .values produces numpy datetimes, so these are considered naive
# .asi8 produces integers, so these are considered epoch timestamps
Expand All @@ -466,30 +466,27 @@ def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, utc, klass):
index = index.tz_localize(tz_naive_fixture)
dtype = index.dtype

if (
tz_naive_fixture
and attr == "asi8"
and str(tz_naive_fixture) not in ("UTC", "tzutc()", "UTC+00:00")
):
ex_warn = FutureWarning
if attr == "asi8":
result = pd.DatetimeIndex(arg).tz_localize(tz_naive_fixture)
else:
ex_warn = None

# stacklevel is checked elsewhere. We don't do it here since
# Index will have an extra frame, throwing off the expected stacklevel.
with tm.assert_produces_warning(ex_warn, check_stacklevel=False):
result = klass(arg, tz=tz_naive_fixture)
tm.assert_index_equal(result, index)

with tm.assert_produces_warning(ex_warn, check_stacklevel=False):
if attr == "asi8":
result = pd.DatetimeIndex(arg).astype(dtype)
else:
result = klass(arg, dtype=dtype)
tm.assert_index_equal(result, index)

with tm.assert_produces_warning(ex_warn, check_stacklevel=False):
if attr == "asi8":
result = pd.DatetimeIndex(list(arg)).tz_localize(tz_naive_fixture)
else:
result = klass(list(arg), tz=tz_naive_fixture)
tm.assert_index_equal(result, index)

with tm.assert_produces_warning(ex_warn, check_stacklevel=False):
if attr == "asi8":
result = pd.DatetimeIndex(list(arg)).astype(dtype)
else:
result = klass(list(arg), dtype=dtype)
tm.assert_index_equal(result, index)

Expand Down

0 comments on commit beb5e68

Please sign in to comment.