Skip to content

Commit

Permalink
DEPR: box arg in to_datetime (pandas-dev#30111)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and proost committed Dec 19, 2019
1 parent f474b30 commit af5a4b4
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 134 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
- Removed the previously deprecated :meth:`Series.compound` and :meth:`DataFrame.compound` (:issue:`26405`)
- Changed the default value of `inplace` in :meth:`DataFrame.set_index` and :meth:`Series.set_axis`. It now defaults to ``False`` (:issue:`27600`)
- Removed the previously deprecated :attr:`Series.cat.categorical`, :attr:`Series.cat.index`, :attr:`Series.cat.name` (:issue:`24751`)
- :func:`to_datetime` no longer accepts the ``box`` argument; it always returns a :class:`DatetimeIndex`, :class:`Index`, :class:`Series`, or :class:`DataFrame` (:issue:`24486`)
- Removed the previously deprecated ``time_rule`` keyword from (non-public) :func:`offsets.generate_range`, which has been moved to :func:`core.arrays._ranges.generate_range` (:issue:`24157`)
- :meth:`DataFrame.loc` or :meth:`Series.loc` with listlike indexers and missing labels will no longer reindex (:issue:`17295`)
- :meth:`DataFrame.to_excel` and :meth:`Series.to_excel` with non-existent columns will no longer reindex (:issue:`17295`)
Expand Down
141 changes: 43 additions & 98 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
parse_time_string,
)
from pandas._libs.tslibs.strptime import array_strptime
from pandas.util._decorators import deprecate_kwarg

from pandas.core.dtypes.common import (
ensure_object,
Expand Down Expand Up @@ -45,12 +44,6 @@
# types used in annotations

ArrayConvertible = Union[list, tuple, ArrayLike, ABCSeries]

# ---------------------------------------------------------------------

# ---------------------------------------------------------------------
# types used in annotations

Scalar = Union[int, float, str]
DatetimeScalar = TypeVar("DatetimeScalar", Scalar, datetime)
DatetimeScalarOrArrayConvertible = Union[
Expand Down Expand Up @@ -154,7 +147,7 @@ def _maybe_cache(arg, format, cache, convert_listlike):

unique_dates = unique(arg)
if len(unique_dates) < len(arg):
cache_dates = convert_listlike(unique_dates, True, format)
cache_dates = convert_listlike(unique_dates, format)
cache_array = Series(cache_dates, index=unique_dates)
return cache_array

Expand All @@ -169,7 +162,7 @@ def _box_as_indexlike(
Parameters
----------
dt_array: 1-d array
array of datetimes to be boxed
Array of datetimes to be wrapped in an Index.
tz : object
None or 'utc'
name : string, default None
Expand All @@ -192,37 +185,30 @@ def _box_as_indexlike(
def _convert_and_box_cache(
arg: DatetimeScalarOrArrayConvertible,
cache_array: ABCSeries,
box: bool,
name: Optional[str] = None,
) -> Union[ABCIndex, np.ndarray]:
) -> ABCIndexClass:
"""
Convert array of dates with a cache and box the result
Convert array of dates with a cache and wrap the result in an Index.
Parameters
----------
arg : integer, float, string, datetime, list, tuple, 1-d array, Series
cache_array : Series
Cache of converted, unique dates
box : boolean
True boxes result as an Index-like, False returns an ndarray
name : string, default None
Name for a DatetimeIndex
Returns
-------
result : datetime of converted dates
- Index-like if box=True
- ndarray if box=False
result : Index-like of converted dates
"""
from pandas import Series

result = Series(arg).map(cache_array)
if box:
return _box_as_indexlike(result, utc=None, name=name)
return result.values
return _box_as_indexlike(result, utc=None, name=name)


def _return_parsed_timezone_results(result, timezones, box, tz, name):
def _return_parsed_timezone_results(result, timezones, tz, name):
"""
Return results from array_strptime if a %z or %Z directive was passed.
Expand All @@ -232,20 +218,14 @@ def _return_parsed_timezone_results(result, timezones, box, tz, name):
int64 date representations of the dates
timezones : ndarray
pytz timezone objects
box : boolean
True boxes result as an Index-like, False returns an ndarray
tz : object
None or pytz timezone object
name : string, default None
Name for a DatetimeIndex
Returns
-------
tz_result : ndarray of parsed dates with timezone
Returns:
- Index-like if box=True
- ndarray of Timestamps if box=False
tz_result : Index-like of parsed dates with timezone
"""
if tz is not None:
raise ValueError(
Expand All @@ -256,16 +236,13 @@ def _return_parsed_timezone_results(result, timezones, box, tz, name):
tz_results = np.array(
[Timestamp(res).tz_localize(zone) for res, zone in zip(result, timezones)]
)
if box:
from pandas import Index
from pandas import Index

return Index(tz_results, name=name)
return tz_results
return Index(tz_results, name=name)


def _convert_listlike_datetimes(
arg,
box,
format,
name=None,
tz=None,
Expand All @@ -284,8 +261,6 @@ def _convert_listlike_datetimes(
----------
arg : list, tuple, ndarray, Series, Index
date to be parsed
box : boolean
True boxes result as an Index-like, False returns an ndarray
name : object
None or string for the Index name
tz : object
Expand All @@ -305,11 +280,7 @@ def _convert_listlike_datetimes(
Returns
-------
ndarray of parsed dates
Returns:
- Index-like if box=True
- ndarray of Timestamps if box=False
Index-like of parsed dates
"""
from pandas import DatetimeIndex
from pandas.core.arrays import DatetimeArray
Expand All @@ -330,7 +301,7 @@ def _convert_listlike_datetimes(
return arg

elif is_datetime64_ns_dtype(arg):
if box and not isinstance(arg, (DatetimeArray, DatetimeIndex)):
if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
try:
return DatetimeIndex(arg, tz=tz, name=name)
except ValueError:
Expand All @@ -346,26 +317,25 @@ def _convert_listlike_datetimes(
raise ValueError("cannot specify both format and unit")
arg = getattr(arg, "values", arg)
result, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors)
if box:
if errors == "ignore":
from pandas import Index
if errors == "ignore":
from pandas import Index

result = Index(result, name=name)
result = Index(result, name=name)
else:
result = DatetimeIndex(result, name=name)
# GH 23758: We may still need to localize the result with tz
# GH 25546: Apply tz_parsed first (from arg), then tz (from caller)
# result will be naive but in UTC
try:
result = result.tz_localize("UTC").tz_convert(tz_parsed)
except AttributeError:
# Regular Index from 'ignore' path
return result
if tz is not None:
if result.tz is None:
result = result.tz_localize(tz)
else:
result = DatetimeIndex(result, name=name)
# GH 23758: We may still need to localize the result with tz
# GH 25546: Apply tz_parsed first (from arg), then tz (from caller)
# result will be naive but in UTC
try:
result = result.tz_localize("UTC").tz_convert(tz_parsed)
except AttributeError:
# Regular Index from 'ignore' path
return result
if tz is not None:
if result.tz is None:
result = result.tz_localize(tz)
else:
result = result.tz_convert(tz)
result = result.tz_convert(tz)
return result
elif getattr(arg, "ndim", 1) > 1:
raise TypeError(
Expand Down Expand Up @@ -416,7 +386,7 @@ def _convert_listlike_datetimes(
)
if "%Z" in format or "%z" in format:
return _return_parsed_timezone_results(
result, timezones, box, tz, name
result, timezones, tz, name
)
except tslibs.OutOfBoundsDatetime:
if errors == "raise":
Expand Down Expand Up @@ -463,20 +433,12 @@ def _convert_listlike_datetimes(
)

if tz_parsed is not None:
if box:
# We can take a shortcut since the datetime64 numpy array
# is in UTC
return DatetimeIndex._simple_new(result, name=name, tz=tz_parsed)
else:
# Convert the datetime64 numpy array to an numpy array
# of datetime objects
result = [Timestamp(ts, tz=tz_parsed).to_pydatetime() for ts in result]
return np.array(result, dtype=object)
# We can take a shortcut since the datetime64 numpy array
# is in UTC
return DatetimeIndex._simple_new(result, name=name, tz=tz_parsed)

if box:
utc = tz == "utc"
return _box_as_indexlike(result, utc=utc, name=name)
return result
utc = tz == "utc"
return _box_as_indexlike(result, utc=utc, name=name)


def _adjust_to_origin(arg, origin, unit):
Expand Down Expand Up @@ -558,14 +520,12 @@ def _adjust_to_origin(arg, origin, unit):
return arg


@deprecate_kwarg(old_arg_name="box", new_arg_name=None)
def to_datetime(
arg,
errors="raise",
dayfirst=False,
yearfirst=False,
utc=None,
box=True,
format=None,
exact=True,
unit=None,
Expand Down Expand Up @@ -603,15 +563,6 @@ def to_datetime(
utc : bool, default None
Return UTC DatetimeIndex if True (converting any tz-aware
datetime.datetime objects as well).
box : bool, default True
- If True returns a DatetimeIndex or Index-like object
- If False returns ndarray of values.
.. deprecated:: 0.25.0
Use :meth:`Series.to_numpy` or :meth:`Timestamp.to_datetime64`
instead to get an ndarray of values or numpy.datetime64,
respectively.
format : str, default None
The strftime to parse time, eg "%d/%m/%Y", note that "%f" will parse
all the way up to nanoseconds.
Expand Down Expand Up @@ -764,25 +715,25 @@ def to_datetime(
if not cache_array.empty:
result = arg.map(cache_array)
else:
values = convert_listlike(arg._values, True, format)
values = convert_listlike(arg._values, format)
result = arg._constructor(values, index=arg.index, name=arg.name)
elif isinstance(arg, (ABCDataFrame, abc.MutableMapping)):
result = _assemble_from_unit_mappings(arg, errors, box, tz)
result = _assemble_from_unit_mappings(arg, errors, tz)
elif isinstance(arg, ABCIndexClass):
cache_array = _maybe_cache(arg, format, cache, convert_listlike)
if not cache_array.empty:
result = _convert_and_box_cache(arg, cache_array, box, name=arg.name)
result = _convert_and_box_cache(arg, cache_array, name=arg.name)
else:
convert_listlike = partial(convert_listlike, name=arg.name)
result = convert_listlike(arg, box, format)
result = convert_listlike(arg, format)
elif is_list_like(arg):
cache_array = _maybe_cache(arg, format, cache, convert_listlike)
if not cache_array.empty:
result = _convert_and_box_cache(arg, cache_array, box)
result = _convert_and_box_cache(arg, cache_array)
else:
result = convert_listlike(arg, box, format)
result = convert_listlike(arg, format)
else:
result = convert_listlike(np.array([arg]), box, format)[0]
result = convert_listlike(np.array([arg]), format)[0]

return result

Expand Down Expand Up @@ -813,7 +764,7 @@ def to_datetime(
}


def _assemble_from_unit_mappings(arg, errors, box, tz):
def _assemble_from_unit_mappings(arg, errors, tz):
"""
assemble the unit specified fields from the arg (DataFrame)
Return a Series for actual parsing
Expand All @@ -826,10 +777,6 @@ def _assemble_from_unit_mappings(arg, errors, box, tz):
- If 'raise', then invalid parsing will raise an exception
- If 'coerce', then invalid parsing will be set as NaT
- If 'ignore', then invalid parsing will return the input
box : boolean
- If True, return a DatetimeIndex
- If False, return an array
tz : None or 'utc'
Returns
Expand Down Expand Up @@ -904,8 +851,6 @@ def coerce(values):
"cannot assemble the datetimes [{value}]: "
"{error}".format(value=value, error=e)
)
if not box:
return values.values
return values


Expand Down
36 changes: 0 additions & 36 deletions pandas/tests/indexes/datetimes/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -921,22 +921,6 @@ def test_iso_8601_strings_with_same_offset(self):
result = DatetimeIndex([ts_str] * 2)
tm.assert_index_equal(result, expected)

def test_iso_8601_strings_same_offset_no_box(self):
# GH 22446
data = ["2018-01-04 09:01:00+09:00", "2018-01-04 09:02:00+09:00"]

with tm.assert_produces_warning(FutureWarning):
result = pd.to_datetime(data, box=False)

expected = np.array(
[
datetime(2018, 1, 4, 9, 1, tzinfo=pytz.FixedOffset(540)),
datetime(2018, 1, 4, 9, 2, tzinfo=pytz.FixedOffset(540)),
],
dtype=object,
)
tm.assert_numpy_array_equal(result, expected)

def test_iso_8601_strings_with_different_offsets(self):
# GH 17697, 11736
ts_strings = ["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30", NaT]
Expand Down Expand Up @@ -1024,16 +1008,6 @@ def test_timestamp_utc_true(self, ts, expected):
result = to_datetime(ts, utc=True)
assert result == expected

def test_to_datetime_box_deprecated(self):
expected = np.datetime64("2018-09-09")

# Deprecated - see GH24416
with tm.assert_produces_warning(FutureWarning):
pd.to_datetime(expected, box=False)

result = pd.to_datetime(expected).to_datetime64()
assert result == expected

@pytest.mark.parametrize("dt_str", ["00010101", "13000101", "30000101", "99990101"])
def test_to_datetime_with_format_out_of_bounds(self, dt_str):
# GH 9107
Expand Down Expand Up @@ -1345,16 +1319,6 @@ def test_dataframe_dtypes(self, cache):
with pytest.raises(ValueError):
to_datetime(df, cache=cache)

def test_dataframe_box_false(self):
# GH 23760
df = pd.DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]})

with tm.assert_produces_warning(FutureWarning):
result = pd.to_datetime(df, box=False)

expected = np.array(["2015-02-04", "2016-03-05"], dtype="datetime64[ns]")
tm.assert_numpy_array_equal(result, expected)

def test_dataframe_utc_true(self):
# GH 23760
df = pd.DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]})
Expand Down

0 comments on commit af5a4b4

Please sign in to comment.