Skip to content

Commit

Permalink
API: allow mixed-datetimes-and-ints in to_datetime, DatetimeIndex (pandas-dev#49348)
Browse files Browse the repository at this point in the history

* API: allow mixed-datetimes-and-ints in to_datetime, DatetimeIndex

* typo fixup

* typo fixup, update import

* mypy fixup
  • Loading branch information
jbrockmendel authored and mliu08 committed Nov 27, 2022
1 parent 6ce7787 commit 8d2b2f7
Show file tree
Hide file tree
Showing 8 changed files with 28 additions and 61 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,7 @@ Other API changes
- The ``other`` argument in :meth:`DataFrame.mask` and :meth:`Series.mask` now defaults to ``no_default`` instead of ``np.nan`` consistent with :meth:`DataFrame.where` and :meth:`Series.where`. Entries will be filled with the corresponding NULL value (``np.nan`` for numpy dtypes, ``pd.NA`` for extension dtypes). (:issue:`49111`)
- When creating a :class:`Series` with an object-dtype :class:`Index` of datetime objects, pandas no longer silently converts the index to a :class:`DatetimeIndex` (:issue:`39307`, :issue:`23598`)
- :meth:`Series.unique` with dtype "timedelta64[ns]" or "datetime64[ns]" now returns :class:`TimedeltaArray` or :class:`DatetimeArray` instead of ``numpy.ndarray`` (:issue:`49176`)
- :func:`to_datetime` and :class:`DatetimeIndex` now allow sequences containing both ``datetime`` objects and numeric entries, matching :class:`Series` behavior (:issue:`49037`)
- :func:`pandas.api.dtypes.is_string_dtype` now only returns ``True`` for array-likes with ``dtype=object`` when the elements are inferred to be strings (:issue:`15585`)
- Passing a sequence containing ``datetime`` objects and ``date`` objects to the :class:`Series` constructor will return ``object`` dtype instead of ``datetime64[ns]`` dtype, consistent with :class:`Index` behavior (:issue:`49341`)
- Passing strings that cannot be parsed as datetimes to :class:`Series` or :class:`DataFrame` with ``dtype="datetime64[ns]"`` will raise instead of silently ignoring the keyword and returning ``object`` dtype (:issue:`24435`)
Expand Down
1 change: 0 additions & 1 deletion pandas/_libs/tslib.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ def array_to_datetime(
yearfirst: bool = ...,
utc: bool = ...,
require_iso8601: bool = ...,
allow_mixed: bool = ...,
) -> tuple[np.ndarray, tzinfo | None]: ...

# returned ndarray may be object dtype or datetime64[ns]
Expand Down
9 changes: 0 additions & 9 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,6 @@ cpdef array_to_datetime(
bint yearfirst=False,
bint utc=False,
bint require_iso8601=False,
bint allow_mixed=False,
):
"""
Converts a 1D array of date-like values to a numpy array of either:
Expand Down Expand Up @@ -475,8 +474,6 @@ cpdef array_to_datetime(
indicator whether the dates should be UTC
require_iso8601 : bool, default False
indicator whether the datetime string should be iso8601
allow_mixed : bool, default False
Whether to allow mixed datetimes and integers.
Returns
-------
Expand Down Expand Up @@ -710,12 +707,6 @@ cpdef array_to_datetime(
val = values[i]
if is_integer_object(val) or is_float_object(val):
result[i] = NPY_NAT
elif allow_mixed:
pass
elif is_raise:
raise ValueError("mixed datetimes and integers in passed array")
else:
return _array_to_datetime_object(values, errors, dayfirst, yearfirst)

if seen_datetime_offset and not utc_convert:
# GH#17697
Expand Down
13 changes: 1 addition & 12 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1937,10 +1937,7 @@ def sequence_to_datetimes(data) -> DatetimeArray:
"""
Parse/convert the passed data to either DatetimeArray or np.ndarray[object].
"""
result, tz, freq = _sequence_to_dt64ns(
data,
allow_mixed=True,
)
result, tz, freq = _sequence_to_dt64ns(data)

unit = np.datetime_data(result.dtype)[0]
dtype = tz_to_dtype(tz, unit)
Expand All @@ -1956,7 +1953,6 @@ def _sequence_to_dt64ns(
dayfirst: bool = False,
yearfirst: bool = False,
ambiguous: TimeAmbiguous = "raise",
allow_mixed: bool = False,
):
"""
Parameters
Expand All @@ -1968,8 +1964,6 @@ def _sequence_to_dt64ns(
yearfirst : bool, default False
ambiguous : str, bool, or arraylike, default 'raise'
See pandas._libs.tslibs.tzconversion.tz_localize_to_utc.
allow_mixed : bool, default False
Interpret integers as timestamps when datetime objects are also present.
Returns
-------
Expand Down Expand Up @@ -2020,7 +2014,6 @@ def _sequence_to_dt64ns(
dayfirst=dayfirst,
yearfirst=yearfirst,
allow_object=False,
allow_mixed=allow_mixed,
)
if tz and inferred_tz:
# two timezones: convert to intended from base UTC repr
Expand Down Expand Up @@ -2109,7 +2102,6 @@ def objects_to_datetime64ns(
errors: DateTimeErrorChoices = "raise",
require_iso8601: bool = False,
allow_object: bool = False,
allow_mixed: bool = False,
):
"""
Convert data to array of timestamps.
Expand All @@ -2126,8 +2118,6 @@ def objects_to_datetime64ns(
allow_object : bool
Whether to return an object-dtype ndarray instead of raising if the
data contains more than one timezone.
allow_mixed : bool, default False
Interpret integers as timestamps when datetime objects are also present.
Returns
-------
Expand Down Expand Up @@ -2156,7 +2146,6 @@ def objects_to_datetime64ns(
dayfirst=dayfirst,
yearfirst=yearfirst,
require_iso8601=require_iso8601,
allow_mixed=allow_mixed,
)
result = result.reshape(data.shape, order=order)
except OverflowError as err:
Expand Down
37 changes: 12 additions & 25 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@
is_complex,
is_complex_dtype,
is_datetime64_dtype,
is_dtype_equal,
is_extension_array_dtype,
is_float,
is_float_dtype,
Expand Down Expand Up @@ -1222,7 +1221,7 @@ def maybe_cast_to_datetime(
Caller is responsible for handling ExtensionDtype cases and non dt64/td64
cases.
"""
from pandas.core.arrays.datetimes import sequence_to_datetimes
from pandas.core.arrays.datetimes import DatetimeArray
from pandas.core.arrays.timedeltas import TimedeltaArray

assert dtype.kind in ["m", "M"]
Expand All @@ -1238,36 +1237,24 @@ def maybe_cast_to_datetime(
res = TimedeltaArray._from_sequence(value, dtype=dtype)
return res

if is_datetime64_dtype(dtype):
# Incompatible types in assignment (expression has type
# "Union[dtype[Any], ExtensionDtype]", variable has type
# "Optional[dtype[Any]]")
else:
# error: Incompatible types in assignment (expression has type
# "Union[dtype[Any], ExtensionDtype]", variable has type "Optional[dtype[Any]]")
dtype = _ensure_nanosecond_dtype(dtype) # type: ignore[assignment]

value = np.array(value, copy=False)

# we have an array of datetime or timedeltas & nulls
if value.size or not is_dtype_equal(value.dtype, dtype):
_disallow_mismatched_datetimelike(value, dtype)

dta = sequence_to_datetimes(value)
# GH 25843: Remove tz information since the dtype
# didn't specify one

if dta.tz is not None:
try:
dta = DatetimeArray._from_sequence(value, dtype=dtype)
except ValueError as err:
# We can give a Series-specific exception message.
if "cannot supply both a tz and a timezone-naive dtype" in str(err):
raise ValueError(
"Cannot convert timezone-aware data to "
"timezone-naive dtype. Use "
"pd.Series(values).dt.tz_localize(None) instead."
)

# TODO(2.0): Do this astype in sequence_to_datetimes to
# avoid potential extra copy?
dta = dta.astype(dtype, copy=False)
return dta
) from err
raise

# at this point we have converted or raised in all cases where we had a list
return cast(ArrayLike, value)
return dta


def sanitize_to_nanoseconds(values: np.ndarray, copy: bool = False) -> np.ndarray:
Expand Down
6 changes: 2 additions & 4 deletions pandas/tests/frame/methods/test_combine_first.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,8 @@
from pandas.compat import pa_version_under7p0
from pandas.errors import PerformanceWarning

from pandas.core.dtypes.cast import (
find_common_type,
is_dtype_equal,
)
from pandas.core.dtypes.cast import find_common_type
from pandas.core.dtypes.common import is_dtype_equal

import pandas as pd
from pandas import (
Expand Down
11 changes: 4 additions & 7 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -3021,14 +3021,11 @@ def test_from_scalar_datetimelike_mismatched(self, constructor, cls):
scalar = cls("NaT", "ns")
dtype = {np.datetime64: "m8[ns]", np.timedelta64: "M8[ns]"}[cls]

msg = "Cannot cast"
if cls is np.datetime64:
msg = "|".join(
[
r"dtype datetime64\[ns\] cannot be converted to timedelta64\[ns\]",
"Cannot cast",
]
)
msg1 = r"dtype datetime64\[ns\] cannot be converted to timedelta64\[ns\]"
else:
msg1 = r"dtype timedelta64\[ns\] cannot be converted to datetime64\[ns\]"
msg = "|".join(["Cannot cast", msg1])

with pytest.raises(TypeError, match=msg):
constructor(scalar, dtype=dtype)
Expand Down
11 changes: 8 additions & 3 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1434,9 +1434,14 @@ def test_unit_mixed(self, cache, exp, arr):
result = to_datetime(arr, errors="coerce", cache=cache)
tm.assert_index_equal(result, expected)

msg = "mixed datetimes and integers in passed array"
with pytest.raises(ValueError, match=msg):
to_datetime(arr, errors="raise", cache=cache)
# GH#49037 pre-2.0 this raised, but it always worked with Series,
# was never clear why it was disallowed
result = to_datetime(arr, errors="raise", cache=cache)
expected = Index([Timestamp(x) for x in arr], dtype="M8[ns]")
tm.assert_index_equal(result, expected)

result = DatetimeIndex(arr)
tm.assert_index_equal(result, expected)

def test_unit_rounding(self, cache):
# GH 14156 & GH 20445: argument will incur floating point errors
Expand Down

0 comments on commit 8d2b2f7

Please sign in to comment.