From bdf50a21cbe2b977268175285b60828567f9a137 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 18 May 2023 09:11:48 -0700 Subject: [PATCH] API/BUG: infer_dtype_from_scalar with non-nano (#52212) * API/BUG: infer_dtype_from_scalar with non-nano * update test * xfail on 32bit * fix xfail condition * whatsnew * xfail on windows --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/conftest.py | 2 +- pandas/core/dtypes/cast.py | 24 ++++++++--- pandas/tests/dtypes/cast/test_infer_dtype.py | 36 +++++++++++----- pandas/tests/frame/indexing/test_indexing.py | 2 +- pandas/tests/frame/indexing/test_setitem.py | 4 +- .../frame/methods/test_get_numeric_data.py | 2 +- pandas/tests/frame/methods/test_reindex.py | 15 ++++++- pandas/tests/frame/methods/test_to_csv.py | 9 +++- pandas/tests/frame/test_block_internals.py | 12 +++--- pandas/tests/frame/test_constructors.py | 43 ++++++++++++------- pandas/tests/groupby/test_apply.py | 4 +- .../tests/groupby/test_groupby_shift_diff.py | 2 +- pandas/tests/groupby/test_timegrouper.py | 3 +- .../tests/groupby/transform/test_transform.py | 4 +- pandas/tests/io/json/test_pandas.py | 12 +++--- pandas/tests/io/parser/test_parse_dates.py | 15 +++++-- .../io/parser/usecols/test_parse_dates.py | 2 +- pandas/tests/io/pytables/test_append.py | 21 +++++---- pandas/tests/io/pytables/test_put.py | 8 ++-- pandas/tests/io/pytables/test_store.py | 8 ++-- pandas/tests/io/pytables/test_timezones.py | 18 ++++---- pandas/tests/series/test_constructors.py | 10 ++++- 23 files changed, 166 insertions(+), 91 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 738c53527e358b..bdce680e4cfe93 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -350,6 +350,7 @@ Datetimelike - Bug in :func:`date_range` when ``freq`` was a :class:`DateOffset` with ``nanoseconds`` (:issue:`46877`) - Bug in :meth:`Timestamp.round` with values close to the implementation bounds returning incorrect results instead of raising ``OutOfBoundsDatetime`` (:issue:`51494`) - Bug in :meth:`arrays.DatetimeArray.map` and :meth:`DatetimeIndex.map`, where the supplied callable operated array-wise instead of element-wise (:issue:`51977`) +- Bug in constructing a :class:`Series` or :class:`DataFrame` from a datetime or timedelta scalar always inferring nanosecond resolution instead of inferring from the input (:issue:`52212`) - Bug in parsing datetime strings with weekday but no day e.g. "2023 Sept Thu" incorrectly raising ``AttributeError`` instead of ``ValueError`` (:issue:`52659`) - diff --git a/pandas/conftest.py b/pandas/conftest.py index 86f0121dd00a90..c24a56493b519e 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -931,7 +931,7 @@ def rand_series_with_duplicate_datetimeindex() -> Series: (Period("2012-02-01", freq="D"), "period[D]"), ( Timestamp("2011-01-01", tz="US/Eastern"), - DatetimeTZDtype(tz="US/Eastern"), + DatetimeTZDtype(unit="s", tz="US/Eastern"), ), (Timedelta(seconds=500), "timedelta64[ns]"), ] diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 5f859d1bc6ee64..e7a66928076853 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -645,7 +645,18 @@ def _maybe_promote(dtype: np.dtype, fill_value=np.nan): if inferred == dtype: return dtype, fv - return np.dtype("object"), fill_value + elif inferred.kind == "m": + # different unit, e.g. passed np.timedelta64(24, "h") with dtype=m8[ns] + # see if we can losslessly cast it to our dtype + unit = np.datetime_data(dtype)[0] + try: + td = Timedelta(fill_value).as_unit(unit, round_ok=False) + except OutOfBoundsTimedelta: + return _dtype_obj, fill_value + else: + return dtype, td.asm8 + + return _dtype_obj, fill_value elif is_float(fill_value): if issubclass(dtype.type, np.bool_): @@ -775,8 +786,6 @@ def infer_dtype_from_scalar(val) -> tuple[DtypeObj, Any]: elif isinstance(val, (np.datetime64, dt.datetime)): try: val = Timestamp(val) - if val is not NaT: - val = val.as_unit("ns") except OutOfBoundsDatetime: return _dtype_obj, val @@ -785,7 +794,7 @@ def infer_dtype_from_scalar(val) -> tuple[DtypeObj, Any]: dtype = val.dtype # TODO: test with datetime(2920, 10, 1) based on test_replace_dtypes else: - dtype = DatetimeTZDtype(unit="ns", tz=val.tz) + dtype = DatetimeTZDtype(unit=val.unit, tz=val.tz) elif isinstance(val, (np.timedelta64, dt.timedelta)): try: @@ -793,8 +802,11 @@ def infer_dtype_from_scalar(val) -> tuple[DtypeObj, Any]: except (OutOfBoundsTimedelta, OverflowError): dtype = _dtype_obj else: - dtype = np.dtype("m8[ns]") - val = np.timedelta64(val.value, "ns") + if val is NaT: + val = np.timedelta64("NaT", "ns") + else: + val = val.asm8 + dtype = val.dtype elif is_bool(val): dtype = np.dtype(np.bool_) diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py b/pandas/tests/dtypes/cast/test_infer_dtype.py index 53d0656a11f819..b5d761b3549faf 100644 --- a/pandas/tests/dtypes/cast/test_infer_dtype.py +++ b/pandas/tests/dtypes/cast/test_infer_dtype.py @@ -61,17 +61,31 @@ def test_infer_dtype_from_complex(complex_dtype): assert dtype == np.complex_ -@pytest.mark.parametrize( - "data", [np.datetime64(1, "ns"), Timestamp(1), datetime(2000, 1, 1, 0, 0)] -) -def test_infer_dtype_from_datetime(data): - dtype, val = infer_dtype_from_scalar(data) +def test_infer_dtype_from_datetime(): + dt64 = np.datetime64(1, "ns") + dtype, val = infer_dtype_from_scalar(dt64) assert dtype == "M8[ns]" + ts = Timestamp(1) + dtype, val = infer_dtype_from_scalar(ts) + assert dtype == "M8[ns]" -@pytest.mark.parametrize("data", [np.timedelta64(1, "ns"), Timedelta(1), timedelta(1)]) -def test_infer_dtype_from_timedelta(data): - dtype, val = infer_dtype_from_scalar(data) + dt = datetime(2000, 1, 1, 0, 0) + dtype, val = infer_dtype_from_scalar(dt) + assert dtype == "M8[us]" + + +def test_infer_dtype_from_timedelta(): + td64 = np.timedelta64(1, "ns") + dtype, val = infer_dtype_from_scalar(td64) + assert dtype == "m8[ns]" + + pytd = timedelta(1) + dtype, val = infer_dtype_from_scalar(pytd) + assert dtype == "m8[us]" + + td = Timedelta(1) + dtype, val = infer_dtype_from_scalar(td) assert dtype == "m8[ns]" @@ -140,9 +154,9 @@ def test_infer_dtype_from_scalar_errors(): (b"foo", np.object_), (1, np.int64), (1.5, np.float_), - (np.datetime64("2016-01-01"), np.dtype("M8[ns]")), - (Timestamp("20160101"), np.dtype("M8[ns]")), - (Timestamp("20160101", tz="UTC"), "datetime64[ns, UTC]"), + (np.datetime64("2016-01-01"), np.dtype("M8[s]")), + (Timestamp("20160101"), np.dtype("M8[s]")), + (Timestamp("20160101", tz="UTC"), "datetime64[s, UTC]"), ], ) def test_infer_dtype_from_scalar(value, expected): diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 224abbcef27df6..c399e6fc65bc78 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -819,7 +819,7 @@ def test_setitem_single_column_mixed_datetime(self): # check our dtypes result = df.dtypes expected = Series( - [np.dtype("float64")] * 3 + [np.dtype("datetime64[ns]")], + [np.dtype("float64")] * 3 + [np.dtype("datetime64[s]")], index=["foo", "bar", "baz", "timestamp"], ) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 3edfd47cb05a1a..b745575876212c 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -154,7 +154,7 @@ def test_setitem_dt64_index_empty_columns(self): def test_setitem_timestamp_empty_columns(self): # GH#19843 df = DataFrame(index=range(3)) - df["now"] = Timestamp("20130101", tz="UTC") + df["now"] = Timestamp("20130101", tz="UTC").as_unit("ns") expected = DataFrame( [[Timestamp("20130101", tz="UTC")]] * 3, index=[0, 1, 2], columns=["now"] @@ -234,7 +234,7 @@ def test_setitem_dict_preserves_dtypes(self): (Interval(left=0, right=5), IntervalDtype("int64", "right")), ( Timestamp("2011-01-01", tz="US/Eastern"), - DatetimeTZDtype(tz="US/Eastern"), + DatetimeTZDtype(unit="s", tz="US/Eastern"), ), ], ) diff --git a/pandas/tests/frame/methods/test_get_numeric_data.py b/pandas/tests/frame/methods/test_get_numeric_data.py index bed611b3a969e4..ec1c768603a598 100644 --- a/pandas/tests/frame/methods/test_get_numeric_data.py +++ b/pandas/tests/frame/methods/test_get_numeric_data.py @@ -21,7 +21,7 @@ def test_get_numeric_data_preserve_dtype(self): tm.assert_frame_equal(result, expected) def test_get_numeric_data(self): - datetime64name = np.dtype("M8[ns]").name + datetime64name = np.dtype("M8[s]").name objectname = np.dtype(np.object_).name df = DataFrame( diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index a96dec5f34ce12..1ed0143e5b309c 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -8,6 +8,10 @@ import pytest from pandas._libs.tslibs.timezones import dateutil_gettz as gettz +from pandas.compat import ( + IS64, + is_platform_windows, +) import pandas.util._test_decorators as td import pandas as pd @@ -118,6 +122,11 @@ class TestDataFrameSelectReindex: # These are specific reindex-based tests; other indexing tests should go in # test_indexing + @pytest.mark.xfail( + not IS64 or is_platform_windows(), + reason="Passes int32 values to DatetimeArray in make_na_array on " + "windows, 32bit linux builds", + ) @td.skip_array_manager_not_yet_implemented def test_reindex_tzaware_fill_value(self): # GH#52586 @@ -125,8 +134,9 @@ def test_reindex_tzaware_fill_value(self): ts = pd.Timestamp("2023-04-10 17:32", tz="US/Pacific") res = df.reindex([0, 1], axis=1, fill_value=ts) - assert res.dtypes[1] == pd.DatetimeTZDtype(tz="US/Pacific") + assert res.dtypes[1] == pd.DatetimeTZDtype(unit="s", tz="US/Pacific") expected = DataFrame({0: [1], 1: [ts]}) + expected[1] = expected[1].astype(res.dtypes[1]) tm.assert_frame_equal(res, expected) per = ts.tz_localize(None).to_period("s") @@ -137,8 +147,9 @@ def test_reindex_tzaware_fill_value(self): interval = pd.Interval(ts, ts + pd.Timedelta(seconds=1)) res = df.reindex([0, 1], axis=1, fill_value=interval) - assert res.dtypes[1] == pd.IntervalDtype("datetime64[ns, US/Pacific]", "right") + assert res.dtypes[1] == pd.IntervalDtype("datetime64[s, US/Pacific]", "right") expected = DataFrame({0: [1], 1: [interval]}) + expected[1] = expected[1].astype(res.dtypes[1]) tm.assert_frame_equal(res, expected) def test_reindex_copies(self): diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index b44b05f9f8153f..5671a569c8ac8a 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -656,7 +656,9 @@ def create_cols(name): "foo", index=df_float.index, columns=create_cols("object") ) df_dt = DataFrame( - Timestamp("20010101"), index=df_float.index, columns=create_cols("date") + Timestamp("20010101").as_unit("ns"), + index=df_float.index, + columns=create_cols("date"), ) # add in some nans @@ -664,6 +666,7 @@ def create_cols(name): # ## this is a bug in read_csv right now #### # df_dt.loc[30:50,1:3] = np.nan + # FIXME: don't leave commented-out df = pd.concat([df_float, df_int, df_bool, df_object, df_dt], axis=1) @@ -702,7 +705,9 @@ def test_to_csv_dups_cols(self): df_int = DataFrame(np.random.randn(1000, 3)).astype("int64") df_bool = DataFrame(True, index=df_float.index, columns=range(3)) df_object = DataFrame("foo", index=df_float.index, columns=range(3)) - df_dt = DataFrame(Timestamp("20010101"), index=df_float.index, columns=range(3)) + df_dt = DataFrame( + Timestamp("20010101").as_unit("ns"), index=df_float.index, columns=range(3) + ) df = pd.concat( [df_float, df_int, df_bool, df_object, df_dt], axis=1, ignore_index=True ) diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 0ddcbf87e3b4c5..3ad5c304d9a303 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -191,20 +191,20 @@ def test_construction_with_mixed(self, float_string_frame): # check dtypes result = df.dtypes - expected = Series({"datetime64[ns]": 3}) + expected = Series({"datetime64[us]": 3}) # mixed-type frames float_string_frame["datetime"] = datetime.now() float_string_frame["timedelta"] = timedelta(days=1, seconds=1) - assert float_string_frame["datetime"].dtype == "M8[ns]" - assert float_string_frame["timedelta"].dtype == "m8[ns]" + assert float_string_frame["datetime"].dtype == "M8[us]" + assert float_string_frame["timedelta"].dtype == "m8[us]" result = float_string_frame.dtypes expected = Series( [np.dtype("float64")] * 4 + [ np.dtype("object"), - np.dtype("datetime64[ns]"), - np.dtype("timedelta64[ns]"), + np.dtype("datetime64[us]"), + np.dtype("timedelta64[us]"), ], index=list("ABCD") + ["foo", "datetime", "timedelta"], ) @@ -230,7 +230,7 @@ def test_construction_with_conversions(self): }, index=range(3), ) - assert expected.dtypes["dt1"] == "M8[ns]" + assert expected.dtypes["dt1"] == "M8[s]" assert expected.dtypes["dt2"] == "M8[s]" df = DataFrame(index=range(3)) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 5c1fa5483555b2..47e307f561cf47 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -97,6 +97,7 @@ def test_constructor_from_2d_datetimearray(self, using_array_manager): def test_constructor_dict_with_tzaware_scalar(self): # GH#42505 dt = Timestamp("2019-11-03 01:00:00-0700").tz_convert("America/Los_Angeles") + dt = dt.as_unit("ns") df = DataFrame({"dt": dt}, index=[0]) expected = DataFrame({"dt": [dt]}) @@ -926,7 +927,7 @@ def test_constructor_dict_extension_scalar(self, ea_scalar_and_dtype): (Interval(left=0, right=5), IntervalDtype("int64", "right")), ( Timestamp("2011-01-01", tz="US/Eastern"), - DatetimeTZDtype(tz="US/Eastern"), + DatetimeTZDtype(unit="s", tz="US/Eastern"), ), ], ) @@ -1323,7 +1324,7 @@ def test_constructor_unequal_length_nested_list_column(self): [[Timestamp("2021-01-01")]], [{"x": Timestamp("2021-01-01")}], {"x": [Timestamp("2021-01-01")]}, - {"x": Timestamp("2021-01-01")}, + {"x": Timestamp("2021-01-01").as_unit("ns")}, ], ) def test_constructor_one_element_data_list(self, data): @@ -1814,7 +1815,6 @@ def test_constructor_single_value(self): def test_constructor_with_datetimes(self): intname = np.dtype(np.int_).name floatname = np.dtype(np.float_).name - datetime64name = np.dtype("M8[ns]").name objectname = np.dtype(np.object_).name # single item @@ -1832,7 +1832,7 @@ def test_constructor_with_datetimes(self): expected = Series( [np.dtype("int64")] + [np.dtype(objectname)] * 2 - + [np.dtype(datetime64name)] * 2, + + [np.dtype("M8[s]"), np.dtype("M8[us]")], index=list("ABCDE"), ) tm.assert_series_equal(result, expected) @@ -1912,7 +1912,7 @@ def test_constructor_with_datetimes3(self): df = DataFrame({"End Date": dt}, index=[0]) assert df.iat[0, 0] == dt tm.assert_series_equal( - df.dtypes, Series({"End Date": "datetime64[ns, US/Eastern]"}) + df.dtypes, Series({"End Date": "datetime64[us, US/Eastern]"}) ) df = DataFrame([{"End Date": dt}]) @@ -3047,15 +3047,22 @@ def test_from_scalar_datetimelike_mismatched(self, constructor, cls): with pytest.raises(TypeError, match=msg): constructor(scalar, dtype=dtype) - @pytest.mark.xfail( - reason="Timestamp constructor has been updated to cast dt64 to non-nano, " - "but DatetimeArray._from_sequence has not" - ) @pytest.mark.parametrize("cls", [datetime, np.datetime64]) - def test_from_out_of_bounds_ns_datetime(self, constructor, cls): + def test_from_out_of_bounds_ns_datetime( + self, constructor, cls, request, box, frame_or_series + ): # scalar that won't fit in nanosecond dt64, but will fit in microsecond + if box is list or (frame_or_series is Series and box is dict): + mark = pytest.mark.xfail( + reason="Timestamp constructor has been updated to cast dt64 to " + "non-nano, but DatetimeArray._from_sequence has not", + strict=True, + ) + request.node.add_marker(mark) + scalar = datetime(9999, 1, 1) exp_dtype = "M8[us]" # pydatetime objects default to this reso + if cls is np.datetime64: scalar = np.datetime64(scalar, "D") exp_dtype = "M8[s]" # closest reso to input @@ -3076,13 +3083,19 @@ def test_out_of_s_bounds_datetime64(self, constructor): dtype = tm.get_dtype(result) assert dtype == object - @pytest.mark.xfail( - reason="TimedeltaArray constructor has been updated to cast td64 to non-nano, " - "but TimedeltaArray._from_sequence has not" - ) @pytest.mark.parametrize("cls", [timedelta, np.timedelta64]) - def test_from_out_of_bounds_ns_timedelta(self, constructor, cls): + def test_from_out_of_bounds_ns_timedelta( + self, constructor, cls, request, box, frame_or_series + ): # scalar that won't fit in nanosecond td64, but will fit in microsecond + if box is list or (frame_or_series is Series and box is dict): + mark = pytest.mark.xfail( + reason="TimedeltaArray constructor has been updated to cast td64 " + "to non-nano, but TimedeltaArray._from_sequence has not", + strict=True, + ) + request.node.add_marker(mark) + scalar = datetime(9999, 1, 1) - datetime(1970, 1, 1) exp_dtype = "m8[us]" # smallest reso that fits if cls is np.timedelta64: diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index d879a6d1f299b1..a077bd62927e68 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -721,7 +721,9 @@ def func_with_date(batch): dfg_no_conversion_expected.index.name = "a" dfg_conversion = df.groupby(by=["a"]).apply(func_with_date) - dfg_conversion_expected = DataFrame({"b": datetime(2015, 1, 1), "c": 2}, index=[1]) + dfg_conversion_expected = DataFrame( + {"b": pd.Timestamp(2015, 1, 1).as_unit("ns"), "c": 2}, index=[1] + ) dfg_conversion_expected.index.name = "a" tm.assert_frame_equal(dfg_no_conversion, dfg_no_conversion_expected) diff --git a/pandas/tests/groupby/test_groupby_shift_diff.py b/pandas/tests/groupby/test_groupby_shift_diff.py index 7ffee412e3cdff..656471b2f6eb08 100644 --- a/pandas/tests/groupby/test_groupby_shift_diff.py +++ b/pandas/tests/groupby/test_groupby_shift_diff.py @@ -62,7 +62,7 @@ def test_group_shift_with_fill_value(): def test_group_shift_lose_timezone(): # GH 30134 - now_dt = Timestamp.utcnow() + now_dt = Timestamp.utcnow().as_unit("ns") df = DataFrame({"a": [1, 1], "date": now_dt}) result = df.groupby("a").shift(0).iloc[0] expected = Series({"date": now_dt}, name=result.name) diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index f9a13490815298..cfbecd3efd07ef 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -715,7 +715,8 @@ def test_groupby_max_datetime64(self): # GH 5869 # datetimelike dtype conversion from int df = DataFrame({"A": Timestamp("20130101"), "B": np.arange(5)}) - expected = df.groupby("A")["A"].apply(lambda x: x.max()) + # TODO: can we retain second reso in .apply here? + expected = df.groupby("A")["A"].apply(lambda x: x.max()).astype("M8[s]") result = df.groupby("A")["A"].max() tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 04e6f5d2fdcaad..d0e1343fbeb54e 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -279,7 +279,9 @@ def test_transform_datetime_to_timedelta(): # GH 15429 # transforming a datetime to timedelta df = DataFrame({"A": Timestamp("20130101"), "B": np.arange(5)}) - expected = Series([Timestamp("20130101") - Timestamp("20130101")] * 5, name="A") + expected = Series( + Timestamp("20130101") - Timestamp("20130101"), index=range(5), name="A" + ) # this does date math without changing result type in transform base_time = df["A"][0] diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index e93cd836fa3075..89190dae46169c 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -446,7 +446,7 @@ def test_v12_compat(self, datapath): columns=["A", "B", "C", "D"], index=dti, ) - df["date"] = Timestamp("19920106 18:21:32.12") + df["date"] = Timestamp("19920106 18:21:32.12").as_unit("ns") df.iloc[3, df.columns.get_loc("date")] = Timestamp("20130101") df["modified"] = df["date"] df.iloc[1, df.columns.get_loc("modified")] = pd.NaT @@ -751,7 +751,7 @@ def test_axis_dates(self, datetime_series, datetime_frame): def test_convert_dates(self, datetime_series, datetime_frame): # frame df = datetime_frame - df["date"] = Timestamp("20130101") + df["date"] = Timestamp("20130101").as_unit("ns") json = df.to_json() result = read_json(json) @@ -767,7 +767,7 @@ def test_convert_dates(self, datetime_series, datetime_frame): tm.assert_frame_equal(result, expected) # series - ts = Series(Timestamp("20130101"), index=datetime_series.index) + ts = Series(Timestamp("20130101").as_unit("ns"), index=datetime_series.index) json = ts.to_json() result = read_json(json, typ="series") tm.assert_series_equal(result, ts) @@ -831,7 +831,7 @@ def test_convert_dates_infer(self, infer_word): def test_date_format_frame(self, date, date_unit, datetime_frame): df = datetime_frame - df["date"] = Timestamp(date) + df["date"] = Timestamp(date).as_unit("ns") df.iloc[1, df.columns.get_loc("date")] = pd.NaT df.iloc[5, df.columns.get_loc("date")] = pd.NaT if date_unit: @@ -859,7 +859,7 @@ def test_date_format_frame_raises(self, datetime_frame): ], ) def test_date_format_series(self, date, date_unit, datetime_series): - ts = Series(Timestamp(date), index=datetime_series.index) + ts = Series(Timestamp(date).as_unit("ns"), index=datetime_series.index) ts.iloc[1] = pd.NaT ts.iloc[5] = pd.NaT if date_unit: @@ -879,7 +879,7 @@ def test_date_format_series_raises(self, datetime_series): @pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"]) def test_date_unit(self, unit, datetime_frame): df = datetime_frame - df["date"] = Timestamp("20130101 20:43:42") + df["date"] = Timestamp("20130101 20:43:42").as_unit("ns") dl = df.columns.get_loc("date") df.iloc[1, dl] = Timestamp("19710101 20:43:42") df.iloc[2, dl] = Timestamp("21460101 20:43:42") diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 81de4f13de81d2..13d2c79025d1f5 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -746,7 +746,12 @@ def test_date_parser_int_bug(all_parsers): def test_nat_parse(all_parsers): # see gh-3062 parser = all_parsers - df = DataFrame({"A": np.arange(10, dtype="float64"), "B": Timestamp("20010101")}) + df = DataFrame( + { + "A": np.arange(10, dtype="float64"), + "B": Timestamp("20010101").as_unit("ns"), + } + ) df.iloc[3:6, :] = np.nan with tm.ensure_clean("__nat_parse_.csv") as path: @@ -1902,7 +1907,9 @@ def test_date_parser_multiindex_columns(all_parsers): 1,2 2019-12-31,6""" result = parser.read_csv(StringIO(data), parse_dates=[("a", "1")], header=[0, 1]) - expected = DataFrame({("a", "1"): Timestamp("2019-12-31"), ("b", "2"): [6]}) + expected = DataFrame( + {("a", "1"): Timestamp("2019-12-31").as_unit("ns"), ("b", "2"): [6]} + ) tm.assert_frame_equal(result, expected) @@ -1924,7 +1931,9 @@ def test_date_parser_multiindex_columns_combine_cols(all_parsers, parse_spec, co parse_dates=parse_spec, header=[0, 1], ) - expected = DataFrame({col_name: Timestamp("2019-12-31"), ("c", "3"): [6]}) + expected = DataFrame( + {col_name: Timestamp("2019-12-31").as_unit("ns"), ("c", "3"): [6]} + ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/usecols/test_parse_dates.py b/pandas/tests/io/parser/usecols/test_parse_dates.py index f818d621c744f0..32231cbbdda648 100644 --- a/pandas/tests/io/parser/usecols/test_parse_dates.py +++ b/pandas/tests/io/parser/usecols/test_parse_dates.py @@ -88,7 +88,7 @@ def test_usecols_with_parse_dates3(all_parsers): parse_dates = [0] cols = { - "a": Timestamp("2016-09-21"), + "a": Timestamp("2016-09-21").as_unit("ns"), "b": [1], "c": [1], "d": [2], diff --git a/pandas/tests/io/pytables/test_append.py b/pandas/tests/io/pytables/test_append.py index c37e68f537ebb9..b31a520924d5f4 100644 --- a/pandas/tests/io/pytables/test_append.py +++ b/pandas/tests/io/pytables/test_append.py @@ -146,8 +146,8 @@ def test_append_some_nans(setup_path): "A2": np.random.randn(20), "B": "foo", "C": "bar", - "D": Timestamp("20010101"), - "E": datetime.datetime(2001, 1, 2, 0, 0), + "D": Timestamp("2001-01-01").as_unit("ns"), + "E": Timestamp("2001-01-02").as_unit("ns"), }, index=np.arange(20), ) @@ -247,8 +247,8 @@ def test_append_all_nans(setup_path): "A2": np.random.randn(20), "B": "foo", "C": "bar", - "D": Timestamp("20010101"), - "E": datetime.datetime(2001, 1, 2, 0, 0), + "D": Timestamp("2001-01-01").as_unit("ns"), + "E": Timestamp("2001-01-02").as_unit("ns"), }, index=np.arange(20), ) @@ -572,7 +572,7 @@ def check_col(key, name, size): df_dc.loc[df_dc.index[4:6], "string"] = np.nan df_dc.loc[df_dc.index[7:9], "string"] = "bar" df_dc["string2"] = "cool" - df_dc["datetime"] = Timestamp("20010102") + df_dc["datetime"] = Timestamp("20010102").as_unit("ns") df_dc.loc[df_dc.index[3:5], ["A", "B", "datetime"]] = np.nan _maybe_remove(store, "df_dc") @@ -654,8 +654,8 @@ def test_append_misc_chunksize(setup_path, chunksize): df["float322"] = 1.0 df["float322"] = df["float322"].astype("float32") df["bool"] = df["float322"] > 0 - df["time1"] = Timestamp("20130101") - df["time2"] = Timestamp("20130102") + df["time1"] = Timestamp("20130101").as_unit("ns") + df["time2"] = Timestamp("20130102").as_unit("ns") with ensure_clean_store(setup_path, mode="w") as store: store.append("obj", df, chunksize=chunksize) result = store.select("obj") @@ -767,12 +767,11 @@ def test_append_with_timedelta(setup_path): # GH 3577 # append timedelta + ts = Timestamp("20130101").as_unit("ns") df = DataFrame( { - "A": Timestamp("20130101"), - "B": [ - Timestamp("20130101") + timedelta(days=i, seconds=10) for i in range(10) - ], + "A": ts, + "B": [ts + timedelta(days=i, seconds=10) for i in range(10)], } ) df["C"] = df["A"] - df["B"] diff --git a/pandas/tests/io/pytables/test_put.py b/pandas/tests/io/pytables/test_put.py index d2b0519d6cf3d9..910f83e0b997cb 100644 --- a/pandas/tests/io/pytables/test_put.py +++ b/pandas/tests/io/pytables/test_put.py @@ -183,10 +183,10 @@ def test_put_mixed_type(setup_path): df["bool3"] = True df["int1"] = 1 df["int2"] = 2 - df["timestamp1"] = Timestamp("20010102") - df["timestamp2"] = Timestamp("20010103") - df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0) - df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0) + df["timestamp1"] = Timestamp("20010102").as_unit("ns") + df["timestamp2"] = Timestamp("20010103").as_unit("ns") + df["datetime1"] = Timestamp("20010102").as_unit("ns") + df["datetime2"] = Timestamp("20010103").as_unit("ns") df.loc[df.index[3:6], ["obj1"]] = np.nan df = df._consolidate() diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 7a5b6ddd403340..2d87b719af36b1 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -422,10 +422,10 @@ def test_table_mixed_dtypes(setup_path): df["bool3"] = True df["int1"] = 1 df["int2"] = 2 - df["timestamp1"] = Timestamp("20010102") - df["timestamp2"] = Timestamp("20010103") - df["datetime1"] = dt.datetime(2001, 1, 2, 0, 0) - df["datetime2"] = dt.datetime(2001, 1, 3, 0, 0) + df["timestamp1"] = Timestamp("20010102").as_unit("ns") + df["timestamp2"] = Timestamp("20010103").as_unit("ns") + df["datetime1"] = Timestamp("20010102").as_unit("ns") + df["datetime2"] = Timestamp("20010103").as_unit("ns") df.loc[df.index[3:6], ["obj1"]] = np.nan df = df._consolidate() diff --git a/pandas/tests/io/pytables/test_timezones.py b/pandas/tests/io/pytables/test_timezones.py index 7589eb8e96a107..e6c0c918a73cc0 100644 --- a/pandas/tests/io/pytables/test_timezones.py +++ b/pandas/tests/io/pytables/test_timezones.py @@ -50,7 +50,7 @@ def test_append_with_timezones(setup_path, gettz): df_est = DataFrame( { "A": [ - Timestamp("20130102 2:00:00", tz=gettz("US/Eastern")) + Timestamp("20130102 2:00:00", tz=gettz("US/Eastern")).as_unit("ns") + timedelta(hours=1) * i for i in range(5) ] @@ -61,24 +61,24 @@ def test_append_with_timezones(setup_path, gettz): # of DST transition df_crosses_dst = DataFrame( { - "A": Timestamp("20130102", tz=gettz("US/Eastern")), - "B": Timestamp("20130603", tz=gettz("US/Eastern")), + "A": Timestamp("20130102", tz=gettz("US/Eastern")).as_unit("ns"), + "B": Timestamp("20130603", tz=gettz("US/Eastern")).as_unit("ns"), }, index=range(5), ) df_mixed_tz = DataFrame( { - "A": Timestamp("20130102", tz=gettz("US/Eastern")), - "B": Timestamp("20130102", tz=gettz("EET")), + "A": Timestamp("20130102", tz=gettz("US/Eastern")).as_unit("ns"), + "B": Timestamp("20130102", tz=gettz("EET")).as_unit("ns"), }, index=range(5), ) df_different_tz = DataFrame( { - "A": Timestamp("20130102", tz=gettz("US/Eastern")), - "B": Timestamp("20130102", tz=gettz("CET")), + "A": Timestamp("20130102", tz=gettz("US/Eastern")).as_unit("ns"), + "B": Timestamp("20130102", tz=gettz("CET")).as_unit("ns"), }, index=range(5), ) @@ -303,8 +303,8 @@ def test_legacy_datetimetz_object(datapath): # 8260 expected = DataFrame( { - "A": Timestamp("20130102", tz="US/Eastern"), - "B": Timestamp("20130603", tz="CET"), + "A": Timestamp("20130102", tz="US/Eastern").as_unit("ns"), + "B": Timestamp("20130603", tz="CET").as_unit("ns"), }, index=range(5), ) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 7238232a46e604..9750e8d32c8443 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1352,8 +1352,14 @@ def test_constructor_dict_order(self): expected = Series([1, 0, 2], index=list("bac")) tm.assert_series_equal(result, expected) - def test_constructor_dict_extension(self, ea_scalar_and_dtype): + def test_constructor_dict_extension(self, ea_scalar_and_dtype, request): ea_scalar, ea_dtype = ea_scalar_and_dtype + if isinstance(ea_scalar, Timestamp): + mark = pytest.mark.xfail( + reason="Construction from dict goes through " + "maybe_convert_objects which casts to nano" + ) + request.node.add_marker(mark) d = {"a": ea_scalar} result = Series(d, index=["a"]) expected = Series(ea_scalar, index=["a"], dtype=ea_dtype) @@ -1465,7 +1471,7 @@ def test_fromValue(self, datetime_series): d = datetime.now() dates = Series(d, index=datetime_series.index) - assert dates.dtype == "M8[ns]" + assert dates.dtype == "M8[us]" assert len(dates) == len(datetime_series) # GH12336