Skip to content

Commit

Permalink
TST: parametrize over dt64 unit (#55974)
Browse files (browse the repository at this point in the history)
* TST: parametrize tests over dt64 unit

* TST: parametrize over unit

* revert

* TST: parametrize over dt64 unit

* xfail non-nano

* revert
  • Loading branch information
jbrockmendel authored Nov 16, 2023
1 parent: 3bb8ad1; commit: 713c4dc
Show file tree
Hide file tree
Showing 18 changed files with 247 additions and 130 deletions.
14 changes: 10 additions & 4 deletions pandas/tests/arrays/test_datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -853,10 +853,14 @@ def test_concat_same_type_invalid(self, arr1d):
with pytest.raises(ValueError, match="to_concat must have the same"):
arr._concat_same_type([arr, other])

def test_concat_same_type_different_freq(self):
def test_concat_same_type_different_freq(self, unit):
# we *can* concatenate DTI with different freqs.
a = DatetimeArray(pd.date_range("2000", periods=2, freq="D", tz="US/Central"))
b = DatetimeArray(pd.date_range("2000", periods=2, freq="h", tz="US/Central"))
a = DatetimeArray(
pd.date_range("2000", periods=2, freq="D", tz="US/Central", unit=unit)
)
b = DatetimeArray(
pd.date_range("2000", periods=2, freq="h", tz="US/Central", unit=unit)
)
result = DatetimeArray._concat_same_type([a, b])
expected = DatetimeArray(
pd.to_datetime(
Expand All @@ -866,7 +870,9 @@ def test_concat_same_type_different_freq(self):
"2000-01-01 00:00:00",
"2000-01-01 01:00:00",
]
).tz_localize("US/Central")
)
.tz_localize("US/Central")
.as_unit(unit)
)

tm.assert_datetime_array_equal(result, expected)
Expand Down
14 changes: 9 additions & 5 deletions pandas/tests/groupby/methods/test_nth.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,8 +286,10 @@ def test_nth5():
tm.assert_frame_equal(gb.nth([3, 4]), df.loc[[]])


def test_nth_bdays():
business_dates = pd.date_range(start="4/1/2014", end="6/30/2014", freq="B")
def test_nth_bdays(unit):
business_dates = pd.date_range(
start="4/1/2014", end="6/30/2014", freq="B", unit=unit
)
df = DataFrame(1, index=business_dates, columns=["a", "b"])
# get the first, fourth and last two business days for each month
key = [df.index.year, df.index.month]
Expand All @@ -307,7 +309,7 @@ def test_nth_bdays():
"2014/6/27",
"2014/6/30",
]
)
).as_unit(unit)
expected = DataFrame(1, columns=["a", "b"], index=expected_dates)
tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -401,14 +403,15 @@ def test_first_last_tz(data, expected_first, expected_last):
["last", Timestamp("2013-01-02", tz="US/Eastern"), "b"],
],
)
def test_first_last_tz_multi_column(method, ts, alpha):
def test_first_last_tz_multi_column(method, ts, alpha, unit):
# GH 21603
category_string = Series(list("abc")).astype("category")
dti = pd.date_range("20130101", periods=3, tz="US/Eastern", unit=unit)
df = DataFrame(
{
"group": [1, 1, 2],
"category_string": category_string,
"datetimetz": pd.date_range("20130101", periods=3, tz="US/Eastern"),
"datetimetz": dti,
}
)
result = getattr(df.groupby("group"), method)()
Expand All @@ -421,6 +424,7 @@ def test_first_last_tz_multi_column(method, ts, alpha):
},
index=Index([1, 2], name="group"),
)
expected["datetimetz"] = expected["datetimetz"].dt.as_unit(unit)
tm.assert_frame_equal(result, expected)


Expand Down
20 changes: 7 additions & 13 deletions pandas/tests/groupby/methods/test_quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,36 +415,30 @@ def test_columns_groupby_quantile():
tm.assert_frame_equal(result, expected)


def test_timestamp_groupby_quantile():
def test_timestamp_groupby_quantile(unit):
# GH 33168
dti = pd.date_range(
start="2020-04-19 00:00:00", freq="1min", periods=100, tz="UTC", unit=unit
).floor("1h")
df = DataFrame(
{
"timestamp": pd.date_range(
start="2020-04-19 00:00:00", freq="1min", periods=100, tz="UTC"
).floor("1h"),
"timestamp": dti,
"category": list(range(1, 101)),
"value": list(range(101, 201)),
}
)

result = df.groupby("timestamp").quantile([0.2, 0.8])

mi = pd.MultiIndex.from_product([dti[::99], [0.2, 0.8]], names=("timestamp", None))
expected = DataFrame(
[
{"category": 12.8, "value": 112.8},
{"category": 48.2, "value": 148.2},
{"category": 68.8, "value": 168.8},
{"category": 92.2, "value": 192.2},
],
index=pd.MultiIndex.from_tuples(
[
(pd.Timestamp("2020-04-19 00:00:00+00:00"), 0.2),
(pd.Timestamp("2020-04-19 00:00:00+00:00"), 0.8),
(pd.Timestamp("2020-04-19 01:00:00+00:00"), 0.2),
(pd.Timestamp("2020-04-19 01:00:00+00:00"), 0.8),
],
names=("timestamp", None),
),
index=mi,
)

tm.assert_frame_equal(result, expected)
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/groupby/methods/test_value_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -1135,7 +1135,7 @@ def test_subset_duplicate_columns():


@pytest.mark.parametrize("utc", [True, False])
def test_value_counts_time_grouper(utc):
def test_value_counts_time_grouper(utc, unit):
# GH#50486
df = DataFrame(
{
Expand All @@ -1152,12 +1152,12 @@ def test_value_counts_time_grouper(utc):
}
).drop([3])

df["Datetime"] = to_datetime(df["Timestamp"], utc=utc, unit="s")
df["Datetime"] = to_datetime(df["Timestamp"], utc=utc, unit="s").dt.as_unit(unit)
gb = df.groupby(Grouper(freq="1D", key="Datetime"))
result = gb.value_counts()
dates = to_datetime(
["2019-08-06", "2019-08-07", "2019-08-09", "2019-08-10"], utc=utc
)
).as_unit(unit)
timestamps = df["Timestamp"].unique()
index = MultiIndex(
levels=[dates, timestamps, ["apple", "banana", "orange", "pear"]],
Expand Down
22 changes: 13 additions & 9 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -3172,28 +3172,32 @@ def test_groupby_selection_other_methods(df):
)


def test_groupby_with_Time_Grouper():
idx2 = [
to_datetime("2016-08-31 22:08:12.000"),
to_datetime("2016-08-31 22:09:12.200"),
to_datetime("2016-08-31 22:20:12.400"),
]
def test_groupby_with_Time_Grouper(unit):
idx2 = to_datetime(
[
"2016-08-31 22:08:12.000",
"2016-08-31 22:09:12.200",
"2016-08-31 22:20:12.400",
]
).as_unit(unit)

test_data = DataFrame(
{"quant": [1.0, 1.0, 3.0], "quant2": [1.0, 1.0, 3.0], "time2": idx2}
)

time2 = date_range("2016-08-31 22:08:00", periods=13, freq="1min", unit=unit)
expected_output = DataFrame(
{
"time2": date_range("2016-08-31 22:08:00", periods=13, freq="1min"),
"time2": time2,
"quant": [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
"quant2": [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
}
)

df = test_data.groupby(Grouper(key="time2", freq="1min")).count().reset_index()
gb = test_data.groupby(Grouper(key="time2", freq="1min"))
result = gb.count().reset_index()

tm.assert_frame_equal(df, expected_output)
tm.assert_frame_equal(result, expected_output)


def test_groupby_series_with_datetimeindex_month_name():
Expand Down
17 changes: 11 additions & 6 deletions pandas/tests/groupby/test_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,19 +276,24 @@ def test_grouper_creation_bug2(self):
result = g.sum()
tm.assert_frame_equal(result, expected)

def test_grouper_creation_bug3(self):
def test_grouper_creation_bug3(self, unit):
# GH8866
dti = date_range("20130101", periods=2, unit=unit)
mi = MultiIndex.from_product(
[list("ab"), range(2), dti],
names=["one", "two", "three"],
)
ser = Series(
np.arange(8, dtype="int64"),
index=MultiIndex.from_product(
[list("ab"), range(2), date_range("20130101", periods=2)],
names=["one", "two", "three"],
),
index=mi,
)
result = ser.groupby(Grouper(level="three", freq="ME")).sum()
exp_dti = pd.DatetimeIndex(
[Timestamp("2013-01-31")], freq="ME", name="three"
).as_unit(unit)
expected = Series(
[28],
index=pd.DatetimeIndex([Timestamp("2013-01-31")], freq="ME", name="three"),
index=exp_dti,
)
tm.assert_series_equal(result, expected)

Expand Down
26 changes: 7 additions & 19 deletions pandas/tests/indexes/datetimes/methods/test_delete.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,37 +116,25 @@ def test_delete_slice(self, unit):

# TODO: belongs in Series.drop tests?
@pytest.mark.parametrize("tz", [None, "Asia/Tokyo", "US/Pacific"])
def test_delete_slice2(self, tz):
def test_delete_slice2(self, tz, unit):
dti = date_range(
"2000-01-01 09:00", periods=10, freq="h", name="idx", tz=tz, unit=unit
)
ts = Series(
1,
index=date_range(
"2000-01-01 09:00", periods=10, freq="h", name="idx", tz=tz
),
index=dti,
)
# preserve freq
result = ts.drop(ts.index[:5]).index
expected = date_range(
"2000-01-01 14:00", periods=5, freq="h", name="idx", tz=tz
)
expected = dti[5:]
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
assert result.tz == expected.tz

# reset freq to None
result = ts.drop(ts.index[[1, 3, 5, 7, 9]]).index
expected = DatetimeIndex(
[
"2000-01-01 09:00",
"2000-01-01 11:00",
"2000-01-01 13:00",
"2000-01-01 15:00",
"2000-01-01 17:00",
],
freq=None,
name="idx",
tz=tz,
)
expected = dti[::2]._with_freq(None)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
Expand Down
24 changes: 12 additions & 12 deletions pandas/tests/indexes/datetimes/methods/test_repeat.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,29 +17,29 @@ def test_repeat_range(self, tz_naive_fixture):
assert result.freq is None
assert len(result) == 5 * len(rng)

def test_repeat_range2(self, tz_naive_fixture):
def test_repeat_range2(self, tz_naive_fixture, unit):
tz = tz_naive_fixture
index = date_range("2001-01-01", periods=2, freq="D", tz=tz)
index = date_range("2001-01-01", periods=2, freq="D", tz=tz, unit=unit)
exp = DatetimeIndex(
["2001-01-01", "2001-01-01", "2001-01-02", "2001-01-02"], tz=tz
)
).as_unit(unit)
for res in [index.repeat(2), np.repeat(index, 2)]:
tm.assert_index_equal(res, exp)
assert res.freq is None

def test_repeat_range3(self, tz_naive_fixture):
def test_repeat_range3(self, tz_naive_fixture, unit):
tz = tz_naive_fixture
index = date_range("2001-01-01", periods=2, freq="2D", tz=tz)
index = date_range("2001-01-01", periods=2, freq="2D", tz=tz, unit=unit)
exp = DatetimeIndex(
["2001-01-01", "2001-01-01", "2001-01-03", "2001-01-03"], tz=tz
)
).as_unit(unit)
for res in [index.repeat(2), np.repeat(index, 2)]:
tm.assert_index_equal(res, exp)
assert res.freq is None

def test_repeat_range4(self, tz_naive_fixture):
def test_repeat_range4(self, tz_naive_fixture, unit):
tz = tz_naive_fixture
index = DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz)
index = DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz).as_unit(unit)
exp = DatetimeIndex(
[
"2001-01-01",
Expand All @@ -53,17 +53,17 @@ def test_repeat_range4(self, tz_naive_fixture):
"2003-01-01",
],
tz=tz,
)
).as_unit(unit)
for res in [index.repeat(3), np.repeat(index, 3)]:
tm.assert_index_equal(res, exp)
assert res.freq is None

def test_repeat(self, tz_naive_fixture):
def test_repeat(self, tz_naive_fixture, unit):
tz = tz_naive_fixture
reps = 2
msg = "the 'axis' parameter is not supported"

rng = date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz)
rng = date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz, unit=unit)

expected_rng = DatetimeIndex(
[
Expand All @@ -72,7 +72,7 @@ def test_repeat(self, tz_naive_fixture):
Timestamp("2016-01-01 00:30:00", tz=tz),
Timestamp("2016-01-01 00:30:00", tz=tz),
]
)
).as_unit(unit)

res = rng.repeat(reps)
tm.assert_index_equal(res, expected_rng)
Expand Down
Loading

0 comments on commit 713c4dc

Please sign in to comment.