Skip to content

Commit

Permalink
Fix DatetimeIndex.loc for all types of ordering cases (#15761)
Browse files Browse the repository at this point in the history
Fixes: #15742 

This PR resolves an issue where `DatetimeIndex.loc` returned incorrect ranges when the index is monotonically decreasing. Additionally, I went ahead and fixed it for all other cases (i.e., randomly ordered indexes) too.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)

URL: #15761
  • Loading branch information
galipremsagar authored May 16, 2024
1 parent 0a544c2 commit bdd48f1
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 6 deletions.
20 changes: 14 additions & 6 deletions python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,6 @@ def _get_label_range_or_mask(index, start, stop, step):
if (
not (start is None and stop is None)
and type(index) is cudf.core.index.DatetimeIndex
and index.is_monotonic_increasing is False
):
start = pd.to_datetime(start)
stop = pd.to_datetime(stop)
Expand All @@ -205,19 +204,28 @@ def _get_label_range_or_mask(index, start, stop, step):
# when we have a non-monotonic datetime index, return
# values in the slice defined by index_of(start) and
# index_of(end)
start_loc = index.get_loc(start.to_datetime64())
stop_loc = index.get_loc(stop.to_datetime64()) + 1
start_loc = index.get_loc(start)
stop_loc = index.get_loc(stop) + 1
return slice(start_loc, stop_loc)
else:
raise KeyError(
"Value based partial slicing on non-monotonic "
"DatetimeIndexes with non-existing keys is not allowed.",
)
elif start is not None:
boolean_mask = index >= start
if index.is_monotonic_increasing:
return index >= start
elif index.is_monotonic_decreasing:
return index <= start
else:
return index.find_label_range(slice(start, stop, step))
else:
boolean_mask = index <= stop
return boolean_mask
if index.is_monotonic_increasing:
return index <= stop
elif index.is_monotonic_decreasing:
return index >= stop
else:
return index.find_label_range(slice(start, stop, step))
else:
return index.find_label_range(slice(start, stop, step))

Expand Down
74 changes: 74 additions & 0 deletions python/cudf/cudf/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2275,3 +2275,77 @@ def test_loc_setitem_empty_dataframe():
gdf.loc[["index_1"], "new_col"] = "A"

assert_eq(pdf, gdf)


@pytest.mark.parametrize(
    "data", [[15, 14, 12, 10, 1], [1, 10, 12, 14, 15]]
)
@pytest.mark.parametrize("scalar", [1, 10, 15, 14, 0, 2])
def test_loc_datetime_monotonic_with_ts(data, scalar):
    """Compare Timestamp-bounded ``.loc`` slices between cudf and pandas.

    ``data`` is either a strictly decreasing or a strictly increasing
    sequence used to build a ``datetime64[ns]`` index. ``scalar`` covers
    values that are present in the index as well as values that are
    absent from it (0 and 2). Both the open-ended ``ts:`` slice and the
    ``:ts`` slice are checked against the pandas result.
    """
    dt_index = cudf.Index(data, dtype="datetime64[ns]")
    gdf = cudf.DataFrame(
        {"a": [1, 1, 1, 2, 2], "b": [1, 2, 3, 4, 5]},
        index=dt_index,
    )
    pdf = gdf.to_pandas()

    ts = pd.Timestamp(scalar)

    # ``obj.loc[ts:]`` and ``obj.loc[:ts]`` spelled as explicit slice
    # objects so both directions share a single comparison.
    for label_slice in (slice(ts, None), slice(None, ts)):
        assert_eq(gdf.loc[label_slice], pdf.loc[label_slice])


@pytest.mark.parametrize("data", [[15, 14, 3, 10, 1]])
@pytest.mark.parametrize("scalar", [1, 10, 15, 14, 0, 2])
def test_loc_datetime_random_with_ts(data, scalar):
    """Compare Timestamp-bounded ``.loc`` slices on an unordered index.

    ``data`` is neither increasing nor decreasing. When the Timestamp
    built from ``scalar`` is a member of the pandas index, the ``ts:``
    and ``:ts`` slices of the cudf frame are compared with the pandas
    results. Otherwise the same two slices are passed, pandas first,
    to ``assert_exceptions_equal``.
    """
    gdf = cudf.DataFrame(
        {"a": [1, 1, 1, 2, 2], "b": [1, 2, 3, 4, 5]},
        index=cudf.Index(data, dtype="datetime64[ns]"),
    )
    pdf = gdf.to_pandas()

    ts = pd.Timestamp(scalar)
    # ``obj.loc[ts:]`` followed by ``obj.loc[:ts]``, as slice objects.
    label_slices = (slice(ts, None), slice(None, ts))

    if ts in pdf.index:
        for label_slice in label_slices:
            assert_eq(gdf.loc[label_slice], pdf.loc[label_slice])
        return

    for label_slice in label_slices:
        # Bind the slice as a default so each lambda keeps its own key.
        assert_exceptions_equal(
            lambda key=label_slice: pdf.loc[key],
            lambda key=label_slice: gdf.loc[key],
            lfunc_args_and_kwargs=([],),
            rfunc_args_and_kwargs=([],),
        )

0 comments on commit bdd48f1

Please sign in to comment.