Skip to content

Commit

Permalink
Return int64 when pandas compatible mode is turned on for `get_inde…
Browse files Browse the repository at this point in the history
…xer` (#15659)

Fixes: #15658 

This PR makes a change to `get_indexer` to return `int64` indices when pandas compatible mode is turned on.

Forks out of #14534

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)

URL: #15659
  • Loading branch information
galipremsagar authored May 6, 2024
1 parent 4dc6162 commit 4ce6674
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 7 deletions.
6 changes: 6 additions & 0 deletions python/cudf/cudf/core/_base_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2205,3 +2205,9 @@ def _split(self, splits):

def _get_result_name(left_name, right_name):
    """Pick the name for a result built from two named operands.

    Returns ``left_name`` when ``_is_same_name`` reports that the two
    names match, and ``None`` otherwise.
    """
    if _is_same_name(left_name, right_name):
        return left_name
    return None


def _return_get_indexer_result(result):
    """Finalize the indices produced by a ``get_indexer`` implementation.

    Parameters
    ----------
    result
        Indexer result to return; it must provide ``astype``.

    Returns
    -------
    ``result`` cast to ``int64`` when the ``mode.pandas_compatible``
    option is enabled, otherwise ``result`` itself, unchanged.
    """
    pandas_compatible = cudf.get_option("mode.pandas_compatible")
    return result.astype("int64") if pandas_compatible else result
8 changes: 4 additions & 4 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
is_list_like,
is_scalar,
)
from cudf.core._base_index import BaseIndex
from cudf.core._base_index import BaseIndex, _return_get_indexer_result
from cudf.core._compat import PANDAS_LT_300
from cudf.core.column import (
CategoricalColumn,
Expand Down Expand Up @@ -1256,11 +1256,11 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
)

if not len(self):
return result.values
return _return_get_indexer_result(result.values)
try:
lcol, rcol = _match_join_keys(needle, self._column, "inner")
except ValueError:
return result.values
return _return_get_indexer_result(result.values)

scatter_map, indices = libcudf.join.join([lcol], [rcol], how="inner")
(result,) = libcudf.copying.scatter([indices], scatter_map, [result])
Expand All @@ -1287,7 +1287,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
"{['ffill'/'pad', 'bfill'/'backfill', 'nearest', None]}"
)

return result_series.to_cupy()
return _return_get_indexer_result(result_series.to_cupy())

@_cudf_nvtx_annotate
def get_loc(self, key):
Expand Down
7 changes: 4 additions & 3 deletions python/cudf/cudf/core/multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from cudf.api.extensions import no_default
from cudf.api.types import is_integer, is_list_like, is_object_dtype
from cudf.core import column
from cudf.core._base_index import _return_get_indexer_result
from cudf.core.frame import Frame
from cudf.core.index import (
BaseIndex,
Expand Down Expand Up @@ -1858,11 +1859,11 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
dtype=libcudf.types.size_type_dtype,
)
if not len(self):
return result.values
return _return_get_indexer_result(result.values)
try:
target = cudf.MultiIndex.from_tuples(target)
except TypeError:
return result.values
return _return_get_indexer_result(result.values)

join_keys = [
_match_join_keys(lcol, rcol, "inner")
Expand Down Expand Up @@ -1892,7 +1893,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
"{['ffill'/'pad', 'bfill'/'backfill', None]}"
)

return result_series.to_cupy()
return _return_get_indexer_result(result_series.to_cupy())

@_cudf_nvtx_annotate
def get_loc(self, key):
Expand Down
20 changes: 20 additions & 0 deletions python/cudf/cudf/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1741,6 +1741,10 @@ def test_get_indexer_single_unique_numeric(idx, key, method):

assert_eq(expected, got)

with cudf.option_context("mode.pandas_compatible", True):
got = gi.get_indexer(key, method=method)
assert_eq(expected, got, check_dtype=True)


@pytest.mark.parametrize(
"idx",
Expand Down Expand Up @@ -1770,6 +1774,12 @@ def test_get_indexer_rangeindex(idx, key, method, tolerance):

assert_eq(expected, got)

with cudf.option_context("mode.pandas_compatible", True):
got = gi.get_indexer(
key, method=method, tolerance=None if method is None else tolerance
)
assert_eq(expected, got, check_dtype=True)


@pytest.mark.parametrize(
"idx",
Expand Down Expand Up @@ -1950,6 +1960,11 @@ def test_get_indexer_single_duplicate_string(idx, key, method):

assert_eq(expected, got)

with cudf.option_context("mode.pandas_compatible", True):
got = gi.get_indexer(key, method=method)

assert_eq(expected, got, check_dtype=True)


@pytest.mark.parametrize(
"idx",
Expand Down Expand Up @@ -2009,6 +2024,11 @@ def test_get_indexer_multi_numeric(idx, key, method):

assert_eq(expected, got)

with cudf.option_context("mode.pandas_compatible", True):
got = gi.get_indexer(key, method=method)

assert_eq(expected, got, check_dtype=True)


@pytest.mark.parametrize(
"idx",
Expand Down

0 comments on commit 4ce6674

Please sign in to comment.