diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py
index 7be491dbced..fe6d0e17ab0 100644
--- a/python/cudf/cudf/core/column/lists.py
+++ b/python/cudf/cudf/core/column/lists.py
@@ -456,13 +456,15 @@ def contains(self, search_key: ScalarLike) -> ParentType:
 
     def index(self, search_key: Union[ScalarLike, ColumnLike]) -> ParentType:
         """
-        Return integers representing the index of the search key for each row.
+        Returns integers representing the index of the search key for each row.
+
         If ``search_key`` is a sequence, it must be the same length as the
         Series and ``search_key[i]`` represents the search key for the
         ``i``-th row of the Series.
 
-        If the search key is not contained in a row, return -1.
-        If either the row or the search key are null, return <NA>.
+        If the search key is not contained in a row, -1 is returned. If either
+        the row or the search key are null, <NA> is returned. If the search key
+        is contained multiple times, the smallest matching index is returned.
 
         Parameters
         ----------
@@ -495,15 +497,20 @@ def index(self, search_key: Union[ScalarLike, ColumnLike]) -> ParentType:
         1    <NA>
         2       1
         dtype: int32
+
+        Notes
+        -----
+        ``index`` only supports list search operations on numeric types,
+        decimals, chrono types, and strings.
         """
 
         try:
             if is_scalar(search_key):
-                res = self._return_or_inplace(
+                return self._return_or_inplace(
                     index_of_scalar(self._column, cudf.Scalar(search_key))
                 )
             else:
-                res = self._return_or_inplace(
+                return self._return_or_inplace(
                     index_of_column(self._column, as_column(search_key))
                 )
 
@@ -514,7 +521,6 @@ def index(self, search_key: Union[ScalarLike, ColumnLike]) -> ParentType:
             ):
                 raise TypeError(str(e)) from e
             raise
-        return res
 
     @property
     def leaves(self) -> ParentType:
diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py
index d775dd41c47..3dc1c63b83a 100644
--- a/python/cudf/cudf/tests/test_list.py
+++ b/python/cudf/cudf/tests/test_list.py
@@ -501,7 +501,7 @@ def test_index(data, search_key, expect):
         ),
     ],
 )
-def test_index_invalid(data, search_key):
+def test_index_invalid_type(data, search_key):
     sr = cudf.Series(data)
     with pytest.raises(
         TypeError,
@@ -511,6 +511,28 @@ def test_index_invalid(data, search_key):
         sr.list.index(search_key)
 
 
+@pytest.mark.parametrize(
+    "data, search_key",
+    [
+        (
+            [[5, 8], [2, 6]],
+            [8, 2, 4],
+        ),
+        (
+            [["h", "j"], ["p", None], ["t", "z"]],
+            ["j", "a"],
+        ),
+    ],
+)
+def test_index_invalid_length(data, search_key):
+    sr = cudf.Series(data)
+    with pytest.raises(
+        RuntimeError,
+        match="Number of search keys must match list column size.",
+    ):
+        sr.list.index(search_key)
+
+
 @pytest.mark.parametrize(
     "row",
     [