diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py index c96d940c378..7a9a17631a9 100644 --- a/python/cudf/cudf/core/_base_index.py +++ b/python/cudf/cudf/core/_base_index.py @@ -1347,6 +1347,16 @@ def isin(self, values): array([ True, False, False]) """ + # To match pandas behavior, even though only list-like objects are + # supposed to be passed, only scalars throw errors. Other types (like + # dicts) just transparently return False (see the implementation of + # ColumnBase.isin). + if is_scalar(values): + raise TypeError( + "only list-like objects are allowed to be passed " + f"to isin(), you passed a {type(values).__name__}" + ) + return self._values.isin(values).values @classmethod diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index a4de6db9bda..2596f90c59b 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -43,6 +43,7 @@ from cudf.core import column, df_protocol, reshape from cudf.core.abc import Serializable from cudf.core.column import ( + CategoricalColumn, as_column, build_categorical_column, build_column, @@ -5169,82 +5170,81 @@ def isin(self, values): falcon True True dog False False """ - if isinstance(values, dict): - - result_df = DataFrame() - - for col in self._data.names: - if col in values: - val = values[col] - result_df[col] = self._data[col].isin(val) - else: - result_df[col] = column.full( - size=len(self), fill_value=False, dtype="bool" - ) - - result_df.index = self.index - return result_df - elif isinstance(values, Series): + # TODO: propagate nulls through isin + # https://github.com/rapidsai/cudf/issues/7556 + + fill_value = cudf.Scalar(False) + + def make_false_column_like_self(): + return column.full(len(self), fill_value, "bool") + + # Preprocess different input types into a mapping from column names to + # a list of values to check. + result = {} + if isinstance(values, IndexedFrame): + # Note: In the case where values is a Series, computing some + # information about the values column outside the loop may result + # in performance gains. However, since categorical conversion + # depends on the current column in the loop, using the correct + # precomputed variables inside the loop requires nontrivial logic. + # This optimization could be attempted if `isin` ever becomes a + # bottleneck. values = values.reindex(self.index) + other_cols = ( + values._data + if isinstance(values, DataFrame) + else {name: values._column for name in self._data} + ) + for col, self_col in self._data.items(): + if col in other_cols: + other_col = other_cols[col] + self_is_cat = isinstance(self_col, CategoricalColumn) + other_is_cat = isinstance(other_col, CategoricalColumn) + + if self_is_cat != other_is_cat: + # It is valid to compare the levels of a categorical + # column to a non-categorical column. + if self_is_cat: + self_col = self_col._get_decategorized_column() + else: + other_col = other_col._get_decategorized_column() - result = DataFrame() - # TODO: propagate nulls through isin - # https://github.com/rapidsai/cudf/issues/7556 - for col in self._data.names: - if isinstance( - self[col]._column, cudf.core.column.CategoricalColumn - ) and isinstance( - values._column, cudf.core.column.CategoricalColumn - ): - res = (self._data[col] == values._column).fillna(False) - result[col] = res - elif ( - isinstance( - self[col]._column, cudf.core.column.CategoricalColumn - ) - or np.issubdtype(self[col].dtype, cudf.dtype("object")) - ) or ( - isinstance( - values._column, cudf.core.column.CategoricalColumn - ) - or np.issubdtype(values.dtype, cudf.dtype("object")) - ): - result[col] = utils.scalar_broadcast_to(False, len(self)) + # We use the type checks from _before_ the conversion + # because if only one was categorical then it's already + # been converted and we have to check if they're strings. + if self_is_cat and other_is_cat: + self_is_str = other_is_str = False + else: + # These checks must happen after the conversions above + # since numpy can't handle categorical dtypes. + self_is_str = is_string_dtype(self_col.dtype) + other_is_str = is_string_dtype(other_col.dtype) + + if self_is_str != other_is_str: + # Strings can't compare to anything else. + result[col] = make_false_column_like_self() + else: + result[col] = (self_col == other_col).fillna(False) else: - result[col] = (self._data[col] == values._column).fillna( - False - ) - - result.index = self.index - return result - elif isinstance(values, DataFrame): - values = values.reindex(self.index) - - result = DataFrame() - for col in self._data.names: - if col in values.columns: - result[col] = ( - self._data[col] == values[col]._column - ).fillna(False) + result[col] = make_false_column_like_self() + elif is_dict_like(values): + for name, col in self._data.items(): + if name in values: + result[name] = col.isin(values[name]) else: - result[col] = utils.scalar_broadcast_to(False, len(self)) - result.index = self.index - return result + result[name] = make_false_column_like_self() + elif is_list_like(values): + for name, col in self._data.items(): + result[name] = col.isin(values) else: - if not is_list_like(values): - raise TypeError( - f"only list-like or dict-like objects are " - f"allowed to be passed to DataFrame.isin(), " - f"you passed a " - f"'{type(values).__name__}'" - ) - - result_df = DataFrame() + raise TypeError( + "only list-like or dict-like objects are " + "allowed to be passed to DataFrame.isin(), " + "you passed a " + f"'{type(values).__name__}'" + ) - for col in self._data.names: - result_df[col] = self._data[col].isin(values) - result_df.index = self.index - return result_df + return DataFrame._from_data(result, self.index) # # Stats diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 90ae7274a3f..8574a152c44 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -2658,14 +2658,17 @@ def isin(self, values): dtype: bool """ + # Even though only list-like objects are supposed to be passed, only + # scalars throw errors. Other types (like dicts) just transparently + # return False (see the implementation of ColumnBase.isin). if is_scalar(values): raise TypeError( "only list-like objects are allowed to be passed " f"to isin(), you passed a [{type(values).__name__}]" ) - return Series( - self._column.isin(values), index=self.index, name=self.name + return Series._from_data( + {self.name: self._column.isin(values)}, index=self.index ) def unique(self): diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index f765c614907..acd9e28c661 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -4250,272 +4250,6 @@ def test_value_counts(): ) -@pytest.mark.parametrize( - "data", - [ - [], - [0, 12, 14], - [0, 14, 12, 12, 3, 10, 12, 14], - np.random.randint(-100, 100, 200), - pd.Series([0.0, 1.0, None, 10.0]), - [None, None, None, None], - [np.nan, None, -1, 2, 3], - ], -) -@pytest.mark.parametrize( - "values", - [ - np.random.randint(-100, 100, 10), - [], - [np.nan, None, -1, 2, 3], - [1.0, 12.0, None, None, 120], - [0, 14, 12, 12, 3, 10, 12, 14, None], - [None, None, None], - ["0", "12", "14"], - ["0", "12", "14", "a"], - ], -) -def test_isin_numeric(data, values): - index = np.random.randint(0, 100, len(data)) - psr = cudf.utils.utils._create_pandas_series(data=data, index=index) - gsr = cudf.Series.from_pandas(psr, nan_as_null=False) - - expected = psr.isin(values) - got = gsr.isin(values) - - assert_eq(got, expected) - - -@pytest.mark.parametrize( - "data", - [ - [], - pd.Series( - ["2018-01-01", "2019-04-03", None, "2019-12-30"], - dtype="datetime64[ns]", - ), - pd.Series( - [ - "2018-01-01", - "2019-04-03", - None, - "2019-12-30", - "2018-01-01", - "2018-01-01", - ], - dtype="datetime64[ns]", - ), - ], -) -@pytest.mark.parametrize( - "values", - [ - [], - [1514764800000000000, 1577664000000000000], - [ - 1514764800000000000, - 1577664000000000000, - 1577664000000000000, - 1577664000000000000, - 1514764800000000000, - ], - ["2019-04-03", "2019-12-30", "2012-01-01"], - [ - "2012-01-01", - "2012-01-01", - "2012-01-01", - "2019-04-03", - "2019-12-30", - "2012-01-01", - ], - ], -) -def test_isin_datetime(data, values): - psr = cudf.utils.utils._create_pandas_series(data=data) - gsr = cudf.Series.from_pandas(psr) - - got = gsr.isin(values) - expected = psr.isin(values) - assert_eq(got, expected) - - -@pytest.mark.parametrize( - "data", - [ - [], - pd.Series(["this", "is", None, "a", "test"]), - pd.Series(["test", "this", "test", "is", None, "test", "a", "test"]), - pd.Series(["0", "12", "14"]), - ], -) -@pytest.mark.parametrize( - "values", - [ - [], - ["this", "is"], - [None, None, None], - ["12", "14", "19"], - pytest.param( - [12, 14, 19], - marks=pytest.mark.xfail( - not PANDAS_GE_120, - reason="pandas's failure here seems like a bug(in < 1.2) " - "given the reverse succeeds", - ), - ), - ["is", "this", "is", "this", "is"], - ], -) -def test_isin_string(data, values): - psr = cudf.utils.utils._create_pandas_series(data=data) - gsr = cudf.Series.from_pandas(psr) - - got = gsr.isin(values) - expected = psr.isin(values) - assert_eq(got, expected) - - -@pytest.mark.parametrize( - "data", - [ - [], - pd.Series(["a", "b", "c", "c", "c", "d", "e"], dtype="category"), - pd.Series(["a", "b", None, "c", "d", "e"], dtype="category"), - pd.Series([0, 3, 10, 12], dtype="category"), - pd.Series([0, 3, 10, 12, 0, 10, 3, 0, 0, 3, 3], dtype="category"), - ], -) -@pytest.mark.parametrize( - "values", - [ - [], - ["a", "b", None, "f", "words"], - ["0", "12", None, "14"], - [0, 10, 12, None, 39, 40, 1000], - [0, 0, 0, 0, 3, 3, 3, None, 1, 2, 3], - ], -) -def test_isin_categorical(data, values): - psr = cudf.utils.utils._create_pandas_series(data=data) - gsr = cudf.Series.from_pandas(psr) - - got = gsr.isin(values) - expected = psr.isin(values) - assert_eq(got, expected) - - -@pytest.mark.parametrize( - "data", - [ - [], - pd.Series( - ["this", "is", None, "a", "test"], index=["a", "b", "c", "d", "e"] - ), - pd.Series([0, 15, 10], index=[0, None, 9]), - pd.Series( - range(25), - index=pd.date_range( - start="2019-01-01", end="2019-01-02", freq="H" - ), - ), - ], -) -@pytest.mark.parametrize( - "values", - [ - [], - ["this", "is"], - [0, 19, 13], - ["2019-01-01 04:00:00", "2019-01-01 06:00:00", "2018-03-02"], - ], -) -def test_isin_index(data, values): - psr = cudf.utils.utils._create_pandas_series(data=data) - gsr = cudf.Series.from_pandas(psr) - - got = gsr.index.isin(values) - expected = psr.index.isin(values) - - assert_eq(got, expected) - - -@pytest.mark.parametrize( - "data", - [ - pd.MultiIndex.from_arrays( - [[1, 2, 3], ["red", "blue", "green"]], names=("number", "color") - ), - pd.MultiIndex.from_arrays([[], []], names=("number", "color")), - pd.MultiIndex.from_arrays( - [[1, 2, 3, 10, 100], ["red", "blue", "green", "pink", "white"]], - names=("number", "color"), - ), - ], -) -@pytest.mark.parametrize( - "values,level,err", - [ - (["red", "orange", "yellow"], "color", None), - (["red", "white", "yellow"], "color", None), - ([0, 1, 2, 10, 11, 15], "number", None), - ([0, 1, 2, 10, 11, 15], None, TypeError), - (pd.Series([0, 1, 2, 10, 11, 15]), None, TypeError), - (pd.Index([0, 1, 2, 10, 11, 15]), None, TypeError), - (pd.Index([0, 1, 2, 8, 11, 15]), "number", None), - (pd.Index(["red", "white", "yellow"]), "color", None), - ([(1, "red"), (3, "red")], None, None), - (((1, "red"), (3, "red")), None, None), - ( - pd.MultiIndex.from_arrays( - [[1, 2, 3], ["red", "blue", "green"]], - names=("number", "color"), - ), - None, - None, - ), - ( - pd.MultiIndex.from_arrays([[], []], names=("number", "color")), - None, - None, - ), - ( - pd.MultiIndex.from_arrays( - [ - [1, 2, 3, 10, 100], - ["red", "blue", "green", "pink", "white"], - ], - names=("number", "color"), - ), - None, - None, - ), - ], -) -def test_isin_multiindex(data, values, level, err): - pmdx = data - gmdx = cudf.from_pandas(data) - - if err is None: - expected = pmdx.isin(values, level=level) - if isinstance(values, pd.MultiIndex): - values = cudf.from_pandas(values) - got = gmdx.isin(values, level=level) - - assert_eq(got, expected) - else: - assert_exceptions_equal( - lfunc=pmdx.isin, - rfunc=gmdx.isin, - lfunc_args_and_kwargs=([values], {"level": level}), - rfunc_args_and_kwargs=([values], {"level": level}), - check_exception_type=False, - expected_error_message=re.escape( - "values need to be a Multi-Index or set/list-like tuple " - "squences when `level=None`." - ), - ) - - @pytest.mark.parametrize( "data", [ @@ -4541,6 +4275,8 @@ def test_isin_multiindex(data, values, level, err): "num_wings": [2, 0, 2, 1, 2, 4, -1], } ), + pd.DataFrame({"a": ["a", "b", "c"]}, dtype="category"), + pd.DataFrame({"a": ["a", "b", "c"]}), ], ) @pytest.mark.parametrize( @@ -4569,6 +4305,9 @@ def test_isin_multiindex(data, values, level, err): pd.Series([1, 2, 3, 4, 5]), "abc", 123, + pd.Series(["a", "b", "c"]), + pd.Series(["a", "b", "c"], dtype="category"), + pd.DataFrame({"a": ["a", "b", "c"]}, dtype="category"), ], ) def test_isin_dataframe(data, values): @@ -4591,6 +4330,13 @@ def test_isin_dataframe(data, values): not PANDAS_GE_110, "https://github.com/pandas-dev/pandas/issues/34256", ) + except TypeError as e: + # Can't do isin with different categories + if str(e) == ( + "Categoricals can only be compared if 'categories' " + "are the same." + ): + return if isinstance(values, (pd.DataFrame, pd.Series)): values = cudf.from_pandas(values) diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index 6679725ae9a..faaa42ac7f8 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2021, NVIDIA CORPORATION. +# Copyright (c) 2018-2022, NVIDIA CORPORATION. """ Test related to Index @@ -2528,3 +2528,115 @@ def test_index_nan_as_null(data, nan_idx, NA_idx, nan_as_null): if NA_idx is not None: assert idx[NA_idx] is cudf.NA + + +@pytest.mark.parametrize( + "data", + [ + [], + pd.Series( + ["this", "is", None, "a", "test"], index=["a", "b", "c", "d", "e"] + ), + pd.Series([0, 15, 10], index=[0, None, 9]), + pd.Series( + range(25), + index=pd.date_range( + start="2019-01-01", end="2019-01-02", freq="H" + ), + ), + ], +) +@pytest.mark.parametrize( + "values", + [ + [], + ["this", "is"], + [0, 19, 13], + ["2019-01-01 04:00:00", "2019-01-01 06:00:00", "2018-03-02"], + ], +) +def test_isin_index(data, values): + psr = cudf.utils.utils._create_pandas_series(data=data) + gsr = cudf.Series.from_pandas(psr) + + got = gsr.index.isin(values) + expected = psr.index.isin(values) + + assert_eq(got, expected) + + +@pytest.mark.parametrize( + "data", + [ + pd.MultiIndex.from_arrays( + [[1, 2, 3], ["red", "blue", "green"]], names=("number", "color") + ), + pd.MultiIndex.from_arrays([[], []], names=("number", "color")), + pd.MultiIndex.from_arrays( + [[1, 2, 3, 10, 100], ["red", "blue", "green", "pink", "white"]], + names=("number", "color"), + ), + ], +) +@pytest.mark.parametrize( + "values,level,err", + [ + (["red", "orange", "yellow"], "color", None), + (["red", "white", "yellow"], "color", None), + ([0, 1, 2, 10, 11, 15], "number", None), + ([0, 1, 2, 10, 11, 15], None, TypeError), + (pd.Series([0, 1, 2, 10, 11, 15]), None, TypeError), + (pd.Index([0, 1, 2, 10, 11, 15]), None, TypeError), + (pd.Index([0, 1, 2, 8, 11, 15]), "number", None), + (pd.Index(["red", "white", "yellow"]), "color", None), + ([(1, "red"), (3, "red")], None, None), + (((1, "red"), (3, "red")), None, None), + ( + pd.MultiIndex.from_arrays( + [[1, 2, 3], ["red", "blue", "green"]], + names=("number", "color"), + ), + None, + None, + ), + ( + pd.MultiIndex.from_arrays([[], []], names=("number", "color")), + None, + None, + ), + ( + pd.MultiIndex.from_arrays( + [ + [1, 2, 3, 10, 100], + ["red", "blue", "green", "pink", "white"], + ], + names=("number", "color"), + ), + None, + None, + ), + ], +) +def test_isin_multiindex(data, values, level, err): + pmdx = data + gmdx = cudf.from_pandas(data) + + if err is None: + expected = pmdx.isin(values, level=level) + if isinstance(values, pd.MultiIndex): + values = cudf.from_pandas(values) + got = gmdx.isin(values, level=level) + + assert_eq(got, expected) + else: + assert_exceptions_equal( + lfunc=pmdx.isin, + rfunc=gmdx.isin, + lfunc_args_and_kwargs=([values], {"level": level}), + rfunc_args_and_kwargs=([values], {"level": level}), + check_exception_type=False, + expected_error_message=re.escape( + "values need to be a Multi-Index or set/list-like tuple " + "squences when `level=None`." + ), + ) diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index 358484d79b9..3e3c5d1b053 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. import operator import re @@ -11,6 +11,7 @@ import pytest import cudf +from cudf.core._compat import PANDAS_GE_120 from cudf.testing._utils import ( NUMERIC_TYPES, TIMEDELTA_TYPES, @@ -1548,45 +1549,154 @@ def test_series_nunique_index(data): @pytest.mark.parametrize( - "fill_value,data", + "data", [ - (7, [6, 3, 4]), - ("x", ["a", "b", "c", "d", "e", "f"]), - (7, [6, 3, 4, 2, 1, 7, 8, 5]), - (0.8, [0.6, 0.3, 0.4, 0.2, 0.1, 0.7, 0.8, 0.5]), - ("b", pd.Categorical(["a", "b", "c"])), - (None, [0.0, 1.0, 2.0, 3.0]), + [], + [0, 12, 14], + [0, 14, 12, 12, 3, 10, 12, 14], + np.random.randint(-100, 100, 200), + pd.Series([0.0, 1.0, None, 10.0]), + [None, None, None, None], + [np.nan, None, -1, 2, 3], ], ) @pytest.mark.parametrize( - "begin,end", + "values", [ - (0, -1), - (0, 4), - (1, -1), - (1, 4), - (-2, 1), - (-2, -1), - (10, 12), - (8, 10), - (10, 8), - (-10, -8), - (-2, 6), + np.random.randint(-100, 100, 10), + [], + [np.nan, None, -1, 2, 3], + [1.0, 12.0, None, None, 120], + [0, 14, 12, 12, 3, 10, 12, 14, None], + [None, None, None], + ["0", "12", "14"], + ["0", "12", "14", "a"], ], ) -@pytest.mark.parametrize("inplace", [True, False]) -def test_fill(data, fill_value, begin, end, inplace): - gs = cudf.Series(data) - ps = gs.to_pandas() +def test_isin_numeric(data, values): + index = np.random.randint(0, 100, len(data)) + psr = cudf.utils.utils._create_pandas_series(data=data, index=index) + gsr = cudf.Series.from_pandas(psr, nan_as_null=False) + + expected = psr.isin(values) + got = gsr.isin(values) + + assert_eq(got, expected) - actual = gs - gs[begin:end] = fill_value - ps[begin:end] = fill_value - assert_eq(ps, actual) +@pytest.mark.parametrize( + "data", + [ + [], + pd.Series( + ["2018-01-01", "2019-04-03", None, "2019-12-30"], + dtype="datetime64[ns]", + ), + pd.Series( + [ + "2018-01-01", + "2019-04-03", + None, + "2019-12-30", + "2018-01-01", + "2018-01-01", + ], + dtype="datetime64[ns]", + ), + ], +) +@pytest.mark.parametrize( + "values", + [ + [], + [1514764800000000000, 1577664000000000000], + [ + 1514764800000000000, + 1577664000000000000, + 1577664000000000000, + 1577664000000000000, + 1514764800000000000, + ], + ["2019-04-03", "2019-12-30", "2012-01-01"], + [ + "2012-01-01", + "2012-01-01", + "2012-01-01", + "2019-04-03", + "2019-12-30", + "2012-01-01", + ], + ], +) +def test_isin_datetime(data, values): + psr = cudf.utils.utils._create_pandas_series(data=data) + gsr = cudf.Series.from_pandas(psr) + + got = gsr.isin(values) + expected = psr.isin(values) + assert_eq(got, expected) -@pytest.mark.xfail(raises=ValueError) -def test_fill_new_category(): - gs = cudf.Series(pd.Categorical(["a", "b", "c"])) - gs[0:1] = "d" +@pytest.mark.parametrize( + "data", + [ + [], + pd.Series(["this", "is", None, "a", "test"]), + pd.Series(["test", "this", "test", "is", None, "test", "a", "test"]), + pd.Series(["0", "12", "14"]), + ], +) +@pytest.mark.parametrize( + "values", + [ + [], + ["this", "is"], + [None, None, None], + ["12", "14", "19"], + pytest.param( + [12, 14, 19], + marks=pytest.mark.xfail( + not PANDAS_GE_120, + reason="pandas's failure here seems like a bug(in < 1.2) " + "given the reverse succeeds", + ), + ), + ["is", "this", "is", "this", "is"], + ], +) +def test_isin_string(data, values): + psr = cudf.utils.utils._create_pandas_series(data=data) + gsr = cudf.Series.from_pandas(psr) + + got = gsr.isin(values) + expected = psr.isin(values) + assert_eq(got, expected) + + +@pytest.mark.parametrize( + "data", + [ + [], + pd.Series(["a", "b", "c", "c", "c", "d", "e"], dtype="category"), + pd.Series(["a", "b", None, "c", "d", "e"], dtype="category"), + pd.Series([0, 3, 10, 12], dtype="category"), + pd.Series([0, 3, 10, 12, 0, 10, 3, 0, 0, 3, 3], dtype="category"), + ], +) +@pytest.mark.parametrize( + "values", + [ + [], + ["a", "b", None, "f", "words"], + ["0", "12", None, "14"], + [0, 10, 12, None, 39, 40, 1000], + [0, 0, 0, 0, 3, 3, 3, None, 1, 2, 3], + ], +) +def test_isin_categorical(data, values): + psr = cudf.utils.utils._create_pandas_series(data=data) + gsr = cudf.Series.from_pandas(psr) + + got = gsr.isin(values) + expected = psr.isin(values) + assert_eq(got, expected) diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py index cf845a5d525..4dadfede866 100644 --- a/python/cudf/cudf/utils/utils.py +++ b/python/cudf/cudf/utils/utils.py @@ -93,6 +93,8 @@ def wrapper(*args, **kwargs): return wrapper +# TODO: We should evaluate whether calls to this could be more easily replaced +# with column.full, which appears to be significantly faster in simple cases. def scalar_broadcast_to(scalar, size, dtype=None): if isinstance(size, (tuple, list)):