diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 8bdb36fc27d..6b8b16b7118 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -5713,6 +5713,43 @@ def isin(self, values): DataFrame: DataFrame of booleans showing whether each element in the DataFrame is contained in values. + + Examples + -------- + >>> import cudf + >>> df = cudf.DataFrame({'num_legs': [2, 4], 'num_wings': [2, 0]}, + ... index=['falcon', 'dog']) + >>> df + num_legs num_wings + falcon 2 2 + dog 4 0 + + When ``values`` is a list, check whether every value in the DataFrame + is present in the list (which animals have 0 or 2 legs or wings): + + >>> df.isin([0, 2]) + num_legs num_wings + falcon True True + dog False True + + When ``values`` is a dict, we can pass values to check for each + column separately: + + >>> df.isin({'num_wings': [0, 3]}) + num_legs num_wings + falcon False False + dog False True + + When ``values`` is a Series or DataFrame, the index and columns must + match. Note that 'dog' is all False because its label is missing + from the index of ``other``. + + >>> other = cudf.DataFrame({'num_legs': [8, 2], 'num_wings': [0, 2]}, + ... index=['spider', 'falcon']) + >>> df.isin(other) + num_legs num_wings + falcon True True + dog False False """ if isinstance(values, dict): diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index dedefeaf9a2..b5946653c77 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -239,6 +239,7 @@ def copy(self: T, deep: bool = True) -> T: ------- copy : Series or DataFrame Object type matches caller. 
+ Examples -------- >>> s = cudf.Series([1, 2], index=["a", "b"]) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 88f3f8c4c89..b10ed5aa8cb 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1362,6 +1362,16 @@ def isin(self, values): is_contained : cupy array CuPy array of boolean values. + Examples + -------- + >>> idx = cudf.Index([1,2,3]) + >>> idx + Int64Index([1, 2, 3], dtype='int64') + + Check whether each index value is in a list of values. + + >>> idx.isin([1, 4]) + array([ True, False, False]) """ result = self.to_series().isin(values).values diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 19c5b827d50..82e89bb00f4 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -655,16 +655,42 @@ def isin(self, values, level=None): level : str or int, optional Name or position of the index level to use (if the index is a MultiIndex). + Returns ------- is_contained : cupy array CuPy array of boolean values. + Notes ------- When `level` is None, `values` can only be MultiIndex, or a set/list-like tuples. When `level` is provided, `values` can be Index or MultiIndex, or a set/list-like tuples. + + Examples + -------- + >>> import cudf + >>> import pandas as pd + >>> midx = cudf.from_pandas(pd.MultiIndex.from_arrays([[1,2,3], + ... ['red', 'blue', 'green']], + ... names=('number', 'color'))) + >>> midx + MultiIndex([(1, 'red'), + (2, 'blue'), + (3, 'green')], + names=['number', 'color']) + + Check whether the strings in the 'color' level of the MultiIndex + are in a list of colors. 
+ + >>> midx.isin(['red', 'orange', 'yellow'], level='color') + array([ True, False, False]) + + To check across the levels of a MultiIndex, pass a list of tuples: + + >>> midx.isin([(1, 'red'), (3, 'red')]) + array([ True, False, False]) """ from cudf.utils.dtypes import is_list_like diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index be03fb147ff..abce4f7cfca 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -3775,6 +3775,41 @@ def isin(self, values): ------- TypeError If values is a string + + Examples + -------- + >>> import cudf + >>> s = cudf.Series(['lama', 'cow', 'lama', 'beetle', 'lama', + ... 'hippo'], name='animal') + >>> s.isin(['cow', 'lama']) + 0 True + 1 True + 2 True + 3 False + 4 True + 5 False + Name: animal, dtype: bool + + Passing a single string as ``s.isin('lama')`` will raise an error. Use + a list of one element instead: + + >>> s.isin(['lama']) + 0 True + 1 False + 2 True + 3 False + 4 True + 5 False + Name: animal, dtype: bool + + Strings and integers are distinct and are therefore not comparable: + + >>> cudf.Series([1]).isin(['1']) + 0 False + dtype: bool + >>> cudf.Series([1.1]).isin(['1.1']) + 0 False + dtype: bool """ if is_scalar(values):