Skip to content

Commit

Permalink
Add isin examples in Docstring (#7479)
Browse files Browse the repository at this point in the history
Fully resolves: #7412 

Most of the fixes addressing #7412 were made as part of the pandas upgrade in #7375. This PR only includes docstring updates for `isin`.

Authors:
  - GALI PREM SAGAR (@galipremsagar)

Approvers:
  - Keith Kraus (@kkraus14)

URL: #7479
  • Loading branch information
galipremsagar authored Mar 2, 2021
1 parent 61091a0 commit f4f4d87
Show file tree
Hide file tree
Showing 5 changed files with 109 additions and 0 deletions.
37 changes: 37 additions & 0 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -5713,6 +5713,43 @@ def isin(self, values):
DataFrame:
DataFrame of booleans showing whether each element in
the DataFrame is contained in values.
Examples
--------
>>> import cudf
>>> df = cudf.DataFrame({'num_legs': [2, 4], 'num_wings': [2, 0]},
... index=['falcon', 'dog'])
>>> df
num_legs num_wings
falcon 2 2
dog 4 0
When ``values`` is a list, check whether every value in the DataFrame
is present in the list (which animals have 0 or 2 legs or wings):
>>> df.isin([0, 2])
num_legs num_wings
falcon True True
dog False True
When ``values`` is a dict, we can pass values to check for each
column separately:
>>> df.isin({'num_wings': [0, 3]})
num_legs num_wings
falcon False False
dog False True
When ``values`` is a Series or DataFrame, the index and columns must
match. Note that 'falcon' matches on both columns, while 'dog' is not
in the index of ``other`` and therefore does not match.
>>> other = cudf.DataFrame({'num_legs': [8, 2], 'num_wings': [0, 2]},
... index=['spider', 'falcon'])
>>> df.isin(other)
num_legs num_wings
falcon True True
dog False False
"""

if isinstance(values, dict):
Expand Down
1 change: 1 addition & 0 deletions python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ def copy(self: T, deep: bool = True) -> T:
-------
copy : Series or DataFrame
Object type matches caller.
Examples
--------
>>> s = cudf.Series([1, 2], index=["a", "b"])
Expand Down
10 changes: 10 additions & 0 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1362,6 +1362,16 @@ def isin(self, values):
is_contained : cupy array
CuPy array of boolean values.
Examples
--------
>>> idx = cudf.Index([1,2,3])
>>> idx
Int64Index([1, 2, 3], dtype='int64')
Check whether each index value is in a list of values:
>>> idx.isin([1, 4])
array([ True, False, False])
"""

result = self.to_series().isin(values).values
Expand Down
26 changes: 26 additions & 0 deletions python/cudf/cudf/core/multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -655,16 +655,42 @@ def isin(self, values, level=None):
level : str or int, optional
Name or position of the index level to use (if the index
is a MultiIndex).
Returns
-------
is_contained : cupy array
CuPy array of boolean values.
Notes
-----
When `level` is None, `values` can only be a MultiIndex or a
set/list-like of tuples.
When `level` is provided, `values` can be an Index or MultiIndex,
or a set/list-like of tuples.
Examples
--------
>>> import cudf
>>> import pandas as pd
>>> midx = cudf.from_pandas(pd.MultiIndex.from_arrays([[1,2,3],
... ['red', 'blue', 'green']],
... names=('number', 'color')))
>>> midx
MultiIndex([(1, 'red'),
(2, 'blue'),
(3, 'green')],
names=['number', 'color'])
Check whether the strings in the 'color' level of the MultiIndex
are in a list of colors.
>>> midx.isin(['red', 'orange', 'yellow'], level='color')
array([ True, False, False])
To check across the levels of a MultiIndex, pass a list of tuples:
>>> midx.isin([(1, 'red'), (3, 'red')])
array([ True, False, False])
"""
from cudf.utils.dtypes import is_list_like

Expand Down
35 changes: 35 additions & 0 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3784,6 +3784,41 @@ def isin(self, values):
-------
TypeError
If values is a string
Examples
--------
>>> import cudf
>>> s = cudf.Series(['lama', 'cow', 'lama', 'beetle', 'lama',
... 'hippo'], name='animal')
>>> s.isin(['cow', 'lama'])
0 True
1 True
2 True
3 False
4 True
5 False
Name: animal, dtype: bool
Passing a single string as ``s.isin('lama')`` will raise an error. Use
a list of one element instead:
>>> s.isin(['lama'])
0 True
1 False
2 True
3 False
4 True
5 False
Name: animal, dtype: bool
Strings and numbers are distinct and are therefore not comparable:
>>> cudf.Series([1]).isin(['1'])
0 False
dtype: bool
>>> cudf.Series([1.1]).isin(['1.1'])
0 False
dtype: bool
"""

if is_scalar(values):
Expand Down

0 comments on commit f4f4d87

Please sign in to comment.