From a5fbc19307eef509a27c3ddfdc43c3f5e2966e08 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 13 Apr 2022 10:57:31 -0700 Subject: [PATCH 1/3] update docstrings --- python/cudf/cudf/core/dataframe.py | 25 +++++++++++++------------ python/cudf/cudf/core/series.py | 12 +++++++++--- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index b3beb553187..77032f5f6a5 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -3575,12 +3575,13 @@ def apply( ): """ Apply a function along an axis of the DataFrame. - - Designed to mimic `pandas.DataFrame.apply`. Applies a user - defined function row wise over a dataframe, with true null - handling. Works with UDFs using `core.udf.pipeline.nulludf` - and returns a single series. Uses numba to jit compile the - function to PTX via LLVM. + ``apply`` relies on Numba to JIT compile ``func``. + Thus the allowed operations within func are limited + to the ones specified + [here](https://numba.pydata.org/numba-doc/latest/cuda/cudapysupported.html). + For more information, see the cuDF guide + to user defined functions found + [here](https://docs.rapids.ai/api/cudf/stable/user_guide/guide-to-udfs.html). Parameters ---------- @@ -3601,7 +3602,7 @@ def apply( Examples -------- - Simple function of a single variable which could be NA + Simple function of a single variable which could be NA: >>> def f(row): ... if row['a'] is cudf.NA: @@ -3617,7 +3618,7 @@ def apply( dtype: int64 Function of multiple variables will operate in - a null aware manner + a null aware manner: >>> def f(row): ... return row['a'] - row['b'] @@ -3633,7 +3634,7 @@ def apply( 3 dtype: int64 - Functions may conditionally return NA as in pandas + Functions may conditionally return NA as in pandas: >>> def f(row): ... 
if row['a'] + row['b'] > 3: @@ -3652,7 +3653,7 @@ def apply( dtype: int64 Mixed types are allowed, but will return the common - type, rather than object as in pandas + type, rather than object as in pandas: >>> def f(row): ... return row['a'] + row['b'] @@ -3669,7 +3670,7 @@ def apply( Functions may also return scalar values, however the result will be promoted to a safe type regardless of - the data + the data: >>> def f(row): ... if row['a'] > 3: @@ -3686,7 +3687,7 @@ def apply( 2 5.0 dtype: float64 - Ops against N columns are supported generally + Ops against N columns are supported generally: >>> def f(row): ... v, w, x, y, z = ( diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 5bf52ed7520..93262187244 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -2021,9 +2021,15 @@ def _return_sentinel_series(): def apply(self, func, convert_dtype=True, args=(), **kwargs): """ Apply a scalar function to the values of a Series. - - Similar to `pandas.Series.apply. Applies a user - defined function elementwise over a series. + Similar to ``pandas.Series.apply``. + + ``apply`` relies on Numba to JIT compile ``func``. + Thus the allowed operations within func are limited + to the ones specified + [here](https://numba.pydata.org/numba-doc/latest/cuda/cudapysupported.html). + For more information, see the cuDF guide to + user defined functions found + [here](https://docs.rapids.ai/api/cudf/stable/user_guide/guide-to-udfs.html). 
Parameters ---------- From 4c9ffe0f280492d138842c9c2292171fe9b26285 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 13 Apr 2022 10:58:28 -0700 Subject: [PATCH 2/3] minor updates --- python/cudf/cudf/core/dataframe.py | 2 +- python/cudf/cudf/core/series.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 77032f5f6a5..b30eaa827e4 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -3576,7 +3576,7 @@ def apply( """ Apply a function along an axis of the DataFrame. ``apply`` relies on Numba to JIT compile ``func``. - Thus the allowed operations within func are limited + Thus the allowed operations within ``func`` are limited to the ones specified [here](https://numba.pydata.org/numba-doc/latest/cuda/cudapysupported.html). For more information, see the cuDF guide diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 93262187244..2dcb6b1b7b9 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -2024,7 +2024,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs): Similar to ``pandas.Series.apply``. ``apply`` relies on Numba to JIT compile ``func``. - Thus the allowed operations within func are limited + Thus the allowed operations within ``func`` are limited to the ones specified [here](https://numba.pydata.org/numba-doc/latest/cuda/cudapysupported.html). 
For more information, see the cuDF guide to From 57d5e82420efb2e56af389205b8a7c5d8013b464 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 13 Apr 2022 10:59:51 -0700 Subject: [PATCH 3/3] more very minor updates --- python/cudf/cudf/core/series.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 2dcb6b1b7b9..6e15c03e6b4 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -2067,7 +2067,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs): 2 4 dtype: int64 - Apply a basic function to a series with nulls + Apply a basic function to a series with nulls: >>> sr = cudf.Series([1,cudf.NA,3]) >>> def f(x): @@ -2079,7 +2079,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs): dtype: int64 Use a function that does something conditionally, - based on if the value is or is not null + based on whether the value is null: >>> sr = cudf.Series([1,cudf.NA,3]) >>> def f(x): @@ -2097,7 +2097,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs): as derived from the UDFs logic. Note that this means the common type will be returned even if such data is passed that would not result in any values of that - dtype. + dtype: >>> sr = cudf.Series([1,cudf.NA,3]) >>> def f(x):