diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index b3beb553187..b30eaa827e4 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -3575,12 +3575,13 @@ def apply( ): """ Apply a function along an axis of the DataFrame. - - Designed to mimic `pandas.DataFrame.apply`. Applies a user - defined function row wise over a dataframe, with true null - handling. Works with UDFs using `core.udf.pipeline.nulludf` - and returns a single series. Uses numba to jit compile the - function to PTX via LLVM. + ``apply`` relies on Numba to JIT compile ``func``. + Thus the allowed operations within ``func`` are limited + to the ones specified + `here <https://numba.pydata.org/numba-doc/latest/cuda/cudapysupported.html>`__. + For more information, see the cuDF guide + to user defined functions found + `here <https://docs.rapids.ai/api/cudf/stable/user_guide/guide-to-udfs.html>`__. Parameters ---------- @@ -3601,7 +3602,7 @@ def apply( Examples -------- - Simple function of a single variable which could be NA + Simple function of a single variable which could be NA: >>> def f(row): ... if row['a'] is cudf.NA: @@ -3617,7 +3618,7 @@ def apply( dtype: int64 Function of multiple variables will operate in - a null aware manner + a null aware manner: >>> def f(row): ... return row['a'] - row['b'] @@ -3633,7 +3634,7 @@ def apply( 3 dtype: int64 - Functions may conditionally return NA as in pandas + Functions may conditionally return NA as in pandas: >>> def f(row): ... if row['a'] + row['b'] > 3: @@ -3652,7 +3653,7 @@ def apply( dtype: int64 Mixed types are allowed, but will return the common - type, rather than object as in pandas + type, rather than object as in pandas: >>> def f(row): ... return row['a'] + row['b'] @@ -3669,7 +3670,7 @@ def apply( Functions may also return scalar values, however the result will be promoted to a safe type regardless of - the data + the data: >>> def f(row): ... 
if row['a'] > 3: @@ -3686,7 +3687,7 @@ def apply( 2 5.0 dtype: float64 - Ops against N columns are supported generally + Ops against N columns are supported generally: >>> def f(row): ... v, w, x, y, z = ( diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 5bf52ed7520..6e15c03e6b4 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -2021,9 +2021,15 @@ def _return_sentinel_series(): def apply(self, func, convert_dtype=True, args=(), **kwargs): """ Apply a scalar function to the values of a Series. + Similar to ``pandas.Series.apply``. - Similar to `pandas.Series.apply. Applies a user - defined function elementwise over a series. + ``apply`` relies on Numba to JIT compile ``func``. + Thus the allowed operations within ``func`` are limited + to the ones specified + `here <https://numba.pydata.org/numba-doc/latest/cuda/cudapysupported.html>`__. + For more information, see the cuDF guide to + user defined functions found + `here <https://docs.rapids.ai/api/cudf/stable/user_guide/guide-to-udfs.html>`__. Parameters ---------- @@ -2061,7 +2067,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs): 2 4 dtype: int64 - Apply a basic function to a series with nulls + Apply a basic function to a series with nulls: >>> sr = cudf.Series([1,cudf.NA,3]) >>> def f(x): @@ -2073,7 +2079,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs): dtype: int64 Use a function that does something conditionally, - based on if the value is or is not null + based on if the value is or is not null: >>> sr = cudf.Series([1,cudf.NA,3]) >>> def f(x): @@ -2091,7 +2097,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs): as derived from the UDFs logic. Note that this means the common type will be returned even if such data is passed that would not result in any values of that - dtype. + dtype: >>> sr = cudf.Series([1,cudf.NA,3]) >>> def f(x):