From 48dc1681da8a6f532e664d740a187ede6885968b Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 26 Aug 2022 09:50:34 -0700 Subject: [PATCH] Remove deprecated Series.applymap. (#11031) This PR removes the deprecated `Series.applymap` function. This function does not exist in pandas. Users should switch to using `Series.apply`. (Note that `DataFrame.applymap` does exist in both pandas and cudf.) Deprecated in #10497. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) - https://github.com/brandon-b-miller - Michael Wang (https://github.com/isVoid) URL: https://github.com/rapidsai/cudf/pull/11031 --- python/cudf/cudf/core/series.py | 134 ++---------------- python/cudf/cudf/core/window/rolling.py | 30 +++- python/cudf/cudf/tests/test_applymap.py | 56 +------- python/cudf/cudf/tests/test_seriesmap.py | 4 +- python/cudf/cudf/tests/test_transform.py | 5 +- python/cudf/cudf/tests/test_udf_masked_ops.py | 17 ++- 6 files changed, 52 insertions(+), 194 deletions(-) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index d66128bacada..24f07e66be81 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -6,7 +6,6 @@ import inspect import pickle import textwrap -import warnings from collections import abc from shutil import get_terminal_size from typing import Any, Dict, MutableMapping, Optional, Set, Tuple, Type, Union @@ -1157,10 +1156,7 @@ def map(self, arg, na_action=None) -> "Series": result.name = self.name result.index = self.index else: - # TODO: switch to `apply` - with warnings.catch_warnings(): - warnings.simplefilter("ignore", category=FutureWarning) - result = self.applymap(arg) + result = self.apply(arg) return result @_cudf_nvtx_annotate @@ -2246,7 +2242,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs): ``apply`` relies on Numba to JIT compile ``func``. Thus the allowed operations within ``func`` are limited to `those supported by the CUDA Python Numba target - `__. + `__. For more information, see the `cuDF guide to user defined functions `__. @@ -2265,6 +2261,11 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs): **kwargs Not supported + Returns + ------- + result : Series + The mask and index are preserved. + Notes ----- UDFs are cached in memory to avoid recompilation. The first @@ -2275,7 +2276,8 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs): Examples -------- - Apply a basic function to a series + Apply a basic function to a series: + >>> sr = cudf.Series([1,2,3]) >>> def f(x): ... return x + 1 @@ -2333,124 +2335,6 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs): result.name = self.name return result - @_cudf_nvtx_annotate - def applymap(self, udf, out_dtype=None): - """Apply an elementwise function to transform the values in the Column. - - The user function is expected to take one argument and return the - result, which will be stored to the output Series. The function - cannot reference globals except for other simple scalar objects. - - Parameters - ---------- - udf : function - Either a callable python function or a python function already - decorated by ``numba.cuda.jit`` for call on the GPU as a device - - out_dtype : :class:`numpy.dtype`; optional - The dtype for use in the output. - Only used for ``numba.cuda.jit`` decorated udf. - By default, the result will have the same dtype as the source. - - Returns - ------- - result : Series - The mask and index are preserved. - - Notes - ----- - The supported Python features are listed in - - https://numba.pydata.org/numba-doc/dev/cuda/cudapysupported.html - - with these exceptions: - - * Math functions in `cmath` are not supported since `libcudf` does not - have complex number support and output of `cmath` functions are most - likely complex numbers. - - * These five functions in `math` are not supported since numba - generates multiple PTX functions from them - - * math.sin() - * math.cos() - * math.tan() - * math.gamma() - * math.lgamma() - - * Series with string dtypes are not supported in `applymap` method. - - * Global variables need to be re-defined explicitly inside - the udf, as numba considers them to be compile-time constants - and there is no known way to obtain value of the global variable. - - Examples - -------- - Returning a Series of booleans using only a literal pattern. - - >>> import cudf - >>> s = cudf.Series([1, 10, -10, 200, 100]) - >>> s.applymap(lambda x: x) - 0 1 - 1 10 - 2 -10 - 3 200 - 4 100 - dtype: int64 - >>> s.applymap(lambda x: x in [1, 100, 59]) - 0 True - 1 False - 2 False - 3 False - 4 True - dtype: bool - >>> s.applymap(lambda x: x ** 2) - 0 1 - 1 100 - 2 100 - 3 40000 - 4 10000 - dtype: int64 - >>> s.applymap(lambda x: (x ** 2) + (x / 2)) - 0 1.5 - 1 105.0 - 2 95.0 - 3 40100.0 - 4 10050.0 - dtype: float64 - >>> def cube_function(a): - ... return a ** 3 - ... - >>> s.applymap(cube_function) - 0 1 - 1 1000 - 2 -1000 - 3 8000000 - 4 1000000 - dtype: int64 - >>> def custom_udf(x): - ... if x > 0: - ... return x + 5 - ... else: - ... return x - 5 - ... - >>> s.applymap(custom_udf) - 0 6 - 1 15 - 2 -15 - 3 205 - 4 105 - dtype: int64 - """ - warnings.warn( - "Series.applymap is deprecated and will be removed " - "in a future cuDF release. Use Series.apply instead.", - FutureWarning, - ) - if not callable(udf): - raise ValueError("Input UDF must be a callable object.") - return self._from_data({self.name: self._unaryop(udf)}, self._index) - # # Stats # diff --git a/python/cudf/cudf/core/window/rolling.py b/python/cudf/cudf/core/window/rolling.py index 0abb46bb988a..8d6d0171ee7f 100644 --- a/python/cudf/cudf/core/window/rolling.py +++ b/python/cudf/cudf/core/window/rolling.py @@ -335,8 +335,7 @@ def count(self): def apply(self, func, *args, **kwargs): """ - Counterpart of `pandas.core.window.Rolling.apply - `_. + Calculate the rolling custom aggregation function. Parameters ---------- @@ -349,12 +348,35 @@ def apply(self, func, *args, **kwargs): See Also -------- - cudf.Series.applymap : Apply an elementwise function to + cudf.Series.apply: Apply an elementwise function to transform the values in the Column. Notes ----- - See notes of the :meth:`cudf.Series.applymap` + The supported Python features are listed in + + https://numba.readthedocs.io/en/stable/cuda/cudapysupported.html + + with these exceptions: + + * Math functions in `cmath` are not supported since `libcudf` does not + have complex number support and output of `cmath` functions are most + likely complex numbers. + + * These five functions in `math` are not supported since numba + generates multiple PTX functions from them: + + * math.sin() + * math.cos() + * math.tan() + * math.gamma() + * math.lgamma() + + * Series with string dtypes are not supported. + + * Global variables need to be re-defined explicitly inside + the udf, as numba considers them to be compile-time constants + and there is no known way to obtain value of the global variable. Examples -------- diff --git a/python/cudf/cudf/tests/test_applymap.py b/python/cudf/cudf/tests/test_applymap.py index c8a9b5d03f54..32f3e39dd7cc 100644 --- a/python/cudf/cudf/tests/test_applymap.py +++ b/python/cudf/cudf/tests/test_applymap.py @@ -1,65 +1,11 @@ # Copyright (c) 2018-2022, NVIDIA CORPORATION. -from itertools import product -from math import floor - -import numpy as np import pytest -from cudf import NA, DataFrame, Series +from cudf import NA, DataFrame from cudf.testing import _utils as utils -@pytest.mark.parametrize( - "nelem,masked", list(product([2, 10, 100, 1000], [True, False])) -) -def test_applymap_round(nelem, masked): - # Generate data - np.random.seed(0) - data = np.random.random(nelem) * 100 - - if masked: - # Make mask - bitmask = utils.random_bitmask(nelem) - boolmask = np.asarray( - utils.expand_bits_to_bytes(bitmask), dtype=np.bool_ - )[:nelem] - data[~boolmask] = None - - sr = Series(data) - - # Call applymap - with pytest.warns(FutureWarning): - out = sr.applymap( - lambda x: (floor(x) + 1 if x - floor(x) >= 0.5 else floor(x)) - ) - - if masked: - # Fill masked values - out = out.fillna(np.nan) - - # Check - expect = np.round(data) - got = out.to_numpy() - np.testing.assert_array_almost_equal(expect, got) - - -def test_applymap_change_out_dtype(): - # Test for changing the out_dtype using applymap - - data = list(range(10)) - - sr = Series(data) - - with pytest.warns(FutureWarning): - out = sr.applymap(lambda x: float(x), out_dtype=float) - - # Check - expect = np.array(data, dtype=float) - got = out.to_numpy() - np.testing.assert_array_equal(expect, got) - - @pytest.mark.parametrize( "data", [ diff --git a/python/cudf/cudf/tests/test_seriesmap.py b/python/cudf/cudf/tests/test_seriesmap.py index f1a51a45779e..4ed6bf7afdda 100644 --- a/python/cudf/cudf/tests/test_seriesmap.py +++ b/python/cudf/cudf/tests/test_seriesmap.py @@ -52,7 +52,7 @@ def test_series_map_callable_numeric_random(nelem): sr = Series(data) pdsr = pd.Series(data) - # Call applymap + # Call map got = sr.map(lambda x: (floor(x) + 1 if x - floor(x) >= 0.5 else floor(x))) expect = pdsr.map( lambda x: (floor(x) + 1 if x - floor(x) >= 0.5 else floor(x)) @@ -63,7 +63,7 @@ def test_series_map_callable_numeric_random(nelem): def test_series_map_callable_numeric_random_dtype_change(): - # Test for changing the out_dtype using applymap + # Test for changing the out_dtype using map data = list(range(10)) diff --git a/python/cudf/cudf/tests/test_transform.py b/python/cudf/cudf/tests/test_transform.py index 4b4537514d6c..723bbdf9371e 100644 --- a/python/cudf/cudf/tests/test_transform.py +++ b/python/cudf/cudf/tests/test_transform.py @@ -22,14 +22,13 @@ def _generic_function(a): (lambda x: x in [1, 2, 3, 4], lambda ser: np.isin(ser, [1, 2, 3, 4])), ], ) -def test_applymap_python_lambda(dtype, udf, testfunc): +def test_apply_python_lambda(dtype, udf, testfunc): size = 500 lhs_arr = np.random.random(size).astype(dtype) lhs_ser = Series(lhs_arr) - with pytest.warns(FutureWarning): - out_ser = lhs_ser.applymap(udf) + out_ser = lhs_ser.apply(udf) result = testfunc(lhs_arr) np.testing.assert_almost_equal(result, out_ser.to_numpy()) diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index 0cbc0d1f103c..4f385656405d 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -243,6 +243,15 @@ def func(row): run_masked_udf_test(func, gdf, check_dtype=False) +def test_apply_contains(): + def func(row): + x = row["a"] + return x in [1, 2] + + gdf = cudf.DataFrame({"a": [1, 3]}) + run_masked_udf_test(func, gdf, check_dtype=False) + + @parametrize_numeric_dtypes_pairwise @pytest.mark.parametrize("op", [operator.add, operator.and_, operator.eq]) def test_apply_mixed_dtypes(left_dtype, right_dtype, op): @@ -647,18 +656,16 @@ def test_masked_udf_caching(): # recompile data = cudf.Series([1, 2, 3]) - expect = data**2 - with pytest.warns(FutureWarning): - got = data.applymap(lambda x: x**2) + expect = data**2 + got = data.apply(lambda x: x**2) assert_eq(expect, got, check_dtype=False) # update the constant value being used and make sure # it does not result in a cache hit expect = data**3 - with pytest.warns(FutureWarning): - got = data.applymap(lambda x: x**3) + got = data.apply(lambda x: x**3) assert_eq(expect, got, check_dtype=False) # make sure we get a hit when reapplying