rapidsai · rapids-bot · Apr 13, 2022 · Mar 24, 2022 · Mar 28, 2022 · Mar 29, 2022
@@ -13,6 +13,7 @@
 from collections.abc import Iterable, Sequence
 from typing import (
     Any,
+    Callable,
     Dict,
     List,
     MutableMapping,
@@ -25,6 +26,7 @@
 )
 
 import cupy
+import numba
 import numpy as np
 import pandas as pd
 import pyarrow as pa
@@ -3718,6 +3720,68 @@ def apply(
 
         return self._apply(func, _get_row_kernel, *args, **kwargs)
 
+    def applymap(
+        self,
+        func: Callable[[Any], Any],
+        na_action: Union[str, None] = None,
+        **kwargs,
+    ) -> DataFrame:
+
+        """
+        Apply a function to a Dataframe elementwise.
+
+        This method applies a function that accepts and returns a scalar
+        to every element of a DataFrame.
+
+        Parameters
+        ----------
+        func : callable
+            Python function, returns a single value from a single value.
+        na_action : {None, 'ignore'}, default None
+            If 'ignore', propagate NaN values, without passing them to func.
+
+        Returns
+        -------
+        DataFrame
+            Transformed DataFrame.
+        """
+
+        if kwargs:
+            raise NotImplementedError(
+                "DataFrame.applymap does not yet support **kwargs."
+            )
+
+        if na_action not in {"ignore", None}:
+            raise ValueError(
+                f"na_action must be 'ignore' or None. Got {repr(na_action)}"
+            )
+
+        if na_action == "ignore":
+            devfunc = numba.cuda.jit(device=True)(func)
+
+            # promote to a null-ignoring function
+            # this code is never run in python, it only
+            # exists to provide numba with the correct
+            # bytecode to generate the equivalent PTX
+            # as a null-ignoring version of the function
+            def _func(x):  # pragma: no cover
+                if x is cudf.NA:
+                    return cudf.NA
+                else:
+                    return devfunc(x)
+
+        else:
+            _func = func
+
+        # TODO: naive implementation
+        # this could be written as a single kernel
+        result = {}
+        for name, col in self._data.items():
+            apply_sr = Series._from_data({None: col})
+            result[name] = apply_sr.apply(_func)
+
+        return DataFrame._from_data(result, index=self.index)
+
     @_cudf_nvtx_annotate
     @applyutils.doc_apply()
     def apply_rows(

@@ -6,7 +6,7 @@
 import numpy as np
 import pytest
 
-from cudf import Series
+from cudf import NA, DataFrame, Series
 from cudf.testing import _utils as utils
 
 
@@ -58,3 +58,45 @@ def test_applymap_change_out_dtype():
     expect = np.array(data, dtype=float)
     got = out.to_numpy()
     np.testing.assert_array_equal(expect, got)
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        {"a": [1, 2, 3], "b": [4, 5, 6]},
+        {"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]},
+        {"a": [1, 2, 3], "b": [True, False, True]},
+        {"a": [1, NA, 2], "b": [NA, 4, NA]},
+    ],
+)
+@pytest.mark.parametrize(
+    "func",
+    [
+        lambda x: x + 1,
+        lambda x: x - 0.5,
+        lambda x: 2 if x is NA else 2 + (x + 1) / 4.1,
+        lambda x: 42,
+    ],
+)
+@pytest.mark.parametrize("na_action", [None, "ignore"])
+def test_applymap_dataframe(data, func, na_action):
+    gdf = DataFrame(data)
+    pdf = gdf.to_pandas(nullable=True)
+
+    expect = pdf.applymap(func, na_action=na_action)
+    got = gdf.applymap(func, na_action=na_action)
+
+    utils.assert_eq(expect, got, check_dtype=False)
+
+
+def test_applymap_raise_cases():
+    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+
+    def f(x, some_kwarg=0):
+        return x + some_kwarg
+
+    with pytest.raises(NotImplementedError):
+        df.applymap(f, some_kwarg=1)
+
+    with pytest.raises(ValueError):
+        df.applymap(f, na_action="some_invalid_option")
@@ -0,0 +1,29 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+
+import pytest
+from pandas import NA
+
+from dask import dataframe as dd
+
+from .utils import _make_random_frame
+
+
+@pytest.mark.parametrize(
+    "func",
+    [
+        lambda x: x + 1,
+        lambda x: x - 0.5,
+        lambda x: 2 if x is NA else 2 + (x + 1) / 4.1,
+        lambda x: 42,
+    ],
+)
+@pytest.mark.parametrize("has_na", [True, False])
+def test_applymap_basic(func, has_na):
+    size = 2000
+    pdf, dgdf = _make_random_frame(size, include_na=False)
+
+    dpdf = dd.from_pandas(pdf, npartitions=dgdf.npartitions)
+
+    expect = dpdf.applymap(func)
+    got = dgdf.applymap(func)
+    dd.assert_eq(expect, got, check_dtype=False)
@@ -1,3 +1,5 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+
 import operator
 
 import numpy as np
@@ -8,6 +10,8 @@
 
 import cudf
 
+from .utils import _make_random_frame
+
 
 def _make_empty_frame(npartitions=2):
     df = pd.DataFrame({"x": [], "y": []})
@@ -16,15 +20,6 @@ def _make_empty_frame(npartitions=2):
     return dgf
 
 
-def _make_random_frame(nelem, npartitions=2):
-    df = pd.DataFrame(
-        {"x": np.random.random(size=nelem), "y": np.random.random(size=nelem)}
-    )
-    gdf = cudf.DataFrame.from_pandas(df)
-    dgf = dd.from_pandas(gdf, npartitions=npartitions)
-    return df, dgf
-
-
 def _make_random_frame_float(nelem, npartitions=2):
     df = pd.DataFrame(
         {

@@ -0,0 +1,21 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+
+import numpy as np
+import pandas as pd
+
+import dask.dataframe as dd
+
+import cudf
+
+
+def _make_random_frame(nelem, npartitions=2, include_na=False):
+    df = pd.DataFrame(
+        {"x": np.random.random(size=nelem), "y": np.random.random(size=nelem)}
+    )
+
+    if include_na:
+        df["x"][::2] = pd.NA
+
+    gdf = cudf.DataFrame.from_pandas(df)
+    dgf = dd.from_pandas(gdf, npartitions=npartitions)
+    return df, dgf