rapidsai · rapids-bot · Apr 13, 2022 · Mar 24, 2022 · Mar 28, 2022 · Mar 29, 2022
@@ -13,6 +13,7 @@
 from collections.abc import Iterable, Sequence
 from typing import (
     Any,
+    Callable,
     Dict,
     List,
     MutableMapping,
@@ -25,6 +26,7 @@
 )
 
 import cupy
+import numba
 import numpy as np
 import pandas as pd
 import pyarrow as pa
@@ -3712,6 +3714,59 @@ def apply(
 
         return self._apply(func, _get_row_kernel, *args, **kwargs)
 
+    def applymap(
+        self, func: Callable, na_action: str | None = None, **kwargs
+    ) -> DataFrame:
+        """
+        Apply a function to a Dataframe elementwise.
+        This method applies a function that accepts and returns a scalar
+        to every element of a DataFrame.
+        Parameters
+        ----------
+        func : callable
+            Python function, returns a single value from a single value.
+        na_action : {None, 'ignore'}, default None
+            If ``ignore``, propagate NaN values, without passing them to func.
-            If ``ignore``, propagate NaN values, without passing them to func.
+            If 'ignore', propagate NaN values, without passing them to func.
-            If ``ignore``, propagate NaN values, without passing them to func.
+            If 'ignore', propagate NaN values, without passing them to func.
+
+        Returns
+        -------
+        DataFrame
+            Transformed DataFrame.
+        """
+
+        if kwargs:
+            raise ValueError(
-            raise ValueError(
+            raise NotImplementedError(
-            raise ValueError(
+            raise NotImplementedError(
+                "DataFrame.applymap does not yet support **kwargs."
+            )
+
+        if na_action not in {"ignore", None}:
+            raise ValueError(
+                f"na_action must be 'ignore' or None. Got {repr(na_action)}"
+            )
+
+        if na_action == "ignore":
+            devfunc = numba.cuda.jit(device=True)(func)
+
+            # promote to a null-ignoring function
+            def _func(x):
+                # promote to a null-ignoring function
+                if x is cudf.NA:
+                    return cudf.NA
+                else:
+                    return devfunc(x)
+
+        else:
+            _func = func
+
+        # TODO: naive implementation
+        # this could be written as a single kernel
+        result = {}
+        for name, col in self._data.items():
+            apply_sr = Series._from_data({None: col})
+            result[name] = apply_sr.apply(_func)
+
+        return DataFrame._from_data(result, index=self.index)
+
     @_cudf_nvtx_annotate
     @applyutils.doc_apply()
     def apply_rows(

@@ -6,7 +6,7 @@
 import numpy as np
 import pytest
 
-from cudf import Series
+from cudf import NA, DataFrame, Series
 from cudf.testing import _utils as utils
 
 
@@ -56,3 +56,33 @@ def test_applymap_change_out_dtype():
     expect = np.array(data, dtype=float)
     got = out.to_numpy()
     np.testing.assert_array_equal(expect, got)
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        {"a": [1, 2, 3], "b": [4, 5, 6]},
+        {"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]},
+        {"a": [1, 2, 3], "b": [True, False, True]},
+        {"a": [1, NA, 2], "b": [NA, 4, NA]},
+    ],
+)
+@pytest.mark.parametrize(
+    "func",
+    [
+        lambda x: x + 1,
+        lambda x: x - 1,
+        lambda x: x + 0.5,
+        lambda x: 2 if x is NA else 2 + (x + 1) / 4.1,
+        lambda x: 42,
+    ],
+)
+@pytest.mark.parametrize("na_action", [None, "ignore"])
+def test_applymap_dataframe(data, func, na_action):
+    gdf = DataFrame(data)
+    pdf = gdf.to_pandas(nullable=True)
+
+    expect = pdf.applymap(func, na_action=na_action)
+    got = gdf.applymap(func, na_action=na_action)
+
+    utils.assert_eq(expect, got, check_dtype=False)
@@ -0,0 +1,30 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+
+import pytest
+from pandas import NA
+
+from dask import dataframe as dd
+
+from .utils import _make_random_frame
+
+
+@pytest.mark.parametrize(
+    "func",
+    [
+        lambda x: x + 1,
+        lambda x: x - 1,
+        lambda x: x + 0.5,
+        lambda x: 2 if x is NA else 2 + (x + 1) / 4.1,
+        lambda x: 42,
+    ],
+)
+@pytest.mark.parametrize("has_na", [True, False])
+def test_applymap_basic(func, has_na):
+    size = 2000
+    pdf, dgdf = _make_random_frame(size, include_na=False)
+
+    dpdf = dd.from_pandas(pdf, npartitions=dgdf.npartitions)
+
+    expect = dpdf.applymap(func)
+    got = dgdf.applymap(func)
+    dd.assert_eq(expect, got, check_dtype=False)
@@ -1,41 +1,13 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+
 import operator
 
 import numpy as np
-import pandas as pd
 import pytest
 
 from dask import dataframe as dd
 
-import cudf
-
-
-def _make_empty_frame(npartitions=2):
-    df = pd.DataFrame({"x": [], "y": []})
-    gdf = cudf.DataFrame.from_pandas(df)
-    dgf = dd.from_pandas(gdf, npartitions=npartitions)
-    return dgf
-
-
-def _make_random_frame(nelem, npartitions=2):
-    df = pd.DataFrame(
-        {"x": np.random.random(size=nelem), "y": np.random.random(size=nelem)}
-    )
-    gdf = cudf.DataFrame.from_pandas(df)
-    dgf = dd.from_pandas(gdf, npartitions=npartitions)
-    return df, dgf
-
-
-def _make_random_frame_float(nelem, npartitions=2):
-    df = pd.DataFrame(
-        {
-            "x": np.random.randint(0, 5, size=nelem),
-            "y": np.random.normal(size=nelem) + 1,
-        }
-    )
-    gdf = cudf.from_pandas(df)
-    dgf = dd.from_pandas(gdf, npartitions=npartitions)
-    return df, dgf
-
+from .utils import _make_random_frame, _make_random_frame_float
 
 _binops = [
     operator.add,

@@ -0,0 +1,40 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+
+import numpy as np
+import pandas as pd
+
+import dask.dataframe as dd
+
+import cudf
+
+
+def _make_empty_frame(npartitions=2):
+    df = pd.DataFrame({"x": [], "y": []})
+    gdf = cudf.DataFrame.from_pandas(df)
+    dgf = dd.from_pandas(gdf, npartitions=npartitions)
+    return dgf
+
+
+def _make_random_frame(nelem, npartitions=2, include_na=False):
+    df = pd.DataFrame(
+        {"x": np.random.random(size=nelem), "y": np.random.random(size=nelem)}
+    )
+
+    if include_na:
+        df["x"][::2] = pd.NA
+
+    gdf = cudf.DataFrame.from_pandas(df)
+    dgf = dd.from_pandas(gdf, npartitions=npartitions)
+    return df, dgf
+
+
+def _make_random_frame_float(nelem, npartitions=2):
+    df = pd.DataFrame(
+        {
+            "x": np.random.randint(0, 5, size=nelem),
+            "y": np.random.normal(size=nelem) + 1,
+        }
+    )
+    gdf = cudf.from_pandas(df)
+    dgf = dd.from_pandas(gdf, npartitions=npartitions)
+    return df, dgf