-
Notifications
You must be signed in to change notification settings - Fork 915
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add
cudf.DataFrame.applymap
(#10542)
Naive implementation of `DataFrame.applymap` that just calls `apply` in a loop over columns. This could theoretically be made much faster within our framework. It requires at worst `N` compilations and `M` kernel launches, where `N` is the number of distinct dtypes in the data and `M` is the total number of columns. As an improvement, we could instead launch just one kernel that populates the entire output. That would still suffer from the compilation bottleneck, however, since the function must be compiled in order to determine an output dtype, and this must be done for each distinct dtype within the data. Part of #10169. Authors: - https://github.com/brandon-b-miller - Bradley Dice (https://github.com/bdice) Approvers: - Bradley Dice (https://github.com/bdice) - GALI PREM SAGAR (https://github.com/galipremsagar) URL: #10542
- Loading branch information
1 parent
c72868e
commit ce56bc3
Showing
6 changed files
with
162 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# Copyright (c) 2022, NVIDIA CORPORATION. | ||
|
||
import pytest | ||
from pandas import NA | ||
|
||
from dask import dataframe as dd | ||
|
||
from dask_cudf.tests.utils import _make_random_frame | ||
|
||
|
||
@pytest.mark.parametrize(
    "func",
    [
        lambda x: x + 1,
        lambda x: x - 0.5,
        lambda x: 2 if x is NA else 2 + (x + 1) / 4.1,
        lambda x: 42,
    ],
)
@pytest.mark.parametrize("has_na", [True, False])
def test_applymap_basic(func, has_na):
    """Compare ``applymap`` on a cudf-backed dask frame with dask-on-pandas.

    The same elementwise ``func`` is applied to both collections and the
    results are compared with ``dd.assert_eq`` (dtype check disabled).
    """
    # NOTE(review): ``has_na`` is parametrized but never forwarded; the
    # frame is always built with ``include_na=False``, so both values of
    # ``has_na`` exercise the same NA-free data. Confirm whether this is
    # intentional before wiring it through.
    host_df, gpu_ddf = _make_random_frame(2000, include_na=False)

    # Mirror the partitioning of the cudf-backed collection on the host side.
    host_ddf = dd.from_pandas(host_df, npartitions=gpu_ddf.npartitions)

    dd.assert_eq(
        host_ddf.applymap(func),
        gpu_ddf.applymap(func),
        check_dtype=False,
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Copyright (c) 2022, NVIDIA CORPORATION. | ||
|
||
import numpy as np | ||
import pandas as pd | ||
|
||
import dask.dataframe as dd | ||
|
||
import cudf | ||
|
||
|
||
def _make_random_frame(nelem, npartitions=2, include_na=False):
    """Build a random pandas frame and a dask collection over its cudf copy.

    Parameters
    ----------
    nelem : int
        Number of rows to generate.
    npartitions : int, default 2
        Forwarded to ``dd.from_pandas`` when wrapping the cudf frame.
    include_na : bool, default False
        When true, every second row of column ``"x"`` (starting at row 0)
        is overwritten with ``pd.NA`` before the cudf copy is made.

    Returns
    -------
    tuple
        ``(df, dgf)``: the pandas DataFrame with float columns ``"x"`` and
        ``"y"``, and the dask collection created from
        ``cudf.DataFrame.from_pandas(df)``.
    """
    df = pd.DataFrame(
        {"x": np.random.random(size=nelem), "y": np.random.random(size=nelem)}
    )

    if include_na:
        # One ``.loc`` assignment instead of chained ``df["x"][::2] = ...``:
        # chained assignment may write to a temporary (SettingWithCopy) and
        # is a no-op under pandas copy-on-write.
        df.loc[::2, "x"] = pd.NA

    gdf = cudf.DataFrame.from_pandas(df)
    dgf = dd.from_pandas(gdf, npartitions=npartitions)
    return df, dgf