-
Notifications
You must be signed in to change notification settings - Fork 915
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add cudf.DataFrame.applymap
#10542
Add cudf.DataFrame.applymap
#10542
Changes from 19 commits
67a6187
e087124
454d9d8
2871aa1
b6827b5
06348f7
6fb742d
6ce8383
262c958
1c5d7ad
bd311ab
db2fee9
7081276
7d7b304
85963a8
6b91f33
b344532
d342f8e
137604e
477a824
e1d444c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# Copyright (c) 2022, NVIDIA CORPORATION. | ||
|
||
import pytest | ||
from pandas import NA | ||
|
||
from dask import dataframe as dd | ||
|
||
from .utils import _make_random_frame | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we use an absolute import here instead of a relative import, so that it is consistent with the other imports here and elsewhere in the codebase? |
||
|
||
|
||
@pytest.mark.parametrize(
    "func",
    [
        lambda x: x + 1,
        lambda x: x - 0.5,
        lambda x: 2 if x is NA else 2 + (x + 1) / 4.1,
        lambda x: 42,
    ],
)
@pytest.mark.parametrize("has_na", [True, False])
def test_applymap_basic(func, has_na):
    """Check that ``applymap(func)`` agrees between the two dask collections.

    The pandas frame returned by ``_make_random_frame`` is wrapped with
    ``dd.from_pandas`` using the same partition count as the second
    collection returned by the helper, ``func`` is applied element-wise to
    both, and the results are compared with ``dd.assert_eq`` (dtype
    differences are ignored).
    """
    # NOTE(review): ``has_na`` is parametrized but never forwarded; the frame
    # is always built with ``include_na=False``, so both parametrizations run
    # the same data shape. Confirm whether ``include_na=has_na`` was intended.
    pandas_frame, gpu_collection = _make_random_frame(2000, include_na=False)

    cpu_collection = dd.from_pandas(
        pandas_frame, npartitions=gpu_collection.npartitions
    )

    dd.assert_eq(
        cpu_collection.applymap(func),
        gpu_collection.applymap(func),
        check_dtype=False,
    )
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
# Copyright (c) 2022, NVIDIA CORPORATION. | ||
|
||
import operator | ||
|
||
import numpy as np | ||
|
@@ -8,6 +10,8 @@ | |
|
||
import cudf | ||
|
||
from .utils import _make_random_frame | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Here as well. |
||
|
||
|
||
def _make_empty_frame(npartitions=2): | ||
df = pd.DataFrame({"x": [], "y": []}) | ||
|
@@ -16,15 +20,6 @@ def _make_empty_frame(npartitions=2): | |
return dgf | ||
|
||
|
||
def _make_random_frame(nelem, npartitions=2):
    """Return a random pandas frame and a dask collection built from it.

    Parameters
    ----------
    nelem : int
        Number of rows; passed as ``size`` to ``np.random.random`` for each
        of the two columns ``"x"`` and ``"y"``.
    npartitions : int, default 2
        Partition count handed to ``dd.from_pandas``.

    Returns
    -------
    tuple
        ``(pandas_frame, dask_frame)`` where ``dask_frame`` is created by
        ``dd.from_pandas`` from the cudf conversion of ``pandas_frame``.
    """
    columns = {
        "x": np.random.random(size=nelem),
        "y": np.random.random(size=nelem),
    }
    pandas_frame = pd.DataFrame(columns)
    dask_frame = dd.from_pandas(
        cudf.DataFrame.from_pandas(pandas_frame), npartitions=npartitions
    )
    return pandas_frame, dask_frame
|
||
|
||
def _make_random_frame_float(nelem, npartitions=2): | ||
df = pd.DataFrame( | ||
{ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Copyright (c) 2022, NVIDIA CORPORATION. | ||
|
||
import numpy as np | ||
import pandas as pd | ||
|
||
import dask.dataframe as dd | ||
|
||
import cudf | ||
|
||
|
||
def _make_random_frame(nelem, npartitions=2, include_na=False):
    """Return a random pandas frame and a dask collection built from it.

    Parameters
    ----------
    nelem : int
        Number of rows; passed as ``size`` to ``np.random.random`` for each
        of the two columns ``"x"`` and ``"y"``.
    npartitions : int, default 2
        Partition count handed to ``dd.from_pandas``.
    include_na : bool, default False
        When true, every second row of column ``"x"`` (starting at row 0)
        is overwritten with ``pd.NA`` before the cudf conversion.

    Returns
    -------
    tuple
        ``(df, dgf)`` where ``df`` is the pandas frame and ``dgf`` is created
        by ``dd.from_pandas`` from the cudf conversion of ``df``.
    """
    df = pd.DataFrame(
        {"x": np.random.random(size=nelem), "y": np.random.random(size=nelem)}
    )

    if include_na:
        # Assign through a single ``.loc`` call rather than the chained
        # ``df["x"][::2] = ...``: chained assignment may operate on a
        # temporary and is not guaranteed to write back into ``df``.
        # The frame has the default integer index, so the label slice
        # ``::2`` selects the same rows as the positional slice did.
        df.loc[::2, "x"] = pd.NA

    gdf = cudf.DataFrame.from_pandas(df)
    dgf = dd.from_pandas(gdf, npartitions=npartitions)
    return df, dgf
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you also add this entry to this section of the docs: https://github.com/rapidsai/cudf/blob/branch-22.06/docs/cudf/source/api_docs/dataframe.rst#function-application-groupby--window