Skip to content

Commit

Permalink
Merge pull request #13448 from rapidsai/branch-23.06
Browse files (browse the repository at this point in the history)
Forward-merge branch-23.06 to branch-23.08
  • Loading branch information
GPUtester authored May 25, 2023
2 parents: 2def7f1 + 03f0c0c; commit: 53c685b
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 25 deletions.
2 changes: 1 addition & 1 deletion python/cudf/cudf/_lib/transform.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def one_hot_encode(Column input_column, Column categories):
move(c_result.second),
owner=owner,
column_names=[
x if x is not None else 'null' for x in pylist_categories
x if x is not None else '<NA>' for x in pylist_categories
]
)
return encodings
Expand Down
38 changes: 14 additions & 24 deletions python/cudf/cudf/tests/test_onehot.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import pytest

import cudf
from cudf import DataFrame
from cudf.testing._utils import assert_eq

pytestmark = pytest.mark.spilling
Expand All @@ -24,7 +23,7 @@
],
)
def test_get_dummies(data, index):
gdf = DataFrame({"x": data}, index=index)
gdf = cudf.DataFrame({"x": data}, index=index)
pdf = pd.DataFrame({"x": data}, index=index)

encoded_expected = pd.get_dummies(pdf, prefix="test")
Expand Down Expand Up @@ -66,16 +65,16 @@ def test_onehot_get_dummies_multicol(n_cols):
@pytest.mark.parametrize("dummy_na", [True, False])
def test_onehost_get_dummies_dummy_na(nan_as_null, dummy_na):
pdf = pd.DataFrame({"a": [0, 1, np.nan]})
df = DataFrame.from_pandas(pdf, nan_as_null=nan_as_null)
df = cudf.DataFrame.from_pandas(pdf, nan_as_null=nan_as_null)

expected = pd.get_dummies(pdf, dummy_na=dummy_na, columns=["a"])
with pytest.warns(FutureWarning):
got = cudf.get_dummies(df, dummy_na=dummy_na, columns=["a"])
actual = cudf.get_dummies(df, dummy_na=dummy_na, columns=["a"])

if dummy_na and nan_as_null:
got = got.rename(columns={"a_null": "a_nan"})[expected.columns]
actual = actual.rename(columns={"a_<NA>": "a_nan"})[expected.columns]

assert_eq(expected, got)
assert_eq(expected, actual)


@pytest.mark.parametrize(
Expand Down Expand Up @@ -104,7 +103,7 @@ def test_get_dummies_prefix_sep(prefix, prefix_sep):
"third": ["ji", "ji", "ji"],
}

gdf = DataFrame(data)
gdf = cudf.DataFrame(data)
pdf = pd.DataFrame(data)

encoded_expected = pd.get_dummies(
Expand All @@ -122,15 +121,11 @@ def test_get_dummies_with_nan():
df = cudf.DataFrame(
{"a": cudf.Series([1, 2, np.nan, None], nan_as_null=False)}
)
expected = cudf.DataFrame(
{
"a_1.0": [1, 0, 0, 0],
"a_2.0": [0, 1, 0, 0],
"a_nan": [0, 0, 1, 0],
"a_null": [0, 0, 0, 1],
},
dtype="uint8",

expected = pd.get_dummies(
df.to_pandas(nullable=True), dummy_na=True, columns=["a"]
)

with pytest.warns(FutureWarning):
actual = cudf.get_dummies(df, dummy_na=True, columns=["a"])

Expand Down Expand Up @@ -166,16 +161,11 @@ def test_get_dummies_array_like(data, prefix_sep, prefix, dtype):

def test_get_dummies_array_like_with_nan():
ser = cudf.Series([0.1, 2, 3, None, np.nan], nan_as_null=False)
expected = cudf.DataFrame(
{
"a_0.1": [1, 0, 0, 0, 0],
"a_2.0": [0, 1, 0, 0, 0],
"a_3.0": [0, 0, 1, 0, 0],
"a_nan": [0, 0, 0, 0, 1],
"a_null": [0, 0, 0, 1, 0],
},
dtype="uint8",

expected = pd.get_dummies(
ser.to_pandas(nullable=True), dummy_na=True, prefix="a", prefix_sep="_"
)

with pytest.warns(FutureWarning):
actual = cudf.get_dummies(
ser, dummy_na=True, prefix="a", prefix_sep="_"
Expand Down

0 comments on commit 53c685b

Please sign in to comment.