Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Deprecate merge_sorted, change dask cudf usage to internal method #10713

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions python/cudf/cudf/core/reshape.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright (c) 2018-2022, NVIDIA CORPORATION.

import itertools
import warnings
from collections import abc
from typing import Dict, Optional

Expand Down Expand Up @@ -791,6 +792,24 @@ def merge_sorted(
A new, lexicographically sorted, DataFrame/Series.
"""

warnings.warn(
"merge_sorted is deprecated and will be removed in a "
"future release.",
FutureWarning,
)
return _merge_sorted(
objs, keys, by_index, ignore_index, ascending, na_position
)


def _merge_sorted(
objs,
keys=None,
by_index=False,
ignore_index=False,
ascending=True,
na_position="last",
):
if not pd.api.types.is_list_like(objs):
raise TypeError("objs must be a list-like of Frame-like objects")

Expand Down
17 changes: 9 additions & 8 deletions python/cudf/cudf/tests/test_reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
@pytest.mark.parametrize("nulls", ["none", "some", "all"])
def test_melt(nulls, num_id_vars, num_value_vars, num_rows, dtype):
if dtype not in ["float32", "float64"] and nulls in ["some", "all"]:
pytest.skip(msg="nulls not supported in dtype: " + dtype)
pytest.skip(reason="nulls not supported in dtype: " + dtype)

pdf = pd.DataFrame()
id_vars = []
Expand Down Expand Up @@ -87,7 +87,7 @@ def test_melt(nulls, num_id_vars, num_value_vars, num_rows, dtype):
@pytest.mark.parametrize("nulls", ["none", "some"])
def test_df_stack(nulls, num_cols, num_rows, dtype):
if dtype not in ["float32", "float64"] and nulls in ["some"]:
pytest.skip(msg="nulls not supported in dtype: " + dtype)
pytest.skip(reason="nulls not supported in dtype: " + dtype)

pdf = pd.DataFrame()
for i in range(num_cols):
Expand Down Expand Up @@ -139,7 +139,7 @@ def test_df_stack_reset_index():
def test_interleave_columns(nulls, num_cols, num_rows, dtype):

if dtype not in ["float32", "float64"] and nulls in ["some"]:
pytest.skip(msg="nulls not supported in dtype: " + dtype)
pytest.skip(reason="nulls not supported in dtype: " + dtype)

pdf = pd.DataFrame(dtype=dtype)
for i in range(num_cols):
Expand Down Expand Up @@ -176,7 +176,7 @@ def test_interleave_columns(nulls, num_cols, num_rows, dtype):
def test_tile(nulls, num_cols, num_rows, dtype, count):

if dtype not in ["float32", "float64"] and nulls in ["some"]:
pytest.skip(msg="nulls not supported in dtype: " + dtype)
pytest.skip(reason="nulls not supported in dtype: " + dtype)

pdf = pd.DataFrame(dtype=dtype)
for i in range(num_cols):
Expand Down Expand Up @@ -269,7 +269,7 @@ def test_df_merge_sorted(nparts, keys, na_position, ascending):
expect = df.sort_values(
keys_1, na_position=na_position, ascending=ascending
)
result = cudf.merge_sorted(
result = cudf.core.reshape._merge_sorted(
dfs, keys=keys, na_position=na_position, ascending=ascending
)
if keys:
Expand All @@ -290,7 +290,8 @@ def test_df_merge_sorted_index(nparts, index, ascending):
)

expect = df.sort_index(ascending=ascending)
result = cudf.merge_sorted(dfs, by_index=True, ascending=ascending)
with pytest.warns(FutureWarning, match="deprecated and will be removed"):
result = cudf.merge_sorted(dfs, by_index=True, ascending=ascending)

assert_eq(expect.index, result.index)

Expand All @@ -317,7 +318,7 @@ def test_df_merge_sorted_ignore_index(keys, na_position, ascending):
expect = df.sort_values(
keys_1, na_position=na_position, ascending=ascending
)
result = cudf.merge_sorted(
result = cudf.core.reshape._merge_sorted(
dfs,
keys=keys,
na_position=na_position,
Expand Down Expand Up @@ -347,7 +348,7 @@ def test_series_merge_sorted(nparts, key, na_position, ascending):
)

expect = df.sort_values(na_position=na_position, ascending=ascending)
result = cudf.merge_sorted(
result = cudf.core.reshape._merge_sorted(
dfs, na_position=na_position, ascending=ascending
)

Expand Down
2 changes: 1 addition & 1 deletion python/dask_cudf/dask_cudf/sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def _append_counts(val, count):
return val

# Sort by calculated quantile values, then number of observations.
combined_vals_counts = gd.merge_sorted(
combined_vals_counts = gd.core.reshape._merge_sorted(
[*map(_append_counts, vals, counts)]
)
combined_counts = cupy.asnumpy(combined_vals_counts["_counts"].values)
Expand Down