Skip to content

Commit

Permalink
Deprecate the `method` parameter of DataFrame.join and DataFrame.merge. (#9291)
Browse files Browse the repository at this point in the history
The `method` parameter of `DataFrame.join` and `DataFrame.merge` [is no longer used internally](https://github.com/rapidsai/cudf/blob/e2098e56f0cb209b1d916ce617c04533444a056a/python/cudf/cudf/core/join/join.py#L90) following the changes in #7454. This PR updates the docstrings and adds deprecation notices via `FutureWarning`, as discussed in #9347.

The parameter is now deprecated in the public API: its default is `None`, and passing any other value emits a `FutureWarning`. I removed all internal uses of the `method` parameter.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Charles Blackmon-Luca (https://github.com/charlesbluca)

URL: #9291
  • Loading branch information
bdice authored Oct 1, 2021
1 parent 91f1dea commit 34b54ca
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 38 deletions.
28 changes: 22 additions & 6 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -4399,7 +4399,7 @@ def merge(
sort=False,
lsuffix=None,
rsuffix=None,
method="hash",
method=None,
indicator=False,
suffixes=("_x", "_y"),
):
Expand Down Expand Up @@ -4444,8 +4444,9 @@ def merge(
suffixes: Tuple[str, str], defaults to ('_x', '_y')
Suffixes applied to overlapping column names on the left and right
sides
method : {‘hash’, ‘sort’}, default ‘hash’
The implementation method to be used for the operation.
method :
This parameter is unused. It is deprecated and will be removed in a
future version.
Returns
-------
Expand Down Expand Up @@ -4507,6 +4508,13 @@ def merge(
else:
lsuffix, rsuffix = suffixes

if method is not None:
warnings.warn(
"The 'method' argument is deprecated and will be removed "
"in a future version of cudf.",
FutureWarning,
)

# Compute merge
gdf_result = super()._merge(
right,
Expand All @@ -4517,7 +4525,6 @@ def merge(
right_index=right_index,
how=how,
sort=sort,
method=method,
indicator=indicator,
suffixes=suffixes,
)
Expand All @@ -4532,7 +4539,7 @@ def join(
lsuffix="",
rsuffix="",
sort=False,
method="hash",
method=None,
):
"""Join columns with other DataFrame on index or on a key column.
Expand All @@ -4546,6 +4553,9 @@ def join(
column names when avoiding conflicts.
sort : bool
Set to True to ensure sorted ordering.
method :
This parameter is unused. It is deprecated and will be removed in a
future version.
Returns
-------
Expand All @@ -4559,6 +4569,13 @@ def join(
- *on* is not supported yet due to lack of multi-index support.
"""

if method is not None:
warnings.warn(
"The 'method' argument is deprecated and will be removed "
"in a future version of cudf.",
FutureWarning,
)

lhs = self
rhs = other

Expand All @@ -4568,7 +4585,6 @@ def join(
right_index=True,
how=how,
suffixes=(lsuffix, rsuffix),
method=method,
sort=sort,
)
df.index.name = (
Expand Down
2 changes: 0 additions & 2 deletions python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3552,7 +3552,6 @@ def _merge(
right_index=False,
how="inner",
sort=False,
method="hash",
indicator=False,
suffixes=("_x", "_y"),
):
Expand All @@ -3575,7 +3574,6 @@ def _merge(
right_index=right_index,
how=how,
sort=sort,
method=method,
indicator=indicator,
suffixes=suffixes,
)
Expand Down
3 changes: 0 additions & 3 deletions python/cudf/cudf/core/join/join.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ def merge(
right_index,
how,
sort,
method,
indicator,
suffixes,
):
Expand All @@ -47,7 +46,6 @@ def merge(
right_index=right_index,
how=how,
sort=sort,
method=method,
indicator=indicator,
suffixes=suffixes,
)
Expand Down Expand Up @@ -87,7 +85,6 @@ def __init__(
right_index,
how,
sort,
method,
indicator,
suffixes,
):
Expand Down
1 change: 0 additions & 1 deletion python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4651,7 +4651,6 @@ def merge(
right_index=right_index,
how=how,
sort=sort,
method=method,
indicator=False,
suffixes=suffixes,
)
Expand Down
34 changes: 8 additions & 26 deletions python/cudf/cudf/tests/test_joining.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,47 +21,30 @@ def make_params():
np.random.seed(0)

hows = _JOIN_TYPES
methods = "hash,sort".split(",")

# Test specific cases (1)
aa = [0, 0, 4, 5, 5]
bb = [0, 0, 2, 3, 5]
for how in hows:
if how in ["left", "inner", "right", "leftanti", "leftsemi"]:
for method in methods:
yield (aa, bb, how, method)
else:
yield (aa, bb, how, "sort")
yield (aa, bb, how)

# Test specific cases (2)
aa = [0, 0, 1, 2, 3]
bb = [0, 1, 2, 2, 3]
for how in hows:
if how in ["left", "inner", "right", "leftanti", "leftsemi"]:
for method in methods:
yield (aa, bb, how, method)
else:
yield (aa, bb, how, "sort")
yield (aa, bb, how)

# Test large random integer inputs
aa = np.random.randint(0, 50, 100)
bb = np.random.randint(0, 50, 100)
for how in hows:
if how in ["left", "inner", "right", "leftanti", "leftsemi"]:
for method in methods:
yield (aa, bb, how, method)
else:
yield (aa, bb, how, "sort")
yield (aa, bb, how)

# Test floating point inputs
aa = np.random.random(50)
bb = np.random.random(50)
for how in hows:
if how in ["left", "inner", "right", "leftanti", "leftsemi"]:
for method in methods:
yield (aa, bb, how, method)
else:
yield (aa, bb, how, "sort")
yield (aa, bb, how)


def pd_odd_joins(left, right, join_type):
Expand Down Expand Up @@ -102,8 +85,8 @@ def assert_join_results_equal(expect, got, how, **kwargs):
raise ValueError(f"Not a join result: {type(expect).__name__}")


@pytest.mark.parametrize("aa,bb,how,method", make_params())
def test_dataframe_join_how(aa, bb, how, method):
@pytest.mark.parametrize("aa,bb,how", make_params())
def test_dataframe_join_how(aa, bb, how):
df = cudf.DataFrame()
df["a"] = aa
df["b"] = bb
Expand All @@ -122,7 +105,7 @@ def work_pandas(df, how):
def work_gdf(df):
df1 = df.set_index("a")
df2 = df.set_index("b")
joined = df1.join(df2, how=how, sort=True, method=method)
joined = df1.join(df2, how=how, sort=True)
return joined

expect = work_pandas(df.to_pandas(), how)
Expand All @@ -136,8 +119,7 @@ def work_gdf(df):
assert got.index.name is None

assert list(expect.columns) == list(got.columns)
# test disabled until libgdf sort join gets updated with new api
if method == "hash":
if how in {"left", "inner", "right", "leftanti", "leftsemi"}:
assert_eq(sorted(expect.index.values), sorted(got.index.values))
if how != "outer":
# Newly introduced ambiguous ValueError thrown when
Expand Down

0 comments on commit 34b54ca

Please sign in to comment.