Deprecate method parameters to DataFrame.join, DataFrame.merge. (#9291)

The `method` parameter to `DataFrame.join` and `DataFrame.merge` [isn't used internally](https://github.com/rapidsai/cudf/blob/e2098e56f0cb209b1d916ce617c04533444a056a/python/cudf/cudf/core/join/join.py#L90) after changes in #7454. This PR updates the docstrings and adds deprecation notices via `FutureWarning` as discussed in #9347. The parameter is now deprecated in the public API. I removed all internal uses of the `method` parameter. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Charles Blackmon-Luca (https://github.com/charlesbluca) URL: #9291
rapidsai · Oct 1, 2021 · 34b54ca · 34b54ca
1 parent 91f1dea
commit 34b54ca
Show file tree

Hide file tree

Showing 5 changed files with 30 additions and 38 deletions.
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
@@ -4399,7 +4399,7 @@ def merge(
         sort=False,
         lsuffix=None,
         rsuffix=None,
-        method="hash",
+        method=None,
         indicator=False,
         suffixes=("_x", "_y"),
     ):
@@ -4444,8 +4444,9 @@ def merge(
         suffixes: Tuple[str, str], defaults to ('_x', '_y')
             Suffixes applied to overlapping column names on the left and right
             sides
-        method : {‘hash’, ‘sort’}, default ‘hash’
-            The implementation method to be used for the operation.
+        method :
+            This parameter is unused. It is deprecated and will be removed in a
+            future version.
 
         Returns
         -------
@@ -4507,6 +4508,13 @@ def merge(
         else:
             lsuffix, rsuffix = suffixes
 
+        if method is not None:
+            warnings.warn(
+                "The 'method' argument is deprecated and will be removed "
+                "in a future version of cudf.",
+                FutureWarning,
+            )
+
         # Compute merge
         gdf_result = super()._merge(
             right,
@@ -4517,7 +4525,6 @@ def merge(
             right_index=right_index,
             how=how,
             sort=sort,
-            method=method,
             indicator=indicator,
             suffixes=suffixes,
         )
@@ -4532,7 +4539,7 @@ def join(
         lsuffix="",
         rsuffix="",
         sort=False,
-        method="hash",
+        method=None,
     ):
         """Join columns with other DataFrame on index or on a key column.
 
@@ -4546,6 +4553,9 @@ def join(
             column names when avoiding conflicts.
         sort : bool
             Set to True to ensure sorted ordering.
+        method :
+            This parameter is unused. It is deprecated and will be removed in a
+            future version.
 
         Returns
         -------
@@ -4559,6 +4569,13 @@ def join(
         - *on* is not supported yet due to lack of multi-index support.
         """
 
+        if method is not None:
+            warnings.warn(
+                "The 'method' argument is deprecated and will be removed "
+                "in a future version of cudf.",
+                FutureWarning,
+            )
+
         lhs = self
         rhs = other
 
@@ -4568,7 +4585,6 @@ def join(
             right_index=True,
             how=how,
             suffixes=(lsuffix, rsuffix),
-            method=method,
             sort=sort,
         )
         df.index.name = (

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
@@ -3552,7 +3552,6 @@ def _merge(
         right_index=False,
         how="inner",
         sort=False,
-        method="hash",
         indicator=False,
         suffixes=("_x", "_y"),
     ):
@@ -3575,7 +3574,6 @@ def _merge(
             right_index=right_index,
             how=how,
             sort=sort,
-            method=method,
             indicator=indicator,
             suffixes=suffixes,
         )

diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
@@ -29,7 +29,6 @@ def merge(
     right_index,
     how,
     sort,
-    method,
     indicator,
     suffixes,
 ):
@@ -47,7 +46,6 @@ def merge(
         right_index=right_index,
         how=how,
         sort=sort,
-        method=method,
         indicator=indicator,
         suffixes=suffixes,
     )
@@ -87,7 +85,6 @@ def __init__(
         right_index,
         how,
         sort,
-        method,
         indicator,
         suffixes,
     ):

diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
@@ -4651,7 +4651,6 @@ def merge(
             right_index=right_index,
             how=how,
             sort=sort,
-            method=method,
             indicator=False,
             suffixes=suffixes,
         )

diff --git a/python/cudf/cudf/tests/test_joining.py b/python/cudf/cudf/tests/test_joining.py
@@ -21,47 +21,30 @@ def make_params():
     np.random.seed(0)
 
     hows = _JOIN_TYPES
-    methods = "hash,sort".split(",")
 
     # Test specific cases (1)
     aa = [0, 0, 4, 5, 5]
     bb = [0, 0, 2, 3, 5]
     for how in hows:
-        if how in ["left", "inner", "right", "leftanti", "leftsemi"]:
-            for method in methods:
-                yield (aa, bb, how, method)
-        else:
-            yield (aa, bb, how, "sort")
+        yield (aa, bb, how)
 
     # Test specific cases (2)
     aa = [0, 0, 1, 2, 3]
     bb = [0, 1, 2, 2, 3]
     for how in hows:
-        if how in ["left", "inner", "right", "leftanti", "leftsemi"]:
-            for method in methods:
-                yield (aa, bb, how, method)
-        else:
-            yield (aa, bb, how, "sort")
+        yield (aa, bb, how)
 
     # Test large random integer inputs
     aa = np.random.randint(0, 50, 100)
     bb = np.random.randint(0, 50, 100)
     for how in hows:
-        if how in ["left", "inner", "right", "leftanti", "leftsemi"]:
-            for method in methods:
-                yield (aa, bb, how, method)
-        else:
-            yield (aa, bb, how, "sort")
+        yield (aa, bb, how)
 
     # Test floating point inputs
     aa = np.random.random(50)
     bb = np.random.random(50)
     for how in hows:
-        if how in ["left", "inner", "right", "leftanti", "leftsemi"]:
-            for method in methods:
-                yield (aa, bb, how, method)
-        else:
-            yield (aa, bb, how, "sort")
+        yield (aa, bb, how)
 
 
 def pd_odd_joins(left, right, join_type):
@@ -102,8 +85,8 @@ def assert_join_results_equal(expect, got, how, **kwargs):
         raise ValueError(f"Not a join result: {type(expect).__name__}")
 
 
-@pytest.mark.parametrize("aa,bb,how,method", make_params())
-def test_dataframe_join_how(aa, bb, how, method):
+@pytest.mark.parametrize("aa,bb,how", make_params())
+def test_dataframe_join_how(aa, bb, how):
     df = cudf.DataFrame()
     df["a"] = aa
     df["b"] = bb
@@ -122,7 +105,7 @@ def work_pandas(df, how):
     def work_gdf(df):
         df1 = df.set_index("a")
         df2 = df.set_index("b")
-        joined = df1.join(df2, how=how, sort=True, method=method)
+        joined = df1.join(df2, how=how, sort=True)
         return joined
 
     expect = work_pandas(df.to_pandas(), how)
@@ -136,8 +119,7 @@ def work_gdf(df):
     assert got.index.name is None
 
     assert list(expect.columns) == list(got.columns)
-    # test disabled until libgdf sort join gets updated with new api
-    if method == "hash":
+    if how in {"left", "inner", "right", "leftanti", "leftsemi"}:
         assert_eq(sorted(expect.index.values), sorted(got.index.values))
         if how != "outer":
             # Newly introduced ambiguous ValueError thrown when