diff --git a/docs/cudf/source/api_docs/dataframe.rst b/docs/cudf/source/api_docs/dataframe.rst index 94f88a40ea5..2de55553c3f 100644 --- a/docs/cudf/source/api_docs/dataframe.rst +++ b/docs/cudf/source/api_docs/dataframe.rst @@ -254,7 +254,6 @@ Serialization / IO / conversion DataFrame.from_arrow DataFrame.from_pandas DataFrame.from_records - DataFrame.hash_columns DataFrame.hash_values DataFrame.to_arrow DataFrame.to_dlpack diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index b7fc5efb412..d97ea456f72 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -4150,38 +4150,6 @@ def apply_chunks( tpb=tpb, ) - def hash_columns(self, columns=None, method="murmur3"): - """Hash the given *columns* and return a new device array - - This method is deprecated. Replace ``df.hash_columns(columns, method)`` - with ``df[columns].hash_values(method)``. - - Parameters - ---------- - columns : sequence of str; optional - Sequence of column names. If columns is *None* (unspecified), - all columns in the frame are used. - method : {'murmur3', 'md5'}, default 'murmur3' - Hash function to use: - * murmur3: MurmurHash3 hash function. - * md5: MD5 hash function. - - Returns - ------- - Series - Hash values for each row. - """ - warnings.warn( - "The `hash_columns` method will be removed in a future cuDF " - "release. Replace `df.hash_columns(columns, method)` with " - "`df[columns].hash_values(method)`.", - FutureWarning, - ) - if columns is None: - # Slice by [:] to keep all columns. - columns = slice(None, None, None) - return self[columns].hash_values(method=method) - def hash_values(self, method="murmur3"): """Compute the hash of values in each row. diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 78560ee6723..33c993cc56a 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -1107,34 +1107,6 @@ def test_assign(): np.testing.assert_equal(gdf2.y.to_numpy(), [2, 3, 4]) -@pytest.mark.parametrize("nrows", [1, 8, 100, 1000]) -@pytest.mark.parametrize("method", ["murmur3", "md5"]) -def test_dataframe_hash_columns(nrows, method): - gdf = cudf.DataFrame() - data = np.asarray(range(nrows)) - data[0] = data[-1] # make first and last the same - gdf["a"] = data - gdf["b"] = gdf.a + 100 - with pytest.warns(FutureWarning): - out = gdf.hash_columns(["a", "b"]) - assert isinstance(out, cudf.Series) - assert len(out) == nrows - assert out.dtype == np.int32 - - # Check default - with pytest.warns(FutureWarning): - out_all = gdf.hash_columns() - assert_eq(out, out_all) - - # Check single column - with pytest.warns(FutureWarning): - out_one = gdf.hash_columns(["a"], method=method) - # First matches last - assert out_one.iloc[0] == out_one.iloc[-1] - # Equivalent to the cudf.Series.hash_values() - assert_eq(gdf["a"].hash_values(method=method), out_one) - - @pytest.mark.parametrize("nrows", [1, 8, 100, 1000]) @pytest.mark.parametrize("method", ["murmur3", "md5"]) def test_dataframe_hash_values(nrows, method):