From bcd84ad76fd0f2676d3ced0db58dc31c018addcb Mon Sep 17 00:00:00 2001 From: martinfalisse Date: Wed, 2 Feb 2022 22:43:58 +0100 Subject: [PATCH] Remove unnecessary nunique function in series. --- python/cudf/cudf/core/dataframe.py | 10 +++++----- python/cudf/cudf/core/frame.py | 6 ++++-- python/cudf/cudf/core/single_column_frame.py | 5 +++-- python/cudf/cudf/tests/test_dataframe.py | 3 +-- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index bb9cd9b5cc7..3735a949277 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -6031,11 +6031,12 @@ def nunique(self, axis=0, dropna=True): """ Count number of distinct elements in specified axis. Return Series with number of distinct elements. Can ignore NaN values. - + Parameters ---------- axis : {0 or 'index', 1 or 'columns'}, default 0 - The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for column-wise. + The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for + column-wise. dropna : bool, default True Don't include NaN in the counts. @@ -6053,12 +6054,11 @@ def nunique(self, axis=0, dropna=True): dtype: int64 """ if axis != 0: - raise NotImplementedError( - "axis parameter is not supported yet." - ) + raise NotImplementedError("axis parameter is not supported yet.") return cudf.Series(super().nunique(method="sort", dropna=dropna)) + def from_dataframe(df, allow_copy=False): return df_protocol.from_dataframe(df, allow_copy=allow_copy) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index a05986555b1..7e97d655147 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -2,6 +2,7 @@ from __future__ import annotations +import builtins import copy import pickle import warnings @@ -6420,10 +6421,11 @@ def nunique(self, method: builtins.str = "sort", dropna: bool = True): Name and unique value counts of each column in frame. """ return { - name: col.distinct_count(method=method, dropna=dropna) - for name, col in self._data.items() + name: col.distinct_count(method=method, dropna=dropna) + for name, col in self._data.items() } + def _get_replacement_values_for_columns( to_replace: Any, value: Any, columns_dtype_map: Dict[Any, Any] ) -> Tuple[Dict[Any, bool], Dict[Any, Any], Dict[Any, Any]]: diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py index ea1917acc10..ef479f19363 100644 --- a/python/cudf/cudf/core/single_column_frame.py +++ b/python/cudf/cudf/core/single_column_frame.py @@ -3,6 +3,7 @@ from __future__ import annotations +import builtins from typing import Any, Dict, MutableMapping, Optional, Tuple, TypeVar, Union import cupy @@ -328,7 +329,7 @@ def _make_operands_for_binop( def nunique(self, method: builtins.str = "sort", dropna: bool = True): """ - Returns count of unique values for the column. + Return count of unique values for the column. Parameters ---------- @@ -342,4 +343,4 @@ def nunique(self, method: builtins.str = "sort", dropna: bool = True): int Number of unique values in the column. """ - return self._column.distinct_count(method=method, dropna=dropna) \ No newline at end of file + return self._column.distinct_count(method=method, dropna=dropna) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index d0a25fd3e8c..ba2caf7c6c8 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -9095,7 +9095,7 @@ def test_dataframe_nunique(data): @pytest.mark.parametrize( - "data", [{ "key": [0, 1, 1, 0, 0, 1], "val": [1, 8, 3, 9, -3, 8]}], + "data", [{"key": [0, 1, 1, 0, 0, 1], "val": [1, 8, 3, 9, -3, 8]}], ) def test_dataframe_nunique_index(data): gdf = cudf.DataFrame(data) @@ -9113,4 +9113,3 @@ def test_dataframe_rename_duplicate_column(): ValueError, match="Duplicate column names are not allowed" ): gdf.rename(columns={"a": "b"}, inplace=True) -