diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index 45e0fc345b5..5bf955544fc 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -28,7 +28,6 @@ set(cython_sources orc.pyx parquet.pyx reduce.pyx - replace.pyx round.pyx scalar.pyx sort.pyx diff --git a/python/cudf/cudf/_lib/__init__.py b/python/cudf/cudf/_lib/__init__.py index c51db601985..7aa49128f91 100644 --- a/python/cudf/cudf/_lib/__init__.py +++ b/python/cudf/cudf/_lib/__init__.py @@ -16,7 +16,6 @@ orc, parquet, reduce, - replace, round, sort, stream_compaction, diff --git a/python/cudf/cudf/_lib/replace.pyx b/python/cudf/cudf/_lib/replace.pyx deleted file mode 100644 index b50c6dd25e3..00000000000 --- a/python/cudf/cudf/_lib/replace.pyx +++ /dev/null @@ -1,193 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. - -from cudf.api.types import is_scalar -from cudf.core.buffer import acquire_spill_lock - -from cudf._lib.column cimport Column -from cudf._lib.scalar cimport DeviceScalar - -import pylibcudf - -from cudf._lib.scalar import as_device_scalar - - -@acquire_spill_lock() -def replace(Column input_col, Column values_to_replace, - Column replacement_values): - """ - Replaces values from values_to_replace with corresponding value from - replacement_values in input_col - - Parameters - ---------- - input_col : Column whose value will be updated - values_to_replace : Column with values which needs to be replaced - replacement_values : Column with values which will replace - """ - - return Column.from_pylibcudf( - pylibcudf.replace.find_and_replace_all( - input_col.to_pylibcudf(mode="read"), - values_to_replace.to_pylibcudf(mode="read"), - replacement_values.to_pylibcudf(mode="read"), - ) - ) - - -@acquire_spill_lock() -def replace_nulls_column(Column input_col, Column replacement_values): - """ - Replaces null values in input_col with corresponding values from - replacement_values - - Parameters - ---------- - input_col : Column whose value will be updated - replacement_values : Column with values which will replace nulls - """ - return Column.from_pylibcudf( - pylibcudf.replace.replace_nulls( - input_col.to_pylibcudf(mode="read"), - replacement_values.to_pylibcudf(mode="read"), - ) - ) - - -@acquire_spill_lock() -def replace_nulls_scalar(Column input_col, DeviceScalar replacement_value): - """ - Replaces null values in input_col with replacement_value - - Parameters - ---------- - input_col : Column whose value will be updated - replacement_value : DeviceScalar with value which will replace nulls - """ - return Column.from_pylibcudf( - pylibcudf.replace.replace_nulls( - input_col.to_pylibcudf(mode="read"), - replacement_value.c_value, - ) - ) - - -@acquire_spill_lock() -def replace_nulls_fill(Column input_col, object method): - """ - Replaces null values in input_col with replacement_value - - Parameters - ---------- - input_col : Column whose value will be updated - method : 'ffill' or 'bfill' - """ - return Column.from_pylibcudf( - pylibcudf.replace.replace_nulls( - input_col.to_pylibcudf(mode="read"), - pylibcudf.replace.ReplacePolicy.PRECEDING - if method == 'ffill' - else pylibcudf.replace.ReplacePolicy.FOLLOWING, - ) - ) - - -def replace_nulls( - Column input_col, - object replacement=None, - object method=None, - object dtype=None -): - """ - Calls one of the version of replace_nulls depending on type - of replacement - """ - - if replacement is None and method is None: - raise ValueError("Must specify a fill 'value' or 'method'.") - - if replacement and method: - raise ValueError("Cannot specify both 'value' and 'method'.") - - if method: - return replace_nulls_fill(input_col, method) - elif is_scalar(replacement): - return replace_nulls_scalar( - input_col, - as_device_scalar(replacement, dtype=dtype) - ) - else: - return replace_nulls_column(input_col, replacement) - - -@acquire_spill_lock() -def clamp(Column input_col, DeviceScalar lo, DeviceScalar hi): - """ - Clip the input_col such that values < lo will be replaced by lo - and > hi will be replaced by hi - - Parameters - ---------- - input_col : Column whose value will be updated - lo : DeviceScalar value for clipping lower values - hi : DeviceScalar value for clipping upper values - """ - return Column.from_pylibcudf( - pylibcudf.replace.clamp( - input_col.to_pylibcudf(mode="read"), - lo.c_value, - hi.c_value, - ) - ) - - -@acquire_spill_lock() -def clip(Column input_col, object lo, object hi): - """ - Clip the input_col such that values < lo will be replaced by lo - and > hi will be replaced by hi - """ - - lo_scalar = as_device_scalar(lo, dtype=input_col.dtype) - hi_scalar = as_device_scalar(hi, dtype=input_col.dtype) - - return clamp(input_col, lo_scalar, hi_scalar) - - -@acquire_spill_lock() -def normalize_nans_and_zeros_inplace(Column input_col): - """ - Inplace normalizing - """ - pylibcudf.replace.normalize_nans_and_zeros( - input_col.to_pylibcudf(mode="write"), inplace=True - ) - - -@acquire_spill_lock() -def normalize_nans_and_zeros_column(Column input_col): - """ - Returns a new normalized Column - """ - return Column.from_pylibcudf( - pylibcudf.replace.normalize_nans_and_zeros( - input_col.to_pylibcudf(mode="read") - ) - ) - - -def normalize_nans_and_zeros(Column input_col, in_place=False): - """ - Normalize the NaN and zeros in input_col - Convert -NaN -> NaN - Convert -0.0 -> 0.0 - - Parameters - ---------- - input_col : Column that needs to be normalized - in_place : boolean whether to normalize in place or return new column - """ - - if in_place is True: - normalize_nans_and_zeros_inplace(input_col) - else: - return normalize_nans_and_zeros_column(input_col) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 7354b917f90..7551703c53e 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -843,9 +843,9 @@ def values(self): """ raise NotImplementedError("cudf.Categorical is not yet implemented") - def clip(self, lo: ScalarLike, hi: ScalarLike) -> "column.ColumnBase": + def clip(self, lo: ScalarLike, hi: ScalarLike) -> Self: return ( - self.astype(self.categories.dtype).clip(lo, hi).astype(self.dtype) + self.astype(self.categories.dtype).clip(lo, hi).astype(self.dtype) # type: ignore[return-value] ) def data_array_view( @@ -989,10 +989,8 @@ def find_and_replace( replacement_col = catmap._data["index"].astype(replaced.codes.dtype) replaced_codes = column.as_column(replaced.codes) - output = libcudf.replace.replace( - replaced_codes, to_replace_col, replacement_col - ) - codes = as_unsigned_codes(len(new_cats["cats"]), output) + output = replaced_codes.replace(to_replace_col, replacement_col) + codes = as_unsigned_codes(len(new_cats["cats"]), output) # type: ignore[arg-type] result = type(self)( data=self.data, # type: ignore[arg-type] diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index f0df4a3c1b3..fdd939f592c 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -241,8 +241,14 @@ def find_and_replace( ) -> Self: raise NotImplementedError - def clip(self, lo: ScalarLike, hi: ScalarLike) -> ColumnBase: - return libcudf.replace.clip(self, lo, hi) + @acquire_spill_lock() + def clip(self, lo: ScalarLike, hi: ScalarLike) -> Self: + plc_column = plc.replace.clamp( + self.to_pylibcudf(mode="read"), + cudf.Scalar(lo, self.dtype).device_value.c_value, + cudf.Scalar(hi, self.dtype).device_value.c_value, + ) + return type(self).from_pylibcudf(plc_column) # type: ignore[return-value] def equals(self, other: ColumnBase, check_dtypes: bool = False) -> bool: if self is other: @@ -686,6 +692,18 @@ def _validate_fillna_value( return cudf.Scalar(fill_value, dtype=self.dtype) return as_column(fill_value) + @acquire_spill_lock() + def replace( + self, values_to_replace: Self, replacement_values: Self + ) -> Self: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.replace.find_and_replace_all( + self.to_pylibcudf(mode="read"), + values_to_replace.to_pylibcudf(mode="read"), + replacement_values.to_pylibcudf(mode="read"), + ) + ) + def fillna( self, fill_value: ScalarLike | ColumnLike, @@ -704,11 +722,32 @@ def fillna( return self.copy() else: fill_value = self._validate_fillna_value(fill_value) - return libcudf.replace.replace_nulls( - input_col=self.nans_to_nulls(), - replacement=fill_value, - method=method, - )._with_type_metadata(self.dtype) + + if fill_value is None and method is None: + raise ValueError("Must specify a fill 'value' or 'method'.") + + if fill_value and method: + raise ValueError("Cannot specify both 'value' and 'method'.") + + input_col = self.nans_to_nulls() + + with acquire_spill_lock(): + if method: + plc_replace = ( + plc.replace.ReplacePolicy.PRECEDING + if method == "ffill" + else plc.replace.ReplacePolicy.FOLLOWING + ) + elif is_scalar(fill_value): + plc_replace = cudf.Scalar(fill_value).device_value.c_value + else: + plc_replace = fill_value.to_pylibcudf(mode="read") + plc_column = plc.replace.replace_nulls( + input_col.to_pylibcudf(mode="read"), + plc_replace, + ) + result = type(self).from_pylibcudf(plc_column) + return result._with_type_metadata(self.dtype) # type: ignore[return-value] def isnull(self) -> ColumnBase: """Identify missing values in a Column.""" diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index a7538c1c947..c8f859596b2 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -482,7 +482,7 @@ def find_and_replace( to_replace: ColumnLike, replacement: ColumnLike, all_nan: bool = False, - ) -> NumericalColumn: + ) -> Self: """ Return col with *to_replace* replaced with *value*. """ @@ -547,7 +547,7 @@ def find_and_replace( ) elif len(replacement_col) == 1 and len(to_replace_col) == 0: return self.copy() - replaced = self.astype(common_type) + replaced = cast(Self, self.astype(common_type)) df = cudf.DataFrame._from_data( { "old": to_replace_col.astype(common_type), @@ -563,9 +563,7 @@ def find_and_replace( ) df = df.dropna(subset=["old"]) - return libcudf.replace.replace( - replaced, df._data["old"], df._data["new"] - ) + return replaced.replace(df._data["old"], df._data["new"]) def _validate_fillna_value( self, fill_value: ScalarLike | ColumnLike diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index a9ab2d373fd..ff513d91053 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -6038,7 +6038,7 @@ def find_and_replace( df = df.dropna(subset=["old"]) else: res = self - return libcudf.replace.replace(res, df._data["old"], df._data["new"]) + return res.replace(df._data["old"], df._data["new"]) def normalize_binop_value(self, other) -> column.ColumnBase | cudf.Scalar: if (