-
Notifications
You must be signed in to change notification settings - Fork 915
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Remove cudf._lib.replace in favor of inlining pylibcudf #17428
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,7 +18,6 @@ | |
parquet, | ||
partitioning, | ||
reduce, | ||
replace, | ||
reshape, | ||
rolling, | ||
round, | ||
|
This file was deleted.
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -241,8 +241,14 @@ def find_and_replace( | |||||||||
) -> Self: | ||||||||||
raise NotImplementedError | ||||||||||
|
||||||||||
@acquire_spill_lock() | ||||||||||
def clip(self, lo: ScalarLike, hi: ScalarLike) -> ColumnBase: | ||||||||||
return libcudf.replace.clip(self, lo, hi) | ||||||||||
plc_column = plc.replace.clamp( | ||||||||||
self.to_pylibcudf(mode="read"), | ||||||||||
cudf.Scalar(lo, self.dtype).device_value.c_value, | ||||||||||
cudf.Scalar(hi, self.dtype).device_value.c_value, | ||||||||||
) | ||||||||||
return type(self).from_pylibcudf(plc_column) | ||||||||||
|
||||||||||
def equals(self, other: ColumnBase, check_dtypes: bool = False) -> bool: | ||||||||||
if self is other: | ||||||||||
|
@@ -686,6 +692,18 @@ def _validate_fillna_value( | |||||||||
return cudf.Scalar(fill_value, dtype=self.dtype) | ||||||||||
return as_column(fill_value) | ||||||||||
|
||||||||||
@acquire_spill_lock() | ||||||||||
def replace( | ||||||||||
self, values_to_replace: ColumnBase, replacement_values: ColumnBase | ||||||||||
) -> ColumnBase: | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
The values to replace and the replacements must have the same type as Self, and we get a Self back. |
||||||||||
return type(self).from_pylibcudf( | ||||||||||
plc.replace.find_and_replace_all( | ||||||||||
self.to_pylibcudf(mode="read"), | ||||||||||
values_to_replace.to_pylibcudf(mode="read"), | ||||||||||
replacement_values.to_pylibcudf(mode="read"), | ||||||||||
) | ||||||||||
) | ||||||||||
|
||||||||||
def fillna( | ||||||||||
self, | ||||||||||
fill_value: ScalarLike | ColumnLike, | ||||||||||
|
@@ -704,11 +722,32 @@ def fillna( | |||||||||
return self.copy() | ||||||||||
else: | ||||||||||
fill_value = self._validate_fillna_value(fill_value) | ||||||||||
return libcudf.replace.replace_nulls( | ||||||||||
input_col=self.nans_to_nulls(), | ||||||||||
replacement=fill_value, | ||||||||||
method=method, | ||||||||||
)._with_type_metadata(self.dtype) | ||||||||||
|
||||||||||
if fill_value is None and method is None: | ||||||||||
raise ValueError("Must specify a fill 'value' or 'method'.") | ||||||||||
|
||||||||||
if fill_value and method: | ||||||||||
raise ValueError("Cannot specify both 'value' and 'method'.") | ||||||||||
|
||||||||||
input_col = self.nans_to_nulls() | ||||||||||
|
||||||||||
with acquire_spill_lock(): | ||||||||||
if method: | ||||||||||
plc_replace = ( | ||||||||||
plc.replace.ReplacePolicy.PRECEDING | ||||||||||
if method == "ffill" | ||||||||||
else plc.replace.ReplacePolicy.FOLLOWING | ||||||||||
) | ||||||||||
elif is_scalar(fill_value): | ||||||||||
plc_replace = cudf.Scalar(fill_value).device_value.c_value | ||||||||||
else: | ||||||||||
plc_replace = fill_value.to_pylibcudf(mode="read") | ||||||||||
plc_column = plc.replace.replace_nulls( | ||||||||||
input_col.to_pylibcudf(mode="read"), | ||||||||||
plc_replace, | ||||||||||
) | ||||||||||
result = type(self).from_pylibcudf(plc_column) | ||||||||||
return result._with_type_metadata(self.dtype) # type: ignore[return-value] | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do you know why we need to ignore the type check here? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Because |
||||||||||
|
||||||||||
def isnull(self) -> ColumnBase: | ||||||||||
"""Identify missing values in a Column.""" | ||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -563,9 +563,7 @@ def find_and_replace( | |
) | ||
df = df.dropna(subset=["old"]) | ||
|
||
return libcudf.replace.replace( | ||
replaced, df._data["old"], df._data["new"] | ||
) | ||
return replaced.replace(df._data["old"], df._data["new"]) # type: ignore[return-value] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this one can go away if we type There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I was able to remove this, but I had to add a There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah, thanks |
||
|
||
def _validate_fillna_value( | ||
self, fill_value: ScalarLike | ColumnLike | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -6044,7 +6044,7 @@ def find_and_replace( | |||||
df = df.dropna(subset=["old"]) | ||||||
else: | ||||||
res = self | ||||||
return libcudf.replace.replace(res, df._data["old"], df._data["new"]) | ||||||
return res.replace(df._data["old"], df._data["new"]) # type: ignore[return-value] | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Likewise here
Suggested change
|
||||||
|
||||||
def normalize_binop_value(self, other) -> column.ColumnBase | cudf.Scalar: | ||||||
if ( | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.