-
Notifications
You must be signed in to change notification settings - Fork 915
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Remove cudf._lib.replace in favor of inlining pylibcudf #17428
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,7 +28,6 @@ set(cython_sources | |
orc.pyx | ||
parquet.pyx | ||
reduce.pyx | ||
replace.pyx | ||
round.pyx | ||
scalar.pyx | ||
sort.pyx | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,7 +16,6 @@ | |
orc, | ||
parquet, | ||
reduce, | ||
replace, | ||
round, | ||
sort, | ||
stream_compaction, | ||
|
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -241,8 +241,14 @@ def find_and_replace( | |
) -> Self: | ||
raise NotImplementedError | ||
|
||
def clip(self, lo: ScalarLike, hi: ScalarLike) -> ColumnBase: | ||
return libcudf.replace.clip(self, lo, hi) | ||
@acquire_spill_lock() | ||
def clip(self, lo: ScalarLike, hi: ScalarLike) -> Self: | ||
plc_column = plc.replace.clamp( | ||
self.to_pylibcudf(mode="read"), | ||
cudf.Scalar(lo, self.dtype).device_value.c_value, | ||
cudf.Scalar(hi, self.dtype).device_value.c_value, | ||
) | ||
return type(self).from_pylibcudf(plc_column) # type: ignore[return-value] | ||
|
||
def equals(self, other: ColumnBase, check_dtypes: bool = False) -> bool: | ||
if self is other: | ||
|
@@ -686,6 +692,18 @@ def _validate_fillna_value( | |
return cudf.Scalar(fill_value, dtype=self.dtype) | ||
return as_column(fill_value) | ||
|
||
@acquire_spill_lock() | ||
def replace( | ||
self, values_to_replace: Self, replacement_values: Self | ||
) -> Self: | ||
return type(self).from_pylibcudf( # type: ignore[return-value] | ||
plc.replace.find_and_replace_all( | ||
self.to_pylibcudf(mode="read"), | ||
values_to_replace.to_pylibcudf(mode="read"), | ||
replacement_values.to_pylibcudf(mode="read"), | ||
) | ||
) | ||
|
||
def fillna( | ||
self, | ||
fill_value: ScalarLike | ColumnLike, | ||
|
@@ -704,11 +722,32 @@ def fillna( | |
return self.copy() | ||
else: | ||
fill_value = self._validate_fillna_value(fill_value) | ||
return libcudf.replace.replace_nulls( | ||
input_col=self.nans_to_nulls(), | ||
replacement=fill_value, | ||
method=method, | ||
)._with_type_metadata(self.dtype) | ||
|
||
if fill_value is None and method is None: | ||
raise ValueError("Must specify a fill 'value' or 'method'.") | ||
|
||
if fill_value and method: | ||
raise ValueError("Cannot specify both 'value' and 'method'.") | ||
|
||
input_col = self.nans_to_nulls() | ||
|
||
with acquire_spill_lock(): | ||
if method: | ||
plc_replace = ( | ||
plc.replace.ReplacePolicy.PRECEDING | ||
if method == "ffill" | ||
else plc.replace.ReplacePolicy.FOLLOWING | ||
) | ||
elif is_scalar(fill_value): | ||
plc_replace = cudf.Scalar(fill_value).device_value.c_value | ||
else: | ||
plc_replace = fill_value.to_pylibcudf(mode="read") | ||
plc_column = plc.replace.replace_nulls( | ||
input_col.to_pylibcudf(mode="read"), | ||
plc_replace, | ||
) | ||
result = type(self).from_pylibcudf(plc_column) | ||
return result._with_type_metadata(self.dtype) # type: ignore[return-value] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you know why we need to ignore type check here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because |
||
|
||
def isnull(self) -> ColumnBase: | ||
"""Identify missing values in a Column.""" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I suppose this is because
Column.from_pylibcudf
only advertises that it producesColumnBase
rather thanSelf
? All this typing is kind of weird because the C++ column is type-erased.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yup exactly. We have this occurrences in quite a few places in cudf Python since we know what column subclass should be returned.