-
Notifications
You must be signed in to change notification settings - Fork 915
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Migrate string replace.pxd to pylibcudf #15839
Changes from 2 commits
d49ed89
397ba14
4d3b40f
4f20ea1
0924bd2
186e408
139a2e2
e168f58
cac8be1
bc9771a
4583f5c
1f2e434
b589e25
65ae3d8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
from libcpp.memory cimport unique_ptr | ||
from libcpp.string cimport string | ||
|
||
from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar | ||
|
||
|
||
# Cython declarations for symbols from libcudf's scalar_factories.hpp header
# (C++ namespace ``cudf``). The block is declared ``nogil``, so the functions
# in it may be called without holding the GIL.
cdef extern from "cudf/scalar/scalar_factories.hpp" namespace "cudf" nogil: | ||
# Takes a C++ string by const reference and returns a unique_ptr to a scalar.
# ``except +`` makes Cython translate a thrown C++ exception into a Python one.
cdef unique_ptr[scalar] make_string_scalar(const string & _string) except + |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
from . cimport case, find | ||
from . cimport case, find, replace |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
from . import case, find | ||
from . import case, find, replace |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
from cudf._lib.pylibcudf.column cimport Column | ||
from cudf._lib.pylibcudf.libcudf.types cimport size_type | ||
from cudf._lib.pylibcudf.scalar cimport Scalar | ||
from cudf._lib.pylibcudf.strings.types cimport ColumnOrScalar | ||
|
||
|
||
# Declaration of the strings ``replace`` entry point. ``target`` and ``repl``
# share the fused type ColumnOrScalar, so both must be Columns or both must be
# Scalars. ``= *`` marks an optional argument whose default value is given in
# the implementation (.pyx) file.
cpdef Column replace( | ||
Column input, | ||
ColumnOrScalar target, | ||
ColumnOrScalar repl, | ||
size_type maxrepl = * | ||
) | ||
# Declaration of the strings ``replace_slice`` entry point. Every argument
# after ``input`` is optional (``= *``); the default values are given in the
# implementation (.pyx) file.
cpdef Column replace_slice( | ||
Column input, | ||
Scalar repl = *, | ||
size_type start = *, | ||
size_type stop = * | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
from libcpp.memory cimport unique_ptr | ||
from libcpp.utility cimport move | ||
|
||
from cudf._lib.pylibcudf.column cimport Column | ||
from cudf._lib.pylibcudf.libcudf.column.column cimport column | ||
from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar | ||
from cudf._lib.pylibcudf.libcudf.scalar.scalar_factories cimport ( | ||
make_string_scalar as cpp_make_string_scalar, | ||
) | ||
from cudf._lib.pylibcudf.libcudf.strings.replace cimport ( | ||
replace as cpp_replace, | ||
replace_slice as cpp_replace_slice, | ||
) | ||
from cudf._lib.pylibcudf.libcudf.types cimport size_type | ||
from cudf._lib.pylibcudf.scalar cimport Scalar | ||
from cudf._lib.pylibcudf.strings.types cimport ColumnOrScalar | ||
|
||
|
||
cpdef Column replace(
    Column input,
    ColumnOrScalar target,
    ColumnOrScalar repl,
    size_type maxrepl = -1
):
    """Replace occurrences of ``target`` with ``repl`` in a strings column.

    ``target`` and ``repl`` share one fused type, so they must either both be
    Scalars or both be Columns. Each case forwards to the matching libcudf
    ``replace`` overload.

    Parameters
    ----------
    input : Column
        The strings column to operate on.
    target : Scalar or Column
        The string(s) to search for.
    repl : Scalar or Column
        The replacement string(s).
    maxrepl : size_type, default -1
        Forwarded to libcudf in the Scalar case (per the libcudf
        documentation, a negative value replaces every occurrence).
        Not supported in the Column case; it must be left at its default.

    Returns
    -------
    Column
        A new column produced by libcudf.

    Raises
    ------
    NotImplementedError
        If ``target``/``repl`` are Columns and ``maxrepl`` is not -1.
    """
    cdef:
        unique_ptr[column] c_result
        const string_scalar* target_str
        const string_scalar* repl_str

    if ColumnOrScalar is Scalar:
        # The Scalars are expected to wrap libcudf string scalars; the cast
        # exposes them as such for the libcudf call.
        target_str = <string_scalar *>(target.c_obj.get())
        repl_str = <string_scalar *>(repl.c_obj.get())

        with nogil:
            c_result = move(cpp_replace(
                input.view(),
                target_str[0],
                repl_str[0],
                maxrepl,
            ))
    else:
        # Column case
        # TODO: maxrepl should be supported in the corresponding CUDA/C++ code
        # The libcudf overload taking target/repl columns has no maxrepl
        # argument. Reject a non-default value up front instead of silently
        # ignoring it, until libcudf gains support for it.
        if maxrepl != -1:
            raise NotImplementedError(
                "maxrepl is not supported when target and repl are Columns"
            )

        with nogil:
            c_result = move(cpp_replace(
                input.view(),
                target.view(),
                repl.view(),
            ))

    return Column.from_libcudf(move(c_result))
|
||
|
||
cpdef Column replace_slice(
    Column input,
    # TODO: default scalar values
    # https://github.com/rapidsai/cudf/issues/15505
    Scalar repl = None,
    size_type start = 0,
    size_type stop = -1
):
    """Call libcudf's ``replace_slice`` on a strings column.

    Parameters
    ----------
    input : Column
        The strings column to operate on.
    repl : Scalar, optional
        The replacement string scalar. When ``None``, an empty-string scalar
        is created and used instead.
    start : size_type, default 0
        Forwarded to libcudf as the start of the slice.
    stop : size_type, default -1
        Forwarded to libcudf as the end of the slice.
        NOTE(review): presumably -1 means "to the end of each string";
        confirm against the libcudf documentation.

    Returns
    -------
    Column
        A new column produced by libcudf.
    """
    cdef:
        unique_ptr[column] c_result
        const string_scalar* c_repl

    # No replacement supplied: substitute an empty string scalar.
    if repl is None:
        repl = Scalar.from_libcudf(cpp_make_string_scalar("".encode()))

    # Expose the wrapped scalar as a string_scalar for the libcudf call.
    c_repl = <string_scalar*>(repl.c_obj.get())

    with nogil:
        c_result = move(
            cpp_replace_slice(input.view(), c_repl[0], start, stop)
        )

    return Column.from_libcudf(move(c_result))
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
from cudf._lib.pylibcudf.column cimport Column | ||
from cudf._lib.pylibcudf.scalar cimport Scalar | ||
|
||
# Fused type that lets one function signature accept either a Column or a
# Scalar; Cython generates a separate specialization for each member type.
ctypedef fused ColumnOrScalar: | ||
Column | ||
Scalar |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Renamed, since the strings `replace.pyx` clashes with the regular `replace.pyx`.