-
Notifications
You must be signed in to change notification settings - Fork 913
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
196 additions
and
125 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,19 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
from pylibcudf.column cimport Column | ||
from pylibcudf.libcudf.strings.char_types cimport string_character_types | ||
from pylibcudf.scalar cimport Scalar | ||
|
||
|
||
# Signature declaration only (no body here): takes a Column of strings and | ||
# two string_character_types values, and returns a Column. | ||
cpdef Column all_characters_of_type( | ||
Column source_strings, | ||
string_character_types types, | ||
string_character_types verify_types | ||
) | ||
|
||
# Signature declaration only (no body here): takes a Column of strings, the | ||
# character types to remove, a replacement Scalar and the character types to | ||
# keep, and returns a Column. No parameter has a default value. | ||
cpdef Column filter_characters_of_type( | ||
Column source_strings, | ||
string_character_types types_to_remove, | ||
Scalar replacement, | ||
string_character_types types_to_keep | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,93 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
from libcpp.memory cimport unique_ptr | ||
from libcpp.utility cimport move | ||
from pylibcudf.column cimport Column | ||
from pylibcudf.libcudf.column.column cimport column | ||
from pylibcudf.libcudf.scalar.scalar cimport string_scalar | ||
from pylibcudf.libcudf.strings cimport char_types as cpp_char_types | ||
from pylibcudf.scalar cimport Scalar | ||
|
||
from cython.operator import dereference | ||
from pylibcudf.libcudf.strings.char_types import \ | ||
string_character_types as StringCharacterTypes # no-cython-lint | ||
|
||
|
||
cpdef Column all_characters_of_type( | ||
Column source_strings, | ||
string_character_types types, | ||
string_character_types verify_types | ||
): | ||
""" | ||
Identifies strings where all characters match the specified type. | ||
The call into ``cpp_char_types.all_characters_of_type`` is made inside a | ||
``with nogil`` block, so the GIL is not held while it runs. | ||
Parameters | ||
---------- | ||
source_strings : Column | ||
Strings instance for this operation | ||
types : StringCharacterTypes | ||
The character types to check in each string | ||
verify_types : StringCharacterTypes | ||
Only verify against these character types. | ||
Returns | ||
------- | ||
Column | ||
New column of boolean results for each string | ||
""" | ||
# Owning pointer that receives the column produced by the C++ call. | ||
cdef unique_ptr[column] c_result | ||
|
||
# Only C-level values (the column view and the two enum arguments) are | ||
# passed into the call made inside the nogil block. | ||
with nogil: | ||
c_result = move( | ||
cpp_char_types.all_characters_of_type( | ||
source_strings.view(), | ||
types, | ||
verify_types, | ||
) | ||
) | ||
|
||
# Move the result out of the unique_ptr and wrap it as a Column. | ||
return Column.from_libcudf(move(c_result)) | ||
|
||
cpdef Column filter_characters_of_type( | ||
Column source_strings, | ||
string_character_types types_to_remove, | ||
Scalar replacement, | ||
string_character_types types_to_keep | ||
): | ||
""" | ||
Filter specific character types from a column of strings. | ||
The call into ``cpp_char_types.filter_characters_of_type`` is made inside | ||
a ``with nogil`` block, so the GIL is not held while it runs. | ||
Parameters | ||
---------- | ||
source_strings : Column | ||
Strings instance for this operation | ||
types_to_remove : StringCharacterTypes | ||
The character types to check in each string. | ||
replacement : Scalar | ||
The replacement character to use when removing characters | ||
types_to_keep : StringCharacterTypes | ||
Default `ALL_TYPES` means all characters of `types_to_remove` | ||
will be filtered. | ||
NOTE(review): this signature declares no default value, so the | ||
argument must be passed explicitly; confirm the intended default. | ||
Returns | ||
------- | ||
Column | ||
New column with the specified characters filtered out and | ||
replaced with the specified replacement string. | ||
""" | ||
# Reinterpret the Scalar's underlying pointer as a string scalar. | ||
# NOTE(review): no type check is visible here; this assumes `replacement` | ||
# actually wraps a string scalar. Confirm callers guarantee this. | ||
cdef const string_scalar* c_replacement = <const string_scalar*>( | ||
replacement.c_obj.get() | ||
) | ||
# Owning pointer that receives the column produced by the C++ call. | ||
cdef unique_ptr[column] c_result | ||
|
||
# The raw pointer obtained above is dereferenced inside the nogil block. | ||
with nogil: | ||
c_result = move( | ||
cpp_char_types.filter_characters_of_type( | ||
source_strings.view(), | ||
types_to_remove, | ||
dereference(c_replacement), | ||
types_to_keep, | ||
) | ||
) | ||
|
||
# Move the result out of the unique_ptr and wrap it as a Column. | ||
return Column.from_libcudf(move(c_result)) |
Oops, something went wrong.