Skip to content

Commit

Permalink
Migrate string case operations to pylibcudf (#15489)
Browse files Browse the repository at this point in the history
This PR creates `pylibcudf` `case` APIs and migrates the cuDF Cython code to use them. Part of #15162.

Authors:
  - https://github.com/brandon-b-miller
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #15489
  • Loading branch information
brandon-b-miller authored Apr 11, 2024
1 parent af33b0a commit 8506ea6
Show file tree
Hide file tree
Showing 10 changed files with 124 additions and 32 deletions.
2 changes: 2 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,5 @@ rapids_cython_create_modules(
LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX pylibcudf_ ASSOCIATED_TARGETS cudf
)
link_to_pyarrow_headers(pylibcudf_interop)

add_subdirectory(strings)
2 changes: 2 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/__init__.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ from . cimport (
search,
sorting,
stream_compaction,
strings,
types,
unary,
)
Expand Down Expand Up @@ -48,6 +49,7 @@ __all__ = [
"rolling",
"search",
"stream_compaction",
"strings",
"sorting",
"types",
"unary",
Expand Down
2 changes: 2 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
search,
sorting,
stream_compaction,
strings,
types,
unary,
)
Expand Down Expand Up @@ -48,6 +49,7 @@
"rolling",
"search",
"stream_compaction",
"strings",
"sorting",
"types",
"unary",
Expand Down
21 changes: 21 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/strings/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# =============================================================================
# Copyright (c) 2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
# =============================================================================

# Cython source files in this directory that are passed to
# rapids_cython_create_modules below.
set(cython_sources case.pyx)
# Libraries passed as LINKED_LIBRARIES for the generated modules.
set(linked_libraries cudf::cudf)
# Module names are prefixed with "pylibcudf_" and associated with the cudf target.
rapids_cython_create_modules(
CXX
SOURCE_FILES "${cython_sources}"
LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX pylibcudf_ ASSOCIATED_TARGETS cudf
)
3 changes: 3 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/strings/__init__.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

# A .pxd file exposes compile-time declarations, so the submodule must be
# brought in with `cimport` (a runtime `import` does not make case.pxd's
# cpdef declarations available to other Cython modules).
from . cimport case
3 changes: 3 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/strings/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from . import case
8 changes: 8 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/strings/case.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from cudf._lib.pylibcudf.column cimport Column


# Declarations of the string case functions implemented in case.pyx.
# Each one takes a Column and returns a Column.
cpdef Column to_lower(Column input)
cpdef Column to_upper(Column input)
cpdef Column swapcase(Column input)
30 changes: 30 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/strings/case.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move

from cudf._lib.cpp.column.column cimport column
from cudf._lib.cpp.strings cimport case as cpp_case
from cudf._lib.pylibcudf.column cimport Column


cpdef Column to_lower(Column input):
    """Apply libcudf's ``to_lower`` to ``input`` and return the result.

    The libcudf call is made on ``input.view()`` with the GIL released; the
    resulting libcudf column is wrapped with ``Column.from_libcudf``.
    """
    cdef unique_ptr[column] lowered

    with nogil:
        lowered = cpp_case.to_lower(input.view())

    return Column.from_libcudf(move(lowered))

cpdef Column to_upper(Column input):
    """Apply libcudf's ``to_upper`` to ``input`` and return the result.

    The libcudf call is made on ``input.view()`` with the GIL released; the
    resulting libcudf column is wrapped with ``Column.from_libcudf``.
    """
    cdef unique_ptr[column] uppered

    with nogil:
        uppered = cpp_case.to_upper(input.view())

    return Column.from_libcudf(move(uppered))

cpdef Column swapcase(Column input):
    """Apply libcudf's ``swapcase`` to ``input`` and return the result.

    The libcudf call is made on ``input.view()`` with the GIL released; the
    resulting libcudf column is wrapped with ``Column.from_libcudf``.
    """
    cdef unique_ptr[column] swapped

    with nogil:
        swapped = cpp_case.swapcase(input.view())

    return Column.from_libcudf(move(swapped))
50 changes: 18 additions & 32 deletions python/cudf/cudf/_lib/strings/case.pyx
Original file line number Diff line number Diff line change
@@ -1,48 +1,34 @@
# Copyright (c) 2018-2022, NVIDIA CORPORATION.
# Copyright (c) 2018-2024, NVIDIA CORPORATION.

from cudf.core.buffer import acquire_spill_lock

from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move

from cudf._lib.column cimport Column
from cudf._lib.cpp.column.column cimport column
from cudf._lib.cpp.column.column_view cimport column_view
from cudf._lib.cpp.strings.case cimport (
swapcase as cpp_swapcase,
to_lower as cpp_to_lower,
to_upper as cpp_to_upper,
)

from cudf._lib.pylibcudf.strings import case


@acquire_spill_lock()
def to_upper(Column source_strings):
cdef unique_ptr[column] c_result
cdef column_view source_view = source_strings.view()

with nogil:
c_result = move(cpp_to_upper(source_view))

return Column.from_unique_ptr(move(c_result))
return Column.from_pylibcudf(
case.to_upper(
source_strings.to_pylibcudf(mode='read')
)
)


@acquire_spill_lock()
def to_lower(Column source_strings):
cdef unique_ptr[column] c_result
cdef column_view source_view = source_strings.view()

with nogil:
c_result = move(cpp_to_lower(source_view))

return Column.from_unique_ptr(move(c_result))
return Column.from_pylibcudf(
case.to_lower(
source_strings.to_pylibcudf(mode='read')
)
)


@acquire_spill_lock()
def swapcase(Column source_strings):
cdef unique_ptr[column] c_result
cdef column_view source_view = source_strings.view()

with nogil:
c_result = move(cpp_swapcase(source_view))

return Column.from_unique_ptr(move(c_result))
return Column.from_pylibcudf(
case.swapcase(
source_strings.to_pylibcudf(mode='read')
)
)
35 changes: 35 additions & 0 deletions python/cudf/cudf/pylibcudf_tests/test_string_case.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

import pyarrow as pa
import pytest
from utils import assert_column_eq

import cudf._lib.pylibcudf as plc


@pytest.fixture(scope="module")
def string_col():
    """Module-scoped pyarrow array of mixed-case strings with two nulls."""
    values = [
        "AbC",
        "de",
        "FGHI",
        "j",
        "kLm",
        "nOPq",
        None,
        "RsT",
        None,
        "uVw",
    ]
    return pa.array(values)


def test_to_upper(string_col):
    """Check pylibcudf ``to_upper`` against pyarrow's ``utf8_upper``."""
    expected = pa.compute.utf8_upper(string_col)
    got = plc.strings.case.to_upper(plc.interop.from_arrow(string_col))
    assert_column_eq(got, expected)


def test_to_lower(string_col):
    """Check pylibcudf ``to_lower`` against pyarrow's ``utf8_lower``."""
    expected = pa.compute.utf8_lower(string_col)
    got = plc.strings.case.to_lower(plc.interop.from_arrow(string_col))
    assert_column_eq(got, expected)


def test_swapcase(string_col):
    """Check pylibcudf ``swapcase`` against pyarrow's ``utf8_swapcase``."""
    expected = pa.compute.utf8_swapcase(string_col)
    got = plc.strings.case.swapcase(plc.interop.from_arrow(string_col))
    assert_column_eq(got, expected)

0 comments on commit 8506ea6

Please sign in to comment.