Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add string.convert.convert_fixed_type APIs to pylibcudf #16984

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 17 additions & 52 deletions python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx
Original file line number Diff line number Diff line change
@@ -1,22 +1,11 @@
# Copyright (c) 2021-2024, NVIDIA CORPORATION.

import cudf

from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move

from cudf.core.buffer import acquire_spill_lock

from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.column.column_view cimport column_view
from pylibcudf.libcudf.strings.convert.convert_fixed_point cimport (
from_fixed_point as cpp_from_fixed_point,
is_fixed_point as cpp_is_fixed_point,
to_fixed_point as cpp_to_fixed_point,
)
from pylibcudf.libcudf.types cimport data_type, type_id

from cudf._lib.column cimport Column
from cudf._lib.types cimport dtype_to_pylibcudf_type

import pylibcudf as plc


@acquire_spill_lock()
Expand All @@ -32,14 +21,10 @@ def from_decimal(Column input_col):
-------
A column of strings representing the input decimal values.
"""
cdef column_view input_column_view = input_col.view()
cdef unique_ptr[column] c_result
with nogil:
c_result = move(
cpp_from_fixed_point(
input_column_view))

return Column.from_unique_ptr(move(c_result))
plc_column = plc.strings.convert.convert_fixed_point.from_fixed_point(
input_col.to_pylibcudf(mode="read"),
)
return Column.from_pylibcudf(plc_column)


@acquire_spill_lock()
Expand All @@ -57,25 +42,11 @@ def to_decimal(Column input_col, object out_type):
-------
A column of decimals parsed from the string values.
"""
cdef column_view input_column_view = input_col.view()
cdef unique_ptr[column] c_result
cdef int scale = out_type.scale
cdef data_type c_out_type
if isinstance(out_type, cudf.Decimal32Dtype):
c_out_type = data_type(type_id.DECIMAL32, -scale)
elif isinstance(out_type, cudf.Decimal64Dtype):
c_out_type = data_type(type_id.DECIMAL64, -scale)
elif isinstance(out_type, cudf.Decimal128Dtype):
c_out_type = data_type(type_id.DECIMAL128, -scale)
else:
raise TypeError("should be a decimal dtype")
with nogil:
c_result = move(
cpp_to_fixed_point(
input_column_view,
c_out_type))

result = Column.from_unique_ptr(move(c_result))
plc_column = plc.strings.convert.convert_fixed_point.to_fixed_point(
input_col.to_pylibcudf(mode="read"),
dtype_to_pylibcudf_type(out_type),
)
result = Column.from_pylibcudf(plc_column)
result.dtype.precision = out_type.precision
return result

Expand All @@ -98,14 +69,8 @@ def is_fixed_point(Column input_col, object dtype):
-------
A Column of booleans indicating valid decimal conversion.
"""
cdef unique_ptr[column] c_result
cdef column_view source_view = input_col.view()
cdef int scale = dtype.scale
cdef data_type c_dtype = data_type(type_id.DECIMAL64, -scale)
with nogil:
c_result = move(cpp_is_fixed_point(
source_view,
c_dtype
))

return Column.from_unique_ptr(move(c_result))
plc_column = plc.strings.convert.convert_fixed_point.is_fixed_point(
input_col.to_pylibcudf(mode="read"),
dtype_to_pylibcudf_type(dtype),
)
return Column.from_pylibcudf(plc_column)
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@ from pylibcudf.libcudf.types cimport data_type
cdef extern from "cudf/strings/convert/convert_fixed_point.hpp" namespace \
"cudf::strings" nogil:
cdef unique_ptr[column] to_fixed_point(
column_view input_col,
column_view input,
data_type output_type) except +

cdef unique_ptr[column] from_fixed_point(
column_view input_col) except +
column_view input) except +

cdef unique_ptr[column] is_fixed_point(
column_view source_strings,
data_type output_type
column_view input,
data_type decimal_type
) except +
4 changes: 3 additions & 1 deletion python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
# the License.
# =============================================================================

set(cython_sources convert_booleans.pyx convert_durations.pyx convert_datetime.pyx)
set(cython_sources convert_booleans.pyx convert_datetime.pyx convert_durations.pyx
convert_fixed_point.pyx
)

set(linked_libraries cudf::cudf)
rapids_cython_create_modules(
Expand Down
7 changes: 6 additions & 1 deletion python/pylibcudf/pylibcudf/strings/convert/__init__.pxd
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
from . cimport convert_booleans, convert_datetime, convert_durations
from . cimport (
convert_booleans,
convert_datetime,
convert_durations,
convert_fixed_point,
)
7 changes: 6 additions & 1 deletion python/pylibcudf/pylibcudf/strings/convert/__init__.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have a corresponding pxd file? If not, can we add it? If so, can we update it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yes, we have one and I forgot to update it.

Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
from . import convert_booleans, convert_datetime, convert_durations
from . import (
convert_booleans,
convert_datetime,
convert_durations,
convert_fixed_point,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from pylibcudf.column cimport Column
from pylibcudf.types cimport DataType


# Parse a strings column into a fixed-point column of ``output_type``.
cpdef Column to_fixed_point(Column input, DataType output_type)

# Convert a fixed-point column into a strings column.
cpdef Column from_fixed_point(Column input)

# Flag, per string, whether it is valid for fixed-point conversion.
# ``=*`` declares ``decimal_type`` as optional; the actual default value
# is given on the implementation in the matching .pyx file.
cpdef Column is_fixed_point(Column input, DataType decimal_type=*)
107 changes: 107 additions & 0 deletions python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move
from pylibcudf.column cimport Column
from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.strings.convert cimport (
convert_fixed_point as cpp_fixed_point,
)
from pylibcudf.types cimport DataType, type_id


cpdef Column to_fixed_point(Column input, DataType output_type):
    """
    Parse the decimal values held in a strings column into a new
    fixed-point column.

    For details, see :cpp:func:`cudf::strings::to_fixed_point`

    Parameters
    ----------
    input : Column
        Strings column to parse.
    output_type : DataType
        Fixed-point type of the returned column, including the scale value.

    Returns
    -------
    Column
        New column of ``output_type``.
    """
    cdef unique_ptr[column] c_parsed

    # Release the GIL for the duration of the libcudf call.
    with nogil:
        c_parsed = move(
            cpp_fixed_point.to_fixed_point(input.view(), output_type.c_obj)
        )

    return Column.from_libcudf(move(c_parsed))

cpdef Column from_fixed_point(Column input):
    """
    Convert the values of a fixed-point column into a new strings column.

    For details, see :cpp:func:`cudf::strings::from_fixed_point`

    Parameters
    ----------
    input : Column
        Fixed-point column to convert.

    Returns
    -------
    Column
        New strings column.
    """
    cdef unique_ptr[column] c_strings

    # Release the GIL for the duration of the libcudf call.
    with nogil:
        c_strings = move(cpp_fixed_point.from_fixed_point(input.view()))

    return Column.from_libcudf(move(c_strings))

cpdef Column is_fixed_point(Column input, DataType decimal_type=None):
    """
    Build a boolean column marking the strings whose characters are all
    valid for conversion to fixed-point.

    For details, see :cpp:func:`cudf::strings::is_fixed_point`

    Parameters
    ----------
    input : Column
        Strings column to inspect.
    decimal_type : DataType, optional
        Fixed-point type (with scale) used only for checking overflow.
        Defaults to Decimal64.

    Returns
    -------
    Column
        New column of boolean results for each string.
    """
    cdef unique_ptr[column] c_flags
    cdef DataType overflow_type = decimal_type

    # Substitute the documented default when the caller passed nothing.
    if overflow_type is None:
        overflow_type = DataType(type_id.DECIMAL64)

    # Release the GIL for the duration of the libcudf call.
    with nogil:
        c_flags = move(
            cpp_fixed_point.is_fixed_point(input.view(), overflow_type.c_obj)
        )

    return Column.from_libcudf(move(c_flags))
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
import decimal

import pyarrow as pa
import pylibcudf as plc
from utils import assert_column_eq


def test_to_fixed_point():
    """Strings parsed by pylibcudf match a pyarrow cast to decimal128(38, 2)."""
    decimal_type = pa.decimal128(38, 2)
    strings = pa.array(["123", "1.23", None])

    plc_strings = plc.interop.from_arrow(strings)
    plc_type = plc.interop.from_arrow(decimal_type)
    got = plc.strings.convert.convert_fixed_point.to_fixed_point(
        plc_strings, plc_type
    )

    # pyarrow's own string -> decimal cast is the reference result.
    assert_column_eq(got, strings.cast(decimal_type))


def test_from_fixed_point():
    """Decimal values (and nulls) are rendered as the expected strings."""
    decimals = pa.array([decimal.Decimal("1.1"), None])

    plc_decimals = plc.interop.from_arrow(decimals)
    got = plc.strings.convert.convert_fixed_point.from_fixed_point(plc_decimals)

    assert_column_eq(got, pa.array(["1.1", None]))


def test_is_fixed_point():
    """Well-formed numbers are flagged True; malformed/empty False; null stays null."""
    strings = pa.array(["123", "1.23", "1.2.3", "", None])

    # decimal_type is deliberately omitted to exercise the default.
    plc_strings = plc.interop.from_arrow(strings)
    got = plc.strings.convert.convert_fixed_point.is_fixed_point(plc_strings)

    assert_column_eq(got, pa.array([True, True, False, False, None]))
Loading