Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add docstrings and test for strings.convert_durations APIs for pylibcudf #16982

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions python/cudf/cudf/_lib/string_casting.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -599,12 +599,11 @@ def timedelta2int(Column input_col, dtype, format):

"""
dtype = dtype_to_pylibcudf_type(dtype)
cdef string c_timestamp_format = format.encode('UTF-8')
return Column.from_pylibcudf(
plc.strings.convert.convert_durations.to_durations(
input_col.to_pylibcudf(mode="read"),
dtype,
c_timestamp_format
format
)
)

Expand All @@ -623,12 +622,10 @@ def int2timedelta(Column input_col, str format):
A Column with Timedelta represented in string format

"""

cdef string c_duration_format = format.encode('UTF-8')
return Column.from_pylibcudf(
plc.strings.convert.convert_durations.from_durations(
input_col.to_pylibcudf(mode="read"),
c_duration_format
format
)
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ from pylibcudf.libcudf.types cimport data_type
cdef extern from "cudf/strings/convert/convert_durations.hpp" namespace \
"cudf::strings" nogil:
cdef unique_ptr[column] to_durations(
const column_view & strings_col,
const column_view & input,
data_type duration_type,
const string & format) except +

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ from pylibcudf.types cimport DataType
cpdef Column to_durations(
Column input,
DataType duration_type,
const string& format
str format
)

cpdef Column from_durations(
Column input,
const string& format
Column durations,
str format=*
)
73 changes: 63 additions & 10 deletions python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -15,27 +15,80 @@ from pylibcudf.types import DataType
cpdef Column to_durations(
Column input,
DataType duration_type,
const string& format
str format
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch, sorry we missed this on review.

):
"""
Returns a new duration column converting a strings column into
durations using the provided format pattern.

For details, see :cpp:func:`cudf::strings::to_durations`

Parameters
----------
input : Column
Strings instance for this operation.

duration_type : DataType
The duration type used for creating the output column.

format : str
String specifying the duration format in strings.

Returns
-------
Column
New duration column.
"""
cdef unique_ptr[column] c_result
cdef string c_format = format.encode()

with nogil:
c_result = cpp_convert_durations.to_durations(
input.view(),
duration_type.c_obj,
format
c_result = move(
cpp_convert_durations.to_durations(
input.view(),
duration_type.c_obj,
c_format
)
)

return Column.from_libcudf(move(c_result))

cpdef Column from_durations(
Column input,
const string& format
Column durations,
str format=None
):
"""
Returns a new strings column converting a duration column into
strings using the provided format pattern.

For details, see :cpp:func:`cudf::strings::from_durations`

Parameters
----------
durations : Column
Duration values to convert.

format : str
The string specifying output format.
Default format is "%D days %H:%M:%S".
mroeschke marked this conversation as resolved.
Show resolved Hide resolved

Returns
-------
Column
New strings column with formatted durations.
"""
cdef unique_ptr[column] c_result

if format is None:
format = "%D days %H:%M:%S"
cdef string c_format = format.encode()

with nogil:
c_result = cpp_convert_durations.from_durations(
input.view(),
format
c_result = move(
cpp_convert_durations.from_durations(
durations.view(),
c_format
)
)

return Column.from_libcudf(move(c_result))
43 changes: 0 additions & 43 deletions python/pylibcudf/pylibcudf/tests/test_string_convert.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from datetime import datetime

import pyarrow as pa
import pylibcudf as plc
import pytest
Expand All @@ -21,39 +19,16 @@ def timestamp_type(request):
return request.param


@pytest.fixture(
scope="module",
params=[
pa.duration("ns"),
pa.duration("us"),
pa.duration("ms"),
pa.duration("s"),
],
)
def duration_type(request):
return request.param


@pytest.fixture(scope="module")
def pa_timestamp_col():
return pa.array(["2011-01-01", "2011-01-02", "2011-01-03"])


@pytest.fixture(scope="module")
def pa_duration_col():
return pa.array(["05:20:25"])


@pytest.fixture(scope="module")
def plc_timestamp_col(pa_timestamp_col):
return plc.interop.from_arrow(pa_timestamp_col)


@pytest.fixture(scope="module")
def plc_duration_col(pa_duration_col):
return plc.interop.from_arrow(pa_duration_col)


@pytest.mark.parametrize("format", ["%Y-%m-%d"])
def test_to_datetime(
pa_timestamp_col, plc_timestamp_col, timestamp_type, format
Expand All @@ -65,21 +40,3 @@ def test_to_datetime(
format.encode(),
)
assert_column_eq(expect, got)


@pytest.mark.parametrize("format", ["%H:%M:%S"])
def test_to_duration(pa_duration_col, plc_duration_col, duration_type, format):
def to_timedelta(duration_str):
date = datetime.strptime(duration_str, format)
return date - datetime(1900, 1, 1) # "%H:%M:%S" zero date

expect = pa.array([to_timedelta(d.as_py()) for d in pa_duration_col]).cast(
duration_type
)

got = plc.strings.convert.convert_durations.to_durations(
plc_duration_col,
plc.interop.from_arrow(duration_type),
format.encode(),
)
assert_column_eq(expect, got)
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from datetime import datetime, timedelta

import pyarrow as pa
import pylibcudf as plc
import pytest
from utils import assert_column_eq


@pytest.fixture(
    params=[pa.duration(unit) for unit in ("ns", "us", "ms", "s")],
)
def duration_type(request):
    """Provide each pyarrow duration type (ns, us, ms, s), one per test run."""
    return request.param


@pytest.fixture
mroeschke marked this conversation as resolved.
Show resolved Hide resolved
def pa_duration_col():
    """Return a one-element pyarrow array holding the string "05:20:25"."""
    values = ["05:20:25"]
    return pa.array(values)


@pytest.fixture
mroeschke marked this conversation as resolved.
Show resolved Hide resolved
def plc_duration_col(pa_duration_col):
    """Return ``pa_duration_col`` converted with ``plc.interop.from_arrow``."""
    converted = plc.interop.from_arrow(pa_duration_col)
    return converted


def test_to_duration(pa_duration_col, plc_duration_col, duration_type):
    """Compare ``to_durations`` with timedeltas derived via ``strptime``."""
    fmt = "%H:%M:%S"
    # Parsing with "%H:%M:%S" alone yields a datetime on 1900-01-01, so
    # subtracting that zero date leaves just the parsed time as a timedelta.
    zero_date = datetime(1900, 1, 1)

    deltas = [
        datetime.strptime(scalar.as_py(), fmt) - zero_date
        for scalar in pa_duration_col
    ]
    expect = pa.array(deltas).cast(duration_type)

    plc_type = plc.interop.from_arrow(duration_type)
    got = plc.strings.convert.convert_durations.to_durations(
        plc_duration_col, plc_type, fmt
    )
    assert_column_eq(expect, got)


@pytest.mark.parametrize("format", [None, "%D days %H:%M:%S"])
def test_from_durations(format):
    """Check ``from_durations`` output for ``None`` and an explicit format.

    Both parametrized cases are compared against the same expected strings,
    and the null entry in the input is expected to stay null in the output.
    """
    one_of_each = timedelta(days=1, hours=1, minutes=1, seconds=1)
    plc_durations = plc.interop.from_arrow(pa.array([one_of_each, None]))

    result = plc.strings.convert.convert_durations.from_durations(
        plc_durations, format
    )

    expected = pa.array(["1 days 01:01:01", None])
    assert_column_eq(result, expected)
Loading