Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add cudf::calendrical_month_sequence to pylibcudf #17277

Merged
merged 6 commits into from
Nov 8, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 7 additions & 11 deletions python/cudf/cudf/_lib/datetime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,12 @@ from libcpp.utility cimport move
cimport pylibcudf.libcudf.datetime as libcudf_datetime
from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.column.column_view cimport column_view
from pylibcudf.libcudf.filling cimport calendrical_month_sequence
from pylibcudf.libcudf.scalar.scalar cimport scalar
from pylibcudf.libcudf.types cimport size_type
from pylibcudf.datetime import DatetimeComponent

from cudf._lib.column cimport Column
from cudf._lib.scalar cimport DeviceScalar
import pylibcudf as plc


@acquire_spill_lock()
Expand Down Expand Up @@ -177,20 +176,17 @@ def is_leap_year(Column col):

@acquire_spill_lock()
def date_range(DeviceScalar start, size_type n, offset):
    """Build a column of ``n`` timestamps spaced whole months apart.

    Parameters
    ----------
    start : DeviceScalar
        Scalar holding the initial timestamp handed to
        ``calendrical_month_sequence``.
    n : size_type
        Number of timestamps to generate.
    offset
        Object whose ``kwds`` mapping may carry ``"years"`` and
        ``"months"`` entries; a missing entry counts as 0.

    Returns
    -------
    Column
        The result of ``plc.filling.calendrical_month_sequence`` wrapped
        with ``Column.from_pylibcudf``.
    """
    # Fold years into months so one month count describes the whole offset.
    cdef size_type months = (
        offset.kwds.get("years", 0) * 12
        + offset.kwds.get("months", 0)
    )
    return Column.from_pylibcudf(
        plc.filling.calendrical_month_sequence(
            n,
            start.c_value,
            months,
        )
    )


@acquire_spill_lock()
Expand Down
6 changes: 6 additions & 0 deletions python/pylibcudf/pylibcudf/filling.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,9 @@ cpdef Table repeat(
Table input_table,
ColumnOrSize count
)

# Declaration of the month-stepped timestamp sequence generator: takes the
# number of rows ``n``, the initial timestamp ``init`` and the ``months``
# increment, and returns a Column.
cpdef Column calendrical_month_sequence(
size_type n,
Scalar init,
size_type months,
)
37 changes: 37 additions & 0 deletions python/pylibcudf/pylibcudf/filling.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ from pylibcudf.libcudf.filling cimport (
fill_in_place as cpp_fill_in_place,
repeat as cpp_repeat,
sequence as cpp_sequence,
calendrical_month_sequence as cpp_calendrical_month_sequence
)
from pylibcudf.libcudf.table.table cimport table
from pylibcudf.libcudf.types cimport size_type
Expand Down Expand Up @@ -164,3 +165,39 @@ cpdef Table repeat(
count
)
return Table.from_libcudf(move(result))


cpdef Column calendrical_month_sequence(
    size_type n,
    Scalar init,
    size_type months,
):
    """Generate a column of timestamps stepped by a number of months.

    For details, see :cpp:func:`calendrical_month_sequence`.

    Parameters
    ----------
    n : size_type
        Number of timestamps to generate
    init : Scalar
        The initial timestamp
    months : size_type
        Months to increment

    Returns
    -------
    pylibcudf.Column
        Timestamps column with sequences of months
    """
    cdef unique_ptr[column] c_result

    # Only C++ objects are touched inside this block, so the GIL is released
    # for the duration of the libcudf call.
    with nogil:
        c_result = cpp_calendrical_month_sequence(
            n,
            dereference(init.c_obj),
            months
        )
    return Column.from_libcudf(move(c_result))
85 changes: 85 additions & 0 deletions python/pylibcudf/pylibcudf/tests/test_filling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

import pandas as pd
import pyarrow as pa
import pytest
from utils import assert_column_eq, assert_table_eq

import pylibcudf as plc


@pytest.fixture
def pa_col():
    """Provide a five-element Arrow array: 2, 3, 5, 7, 11."""
    values = [2, 3, 5, 7, 11]
    return pa.array(values)


@pytest.fixture
def pa_table():
    """Provide a one-column Arrow table named ``a`` holding 1, 2, 3."""
    column_a = pa.array([1, 2, 3])
    return pa.table([column_a], names=["a"])


def test_fill(pa_col):
    """``fill`` with bounds 1 and 3 replaces rows 1 and 2 with the scalar."""
    source = plc.interop.from_arrow(pa_col)
    fill_value = plc.interop.from_arrow(pa.scalar(5))
    got = plc.filling.fill(source, 1, 3, fill_value)
    assert_column_eq(got, pa.array([2, 5, 5, 7, 11]))


def test_fill_in_place(pa_col):
    """``fill_in_place`` writes the scalar into rows 1 and 2 of its input."""
    target = plc.interop.from_arrow(pa_col)
    fill_value = plc.interop.from_arrow(pa.scalar(5))
    # No return value is used: the check below inspects the mutated column.
    plc.filling.fill_in_place(target, 1, 3, fill_value)
    assert_column_eq(target, pa.array([2, 5, 5, 7, 11]))


def test_sequence():
    """``sequence`` yields five values starting at 10 with a step of 2."""
    first = plc.interop.from_arrow(pa.scalar(10))
    stride = plc.interop.from_arrow(pa.scalar(2))
    got = plc.filling.sequence(5, first, stride)
    assert_column_eq(got, pa.array([10, 12, 14, 16, 18]))


def test_repeat_with_count_int(pa_table):
    """``repeat`` with an integer count duplicates every row that many times."""
    got = plc.filling.repeat(plc.interop.from_arrow(pa_table), 2)
    doubled = pa.table([[1, 1, 2, 2, 3, 3]], names=["a"])
    assert_table_eq(doubled, got)


def test_repeat_with_count_column(pa_table):
    """``repeat`` with a count column repeats row *i* ``count[i]`` times."""
    source = plc.interop.from_arrow(pa_table)
    per_row_counts = plc.interop.from_arrow(pa.array([1, 2, 3]))
    got = plc.filling.repeat(source, per_row_counts)
    # 1 once, 2 twice, 3 three times.
    wanted = pa.table([[1, 2, 2, 3, 3, 3]], names=["a"])
    assert_table_eq(wanted, got)


def test_calendrical_month_sequence():
    """One-month steps from 2020-01-31 land on each month's last day."""
    ms_timestamp = pa.timestamp("ms")
    start = plc.interop.from_arrow(
        pa.scalar(pd.Timestamp("2020-01-31"), type=ms_timestamp)
    )
    got = plc.filling.calendrical_month_sequence(5, start, 1)
    month_ends = pd.to_datetime(
        ["2020-01-31", "2020-02-29", "2020-03-31", "2020-04-30", "2020-05-31"]
    )
    assert_column_eq(got, pa.array(month_ends, type=ms_timestamp))
Loading