diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/filling.pxd b/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/filling.pxd
new file mode 100644
index 00000000000..8403fd179f7
--- /dev/null
+++ b/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/filling.pxd
@@ -0,0 +1,19 @@
+# Copyright (c) 2021-2024, NVIDIA CORPORATION.
+
+from libcpp.memory cimport unique_ptr
+
+from cudf._lib.pylibcudf.libcudf.column.column cimport column
+from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
+
+
+cdef extern from "cudf/lists/filling.hpp" namespace "cudf::lists" nogil:
+    cdef unique_ptr[column] sequences(
+        const column_view& starts,
+        const column_view& sizes,
+    ) except +
+
+    cdef unique_ptr[column] sequences(
+        const column_view& starts,
+        const column_view& steps,
+        const column_view& sizes,
+    ) except +
diff --git a/python/cudf/cudf/_lib/pylibcudf/lists.pxd b/python/cudf/cudf/_lib/pylibcudf/lists.pxd
index cacecae6010..6e9bd5ff76b 100644
--- a/python/cudf/cudf/_lib/pylibcudf/lists.pxd
+++ b/python/cudf/cudf/_lib/pylibcudf/lists.pxd
@@ -36,4 +36,6 @@ cpdef Column extract_list_element(Column, ColumnOrSizeType)
 
 cpdef Column count_elements(Column)
 
+cpdef Column sequences(Column, Column, Column steps = *)
+
 cpdef Column sort_lists(Column, bool, null_order, bool stable = *)
diff --git a/python/cudf/cudf/_lib/pylibcudf/lists.pyx b/python/cudf/cudf/_lib/pylibcudf/lists.pyx
index b5661a3e634..3837eaaca78 100644
--- a/python/cudf/cudf/_lib/pylibcudf/lists.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/lists.pyx
@@ -9,6 +9,7 @@ from cudf._lib.pylibcudf.libcudf.column.column cimport column
 from cudf._lib.pylibcudf.libcudf.lists cimport (
     contains as cpp_contains,
     explode as cpp_explode,
+    filling as cpp_filling,
     gather as cpp_gather,
     reverse as cpp_reverse,
 )
@@ -326,6 +327,47 @@ cpdef Column count_elements(Column input):
     return Column.from_libcudf(move(c_result))
 
 
+cpdef Column sequences(Column starts, Column sizes, Column steps = None):
+    """Create a lists column in which each row contains a sequence of
+    values specified by a tuple of (start, step, size) parameters.
+
+    For details, see :cpp:func:`sequences`.
+
+    Parameters
+    ----------
+    starts : Column
+        First values in the result sequences.
+    sizes : Column
+        Numbers of values in the result sequences.
+    steps : Optional[Column]
+        Increment values for the result sequences. If None (the default),
+        every sequence increments by one.
+
+    Returns
+    -------
+    Column
+        The result column containing generated sequences.
+    """
+    cdef unique_ptr[column] c_result
+
+    # libcudf exposes two overloads; which one is called depends on
+    # whether ``steps`` was supplied.
+    if steps is not None:
+        with nogil:
+            c_result = move(cpp_filling.sequences(
+                starts.view(),
+                steps.view(),
+                sizes.view(),
+            ))
+    else:
+        with nogil:
+            c_result = move(cpp_filling.sequences(
+                starts.view(),
+                sizes.view(),
+            ))
+    return Column.from_libcudf(move(c_result))
+
+
 cpdef Column sort_lists(
     Column input,
     bool ascending,
diff --git a/python/cudf/cudf/pylibcudf_tests/test_lists.py b/python/cudf/cudf/pylibcudf_tests/test_lists.py
index 87472f6d59b..0b2e0e00ce8 100644
--- a/python/cudf/cudf/pylibcudf_tests/test_lists.py
+++ b/python/cudf/cudf/pylibcudf_tests/test_lists.py
@@ -198,6 +198,22 @@ def test_count_elements(test_data):
     assert_column_eq(expect, res)
 
 
+def test_sequences():
+    starts = plc.interop.from_arrow(pa.array([0, 1, 2, 3, 4]))
+    steps = plc.interop.from_arrow(pa.array([2, 1, 1, 1, -3]))
+    sizes = plc.interop.from_arrow(pa.array([0, 2, 2, 1, 3]))
+
+    res1 = plc.lists.sequences(starts, sizes, steps)
+    res2 = plc.lists.sequences(starts, sizes)
+
+    expect1 = pa.array([[], [1, 2], [2, 3], [3], [4, 1, -2]])
+    expect2 = pa.array([[], [1, 2], [2, 3], [3], [4, 5, 6]])
+
+    assert_column_eq(expect1, res1)
+
+    assert_column_eq(expect2, res2)
+
+
 @pytest.mark.parametrize(
     "ascending,na_position,expected",
     [