Skip to content

Commit

Permalink
Migrate lists/combine to pylibcudf (#15928)
Browse files Browse the repository at this point in the history
Part of #15162. Migrates `concatenate_rows` and `concatenate_list_elements` to pylibcudf.

Authors:
  - Matthew Murray (https://github.com/Matt711)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Thomas Li (https://github.com/lithomas1)

URL: #15928
  • Loading branch information
Matt711 authored Jun 12, 2024
1 parent 49e2a56 commit d2cd1d4
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 33 deletions.
46 changes: 13 additions & 33 deletions python/cudf/cudf/_lib/lists.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,6 @@ from libcpp.utility cimport move
from cudf._lib.column cimport Column
from cudf._lib.pylibcudf.libcudf.column.column cimport column
from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
from cudf._lib.pylibcudf.libcudf.lists.combine cimport (
concatenate_list_elements as cpp_concatenate_list_elements,
concatenate_null_policy,
concatenate_rows as cpp_concatenate_rows,
)
from cudf._lib.pylibcudf.libcudf.lists.contains cimport (
contains,
index_of as cpp_index_of,
Expand All @@ -32,7 +27,6 @@ from cudf._lib.pylibcudf.libcudf.lists.stream_compaction cimport (
distinct as cpp_distinct,
)
from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar
from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
from cudf._lib.pylibcudf.libcudf.types cimport (
nan_equality,
null_equality,
Expand All @@ -41,10 +35,7 @@ from cudf._lib.pylibcudf.libcudf.types cimport (
size_type,
)
from cudf._lib.scalar cimport DeviceScalar
from cudf._lib.utils cimport (
columns_from_pylibcudf_table,
table_view_from_columns,
)
from cudf._lib.utils cimport columns_from_pylibcudf_table

from cudf._lib import pylibcudf

Expand Down Expand Up @@ -223,31 +214,20 @@ def index_of_column(Column col, Column search_keys):

@acquire_spill_lock()
def concatenate_rows(list source_columns):
cdef unique_ptr[column] c_result

cdef table_view c_table_view = table_view_from_columns(source_columns)

with nogil:
c_result = move(cpp_concatenate_rows(
c_table_view,
))

return Column.from_unique_ptr(move(c_result))
return Column.from_pylibcudf(
pylibcudf.lists.concatenate_rows(
pylibcudf.Table([
c.to_pylibcudf(mode="read") for c in source_columns
])
)
)


@acquire_spill_lock()
def concatenate_list_elements(Column input_column, dropna=False):
cdef concatenate_null_policy policy = (
concatenate_null_policy.IGNORE if dropna
else concatenate_null_policy.NULLIFY_OUTPUT_ROW
return Column.from_pylibcudf(
pylibcudf.lists.concatenate_list_elements(
input_column.to_pylibcudf(mode="read"),
dropna,
)
)
cdef column_view c_input = input_column.view()
cdef unique_ptr[column] c_result

with nogil:
c_result = move(cpp_concatenate_list_elements(
c_input,
policy
))

return Column.from_unique_ptr(move(c_result))
7 changes: 7 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/lists.pxd
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from libcpp cimport bool

from cudf._lib.pylibcudf.libcudf.types cimport size_type

from .column cimport Column
from .table cimport Table


# Declarations of the cpdef functions implemented in lists.pyx.
cpdef Table explode_outer(Table, size_type explode_column_idx)

# Row-wise concatenation of the lists columns of a Table into a single Column.
cpdef Column concatenate_rows(Table)

# Per-row concatenation of nested lists into a single list. ``dropna`` selects
# whether null list elements are ignored (True) or make the output row null
# (False).
cpdef Column concatenate_list_elements(Column, bool dropna)
61 changes: 61 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/lists.pyx
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from libcpp cimport bool
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move

from cudf._lib.pylibcudf.libcudf.column.column cimport column
from cudf._lib.pylibcudf.libcudf.lists cimport explode as cpp_explode
from cudf._lib.pylibcudf.libcudf.lists.combine cimport (
concatenate_list_elements as cpp_concatenate_list_elements,
concatenate_null_policy,
concatenate_rows as cpp_concatenate_rows,
)
from cudf._lib.pylibcudf.libcudf.table.table cimport table
from cudf._lib.pylibcudf.libcudf.types cimport size_type

from .column cimport Column
from .table cimport Table


Expand All @@ -33,3 +41,56 @@ cpdef Table explode_outer(Table input, size_type explode_column_idx):
c_result = move(cpp_explode.explode_outer(input.view(), explode_column_idx))

return Table.from_libcudf(move(c_result))


cpdef Column concatenate_rows(Table input):
    """Concatenate multiple lists columns into a single lists column row-wise.

    Parameters
    ----------
    input : Table
        The input table whose lists columns are concatenated row by row.

    Returns
    -------
    Column
        A new Column of concatenated rows
    """
    cdef unique_ptr[column] c_result

    # The GIL is released around the libcudf call.
    with nogil:
        c_result = move(cpp_concatenate_rows(input.view()))

    # Hand ownership of the libcudf column to a pylibcudf Column.
    return Column.from_libcudf(move(c_result))


cpdef Column concatenate_list_elements(Column input, bool dropna):
    """Concatenate multiple lists on the same row into a single list.

    Parameters
    ----------
    input : Column
        The input column
    dropna : bool
        If true, null list elements will be ignored
        from concatenation. Otherwise any input null values will result in
        the corresponding output row being set to null.

    Returns
    -------
    Column
        A new Column of concatenated list elements
    """
    # Translate the Python-facing boolean into libcudf's null policy enum.
    cdef concatenate_null_policy null_policy = (
        concatenate_null_policy.IGNORE if dropna
        else concatenate_null_policy.NULLIFY_OUTPUT_ROW
    )
    cdef unique_ptr[column] c_result

    # The GIL is released around the libcudf call.
    with nogil:
        c_result = move(cpp_concatenate_list_elements(
            input.view(),
            null_policy,
        ))

    # Hand ownership of the libcudf column to a pylibcudf Column.
    return Column.from_libcudf(move(c_result))
46 changes: 46 additions & 0 deletions python/cudf/cudf/pylibcudf_tests/test_lists.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

import pyarrow as pa
import pytest
from utils import assert_column_eq

from cudf._lib import pylibcudf as plc


def test_concatenate_rows():
    """Row-wise concatenation of two list columns matches a Python zip."""
    left = [[0, 1], [2], [5], [6, 7]]
    right = [[8], [9], [], [13, 14, 15]]

    table = plc.interop.from_arrow(
        pa.Table.from_arrays([left, right], names=["a", "b"])
    )
    result = plc.lists.concatenate_rows(table)

    # Reference answer: join the two lists found on each row.
    expected = pa.array([a + b for a, b in zip(left, right)])
    assert_column_eq(expected, result)


@pytest.mark.parametrize(
    "test_data, dropna, expected",
    [
        # A null list element nullifies the whole output row when dropna=False.
        (
            [[[1, 2], [3, 4], [5]], [[6], None, [7, 8, 9]]],
            False,
            [[1, 2, 3, 4, 5], None],
        ),
        # The same null list element is skipped when dropna=True.
        (
            [[[1, 2], [3, 4], [5]], [[6], None, [7, 8, 9]]],
            True,
            [[1, 2, 3, 4, 5], [6, 7, 8, 9]],
        ),
        # Null *scalars* inside the inner lists are ordinary values and are
        # kept regardless of dropna.
        (
            [[[1, 2], [3, 4], [5, None]], [[6], [None], [7, 8, 9]]],
            True,
            [[1, 2, 3, 4, 5, None], [6, None, 7, 8, 9]],
        ),
    ],
    ids=["null_list_nullifies_row", "null_list_ignored", "null_scalars_kept"],
)
def test_concatenate_list_elements(test_data, dropna, expected):
    """Check per-row concatenation of nested lists under both null policies."""
    arr = pa.array(test_data)
    plc_column = plc.interop.from_arrow(arr)

    res = plc.lists.concatenate_list_elements(plc_column, dropna)

    expect = pa.array(expected)

    assert_column_eq(expect, res)

0 comments on commit d2cd1d4

Please sign in to comment.