Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move segmented_gather function from the copying module to the lists module #17148

Merged
merged 1 commit into from
Oct 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 1 addition & 25 deletions python/cudf/cudf/_lib/copying.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import pickle

from libc.stdint cimport uint8_t, uintptr_t
from libcpp cimport bool
from libcpp.memory cimport make_shared, shared_ptr, unique_ptr
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move
from libcpp.vector cimport vector

Expand All @@ -30,10 +30,6 @@ from libcpp.memory cimport make_unique
cimport pylibcudf.libcudf.contiguous_split as cpp_contiguous_split
from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.column.column_view cimport column_view
from pylibcudf.libcudf.lists.gather cimport (
segmented_gather as cpp_segmented_gather,
)
from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view
from pylibcudf.libcudf.scalar.scalar cimport scalar
from pylibcudf.libcudf.types cimport size_type

Expand Down Expand Up @@ -339,26 +335,6 @@ def get_element(Column input_column, size_type index):
)


@acquire_spill_lock()
def segmented_gather(Column source_column, Column gather_map):
# Wraps both input columns in lists_column_view objects, passes them to
# libcudf's segmented_gather (cimported as cpp_segmented_gather), and
# returns the result as a new Column.
#
# The views are held through shared_ptr and dereferenced at the call site.
# NOTE(review): presumably because lists_column_view cannot be declared as a
# plain Cython stack variable -- confirm.
cdef shared_ptr[lists_column_view] source_LCV = (
make_shared[lists_column_view](source_column.view())
)
cdef shared_ptr[lists_column_view] gather_map_LCV = (
make_shared[lists_column_view](gather_map.view())
)
cdef unique_ptr[column] c_result

# The libcudf call runs with the GIL released.
with nogil:
c_result = move(
cpp_segmented_gather(
source_LCV.get()[0], gather_map_LCV.get()[0])
)

# Hand ownership of the C++ result column over to a Column object.
result = Column.from_unique_ptr(move(c_result))
return result


cdef class _CPackedColumns:

@staticmethod
Expand Down
38 changes: 24 additions & 14 deletions python/cudf/cudf/_lib/lists.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -9,24 +9,24 @@ from pylibcudf.libcudf.types cimport null_order, size_type
from cudf._lib.column cimport Column
from cudf._lib.utils cimport columns_from_pylibcudf_table

import pylibcudf
import pylibcudf as plc

from pylibcudf cimport Scalar


@acquire_spill_lock()
def count_elements(Column col):
return Column.from_pylibcudf(
pylibcudf.lists.count_elements(
plc.lists.count_elements(
col.to_pylibcudf(mode="read"))
)


@acquire_spill_lock()
def explode_outer(list source_columns, int explode_column_idx):
return columns_from_pylibcudf_table(
pylibcudf.lists.explode_outer(
pylibcudf.Table([c.to_pylibcudf(mode="read") for c in source_columns]),
plc.lists.explode_outer(
plc.Table([c.to_pylibcudf(mode="read") for c in source_columns]),
explode_column_idx,
)
)
Expand All @@ -35,7 +35,7 @@ def explode_outer(list source_columns, int explode_column_idx):
@acquire_spill_lock()
def distinct(Column col, bool nulls_equal, bool nans_all_equal):
return Column.from_pylibcudf(
pylibcudf.lists.distinct(
plc.lists.distinct(
col.to_pylibcudf(mode="read"),
nulls_equal,
nans_all_equal,
Expand All @@ -46,7 +46,7 @@ def distinct(Column col, bool nulls_equal, bool nans_all_equal):
@acquire_spill_lock()
def sort_lists(Column col, bool ascending, str na_position):
return Column.from_pylibcudf(
pylibcudf.lists.sort_lists(
plc.lists.sort_lists(
col.to_pylibcudf(mode="read"),
ascending,
null_order.BEFORE if na_position == "first" else null_order.AFTER,
Expand All @@ -58,7 +58,7 @@ def sort_lists(Column col, bool ascending, str na_position):
@acquire_spill_lock()
def extract_element_scalar(Column col, size_type index):
return Column.from_pylibcudf(
pylibcudf.lists.extract_list_element(
plc.lists.extract_list_element(
col.to_pylibcudf(mode="read"),
index,
)
Expand All @@ -68,7 +68,7 @@ def extract_element_scalar(Column col, size_type index):
@acquire_spill_lock()
def extract_element_column(Column col, Column index):
return Column.from_pylibcudf(
pylibcudf.lists.extract_list_element(
plc.lists.extract_list_element(
col.to_pylibcudf(mode="read"),
index.to_pylibcudf(mode="read"),
)
Expand All @@ -78,7 +78,7 @@ def extract_element_column(Column col, Column index):
@acquire_spill_lock()
def contains_scalar(Column col, py_search_key):
return Column.from_pylibcudf(
pylibcudf.lists.contains(
plc.lists.contains(
col.to_pylibcudf(mode="read"),
<Scalar> py_search_key.device_value.c_value,
)
Expand All @@ -88,7 +88,7 @@ def contains_scalar(Column col, py_search_key):
@acquire_spill_lock()
def index_of_scalar(Column col, object py_search_key):
return Column.from_pylibcudf(
pylibcudf.lists.index_of(
plc.lists.index_of(
col.to_pylibcudf(mode="read"),
<Scalar> py_search_key.device_value.c_value,
True,
Expand All @@ -99,7 +99,7 @@ def index_of_scalar(Column col, object py_search_key):
@acquire_spill_lock()
def index_of_column(Column col, Column search_keys):
return Column.from_pylibcudf(
pylibcudf.lists.index_of(
plc.lists.index_of(
col.to_pylibcudf(mode="read"),
search_keys.to_pylibcudf(mode="read"),
True,
Expand All @@ -110,8 +110,8 @@ def index_of_column(Column col, Column search_keys):
@acquire_spill_lock()
def concatenate_rows(list source_columns):
return Column.from_pylibcudf(
pylibcudf.lists.concatenate_rows(
pylibcudf.Table([
plc.lists.concatenate_rows(
plc.Table([
c.to_pylibcudf(mode="read") for c in source_columns
])
)
Expand All @@ -121,8 +121,18 @@ def concatenate_rows(list source_columns):
@acquire_spill_lock()
def concatenate_list_elements(Column input_column, dropna=False):
return Column.from_pylibcudf(
pylibcudf.lists.concatenate_list_elements(
plc.lists.concatenate_list_elements(
input_column.to_pylibcudf(mode="read"),
dropna,
)
)


@acquire_spill_lock()
def segmented_gather(Column source_column, Column gather_map):
    """Run ``plc.lists.segmented_gather`` on two cudf columns.

    Both ``source_column`` and ``gather_map`` are converted with
    ``to_pylibcudf(mode="read")``, passed to pylibcudf in that order, and
    the pylibcudf result is wrapped back into a ``Column``.
    """
    plc_source = source_column.to_pylibcudf(mode="read")
    plc_gather_map = gather_map.to_pylibcudf(mode="read")
    gathered = plc.lists.segmented_gather(plc_source, plc_gather_map)
    return Column.from_pylibcudf(gathered)
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/column/lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from typing_extensions import Self

import cudf
from cudf._lib.copying import segmented_gather
from cudf._lib.lists import (
concatenate_list_elements,
concatenate_rows,
Expand All @@ -22,6 +21,7 @@
extract_element_scalar,
index_of_column,
index_of_scalar,
segmented_gather,
sort_lists,
)
from cudf._lib.strings.convert.convert_lists import format_list_column
Expand Down
Loading