Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expose new stable_sort and finish stream_compaction in pylibcudf #15175

Merged
merged 5 commits into from
Mar 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion python/cudf/cudf/_lib/cpp/sorting.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

from libcpp cimport bool
from libcpp.memory cimport unique_ptr
Expand Down Expand Up @@ -68,3 +68,8 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil:
table_view source_table,
vector[libcudf_types.order] column_order,
vector[libcudf_types.null_order] null_precedence) except +

# Cython declaration of `cudf::stable_sort` from "cudf/sorting.hpp".
# Accepts a table view together with a vector of `order` values and a vector
# of `null_order` values, and returns a unique_ptr to the resulting table.
# `except +` asks Cython to convert a thrown C++ exception into a Python one.
cdef unique_ptr[table] stable_sort(
table_view source_table,
vector[libcudf_types.order] column_order,
vector[libcudf_types.null_order] null_precedence) except +
43 changes: 38 additions & 5 deletions python/cudf/cudf/_lib/cpp/stream_compaction.pxd
vyasr marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,28 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil:
vector[size_type] keys,
size_type keep_threshold) except +

# Cython declaration of `cudf::drop_nans` from "cudf/stream_compaction.hpp".
# Returns a unique_ptr to the resulting table.
# NOTE(review): `keys` is presumably the list of column indices to inspect and
# `keep_threshold` a minimum count of non-NaN values per row — confirm against
# the libcudf documentation.
# `except +` asks Cython to convert a thrown C++ exception into a Python one.
cdef unique_ptr[table] drop_nans(table_view source_table,
vector[size_type] keys,
size_type keep_threshold) except +

# Cython declaration of `cudf::apply_boolean_mask` from
# "cudf/stream_compaction.hpp". Takes a table view and a column view used as
# the mask, and returns a unique_ptr to the resulting table.
# `except +` asks Cython to convert a thrown C++ exception into a Python one.
cdef unique_ptr[table] apply_boolean_mask(
table_view source_table,
column_view boolean_mask
) except +

cdef size_type distinct_count(
column_view source_table,
null_policy null_handling,
nan_policy nan_handling) except +
cdef unique_ptr[table] unique(
table_view input,
vector[size_type] keys,
duplicate_keep_option keep,
null_equality nulls_equal,
) except +

cdef unique_ptr[table] stable_distinct(
cdef unique_ptr[table] distinct(
table_view input,
vector[size_type] keys,
duplicate_keep_option keep,
null_equality nulls_equal,
nan_equality nans_equals,
) except +

cdef unique_ptr[column] distinct_indices(
Expand All @@ -53,3 +60,29 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil:
null_equality nulls_equal,
nan_equality nans_equal,
) except +

# Cython declaration of `cudf::stable_distinct` from
# "cudf/stream_compaction.hpp". Returns a unique_ptr to the resulting table.
# `keep` selects which duplicate to retain; `nulls_equal` and `nans_equal`
# are the equality policies passed through to libcudf.
# NOTE(review): `keys` is presumably the list of column indices that define
# row equality — confirm against the libcudf documentation.
# `except +` asks Cython to convert a thrown C++ exception into a Python one.
cdef unique_ptr[table] stable_distinct(
table_view input,
vector[size_type] keys,
duplicate_keep_option keep,
null_equality nulls_equal,
nan_equality nans_equal,
) except +

# Cython declarations of the two `cudf::unique_count` overloads from
# "cudf/stream_compaction.hpp"; both return a `size_type` count.
# `except +` asks Cython to convert a thrown C++ exception into a Python one.

# Column overload: takes both a null policy and a NaN policy.
cdef size_type unique_count(
column_view column,
null_policy null_handling,
nan_policy nan_handling) except +

# Table overload: takes only a null policy (no NaN policy parameter).
cdef size_type unique_count(
table_view source_table,
null_policy null_handling) except +
wence- marked this conversation as resolved.
Show resolved Hide resolved

# Cython declarations of the two `cudf::distinct_count` overloads from
# "cudf/stream_compaction.hpp"; both return a `size_type` count.
# `except +` asks Cython to convert a thrown C++ exception into a Python one.

# Column overload: takes both a null policy and a NaN policy.
cdef size_type distinct_count(
column_view column,
null_policy null_handling,
nan_policy nan_handling) except +

# Table overload: takes only a null policy (no NaN policy parameter).
cdef size_type distinct_count(
table_view source_table,
null_policy null_handling) except +
2 changes: 2 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/sorting.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,5 @@ cpdef Table stable_sort_by_key(
)

# Declaration of the pylibcudf `sort` entry point: takes a Table plus two
# Python lists (column orders and null precedences) and returns a Table.
cpdef Table sort(Table source_table, list column_order, list null_precedence)

# Declaration of the pylibcudf `stable_sort` entry point; same parameters and
# return type as `sort`.
cpdef Table stable_sort(Table source_table, list column_order, list null_precedence)
39 changes: 36 additions & 3 deletions python/cudf/cudf/_lib/pylibcudf/sorting.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ cpdef Column stable_sorted_order(
list column_order,
list null_precedence,
):
"""Computes the row indices required to sort the table, maintaining input order.
"""Computes the row indices required to sort the table,
preserving order of equal elements.

Parameters
----------
Expand Down Expand Up @@ -206,7 +207,8 @@ cpdef Table stable_segmented_sort_by_key(
list column_order,
list null_precedence,
):
"""Sorts the table by key, within segments, maintaining input order.
"""Sorts the table by key preserving order of equal elements,
within segments.

Parameters
----------
Expand Down Expand Up @@ -287,7 +289,7 @@ cpdef Table stable_sort_by_key(
list column_order,
list null_precedence,
):
"""Sorts the table by key, maintaining input order.
"""Sorts the table by key preserving order of equal elements.

Parameters
----------
Expand Down Expand Up @@ -349,3 +351,34 @@ cpdef Table sort(Table source_table, list column_order, list null_precedence):
)
)
return Table.from_libcudf(move(c_result))


cpdef Table stable_sort(Table source_table, list column_order, list null_precedence):
    """Stably sort the rows of a table.

    Rows that compare equal keep the relative order they had in the input.

    Parameters
    ----------
    source_table : Table
        Table whose rows are to be sorted.
    column_order : List[ColumnOrder]
        Sort direction (ascending or descending) to apply to each column.
    null_precedence : List[NullOrder]
        Placement of nulls relative to non-null values.

    Returns
    -------
    Table
        Table holding the sorted rows.
    """
    # The Python lists are converted to C++ vectors up front, while the GIL
    # is still held; the libcudf call itself runs without it.
    cdef vector[order] c_column_order = column_order
    cdef vector[null_order] c_null_order = null_precedence
    cdef unique_ptr[table] c_sorted

    with nogil:
        c_sorted = move(
            cpp_sorting.stable_sort(
                source_table.view(), c_column_order, c_null_order
            )
        )

    # Hand ownership of the libcudf result over to a pylibcudf Table.
    return Table.from_libcudf(move(c_sorted))
34 changes: 28 additions & 6 deletions python/cudf/cudf/_lib/pylibcudf/stream_compaction.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,21 @@ from .table cimport Table

# Declaration of the pylibcudf `drop_nulls` entry point: takes a Table, a
# Python list of keys and a `size_type` threshold, and returns a Table.
cpdef Table drop_nulls(Table source_table, list keys, size_type keep_threshold)

cpdef Table apply_boolean_mask(Table source_table, Column boolean_mask)
cpdef Table drop_nans(Table source_table, list keys, size_type keep_threshold)

cpdef size_type distinct_count(
Column source_table,
null_policy null_handling,
nan_policy nan_handling
cpdef Table unique(
Table input,
list keys,
duplicate_keep_option keep,
null_equality nulls_equal,
)

cpdef Table stable_distinct(
cpdef Table distinct(
Table input,
list keys,
duplicate_keep_option keep,
null_equality nulls_equal,
nan_equality nans_equal,
)

cpdef Column distinct_indices(
Expand All @@ -36,3 +38,23 @@ cpdef Column distinct_indices(
null_equality nulls_equal,
nan_equality nans_equal,
)

# Declaration of the pylibcudf `stable_distinct` entry point: takes a Table,
# a Python list of keys, a duplicate-keep option and null/NaN equality
# policies, and returns a Table.
cpdef Table stable_distinct(
Table input,
list keys,
duplicate_keep_option keep,
null_equality nulls_equal,
nan_equality nans_equal,
)

# Declaration of the pylibcudf `unique_count` entry point: takes a Column
# plus a null policy and a NaN policy, and returns a `size_type` count.
cpdef size_type unique_count(
Column column,
null_policy null_handling,
nan_policy nan_handling
)

# Declaration of the pylibcudf `distinct_count` entry point: takes a Column
# plus a null policy and a NaN policy, and returns a `size_type` count.
cpdef size_type distinct_count(
Column column,
null_policy null_handling,
nan_policy nan_handling
)
Loading
Loading