Skip to content

Commit

Permalink
Expose new stable_sort and finish stream_compaction in pylibcudf (#15175)
Browse files Browse the repository at this point in the history

Completes coverage of `sorting.hpp` and `stream_compaction.hpp`

Authors:
  - Lawrence Mitchell (https://github.com/wence-)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #15175
  • Loading branch information
wence- authored Mar 4, 2024
1 parent 903dcac commit dbdcc31
Show file tree
Hide file tree
Showing 7 changed files with 275 additions and 36 deletions.
7 changes: 6 additions & 1 deletion python/cudf/cudf/_lib/cpp/sorting.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

from libcpp cimport bool
from libcpp.memory cimport unique_ptr
Expand Down Expand Up @@ -68,3 +68,8 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil:
table_view source_table,
vector[libcudf_types.order] column_order,
vector[libcudf_types.null_order] null_precedence) except +

# Binding for cudf::stable_sort from "cudf/sorting.hpp": takes a table view
# plus one sort order and one null order per column, and returns a newly
# allocated table owned through unique_ptr.
# `except +` makes Cython translate a thrown C++ exception into a Python one.
cdef unique_ptr[table] stable_sort(
table_view source_table,
vector[libcudf_types.order] column_order,
vector[libcudf_types.null_order] null_precedence) except +
43 changes: 38 additions & 5 deletions python/cudf/cudf/_lib/cpp/stream_compaction.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,28 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil:
vector[size_type] keys,
size_type keep_threshold) except +

# Binding for cudf::drop_nans from "cudf/stream_compaction.hpp"; returns a
# newly allocated table owned through unique_ptr.
# NOTE(review): `keys` presumably holds the column indices inspected for NaNs
# and `keep_threshold` the minimum count of non-NaN key values needed to keep
# a row -- confirm against the libcudf header.
cdef unique_ptr[table] drop_nans(table_view source_table,
vector[size_type] keys,
size_type keep_threshold) except +

cdef unique_ptr[table] apply_boolean_mask(
table_view source_table,
column_view boolean_mask
) except +

cdef size_type distinct_count(
column_view source_table,
null_policy null_handling,
nan_policy nan_handling) except +
# Binding for cudf::unique from "cudf/stream_compaction.hpp"; returns a newly
# allocated table owned through unique_ptr.
# Unlike the distinct / stable_distinct declarations in this file, this
# signature has no nan_equality argument.
# NOTE(review): presumably only collapses *consecutive* duplicate rows --
# confirm against the libcudf documentation.
cdef unique_ptr[table] unique(
table_view input,
vector[size_type] keys,
duplicate_keep_option keep,
null_equality nulls_equal,
) except +

cdef unique_ptr[table] stable_distinct(
# Binding for cudf::distinct from "cudf/stream_compaction.hpp"; returns a
# newly allocated table owned through unique_ptr.
# The last parameter is spelled `nans_equal` so that it matches the sibling
# distinct_indices and stable_distinct declarations in this file. Parameter
# names in an extern declaration are not part of the C++ call, so existing
# positional callers are unaffected.
# `except +` makes Cython translate a thrown C++ exception into a Python one.
cdef unique_ptr[table] distinct(
    table_view input,
    vector[size_type] keys,
    duplicate_keep_option keep,
    null_equality nulls_equal,
    nan_equality nans_equal,
) except +

cdef unique_ptr[column] distinct_indices(
Expand All @@ -53,3 +60,29 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil:
null_equality nulls_equal,
nan_equality nans_equal,
) except +

# Binding for cudf::stable_distinct from "cudf/stream_compaction.hpp"; the
# parameter list mirrors the `distinct` declaration in this file and the
# result is a newly allocated table owned through unique_ptr.
# NOTE(review): presumably differs from `distinct` only in preserving the
# input row order -- confirm against the libcudf documentation.
cdef unique_ptr[table] stable_distinct(
table_view input,
vector[size_type] keys,
duplicate_keep_option keep,
null_equality nulls_equal,
nan_equality nans_equal,
) except +

# Two overloads of cudf::unique_count from "cudf/stream_compaction.hpp".
# Column overload: takes both a null policy and a NaN policy.
cdef size_type unique_count(
column_view column,
null_policy null_handling,
nan_policy nan_handling) except +

# Table overload: takes a null policy only (no NaN policy argument).
cdef size_type unique_count(
table_view source_table,
null_policy null_handling) except +

# Two overloads of cudf::distinct_count from "cudf/stream_compaction.hpp".
# Column overload: takes both a null policy and a NaN policy.
cdef size_type distinct_count(
column_view column,
null_policy null_handling,
nan_policy nan_handling) except +

# Table overload: takes a null policy only (no NaN policy argument).
cdef size_type distinct_count(
table_view source_table,
null_policy null_handling) except +
2 changes: 2 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/sorting.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,5 @@ cpdef Table stable_sort_by_key(
)

cpdef Table sort(Table source_table, list column_order, list null_precedence)

# Declaration of the cpdef stable_sort wrapper; same signature as `sort`
# directly above.
cpdef Table stable_sort(Table source_table, list column_order, list null_precedence)
39 changes: 36 additions & 3 deletions python/cudf/cudf/_lib/pylibcudf/sorting.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ cpdef Column stable_sorted_order(
list column_order,
list null_precedence,
):
"""Computes the row indices required to sort the table, maintaining input order.
"""Computes the row indices required to sort the table,
preserving order of equal elements.
Parameters
----------
Expand Down Expand Up @@ -206,7 +207,8 @@ cpdef Table stable_segmented_sort_by_key(
list column_order,
list null_precedence,
):
"""Sorts the table by key, within segments, maintaining input order.
"""Sorts the table by key preserving order of equal elements,
within segments.
Parameters
----------
Expand Down Expand Up @@ -287,7 +289,7 @@ cpdef Table stable_sort_by_key(
list column_order,
list null_precedence,
):
"""Sorts the table by key, maintaining input order.
"""Sorts the table by key preserving order of equal elements.
Parameters
----------
Expand Down Expand Up @@ -349,3 +351,34 @@ cpdef Table sort(Table source_table, list column_order, list null_precedence):
)
)
return Table.from_libcudf(move(c_result))


cpdef Table stable_sort(
    Table source_table,
    list column_order,
    list null_precedence,
):
    """Stably sort a table: rows that compare equal keep their input order.

    Parameters
    ----------
    source_table : Table
        The table to sort.
    column_order : List[ColumnOrder]
        Ascending or descending order for each column.
    null_precedence : List[NullOrder]
        Whether nulls are placed before or after non-null values.

    Returns
    -------
    Table
        The sorted table.
    """
    # The Python lists are converted to C++ vectors here, while the GIL is
    # still held, so that the libcudf call below can run without it.
    cdef vector[order] sort_orders = column_order
    cdef vector[null_order] null_orders = null_precedence
    cdef unique_ptr[table] sorted_tbl

    with nogil:
        sorted_tbl = move(
            cpp_sorting.stable_sort(
                source_table.view(), sort_orders, null_orders
            )
        )

    # Hand ownership of the C++ result over to a new Table object.
    return Table.from_libcudf(move(sorted_tbl))
34 changes: 28 additions & 6 deletions python/cudf/cudf/_lib/pylibcudf/stream_compaction.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,21 @@ from .table cimport Table

cpdef Table drop_nulls(Table source_table, list keys, size_type keep_threshold)

cpdef Table apply_boolean_mask(Table source_table, Column boolean_mask)
# Declaration of the cpdef drop_nans wrapper; same signature as drop_nulls.
cpdef Table drop_nans(Table source_table, list keys, size_type keep_threshold)

cpdef size_type distinct_count(
Column source_table,
null_policy null_handling,
nan_policy nan_handling
# Declaration of the cpdef `unique` wrapper. Unlike distinct and
# stable_distinct in this file, it takes no nan_equality argument.
cpdef Table unique(
Table input,
list keys,
duplicate_keep_option keep,
null_equality nulls_equal,
)

cpdef Table stable_distinct(
# Declaration of the cpdef `distinct` wrapper; takes both a null-equality
# and a NaN-equality policy for comparing key values.
cpdef Table distinct(
Table input,
list keys,
duplicate_keep_option keep,
null_equality nulls_equal,
nan_equality nans_equal,
)

cpdef Column distinct_indices(
Expand All @@ -36,3 +38,23 @@ cpdef Column distinct_indices(
null_equality nulls_equal,
nan_equality nans_equal,
)

# Declaration of the cpdef `stable_distinct` wrapper; its parameter list is
# identical to that of `distinct`.
cpdef Table stable_distinct(
Table input,
list keys,
duplicate_keep_option keep,
null_equality nulls_equal,
nan_equality nans_equal,
)

# Declaration of the cpdef `unique_count` wrapper. Only a Column-based
# signature is declared here.
cpdef size_type unique_count(
Column column,
null_policy null_handling,
nan_policy nan_handling
)

# Declaration of the cpdef `distinct_count` wrapper. Only a Column-based
# signature is declared here.
cpdef size_type distinct_count(
Column column,
null_policy null_handling,
nan_policy nan_handling
)
Loading

0 comments on commit dbdcc31

Please sign in to comment.