Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate quantile.pxd to pylibcudf #15874

Merged
merged 13 commits into from
Jun 6, 2024
1 change: 1 addition & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ This page provides API documentation for pylibcudf.
join
lists
merge
quantiles
reduce
reshape
rolling
Expand Down
6 changes: 6 additions & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/quantiles.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
=========
quantiles
=========

.. automodule:: cudf._lib.pylibcudf.quantiles
:members:
1 change: 1 addition & 0 deletions python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ set(cython_sources
join.pyx
lists.pyx
merge.pyx
quantiles.pyx
reduce.pyx
replace.pyx
reshape.pyx
Expand Down
2 changes: 2 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/__init__.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ from . cimport (
join,
lists,
merge,
quantiles,
reduce,
replace,
reshape,
Expand Down Expand Up @@ -45,6 +46,7 @@ __all__ = [
"join",
"lists",
"merge",
"quantiles",
"reduce",
"replace",
"rolling",
Expand Down
2 changes: 2 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
join,
lists,
merge,
quantiles,
reduce,
replace,
reshape,
Expand Down Expand Up @@ -45,6 +46,7 @@
"join",
"lists",
"merge",
"quantiles",
"reduce",
"replace",
"rolling",
Expand Down
23 changes: 23 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/quantiles.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from .column cimport Column
from .table cimport Table
from .types cimport interpolation, sorted


# Declaration of the column-wise quantile routine (implemented in quantiles.pyx).
# `= *` marks an optional argument; its actual default value is supplied by the
# matching definition in the .pyx file, not by this declaration.
cpdef Column quantile(
Column input,
const double[:] q,
interpolation interp = *,
Column ordered_indices = *,
bint exact = *
)

# Declaration of the table (row-wise) quantile routine (implemented in
# quantiles.pyx). `= *` marks an optional argument; its actual default value is
# supplied by the matching definition in the .pyx file, not by this declaration.
cpdef Table quantiles(
Table input,
const double[:] q,
interpolation interp = *,
sorted is_input_sorted = *,
list column_order = *,
list null_precedence = *,
)
145 changes: 145 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/quantiles.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from libcpp cimport bool
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move
from libcpp.vector cimport vector

from cudf._lib.pylibcudf.libcudf.column.column cimport column
from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
from cudf._lib.pylibcudf.libcudf.quantiles cimport (
quantile as cpp_quantile,
quantiles as cpp_quantiles,
)
from cudf._lib.pylibcudf.libcudf.table.table cimport table
from cudf._lib.pylibcudf.libcudf.types cimport null_order, order, sorted
vyasr marked this conversation as resolved.
Show resolved Hide resolved

from .column cimport Column
from .table cimport Table
from .types cimport interpolation


cpdef Column quantile(
    Column input,
    const double[:] q,
    interpolation interp = interpolation.LINEAR,
    Column ordered_indices = None,
    bint exact=True
):
    """Computes quantiles with interpolation.

    Computes the specified quantiles by interpolating values between which they lie,
    using the interpolation strategy specified in interp.

    Parameters
    ----------
    input: Column
        The Column to calculate quantiles on.
    q: array-like that implements buffer-protocol
        The quantiles to calculate in range [0,1]
    interp: Interpolation, default Interpolation.LINEAR
        The strategy used to select between values adjacent to a specified quantile.
    ordered_indices: Column, default None
        The column containing the sorted order of input.

        If None, an empty column is passed to libcudf.
        If empty, all input values are used in existing order.
        Indices must be in range [0, input.size()), but are not required to be unique.
        Values not indexed by this column will be ignored.
    exact: bool, default True
        Returns doubles if True. Otherwise, returns same type as input

    Returns
    -------
    Column
        A Column containing specified quantiles, with nulls for indeterminable values
    """
    cdef:
        unique_ptr[column] c_result
        vector[double] q_vec
        column_view ordered_indices_view
        Py_ssize_t i
        Py_ssize_t n_q = q.shape[0]

    if ordered_indices is None:
        ordered_indices_view = column_view()
    else:
        ordered_indices_view = ordered_indices.view()

    # Copy from memoryview into vector.
    # `const double[:]` also accepts strided (non-contiguous) buffers, so copy
    # element by element rather than from a raw pointer range; this also makes
    # an empty `q` a no-op instead of requiring a guard around `&q[0]`.
    q_vec.reserve(n_q)
    for i in range(n_q):
        q_vec.push_back(q[i])

    with nogil:
        c_result = move(
            cpp_quantile(
                input.view(),
                q_vec,
                interp,
                ordered_indices_view,
                exact,
            )
        )

    return Column.from_libcudf(move(c_result))


cpdef Table quantiles(
    Table input,
    const double[:] q,
    interpolation interp = interpolation.NEAREST,
    sorted is_input_sorted = sorted.NO,
    # cython-lint complains that this a dangerous default value but
    # we don't modify these parameters, and so should be good to go
    list column_order = [],  # no-cython-lint
    list null_precedence = [],  # no-cython-lint
):
    """Computes row quantiles with interpolation.

    Computes the specified quantiles by retrieving the row corresponding to the
    specified quantiles. In the event a quantile lies in between rows, the specified
    interpolation strategy is used to pick between the rows.

    Parameters
    ----------
    input: Table
        The Table to calculate row quantiles on.
    q: array-like that implements buffer-protocol
        The quantiles to calculate in range [0,1]
    interp: Interpolation, default Interpolation.NEAREST
        The strategy used to select between values adjacent to a specified quantile.

        Must be a non-arithmetic interpolation strategy
        (i.e. one of
        {`Interpolation.HIGHER`, `Interpolation.LOWER`, `Interpolation.NEAREST`})
    is_input_sorted: Sorted, default Sorted.NO
        Whether the input table has been pre-sorted or not.
    column_order: list, default []
        A list of `Order` enums, indicating the desired sort order for each column.

        Ignored if `is_input_sorted` is `Sorted.YES`
    null_precedence: list, default []
        A list of `NullOrder` enums, indicating how nulls should be sorted.

        Ignored if `is_input_sorted` is `Sorted.YES`

    Returns
    -------
    Table
        A Table containing the rows selected for the specified quantiles
    """
    cdef:
        unique_ptr[table] c_result
        vector[double] q_vec
        vector[order] column_order_vec = column_order
        vector[null_order] null_precedence_vec = null_precedence
        Py_ssize_t i
        Py_ssize_t n_q = q.shape[0]

    # Copy from memoryview into vector.
    # Taking `&q[0]` is invalid for an empty `q`, and a raw pointer range is
    # wrong for strided buffers (which `const double[:]` accepts), so copy
    # element by element; an empty `q` simply yields an empty vector.
    q_vec.reserve(n_q)
    for i in range(n_q):
        q_vec.push_back(q[i])

    with nogil:
        c_result = move(
            cpp_quantiles(
                input.view(),
                q_vec,
                interp,
                is_input_sorted,
                column_order_vec,
                null_precedence_vec,
            )
        )

    return Table.from_libcudf(move(c_result))
8 changes: 8 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/types.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,14 @@ from cudf._lib.pylibcudf.libcudf.types cimport (
type_id,
)

from cudf._lib.pylibcudf.libcudf.types import \
interpolation as Interpolation # no-cython-lint
from cudf._lib.pylibcudf.libcudf.types import \
null_order as NullOrder # no-cython-lint
from cudf._lib.pylibcudf.libcudf.types import \
sorted as Sorted # no-cython-lint
from cudf._lib.pylibcudf.libcudf.types import order as Order # no-cython-lint

lithomas1 marked this conversation as resolved.
Show resolved Hide resolved

cdef class DataType:
cdef data_type c_obj
Expand Down
105 changes: 26 additions & 79 deletions python/cudf/cudf/_lib/quantiles.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -3,122 +3,69 @@
from cudf.core.buffer import acquire_spill_lock

from libcpp cimport bool
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move
from libcpp.vector cimport vector

from cudf._lib.column cimport Column
from cudf._lib.types cimport (
underlying_type_t_interpolation,
underlying_type_t_null_order,
underlying_type_t_order,
underlying_type_t_sorted,
)

from cudf._lib.types import Interpolation

from cudf._lib.pylibcudf.libcudf.column.column cimport column
from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
from cudf._lib.pylibcudf.libcudf.quantiles cimport (
quantile as cpp_quantile,
quantiles as cpp_quantile_table,
)
from cudf._lib.pylibcudf.libcudf.table.table cimport table
from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
from cudf._lib.pylibcudf.libcudf.types cimport (
interpolation,
null_order,
order,
sorted,
)
from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
from cudf._lib.pylibcudf.libcudf.types cimport interpolation, sorted
from cudf._lib.utils cimport columns_from_pylibcudf_table

import cudf._lib.pylibcudf as plc


@acquire_spill_lock()
def quantile(
Column input,
object q,
double[:] q,
str interp,
Column ordered_indices,
bool exact,

):
cdef column_view c_input = input.view()
cdef column_view c_ordered_indices = (
column_view() if ordered_indices is None
else ordered_indices.view()
)
cdef interpolation c_interp = <interpolation>(
<underlying_type_t_interpolation> Interpolation[interp.upper()]
)
cdef bool c_exact = exact

cdef vector[double] c_q
c_q.reserve(len(q))

for value in q:
c_q.push_back(value)

cdef unique_ptr[column] c_result

with nogil:
c_result = move(
cpp_quantile(
c_input,
c_q,
c_interp,
c_ordered_indices,
c_exact,
)
return Column.from_pylibcudf(
plc.quantiles.quantile(
input.to_pylibcudf(mode="read"),
q,
c_interp,
ordered_indices.to_pylibcudf(mode="read"),
<bool>exact
)

return Column.from_unique_ptr(move(c_result))
)


def quantile_table(
list source_columns,
vector[double] q,
double[:] q,
lithomas1 marked this conversation as resolved.
Show resolved Hide resolved
object interp,
object is_input_sorted,
list column_order,
list null_precedence,
):
cdef table_view c_input = table_view_from_columns(source_columns)
cdef vector[double] c_q = q

cdef interpolation c_interp = <interpolation>(
<underlying_type_t_interpolation> interp
)
cdef sorted c_is_input_sorted = <sorted>(
<underlying_type_t_sorted> is_input_sorted
)
cdef vector[order] c_column_order
cdef vector[null_order] c_null_precedence

c_column_order.reserve(len(column_order))
c_null_precedence.reserve(len(null_precedence))

for value in column_order:
c_column_order.push_back(
<order>(<underlying_type_t_order> value)
return columns_from_pylibcudf_table(
plc.quantiles.quantiles(
plc.Table([
c.to_pylibcudf(mode="read") for c in source_columns
]),
q,
c_interp,
c_is_input_sorted,
column_order,
null_precedence
)

for value in null_precedence:
c_null_precedence.push_back(
<null_order>(<underlying_type_t_null_order> value)
)

cdef unique_ptr[table] c_result

with nogil:
c_result = move(
cpp_quantile_table(
c_input,
c_q,
c_interp,
c_is_input_sorted,
c_column_order,
c_null_precedence,
)
)

return columns_from_unique_ptr(move(c_result))
)
3 changes: 3 additions & 0 deletions python/cudf/cudf/core/column/numerical_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,9 @@ def quantile(
indices = libcudf.sort.order_by(
[self], [True], "first", stable=True
).slice(self.null_count, len(self))

q = np.asarray(q, dtype="float64")

result = libcudf.quantiles.quantile(
self, q, interpolation, indices, exact
)
Expand Down
Loading
Loading