Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate quantile.pxd to pylibcudf #15874

Merged
merged 13 commits into from
Jun 6, 2024
4 changes: 3 additions & 1 deletion cpp/src/quantiles/quantiles.cu
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include <thrust/iterator/transform_iterator.h>

#include <memory>
#include <stdexcept>
#include <vector>

namespace cudf {
Expand Down Expand Up @@ -78,7 +79,8 @@ std::unique_ptr<table> quantiles(table_view const& input,

CUDF_EXPECTS(interp == interpolation::HIGHER || interp == interpolation::LOWER ||
interp == interpolation::NEAREST,
"multi-column quantiles require a non-arithmetic interpolation strategy.");
"multi-column quantiles require a non-arithmetic interpolation strategy.",
std::invalid_argument);

CUDF_EXPECTS(input.num_rows() > 0, "multi-column quantiles require at least one input row.");

Expand Down
9 changes: 6 additions & 3 deletions cpp/tests/quantiles/quantiles_test.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -25,6 +25,8 @@
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>

#include <stdexcept>

// Typed test fixture for the cudf::quantiles TYPED_TEST cases in this file;
// T is the element type each typed test is instantiated with.
template <typename T>
struct QuantilesTest : public cudf::test::BaseFixture {};

Expand Down Expand Up @@ -104,9 +106,10 @@ TYPED_TEST(QuantilesTest, TestMultiColumnArithmeticInterpolation)
cudf::test::fixed_width_column_wrapper<T> input_b({});
auto input = cudf::table_view({input_a});

EXPECT_THROW(cudf::quantiles(input, {0.0f}, cudf::interpolation::LINEAR), cudf::logic_error);
EXPECT_THROW(cudf::quantiles(input, {0.0f}, cudf::interpolation::LINEAR), std::invalid_argument);

EXPECT_THROW(cudf::quantiles(input, {0.0f}, cudf::interpolation::MIDPOINT), cudf::logic_error);
EXPECT_THROW(cudf::quantiles(input, {0.0f}, cudf::interpolation::MIDPOINT),
std::invalid_argument);
}

TYPED_TEST(QuantilesTest, TestMultiColumnUnsorted)
Expand Down
1 change: 1 addition & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ This page provides API documentation for pylibcudf.
join
lists
merge
quantiles
reduce
reshape
rolling
Expand Down
6 changes: 6 additions & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/quantiles.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
=========
quantiles
=========

.. automodule:: cudf._lib.pylibcudf.quantiles
:members:
1 change: 1 addition & 0 deletions python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ set(cython_sources
join.pyx
lists.pyx
merge.pyx
quantiles.pyx
reduce.pyx
replace.pyx
reshape.pyx
Expand Down
2 changes: 2 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/__init__.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ from . cimport (
join,
lists,
merge,
quantiles,
reduce,
replace,
reshape,
Expand Down Expand Up @@ -46,6 +47,7 @@ __all__ = [
"join",
"lists",
"merge",
"quantiles",
"reduce",
"replace",
"rolling",
Expand Down
2 changes: 2 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
join,
lists,
merge,
quantiles,
reduce,
replace,
reshape,
Expand Down Expand Up @@ -46,6 +47,7 @@
"join",
"lists",
"merge",
"quantiles",
"reduce",
"replace",
"rolling",
Expand Down
25 changes: 25 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/quantiles.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
from libcpp.vector cimport vector

from cudf._lib.pylibcudf.libcudf.types cimport interpolation, sorted

from .column cimport Column
from .table cimport Table


# Declaration of the single-column quantile wrapper implemented in quantiles.pyx.
# `= *` marks optional arguments; their default values are given in the .pyx.
cpdef Column quantile(
Column input,
vector[double] q,
interpolation interp = *,
Column ordered_indices = *,
bint exact = *
)

# Declaration of the multi-column (row) quantiles wrapper implemented in
# quantiles.pyx. `= *` marks optional arguments; their default values are
# given in the .pyx.
cpdef Table quantiles(
Table input,
vector[double] q,
interpolation interp = *,
sorted is_input_sorted = *,
list column_order = *,
list null_precedence = *,
)
152 changes: 152 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/quantiles.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from libcpp cimport bool
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move
from libcpp.vector cimport vector

from cudf._lib.pylibcudf.libcudf.column.column cimport column
from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
from cudf._lib.pylibcudf.libcudf.quantiles cimport (
quantile as cpp_quantile,
quantiles as cpp_quantiles,
)
from cudf._lib.pylibcudf.libcudf.table.table cimport table
from cudf._lib.pylibcudf.libcudf.types cimport null_order, order, sorted
vyasr marked this conversation as resolved.
Show resolved Hide resolved

from .column cimport Column
from .table cimport Table
from .types cimport interpolation


cpdef Column quantile(
Column input,
vector[double] q,
interpolation interp = interpolation.LINEAR,
Column ordered_indices = None,
bool exact=True
):
"""Computes quantiles with interpolation.

Computes the specified quantiles by interpolating values between which they lie,
using the interpolation strategy specified in interp.

Parameters
----------
input: Column
The Column to calculate quantiles on.
q: array-like
lithomas1 marked this conversation as resolved.
Show resolved Hide resolved
The quantiles to calculate in range [0,1].
Converted to a C++ ``vector[double]`` before the libcudf call.
interp: Interpolation, default Interpolation.LINEAR
The strategy used to select between values adjacent to a specified quantile.
ordered_indices: Column, default None
The column containing the sorted order of input.
``None`` is passed to libcudf as an empty column.

If empty, all input values are used in existing order.
Indices must be in range [0, input.size()), but are not required to be unique.
Values not indexed by this column will be ignored.
exact: bool, default True
Returns doubles if True. Otherwise, returns same type as input

For details, see :cpp:func:`quantile`.

Returns
-------
Column
A Column containing specified quantiles, with nulls for indeterminable values
"""
cdef:
unique_ptr[column] c_result
column_view ordered_indices_view

# A default-constructed column_view stands in for "no ordering supplied".
if ordered_indices is None:
ordered_indices_view = column_view()
else:
ordered_indices_view = ordered_indices.view()

# The libcudf call runs with the GIL released.
with nogil:
c_result = move(
cpp_quantile(
input.view(),
q,
interp,
ordered_indices_view,
exact,
)
)

return Column.from_libcudf(move(c_result))


cpdef Table quantiles(
Table input,
vector[double] q,
interpolation interp = interpolation.NEAREST,
sorted is_input_sorted = sorted.NO,
list column_order = None,
list null_precedence = None,
):
"""Computes row quantiles with interpolation.
vyasr marked this conversation as resolved.
Show resolved Hide resolved

Computes the specified quantiles by retrieving the row corresponding to the
specified quantiles. In the event a quantile lies in between rows, the specified
interpolation strategy is used to pick between the rows.

Parameters
----------
input: Table
The Table to calculate row quantiles on.
q: array-like
The quantiles to calculate in range [0,1]
interp: Interpolation, default Interpolation.NEAREST
The strategy used to select between values adjacent to a specified quantile.

Must be a non-arithmetic interpolation strategy
lithomas1 marked this conversation as resolved.
Show resolved Hide resolved
(i.e. one of
{`Interpolation.HIGHER`, `Interpolation.LOWER`, `Interpolation.NEAREST`})
is_input_sorted: Sorted, default Sorted.NO
Whether the input table has been pre-sorted or not.
column_order: list, default None
A list of :py:class:`~cudf._lib.pylibcudf.types.Order` enums,
indicating the desired sort order for each column.
By default, will sort all columns so that they are in ascending order.

Ignored if `is_input_sorted` is `Sorted.YES`
null_precedence: list, default None
A list of :py:class:`~cudf._lib.pylibcudf.types.NullOrder` enums,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps:

Suggested change
A list of :py:class:`~cudf._lib.pylibcudf.types.NullOrder` enums,
A list of :py:class:`~.types.NullOrder` enums,

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks, updated.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nevermind, I don't think sphinx is able to find the enum as a class generally - I'm getting errors where sphinx isn't able to find the reference.
(maybe since the enum doesn't have a docstring).

I changed it back to regular backtics.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We will definitely have to do some work on improving documentation of pylibcudf before the end. I'm hoping to find time between now and then to improve enum support in Cython in some of the edge cases that tend to bite us.

indicating how nulls should be sorted.
By default, will sort all columns so that nulls appear before
all other elements.

Ignored if `is_input_sorted` is `Sorted.YES`

For details, see :cpp:func:`quantiles`.

Returns
-------
Table
A Table containing specified quantiles, with nulls for indeterminable values
"""
cdef:
unique_ptr[table] c_result
vector[order] column_order_vec
vector[null_order] null_precedence_vec

# A None argument leaves the corresponding C++ vector empty.
if column_order is not None:
column_order_vec = column_order
if null_precedence is not None:
null_precedence_vec = null_precedence

# The libcudf call runs with the GIL released.
with nogil:
c_result = move(
cpp_quantiles(
input.view(),
q,
interp,
is_input_sorted,
column_order_vec,
null_precedence_vec,
)
)

return Table.from_libcudf(move(c_result))
29 changes: 29 additions & 0 deletions python/cudf/cudf/pylibcudf_tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import pyarrow as pa
import pytest

import cudf._lib.pylibcudf as plc

sys.path.insert(0, os.path.join(os.path.dirname(__file__), "common"))

from utils import DEFAULT_STRUCT_TESTING_TYPE
Expand All @@ -29,3 +31,30 @@
)
def pa_type(request):
return request.param


@pytest.fixture(
    scope="session", params=[pa.int64(), pa.float64(), pa.uint64()]
)
def numeric_pa_type(request):
    """Parametrized over the numeric pyarrow types int64, float64 and uint64."""
    return request.param


@pytest.fixture(scope="session", params=list(plc.types.Interpolation))
def interp_opt(request):
    """Parametrized over every member of ``plc.types.Interpolation``."""
    return request.param


@pytest.fixture(scope="session", params=list(plc.types.Sorted))
def sorted_opt(request):
    """Parametrized over every member of ``plc.types.Sorted``."""
    return request.param
Loading
Loading