Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate round to pylibcudf #15863

Merged
merged 13 commits into from
May 31, 2024
1 change: 1 addition & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ This page provides API documentation for pylibcudf.
reduce
reshape
rolling
round
scalar
search
stream_compaction
Expand Down
6 changes: 6 additions & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/round.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
=====
round
=====

.. automodule:: cudf._lib.pylibcudf.round
:members:
1 change: 1 addition & 0 deletions python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ set(cython_sources
replace.pyx
reshape.pyx
rolling.pyx
round.pyx
scalar.pyx
search.pyx
stream_compaction.pyx
Expand Down
2 changes: 2 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/__init__.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ from . cimport (
replace,
reshape,
rolling,
round,
search,
sorting,
stream_compaction,
Expand Down Expand Up @@ -48,6 +49,7 @@ __all__ = [
"reduce",
"replace",
"rolling",
"round",
"search",
"stream_compaction",
"strings",
Expand Down
2 changes: 2 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
replace,
reshape,
rolling,
round,
search,
sorting,
stream_compaction,
Expand Down Expand Up @@ -48,6 +49,7 @@
"reduce",
"replace",
"rolling",
"round",
"search",
"stream_compaction",
"strings",
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/_lib/pylibcudf/libcudf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# the License.
# =============================================================================

set(cython_sources aggregation.pyx binaryop.pyx copying.pyx replace.pyx reduce.pxd
set(cython_sources aggregation.pyx binaryop.pyx copying.pyx replace.pyx reduce.pxd round.pyx
stream_compaction.pyx types.pyx unary.pyx
)

Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/_lib/pylibcudf/libcudf/round.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view

cdef extern from "cudf/round.hpp" namespace "cudf" nogil:

ctypedef enum rounding_method "cudf::rounding_method":
HALF_UP "cudf::rounding_method::HALF_UP"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do the strings after the enum values mean anything (e.g. "cudf::rounding_method::HALF_UP") or are they purely cosmetic?

(asking since I don't want to unintentionally change behavior by changing this here)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The strings are meaningful, but removing them is the right answer. The strings are a way to alias the names of Cython objects so that they appear differently in generated C code. IOW when you write cdef rounding_method x = HALF_UP in Cython code, that will translate to C code that says enum rounding_method X = cudf::rounding_method::HALF_UP. The reason that we previously need this in enums in our Cython is because prior to version 3.0 Cython did not support C++ scoped enumerations (enum class), but all of our C++ enums are defined that way, so the alias was the only way to trick Cython into generating the code that we needed (with raw C enums you don't need to do cudf::rounding_method::HALF_UP, you can just do cudf::HALF_UP because the enum doesn't create it's own namespace). By switching to using an enum class here we remove the need for that.

HALF_EVEN "cudf::rounding_method::HALF_EVEN"
cpdef enum class rounding_method:
HALF_UP
HALF_EVEN

cdef unique_ptr[column] round (
const column_view& input,
Expand Down
Empty file.
15 changes: 15 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/round.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
from libc.stdint cimport int32_t

from cudf._lib.pylibcudf.libcudf.round cimport (
rounding_method as rounding_method,
lithomas1 marked this conversation as resolved.
Show resolved Hide resolved
)

from .column cimport Column


cpdef Column round(
Column source,
int32_t decimal_places = *,
rounding_method round_method = *
)
52 changes: 52 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/round.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
from libc.stdint cimport int32_t
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move

from cudf._lib.pylibcudf.libcudf.round cimport (
round as cpp_round,
rounding_method,
)

from cudf._lib.pylibcudf.libcudf.round import \
rounding_method as RoundingMethod # no-cython-lint

from cudf._lib.pylibcudf.libcudf.column.column cimport column

from .column cimport Column


cpdef Column round(
Column source,
int32_t decimal_places = 0,
rounding_method round_method = rounding_method.HALF_UP
):
"""Rounds all the values in a column to the specified number of decimal places.

lithomas1 marked this conversation as resolved.
Show resolved Hide resolved
Parameters
----------
source : Column
The Column for which to round values.
decimal_places: int32_t, optional
The number of decimal places to round to (default 0)
round_method: rounding_method, optional
The method by which to round each value.
Can be one of { RoundingMethod.HALF_UP, RoundingMethod.HALF_EVEN }
(default rounding_method.HALF_UP)

Returns
-------
pylibcudf.Column
A Column with values rounded
"""
cdef unique_ptr[column] c_result
with nogil:
c_result = move(
cpp_round(
source.view(),
decimal_places,
round_method
)
)

return Column.from_libcudf(move(c_result))
36 changes: 13 additions & 23 deletions python/cudf/cudf/_lib/round.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,10 @@

from cudf.core.buffer import acquire_spill_lock

from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move

from cudf._lib.column cimport Column
from cudf._lib.pylibcudf.libcudf.column.column cimport column
from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
from cudf._lib.pylibcudf.libcudf.round cimport (
round as cpp_round,
rounding_method as cpp_rounding_method,
)
from cudf._lib.pylibcudf.round cimport rounding_method
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a general rule we want to avoid cimporting anything from pylibcudf into cudf._lib. The goal is for cudf to become pure Python, or as close to it as possible (we may still use Cython, but primarily for cudf-specific internal optimizations; calls to the libcudf APIs should be done by treating pylibcudf as a pure Python API).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated.

Followup:
Is there still a plan for a Cython API to pylibcudf then?
(or are the pxd's for internal pylibcudf use only)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question. I still want the Cython API, but it's a lower priority. The Cython API will probably end up used in specific parts of cudf internals where we might actually see performance benefits, but in general I expect the bulk of usage to be of the Python API. The exceptions I foresee are cases like groupby or joins where generating many groupby/agg objects from cuDF can be accelerated using the pylibcudf Cython API.


import cudf._lib.pylibcudf as plc


@acquire_spill_lock()
Expand All @@ -31,19 +25,15 @@ def round(Column input_col, int decimal_places=0, how="half_even"):
if how not in {"half_even", "half_up"}:
raise ValueError("'how' must be either 'half_even' or 'half_up'")

cdef column_view input_col_view = input_col.view()
cdef unique_ptr[column] c_result
cdef cpp_rounding_method c_how = (
cpp_rounding_method.HALF_EVEN if how == "half_even"
else cpp_rounding_method.HALF_UP
cdef rounding_method c_how = (
lithomas1 marked this conversation as resolved.
Show resolved Hide resolved
rounding_method.HALF_EVEN if how == "half_even"
else rounding_method.HALF_UP
)
with nogil:
c_result = move(
cpp_round(
input_col_view,
decimal_places,
c_how
)
)

return Column.from_unique_ptr(move(c_result))
return Column.from_pylibcudf(
plc.round.round(
input_col.to_pylibcudf(mode="read"),
decimal_places,
c_how
)
)
40 changes: 40 additions & 0 deletions python/cudf/cudf/pylibcudf_tests/test_round.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

import pyarrow as pa
import pytest
from utils import assert_column_eq

import cudf._lib.pylibcudf as plc


@pytest.fixture(scope="module")
def pa_round_data():
pa_arr = pa.array([1.5, 2.5, 1.35, 1.45, 15, 25], type=pa.float64())
return pa_arr


@pytest.fixture(scope="module")
def plc_round_data(pa_round_data):
return plc.interop.from_arrow(pa_round_data)


@pytest.mark.parametrize("decimal_places", [0, 1, 10])
@pytest.mark.parametrize(
"round_mode",
[
("half_up", plc.round.RoundingMethod.HALF_UP),
("half_to_even", plc.round.RoundingMethod.HALF_EVEN),
],
)
def test_round(pa_round_data, plc_round_data, decimal_places, round_mode):
pa_round_mode, plc_round_mode = round_mode
res = plc.round.round(
plc_round_data,
decimal_places=decimal_places,
round_method=plc_round_mode,
)
expected = pa.compute.round(
pa_round_data, ndigits=decimal_places, round_mode=pa_round_mode
)

assert_column_eq(res, expected)
Loading