From 985758c5a9d81648e0dc6b0a5bef049c3d114fbc Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 26 Nov 2024 15:09:40 -0800 Subject: [PATCH] Remove cudf._lib.filling in favor of inlining pylibcudf --- python/cudf/cudf/_lib/CMakeLists.txt | 1 - python/cudf/cudf/_lib/__init__.py | 1 - python/cudf/cudf/_lib/filling.pyx | 57 --------------------- python/cudf/cudf/core/column/categorical.py | 7 +-- python/cudf/cudf/core/column/column.py | 47 +++++++++++------ python/cudf/cudf/core/frame.py | 11 +++- python/cudf/cudf/core/index.py | 14 ++--- 7 files changed, 51 insertions(+), 87 deletions(-) delete mode 100644 python/cudf/cudf/_lib/filling.pyx diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index 45e0fc345b5..c078fece114 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -19,7 +19,6 @@ set(cython_sources copying.pyx csv.pyx datetime.pyx - filling.pyx groupby.pyx interop.pyx json.pyx diff --git a/python/cudf/cudf/_lib/__init__.py b/python/cudf/cudf/_lib/__init__.py index c51db601985..7d722abceda 100644 --- a/python/cudf/cudf/_lib/__init__.py +++ b/python/cudf/cudf/_lib/__init__.py @@ -6,7 +6,6 @@ copying, csv, datetime, - filling, groupby, interop, json, diff --git a/python/cudf/cudf/_lib/filling.pyx b/python/cudf/cudf/_lib/filling.pyx deleted file mode 100644 index b2f4c620144..00000000000 --- a/python/cudf/cudf/_lib/filling.pyx +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. - -from cudf.core.buffer import acquire_spill_lock - -from cudf._lib.column cimport Column -from cudf._lib.scalar cimport DeviceScalar -from cudf._lib.utils cimport columns_from_pylibcudf_table - -import pylibcudf - -from cudf._lib.scalar import as_device_scalar - - -@acquire_spill_lock() -def fill_in_place(Column destination, int begin, int end, DeviceScalar value): - pylibcudf.filling.fill_in_place( - destination.to_pylibcudf(mode='write'), - begin, - end, - ( as_device_scalar(value, dtype=destination.dtype)).c_value - ) - - -@acquire_spill_lock() -def fill(Column destination, int begin, int end, DeviceScalar value): - return Column.from_pylibcudf( - pylibcudf.filling.fill( - destination.to_pylibcudf(mode='read'), - begin, - end, - ( as_device_scalar(value)).c_value - ) - ) - - -@acquire_spill_lock() -def repeat(list inp, object count): - ctbl = pylibcudf.Table([col.to_pylibcudf(mode="read") for col in inp]) - if isinstance(count, Column): - count = count.to_pylibcudf(mode="read") - return columns_from_pylibcudf_table( - pylibcudf.filling.repeat( - ctbl, - count - ) - ) - - -@acquire_spill_lock() -def sequence(int size, DeviceScalar init, DeviceScalar step): - return Column.from_pylibcudf( - pylibcudf.filling.sequence( - size, - ( as_device_scalar(init)).c_value, - ( as_device_scalar(step)).c_value - ) - ) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 7354b917f90..c39eb9eed85 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -668,13 +668,8 @@ def _fill( return self if inplace else self.copy() fill_code = self._encode(fill_value) - fill_scalar = cudf._lib.scalar.as_device_scalar( - fill_code, self.codes.dtype - ) - result = self if inplace else self.copy() - - libcudf.filling.fill_in_place(result.codes, begin, end, fill_scalar) + result.codes._fill(fill_code, begin, end, inplace=True) return result def slice(self, start: int, stop: int, stride: int | None = None) -> Self: diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index f0df4a3c1b3..8729b91c404 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -400,21 +400,31 @@ def _fill( # the scalar is None when calling `is_valid`. slr = cudf.Scalar(fill_value, dtype=self.dtype) - if not inplace: - return libcudf.filling.fill(self, begin, end, slr.device_value) - - if is_string_dtype(self.dtype): - return self._mimic_inplace( - libcudf.filling.fill(self, begin, end, slr.device_value), - inplace=True, - ) + if not inplace or is_string_dtype(self.dtype): + with acquire_spill_lock(): + result = type(self).from_pylibcudf( + plc.filling.fill( + self.to_pylibcudf(mode="read"), + begin, + end, + slr.device_value.c_value, + ) + ) + if is_string_dtype(self.dtype): + return self._mimic_inplace(result, inplace=True) + return result # type: ignore[return-value] if not slr.is_valid() and not self.nullable: mask = create_null_mask(self.size, state=MaskState.ALL_VALID) self.set_base_mask(mask) - libcudf.filling.fill_in_place(self, begin, end, slr.device_value) - + with acquire_spill_lock(): + plc.filling.fill_in_place( + self.to_pylibcudf(mode="write"), + begin, + end, + slr.device_value.c_value, + ) return self def shift(self, offset: int, fill_value: ScalarLike) -> ColumnBase: @@ -1771,11 +1781,18 @@ def as_column( * range objects """ if isinstance(arbitrary, (range, pd.RangeIndex, cudf.RangeIndex)): - column = libcudf.filling.sequence( - len(arbitrary), - as_device_scalar(arbitrary.start, dtype=cudf.dtype("int64")), - as_device_scalar(arbitrary.step, dtype=cudf.dtype("int64")), - ) + with acquire_spill_lock(): + column = Column.from_pylibcudf( + plc.filling.sequence( + len(arbitrary), + as_device_scalar( + arbitrary.start, dtype=np.dtype(np.int64) + ).c_value, + as_device_scalar( + arbitrary.step, dtype=np.dtype(np.int64) + ).c_value, + ) + ) if cudf.get_option("default_integer_bitwidth") and dtype is None: dtype = cudf.dtype( f'i{cudf.get_option("default_integer_bitwidth")//8}' diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 0c0f271fe6f..03213a7470a 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -1901,7 +1901,16 @@ def _repeat( if not is_scalar(repeats): repeats = as_column(repeats) - return libcudf.filling.repeat(columns, repeats) + with acquire_spill_lock(): + plc_table = plc.Table( + [col.to_pylibcudf(mode="read") for col in columns] + ) + if isinstance(repeats, ColumnBase): + repeats = repeats.to_pylibcudf(mode="read") + return [ + libcudf.column.Column.from_pylibcudf(col) + for col in plc.filling.repeat(plc_table, repeats).columns() + ] @_performance_tracking @_warn_no_dask_cudf diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 80e037c36fd..bee25a24001 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -20,7 +20,6 @@ import cudf from cudf import _lib as libcudf -from cudf._lib.filling import sequence from cudf._lib.types import size_type_dtype from cudf.api.extensions import no_default from cudf.api.types import ( @@ -3415,11 +3414,14 @@ def interval_range( start = start.astype(common_dtype) freq = freq.astype(common_dtype) - bin_edges = sequence( - size=periods + 1, - init=start.device_value, - step=freq.device_value, - ) + with acquire_spill_lock(): + bin_edges = libcudf.column.Column.from_pylibcudf( + plc.filling.sequence( + size=periods + 1, + init=start.device_value.c_value, + step=freq.device_value.c_value, + ) + ) return IntervalIndex.from_breaks(bin_edges, closed=closed, name=name)