Skip to content

Commit

Permalink
Remove cudf._lib.filling in favor of inlining pylibcudf (#17459)
Browse files: browse the repository at this point in the history.
Contributes to #17317

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)
  - Lawrence Mitchell (https://github.com/wence-)

Approvers:
  - Lawrence Mitchell (https://github.com/wence-)

URL: #17459
  • Loading branch information
mroeschke authored Dec 2, 2024
1 parent 852338e commit da72cf6
Show file tree
Hide file tree
Showing 7 changed files with 51 additions and 87 deletions.
1 change: 0 additions & 1 deletion python/cudf/cudf/_lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ set(cython_sources
column.pyx
copying.pyx
csv.pyx
filling.pyx
groupby.pyx
interop.pyx
merge.pyx
Expand Down
1 change: 0 additions & 1 deletion python/cudf/cudf/_lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
binaryop,
copying,
csv,
filling,
groupby,
interop,
merge,
Expand Down
57 changes: 0 additions & 57 deletions python/cudf/cudf/_lib/filling.pyx

This file was deleted.

7 changes: 1 addition & 6 deletions python/cudf/cudf/core/column/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -668,13 +668,8 @@ def _fill(
return self if inplace else self.copy()

fill_code = self._encode(fill_value)
fill_scalar = cudf._lib.scalar.as_device_scalar(
fill_code, self.codes.dtype
)

result = self if inplace else self.copy()

libcudf.filling.fill_in_place(result.codes, begin, end, fill_scalar)
result.codes._fill(fill_code, begin, end, inplace=True)
return result

def slice(self, start: int, stop: int, stride: int | None = None) -> Self:
Expand Down
47 changes: 32 additions & 15 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,14 +401,19 @@ def _fill(
# the scalar is None when calling `is_valid`.
slr = cudf.Scalar(fill_value, dtype=self.dtype)

if not inplace:
return libcudf.filling.fill(self, begin, end, slr.device_value)

if is_string_dtype(self.dtype):
return self._mimic_inplace(
libcudf.filling.fill(self, begin, end, slr.device_value),
inplace=True,
)
if not inplace or is_string_dtype(self.dtype):
with acquire_spill_lock():
result = type(self).from_pylibcudf(
plc.filling.fill(
self.to_pylibcudf(mode="read"),
begin,
end,
slr.device_value.c_value,
)
)
if is_string_dtype(self.dtype):
return self._mimic_inplace(result, inplace=True)
return result # type: ignore[return-value]

if not slr.is_valid() and not self.nullable:
mask = as_buffer(
Expand All @@ -418,8 +423,13 @@ def _fill(
)
self.set_base_mask(mask)

libcudf.filling.fill_in_place(self, begin, end, slr.device_value)

with acquire_spill_lock():
plc.filling.fill_in_place(
self.to_pylibcudf(mode="write"),
begin,
end,
slr.device_value.c_value,
)
return self

def shift(self, offset: int, fill_value: ScalarLike) -> ColumnBase:
Expand Down Expand Up @@ -1813,11 +1823,18 @@ def as_column(
* range objects
"""
if isinstance(arbitrary, (range, pd.RangeIndex, cudf.RangeIndex)):
column = libcudf.filling.sequence(
len(arbitrary),
as_device_scalar(arbitrary.start, dtype=cudf.dtype("int64")),
as_device_scalar(arbitrary.step, dtype=cudf.dtype("int64")),
)
with acquire_spill_lock():
column = Column.from_pylibcudf(
plc.filling.sequence(
len(arbitrary),
as_device_scalar(
arbitrary.start, dtype=np.dtype(np.int64)
).c_value,
as_device_scalar(
arbitrary.step, dtype=np.dtype(np.int64)
).c_value,
)
)
if cudf.get_option("default_integer_bitwidth") and dtype is None:
dtype = cudf.dtype(
f'i{cudf.get_option("default_integer_bitwidth")//8}'
Expand Down
11 changes: 10 additions & 1 deletion python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1901,7 +1901,16 @@ def _repeat(
if not is_scalar(repeats):
repeats = as_column(repeats)

return libcudf.filling.repeat(columns, repeats)
with acquire_spill_lock():
plc_table = plc.Table(
[col.to_pylibcudf(mode="read") for col in columns]
)
if isinstance(repeats, ColumnBase):
repeats = repeats.to_pylibcudf(mode="read")
return [
libcudf.column.Column.from_pylibcudf(col)
for col in plc.filling.repeat(plc_table, repeats).columns()
]

@_performance_tracking
@_warn_no_dask_cudf
Expand Down
14 changes: 8 additions & 6 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@

import cudf
from cudf import _lib as libcudf
from cudf._lib.filling import sequence
from cudf._lib.types import size_type_dtype
from cudf.api.extensions import no_default
from cudf.api.types import (
Expand Down Expand Up @@ -3402,11 +3401,14 @@ def interval_range(
start = start.astype(common_dtype)
freq = freq.astype(common_dtype)

bin_edges = sequence(
size=periods + 1,
init=start.device_value,
step=freq.device_value,
)
with acquire_spill_lock():
bin_edges = libcudf.column.Column.from_pylibcudf(
plc.filling.sequence(
size=periods + 1,
init=start.device_value.c_value,
step=freq.device_value.c_value,
)
)
return IntervalIndex.from_breaks(bin_edges, closed=closed, name=name)


Expand Down

0 comments on commit da72cf6

Please sign in to comment.