Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Remove cudf._lib.filling in favor of inlining pylibcudf #17459

Merged
merged 4 commits on Dec 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion python/cudf/cudf/_lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ set(cython_sources
column.pyx
copying.pyx
csv.pyx
filling.pyx
groupby.pyx
interop.pyx
merge.pyx
Expand Down
1 change: 0 additions & 1 deletion python/cudf/cudf/_lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
binaryop,
copying,
csv,
filling,
groupby,
interop,
merge,
Expand Down
57 changes: 0 additions & 57 deletions python/cudf/cudf/_lib/filling.pyx

This file was deleted.

7 changes: 1 addition & 6 deletions python/cudf/cudf/core/column/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -668,13 +668,8 @@ def _fill(
return self if inplace else self.copy()

fill_code = self._encode(fill_value)
fill_scalar = cudf._lib.scalar.as_device_scalar(
fill_code, self.codes.dtype
)

result = self if inplace else self.copy()

libcudf.filling.fill_in_place(result.codes, begin, end, fill_scalar)
result.codes._fill(fill_code, begin, end, inplace=True)
return result

def slice(self, start: int, stop: int, stride: int | None = None) -> Self:
Expand Down
47 changes: 32 additions & 15 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,14 +395,19 @@ def _fill(
# the scalar is None when calling `is_valid`.
slr = cudf.Scalar(fill_value, dtype=self.dtype)

if not inplace:
return libcudf.filling.fill(self, begin, end, slr.device_value)

if is_string_dtype(self.dtype):
return self._mimic_inplace(
libcudf.filling.fill(self, begin, end, slr.device_value),
inplace=True,
)
if not inplace or is_string_dtype(self.dtype):
with acquire_spill_lock():
result = type(self).from_pylibcudf(
plc.filling.fill(
self.to_pylibcudf(mode="read"),
begin,
end,
slr.device_value.c_value,
)
)
if is_string_dtype(self.dtype):
return self._mimic_inplace(result, inplace=True)
return result # type: ignore[return-value]

if not slr.is_valid() and not self.nullable:
mask = as_buffer(
Expand All @@ -412,8 +417,13 @@ def _fill(
)
self.set_base_mask(mask)

libcudf.filling.fill_in_place(self, begin, end, slr.device_value)

with acquire_spill_lock():
plc.filling.fill_in_place(
self.to_pylibcudf(mode="write"),
begin,
end,
slr.device_value.c_value,
)
return self

def shift(self, offset: int, fill_value: ScalarLike) -> ColumnBase:
Expand Down Expand Up @@ -1774,11 +1784,18 @@ def as_column(
* range objects
"""
if isinstance(arbitrary, (range, pd.RangeIndex, cudf.RangeIndex)):
column = libcudf.filling.sequence(
len(arbitrary),
as_device_scalar(arbitrary.start, dtype=cudf.dtype("int64")),
as_device_scalar(arbitrary.step, dtype=cudf.dtype("int64")),
)
with acquire_spill_lock():
column = Column.from_pylibcudf(
plc.filling.sequence(
len(arbitrary),
as_device_scalar(
arbitrary.start, dtype=np.dtype(np.int64)
).c_value,
as_device_scalar(
arbitrary.step, dtype=np.dtype(np.int64)
).c_value,
)
)
if cudf.get_option("default_integer_bitwidth") and dtype is None:
dtype = cudf.dtype(
f'i{cudf.get_option("default_integer_bitwidth")//8}'
Expand Down
11 changes: 10 additions & 1 deletion python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1901,7 +1901,16 @@ def _repeat(
if not is_scalar(repeats):
repeats = as_column(repeats)

return libcudf.filling.repeat(columns, repeats)
with acquire_spill_lock():
plc_table = plc.Table(
[col.to_pylibcudf(mode="read") for col in columns]
)
if isinstance(repeats, ColumnBase):
repeats = repeats.to_pylibcudf(mode="read")
return [
libcudf.column.Column.from_pylibcudf(col)
for col in plc.filling.repeat(plc_table, repeats).columns()
]

@_performance_tracking
@_warn_no_dask_cudf
Expand Down
14 changes: 8 additions & 6 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@

import cudf
from cudf import _lib as libcudf
from cudf._lib.filling import sequence
from cudf._lib.types import size_type_dtype
from cudf.api.extensions import no_default
from cudf.api.types import (
Expand Down Expand Up @@ -3402,11 +3401,14 @@ def interval_range(
start = start.astype(common_dtype)
freq = freq.astype(common_dtype)

bin_edges = sequence(
size=periods + 1,
init=start.device_value,
step=freq.device_value,
)
with acquire_spill_lock():
bin_edges = libcudf.column.Column.from_pylibcudf(
plc.filling.sequence(
size=periods + 1,
init=start.device_value.c_value,
step=freq.device_value.c_value,
)
)
return IntervalIndex.from_breaks(bin_edges, closed=closed, name=name)


Expand Down
Loading