Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/branch-25.02' into cleanup-mur…
Browse files Browse the repository at this point in the history
…murhash-32
  • Loading branch information
PointKernel committed Nov 24, 2024
2 parents 0cdaeaa + 44b2e79 commit 14c8326
Show file tree
Hide file tree
Showing 11 changed files with 87 additions and 263 deletions.
175 changes: 0 additions & 175 deletions cpp/include/cudf/detail/utilities/int_fastdiv.h

This file was deleted.

1 change: 0 additions & 1 deletion python/cudf/cudf/_lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ set(cython_sources
rolling.pyx
round.pyx
scalar.pyx
search.pyx
sort.pyx
stream_compaction.pyx
string_casting.pyx
Expand Down
1 change: 0 additions & 1 deletion python/cudf/cudf/_lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
reshape,
rolling,
round,
search,
sort,
stream_compaction,
string_casting,
Expand Down
68 changes: 0 additions & 68 deletions python/cudf/cudf/_lib/search.pyx

This file was deleted.

56 changes: 56 additions & 0 deletions python/cudf/cudf/core/_internals/search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
from __future__ import annotations

from typing import TYPE_CHECKING, Literal

import pylibcudf as plc

from cudf._lib.column import Column
from cudf.core.buffer import acquire_spill_lock

if TYPE_CHECKING:
from cudf.core.column import ColumnBase


@acquire_spill_lock()
def search_sorted(
    source: list[ColumnBase],
    values: list[ColumnBase],
    side: Literal["left", "right"],
    ascending: bool = True,
    na_position: Literal["first", "last"] = "last",
) -> ColumnBase:
    """Locate insertion points for ``values`` within sorted ``source``.

    The columns in ``source`` and ``values`` are each packed into a
    pylibcudf table and handed to ``plc.search.lower_bound`` or
    ``plc.search.upper_bound``. The same sort order and null ordering
    are applied to every column of ``source``.

    Parameters
    ----------
    source : list of columns
        Columns to search in.
    values : list of columns
        Columns holding the values to search for.
    side : {'left', 'right'}
        ``'left'`` uses ``lower_bound`` (index of the first suitable
        location). Any other value uses ``upper_bound`` (the last such
        index).
    ascending : bool, default True
        When truthy, every column is treated as sorted ascending;
        otherwise descending.
    na_position : {'first', 'last'}, default 'last'
        ``'last'`` maps to ``NullOrder.AFTER``; any other value maps to
        ``NullOrder.BEFORE``.

    Returns
    -------
    ColumnBase
        The bound-search result wrapped via ``Column.from_pylibcudf``.
    """
    # Note: index columns are ignored here; only the columns passed in
    # ``source`` take part in the search.
    if ascending:
        order = plc.types.Order.ASCENDING
    else:
        order = plc.types.Order.DESCENDING
    column_order = [order] * len(source)

    if na_position == "last":
        null_order = plc.types.NullOrder.AFTER
    else:
        null_order = plc.types.NullOrder.BEFORE
    null_precedence = [null_order] * len(source)

    if side == "left":
        bound = plc.search.lower_bound
    else:
        bound = plc.search.upper_bound

    haystack = plc.Table([col.to_pylibcudf(mode="read") for col in source])
    needles = plc.Table([col.to_pylibcudf(mode="read") for col in values])
    positions = bound(haystack, needles, column_order, null_precedence)
    return Column.from_pylibcudf(positions)
23 changes: 20 additions & 3 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -757,7 +757,7 @@ def indices_of(
raise ValueError("value must be a scalar")
else:
value = as_column(value, dtype=self.dtype, length=1)
mask = libcudf.search.contains(value, self)
mask = value.contains(self)
return apply_boolean_mask(
[as_column(range(0, len(self)), dtype=size_type_dtype)], mask
)[0]
Expand Down Expand Up @@ -914,7 +914,7 @@ def _obtain_isin_result(self, rhs: ColumnBase) -> ColumnBase:
# self.isin(other) asks "which values of self are in other"
# contains(haystack, needles) asks "which needles are in haystack"
# hence this argument ordering.
result = libcudf.search.contains(rhs, self)
result = rhs.contains(self)
if self.null_count > 0:
# If one of the needles is null, then the result contains
# nulls, these nulls should be replaced by whether or not the
Expand Down Expand Up @@ -956,6 +956,23 @@ def is_monotonic_decreasing(self) -> bool:
[self], [False], None
)

def contains(self, other: ColumnBase) -> ColumnBase:
    """
    Check which values of ``other`` are present in this column.

    This column is the haystack and ``other`` supplies the needles;
    both are passed to ``plc.search.contains`` in that order while a
    spill lock is held.

    Parameters
    ----------
    other : Column
        A column of values to search for

    Returns
    -------
    ColumnBase
        The result of ``plc.search.contains`` wrapped via
        ``Column.from_pylibcudf``; callers use it as a boolean mask
        (presumably one entry per row of ``other`` -- confirm against
        the pylibcudf documentation).
    """
    with acquire_spill_lock():
        haystack = self.to_pylibcudf(mode="read")
        needles = other.to_pylibcudf(mode="read")
        found = plc.search.contains(haystack, needles)
        return Column.from_pylibcudf(found)

def sort_values(
self: Self,
ascending: bool = True,
Expand Down Expand Up @@ -1190,7 +1207,7 @@ def searchsorted(
raise ValueError(
"Column searchsorted expects values to be column of same dtype"
)
return libcudf.search.search_sorted(
return cudf.core._internals.search.search_sorted( # type: ignore[return-value]
[self],
[value],
side=side,
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/column/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@

import cudf
from cudf import _lib as libcudf
from cudf._lib.search import search_sorted
from cudf.core._compat import PANDAS_GE_220
from cudf.core._internals import unary
from cudf.core._internals.search import search_sorted
from cudf.core._internals.timezones import (
check_ambiguous_and_nonexistent,
get_compatible_timezone,
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/column/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,8 @@ def __contains__(self, item: ScalarLike) -> bool:
except (TypeError, ValueError):
return False
# TODO: Use `scalar`-based `contains` wrapper
return libcudf.search.contains(
self, column.as_column([search_item], dtype=self.dtype)
return self.contains(
column.as_column([search_item], dtype=self.dtype)
).any()

def indices_of(self, value: ScalarLike) -> NumericalColumn:
Expand Down
Loading

0 comments on commit 14c8326

Please sign in to comment.