diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index e23da59b883..3754ed1392e 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -56,7 +56,6 @@ infer_dtype, is_bool_dtype, is_dtype_equal, - is_integer_dtype, is_scalar, is_string_dtype, ) @@ -606,7 +605,8 @@ def _scatter_by_slice( start, stop, step = key.indices(len(self)) if start >= stop: return None - num_keys = len(range(start, stop, step)) + rng = range(start, stop, step) + num_keys = len(rng) self._check_scatter_key_length(num_keys, value) @@ -625,7 +625,7 @@ def _scatter_by_slice( # step != 1, create a scatter map with arange scatter_map = as_column( - range(start, stop, step), + rng, dtype=cudf.dtype(np.int32), ) @@ -672,18 +672,16 @@ def _scatter_by_column( def _check_scatter_key_length( self, num_keys: int, value: Union[cudf.core.scalar.Scalar, ColumnBase] - ): + ) -> None: """`num_keys` is the number of keys to scatter. Should equal to the number of rows in ``value`` if ``value`` is a column. """ - if isinstance(value, ColumnBase): - if len(value) != num_keys: - msg = ( - f"Size mismatch: cannot set value " - f"of size {len(value)} to indexing result of size " - f"{num_keys}" - ) - raise ValueError(msg) + if isinstance(value, ColumnBase) and len(value) != num_keys: + raise ValueError( + f"Size mismatch: cannot set value " + f"of size {len(value)} to indexing result of size " + f"{num_keys}" + ) def fillna( self, @@ -820,7 +818,7 @@ def take( # TODO: For performance, the check and conversion of gather map should # be done by the caller. This check will be removed in future release. - if not is_integer_dtype(indices.dtype): + if indices.dtype.kind not in {"u", "i"}: indices = indices.astype(libcudf.types.size_type_dtype) if not libcudf.copying._gather_map_is_valid( indices, len(self), check_bounds, nullify diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py index c45a9c7fd5d..541c32a2520 100644 --- a/python/cudf/cudf/core/column/numerical_base.py +++ b/python/cudf/cudf/core/column/numerical_base.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2023, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. """Define an interface for columns that can perform numerical operations.""" from __future__ import annotations @@ -112,7 +112,13 @@ def quantile( ), ) else: - result = self._numeric_quantile(q, interpolation, exact) + # get sorted indices and exclude nulls + indices = libcudf.sort.order_by( + [self], [True], "first", stable=True + ).slice(self.null_count, len(self)) + result = libcudf.quantiles.quantile( + self, q, interpolation, indices, exact + ) if return_scalar: scalar_result = result.element_indexing(0) if interpolation in {"lower", "higher", "nearest"}: @@ -178,18 +184,6 @@ def median(self, skipna: Optional[bool] = None) -> NumericalBaseColumn: return_scalar=True, ) - def _numeric_quantile( - self, q: np.ndarray, interpolation: str, exact: bool - ) -> NumericalBaseColumn: - # get sorted indices and exclude nulls - indices = libcudf.sort.order_by( - [self], [True], "first", stable=True - ).slice(self.null_count, len(self)) - - return libcudf.quantiles.quantile( - self, q, interpolation, indices, exact - ) - def cov(self, other: NumericalBaseColumn) -> float: if ( len(self) == 0