From bc8fc0267244c7a372420451ddeed7b51f9c30ba Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 10 Jun 2024 15:33:59 -0700 Subject: [PATCH 1/3] Move some misc Frame methods to appropriate locations --- python/cudf/cudf/core/frame.py | 70 +++++------------------------ python/cudf/cudf/core/multiindex.py | 49 +++++++++++++++++++- 2 files changed, 59 insertions(+), 60 deletions(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index af8886a44a6..f94091f5e15 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -32,7 +32,7 @@ import cudf from cudf import _lib as libcudf from cudf._typing import Dtype -from cudf.api.types import is_bool_dtype, is_dtype_equal, is_scalar +from cudf.api.types import is_dtype_equal, is_scalar from cudf.core.buffer import acquire_spill_lock from cudf.core.column import ( ColumnBase, @@ -1455,51 +1455,6 @@ def _get_sorted_inds( stable=True, ) - @_cudf_nvtx_annotate - def _is_sorted(self, ascending=None, null_position=None): - """ - Returns a boolean indicating whether the data of the Frame are sorted - based on the parameters given. Does not account for the index. - - Parameters - ---------- - self : Frame - Frame whose columns are to be checked for sort order - ascending : None or list-like of booleans - None or list-like of boolean values indicating expected sort order - of each column. If list-like, size of list-like must be - len(columns). If None, all columns expected sort order is set to - ascending. False (0) - ascending, True (1) - descending. - null_position : None or list-like of booleans - None or list-like of boolean values indicating desired order of - nulls compared to other elements. If list-like, size of list-like - must be len(columns). If None, null order is set to before. False - (0) - before, True (1) - after. - - Returns - ------- - returns : boolean - Returns True, if sorted as expected by ``ascending`` and - ``null_position``, False otherwise. - """ - if ascending is not None and not cudf.api.types.is_list_like( - ascending - ): - raise TypeError( - f"Expected a list-like or None for `ascending`, got " - f"{type(ascending)}" - ) - if null_position is not None and not cudf.api.types.is_list_like( - null_position - ): - raise TypeError( - f"Expected a list-like or None for `null_position`, got " - f"{type(null_position)}" - ) - return libcudf.sort.is_sorted( - [*self._columns], ascending=ascending, null_position=null_position - ) - @_cudf_nvtx_annotate def _split(self, splits): """Split a frame with split points in ``splits``. Returns a list of @@ -1918,6 +1873,17 @@ def __copy__(self): @_cudf_nvtx_annotate def __invert__(self): """Bitwise invert (~) for integral dtypes, logical NOT for bools.""" + + def _apply_inverse_column(col: ColumnBase) -> ColumnBase: + if col.dtype.kind in "ui": + return col.unary_operator("invert") + elif col.dtype == "b": + return col.unary_operator("not") + else: + raise TypeError( + f"Operation `~` not supported on {col.dtype.type.__name__}" + ) + return self._from_data_like_self( self._data._from_columns_like_self( (_apply_inverse_column(col) for col in self._data.columns) @@ -1970,15 +1936,3 @@ def __dask_tokenize__(self): str(dict(self._dtypes)), normalize_token(self.to_pandas()), ] - - -def _apply_inverse_column(col: ColumnBase) -> ColumnBase: - """Bitwise invert (~) for integral dtypes, logical NOT for bools.""" - if np.issubdtype(col.dtype, np.integer): - return col.unary_operator("invert") - elif is_bool_dtype(col.dtype): - return col.unary_operator("not") - else: - raise TypeError( - f"Operation `~` not supported on {col.dtype.type.__name__}" - ) diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 11b4b9154a2..6d3520e33cf 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -1636,9 +1636,54 @@ def is_unique(self): def dtype(self): return np.dtype("O") + @_cudf_nvtx_annotate + def _is_sorted(self, ascending=None, null_position=None) -> bool: + """ + Returns a boolean indicating whether the data of the MultiIndex are sorted + based on the parameters given. Does not account for the index. + + Parameters + ---------- + self : MultiIndex + MultiIndex whose columns are to be checked for sort order + ascending : None or list-like of booleans + None or list-like of boolean values indicating expected sort order + of each column. If list-like, size of list-like must be + len(columns). If None, all columns expected sort order is set to + ascending. False (0) - ascending, True (1) - descending. + null_position : None or list-like of booleans + None or list-like of boolean values indicating desired order of + nulls compared to other elements. If list-like, size of list-like + must be len(columns). If None, null order is set to before. False + (0) - before, True (1) - after. + + Returns + ------- + returns : boolean + Returns True, if sorted as expected by ``ascending`` and + ``null_position``, False otherwise. + """ + if ascending is not None and not cudf.api.types.is_list_like( + ascending + ): + raise TypeError( + f"Expected a list-like or None for `ascending`, got " + f"{type(ascending)}" + ) + if null_position is not None and not cudf.api.types.is_list_like( + null_position + ): + raise TypeError( + f"Expected a list-like or None for `null_position`, got " + f"{type(null_position)}" + ) + return libcudf.sort.is_sorted( + [*self._columns], ascending=ascending, null_position=null_position + ) + @cached_property # type: ignore @_cudf_nvtx_annotate - def is_monotonic_increasing(self): + def is_monotonic_increasing(self) -> bool: """ Return if the index is monotonic increasing (only equal or increasing) values. @@ -1647,7 +1692,7 @@ def is_monotonic_increasing(self): @cached_property # type: ignore @_cudf_nvtx_annotate - def is_monotonic_decreasing(self): + def is_monotonic_decreasing(self) -> bool: """ Return if the index is monotonic decreasing (only equal or decreasing) values. From e25a2c6b72d46c6a231d1980cd9f24f84dd098c4 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 10 Jun 2024 18:25:59 -0700 Subject: [PATCH 2/3] .kind --- python/cudf/cudf/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index f94091f5e15..f0daa6ea6fe 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -1877,7 +1877,7 @@ def __invert__(self): def _apply_inverse_column(col: ColumnBase) -> ColumnBase: if col.dtype.kind in "ui": return col.unary_operator("invert") - elif col.dtype == "b": + elif col.dtype.kind == "b": return col.unary_operator("not") else: raise TypeError( From 3efcbb7bdb32c6e2c9c020af3b4b32d2b58590b9 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 11 Jun 2024 15:47:49 -0700 Subject: [PATCH 3/3] Define __invert__ on column instead --- python/cudf/cudf/core/column/column.py | 5 +++++ python/cudf/cudf/core/column/numerical.py | 8 ++++++++ python/cudf/cudf/core/frame.py | 13 +------------ 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 7abdbc85720..18288dc2ce1 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1124,6 +1124,11 @@ def __cuda_array_interface__(self) -> abc.Mapping[str, Any]: def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): return _array_ufunc(self, ufunc, method, inputs, kwargs) + def __invert__(self): + raise TypeError( + f"Operation `~` not supported on {self.dtype.type.__name__}" + ) + def searchsorted( self, value, diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 6fb4f17b76d..1952d7eeb71 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -194,6 +194,14 @@ def unary_operator(self, unaryop: Union[str, Callable]) -> ColumnBase: unaryop = pylibcudf.unary.UnaryOperator[unaryop] return libcudf.unary.unary_operation(self, unaryop) + def __invert__(self): + if self.dtype.kind in "ui": + return self.unary_operator("invert") + elif self.dtype.kind == "b": + return self.unary_operator("not") + else: + return super().__invert__() + def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: int_float_dtype_mapping = { np.int8: np.float32, diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index f0daa6ea6fe..01b56f1edc4 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -1873,20 +1873,9 @@ def __copy__(self): @_cudf_nvtx_annotate def __invert__(self): """Bitwise invert (~) for integral dtypes, logical NOT for bools.""" - - def _apply_inverse_column(col: ColumnBase) -> ColumnBase: - if col.dtype.kind in "ui": - return col.unary_operator("invert") - elif col.dtype.kind == "b": - return col.unary_operator("not") - else: - raise TypeError( - f"Operation `~` not supported on {col.dtype.type.__name__}" - ) - return self._from_data_like_self( self._data._from_columns_like_self( - (_apply_inverse_column(col) for col in self._data.columns) + (~col for col in self._data.columns) ) )