Skip to content

Commit

Permalink
Fix negative unary operation for boolean type (#13780)
Browse files Browse the repository at this point in the history
Negative unary op on boolean series is resulting in conversion to `int` type:
```python
In [1]: import cudf

In [2]: s = cudf.Series([True, False])

In [3]: s
Out[3]: 
0     True
1    False
dtype: bool

In [4]: -s
Out[4]: 
0   -1
1    0
dtype: int8

In [5]: -s.to_pandas()
Out[5]: 
0    False
1     True
dtype: bool
```
The PR fixes the above issue by returning inversion of the boolean column instead of multiplying with `-1`.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Lawrence Mitchell (https://github.com/wence-)

URL: #13780
  • Loading branch information
galipremsagar authored Jul 28, 2023
1 parent 0910046 commit 3dba6ea
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 3 deletions.
43 changes: 40 additions & 3 deletions python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
import cudf
from cudf import _lib as libcudf
from cudf._typing import Dtype
from cudf.api.types import is_dtype_equal, is_scalar
from cudf.api.types import is_bool_dtype, is_dtype_equal, is_scalar
from cudf.core.buffer import acquire_spill_lock
from cudf.core.column import (
ColumnBase,
Expand Down Expand Up @@ -97,6 +97,7 @@ def _dtypes(self):
def _has_nulls(self):
return any(col.has_nulls() for col in self._data.values())

@_cudf_nvtx_annotate
def serialize(self):
header = {
"type-serialized": pickle.dumps(type(self)),
Expand All @@ -106,6 +107,7 @@ def serialize(self):
return header, frames

@classmethod
@_cudf_nvtx_annotate
def deserialize(cls, header, frames):
cls_deserialize = pickle.loads(header["type-serialized"])
column_names = pickle.loads(header["column_names"])
Expand Down Expand Up @@ -151,6 +153,7 @@ def _from_columns_like_self(
frame = self.__class__._from_columns(columns, column_names)
return frame._copy_type_metadata(self, override_dtypes=override_dtypes)

@_cudf_nvtx_annotate
def _mimic_inplace(
self, result: Self, inplace: bool = False
) -> Optional[Self]:
Expand All @@ -166,6 +169,7 @@ def _mimic_inplace(
return result

@property
@_cudf_nvtx_annotate
def size(self):
"""
Return the number of elements in the underlying data.
Expand Down Expand Up @@ -243,11 +247,13 @@ def size(self):
return self._num_columns * self._num_rows

@property
@_cudf_nvtx_annotate
def shape(self):
"""Returns a tuple representing the dimensionality of the DataFrame."""
return self._num_rows, self._num_columns

@property
@_cudf_nvtx_annotate
def empty(self):
"""
Indicator whether DataFrame or Series is empty.
Expand Down Expand Up @@ -308,6 +314,7 @@ def empty(self):
"""
return self.size == 0

@_cudf_nvtx_annotate
def memory_usage(self, deep=False):
"""Return the memory usage of an object.
Expand All @@ -323,6 +330,7 @@ def memory_usage(self, deep=False):
"""
raise NotImplementedError

@_cudf_nvtx_annotate
def __len__(self):
return self._num_rows

Expand Down Expand Up @@ -425,6 +433,7 @@ def _get_columns_by_label(self, labels, downcast=False):
return self._data.select_by_label(labels)

@property
@_cudf_nvtx_annotate
def values(self):
"""
Return a CuPy representation of the DataFrame.
Expand All @@ -440,6 +449,7 @@ def values(self):
return self.to_cupy()

@property
@_cudf_nvtx_annotate
def values_host(self):
"""
Return a NumPy representation of the data.
Expand All @@ -454,6 +464,7 @@ def values_host(self):
"""
return self.to_numpy()

@_cudf_nvtx_annotate
def __array__(self, dtype=None):
raise TypeError(
"Implicit conversion to a host NumPy array via __array__ is not "
Expand All @@ -462,12 +473,14 @@ def __array__(self, dtype=None):
"using .to_numpy()."
)

@_cudf_nvtx_annotate
def __arrow_array__(self, type=None):
raise TypeError(
"Implicit conversion to a host PyArrow object via __arrow_array__ "
"is not allowed. Consider using .to_arrow()"
)

@_cudf_nvtx_annotate
def _to_array(
self,
get_column_values: Callable,
Expand Down Expand Up @@ -1183,6 +1196,7 @@ def to_arrow(self):
{str(name): col.to_arrow() for name, col in self._data.items()}
)

@_cudf_nvtx_annotate
def _positions_from_column_names(self, column_names):
"""Map each column name into their positions in the frame.
Expand All @@ -1195,6 +1209,7 @@ def _positions_from_column_names(self, column_names):
if name in set(column_names)
]

@_cudf_nvtx_annotate
def _copy_type_metadata(
self,
other: Self,
Expand Down Expand Up @@ -1566,6 +1581,7 @@ def argsort(
by=by, ascending=ascending, na_position=na_position
).values

@_cudf_nvtx_annotate
def _get_sorted_inds(self, by=None, ascending=True, na_position="last"):
"""
Get the indices required to sort self according to the columns
Expand Down Expand Up @@ -1768,9 +1784,11 @@ def _colwise_binop(

return output

@_cudf_nvtx_annotate
def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
return _array_ufunc(self, ufunc, method, inputs, kwargs)

@_cudf_nvtx_annotate
@acquire_spill_lock()
def _apply_cupy_ufunc_to_operands(
self, ufunc, cupy_func, operands, **kwargs
Expand Down Expand Up @@ -1884,30 +1902,45 @@ def dot(self, other, reflect=False):
return cudf.DataFrame(result)
return result.item()

@_cudf_nvtx_annotate
def __matmul__(self, other):
return self.dot(other)

@_cudf_nvtx_annotate
def __rmatmul__(self, other):
return self.dot(other, reflect=True)

# Unary logical operators
@_cudf_nvtx_annotate
def __neg__(self):
return -1 * self
"""Negate for integral dtypes, logical NOT for bools."""
return self._from_data_like_self(
{
name: col.unary_operator("not")
if is_bool_dtype(col.dtype)
else -1 * col
for name, col in self._data.items()
}
)

@_cudf_nvtx_annotate
def __pos__(self):
return self.copy(deep=True)

@_cudf_nvtx_annotate
def __abs__(self):
return self._unaryop("abs")

# Reductions
@classmethod
@_cudf_nvtx_annotate
def _get_axis_from_axis_arg(cls, axis):
try:
return cls._SUPPORT_AXIS_LOOKUP[axis]
except KeyError:
raise ValueError(f"No axis named {axis} for object type {cls}")

@_cudf_nvtx_annotate
def _reduce(self, *args, **kwargs):
raise NotImplementedError(
f"Reductions are not supported for objects of type {type(self)}."
Expand Down Expand Up @@ -2577,6 +2610,7 @@ def to_string(self):
"""
return repr(self)

@_cudf_nvtx_annotate
def __str__(self):
return self.to_string()

Expand Down Expand Up @@ -2780,6 +2814,7 @@ def __invert__(self):
}
)

@_cudf_nvtx_annotate
def nunique(self, dropna: bool = True):
"""
Returns a per column mapping with counts of unique values for
Expand All @@ -2801,6 +2836,7 @@ def nunique(self, dropna: bool = True):
}

@staticmethod
@_cudf_nvtx_annotate
def _repeat(
columns: List[ColumnBase], repeats, axis=None
) -> List[ColumnBase]:
Expand All @@ -2814,6 +2850,7 @@ def _repeat(

return libcudf.filling.repeat(columns, repeats)

@_cudf_nvtx_annotate
@_warn_no_dask_cudf
def __dask_tokenize__(self):
return [
Expand All @@ -2827,7 +2864,7 @@ def _apply_inverse_column(col: ColumnBase) -> ColumnBase:
"""Bitwise invert (~) for integral dtypes, logical NOT for bools."""
if np.issubdtype(col.dtype, np.integer):
return col.unary_operator("invert")
elif np.issubdtype(col.dtype, np.bool_):
elif is_bool_dtype(col.dtype):
return col.unary_operator("not")
else:
raise TypeError(
Expand Down
6 changes: 6 additions & 0 deletions python/cudf/cudf/tests/test_unaops.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,3 +123,9 @@ def test_scalar_no_negative_bools():
),
):
-x


def test_series_bool_neg():
sr = Series([True, False, True, None, False, None, True, True])
psr = sr.to_pandas(nullable=True)
utils.assert_eq((-sr).to_pandas(nullable=True), -psr, check_dtype=True)

0 comments on commit 3dba6ea

Please sign in to comment.