Skip to content

Commit

Permalink
Remove deprecated code (#10124)
Browse files Browse the repository at this point in the history
This PR removes a large number of deprecated code paths in cuDF. This PR resolves #9465 and partially addresses #9828 (this PR does not address any mask-related API deprecations other than the removal of the already deprecated Series.set_mask).

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Ashwin Srinath (https://github.com/shwina)

URL: #10124
  • Loading branch information
vyasr authored Jan 28, 2022
1 parent e2123db commit b7aa47f
Show file tree
Hide file tree
Showing 29 changed files with 118 additions and 695 deletions.
23 changes: 0 additions & 23 deletions python/cudf/cudf/core/_base_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -569,17 +569,6 @@ def to_dlpack(self):

return cudf.io.dlpack.to_dlpack(self)

@property
def gpu_values(self):
"""
View the data as a numba device array object
"""
warnings.warn(
"The gpu_values property is deprecated and will be removed.",
FutureWarning,
)
return self._values.data_array_view

def append(self, other):
"""
Append a collection of Index objects together.
Expand Down Expand Up @@ -1254,10 +1243,6 @@ def astype(self, dtype, copy=False):
self.copy(deep=copy)._values.astype(dtype), name=self.name
)

# TODO: This method is deprecated and can be removed.
def to_array(self, fillna=None):
return self._values.to_array(fillna=fillna)

def to_series(self, index=None, name=None):
"""
Create a Series with both index and values equal to the index keys.
Expand Down Expand Up @@ -1536,14 +1521,6 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=None):
"`allow_fill` and `fill_value` are unsupported."
)

indices = cudf.core.column.as_column(indices)
if is_bool_dtype(indices):
warnings.warn(
"Calling take with a boolean array is deprecated and will be "
"removed in the future.",
FutureWarning,
)
return self._apply_boolean_mask(indices)
return self._gather(indices)

def _apply_boolean_mask(self, boolean_mask):
Expand Down
20 changes: 12 additions & 8 deletions python/cudf/cudf/core/column/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@
)


_DEFAULT_CATEGORICAL_VALUE = -1


class CategoricalAccessor(ColumnMethods):
"""
Accessor object for categorical properties of the Series values.
Expand Down Expand Up @@ -946,7 +949,11 @@ def to_pandas(self, index: pd.Index = None, **kwargs) -> pd.Series:
col = self

signed_dtype = min_signed_type(len(col.categories))
codes = col.codes.astype(signed_dtype).fillna(-1).to_array()
codes = (
col.codes.astype(signed_dtype)
.fillna(_DEFAULT_CATEGORICAL_VALUE)
.values_host
)
if is_interval_dtype(col.categories.dtype):
# leaving out dropna because it temporarily changes an interval
# index into a struct and throws off results.
Expand Down Expand Up @@ -1015,13 +1022,10 @@ def _encode(self, value) -> ScalarLike:
return self.categories.find_first_value(value)

def _decode(self, value: int) -> ScalarLike:
if value == self._default_na_value():
if value == _DEFAULT_CATEGORICAL_VALUE:
return None
return self.categories.element_indexing(value)

def _default_na_value(self) -> ScalarLike:
return -1

def find_and_replace(
self,
to_replace: ColumnLike,
Expand Down Expand Up @@ -1178,7 +1182,7 @@ def fillna(
fill_is_scalar = np.isscalar(fill_value)

if fill_is_scalar:
if fill_value == self._default_na_value():
if fill_value == _DEFAULT_CATEGORICAL_VALUE:
fill_value = self.codes.dtype.type(fill_value)
else:
try:
Expand Down Expand Up @@ -1578,7 +1582,7 @@ def _create_empty_categorical_column(
categories=column.as_column(dtype.categories),
codes=column.as_column(
cudf.utils.utils.scalar_broadcast_to(
categorical_column._default_na_value(),
_DEFAULT_CATEGORICAL_VALUE,
categorical_column.size,
categorical_column.codes.dtype,
)
Expand All @@ -1601,7 +1605,7 @@ def pandas_categorical_as_column(
codes = categorical.codes if codes is None else codes
codes = column.as_column(codes)

valid_codes = codes != codes.dtype.type(-1)
valid_codes = codes != codes.dtype.type(_DEFAULT_CATEGORICAL_VALUE)

mask = None
if not valid_codes.all():
Expand Down
45 changes: 0 additions & 45 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,51 +314,6 @@ def memory_usage(self) -> int:
n += bitmask_allocation_size_bytes(self.size)
return n

def _default_na_value(self) -> Any:
raise NotImplementedError()

# TODO: This method is deprecated and can be removed when the associated
# Frame methods are removed.
def to_gpu_array(self, fillna=None) -> "cuda.devicearray.DeviceNDArray":
"""Get a dense numba device array for the data.
Parameters
----------
fillna : scalar, 'pandas', or None
See *fillna* in ``.to_array``.
Notes
-----
if ``fillna`` is ``None``, null values are skipped. Therefore, the
output size could be smaller.
"""
if fillna:
return self.fillna(self._default_na_value()).data_array_view
else:
return self.dropna(drop_nan=False).data_array_view

# TODO: This method is deprecated and can be removed when the associated
# Frame methods are removed.
def to_array(self, fillna=None) -> np.ndarray:
"""Get a dense numpy array for the data.
Parameters
----------
fillna : scalar, 'pandas', or None
Defaults to None, which will skip null values.
If it equals "pandas", null values are filled with NaNs.
Non integral dtype is promoted to np.float64.
Notes
-----
if ``fillna`` is ``None``, null values are skipped. Therefore, the
output size could be smaller.
"""

return self.to_gpu_array(fillna=fillna).copy_to_host()

def _fill(
self,
fill_value: ScalarLike,
Expand Down
15 changes: 1 addition & 14 deletions python/cudf/cudf/core/column/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ def to_pandas(

# Pandas supports only `datetime64[ns]`, hence the cast.
return pd.Series(
self.astype("datetime64[ns]").to_array("NAT"),
self.astype("datetime64[ns]").fillna("NaT").values_host,
copy=False,
index=index,
)
Expand Down Expand Up @@ -346,10 +346,6 @@ def as_string_column(
column.column_empty(0, dtype="object", masked=False),
)

def _default_na_value(self) -> DatetimeLikeScalar:
"""Returns the default NA value for this column"""
return np.datetime64("nat", self.time_unit)

def mean(self, skipna=None, dtype=np.float64) -> ScalarLike:
return pd.Timestamp(
self.as_numerical.mean(skipna=skipna, dtype=dtype),
Expand Down Expand Up @@ -488,15 +484,6 @@ def can_cast_safely(self, to_dtype: Dtype) -> bool:
return False


def binop_offset(lhs, rhs, op):
if rhs._is_no_op:
return lhs
else:
rhs = rhs._generate_column(len(lhs), op)
out = libcudf.datetime.add_months(lhs, rhs)
return out


def infer_format(element: str, **kwargs) -> str:
"""
Infers datetime format from a string, also takes care of `ms` and `ns`
Expand Down
14 changes: 0 additions & 14 deletions python/cudf/cudf/core/column/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,20 +355,6 @@ def _process_for_reduction(
skipna=skipna, min_count=min_count
)

def _default_na_value(self) -> ScalarLike:
"""Returns the default NA value for this column"""
dkind = self.dtype.kind
if dkind == "f":
return self.dtype.type(np.nan)
elif dkind == "i":
return np.iinfo(self.dtype).min
elif dkind == "u":
return np.iinfo(self.dtype).max
elif dkind == "b":
return self.dtype.type(False)
else:
raise TypeError(f"numeric column of {self.dtype} has no NaN value")

def find_and_replace(
self,
to_replace: ColumnLike,
Expand Down
23 changes: 0 additions & 23 deletions python/cudf/cudf/core/column/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -5218,26 +5218,6 @@ def values(self) -> cupy.ndarray:
"""
raise TypeError("String Arrays is not yet implemented in cudf")

# TODO: This method is deprecated and should be removed when the associated
# Frame methods are removed.
def to_array(self, fillna: bool = None) -> np.ndarray:
"""Get a dense numpy array for the data.
Notes
-----
if ``fillna`` is ``None``, null values are skipped. Therefore, the
output size could be smaller.
Raises
------
``NotImplementedError`` if there are nulls
"""
if fillna is not None:
warnings.warn("fillna parameter not supported for string arrays")

return self.to_arrow().to_pandas().values

def to_pandas(
self, index: pd.Index = None, nullable: bool = False, **kwargs
) -> "pd.Series":
Expand Down Expand Up @@ -5402,9 +5382,6 @@ def normalize_binop_value(self, other) -> "column.ColumnBase":
else:
raise TypeError(f"cannot broadcast {type(other)}")

def _default_na_value(self) -> ScalarLike:
return None

def binary_operator(
self, op: builtins.str, rhs, reflect: bool = False
) -> "column.ColumnBase":
Expand Down
15 changes: 3 additions & 12 deletions python/cudf/cudf/core/column/timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,7 @@

import cudf
from cudf import _lib as libcudf
from cudf._typing import (
BinaryOperand,
DatetimeLikeScalar,
Dtype,
DtypeObj,
ScalarLike,
)
from cudf._typing import BinaryOperand, DatetimeLikeScalar, Dtype, DtypeObj
from cudf.api.types import is_scalar
from cudf.core.buffer import Buffer
from cudf.core.column import ColumnBase, column, string
Expand Down Expand Up @@ -123,7 +117,8 @@ def to_pandas(

# Pandas supports only `timedelta64[ns]`, hence the cast.
pd_series = pd.Series(
self.astype("timedelta64[ns]").to_array("NAT"), copy=False
self.astype("timedelta64[ns]").fillna("NaT").values_host,
copy=False,
)

if index is not None:
Expand Down Expand Up @@ -304,10 +299,6 @@ def as_numerical(self) -> "cudf.core.column.NumericalColumn":
),
)

def _default_na_value(self) -> ScalarLike:
"""Returns the default NA value for this column"""
return np.timedelta64("nat", self.time_unit)

@property
def time_unit(self) -> str:
return self._time_unit
Expand Down
Loading

0 comments on commit b7aa47f

Please sign in to comment.