Remove deprecated code (#10124)

This PR removes a large number of deprecated code paths in cuDF. It resolves #9465 and partially addresses #9828: the only mask-related API deprecation handled here is the removal of the already-deprecated Series.set_mask; the remaining mask-related deprecations are not addressed.

Authors:
- Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
- Bradley Dice (https://github.com/bdice)
- Ashwin Srinath (https://github.com/shwina)

URL: #10124
rapidsai · Jan 28, 2022 · b7aa47f · b7aa47f
1 parent e2123db
commit b7aa47f
Show file tree

Hide file tree

Showing 29 changed files with 118 additions and 695 deletions.
diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py
@@ -569,17 +569,6 @@ def to_dlpack(self):
 
         return cudf.io.dlpack.to_dlpack(self)
 
-    @property
-    def gpu_values(self):
-        """
-        View the data as a numba device array object
-        """
-        warnings.warn(
-            "The gpu_values property is deprecated and will be removed.",
-            FutureWarning,
-        )
-        return self._values.data_array_view
-
     def append(self, other):
         """
         Append a collection of Index options together.
@@ -1254,10 +1243,6 @@ def astype(self, dtype, copy=False):
             self.copy(deep=copy)._values.astype(dtype), name=self.name
         )
 
-    # TODO: This method is deprecated and can be removed.
-    def to_array(self, fillna=None):
-        return self._values.to_array(fillna=fillna)
-
     def to_series(self, index=None, name=None):
         """
         Create a Series with both index and values equal to the index keys.
@@ -1536,14 +1521,6 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=None):
                 "`allow_fill` and `fill_value` are unsupported."
             )
 
-        indices = cudf.core.column.as_column(indices)
-        if is_bool_dtype(indices):
-            warnings.warn(
-                "Calling take with a boolean array is deprecated and will be "
-                "removed in the future.",
-                FutureWarning,
-            )
-            return self._apply_boolean_mask(indices)
         return self._gather(indices)
 
     def _apply_boolean_mask(self, boolean_mask):

diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py
@@ -46,6 +46,9 @@
     )
 
 
+_DEFAULT_CATEGORICAL_VALUE = -1
+
+
 class CategoricalAccessor(ColumnMethods):
     """
     Accessor object for categorical properties of the Series values.
@@ -946,7 +949,11 @@ def to_pandas(self, index: pd.Index = None, **kwargs) -> pd.Series:
             col = self
 
         signed_dtype = min_signed_type(len(col.categories))
-        codes = col.codes.astype(signed_dtype).fillna(-1).to_array()
+        codes = (
+            col.codes.astype(signed_dtype)
+            .fillna(_DEFAULT_CATEGORICAL_VALUE)
+            .values_host
+        )
         if is_interval_dtype(col.categories.dtype):
             # leaving out dropna because it temporarily changes an interval
             # index into a struct and throws off results.
@@ -1015,13 +1022,10 @@ def _encode(self, value) -> ScalarLike:
         return self.categories.find_first_value(value)
 
     def _decode(self, value: int) -> ScalarLike:
-        if value == self._default_na_value():
+        if value == _DEFAULT_CATEGORICAL_VALUE:
             return None
         return self.categories.element_indexing(value)
 
-    def _default_na_value(self) -> ScalarLike:
-        return -1
-
     def find_and_replace(
         self,
         to_replace: ColumnLike,
@@ -1178,7 +1182,7 @@ def fillna(
             fill_is_scalar = np.isscalar(fill_value)
 
             if fill_is_scalar:
-                if fill_value == self._default_na_value():
+                if fill_value == _DEFAULT_CATEGORICAL_VALUE:
                     fill_value = self.codes.dtype.type(fill_value)
                 else:
                     try:
@@ -1578,7 +1582,7 @@ def _create_empty_categorical_column(
         categories=column.as_column(dtype.categories),
         codes=column.as_column(
             cudf.utils.utils.scalar_broadcast_to(
-                categorical_column._default_na_value(),
+                _DEFAULT_CATEGORICAL_VALUE,
                 categorical_column.size,
                 categorical_column.codes.dtype,
             )
@@ -1601,7 +1605,7 @@ def pandas_categorical_as_column(
     codes = categorical.codes if codes is None else codes
     codes = column.as_column(codes)
 
-    valid_codes = codes != codes.dtype.type(-1)
+    valid_codes = codes != codes.dtype.type(_DEFAULT_CATEGORICAL_VALUE)
 
     mask = None
     if not valid_codes.all():

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
@@ -314,51 +314,6 @@ def memory_usage(self) -> int:
             n += bitmask_allocation_size_bytes(self.size)
         return n
 
-    def _default_na_value(self) -> Any:
-        raise NotImplementedError()
-
-    # TODO: This method is deprecated and can be removed when the associated
-    # Frame methods are removed.
-    def to_gpu_array(self, fillna=None) -> "cuda.devicearray.DeviceNDArray":
-        """Get a dense numba device array for the data.
-
-        Parameters
-        ----------
-        fillna : scalar, 'pandas', or None
-            See *fillna* in ``.to_array``.
-
-        Notes
-        -----
-
-        if ``fillna`` is ``None``, null values are skipped.  Therefore, the
-        output size could be smaller.
-        """
-        if fillna:
-            return self.fillna(self._default_na_value()).data_array_view
-        else:
-            return self.dropna(drop_nan=False).data_array_view
-
-    # TODO: This method is deprecated and can be removed when the associated
-    # Frame methods are removed.
-    def to_array(self, fillna=None) -> np.ndarray:
-        """Get a dense numpy array for the data.
-
-        Parameters
-        ----------
-        fillna : scalar, 'pandas', or None
-            Defaults to None, which will skip null values.
-            If it equals "pandas", null values are filled with NaNs.
-            Non integral dtype is promoted to np.float64.
-
-        Notes
-        -----
-
-        if ``fillna`` is ``None``, null values are skipped.  Therefore, the
-        output size could be smaller.
-        """
-
-        return self.to_gpu_array(fillna=fillna).copy_to_host()
-
     def _fill(
         self,
         fill_value: ScalarLike,

diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
@@ -199,7 +199,7 @@ def to_pandas(
 
         # Pandas supports only `datetime64[ns]`, hence the cast.
         return pd.Series(
-            self.astype("datetime64[ns]").to_array("NAT"),
+            self.astype("datetime64[ns]").fillna("NaT").values_host,
             copy=False,
             index=index,
         )
@@ -346,10 +346,6 @@ def as_string_column(
                 column.column_empty(0, dtype="object", masked=False),
             )
 
-    def _default_na_value(self) -> DatetimeLikeScalar:
-        """Returns the default NA value for this column"""
-        return np.datetime64("nat", self.time_unit)
-
     def mean(self, skipna=None, dtype=np.float64) -> ScalarLike:
         return pd.Timestamp(
             self.as_numerical.mean(skipna=skipna, dtype=dtype),
@@ -488,15 +484,6 @@ def can_cast_safely(self, to_dtype: Dtype) -> bool:
             return False
 
 
-def binop_offset(lhs, rhs, op):
-    if rhs._is_no_op:
-        return lhs
-    else:
-        rhs = rhs._generate_column(len(lhs), op)
-        out = libcudf.datetime.add_months(lhs, rhs)
-        return out
-
-
 def infer_format(element: str, **kwargs) -> str:
     """
     Infers datetime format from a string, also takes cares for `ms` and `ns`

diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
@@ -355,20 +355,6 @@ def _process_for_reduction(
             skipna=skipna, min_count=min_count
         )
 
-    def _default_na_value(self) -> ScalarLike:
-        """Returns the default NA value for this column"""
-        dkind = self.dtype.kind
-        if dkind == "f":
-            return self.dtype.type(np.nan)
-        elif dkind == "i":
-            return np.iinfo(self.dtype).min
-        elif dkind == "u":
-            return np.iinfo(self.dtype).max
-        elif dkind == "b":
-            return self.dtype.type(False)
-        else:
-            raise TypeError(f"numeric column of {self.dtype} has no NaN value")
-
     def find_and_replace(
         self,
         to_replace: ColumnLike,

diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
@@ -5218,26 +5218,6 @@ def values(self) -> cupy.ndarray:
         """
         raise TypeError("String Arrays is not yet implemented in cudf")
 
-    # TODO: This method is deprecated and should be removed when the associated
-    # Frame methods are removed.
-    def to_array(self, fillna: bool = None) -> np.ndarray:
-        """Get a dense numpy array for the data.
-
-        Notes
-        -----
-
-        if ``fillna`` is ``None``, null values are skipped.  Therefore, the
-        output size could be smaller.
-
-        Raises
-        ------
-        ``NotImplementedError`` if there are nulls
-        """
-        if fillna is not None:
-            warnings.warn("fillna parameter not supported for string arrays")
-
-        return self.to_arrow().to_pandas().values
-
     def to_pandas(
         self, index: pd.Index = None, nullable: bool = False, **kwargs
     ) -> "pd.Series":
@@ -5402,9 +5382,6 @@ def normalize_binop_value(self, other) -> "column.ColumnBase":
         else:
             raise TypeError(f"cannot broadcast {type(other)}")
 
-    def _default_na_value(self) -> ScalarLike:
-        return None
-
     def binary_operator(
         self, op: builtins.str, rhs, reflect: bool = False
     ) -> "column.ColumnBase":

diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py
@@ -12,13 +12,7 @@
 
 import cudf
 from cudf import _lib as libcudf
-from cudf._typing import (
-    BinaryOperand,
-    DatetimeLikeScalar,
-    Dtype,
-    DtypeObj,
-    ScalarLike,
-)
+from cudf._typing import BinaryOperand, DatetimeLikeScalar, Dtype, DtypeObj
 from cudf.api.types import is_scalar
 from cudf.core.buffer import Buffer
 from cudf.core.column import ColumnBase, column, string
@@ -123,7 +117,8 @@ def to_pandas(
 
         # Pandas supports only `timedelta64[ns]`, hence the cast.
         pd_series = pd.Series(
-            self.astype("timedelta64[ns]").to_array("NAT"), copy=False
+            self.astype("timedelta64[ns]").fillna("NaT").values_host,
+            copy=False,
         )
 
         if index is not None:
@@ -304,10 +299,6 @@ def as_numerical(self) -> "cudf.core.column.NumericalColumn":
             ),
         )
 
-    def _default_na_value(self) -> ScalarLike:
-        """Returns the default NA value for this column"""
-        return np.timedelta64("nat", self.time_unit)
-
     @property
     def time_unit(self) -> str:
         return self._time_unit