diff --git a/python/cudf/cudf/core/algorithms.py b/python/cudf/cudf/core/algorithms.py index 33cec21caa5..272abdece9e 100644 --- a/python/cudf/cudf/core/algorithms.py +++ b/python/cudf/cudf/core/algorithms.py @@ -142,10 +142,10 @@ def _index_or_values_interpolation(column, index=None): BooleanMask(~mask, len(to_interp)) ) - known_x = known_x_and_y._index._column.values + known_x = known_x_and_y.index.to_cupy() known_y = known_x_and_y._data.columns[0].values - result = cp.interp(to_interp._index.values, known_x, known_y) + result = cp.interp(index.to_cupy(), known_x, known_y) # find the first nan first_nan_idx = (mask == 0).argmax().item() diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 6fa957684e4..6928425a867 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -1768,7 +1768,7 @@ def _concat( indices[:first_data_column_position], ) if not isinstance(out._index, MultiIndex) and isinstance( - out._index._values.dtype, cudf.CategoricalDtype + out._index.dtype, cudf.CategoricalDtype ): out = out.set_index( cudf.core.index.as_index(out.index._values) @@ -3582,7 +3582,7 @@ def rename( if index: if ( any(isinstance(item, str) for item in index.values()) - and type(self.index._values) != cudf.core.column.StringColumn + and self.index.dtype != "object" ): raise NotImplementedError( "Implicit conversion of index to " diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index f9dd328aaa8..52322b0160f 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -2882,7 +2882,7 @@ def __init__( @property def closed(self): - return self._values.dtype.closed + return self.dtype.closed @classmethod @_cudf_nvtx_annotate diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 62ee780ebbb..e656fd49758 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -174,7 +174,7 @@ def _indices_from_labels(obj, labels): if isinstance(obj.index.dtype, cudf.CategoricalDtype): labels = labels.astype("category") - codes = labels.codes.astype(obj.index._values.codes.dtype) + codes = labels.codes.astype(obj.index.codes.dtype) labels = cudf.core.column.build_categorical_column( categories=labels.dtype.categories, codes=codes, diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index c3d232aaa7c..63a49a898f4 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -39,11 +39,9 @@ _is_scalar_or_zero_d_array, is_bool_dtype, is_dict_like, - is_float_dtype, is_integer, is_integer_dtype, is_scalar, - is_string_dtype, ) from cudf.core import indexing_utils from cudf.core._compat import PANDAS_LT_300 @@ -205,19 +203,10 @@ def __setitem__(self, key, value): if is_scalar(value): value = to_cudf_compatible_scalar(value) if ( - not isinstance( - self._frame._column, - ( - cudf.core.column.DatetimeColumn, - cudf.core.column.TimeDeltaColumn, - ), - ) + self._frame.dtype.kind not in "mM" and cudf.utils.utils._isnat(value) and not ( - isinstance( - self._frame._column, cudf.core.column.StringColumn - ) - and isinstance(value, str) + self._frame.dtype == "object" and isinstance(value, str) ) ): raise MixedTypeError( @@ -226,14 +215,10 @@ def __setitem__(self, key, value): ) elif ( not ( - is_float_dtype(self._frame._column.dtype) + self._frame.dtype.kind == "f" or ( - isinstance( - self._frame._column.dtype, cudf.CategoricalDtype - ) - and is_float_dtype( - self._frame._column.dtype.categories.dtype - ) + isinstance(self._frame.dtype, cudf.CategoricalDtype) + and self._frame.dtype.categories.dtype.kind == "f" ) ) and isinstance(value, (np.float32, np.float64)) @@ -241,40 +226,37 @@ def __setitem__(self, key, value): ): raise MixedTypeError( f"Cannot assign {value=} to " - f"non-float dtype={self._frame._column.dtype}" + f"non-float dtype={self._frame.dtype}" ) elif ( - is_bool_dtype(self._frame._column.dtype) + self._frame.dtype.kind == "b" and not is_bool_dtype(value) and value not in {None, cudf.NA} ): raise MixedTypeError( f"Cannot assign {value=} to " - f"bool dtype={self._frame._column.dtype}" + f"bool dtype={self._frame.dtype}" ) elif not ( isinstance(value, (list, dict)) and isinstance( - self._frame._column.dtype, (cudf.ListDtype, cudf.StructDtype) + self._frame.dtype, (cudf.ListDtype, cudf.StructDtype) ) ): value = as_column(value) if ( - ( - _is_non_decimal_numeric_dtype(self._frame._column.dtype) - or is_string_dtype(self._frame._column.dtype) - ) + (self._frame.dtype.kind in "uifb" or self._frame.dtype == "object") and hasattr(value, "dtype") - and _is_non_decimal_numeric_dtype(value.dtype) + and value.dtype.kind in "uifb" ): # normalize types if necessary: # In contrast to Column.__setitem__ (which downcasts the value to # the dtype of the column) here we upcast the series to the # larger data type mimicking pandas - to_dtype = np.result_type(value.dtype, self._frame._column.dtype) + to_dtype = np.result_type(value.dtype, self._frame.dtype) value = value.astype(to_dtype) - if to_dtype != self._frame._column.dtype: + if to_dtype != self._frame.dtype: # Do not remove until pandas-3.0 support is added. assert ( PANDAS_LT_300 @@ -283,7 +265,7 @@ def __setitem__(self, key, value): f"Setting an item of incompatible dtype is deprecated " "and will raise in a future error of pandas. " f"Value '{value}' has dtype incompatible with " - f"{self._frame._column.dtype}, " + f"{self._frame.dtype}, " "please explicitly cast to a compatible dtype first.", FutureWarning, ) @@ -336,27 +318,27 @@ def __setitem__(self, key, value): and not isinstance(self._frame.index, cudf.MultiIndex) and is_scalar(value) ): - # TODO: Modifying index in place is bad because - # our index are immutable, but columns are not (which - # means our index are mutable with internal APIs). - # Get rid of the deep copy once columns too are - # immutable. - idx_copy = self._frame._index.copy(deep=True) - if ( - isinstance(idx_copy, cudf.RangeIndex) - and isinstance(key, int) - and (key == idx_copy[-1] + idx_copy.step) - ): - idx_copy = cudf.RangeIndex( - start=idx_copy.start, - stop=idx_copy.stop + idx_copy.step, - step=idx_copy.step, - name=idx_copy.name, - ) + idx = self._frame._index + if isinstance(idx, cudf.RangeIndex): + if isinstance(key, int) and (key == idx[-1] + idx.step): + idx_copy = cudf.RangeIndex( + start=idx.start, + stop=idx.stop + idx.step, + step=idx.step, + name=idx.name, + ) + else: + idx_copy = idx._as_int_index() + _append_new_row_inplace(idx_copy._column, key) else: - if isinstance(idx_copy, cudf.RangeIndex): - idx_copy = idx_copy._as_int_index() - _append_new_row_inplace(idx_copy._values, key) + # TODO: Modifying index in place is bad because + # our index are immutable, but columns are not (which + # means our index are mutable with internal APIs). + # Get rid of the deep copy once columns too are + # immutable. + idx_copy = idx.copy(deep=True) + _append_new_row_inplace(idx_copy._column, key) + self._frame._index = idx_copy _append_new_row_inplace(self._frame._column, value) return @@ -1407,34 +1389,23 @@ def __repr__(self): cudf.core.dtypes.DecimalDtype, ), ) - ) or isinstance( - preprocess._column, - cudf.core.column.timedelta.TimeDeltaColumn, - ): + ) or preprocess.dtype.kind == "m": fill_value = ( str(cudf.NaT) - if isinstance( - preprocess._column, - ( - cudf.core.column.TimeDeltaColumn, - cudf.core.column.DatetimeColumn, - ), - ) + if preprocess.dtype.kind in "mM" else str(cudf.NA) ) output = repr( preprocess.astype("str").fillna(fill_value).to_pandas() ) - elif isinstance( - preprocess._column, cudf.core.column.CategoricalColumn - ): + elif isinstance(preprocess.dtype, cudf.CategoricalDtype): min_rows = ( height if pd.get_option("display.min_rows") == 0 else pd.get_option("display.min_rows") ) show_dimensions = pd.get_option("display.show_dimensions") - if preprocess._column.categories.dtype.kind == "f": + if preprocess.dtype.categories.dtype.kind == "f": pd_series = ( preprocess.astype("str") .to_pandas() @@ -1461,13 +1432,13 @@ def __repr__(self): output = repr(preprocess.to_pandas()) lines = output.split("\n") - if isinstance(preprocess._column, cudf.core.column.CategoricalColumn): + if isinstance(preprocess.dtype, cudf.CategoricalDtype): category_memory = lines[-1] - if preprocess._column.categories.dtype.kind == "f": + if preprocess.dtype.categories.dtype.kind == "f": category_memory = category_memory.replace("'", "").split(": ") category_memory = ( category_memory[0].replace( - "object", preprocess._column.categories.dtype.name + "object", preprocess.dtype.categories.dtype.name ) + ": " + category_memory[1] diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py index 7f6ce1100ea..12a1ecc68e0 100644 --- a/python/cudf/cudf/core/tools/datetimes.py +++ b/python/cudf/cudf/core/tools/datetimes.py @@ -1058,8 +1058,7 @@ def _to_iso_calendar(arg): ) if isinstance(arg, cudf.Index): iso_params = [ - arg._column.as_string_column(arg._values.dtype, fmt) - for fmt in formats + arg._column.as_string_column(arg.dtype, fmt) for fmt in formats ] index = arg._column elif isinstance(arg.series, cudf.Series):