Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid accessing attributes via _column if not needed #15624

Merged
merged 3 commits into from
May 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,10 +142,10 @@ def _index_or_values_interpolation(column, index=None):
BooleanMask(~mask, len(to_interp))
)

known_x = known_x_and_y._index._column.values
known_x = known_x_and_y.index.to_cupy()
known_y = known_x_and_y._data.columns[0].values

result = cp.interp(to_interp._index.values, known_x, known_y)
result = cp.interp(index.to_cupy(), known_x, known_y)

# find the first nan
first_nan_idx = (mask == 0).argmax().item()
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1768,7 +1768,7 @@ def _concat(
indices[:first_data_column_position],
)
if not isinstance(out._index, MultiIndex) and isinstance(
out._index._values.dtype, cudf.CategoricalDtype
out._index.dtype, cudf.CategoricalDtype
):
out = out.set_index(
cudf.core.index.as_index(out.index._values)
Expand Down Expand Up @@ -3578,7 +3578,7 @@ def rename(
if index:
if (
any(isinstance(item, str) for item in index.values())
and type(self.index._values) != cudf.core.column.StringColumn
and self.index.dtype != "object"
):
raise NotImplementedError(
"Implicit conversion of index to "
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2873,7 +2873,7 @@ def __init__(

@property
def closed(self):
return self._values.dtype.closed
return self.dtype.closed

@classmethod
@_cudf_nvtx_annotate
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def _indices_from_labels(obj, labels):

if isinstance(obj.index.dtype, cudf.CategoricalDtype):
labels = labels.astype("category")
codes = labels.codes.astype(obj.index._values.codes.dtype)
codes = labels.codes.astype(obj.index.codes.dtype)
labels = cudf.core.column.build_categorical_column(
categories=labels.dtype.categories,
codes=codes,
Expand Down
111 changes: 41 additions & 70 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,9 @@
_is_scalar_or_zero_d_array,
is_bool_dtype,
is_dict_like,
is_float_dtype,
is_integer,
is_integer_dtype,
is_scalar,
is_string_dtype,
)
from cudf.core import indexing_utils
from cudf.core._compat import PANDAS_LT_300
Expand Down Expand Up @@ -205,19 +203,10 @@ def __setitem__(self, key, value):
if is_scalar(value):
value = to_cudf_compatible_scalar(value)
if (
not isinstance(
self._frame._column,
(
cudf.core.column.DatetimeColumn,
cudf.core.column.TimeDeltaColumn,
),
)
self._frame.dtype.kind not in "mM"
and cudf.utils.utils._isnat(value)
and not (
isinstance(
self._frame._column, cudf.core.column.StringColumn
)
and isinstance(value, str)
self._frame.dtype == "object" and isinstance(value, str)
)
):
raise MixedTypeError(
Expand All @@ -226,55 +215,48 @@ def __setitem__(self, key, value):
)
elif (
not (
is_float_dtype(self._frame._column.dtype)
self._frame.dtype.kind == "f"
or (
isinstance(
self._frame._column.dtype, cudf.CategoricalDtype
)
and is_float_dtype(
self._frame._column.dtype.categories.dtype
)
isinstance(self._frame.dtype, cudf.CategoricalDtype)
and self._frame.dtype.categories.dtype.kind == "f"
)
)
and isinstance(value, (np.float32, np.float64))
and np.isnan(value)
):
raise MixedTypeError(
f"Cannot assign {value=} to "
f"non-float dtype={self._frame._column.dtype}"
f"non-float dtype={self._frame.dtype}"
)
elif (
is_bool_dtype(self._frame._column.dtype)
self._frame.dtype.kind == "b"
and not is_bool_dtype(value)
and value not in {None, cudf.NA}
):
raise MixedTypeError(
f"Cannot assign {value=} to "
f"bool dtype={self._frame._column.dtype}"
f"bool dtype={self._frame.dtype}"
)
elif not (
isinstance(value, (list, dict))
and isinstance(
self._frame._column.dtype, (cudf.ListDtype, cudf.StructDtype)
self._frame.dtype, (cudf.ListDtype, cudf.StructDtype)
)
):
value = as_column(value)

if (
(
_is_non_decimal_numeric_dtype(self._frame._column.dtype)
or is_string_dtype(self._frame._column.dtype)
)
(self._frame.dtype.kind in "uifb" or self._frame.dtype == "object")
and hasattr(value, "dtype")
and _is_non_decimal_numeric_dtype(value.dtype)
and value.dtype.kind in "uifb"
):
# normalize types if necessary:
# In contrast to Column.__setitem__ (which downcasts the value to
# the dtype of the column) here we upcast the series to the
# larger data type mimicking pandas
to_dtype = np.result_type(value.dtype, self._frame._column.dtype)
to_dtype = np.result_type(value.dtype, self._frame.dtype)
value = value.astype(to_dtype)
if to_dtype != self._frame._column.dtype:
if to_dtype != self._frame.dtype:
# Do not remove until pandas-3.0 support is added.
assert (
PANDAS_LT_300
Expand All @@ -283,7 +265,7 @@ def __setitem__(self, key, value):
f"Setting an item of incompatible dtype is deprecated "
"and will raise in a future error of pandas. "
f"Value '{value}' has dtype incompatible with "
f"{self._frame._column.dtype}, "
f"{self._frame.dtype}, "
"please explicitly cast to a compatible dtype first.",
FutureWarning,
)
Expand Down Expand Up @@ -336,27 +318,27 @@ def __setitem__(self, key, value):
and not isinstance(self._frame.index, cudf.MultiIndex)
and is_scalar(value)
):
# TODO: Modifying index in place is bad because
# our index are immutable, but columns are not (which
# means our index are mutable with internal APIs).
# Get rid of the deep copy once columns too are
# immutable.
idx_copy = self._frame._index.copy(deep=True)
if (
isinstance(idx_copy, cudf.RangeIndex)
and isinstance(key, int)
and (key == idx_copy[-1] + idx_copy.step)
):
idx_copy = cudf.RangeIndex(
start=idx_copy.start,
stop=idx_copy.stop + idx_copy.step,
step=idx_copy.step,
name=idx_copy.name,
)
idx = self._frame._index
if isinstance(idx, cudf.RangeIndex):
if isinstance(key, int) and (key == idx[-1] + idx.step):
idx_copy = cudf.RangeIndex(
start=idx.start,
stop=idx.stop + idx.step,
step=idx.step,
name=idx.name,
)
else:
idx_copy = idx._as_int_index()
_append_new_row_inplace(idx_copy._column, key)
else:
if isinstance(idx_copy, cudf.RangeIndex):
idx_copy = idx_copy._as_int_index()
_append_new_row_inplace(idx_copy._values, key)
# TODO: Modifying index in place is bad because
# our index are immutable, but columns are not (which
# means our index are mutable with internal APIs).
# Get rid of the deep copy once columns too are
# immutable.
idx_copy = idx.copy(deep=True)
_append_new_row_inplace(idx_copy._column, key)

self._frame._index = idx_copy
_append_new_row_inplace(self._frame._column, value)
return
Expand Down Expand Up @@ -1407,34 +1389,23 @@ def __repr__(self):
cudf.core.dtypes.DecimalDtype,
),
)
) or isinstance(
preprocess._column,
cudf.core.column.timedelta.TimeDeltaColumn,
):
) or preprocess.dtype.kind == "m":
fill_value = (
str(cudf.NaT)
if isinstance(
preprocess._column,
(
cudf.core.column.TimeDeltaColumn,
cudf.core.column.DatetimeColumn,
),
)
if preprocess.dtype.kind in "mM"
else str(cudf.NA)
)
output = repr(
preprocess.astype("str").fillna(fill_value).to_pandas()
)
elif isinstance(
preprocess._column, cudf.core.column.CategoricalColumn
):
elif isinstance(preprocess.dtype, cudf.CategoricalDtype):
min_rows = (
height
if pd.get_option("display.min_rows") == 0
else pd.get_option("display.min_rows")
)
show_dimensions = pd.get_option("display.show_dimensions")
if preprocess._column.categories.dtype.kind == "f":
if preprocess.dtype.categories.dtype.kind == "f":
pd_series = (
preprocess.astype("str")
.to_pandas()
Expand All @@ -1461,13 +1432,13 @@ def __repr__(self):
output = repr(preprocess.to_pandas())

lines = output.split("\n")
if isinstance(preprocess._column, cudf.core.column.CategoricalColumn):
if isinstance(preprocess.dtype, cudf.CategoricalDtype):
category_memory = lines[-1]
if preprocess._column.categories.dtype.kind == "f":
if preprocess.dtype.categories.dtype.kind == "f":
category_memory = category_memory.replace("'", "").split(": ")
category_memory = (
category_memory[0].replace(
"object", preprocess._column.categories.dtype.name
"object", preprocess.dtype.categories.dtype.name
)
+ ": "
+ category_memory[1]
Expand Down
3 changes: 1 addition & 2 deletions python/cudf/cudf/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1061,8 +1061,7 @@ def _to_iso_calendar(arg):
)
if isinstance(arg, cudf.Index):
iso_params = [
arg._column.as_string_column(arg._values.dtype, fmt)
for fmt in formats
arg._column.as_string_column(arg.dtype, fmt) for fmt in formats
]
index = arg._column
elif isinstance(arg.series, cudf.Series):
Expand Down
Loading