Skip to content

Commit

Permalink
Avoid accessing attributes via _column if not needed (#15624)
Browse files Browse the repository at this point in the history
xref #15494

If the attributes are exposed on the top level object e.g. `Index.dtype` it should be sufficient to just access the attributes there instead of reaching for the underlying object

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #15624
  • Loading branch information
mroeschke authored May 7, 2024
1 parent 5d244df commit 5154661
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 78 deletions.
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,10 +142,10 @@ def _index_or_values_interpolation(column, index=None):
BooleanMask(~mask, len(to_interp))
)

known_x = known_x_and_y._index._column.values
known_x = known_x_and_y.index.to_cupy()
known_y = known_x_and_y._data.columns[0].values

result = cp.interp(to_interp._index.values, known_x, known_y)
result = cp.interp(index.to_cupy(), known_x, known_y)

# find the first nan
first_nan_idx = (mask == 0).argmax().item()
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1768,7 +1768,7 @@ def _concat(
indices[:first_data_column_position],
)
if not isinstance(out._index, MultiIndex) and isinstance(
out._index._values.dtype, cudf.CategoricalDtype
out._index.dtype, cudf.CategoricalDtype
):
out = out.set_index(
cudf.core.index.as_index(out.index._values)
Expand Down Expand Up @@ -3582,7 +3582,7 @@ def rename(
if index:
if (
any(isinstance(item, str) for item in index.values())
and type(self.index._values) != cudf.core.column.StringColumn
and self.index.dtype != "object"
):
raise NotImplementedError(
"Implicit conversion of index to "
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2882,7 +2882,7 @@ def __init__(

@property
def closed(self):
return self._values.dtype.closed
return self.dtype.closed

@classmethod
@_cudf_nvtx_annotate
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def _indices_from_labels(obj, labels):

if isinstance(obj.index.dtype, cudf.CategoricalDtype):
labels = labels.astype("category")
codes = labels.codes.astype(obj.index._values.codes.dtype)
codes = labels.codes.astype(obj.index.codes.dtype)
labels = cudf.core.column.build_categorical_column(
categories=labels.dtype.categories,
codes=codes,
Expand Down
111 changes: 41 additions & 70 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,9 @@
_is_scalar_or_zero_d_array,
is_bool_dtype,
is_dict_like,
is_float_dtype,
is_integer,
is_integer_dtype,
is_scalar,
is_string_dtype,
)
from cudf.core import indexing_utils
from cudf.core._compat import PANDAS_LT_300
Expand Down Expand Up @@ -205,19 +203,10 @@ def __setitem__(self, key, value):
if is_scalar(value):
value = to_cudf_compatible_scalar(value)
if (
not isinstance(
self._frame._column,
(
cudf.core.column.DatetimeColumn,
cudf.core.column.TimeDeltaColumn,
),
)
self._frame.dtype.kind not in "mM"
and cudf.utils.utils._isnat(value)
and not (
isinstance(
self._frame._column, cudf.core.column.StringColumn
)
and isinstance(value, str)
self._frame.dtype == "object" and isinstance(value, str)
)
):
raise MixedTypeError(
Expand All @@ -226,55 +215,48 @@ def __setitem__(self, key, value):
)
elif (
not (
is_float_dtype(self._frame._column.dtype)
self._frame.dtype.kind == "f"
or (
isinstance(
self._frame._column.dtype, cudf.CategoricalDtype
)
and is_float_dtype(
self._frame._column.dtype.categories.dtype
)
isinstance(self._frame.dtype, cudf.CategoricalDtype)
and self._frame.dtype.categories.dtype.kind == "f"
)
)
and isinstance(value, (np.float32, np.float64))
and np.isnan(value)
):
raise MixedTypeError(
f"Cannot assign {value=} to "
f"non-float dtype={self._frame._column.dtype}"
f"non-float dtype={self._frame.dtype}"
)
elif (
is_bool_dtype(self._frame._column.dtype)
self._frame.dtype.kind == "b"
and not is_bool_dtype(value)
and value not in {None, cudf.NA}
):
raise MixedTypeError(
f"Cannot assign {value=} to "
f"bool dtype={self._frame._column.dtype}"
f"bool dtype={self._frame.dtype}"
)
elif not (
isinstance(value, (list, dict))
and isinstance(
self._frame._column.dtype, (cudf.ListDtype, cudf.StructDtype)
self._frame.dtype, (cudf.ListDtype, cudf.StructDtype)
)
):
value = as_column(value)

if (
(
_is_non_decimal_numeric_dtype(self._frame._column.dtype)
or is_string_dtype(self._frame._column.dtype)
)
(self._frame.dtype.kind in "uifb" or self._frame.dtype == "object")
and hasattr(value, "dtype")
and _is_non_decimal_numeric_dtype(value.dtype)
and value.dtype.kind in "uifb"
):
# normalize types if necessary:
# In contrast to Column.__setitem__ (which downcasts the value to
# the dtype of the column) here we upcast the series to the
# larger data type mimicking pandas
to_dtype = np.result_type(value.dtype, self._frame._column.dtype)
to_dtype = np.result_type(value.dtype, self._frame.dtype)
value = value.astype(to_dtype)
if to_dtype != self._frame._column.dtype:
if to_dtype != self._frame.dtype:
# Do not remove until pandas-3.0 support is added.
assert (
PANDAS_LT_300
Expand All @@ -283,7 +265,7 @@ def __setitem__(self, key, value):
f"Setting an item of incompatible dtype is deprecated "
"and will raise in a future error of pandas. "
f"Value '{value}' has dtype incompatible with "
f"{self._frame._column.dtype}, "
f"{self._frame.dtype}, "
"please explicitly cast to a compatible dtype first.",
FutureWarning,
)
Expand Down Expand Up @@ -336,27 +318,27 @@ def __setitem__(self, key, value):
and not isinstance(self._frame.index, cudf.MultiIndex)
and is_scalar(value)
):
# TODO: Modifying index in place is bad because
# our index are immutable, but columns are not (which
# means our index are mutable with internal APIs).
# Get rid of the deep copy once columns too are
# immutable.
idx_copy = self._frame._index.copy(deep=True)
if (
isinstance(idx_copy, cudf.RangeIndex)
and isinstance(key, int)
and (key == idx_copy[-1] + idx_copy.step)
):
idx_copy = cudf.RangeIndex(
start=idx_copy.start,
stop=idx_copy.stop + idx_copy.step,
step=idx_copy.step,
name=idx_copy.name,
)
idx = self._frame._index
if isinstance(idx, cudf.RangeIndex):
if isinstance(key, int) and (key == idx[-1] + idx.step):
idx_copy = cudf.RangeIndex(
start=idx.start,
stop=idx.stop + idx.step,
step=idx.step,
name=idx.name,
)
else:
idx_copy = idx._as_int_index()
_append_new_row_inplace(idx_copy._column, key)
else:
if isinstance(idx_copy, cudf.RangeIndex):
idx_copy = idx_copy._as_int_index()
_append_new_row_inplace(idx_copy._values, key)
# TODO: Modifying index in place is bad because
# our index are immutable, but columns are not (which
# means our index are mutable with internal APIs).
# Get rid of the deep copy once columns too are
# immutable.
idx_copy = idx.copy(deep=True)
_append_new_row_inplace(idx_copy._column, key)

self._frame._index = idx_copy
_append_new_row_inplace(self._frame._column, value)
return
Expand Down Expand Up @@ -1407,34 +1389,23 @@ def __repr__(self):
cudf.core.dtypes.DecimalDtype,
),
)
) or isinstance(
preprocess._column,
cudf.core.column.timedelta.TimeDeltaColumn,
):
) or preprocess.dtype.kind == "m":
fill_value = (
str(cudf.NaT)
if isinstance(
preprocess._column,
(
cudf.core.column.TimeDeltaColumn,
cudf.core.column.DatetimeColumn,
),
)
if preprocess.dtype.kind in "mM"
else str(cudf.NA)
)
output = repr(
preprocess.astype("str").fillna(fill_value).to_pandas()
)
elif isinstance(
preprocess._column, cudf.core.column.CategoricalColumn
):
elif isinstance(preprocess.dtype, cudf.CategoricalDtype):
min_rows = (
height
if pd.get_option("display.min_rows") == 0
else pd.get_option("display.min_rows")
)
show_dimensions = pd.get_option("display.show_dimensions")
if preprocess._column.categories.dtype.kind == "f":
if preprocess.dtype.categories.dtype.kind == "f":
pd_series = (
preprocess.astype("str")
.to_pandas()
Expand All @@ -1461,13 +1432,13 @@ def __repr__(self):
output = repr(preprocess.to_pandas())

lines = output.split("\n")
if isinstance(preprocess._column, cudf.core.column.CategoricalColumn):
if isinstance(preprocess.dtype, cudf.CategoricalDtype):
category_memory = lines[-1]
if preprocess._column.categories.dtype.kind == "f":
if preprocess.dtype.categories.dtype.kind == "f":
category_memory = category_memory.replace("'", "").split(": ")
category_memory = (
category_memory[0].replace(
"object", preprocess._column.categories.dtype.name
"object", preprocess.dtype.categories.dtype.name
)
+ ": "
+ category_memory[1]
Expand Down
3 changes: 1 addition & 2 deletions python/cudf/cudf/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1058,8 +1058,7 @@ def _to_iso_calendar(arg):
)
if isinstance(arg, cudf.Index):
iso_params = [
arg._column.as_string_column(arg._values.dtype, fmt)
for fmt in formats
arg._column.as_string_column(arg.dtype, fmt) for fmt in formats
]
index = arg._column
elif isinstance(arg.series, cudf.Series):
Expand Down

0 comments on commit 5154661

Please sign in to comment.