Skip to content

Commit

Permalink
Clean up index methods (#15496)
Browse files Browse the repository at this point in the history
- Removed `_index_from_columns` in favor of an inline call to `_index_from_data(dict(enumerate(columns)))`
- Renamed `_setdefault_name` to `_getdefault_name` and changed it to return the resolved name instead of modifying `kwargs`

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #15496
  • Loading branch information
mroeschke authored Apr 22, 2024
1 parent 475f5e5 commit 818b29d
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 25 deletions.
4 changes: 3 additions & 1 deletion python/cudf/cudf/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1201,7 +1201,9 @@ def _grouped(self, *, include_groups: bool = True):
offsets, grouped_key_cols, grouped_value_cols = self._groupby.groups(
[*self.obj._index._columns, *self.obj._columns]
)
grouped_keys = cudf.core.index._index_from_columns(grouped_key_cols)
grouped_keys = cudf.core.index._index_from_data(
dict(enumerate(grouped_key_cols))
)
if isinstance(self.grouping.keys, cudf.MultiIndex):
grouped_keys.names = self.grouping.keys.names
to_drop = self.grouping.keys.names
Expand Down
31 changes: 11 additions & 20 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,13 +149,6 @@ def _index_from_data(data: MutableMapping, name: Any = no_default):
return index_class_type._from_data(data, name)


def _index_from_columns(
columns: List[cudf.core.column.ColumnBase], name: Any = no_default
):
"""Construct an index from ``columns``, with levels named 0, 1, 2..."""
return _index_from_data(dict(zip(range(len(columns)), columns)), name=name)


class RangeIndex(BaseIndex, BinaryOperand):
"""
Immutable Index implementing a monotonic integer range.
Expand Down Expand Up @@ -988,8 +981,7 @@ class Index(SingleColumnFrame, BaseIndex, metaclass=IndexMeta):

@_cudf_nvtx_annotate
def __init__(self, data, **kwargs):
kwargs = _setdefault_name(data, **kwargs)
name = kwargs.get("name")
name = _getdefault_name(data, name=kwargs.get("name"))
super().__init__({name: data})

@_cudf_nvtx_annotate
Expand Down Expand Up @@ -1397,8 +1389,7 @@ def __repr__(self):
def __getitem__(self, index):
res = self._get_elements_from_column(index)
if isinstance(res, ColumnBase):
res = as_index(res)
res.name = self.name
res = as_index(res, name=self.name)
return res

@property # type: ignore
Expand Down Expand Up @@ -1713,7 +1704,7 @@ def __init__(
if dtype.kind != "M":
raise TypeError("dtype must be a datetime type")

name = _setdefault_name(data, name=name)["name"]
name = _getdefault_name(data, name=name)
data = column.as_column(data)

# TODO: Remove this if statement and fix tests now that
Expand Down Expand Up @@ -2432,7 +2423,7 @@ def __init__(
if dtype.kind != "m":
raise TypeError("dtype must be a timedelta type")

name = _setdefault_name(data, name=name)["name"]
name = _getdefault_name(data, name=name)
data = column.as_column(data, dtype=dtype)

if copy:
Expand Down Expand Up @@ -2601,7 +2592,7 @@ def __init__(
)
if copy:
data = column.as_column(data, dtype=dtype).copy(deep=True)
kwargs = _setdefault_name(data, name=name)
name = _getdefault_name(data, name=name)
if isinstance(data, CategoricalColumn):
data = data
elif isinstance(data, pd.Series) and (
Expand Down Expand Up @@ -2635,7 +2626,7 @@ def __init__(
data = data.as_ordered(ordered=True)
elif ordered is False and data.ordered is True:
data = data.as_ordered(ordered=False)
super().__init__(data, **kwargs)
super().__init__(data, name=name)

@property # type: ignore
@_cudf_nvtx_annotate
Expand Down Expand Up @@ -2821,7 +2812,7 @@ def __init__(
copy: bool = False,
name=None,
):
name = _setdefault_name(data, name=name)["name"]
name = _getdefault_name(data, name=name)

if dtype is not None:
dtype = cudf.dtype(dtype)
Expand Down Expand Up @@ -3053,10 +3044,10 @@ def as_index(
return idx


def _setdefault_name(values, **kwargs):
if kwargs.get("name") is None:
kwargs["name"] = getattr(values, "name", None)
return kwargs
def _getdefault_name(values, name):
if name is None:
return getattr(values, "name", None)
return name


@_cudf_nvtx_annotate
Expand Down
10 changes: 6 additions & 4 deletions python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
from cudf.core.dtypes import ListDtype
from cudf.core.frame import Frame
from cudf.core.groupby.groupby import GroupBy
from cudf.core.index import Index, RangeIndex, _index_from_columns
from cudf.core.index import Index, RangeIndex, _index_from_data
from cudf.core.missing import NA
from cudf.core.multiindex import MultiIndex
from cudf.core.resample import _Resampler
Expand Down Expand Up @@ -331,7 +331,9 @@ def _from_columns_like_self(
if index_names is not None:
n_index_columns = len(index_names)
data_columns = columns[n_index_columns:]
index = _index_from_columns(columns[:n_index_columns])
index = _index_from_data(
dict(enumerate(columns[:n_index_columns]))
)
if isinstance(index, cudf.MultiIndex):
index.names = index_names
else:
Expand Down Expand Up @@ -4348,8 +4350,8 @@ def _reset_index(self, level, drop, col_level=0, col_fill=""):
index_names,
) = self._index._split_columns_by_levels(level)
if index_columns:
index = _index_from_columns(
index_columns,
index = _index_from_data(
dict(enumerate(index_columns)),
name=self._index.name,
)
if isinstance(index, MultiIndex):
Expand Down

0 comments on commit 818b29d

Please sign in to comment.