From 818b29d2ee49a7cc6de910951f64c36c55cc6d08 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 22 Apr 2024 13:07:33 -1000 Subject: [PATCH] Clean up index methods (#15496) - Removed `_index_from_columns` in favor of an inline call - Renamed `_setdefault_name` to `_getdefault_name` and changed it to return the name instead of modifying `kwargs` Authors: - Matthew Roeschke (https://github.com/mroeschke) - GALI PREM SAGAR (https://github.com/galipremsagar) - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/15496 --- python/cudf/cudf/core/groupby/groupby.py | 4 ++- python/cudf/cudf/core/index.py | 31 +++++++++--------------- python/cudf/cudf/core/indexed_frame.py | 10 +++++--- 3 files changed, 20 insertions(+), 25 deletions(-) diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index dd4924676f3..3e4b8192888 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -1201,7 +1201,9 @@ def _grouped(self, *, include_groups: bool = True): offsets, grouped_key_cols, grouped_value_cols = self._groupby.groups( [*self.obj._index._columns, *self.obj._columns] ) - grouped_keys = cudf.core.index._index_from_columns(grouped_key_cols) + grouped_keys = cudf.core.index._index_from_data( + dict(enumerate(grouped_key_cols)) + ) if isinstance(self.grouping.keys, cudf.MultiIndex): grouped_keys.names = self.grouping.keys.names to_drop = self.grouping.keys.names diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index bbe496333cd..6f08b1d83b3 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -149,13 +149,6 @@ def _index_from_data(data: MutableMapping, name: Any = no_default): return index_class_type._from_data(data, name) -def _index_from_columns( - columns: List[cudf.core.column.ColumnBase], name: Any = no_default -): - 
"""Construct an index from ``columns``, with levels named 0, 1, 2...""" - return _index_from_data(dict(zip(range(len(columns)), columns)), name=name) - - class RangeIndex(BaseIndex, BinaryOperand): """ Immutable Index implementing a monotonic integer range. @@ -988,8 +981,7 @@ class Index(SingleColumnFrame, BaseIndex, metaclass=IndexMeta): @_cudf_nvtx_annotate def __init__(self, data, **kwargs): - kwargs = _setdefault_name(data, **kwargs) - name = kwargs.get("name") + name = _getdefault_name(data, name=kwargs.get("name")) super().__init__({name: data}) @_cudf_nvtx_annotate @@ -1397,8 +1389,7 @@ def __repr__(self): def __getitem__(self, index): res = self._get_elements_from_column(index) if isinstance(res, ColumnBase): - res = as_index(res) - res.name = self.name + res = as_index(res, name=self.name) return res @property # type: ignore @@ -1713,7 +1704,7 @@ def __init__( if dtype.kind != "M": raise TypeError("dtype must be a datetime type") - name = _setdefault_name(data, name=name)["name"] + name = _getdefault_name(data, name=name) data = column.as_column(data) # TODO: Remove this if statement and fix tests now that @@ -2432,7 +2423,7 @@ def __init__( if dtype.kind != "m": raise TypeError("dtype must be a timedelta type") - name = _setdefault_name(data, name=name)["name"] + name = _getdefault_name(data, name=name) data = column.as_column(data, dtype=dtype) if copy: @@ -2601,7 +2592,7 @@ def __init__( ) if copy: data = column.as_column(data, dtype=dtype).copy(deep=True) - kwargs = _setdefault_name(data, name=name) + name = _getdefault_name(data, name=name) if isinstance(data, CategoricalColumn): data = data elif isinstance(data, pd.Series) and ( @@ -2635,7 +2626,7 @@ def __init__( data = data.as_ordered(ordered=True) elif ordered is False and data.ordered is True: data = data.as_ordered(ordered=False) - super().__init__(data, **kwargs) + super().__init__(data, name=name) @property # type: ignore @_cudf_nvtx_annotate @@ -2821,7 +2812,7 @@ def __init__( copy: bool = 
False, name=None, ): - name = _setdefault_name(data, name=name)["name"] + name = _getdefault_name(data, name=name) if dtype is not None: dtype = cudf.dtype(dtype) @@ -3053,10 +3044,10 @@ def as_index( return idx -def _setdefault_name(values, **kwargs): - if kwargs.get("name") is None: - kwargs["name"] = getattr(values, "name", None) - return kwargs +def _getdefault_name(values, name): + if name is None: + return getattr(values, "name", None) + return name @_cudf_nvtx_annotate diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index c412b7a7e47..48e80d8162f 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -56,7 +56,7 @@ from cudf.core.dtypes import ListDtype from cudf.core.frame import Frame from cudf.core.groupby.groupby import GroupBy -from cudf.core.index import Index, RangeIndex, _index_from_columns +from cudf.core.index import Index, RangeIndex, _index_from_data from cudf.core.missing import NA from cudf.core.multiindex import MultiIndex from cudf.core.resample import _Resampler @@ -331,7 +331,9 @@ def _from_columns_like_self( if index_names is not None: n_index_columns = len(index_names) data_columns = columns[n_index_columns:] - index = _index_from_columns(columns[:n_index_columns]) + index = _index_from_data( + dict(enumerate(columns[:n_index_columns])) + ) if isinstance(index, cudf.MultiIndex): index.names = index_names else: @@ -4348,8 +4350,8 @@ def _reset_index(self, level, drop, col_level=0, col_fill=""): index_names, ) = self._index._split_columns_by_levels(level) if index_columns: - index = _index_from_columns( - index_columns, + index = _index_from_data( + dict(enumerate(index_columns)), name=self._index.name, ) if isinstance(index, MultiIndex):