Skip to content

Commit

Permalink
Access self.index instead of self._index where possible (#15781)
Browse files Browse the repository at this point in the history
Since `index` is defined as 

```python
@property
def index(self):
    return self._index
```

Get and set through `self.index` whenever possible. Assigning through `self.index` ensures that we do not create an invalid `IndexedFrame` where `len(index) != len(columns)`.

In some places, setting `self._index` directly was still necessary because data was being swapped "inplace" and validation needed to be avoided. (Hoping to avoid this pattern in the future.)

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #15781
  • Loading branch information
mroeschke authored May 22, 2024
1 parent 45dc595 commit 57444ed
Show file tree
Hide file tree
Showing 8 changed files with 253 additions and 231 deletions.
216 changes: 123 additions & 93 deletions python/cudf/cudf/core/dataframe.py

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions python/cudf/cudf/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -949,7 +949,7 @@ def nth(self, n):

result = result[sizes > n]

result._index = self.obj.index.take(
result.index = self.obj.index.take(
result._data["__groupbynth_order__"]
)
del result._data["__groupbynth_order__"]
Expand Down Expand Up @@ -1038,7 +1038,7 @@ def ngroup(self, ascending=True):
if has_null_group:
group_ids.iloc[-1] = cudf.NA

group_ids._index = index
group_ids.index = index
return self._broadcast(group_ids)

def sample(
Expand Down Expand Up @@ -1208,7 +1208,7 @@ def deserialize(cls, header, frames):

def _grouped(self, *, include_groups: bool = True):
offsets, grouped_key_cols, grouped_value_cols = self._groupby.groups(
[*self.obj._index._columns, *self.obj._columns]
[*self.obj.index._columns, *self.obj._columns]
)
grouped_keys = cudf.core.index._index_from_data(
dict(enumerate(grouped_key_cols))
Expand Down Expand Up @@ -2849,8 +2849,8 @@ def _handle_label(self, by):
self._key_columns.append(self._obj._data[by])
except KeyError as e:
# `by` can be index name(label) too.
if by in self._obj._index.names:
self._key_columns.append(self._obj._index._data[by])
if by in self._obj.index.names:
self._key_columns.append(self._obj.index._data[by])
else:
raise e
self.names.append(by)
Expand Down
Loading

0 comments on commit 57444ed

Please sign in to comment.