Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove _source_index from MultiIndex #9191

Merged
merged 13 commits into from
Sep 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3983,7 +3983,7 @@ def sort_index(
]
else:
labels = [self.index._get_level_label(level)]
inds = self.index._source_data[labels].argsort(
inds = self.index.to_frame(index=False)[labels].argsort(
ascending=ascending, na_position=na_position
)
else:
Expand Down
14 changes: 9 additions & 5 deletions python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1856,7 +1856,7 @@ def sample(
if isinstance(self, cudf.MultiIndex):
# TODO: Need to update this once MultiIndex is refactored,
# should be able to treat it similar to other Frame object
result = cudf.Index(self._source_data[gather_map])
result = cudf.Index(self.to_frame(index=False)[gather_map])
else:
result = self[gather_map]
if not keep_index:
Expand Down Expand Up @@ -3168,9 +3168,13 @@ def _reindex(
index = cudf.core.index.as_index(index)

if isinstance(index, cudf.MultiIndex):
idx_dtype_match = (
df.index._source_data.dtypes == index._source_data.dtypes
).all()
idx_dtype_match = all(
left_dtype == right_dtype
for left_dtype, right_dtype in zip(
(col.dtype for col in df.index._data.columns),
(col.dtype for col in index._data.columns),
)
)
else:
idx_dtype_match = df.index.dtype == index.dtype

Expand Down Expand Up @@ -5220,7 +5224,7 @@ def _drop_rows_by_labels(
# 1. Merge Index df and data df along column axis:
# | id | ._index df | data column(s) |
idx_nlv = obj._index.nlevels
working_df = obj._index._source_data
working_df = obj._index.to_frame(index=False)
working_df.columns = [i for i in range(idx_nlv)]
for i, col in enumerate(obj._data):
working_df[idx_nlv + i] = obj._data[col]
Expand Down
9 changes: 3 additions & 6 deletions python/cudf/cudf/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1335,12 +1335,9 @@ def keys(self):
if nkeys == 0:
return cudf.core.index.as_index([], name=None)
elif nkeys > 1:
return cudf.MultiIndex(
source_data=cudf.DataFrame(
dict(zip(range(nkeys), self._key_columns))
),
names=self.names,
)
return cudf.MultiIndex._from_data(
dict(zip(range(nkeys), self._key_columns))
)._set_names(self.names)
else:
return cudf.core.index.as_index(
self._key_columns[0], name=self.names[0]
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2203,7 +2203,7 @@ def as_index(arbitrary, **kwargs) -> BaseIndex:
elif isinstance(arbitrary, pd.MultiIndex):
return cudf.MultiIndex.from_pandas(arbitrary)
elif isinstance(arbitrary, cudf.DataFrame):
return cudf.MultiIndex(source_data=arbitrary)
return cudf.MultiIndex.from_frame(arbitrary)
return as_index(
column.as_column(arbitrary, dtype=kwargs.get("dtype", None)), **kwargs
)
Expand Down
Loading