Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove or split up Frame methods that use the index #10439

Merged
merged 18 commits into from
Mar 21, 2022
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 21 additions & 7 deletions python/cudf/cudf/core/column_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,26 @@ def select_by_label(self, key: Any) -> ColumnAccessor:
return self._select_by_label_with_wildcard(key)
return self._select_by_label_grouped(key)

def get_labels_by_index(self, index: Any) -> tuple:
    """Get the labels corresponding to the provided column indices.

    Parameters
    ----------
    index : integer, integer slice, or list-like of integers
        The positional column indices to look up.

    Returns
    -------
    tuple
        The column labels found at the requested positions, in the
        order in which the positions were requested.
    """
    if isinstance(index, slice):
        # ``slice.indices`` normalizes a negative-step slice that runs to
        # the front of the sequence to ``stop == -1``. Feeding that value
        # back into a slice would be interpreted as "the last element"
        # and select nothing (e.g. ``names[2:-1:-1] == ()``), so the
        # positions are enumerated with ``range`` instead.
        positions = range(*index.indices(len(self._data)))
        return tuple(self.names[i] for i in positions)
    if pd.api.types.is_integer(index):
        # A single position still yields a one-element tuple so that all
        # branches return the same type.
        return (self.names[index],)
    return tuple(self.names[i] for i in index)

def select_by_index(self, index: Any) -> ColumnAccessor:
"""
Return a ColumnAccessor composed of the columns
Expand All @@ -355,13 +375,7 @@ def select_by_index(self, index: Any) -> ColumnAccessor:
-------
ColumnAccessor
"""
if isinstance(index, slice):
start, stop, step = index.indices(len(self._data))
keys = self.names[start:stop:step]
elif pd.api.types.is_integer(index):
keys = (self.names[index],)
else:
keys = tuple(self.names[i] for i in index)
keys = self.get_labels_by_index(index)
data = {k: self._data[k] for k in keys}
return self.__class__(
data, multiindex=self.multiindex, level_names=self.level_names,
Expand Down
29 changes: 17 additions & 12 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,9 +372,9 @@ class _DataFrameIlocIndexer(_DataFrameIndexer):
def _getitem_tuple_arg(self, arg):
# Iloc Step 1:
# Gather the columns specified by the second tuple arg
columns_df = self._frame._get_columns_by_index(arg[1])

columns_df._index = self._frame._index
columns_df = self._frame._from_data(
self._frame._data.select_by_index(arg[1]), self._frame._index
)

# Iloc Step 2:
# Gather the rows specified by the first tuple arg
Expand Down Expand Up @@ -422,9 +422,9 @@ def _getitem_tuple_arg(self, arg):

@_cudf_nvtx_annotate
def _setitem_tuple_arg(self, key, value):
columns = self._frame._get_columns_by_index(key[1])

for col in columns:
# TODO: Determine if this usage is prevalent enough to expose this
# selection logic at a higher level than ColumnAccessor.
for col in self._frame._data.get_labels_by_index(key[1]):
self._frame[col].iloc[key[0]] = value

def _getitem_scalar(self, arg):
Expand Down Expand Up @@ -612,7 +612,7 @@ def __init__(
new_df = self._from_arrays(data, index=index, columns=columns)

self._data = new_df._data
self.index = new_df._index
self._index = new_df._index
elif hasattr(data, "__array_interface__"):
arr_interface = data.__array_interface__
if len(arr_interface["descr"]) == 1:
Expand All @@ -621,7 +621,7 @@ def __init__(
else:
new_df = self.from_records(data, index=index, columns=columns)
self._data = new_df._data
self.index = new_df._index
self._index = new_df._index
else:
if is_list_like(data):
if len(data) > 0 and is_scalar(data[0]):
Expand All @@ -632,7 +632,7 @@ def __init__(
new_df = DataFrame(data=data, index=index)

self._data = new_df._data
self.index = new_df._index
self._index = new_df._index
elif len(data) > 0 and isinstance(data[0], Series):
self._init_from_series_list(
data=data, columns=columns, index=index
Expand All @@ -650,6 +650,11 @@ def __init__(
data, index=index, columns=columns, nan_as_null=nan_as_null
)

if self._data.nrows > 0 and self._data.nrows != len(self._index):
vyasr marked this conversation as resolved.
Show resolved Hide resolved
raise ValueError(
f"Shape of passed values is {self.shape}, indices imply "
f"({len(self._index)}, {self._num_columns})"
)
if dtype:
self._data = self.astype(dtype)._data

Expand Down Expand Up @@ -855,10 +860,10 @@ def _from_data(
data: MutableMapping,
index: Optional[BaseIndex] = None,
columns: Any = None,
*args,
vyasr marked this conversation as resolved.
Show resolved Hide resolved
**kwargs,
) -> DataFrame:
out = super()._from_data(data, index)
if index is None:
out.index = RangeIndex(out._data.nrows)
out = super()._from_data(data=data, index=index)
if columns is not None:
out.columns = columns
return out
Expand Down
Loading