Skip to content

Commit

Permalink
Add new internal API for setting columns and use it where possible.
Browse files Browse the repository at this point in the history
  • Loading branch information
vyasr committed Feb 22, 2022
1 parent 10fac9e commit 68bb602
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 16 deletions.
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/_internals/where.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ def where(
)
# Setting `frame` column names to `cond`
# as `cond` has no column names.
cond.columns = frame._data.to_pandas_index()
cond._set_column_names_like(frame)

(source_df, others,) = _normalize_columns_and_scalars_type(
frame, other
Expand Down
33 changes: 20 additions & 13 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -999,9 +999,10 @@ def __setattr__(self, key, col):
super().__setattr__(key, col)

except RuntimeError as e:
# Need to allow setting properties that are marked as forbidden for
# internal usage.
# TODO: Check if there are alternatives that could be used instead.
# TODO: This allows setting properties that are marked as forbidden
# for internal usage. It is necessary because the __getattribute__
# call in the try block will trigger the error. We should see if
# setting these variables can also always be disabled.
if "External-only API" not in str(e):
raise
super().__setattr__(key, col)
Expand Down Expand Up @@ -1314,7 +1315,7 @@ def _slice(self: T, arg: slice) -> T:
# Adding index of type RangeIndex back to
# result
result.index = self.index[start:stop]
result.columns = self._data.to_pandas_index()
result._set_column_names_like(self)
return result

@annotate("DATAFRAME_MEMORY_USAGE", color="blue", domain="cudf_python")
Expand Down Expand Up @@ -2191,12 +2192,20 @@ def columns(self, columns):
f"got {len(columns)} elements"
)

data = dict(zip(columns, self._data.columns))
if len(columns) != len(data):
self._set_column_names(columns, is_multiindex, columns.names)

def _set_column_names(self, names, multiindex=False, level_names=None):
    """Relabel this frame's existing columns in place.

    Parameters
    ----------
    names
        New column labels, paired positionally with the existing columns
        in ``self._data.columns``.
    multiindex : bool, default False
        Forwarded to ``ColumnAccessor`` as ``multiindex``.
    level_names : optional
        Forwarded to ``ColumnAccessor`` as ``level_names``.

    Raises
    ------
    ValueError
        If the number of distinct labels paired with columns differs from
        ``len(names)``, which is reported as duplicate column names.
    """
    data = dict(zip(names, self._data.columns))
    # Repeated labels collapse into a single dict key, so the mapping ends
    # up smaller than ``names``.
    # NOTE(review): a ``names`` longer than the column count would also land
    # here because ``zip`` truncates; callers appear to validate the length
    # beforehand -- confirm.
    if len(names) != len(data):
        raise ValueError("Duplicate column names are not allowed")

    # Only the arguments of this function are used here; the caller's
    # ``is_multiindex`` / ``columns`` names are not in scope.
    self._data = ColumnAccessor(
        data, multiindex=multiindex, level_names=level_names,
    )

def _set_column_names_like(self, other):
    """Give this frame the same column labelling as ``other``.

    Reads ``names``, ``multiindex`` and ``level_names`` from
    ``other._data`` and passes them to ``self._set_column_names``.
    """
    source = other._data
    self._set_column_names(
        names=source.names,
        multiindex=source.multiindex,
        level_names=source.level_names,
    )

@annotate("DATAFRAME_REINDEX_INTERNAL", color="blue", domain="cudf_python")
Expand Down Expand Up @@ -5522,7 +5531,7 @@ def mode(self, axis=0, numeric_only=False, dropna=True):
if isinstance(df, Series):
df = df.to_frame()

df.columns = data_df._data.to_pandas_index()
df._set_column_names_like(data_df)

return df

Expand Down Expand Up @@ -5652,7 +5661,7 @@ def _apply_cupy_method_axis_1(self, method, *args, **kwargs):
return Series(result, index=self.index, dtype=result_dtype,)
else:
result_df = DataFrame(result).set_index(self.index)
result_df.columns = prepared._data.to_pandas_index()
result_df._set_column_names_like(prepared)
return result_df

@annotate("DATAFRAME_COLUMNS_VIEW", color="green", domain="cudf_python")
Expand Down Expand Up @@ -5927,20 +5936,18 @@ def cov(self, **kwargs):
cov : DataFrame
"""
cov = cupy.cov(self.values, rowvar=False)
# TODO: Why are we setting this for both index and columns?
cols = self._data.to_pandas_index()
df = DataFrame(cupy.asfortranarray(cov)).set_index(cols)
df.columns = cols
df._set_column_names_like(self)
return df

@annotate("DATAFRAME_CORR", color="green", domain="cudf_python")
def corr(self):
    """Compute the correlation matrix of a DataFrame.

    Returns
    -------
    DataFrame
        The result of ``cupy.corrcoef`` over ``self.values`` with
        ``rowvar=False``, indexed by this frame's column labels and
        carrying the same column names as this frame.
    """
    corr = cupy.corrcoef(self.values, rowvar=False)
    # With ``rowvar=False`` each source column is one variable, so the
    # result has one row and one column per source column. The source
    # labels are therefore applied to both the index and the columns.
    cols = self._data.to_pandas_index()
    df = DataFrame(cupy.asfortranarray(corr)).set_index(cols)
    # Column labels are copied through the internal setter only; assigning
    # ``df.columns`` first would be immediately overwritten by this call.
    df._set_column_names_like(self)
    return df

@annotate("DATAFRAME_TO_STRUCT", color="green", domain="cudf_python")
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,7 @@ def sort_index(
isinstance(self, cudf.core.dataframe.DataFrame)
and self._data.multiindex
):
out.columns = self._data.to_pandas_index()
out._set_column_names_like(self)
elif (ascending and idx.is_monotonic_increasing) or (
not ascending and idx.is_monotonic_decreasing
):
Expand All @@ -462,7 +462,7 @@ def sort_index(
isinstance(self, cudf.core.dataframe.DataFrame)
and self._data.multiindex
):
out.columns = self._data.to_pandas_index()
out._set_column_names_like(self)
else:
labels = sorted(self._data.names, reverse=not ascending)
out = self[labels]
Expand Down

0 comments on commit 68bb602

Please sign in to comment.