Skip to content

Commit

Permalink
REFACTOR-#5310: Remove some hasattr('columns') checks. (#5311)
Browse files Browse the repository at this point in the history
Signed-off-by: mvashishtha <[email protected]>
  • Loading branch information
mvashishtha authored Dec 6, 2022
1 parent c868065 commit 2ebc9cf
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 10 deletions.
27 changes: 20 additions & 7 deletions modin/pandas/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,22 @@ class BasePandasDataset(ClassLogger):
# but lives in "pandas" namespace.
_pandas_class = pandas.core.generic.NDFrame

@pandas.util.cache_readonly
def _is_dataframe(self) -> bool:
"""
Tell whether this is a dataframe.
Ideally, other methods of BasePandasDataset shouldn't care whether this
is a dataframe or a series, but sometimes we need to know. This method
is better than hasattr(self, "columns"), which for series will call
self.__getattr__("columns"), which requires materializing the index.
Returns
-------
bool : Whether this is a dataframe.
"""
return issubclass(self._pandas_class, pandas.DataFrame)

def _add_sibling(self, sibling):
"""
Add a DataFrame or Series object to the list of siblings.
Expand Down Expand Up @@ -162,12 +178,10 @@ def _build_repr_df(self, num_rows, num_cols):
A pandas dataset with `num_rows` or fewer rows and `num_cols` or fewer columns.
"""
# Fast track for empty dataframe.
if len(self.index) == 0 or (
hasattr(self, "columns") and len(self.columns) == 0
):
if len(self.index) == 0 or (self._is_dataframe and len(self.columns) == 0):
return pandas.DataFrame(
index=self.index,
columns=self.columns if hasattr(self, "columns") else None,
columns=self.columns if self._is_dataframe else None,
)
if len(self.index) <= num_rows:
row_indexer = slice(None)
Expand All @@ -188,7 +202,7 @@ def _build_repr_df(self, num_rows, num_cols):
if num_rows_for_tail is not None
else []
)
if hasattr(self, "columns"):
if self._is_dataframe:
if len(self.columns) <= num_cols:
col_indexer = slice(None)
else:
Expand Down Expand Up @@ -3632,8 +3646,7 @@ def __getitem__(self, key):
# This lets us reuse code in pandas to error check
indexer = None
if isinstance(key, slice) or (
isinstance(key, str)
and (not hasattr(self, "columns") or key not in self.columns)
isinstance(key, str) and (not self._is_dataframe or key not in self.columns)
):
indexer = convert_to_index_sliceable(
pandas.DataFrame(index=self.index), key
Expand Down
2 changes: 1 addition & 1 deletion modin/pandas/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -964,7 +964,7 @@ def _compute_index_grouped(self, numerical=False):
# `dropna` param is the only one that matters for the group indices result
dropna = self._kwargs.get("dropna", True)

if hasattr(self._by, "columns") and is_multi_by:
if isinstance(self._by, BaseQueryCompiler) and is_multi_by:
by = list(self._by.columns)

if is_multi_by:
Expand Down
2 changes: 1 addition & 1 deletion modin/pandas/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -718,7 +718,7 @@ def __getitem__(self, key):
):
result.index = result.index.droplevel(list(range(len(row_loc_as_list))))
if (
hasattr(result, "columns")
isinstance(result, DataFrame)
and not isinstance(col_loc_as_list, slice)
and not levels_already_dropped
and result._query_compiler.has_multiindex(axis=1)
Expand Down
2 changes: 1 addition & 1 deletion modin/pandas/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ def broadcast_item(
index_values = obj.index[row_lookup]
if not index_values.equals(item.index):
axes_to_reindex["index"] = index_values
if need_columns_reindex and hasattr(item, "columns"):
if need_columns_reindex and isinstance(item, (pandas.DataFrame, DataFrame)):
column_values = obj.columns[col_lookup]
if not column_values.equals(item.columns):
axes_to_reindex["columns"] = column_values
Expand Down

0 comments on commit 2ebc9cf

Please sign in to comment.