diff --git a/modin/pandas/base.py b/modin/pandas/base.py index f96fe35f49c..e89e79c25dd 100644 --- a/modin/pandas/base.py +++ b/modin/pandas/base.py @@ -122,6 +122,22 @@ class BasePandasDataset(ClassLogger): # but lives in "pandas" namespace. _pandas_class = pandas.core.generic.NDFrame + @pandas.util.cache_readonly + def _is_dataframe(self) -> bool: + """ + Tell whether this is a dataframe. + + Ideally, other methods of BasePandasDataset shouldn't care whether this + is a dataframe or a series, but sometimes we need to know. This method + is better than hasattr(self, "columns"), which for series will call + self.__getattr__("columns"), which requires materializing the index. + + Returns + ------- + bool : Whether this is a dataframe. + """ + return issubclass(self._pandas_class, pandas.DataFrame) + def _add_sibling(self, sibling): """ Add a DataFrame or Series object to the list of siblings. @@ -162,12 +178,10 @@ def _build_repr_df(self, num_rows, num_cols): A pandas dataset with `num_rows` or fewer rows and `num_cols` or fewer columns. """ # Fast track for empty dataframe. - if len(self.index) == 0 or ( - hasattr(self, "columns") and len(self.columns) == 0 - ): + if len(self.index) == 0 or (self._is_dataframe and len(self.columns) == 0): return pandas.DataFrame( index=self.index, - columns=self.columns if hasattr(self, "columns") else None, + columns=self.columns if self._is_dataframe else None, ) if len(self.index) <= num_rows: row_indexer = slice(None) @@ -188,7 +202,7 @@ def _build_repr_df(self, num_rows, num_cols): if num_rows_for_tail is not None else [] ) - if hasattr(self, "columns"): + if self._is_dataframe: if len(self.columns) <= num_cols: col_indexer = slice(None) else: @@ -3632,8 +3646,7 @@ def __getitem__(self, key): # This lets us reuse code in pandas to error check indexer = None if isinstance(key, slice) or ( - isinstance(key, str) - and (not hasattr(self, "columns") or key not in self.columns) + isinstance(key, str) and (not self._is_dataframe or key not in self.columns) ): indexer = convert_to_index_sliceable( pandas.DataFrame(index=self.index), key diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py index 68272971a39..f6394070ced 100644 --- a/modin/pandas/groupby.py +++ b/modin/pandas/groupby.py @@ -964,7 +964,7 @@ def _compute_index_grouped(self, numerical=False): # `dropna` param is the only one that matters for the group indices result dropna = self._kwargs.get("dropna", True) - if hasattr(self._by, "columns") and is_multi_by: + if isinstance(self._by, BaseQueryCompiler) and is_multi_by: by = list(self._by.columns) if is_multi_by: diff --git a/modin/pandas/indexing.py b/modin/pandas/indexing.py index 16b3003f0f9..c5fe7903db2 100644 --- a/modin/pandas/indexing.py +++ b/modin/pandas/indexing.py @@ -718,7 +718,7 @@ def __getitem__(self, key): ): result.index = result.index.droplevel(list(range(len(row_loc_as_list)))) if ( - hasattr(result, "columns") + isinstance(result, DataFrame) and not isinstance(col_loc_as_list, slice) and not levels_already_dropped and result._query_compiler.has_multiindex(axis=1) diff --git a/modin/pandas/utils.py b/modin/pandas/utils.py index 7564732c9bd..2acdb1c6da8 100644 --- a/modin/pandas/utils.py +++ b/modin/pandas/utils.py @@ -315,7 +315,7 @@ def broadcast_item( index_values = obj.index[row_lookup] if not index_values.equals(item.index): axes_to_reindex["index"] = index_values - if need_columns_reindex and hasattr(item, "columns"): + if need_columns_reindex and isinstance(item, (pandas.DataFrame, DataFrame)): column_values = obj.columns[col_lookup] if not column_values.equals(item.columns): axes_to_reindex["columns"] = column_values