modin-project · anmyachev · Dec 6, 2022 · Dec 2, 2022 · Dec 2, 2022 · Dec 6, 2022
@@ -122,6 +122,22 @@ class BasePandasDataset(ClassLogger):
     # but lives in "pandas" namespace.
     _pandas_class = pandas.core.generic.NDFrame
 
+    @pandas.util.cache_readonly
+    def _is_dataframe(self) -> bool:
+        """
+        Tell whether this is a dataframe.
+
+        Ideally, other methods of BasePandasDataset shouldn't care whether this
+        is a dataframe or a series, but sometimes we need to know. This method
+        is better than hasattr(self, "columns"), which for series will call
+        self.__getattr__("columns"), which requires materializing the index.
+
+        Returns
+        -------
+        bool : Whether this is a dataframe.
+        """
+        return issubclass(self._pandas_class, pandas.DataFrame)
+
     def _add_sibling(self, sibling):
         """
         Add a DataFrame or Series object to the list of siblings.
@@ -162,12 +178,10 @@ def _build_repr_df(self, num_rows, num_cols):
             A pandas dataset with `num_rows` or fewer rows and `num_cols` or fewer columns.
         """
         # Fast track for empty dataframe.
-        if len(self.index) == 0 or (
-            hasattr(self, "columns") and len(self.columns) == 0
-        ):
+        if len(self.index) == 0 or (self._is_dataframe and len(self.columns) == 0):
             return pandas.DataFrame(
                 index=self.index,
-                columns=self.columns if hasattr(self, "columns") else None,
+                columns=self.columns if self._is_dataframe else None,
             )
         if len(self.index) <= num_rows:
             row_indexer = slice(None)
@@ -188,7 +202,7 @@ def _build_repr_df(self, num_rows, num_cols):
                 if num_rows_for_tail is not None
                 else []
             )
-        if hasattr(self, "columns"):
+        if self._is_dataframe:
             if len(self.columns) <= num_cols:
                 col_indexer = slice(None)
             else:
@@ -3632,8 +3646,7 @@ def __getitem__(self, key):
         # This lets us reuse code in pandas to error check
         indexer = None
         if isinstance(key, slice) or (
-            isinstance(key, str)
-            and (not hasattr(self, "columns") or key not in self.columns)
+            isinstance(key, str) and (not self._is_dataframe or key not in self.columns)
         ):
             indexer = convert_to_index_sliceable(
                 pandas.DataFrame(index=self.index), key

@@ -964,7 +964,7 @@ def _compute_index_grouped(self, numerical=False):
         # `dropna` param is the only one that matters for the group indices result
         dropna = self._kwargs.get("dropna", True)
 
-        if hasattr(self._by, "columns") and is_multi_by:
+        if isinstance(self._by, BaseQueryCompiler) and is_multi_by:
             by = list(self._by.columns)
 
         if is_multi_by:

@@ -718,7 +718,7 @@ def __getitem__(self, key):
             ):
                 result.index = result.index.droplevel(list(range(len(row_loc_as_list))))
         if (
-            hasattr(result, "columns")
+            isinstance(result, DataFrame)
             and not isinstance(col_loc_as_list, slice)
             and not levels_already_dropped
             and result._query_compiler.has_multiindex(axis=1)

@@ -315,7 +315,7 @@ def broadcast_item(
         index_values = obj.index[row_lookup]
         if not index_values.equals(item.index):
             axes_to_reindex["index"] = index_values
-        if need_columns_reindex and hasattr(item, "columns"):
+        if need_columns_reindex and isinstance(item, (pandas.DataFrame, DataFrame)):
             column_values = obj.columns[col_lookup]
             if not column_values.equals(item.columns):
                 axes_to_reindex["columns"] = column_values