From eac3c77baf456c7bd7e1e5fde81790a4ed3ebb27 Mon Sep 17 00:00:00 2001
From: Arun Jose <40291569+arunjose696@users.noreply.github.com>
Date: Fri, 14 Jun 2024 10:34:06 +0200
Subject: [PATCH] REFACTOR-modin-project#7313: Add similar methods as in 7294
 for operating on columns (#7314)

Signed-off-by: arunjose696 <arunjose696@gmail.com>
---
 modin/core/dataframe/algebra/binary.py        | 17 +++-----
 .../storage_formats/base/query_compiler.py    | 43 +++++++++++++++++++
 .../storage_formats/pandas/aggregations.py    |  2 +-
 modin/core/storage_formats/pandas/merge.py    |  4 +-
 .../storage_formats/pandas/query_compiler.py  |  4 +-
 .../storage_formats/pandas/test_internals.py  | 16 +++----
 6 files changed, 63 insertions(+), 23 deletions(-)

diff --git a/modin/core/dataframe/algebra/binary.py b/modin/core/dataframe/algebra/binary.py
index 2afb7733baf..b5e701d2d4b 100644
--- a/modin/core/dataframe/algebra/binary.py
+++ b/modin/core/dataframe/algebra/binary.py
@@ -205,13 +205,10 @@ def maybe_build_dtypes_series(
     Finds a union of columns and finds dtypes for all these columns.
     """
     if not trigger_computations:
-        if not first._modin_frame.has_columns_cache:
+        if not first.frame_has_columns_cache:
             return None
 
-        if (
-            isinstance(second, type(first))
-            and not second._modin_frame.has_columns_cache
-        ):
+        if isinstance(second, type(first)) and not second.frame_has_columns_cache:
             return None
 
     columns_first = set(first.columns)
@@ -384,8 +381,8 @@ def caller(
             if isinstance(other, type(query_compiler)):
                 if broadcast:
                     if (
-                        query_compiler._modin_frame.has_materialized_columns
-                        and other._modin_frame.has_materialized_columns
+                        query_compiler.frame_has_materialized_columns
+                        and other.frame_has_materialized_columns
                     ):
                         if (
                             len(query_compiler.columns) == 1
@@ -408,8 +405,8 @@ def caller(
                     )
                 else:
                     if (
-                        query_compiler._modin_frame.has_materialized_columns
-                        and other._modin_frame.has_materialized_columns
+                        query_compiler.frame_has_materialized_columns
+                        and other.frame_has_materialized_columns
                     ):
                         if (
                             len(query_compiler.columns) == 1
@@ -440,7 +437,7 @@ def caller(
                     )
                 else:
                     if (
-                        query_compiler._modin_frame.has_materialized_columns
+                        query_compiler.frame_has_materialized_columns
                         and len(query_compiler._modin_frame.columns) == 1
                         and is_scalar(other)
                     ):
diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py
index f239f1b46ae..80e89a577a2 100644
--- a/modin/core/storage_formats/base/query_compiler.py
+++ b/modin/core/storage_formats/base/query_compiler.py
@@ -4532,6 +4532,28 @@ def frame_has_materialized_dtypes(self) -> bool:
         """
         return self._modin_frame.has_materialized_dtypes
 
+    @property
+    def frame_has_materialized_columns(self) -> bool:
+        """
+        Check if the underlying dataframe has materialized columns.
+
+        Returns
+        -------
+        bool
+        """
+        return self._modin_frame.has_materialized_columns
+
+    @property
+    def frame_has_materialized_index(self) -> bool:
+        """
+        Check if the underlying dataframe has materialized index.
+
+        Returns
+        -------
+        bool
+        """
+        return self._modin_frame.has_materialized_index
+
     def set_frame_dtypes_cache(self, dtypes):
         """
         Set dtypes cache for the underlying dataframe frame.
@@ -4552,6 +4574,16 @@ def set_frame_index_cache(self, index):
         """
         self._modin_frame.set_index_cache(index)
 
+    def set_frame_columns_cache(self, index):
+        """
+        Set columns cache for underlying dataframe.
+
+        Parameters
+        ----------
+        index : sequence, callable or None
+        """
+        self._modin_frame.set_columns_cache(index)
+
     @property
     def frame_has_index_cache(self):
         """
@@ -4563,6 +4595,17 @@ def frame_has_index_cache(self):
         """
         return self._modin_frame.has_index_cache
 
+    @property
+    def frame_has_columns_cache(self):
+        """
+        Check if the columns cache exists for underlying dataframe.
+
+        Returns
+        -------
+        bool
+        """
+        return self._modin_frame.has_columns_cache
+
     @property
     def frame_has_dtypes_cache(self) -> bool:
         """
diff --git a/modin/core/storage_formats/pandas/aggregations.py b/modin/core/storage_formats/pandas/aggregations.py
index e8905e857bc..b0367d007ef 100644
--- a/modin/core/storage_formats/pandas/aggregations.py
+++ b/modin/core/storage_formats/pandas/aggregations.py
@@ -62,7 +62,7 @@ def corr_method(
                     method=method, min_periods=min_periods, numeric_only=numeric_only
                 )
 
-            if not numeric_only and qc._modin_frame.has_materialized_columns:
+            if not numeric_only and qc.frame_has_materialized_columns:
                 new_index, new_columns = (
                     qc._modin_frame.copy_columns_cache(),
                     qc._modin_frame.copy_columns_cache(),
diff --git a/modin/core/storage_formats/pandas/merge.py b/modin/core/storage_formats/pandas/merge.py
index 37a9c325bd0..62583bc5ddb 100644
--- a/modin/core/storage_formats/pandas/merge.py
+++ b/modin/core/storage_formats/pandas/merge.py
@@ -216,7 +216,7 @@ def map_func(
             # it's fine too, we can also decide that by columns, which tend to be already
             # materialized quite often compared to the indexes.
             keep_index = False
-            if left._modin_frame.has_materialized_index:
+            if left.frame_has_materialized_index:
                 keep_index = should_keep_index(left, right)
             else:
                 # Have to trigger columns materialization. Hope they're already available at this point.
@@ -286,7 +286,7 @@ def _compute_result_metadata(
         new_columns = None
         new_dtypes = None
 
-        if not left._modin_frame.has_materialized_columns:
+        if not left.frame_has_materialized_columns:
             return new_columns, new_dtypes
 
         if left_on is None and right_on is None:
diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py
index d29901b8fdb..7c4f7e79f55 100644
--- a/modin/core/storage_formats/pandas/query_compiler.py
+++ b/modin/core/storage_formats/pandas/query_compiler.py
@@ -623,7 +623,7 @@ def _reset(df, *axis_lengths, partition_idx):  # pragma: no cover
             new_columns = None
             if kwargs["drop"]:
                 dtypes = self._modin_frame.copy_dtypes_cache()
-                if self._modin_frame.has_columns_cache:
+                if self.frame_has_columns_cache:
                     new_columns = self._modin_frame.copy_columns_cache(
                         copy_lengths=True
                     )
@@ -642,7 +642,7 @@ def _reset(df, *axis_lengths, partition_idx):  # pragma: no cover
                     dtypes = None
                 if (
                     # can precompute new columns if we know columns and index names
-                    self._modin_frame.has_materialized_columns
+                    self.frame_has_materialized_columns
                     and index_dtypes is not None
                 ):
                     empty_index = (
diff --git a/modin/tests/core/storage_formats/pandas/test_internals.py b/modin/tests/core/storage_formats/pandas/test_internals.py
index 99846655df1..e893e48582b 100644
--- a/modin/tests/core/storage_formats/pandas/test_internals.py
+++ b/modin/tests/core/storage_formats/pandas/test_internals.py
@@ -1171,13 +1171,13 @@ def test_concat_dont_materialize_opposite_axis(axis):
 
     def assert_no_cache(df, axis):
         if axis:
-            assert not df._query_compiler._modin_frame.has_materialized_columns
+            assert not df._query_compiler.frame_has_materialized_columns
         else:
-            assert not df._query_compiler._modin_frame.has_materialized_index
+            assert not df._query_compiler.frame_has_materialized_index
 
     def remove_cache(df, axis):
         if axis:
-            df._query_compiler._modin_frame.set_columns_cache(None)
+            df._query_compiler.set_frame_columns_cache(None)
         else:
             df._query_compiler.set_frame_index_cache(None)
         assert_no_cache(df, axis)
@@ -2038,7 +2038,7 @@ def test_concat_axis_1(
                 or remaining_dtype is not None
             )
             # setting columns cache to 'None', in order to prevent completing 'dtypes' with the materialized columns
-            md_df._query_compiler._modin_frame.set_columns_cache(None)
+            md_df._query_compiler.set_frame_columns_cache(None)
             md_df._query_compiler.set_frame_dtypes_cache(
                 ModinDtypes(
                     DtypesDescriptor(
@@ -2401,10 +2401,10 @@ def test_preserve_dtypes_reset_index(self, drop, has_materialized_index):
             # case 1: 'df' has complete dtype by default
             df = pd.DataFrame({"a": [1, 2, 3]})
             if has_materialized_index:
-                assert df._query_compiler._modin_frame.has_materialized_index
+                assert df._query_compiler.frame_has_materialized_index
             else:
                 df._query_compiler.set_frame_index_cache(None)
-                assert not df._query_compiler._modin_frame.has_materialized_index
+                assert not df._query_compiler.frame_has_materialized_index
             assert df._query_compiler.frame_has_materialized_dtypes
 
             res = df.reset_index(drop=drop)
@@ -2444,10 +2444,10 @@ def test_preserve_dtypes_reset_index(self, drop, has_materialized_index):
                 )
             )
             if has_materialized_index:
-                assert df._query_compiler._modin_frame.has_materialized_index
+                assert df._query_compiler.frame_has_materialized_index
             else:
                 df._query_compiler.set_frame_index_cache(None)
-                assert not df._query_compiler._modin_frame.has_materialized_index
+                assert not df._query_compiler.frame_has_materialized_index
 
             res = df.reset_index(drop=drop)
             if drop: