From c36779e59aa0ebb45c7e06225712871875327d8d Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Wed, 26 Jul 2023 11:40:03 -0700
Subject: [PATCH 1/5] preserve column names

---
 python/cudf/cudf/core/dataframe.py     | 7 +++++++
 python/cudf/cudf/core/indexed_frame.py | 4 +++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 0fe89490905..faa1f5b2396 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -723,6 +723,10 @@ def __init__(
         if dtype:
             self._data = self.astype(dtype)._data
 
+        self._data.multiindex = self._data.multiindex or isinstance(
+            columns, pd.MultiIndex
+        )
+
     @_cudf_nvtx_annotate
     def _init_from_series_list(self, data, columns, index):
         if index is None:
@@ -5042,6 +5046,7 @@ def from_pandas(cls, dataframe, nan_as_null=None):
 
         index = cudf.from_pandas(dataframe.index, nan_as_null=nan_as_null)
         df = cls._from_data(data, index)
+        df._data._level_names = list(dataframe.columns.names)
 
         # Set columns only if it is a MultiIndex
         if isinstance(dataframe.columns, pd.MultiIndex):
@@ -5337,6 +5342,8 @@ def _from_arrays(cls, data, index=None, columns=None, nan_as_null=False):
             df._data[names[0]] = column.as_column(
                 data, nan_as_null=nan_as_null
             )
+        if isinstance(columns, pd.Index):
+            df._data._level_names = list(columns.names)
 
         if index is None:
             df._index = RangeIndex(start=0, stop=len(data))
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index aa0f060c8da..c7ef015bf23 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -293,7 +293,9 @@ def _from_data(
 
     @_cudf_nvtx_annotate
     def _from_data_like_self(self, data: MutableMapping):
-        return self._from_data(data, self._index)
+        out = self._from_data(data, self._index)
+        out._data._level_names = self._data._level_names
+        return out
 
     @classmethod
     @_cudf_nvtx_annotate

From e845c72b44ff789da2a7097999c10823cd6b98ab Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Wed, 26 Jul 2023 12:48:39 -0700
Subject: [PATCH 2/5] add tests

---
 python/cudf/cudf/core/dataframe.py       | 40 ++++++++++++++++++------
 python/cudf/cudf/core/indexed_frame.py   | 20 +++++++++---
 python/cudf/cudf/core/series.py          |  5 ++-
 python/cudf/cudf/tests/test_dataframe.py | 15 +++++++--
 4 files changed, 64 insertions(+), 16 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index faa1f5b2396..667341588fd 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1824,19 +1824,29 @@ def _make_operands_and_index_for_binop(
             NotImplementedType,
         ],
         Optional[BaseIndex],
+        bool,
     ]:
         lhs, rhs = self._data, other
         index = self._index
         fill_requires_key = False
         left_default: Any = False
+        equal_columns = False
+        can_use_self_column_name = True
 
         if _is_scalar_or_zero_d_array(other):
             rhs = {name: other for name in self._data}
+            equal_columns = True
         elif isinstance(other, Series):
-            rhs = dict(zip(other.index.values_host, other.values_host))
+            rhs = dict(zip(other.index.to_pandas(), other.values_host))
             # For keys in right but not left, perform binops between NaN (not
             # NULL!) and the right value (result is NaN).
             left_default = as_column(np.nan, length=len(self))
+            equal_columns = other.index.to_pandas().equals(
+                self._data.to_pandas_index()
+            )
+            can_use_self_column_name = equal_columns or (
+                list(other._index._data.names) == self._data._level_names
+            )
         elif isinstance(other, DataFrame):
             if (
                 not can_reindex
@@ -1858,13 +1868,18 @@ def _make_operands_and_index_for_binop(
             # For DataFrame-DataFrame ops, always default to operating against
             # the fill value.
             left_default = fill_value
+            equal_columns = self._column_names == other._column_names
+            can_use_self_column_name = (
+                equal_columns
+                or self._data._level_names == other._data._level_names
+            )
         elif isinstance(other, (dict, abc.Mapping)):
             # Need to fail early on host mapping types because we ultimately
             # convert everything to a dict.
-            return NotImplemented, None
+            return NotImplemented, None, True
 
         if not isinstance(rhs, (dict, abc.Mapping)):
-            return NotImplemented, None
+            return NotImplemented, None, True
 
         operands = {
             k: (
@@ -1880,7 +1895,8 @@ def _make_operands_and_index_for_binop(
             for k, v in rhs.items():
                 if k not in lhs:
                     operands[k] = (left_default, v, reflect, None)
-        return operands, index
+
+        return operands, index, can_use_self_column_name
 
     @classmethod
     @_cudf_nvtx_annotate
@@ -5090,13 +5106,19 @@ def from_arrow(cls, table):
         2  3  6
         """
         index_col = None
+        col_index_names = None
         if isinstance(table, pa.Table) and isinstance(
             table.schema.pandas_metadata, dict
         ):
             index_col = table.schema.pandas_metadata["index_columns"]
+            if "column_indexes" in table.schema.pandas_metadata:
+                col_index_names = []
+                for col_meta in table.schema.pandas_metadata["column_indexes"]:
+                    col_index_names.append(col_meta["name"])
 
         out = super().from_arrow(table)
-
+        if col_index_names is not None:
+            out._data._level_names = col_index_names
         if index_col:
             if isinstance(index_col[0], dict):
                 idx = cudf.RangeIndex(
@@ -7450,13 +7472,13 @@ def _align_indices(lhs, rhs):
         lhs_out = DataFrame(index=df.index)
         rhs_out = DataFrame(index=df.index)
         common = set(lhs._column_names) & set(rhs._column_names)
-        common_x = {f"{x}_x" for x in common}
-        common_y = {f"{x}_y" for x in common}
+        common_x = {f"{x}_x": x for x in common}
+        common_y = {f"{x}_y": x for x in common}
         for col in df._column_names:
             if col in common_x:
-                lhs_out[col[:-2]] = df[col]
+                lhs_out[common_x[col]] = df[col]
             elif col in common_y:
-                rhs_out[col[:-2]] = df[col]
+                rhs_out[common_y[col]] = df[col]
             elif col in lhs:
                 lhs_out[col] = df[col]
             elif col in rhs:
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index c7ef015bf23..492cbfd8cb6 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -3130,7 +3130,9 @@ def _reset_index(self, level, drop, col_level=0, col_fill=""):
         # inserted to the left of existing data columns.
         return (
             ColumnAccessor(
-                {**new_column_data, **self._data}, self._data.multiindex
+                {**new_column_data, **self._data},
+                self._data.multiindex,
+                self._data._level_names,
             ),
             index,
         )
@@ -3467,14 +3469,23 @@ def _binaryop(
         **kwargs,
     ):
         reflect, op = self._check_reflected_op(op)
-        operands, out_index = self._make_operands_and_index_for_binop(
+        (
+            operands,
+            out_index,
+            can_use_self_column_name,
+        ) = self._make_operands_and_index_for_binop(
             other, op, fill_value, reflect, can_reindex
         )
         if operands is NotImplemented:
             return NotImplemented
 
         return self._from_data(
-            ColumnAccessor(type(self)._colwise_binop(operands, op)),
+            ColumnAccessor(
+                type(self)._colwise_binop(operands, op),
+                level_names=None
+                if not can_use_self_column_name
+                else self._data._level_names,
+            ),
             index=out_index,
         )
 
@@ -3493,6 +3504,7 @@ def _make_operands_and_index_for_binop(
             NotImplementedType,
         ],
         Optional[cudf.BaseIndex],
+        bool,
     ]:
         raise NotImplementedError(
             f"Binary operations are not supported for {self.__class__}"
@@ -3518,7 +3530,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
         if cupy_func:
             if ufunc.nin == 2:
                 other = inputs[self is inputs[0]]
-                inputs, index = self._make_operands_and_index_for_binop(
+                inputs, index, _ = self._make_operands_and_index_for_binop(
                     other, fname
                 )
             else:
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index aaac91e927a..02de3b8282a 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -1472,6 +1472,7 @@ def _make_operands_and_index_for_binop(
             NotImplementedType,
         ],
         Optional[BaseIndex],
+        bool,
     ]:
         # Specialize binops to align indices.
         if isinstance(other, Series):
@@ -1484,11 +1485,13 @@ def _make_operands_and_index_for_binop(
                     "Can only compare identically-labeled Series objects"
                 )
             lhs, other = _align_indices([self, other], allow_non_unique=True)
+            can_use_self_column_name = self.name == other.name
         else:
             lhs = self
+            can_use_self_column_name = False
 
         operands = lhs._make_operands_for_binop(other, fill_value, reflect)
-        return operands, lhs._index
+        return operands, lhs._index, can_use_self_column_name
 
     @copy_docstring(CategoricalAccessor)  # type: ignore
     @property
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 25a17697538..e512c8e9530 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -1708,6 +1708,7 @@ def test_nonmatching_index_setitem(nrows):
 )
 def test_from_pandas(dtype):
     df = pd.DataFrame({"x": [1, 2, 3]}, index=[4.0, 5.0, 6.0], dtype=dtype)
+    df.columns.name = "custom_column_name"
     gdf = cudf.DataFrame.from_pandas(df)
     assert isinstance(gdf, cudf.DataFrame)
 
@@ -2483,8 +2484,12 @@ def test_bitwise_binops_series(pdf, gdf, binop):
 
 
 @pytest.mark.parametrize("unaryop", [operator.neg, operator.inv, operator.abs])
-def test_unaryops_df(pdf, gdf, unaryop):
-    d = unaryop(pdf - 5)
+@pytest.mark.parametrize("col_name", [None, "abc"])
+def test_unaryops_df(pdf, unaryop, col_name):
+    pd_df = pdf.copy()
+    pd_df.columns.name = col_name
+    gdf = cudf.from_pandas(pd_df)
+    d = unaryop(pd_df - 5)
     g = unaryop(gdf - 5)
     assert_eq(d, g)
 
@@ -2626,6 +2631,12 @@ def test_arrow_pandas_compat(pdf, gdf, preserve_index):
     pdf2 = pdf_arrow_table.to_pandas()
 
     assert_eq(pdf2, gdf2)
+    pdf.columns.name = "abc"
+    pdf_arrow_table = pa.Table.from_pandas(pdf, preserve_index=preserve_index)
+
+    gdf2 = cudf.DataFrame.from_arrow(pdf_arrow_table)
+    pdf2 = pdf_arrow_table.to_pandas()
+    assert_eq(pdf2, gdf2)
 
 
 @pytest.mark.parametrize("dtype", NUMERIC_TYPES + ["bool"])

From 388e6f49a8f8782201699fedef4247b75e9eb106 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Wed, 26 Jul 2023 13:00:51 -0700
Subject: [PATCH 3/5] revert some changes

---
 python/cudf/cudf/core/dataframe.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 667341588fd..fc6c669256f 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1837,7 +1837,7 @@ def _make_operands_and_index_for_binop(
             rhs = {name: other for name in self._data}
             equal_columns = True
         elif isinstance(other, Series):
-            rhs = dict(zip(other.index.to_pandas(), other.values_host))
+            rhs = dict(zip(other.index.values_host, other.values_host))
             # For keys in right but not left, perform binops between NaN (not
             # NULL!) and the right value (result is NaN).
             left_default = as_column(np.nan, length=len(self))
@@ -7472,13 +7472,13 @@ def _align_indices(lhs, rhs):
         lhs_out = DataFrame(index=df.index)
         rhs_out = DataFrame(index=df.index)
         common = set(lhs._column_names) & set(rhs._column_names)
-        common_x = {f"{x}_x": x for x in common}
-        common_y = {f"{x}_y": x for x in common}
+        common_x = {f"{x}_x" for x in common}
+        common_y = {f"{x}_y" for x in common}
         for col in df._column_names:
             if col in common_x:
-                lhs_out[common_x[col]] = df[col]
+                lhs_out[col[:-2]] = df[col]
             elif col in common_y:
-                rhs_out[common_y[col]] = df[col]
+                rhs_out[col[:-2]] = df[col]
             elif col in lhs:
                 lhs_out[col] = df[col]
             elif col in rhs:

From a892023f072afca2f32a414cab5e19480371e0e2 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Wed, 26 Jul 2023 14:38:23 -0700
Subject: [PATCH 4/5] Add more tests

---
 python/cudf/cudf/tests/test_dataframe.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index e512c8e9530..9db45ceba44 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -2923,6 +2923,7 @@ def test_tail_for_string():
         ["v0", "v1"],
         ["v0", "index"],
         pd.MultiIndex.from_tuples([("x0", "x1"), ("y0", "y1")]),
+        pd.MultiIndex.from_tuples([(1, 2), (10, 11)], names=["ABC", "DEF"]),
     ],
 )
 @pytest.mark.parametrize("inplace", [True, False])
@@ -10158,3 +10159,25 @@ def test_dataframe_init_length_error(data, index):
             {"data": data, "index": index},
         ),
     )
+
+
+def test_dataframe_init_columns_named_multiindex():
+    np.random.seed(0)
+    data = np.random.randn(2, 2)
+    columns = cudf.MultiIndex.from_tuples(
+        [("A", "one"), ("A", "two")], names=["y", "z"]
+    )
+    gdf = cudf.DataFrame(data, columns=columns)
+    pdf = pd.DataFrame(data, columns=columns.to_pandas())
+
+    assert_eq(gdf, pdf)
+
+
+def test_dataframe_init_columns_named_index():
+    np.random.seed(0)
+    data = np.random.randn(2, 2)
+    columns = pd.Index(["a", "b"], name="custom_name")
+    gdf = cudf.DataFrame(data, columns=columns)
+    pdf = pd.DataFrame(data, columns=columns)
+
+    assert_eq(gdf, pdf)

From 4513ba87aa0a3d3828a28b3b55208a252551e2ad Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Fri, 28 Jul 2023 07:57:59 -0700
Subject: [PATCH 5/5] address reviews

---
 python/cudf/cudf/core/indexed_frame.py   | 7 ++++---
 python/cudf/cudf/tests/test_dataframe.py | 9 ++++++---
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 492cbfd8cb6..0ffc3948e67 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -3479,12 +3479,13 @@ def _binaryop(
         if operands is NotImplemented:
             return NotImplemented
 
+        level_names = (
+            None if not can_use_self_column_name else self._data._level_names
+        )
         return self._from_data(
             ColumnAccessor(
                 type(self)._colwise_binop(operands, op),
-                level_names=None
-                if not can_use_self_column_name
-                else self._data._level_names,
+                level_names=level_names,
             ),
             index=out_index,
         )
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 9db45ceba44..d443cd92968 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -2484,10 +2484,13 @@ def test_bitwise_binops_series(pdf, gdf, binop):
 
 
 @pytest.mark.parametrize("unaryop", [operator.neg, operator.inv, operator.abs])
-@pytest.mark.parametrize("col_name", [None, "abc"])
-def test_unaryops_df(pdf, unaryop, col_name):
+@pytest.mark.parametrize(
+    "col_name,assign_col_name", [(None, False), (None, True), ("abc", True)]
+)
+def test_unaryops_df(pdf, unaryop, col_name, assign_col_name):
     pd_df = pdf.copy()
-    pd_df.columns.name = col_name
+    if assign_col_name:
+        pd_df.columns.name = col_name
     gdf = cudf.from_pandas(pd_df)
     d = unaryop(pd_df - 5)
     g = unaryop(gdf - 5)