From d70d4f046d8ac01fe02346f75f4e0f54aec790ff Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <mroeschke@dgx11.aselab.nvidia.com>
Date: Fri, 4 Aug 2023 18:03:50 -0700
Subject: [PATCH 1/4] DataFrame with namedtuples uses ._fields as column names

---
 python/cudf/cudf/core/dataframe.py       | 24 ++++++++++++++++++++++--
 python/cudf/cudf/tests/test_dataframe.py | 14 +++++++++++++-
 2 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index d421258b06b..cc04d080e48 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -708,7 +708,10 @@ def __init__(
                     )
                 else:
                     self._init_from_list_like(
-                        data, index=index, columns=columns
+                        data,
+                        index=index,
+                        columns=columns,
+                        nan_as_null=nan_as_null,
                     )
                 self._check_data_index_length_match()
             else:
@@ -827,7 +830,9 @@ def _init_from_series_list(self, data, columns, index):
             self._data = self._data.select_by_label(columns)
 
     @_cudf_nvtx_annotate
-    def _init_from_list_like(self, data, index=None, columns=None):
+    def _init_from_list_like(
+        self, data, index=None, columns=None, nan_as_null=None
+    ):
         if index is None:
             index = RangeIndex(start=0, stop=len(data))
         else:
@@ -842,6 +847,21 @@ def _init_from_list_like(self, data, index=None, columns=None):
         elif len(data) > 0 and isinstance(data[0], pd._libs.interval.Interval):
             data = DataFrame.from_pandas(pd.DataFrame(data))
             self._data = data._data
+        # namedtuple in a list
+        elif (
+            len(data) > 0
+            and columns is None
+            and isinstance(data[0], tuple)
+            and hasattr(data[0], "_fields")
+        ):
+            # pandas behavior is to use the fields from the first tuple
+            # as the column names
+            columns = data[0]._fields
+            values = itertools.zip_longest(*data)
+            data = dict(zip(columns, values))
+            self._init_from_dict_like(
+                data, index=index, nan_as_null=nan_as_null
+            )
         else:
             if any(
                 not isinstance(col, (abc.Iterable, abc.Sequence))
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 0898cb2ef3d..3b87aa2812c 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -10,7 +10,7 @@
 import string
 import textwrap
 import warnings
-from collections import OrderedDict, defaultdict
+from collections import OrderedDict, defaultdict, namedtuple
 from copy import copy
 
 import cupy
@@ -10261,3 +10261,15 @@ def __getitem__(self, key):
 
     with pytest.raises(TypeError):
         cudf.DataFrame({"a": A()})
+
+
+def test_dataframe_constructor_from_namedtuple():
+    Point1 = namedtuple("Point1", ["a", "b", "c"])
+    Point2 = namedtuple("Point1", ["x", "y"])
+
+    data = [Point1(1, 2, 3), Point2(4, 5)]
+    idx = ["a", "b"]
+    gdf = cudf.DataFrame(data, index=idx)
+    pdf = pd.DataFrame(data, index=idx)
+
+    assert_eq(gdf, pdf)

From 4476ab5e023ecbf9c33ba6157eb49cfcca04d71d Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <mroeschke@dgx11.aselab.nvidia.com>
Date: Tue, 8 Aug 2023 18:07:35 -0700
Subject: [PATCH 2/4] Simplify code path, add test for raising

---
 python/cudf/cudf/core/dataframe.py       | 24 +++++++++---------------
 python/cudf/cudf/tests/test_dataframe.py |  6 ++++++
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index a778793e3f3..97539e240e5 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -847,27 +847,21 @@ def _init_from_list_like(
         elif len(data) > 0 and isinstance(data[0], pd._libs.interval.Interval):
             data = DataFrame.from_pandas(pd.DataFrame(data))
             self._data = data._data
-        # namedtuple in a list
-        elif (
-            len(data) > 0
-            and columns is None
-            and isinstance(data[0], tuple)
-            and hasattr(data[0], "_fields")
-        ):
-            # pandas behavior is to use the fields from the first tuple
-            # as the column names
-            columns = data[0]._fields
-            values = itertools.zip_longest(*data)
-            data = dict(zip(columns, values))
-            self._init_from_dict_like(
-                data, index=index, nan_as_null=nan_as_null
-            )
         else:
             if any(
                 not isinstance(col, (abc.Iterable, abc.Sequence))
                 for col in data
             ):
                 raise TypeError("Inputs should be an iterable or sequence.")
+            if (
+                len(data) > 0
+                and columns is None
+                and isinstance(data[0], tuple)
+                and hasattr(data[0], "_fields")
+            ):
+                # pandas behavior is to use the fields from the first
+                # namedtuple as the column names
+                columns = data[0]._fields
 
             data = list(itertools.zip_longest(*data))
 
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 3b87aa2812c..97e399a9cd5 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -10273,3 +10273,9 @@ def test_dataframe_constructor_from_namedtuple():
     pdf = pd.DataFrame(data, index=idx)
 
     assert_eq(gdf, pdf)
+
+    data = [Point2(4, 5), Point1(1, 2, 3)]
+    with pytest.raises(ValueError):
+        cudf.DataFrame(data, index=idx)
+    with pytest.raises(ValueError):
+        pd.DataFrame(data, index=idx)

From d4feb46392805501bca5783bcc25a2ee9676e239 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <mroeschke@dgx11.aselab.nvidia.com>
Date: Tue, 8 Aug 2023 18:14:23 -0700
Subject: [PATCH 3/4] Remove unused keyword

---
 python/cudf/cudf/core/dataframe.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 97539e240e5..fa12c733d89 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -711,7 +711,6 @@ def __init__(
                         data,
                         index=index,
                         columns=columns,
-                        nan_as_null=nan_as_null,
                     )
                 self._check_data_index_length_match()
             else:
@@ -830,9 +829,7 @@ def _init_from_series_list(self, data, columns, index):
             self._data = self._data.select_by_label(columns)
 
     @_cudf_nvtx_annotate
-    def _init_from_list_like(
-        self, data, index=None, columns=None, nan_as_null=None
-    ):
+    def _init_from_list_like(self, data, index=None, columns=None):
         if index is None:
             index = RangeIndex(start=0, stop=len(data))
         else:

From 414d7a1cff5d0cb7a5d887f2d32fd722c3a5d7ad Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <mroeschke@dgx11.aselab.nvidia.com>
Date: Tue, 8 Aug 2023 18:15:07 -0700
Subject: [PATCH 4/4] Reduce diff further

---
 python/cudf/cudf/core/dataframe.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index fa12c733d89..281c96a8c54 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -708,9 +708,7 @@ def __init__(
                     )
                 else:
                     self._init_from_list_like(
-                        data,
-                        index=index,
-                        columns=columns,
+                        data, index=index, columns=columns
                     )
                 self._check_data_index_length_match()
             else: