From 17d7ab39191af9988ab9d99e1cd72579d5c79639 Mon Sep 17 00:00:00 2001
From: Shane Ding <shane200195@gmail.com>
Date: Thu, 20 May 2021 19:55:40 +0000
Subject: [PATCH 1/5] added _is_homogeneous property

---
 python/cudf/cudf/core/dataframe.py       |  9 +++++++++
 python/cudf/cudf/tests/test_dataframe.py | 16 ++++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index f2be0e3bd6e..61a86a2ba4d 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -602,6 +602,15 @@ def deserialize(cls, header, frames):
 
         return cls(dict(zip(column_names, columns)), index=index)
 
+    @property
+    def _is_homogeneous(self):
+        # make sure that the dataframe has columns
+        if not self._data.columns:
+            return True
+
+        first_type = self._data.columns[0].dtype
+        return all(x.dtype == first_type for x in self._data.columns)
+
     @property
     def dtypes(self):
         """
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index e5e36ba7e21..082c5a32554 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -8579,3 +8579,19 @@ def test_dataframe_init_from_series(data, columns, index):
         actual,
         check_index_type=False if len(expected) == 0 else True,
     )
+
+
+@pytest.mark.parametrize(
+    "data,expected",
+    [
+        ({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8], "c": [1.2, 1, 2, 3]}, False),
+        ({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, True),
+        ({"a": ["a", "b", "c"], "b": [4, 5, 6], "c": [7, 8, 9]}, False),
+        ({"a": [True, False, False], "b": [False, False, True]}, True),
+        ({}, True),
+    ],
+)
+def test_is_homogeneous(data, expected):
+    actual = cudf.DataFrame(data)._is_homogeneous
+
+    assert actual == expected

From 15ced45c89e867b46fe30534f85949d3c522156d Mon Sep 17 00:00:00 2001
From: Shane Ding <shane200195@gmail.com>
Date: Thu, 20 May 2021 20:39:21 +0000
Subject: [PATCH 2/5] moved _is_homogeneous to Frame

---
 python/cudf/cudf/core/dataframe.py | 9 ---------
 python/cudf/cudf/core/frame.py     | 9 +++++++++
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 61a86a2ba4d..f2be0e3bd6e 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -602,15 +602,6 @@ def deserialize(cls, header, frames):
 
         return cls(dict(zip(column_names, columns)), index=index)
 
-    @property
-    def _is_homogeneous(self):
-        # make sure that the dataframe has columns
-        if not self._data.columns:
-            return True
-
-        first_type = self._data.columns[0].dtype
-        return all(x.dtype == first_type for x in self._data.columns)
-
     @property
     def dtypes(self):
         """
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index f59954aaf08..b177b96cffc 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -156,6 +156,15 @@ def size(self):
         """
         return self._num_columns * self._num_rows
 
+    @property
+    def _is_homogeneous(self):
+        # a frame with no columns is trivially homogeneous
+        if not self._data.columns:
+            return True
+
+        first_type = self._data.columns[0].dtype
+        return all(x.dtype == first_type for x in self._data.columns)
+
     @property
     def empty(self):
         """

From c06be48ccc01c23e9d933a9cac6dc31ed4918f19 Mon Sep 17 00:00:00 2001
From: Shane Ding <shane200195@gmail.com>
Date: Fri, 21 May 2021 14:59:27 +0000
Subject: [PATCH 3/5] Added more test cases and changed _is_homogeneous to compare dtype names

---
 python/cudf/cudf/core/frame.py           |  4 +-
 python/cudf/cudf/tests/test_dataframe.py | 56 +++++++++++++++++++++++-
 2 files changed, 57 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index b177b96cffc..012e2f3788a 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -162,8 +162,8 @@ def _is_homogeneous(self):
         if not self._data.columns:
             return True
 
-        first_type = self._data.columns[0].dtype
-        return all(x.dtype == first_type for x in self._data.columns)
+        first_type = self._data.columns[0].dtype.name
+        return all(x.dtype.name == first_type for x in self._data.columns)
 
     @property
     def empty(self):
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 082c5a32554..e2d173cc6c3 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -8582,12 +8582,16 @@ def test_dataframe_init_from_series(data, columns, index):
 
 
 @pytest.mark.parametrize(
-    "data,expected",
+    "data, expected",
     [
         ({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8], "c": [1.2, 1, 2, 3]}, False),
         ({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, True),
         ({"a": ["a", "b", "c"], "b": [4, 5, 6], "c": [7, 8, 9]}, False),
         ({"a": [True, False, False], "b": [False, False, True]}, True),
+        ({"a": [True, False, False]}, True),
+        ({"a": [[1, 2], [3, 4]]}, True),
+        ({"a": [[1, 2], [3, 4]], "b": ["a", "b"]}, False),
+        ({"a": [{"c": 5}, {"e": 5}], "b": [{"c": 5}, {"g": 7}]}, True),
         ({}, True),
     ],
 )
@@ -8595,3 +8599,53 @@ def test_is_homogeneous(data, expected):
     actual = cudf.DataFrame(data)._is_homogeneous
 
     assert actual == expected
+
+
+@pytest.mark.parametrize(
+    "data, indexes, expected",
+    [
+        (
+            {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8], "c": [1.2, 1, 2, 3]},
+            ["a", "b"],
+            True,
+        ),
+        (
+            {
+                "a": [1, 2, 3, 4],
+                "b": [5, 6, 7, 8],
+                "c": [1.2, 1, 2, 3],
+                "d": ["hello", "world", "cudf", "rapids"],
+            },
+            ["a", "b"],
+            False,
+        ),
+        (
+            {
+                "a": ["a", "b", "c"],
+                "b": [4, 5, 6],
+                "c": [7, 8, 9],
+                "d": [1, 2, 3],
+            },
+            ["a", "b"],
+            True,
+        ),
+    ],
+)
+def test_is_homogeneous_multiindex(data, indexes, expected):
+    test_dataframe = cudf.DataFrame(data).set_index(indexes)
+    actual = cudf.DataFrame(test_dataframe)._is_homogeneous
+
+    assert actual == expected
+
+
+"""
+({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, True),
+        ({"a": ["a", "b", "c"], "b": [4, 5, 6], "c": [7, 8, 9]}, False),
+        ({"a": [True, False, False], "b": [False, False, True]}, True),
+        ({"a": [True, False, False]}, True),
+        ({"a": [[1,2],[3,4]]}, True),
+        ({'a': [[1,2], [3,4]], 'b': ["a", "b"]}, False),
+        ({'a': [{'c':5} , {'e': 5}], 'b': [{'c':5} , {'g': 7}]}, True),
+        ({}, True),
+
+"""

From 4861e9a6496bc8a62668c10bf33d490a1c97bc21 Mon Sep 17 00:00:00 2001
From: Shane Ding <shane200195@gmail.com>
Date: Fri, 21 May 2021 15:01:05 +0000
Subject: [PATCH 4/5] Removed leftover commented-out test cases

---
 python/cudf/cudf/tests/test_dataframe.py | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index e2d173cc6c3..93c68387bed 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -8636,16 +8636,3 @@ def test_is_homogeneous_multiindex(data, indexes, expected):
     actual = cudf.DataFrame(test_dataframe)._is_homogeneous
 
     assert actual == expected
-
-
-"""
-({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, True),
-        ({"a": ["a", "b", "c"], "b": [4, 5, 6], "c": [7, 8, 9]}, False),
-        ({"a": [True, False, False], "b": [False, False, True]}, True),
-        ({"a": [True, False, False]}, True),
-        ({"a": [[1,2],[3,4]]}, True),
-        ({'a': [[1,2], [3,4]], 'b': ["a", "b"]}, False),
-        ({'a': [{'c':5} , {'e': 5}], 'b': [{'c':5} , {'g': 7}]}, True),
-        ({}, True),
-
-"""

From e9311bf8e8d36334b65e674c0cda874e2054499c Mon Sep 17 00:00:00 2001
From: Shane Ding <shane200195@gmail.com>
Date: Fri, 21 May 2021 18:11:20 +0000
Subject: [PATCH 5/5] Added more test cases for MultiIndex, Series and Index

---
 python/cudf/cudf/tests/test_dataframe.py | 44 ++++++++++++++++++++++--
 1 file changed, 42 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 93c68387bed..0b73f32e94d 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -8595,7 +8595,7 @@ def test_dataframe_init_from_series(data, columns, index):
         ({}, True),
     ],
 )
-def test_is_homogeneous(data, expected):
+def test_is_homogeneous_dataframe(data, expected):
     actual = cudf.DataFrame(data)._is_homogeneous
 
     assert actual == expected
@@ -8631,8 +8631,48 @@ def test_is_homogeneous(data, expected):
         ),
     ],
 )
-def test_is_homogeneous_multiindex(data, indexes, expected):
+def test_is_homogeneous_multiIndex_dataframe(data, indexes, expected):
     test_dataframe = cudf.DataFrame(data).set_index(indexes)
     actual = cudf.DataFrame(test_dataframe)._is_homogeneous
 
     assert actual == expected
+
+
+@pytest.mark.parametrize(
+    "data, expected", [([1, 2, 3, 4], True), ([True, False], True)]
+)
+def test_is_homogeneous_series(data, expected):
+    actual = cudf.Series(data)._is_homogeneous
+
+    assert actual == expected
+
+
+@pytest.mark.parametrize(
+    "levels, codes, expected",
+    [
+        (
+            [["lama", "cow", "falcon"], ["speed", "weight", "length"]],
+            [[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]],
+            True,
+        ),
+        (
+            [[1, 2, 3], [True, False, True]],
+            [[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]],
+            False,
+        ),
+    ],
+)
+def test_is_homogeneous_multiIndex(levels, codes, expected):
+    actual = cudf.MultiIndex(levels=levels, codes=codes)._is_homogeneous
+
+    assert actual == expected
+
+
+@pytest.mark.parametrize(
+    "data, expected",
+    [([1, 2, 3], True), (["Hello", "World"], True), ([True, False], True)],
+)
+def test_is_homogeneous_index(data, expected):
+    actual = cudf.Index(data)._is_homogeneous
+
+    assert actual == expected