diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index f59954aaf08..012e2f3788a 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -156,6 +156,23 @@ def size(self):
         """
         return self._num_columns * self._num_rows
 
+    @property
+    def _is_homogeneous(self):
+        """Whether every column of this frame has the same dtype name.
+
+        A frame without any columns is considered homogeneous. Columns are
+        compared by ``dtype.name`` only, so dtypes that report the same
+        name count as the same type (the tests rely on this for two struct
+        columns whose field names differ).
+        """
+        columns = self._data.columns
+        # Nothing to compare against, so there cannot be a dtype mismatch.
+        if not columns:
+            return True
+
+        first_type = columns[0].dtype.name
+        return all(x.dtype.name == first_type for x in columns)
+
     @property
     def empty(self):
         """
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index e5e36ba7e21..0b73f32e94d 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -8579,3 +8579,104 @@ def test_dataframe_init_from_series(data, columns, index):
         actual,
         check_index_type=False if len(expected) == 0 else True,
     )
+
+
+@pytest.mark.parametrize(
+    "data, expected",
+    [
+        ({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8], "c": [1.2, 1, 2, 3]}, False),
+        ({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, True),
+        ({"a": ["a", "b", "c"], "b": [4, 5, 6], "c": [7, 8, 9]}, False),
+        ({"a": [True, False, False], "b": [False, False, True]}, True),
+        ({"a": [True, False, False]}, True),
+        ({"a": [[1, 2], [3, 4]]}, True),
+        ({"a": [[1, 2], [3, 4]], "b": ["a", "b"]}, False),
+        ({"a": [{"c": 5}, {"e": 5}], "b": [{"c": 5}, {"g": 7}]}, True),
+        ({}, True),
+    ],
+)
+def test_is_homogeneous_dataframe(data, expected):
+    """Check ``_is_homogeneous`` for DataFrames of same and mixed dtypes."""
+    actual = cudf.DataFrame(data)._is_homogeneous
+
+    assert actual == expected
+
+
+@pytest.mark.parametrize(
+    "data, indexes, expected",
+    [
+        (
+            {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8], "c": [1.2, 1, 2, 3]},
+            ["a", "b"],
+            True,
+        ),
+        (
+            {
+                "a": [1, 2, 3, 4],
+                "b": [5, 6, 7, 8],
+                "c": [1.2, 1, 2, 3],
+                "d": ["hello", "world", "cudf", "rapids"],
+            },
+            ["a", "b"],
+            False,
+        ),
+        (
+            {
+                "a": ["a", "b", "c"],
+                "b": [4, 5, 6],
+                "c": [7, 8, 9],
+                "d": [1, 2, 3],
+            },
+            ["a", "b"],
+            True,
+        ),
+    ],
+)
+def test_is_homogeneous_multiIndex_dataframe(data, indexes, expected):
+    """Check that columns moved into the index are ignored."""
+    actual = cudf.DataFrame(data).set_index(indexes)._is_homogeneous
+
+    assert actual == expected
+
+
+@pytest.mark.parametrize(
+    "data, expected", [([1, 2, 3, 4], True), ([True, False], True)]
+)
+def test_is_homogeneous_series(data, expected):
+    """Check ``_is_homogeneous`` on Series (expected True in every case)."""
+    actual = cudf.Series(data)._is_homogeneous
+
+    assert actual == expected
+
+
+@pytest.mark.parametrize(
+    "levels, codes, expected",
+    [
+        (
+            [["lama", "cow", "falcon"], ["speed", "weight", "length"]],
+            [[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]],
+            True,
+        ),
+        (
+            [[1, 2, 3], [True, False, True]],
+            [[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]],
+            False,
+        ),
+    ],
+)
+def test_is_homogeneous_multiIndex(levels, codes, expected):
+    """Check ``_is_homogeneous`` for same/mixed MultiIndex level dtypes."""
+    actual = cudf.MultiIndex(levels=levels, codes=codes)._is_homogeneous
+
+    assert actual == expected
+
+
+@pytest.mark.parametrize(
+    "data, expected",
+    [([1, 2, 3], True), (["Hello", "World"], True), ([True, False], True)],
+)
+def test_is_homogeneous_index(data, expected):
+    """Check ``_is_homogeneous`` on Index (expected True in every case)."""
+    actual = cudf.Index(data)._is_homogeneous
+
+    assert actual == expected