From 2c337fef45da6154a783cdf4109a40792d8813a0 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 10 Jan 2023 10:31:55 -0800 Subject: [PATCH 1/8] Handle list & struct types in find_common_type --- python/cudf/cudf/utils/dtypes.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py index 92c23d8b97b..cac24fd4a3c 100644 --- a/python/cudf/cudf/utils/dtypes.py +++ b/python/cudf/cudf/utils/dtypes.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. import datetime from collections import namedtuple @@ -572,6 +572,21 @@ def find_common_type(dtypes): ) else: return cudf.dtype("O") + if any( + cudf.api.types.is_list_dtype(dtype) + or cudf.api.types.is_struct_dtype(dtype) + for dtype in dtypes + ): + if len(dtypes) == 1: + return dtypes[0] + else: + # TODO: As list & struct dtypes allow casting + # to identical types, improve this logic of returning a + # common dtype, for example: + # ListDtype(int64) & ListDtype(int32) common + # dtype could be ListDtype(int64). Same holds + # for StructDtype too. + return cudf.dtype("O") # Corner case 1: # Resort to np.result_type to handle "M" and "m" types separately From 678e33eb8bde034c8ee5d4cfa3860807527f33f8 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 11 Jan 2023 12:33:52 -0800 Subject: [PATCH 2/8] updates --- python/cudf/cudf/core/frame.py | 16 +++++++++++++++- python/cudf/cudf/tests/test_concat.py | 14 ++++++++++++++ python/cudf/cudf/utils/dtypes.py | 21 ++++++++++++--------- 3 files changed, 41 insertions(+), 10 deletions(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 32764c6c2f0..ccd0f0e6aac 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. from __future__ import annotations @@ -484,6 +484,20 @@ def get_column_values_na(col): ) if dtype is None: + dtypes = [col.dtype for col in self._data.values()] + for dtype in dtypes: + if isinstance( + dtype, + ( + cudf.ListDtype, + cudf.core.dtypes.DecimalDtype, + cudf.StructDtype, + ), + ): + raise NotImplementedError( + f"{dtype} are not yet supported via " + "`__cuda_array_interface__`" + ) dtype = find_common_type( [col.dtype for col in self._data.values()] ) diff --git a/python/cudf/cudf/tests/test_concat.py b/python/cudf/cudf/tests/test_concat.py index 2ff0bddf1c8..c90bb13cf07 100644 --- a/python/cudf/cudf/tests/test_concat.py +++ b/python/cudf/cudf/tests/test_concat.py @@ -1869,3 +1869,17 @@ def test_concat_invalid_axis(axis): s = gd.Series([1, 2, 3]) with pytest.raises(ValueError): gd.concat([s], axis=axis) + + +@pytest.mark.parametrize( + "s1,s2,expected", + [ + ([1, 2], [[1, 2], [3, 4]], ["1", "2", "[1, 2]", "[3, 4]"]), + ], +) +def test_concat_mixed_list_types(s1, s2, expected): + s1, s2 = gd.Series(s1), gd.Series(s2) + expected = pd.Series(expected) + actual = gd.concat([s1, s2], ignore_index=True) + + assert_eq(expected, actual, check_dtype=False) diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py index cac24fd4a3c..05da594b3ff 100644 --- a/python/cudf/cudf/utils/dtypes.py +++ b/python/cudf/cudf/utils/dtypes.py @@ -572,21 +572,24 @@ def find_common_type(dtypes): ) else: return cudf.dtype("O") - if any( - cudf.api.types.is_list_dtype(dtype) - or cudf.api.types.is_struct_dtype(dtype) - for dtype in dtypes - ): + if any(cudf.api.types.is_list_dtype(dtype) for dtype in dtypes): if len(dtypes) == 1: - return dtypes[0] + return dtypes.get(0) else: - # TODO: As list & struct dtypes allow casting + # TODO: As list dtypes allow casting # to identical types, improve this logic of returning a # common dtype, for example: # ListDtype(int64) & ListDtype(int32) common - # dtype could be ListDtype(int64). Same holds - # for StructDtype too. + # dtype could be ListDtype(int64). return cudf.dtype("O") + if any(cudf.api.types.is_struct_dtype(dtype) for dtype in dtypes): + if len(dtypes) == 1: + return dtypes.get(0) + else: + raise NotImplementedError( + "Finding a common type for `StructDtype` is currently " + "not supported" + ) # Corner case 1: # Resort to np.result_type to handle "M" and "m" types separately From 5eff37b29f92a10f510fa17b1de092a9a489f7b4 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 11 Jan 2023 12:38:13 -0800 Subject: [PATCH 3/8] add tests --- python/cudf/cudf/tests/test_dataframe.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 7fa37006195..82ce6a6950e 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -10013,3 +10013,17 @@ def test_dataframe_transpose_complex_types(data): actual = gdf.T assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "data", + [ + {"col": [{"a": 1.1}, {"a": 2.1}, {"a": 10.0}, {"a": 11.2323}, None]}, + {"a": [[{"b": 567}], None] * 10}, + {"a": [decimal.Decimal(10), decimal.Decimal(20), None]}, + ], +) +def test_dataframe_values_complex_types(data): + gdf = cudf.DataFrame(data) + with pytest.raises(NotImplementedError): + gdf.values From e2e16c442097d5b4bc7d1b868144e051a253611d Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 11 Jan 2023 12:40:53 -0800 Subject: [PATCH 4/8] update --- python/cudf/cudf/core/frame.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index ccd0f0e6aac..d644856004b 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -498,9 +498,7 @@ def get_column_values_na(col): f"{dtype} are not yet supported via " "`__cuda_array_interface__`" ) - dtype = find_common_type( - [col.dtype for col in self._data.values()] - ) + dtype = find_common_type(dtypes) matrix = make_empty_matrix( shape=(len(self), ncol), dtype=dtype, order="F" From 6daea3511a20f7acbc4a33302bde57db16b78208 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Tue, 21 Mar 2023 13:54:34 -0500 Subject: [PATCH 5/8] Apply suggestions from code review Co-authored-by: Lawrence Mitchell --- python/cudf/cudf/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 37807215afa..865651ec43e 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -496,7 +496,7 @@ def get_column_values_na(col): ), ): raise NotImplementedError( - f"{dtype} are not yet supported via " + f"{dtype} is not yet supported via " "`__cuda_array_interface__`" ) dtype = find_common_type(dtypes) From f81c1b3034548d9192de5a3afab6555dac73319c Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 21 Mar 2023 11:55:33 -0700 Subject: [PATCH 6/8] update msg --- python/cudf/cudf/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 865651ec43e..809a11227e6 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -496,8 +496,8 @@ def get_column_values_na(col): ), ): raise NotImplementedError( - f"{dtype} is not yet supported via " - "`__cuda_array_interface__`" + f"{dtype} is not yet supported to be exported to" + "a cupy array" ) dtype = find_common_type(dtypes) From 4bf1b910b99cbdc57ebeb8c91948a47defd307b0 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 21 Mar 2023 17:59:48 -0700 Subject: [PATCH 7/8] updates --- python/cudf/cudf/tests/test_concat.py | 11 +++++------ python/cudf/cudf/tests/test_dataframe.py | 3 ++- python/cudf/cudf/utils/dtypes.py | 5 ++++- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/python/cudf/cudf/tests/test_concat.py b/python/cudf/cudf/tests/test_concat.py index c90bb13cf07..910f0b9cf86 100644 --- a/python/cudf/cudf/tests/test_concat.py +++ b/python/cudf/cudf/tests/test_concat.py @@ -1872,14 +1872,13 @@ def test_concat_invalid_axis(axis): @pytest.mark.parametrize( - "s1,s2,expected", + "s1,s2", [ - ([1, 2], [[1, 2], [3, 4]], ["1", "2", "[1, 2]", "[3, 4]"]), + ([1, 2], [[1, 2], [3, 4]]), ], ) -def test_concat_mixed_list_types(s1, s2, expected): +def test_concat_mixed_list_types_error(s1, s2): s1, s2 = gd.Series(s1), gd.Series(s2) - expected = pd.Series(expected) - actual = gd.concat([s1, s2], ignore_index=True) - assert_eq(expected, actual, check_dtype=False) + with pytest.raises(NotImplementedError): + gd.concat([s1, s2], ignore_index=True) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 948fc3e0a0e..d7912985356 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -10030,6 +10030,7 @@ def test_dataframe_transpose_complex_types(data): assert_eq(expected, actual) + @pytest.mark.parametrize( "data", [ @@ -10043,6 +10044,7 @@ def test_dataframe_values_complex_types(data): with pytest.raises(NotImplementedError): gdf.values + def test_dataframe_from_arrow_slice(): table = pa.Table.from_pandas( pd.DataFrame.from_dict( @@ -10055,4 +10057,3 @@ def test_dataframe_from_arrow_slice(): actual = cudf.DataFrame.from_arrow(table_slice) assert_eq(expected, actual) - diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py index bb0668bb01c..901a510f6c5 100644 --- a/python/cudf/cudf/utils/dtypes.py +++ b/python/cudf/cudf/utils/dtypes.py @@ -580,7 +580,10 @@ def find_common_type(dtypes): # common dtype, for example: # ListDtype(int64) & ListDtype(int32) common # dtype could be ListDtype(int64). - return cudf.dtype("O") + raise NotImplementedError( + "Finding a common type for `ListDtype` is currently " + "not supported" + ) if any(cudf.api.types.is_struct_dtype(dtype) for dtype in dtypes): if len(dtypes) == 1: return dtypes.get(0) From 74272b4e0881331464f80a6c08e3a8f516da6766 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 22 Mar 2023 09:04:00 -0500 Subject: [PATCH 8/8] Update frame.py --- python/cudf/cudf/core/frame.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 809a11227e6..d8b9ee4d006 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -496,8 +496,7 @@ def get_column_values_na(col): ), ): raise NotImplementedError( - f"{dtype} is not yet supported to be exported to" - "a cupy array" + f"{dtype} cannot be exposed as a cupy array" ) dtype = find_common_type(dtypes)