From 678e33eb8bde034c8ee5d4cfa3860807527f33f8 Mon Sep 17 00:00:00 2001
From: galipremsagar
Date: Wed, 11 Jan 2023 12:33:52 -0800
Subject: [PATCH] updates

---
Notes:
  * frame.py: when no dtype is requested, refuse list, decimal and struct
    columns with NotImplementedError before asking for a common type.
  * dtypes.py: list and struct dtypes are handled separately in
    find_common_type. A lone dtype is returned via next(iter(dtypes)),
    which works whether `dtypes` is a list, tuple or set (none of them
    has a `.get` method). Mixed list dtypes fall back to object; mixed
    struct dtypes raise NotImplementedError.
  * test_concat.py: covers concatenating an integer Series with a list
    Series.
  * The `index` blob ids below are those recorded with the original
    commit and were not recomputed.

 python/cudf/cudf/core/frame.py        | 16 +++++++++++++++-
 python/cudf/cudf/tests/test_concat.py | 14 ++++++++++++++
 python/cudf/cudf/utils/dtypes.py      | 21 ++++++++++++---------
 3 files changed, 41 insertions(+), 10 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 32764c6c2f0..ccd0f0e6aac 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
 
 from __future__ import annotations
 
@@ -484,6 +484,20 @@ def get_column_values_na(col):
             )
 
         if dtype is None:
+            dtypes = [col.dtype for col in self._data.values()]
+            for col_dtype in dtypes:
+                if isinstance(
+                    col_dtype,
+                    (
+                        cudf.ListDtype,
+                        cudf.core.dtypes.DecimalDtype,
+                        cudf.StructDtype,
+                    ),
+                ):
+                    raise NotImplementedError(
+                        f"{col_dtype} are not yet supported via "
+                        "`__cuda_array_interface__`"
+                    )
             dtype = find_common_type(
                 [col.dtype for col in self._data.values()]
             )
diff --git a/python/cudf/cudf/tests/test_concat.py b/python/cudf/cudf/tests/test_concat.py
index 2ff0bddf1c8..c90bb13cf07 100644
--- a/python/cudf/cudf/tests/test_concat.py
+++ b/python/cudf/cudf/tests/test_concat.py
@@ -1869,3 +1869,17 @@ def test_concat_invalid_axis(axis):
     s = gd.Series([1, 2, 3])
     with pytest.raises(ValueError):
         gd.concat([s], axis=axis)
+
+
+@pytest.mark.parametrize(
+    "s1,s2,expected",
+    [
+        ([1, 2], [[1, 2], [3, 4]], ["1", "2", "[1, 2]", "[3, 4]"]),
+    ],
+)
+def test_concat_mixed_list_types(s1, s2, expected):
+    s1, s2 = gd.Series(s1), gd.Series(s2)
+    expected = pd.Series(expected)
+    actual = gd.concat([s1, s2], ignore_index=True)
+
+    assert_eq(expected, actual, check_dtype=False)
diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index cac24fd4a3c..05da594b3ff 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -572,21 +572,24 @@ def find_common_type(dtypes):
             )
         else:
             return cudf.dtype("O")
-    if any(
-        cudf.api.types.is_list_dtype(dtype)
-        or cudf.api.types.is_struct_dtype(dtype)
-        for dtype in dtypes
-    ):
+    if any(cudf.api.types.is_list_dtype(dtype) for dtype in dtypes):
         if len(dtypes) == 1:
-            return dtypes[0]
+            return next(iter(dtypes))
         else:
-            # TODO: As list & struct dtypes allow casting
+            # TODO: As list dtypes allow casting
             # to identical types, improve this logic of returning a
             # common dtype, for example:
             # ListDtype(int64) & ListDtype(int32) common
-            # dtype could be ListDtype(int64). Same holds
-            # for StructDtype too.
+            # dtype could be ListDtype(int64).
             return cudf.dtype("O")
+    if any(cudf.api.types.is_struct_dtype(dtype) for dtype in dtypes):
+        if len(dtypes) == 1:
+            return next(iter(dtypes))
+        else:
+            raise NotImplementedError(
+                "Finding a common type for `StructDtype` is currently "
+                "not supported"
+            )
 
     # Corner case 1:
     # Resort to np.result_type to handle "M" and "m" types separately