# Patch summary (commentary only; everything before the first "diff --git" header is not part of any hunk).
#
# Files touched:
#   1. python/cudf/cudf/core/column/column.py
#      - Inside column_empty, inserts a new `elif is_list_dtype(dtype):` branch
#        just before the existing `elif is_categorical_dtype(dtype):` branch.
#      - The new branch sets `data = None` and builds `children` as a 2-tuple:
#          full(row_count + 1, 0, dtype="int32")
#          column_empty(row_count, dtype=dtype.element_type)
#      - NOTE(review): the int32 column of row_count + 1 zeros is presumably the
#        list offsets child and the recursive call the element child -- confirm
#        against the list column layout.
#   2. python/cudf/cudf/tests/test_list.py
#      - Updates the copyright range from 2020-2021 to 2020-2022 and adds a blank
#        line after it.
#      - Appends test_listcol_setitem_retain_dtype, which:
#          * builds a DataFrame with a list column "a" and integer columns "b", "c";
#          * filters it with df[df.b.isna()];
#          * assigns df1["c"] into df1["b"], then drops "c";
#          * asserts the resulting "a" dtype equals the original "a" dtype.
#      - NOTE(review): with b = [1, 2] the isna() mask presumably selects no rows,
#        so the test exercises the empty-frame path -- confirm.
#
# NOTE(review): the patch text below is on a single line (hunk headers and +/-
# markers appear inline); its newlines look collapsed to spaces. Line structure
# would have to be restored from the original commit before this can be applied.
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 5d694dac255..e8ba4cc258e 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1271,6 +1271,12 @@ def column_empty( column_empty(row_count, field_dtype) for field_dtype in dtype.fields.values() ) + elif is_list_dtype(dtype): + data = None + children = ( + full(row_count + 1, 0, dtype="int32"), + column_empty(row_count, dtype=dtype.element_type), + ) elif is_categorical_dtype(dtype): data = None children = ( diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py index 44749103b54..3ef579574c6 100644 --- a/python/cudf/cudf/tests/test_list.py +++ b/python/cudf/cudf/tests/test_list.py @@ -1,4 +1,5 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. + import functools import operator @@ -586,3 +587,13 @@ def test_listcol_setitem_error_cases(data, item, error): sr = cudf.Series(data) with pytest.raises(BaseException, match=error): sr[1] = item + + +def test_listcol_setitem_retain_dtype(): + df = cudf.DataFrame( + {"a": cudf.Series([["a", "b"], []]), "b": [1, 2], "c": [123, 321]} + ) + df1 = df[df.b.isna()] + df1["b"] = df1["c"] + df2 = df1.drop(["c"], axis=1) + assert df2.a.dtype == df.a.dtype