Preserve the correct ListDtype while creating an identical empty column (#10151)

Fixes: #10122. This PR fixes an issue where the dtype of a list column's children[1] wasn't being preserved correctly. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - https://github.com/brandon-b-miller URL: #10151
rapidsai · Jan 31, 2022 · c25d35b · c25d35b
1 parent b217d7e
commit c25d35b
Show file tree

Hide file tree

Showing 2 changed files with 18 additions and 1 deletion.
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
@@ -1271,6 +1271,12 @@ def column_empty(
             column_empty(row_count, field_dtype)
             for field_dtype in dtype.fields.values()
         )
+    elif is_list_dtype(dtype):
+        data = None
+        children = (
+            full(row_count + 1, 0, dtype="int32"),
+            column_empty(row_count, dtype=dtype.element_type),
+        )
     elif is_categorical_dtype(dtype):
         data = None
         children = (

diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py
@@ -1,4 +1,5 @@
-# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+
 import functools
 import operator
 
@@ -586,3 +587,13 @@ def test_listcol_setitem_error_cases(data, item, error):
     sr = cudf.Series(data)
     with pytest.raises(BaseException, match=error):
         sr[1] = item
+
+
+def test_listcol_setitem_retain_dtype():  # regression test for #10122
+    df = cudf.DataFrame(  # column "a" holds lists of strings
+        {"a": cudf.Series([["a", "b"], []]), "b": [1, 2], "c": [123, 321]}
+    )
+    df1 = df[df.b.isna()]  # "b" has no nulls, so this should select no rows
+    df1["b"] = df1["c"]  # column assignment on the filtered frame
+    df2 = df1.drop(["c"], axis=1)  # drop "c"; only "a" and "b" remain
+    assert df2.a.dtype == df.a.dtype  # "a" must keep its original dtype