Skip to content

Commit

Permalink
Preserve the correct ListDtype while creating an identical empty column (#10151)
Browse files Browse the repository at this point in the history

Fixes: #10122 

This PR fixes an issue where the dtype of a list column's second child (children[1], the element column) was not preserved correctly when creating an empty column.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - https://github.com/brandon-b-miller

URL: #10151
  • Loading branch information
galipremsagar authored Jan 31, 2022
1 parent b217d7e commit c25d35b
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 1 deletion.
6 changes: 6 additions & 0 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -1271,6 +1271,12 @@ def column_empty(
column_empty(row_count, field_dtype)
for field_dtype in dtype.fields.values()
)
elif is_list_dtype(dtype):
data = None
children = (
full(row_count + 1, 0, dtype="int32"),
column_empty(row_count, dtype=dtype.element_type),
)
elif is_categorical_dtype(dtype):
data = None
children = (
Expand Down
13 changes: 12 additions & 1 deletion python/cudf/cudf/tests/test_list.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

import functools
import operator

Expand Down Expand Up @@ -586,3 +587,13 @@ def test_listcol_setitem_error_cases(data, item, error):
sr = cudf.Series(data)
with pytest.raises(BaseException, match=error):
sr[1] = item


def test_listcol_setitem_retain_dtype():
    """Check a list column keeps its dtype after filter, setitem and drop."""
    source = cudf.DataFrame(
        {
            "a": cudf.Series([["a", "b"], []]),
            "b": [1, 2],
            "c": [123, 321],
        }
    )
    # Column "b" contains no nulls, so this mask selects zero rows and the
    # rest of the test operates on an empty frame.
    empty = source[source.b.isna()]
    empty["b"] = empty["c"]
    trimmed = empty.drop(["c"], axis=1)
    assert trimmed.a.dtype == source.a.dtype

0 comments on commit c25d35b

Please sign in to comment.