Skip to content

Commit

Permalink
Fix repr and concat of StructColumn (#10042)
Browse files Browse the repository at this point in the history
Fixes: #8963 

This PR fixes a small bug in `concat`: the code assumed that `_with_type_metadata` modifies the column in place, but it actually returns a new column, so the result has to be assigned back.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Bradley Dice (https://github.com/bdice)

URL: #10042
  • Loading branch information
galipremsagar authored Jan 14, 2022
1 parent b01c846 commit 12adb8a
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 7 deletions.
14 changes: 11 additions & 3 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1548,10 +1548,18 @@ def _concat(
cudf.core.index.as_index(out.index._values)
)

# Reassign precision for any decimal cols
# Reassign precision for decimal cols & type schema for struct cols
for name, col in out._data.items():
if isinstance(col, cudf.core.column.Decimal64Column):
col = col._with_type_metadata(tables[0]._data[name].dtype)
if isinstance(
col,
(
cudf.core.column.Decimal64Column,
cudf.core.column.StructColumn,
),
):
out._data[name] = col._with_type_metadata(
tables[0]._data[name].dtype
)

# Reassign index and column names
if isinstance(objs[0].columns, pd.MultiIndex):
Expand Down
9 changes: 5 additions & 4 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1448,10 +1448,11 @@ def _concat(cls, objs, axis=0, index=True):

col = concat_columns([o._column for o in objs])

if isinstance(col, cudf.core.column.Decimal64Column):
col = col._with_type_metadata(objs[0]._column.dtype)

if isinstance(col, cudf.core.column.StructColumn):
# Reassign precision for decimal cols & type schema for struct cols
if isinstance(
col,
(cudf.core.column.Decimal64Column, cudf.core.column.StructColumn),
):
col = col._with_type_metadata(objs[0].dtype)

return cls(data=col, index=index, name=name)
Expand Down
30 changes: 30 additions & 0 deletions python/cudf/cudf/tests/test_repr.py
Original file line number Diff line number Diff line change
Expand Up @@ -1475,3 +1475,33 @@ def test_empty_series_name():
gs = cudf.from_pandas(ps)

assert ps.__repr__() == gs.__repr__()


def test_repr_struct_after_concat():
    """Check that a struct-typed DataFrame prints the same as its pandas copy.

    Column ``a`` holds flat structs; column ``b`` holds nested structs mixed
    with a null row and a null field. The cudf repr must equal the repr of
    the frame returned by ``to_pandas()``.
    """
    # Every five-row pattern is tiled 13 times (65 rows per column).
    # NOTE(review): presumably this makes the frame long enough for repr to
    # truncate and concatenate head/tail rows -- confirm against the
    # display row limit in effect.
    repeats = 13

    flat_rows = [
        {"sa": 2056831253},
        {"sa": -1463792165},
        {"sa": 1735783038},
        {"sa": 103774433},
        {"sa": -1413247520},
    ] * repeats

    nested_rows = [
        {"sa": {"ssa": 1140062029}},
        None,
        {"sa": {"ssa": 1998862860}},
        {"sa": None},
        {"sa": {"ssa": -395088502}},
    ] * repeats

    df = cudf.DataFrame(
        {"a": cudf.Series(flat_rows), "b": cudf.Series(nested_rows)}
    )
    pdf = df.to_pandas()

    assert df.__repr__() == pdf.__repr__()

0 comments on commit 12adb8a

Please sign in to comment.