diff --git a/python/dask_cudf/dask_cudf/accessors.py b/python/dask_cudf/dask_cudf/accessors.py
index 77973ee34ff..1c21fca51c8 100644
--- a/python/dask_cudf/dask_cudf/accessors.py
+++ b/python/dask_cudf/dask_cudf/accessors.py
@@ -37,6 +37,37 @@ def field(self, key):
             meta=self.d_series._meta._constructor([], dtype=typ),
         )

+    def explode(self):
+        """
+        Create a DataFrame view of the struct column, one column per field.
+
+        Each partition is expanded independently with its own
+        ``struct.explode()``.
+
+        Returns
+        -------
+        DataFrame
+
+        Examples
+        --------
+        >>> import cudf, dask_cudf
+        >>> ds = dask_cudf.from_cudf(cudf.Series(
+        ...     [{'a': 42, 'b': 'str1', 'c': [-1]},
+        ...      {'a': 0, 'b': 'str2', 'c': [400, 500]},
+        ...      {'a': 7, 'b': '', 'c': []}]), npartitions=2)
+        >>> ds.struct.explode().compute()
+            a     b           c
+        0  42  str1        [-1]
+        1   0  str2  [400, 500]
+        2   7                []
+        """
+        return self.d_series.map_partitions(
+            lambda s: s.struct.explode(),
+            # Explode the metadata series too, so the declared output
+            # schema comes from the same operation applied per partition.
+            meta=self.d_series._meta.struct.explode(),
+        )
+

 class ListMethods:
     def __init__(self, d_series):
diff --git a/python/dask_cudf/dask_cudf/tests/test_accessor.py b/python/dask_cudf/dask_cudf/tests/test_accessor.py
index 8227023aa51..2c02afd96a9 100644
--- a/python/dask_cudf/dask_cudf/tests/test_accessor.py
+++ b/python/dask_cudf/dask_cudf/tests/test_accessor.py
@@ -500,3 +500,19 @@ def test_dask_struct_field_Int_Error(data):

     with pytest.raises(IndexError):
         got.struct.field(1000).compute()
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        [{}, {}, {}],
+        [{"a": 100, "b": "abc"}, {"a": 42, "b": "def"}, {"a": -87, "b": ""}],
+        [{"a": [1, 2, 3], "b": {"c": 101}}, {"a": [4, 5], "b": {"c": 102}}],
+    ],
+)
+def test_struct_explode(data):
+    """Check distributed ``struct.explode`` against the plain ``Series``."""
+    expect = Series(data).struct.explode()
+    got = dgd.from_cudf(Series(data), 2).struct.explode()
+
+    assert_eq(expect, got.compute())