Skip to content

Commit

Permalink
Add StructMethods.field() API to access field of struct column (rap…
Browse files Browse the repository at this point in the history
  • Loading branch information
skirui-source authored and shwina committed Apr 7, 2021
1 parent e847246 commit 2c7723c
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 0 deletions.
60 changes: 60 additions & 0 deletions python/cudf/cudf/core/column/struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,19 @@

import cudf
from cudf.core.column import ColumnBase
from cudf.core.column.methods import ColumnMethodsMixin
from cudf.utils.dtypes import is_struct_dtype


class StructColumn(ColumnBase):
"""
Column that stores fields of values.
Every column has n children, where n is
the number of fields in the Struct Dtype.
"""

dtype: cudf.core.dtypes.StructDtype

@property
Expand Down Expand Up @@ -74,6 +84,9 @@ def copy(self, deep=True):
result = result._rename_fields(self.dtype.fields.keys())
return result

def struct(self, parent=None):
    """
    Build the struct accessor for this column.

    ``parent`` is forwarded unchanged to ``StructMethods``.
    """
    accessor = StructMethods(self, parent=parent)
    return accessor

def _rename_fields(self, names):
"""
Return a StructColumn with the same field values as this StructColumn,
Expand All @@ -91,3 +104,50 @@ def _rename_fields(self, names):
null_count=self.null_count,
children=self.base_children,
)


class StructMethods(ColumnMethodsMixin):
    """
    Struct methods for Series
    """

    def __init__(self, column, parent=None):
        # Refuse to build the accessor for anything that is not a struct
        # column, so misuse fails at attribute access time.
        if not is_struct_dtype(column.dtype):
            raise AttributeError(
                "Can only use .struct accessor with a 'struct' dtype"
            )
        super().__init__(column=column, parent=parent)

    def field(self, key):
        """
        Extract children of the specified struct column
        in the Series

        Parameters
        ----------
        key: int or str
            index/position or field name of the respective
            struct column

        Returns
        -------
        Series

        Raises
        ------
        KeyError
            If ``key`` is a string that is not one of the struct's
            field names.
        IndexError
            If ``key`` is a position outside the range of the struct's
            children.

        Examples
        --------
        >>> s = cudf.Series([{'a': 1, 'b': 2}, {'a': 3, 'b': 4}])
        >>> s.struct.field(0)
        0    1
        1    3
        dtype: int64
        >>> s.struct.field('a')
        0    1
        1    3
        dtype: int64
        """
        field_names = list(self._column.dtype.fields.keys())
        children = self._column.children

        # A key matching a field name is resolved to that field's position.
        if key in field_names:
            return self._return_or_inplace(children[field_names.index(key)])

        # An unknown name must not fall through to positional indexing:
        # indexing the children with a string yields an unrelated,
        # confusing error instead of a lookup failure.
        if isinstance(key, str):
            raise KeyError(
                f"Field '{key}' is not found in the set of existing keys."
            )

        # Anything else is treated as a position, exactly as before; only
        # the out-of-range message is made more descriptive.
        try:
            child = children[key]
        except IndexError as err:
            raise IndexError(
                f"Index {key} out of range for struct with "
                f"{len(field_names)} field(s)"
            ) from err
        return self._return_or_inplace(child)
6 changes: 6 additions & 0 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
)
from cudf.core.column.lists import ListMethods
from cudf.core.column.string import StringMethods
from cudf.core.column.struct import StructMethods
from cudf.core.column_accessor import ColumnAccessor
from cudf.core.frame import Frame, _drop_rows_by_labels
from cudf.core.groupby.groupby import SeriesGroupBy
Expand Down Expand Up @@ -2675,6 +2676,11 @@ def str(self):
def list(self):
return ListMethods(column=self._column, parent=self)

@copy_docstring(StructMethods.__init__)  # type: ignore
@property
def struct(self):
    # Hand the underlying column to the struct accessor and register this
    # Series as its parent.
    accessor = StructMethods(column=self._column, parent=self)
    return accessor

@property
def dtype(self):
"""dtype of the Series"""
Expand Down
10 changes: 10 additions & 0 deletions python/cudf/cudf/tests/test_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,13 @@ def test_struct_of_struct_loc():
df = cudf.DataFrame({"col": [{"a": {"b": 1}}]})
expect = cudf.Series([{"a": {"b": 1}}], name="col")
assert_eq(expect, df["col"])


@pytest.mark.parametrize(
    "key, expect", [(0, [1, 3]), (1, [2, 4]), ("a", [1, 3]), ("b", [2, 4])]
)
def test_struct_for_field(key, expect):
    """Check ``.struct.field`` for both positional and name-based keys."""
    # Two rows of a two-field struct; field "a" is position 0, "b" is 1.
    structs = cudf.Series([{"a": 1, "b": 2}, {"a": 3, "b": 4}])
    expected_series = cudf.Series(expect)
    extracted = structs.struct.field(key)
    assert_eq(expected_series, extracted)
3 changes: 3 additions & 0 deletions python/cudf/cudf/utils/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,9 @@ def cudf_dtype_to_pa_type(dtype):


def cudf_dtype_from_pa_type(typ):
""" Given a cuDF pyarrow dtype, converts it into the equivalent
cudf pandas dtype.
"""
if pa.types.is_list(typ):
return cudf.core.dtypes.ListDtype.from_arrow(typ)
elif pa.types.is_struct(typ):
Expand Down

0 comments on commit 2c7723c

Please sign in to comment.