diff --git a/python/cudf/cudf/core/column/struct.py b/python/cudf/cudf/core/column/struct.py
index 67ff3e48dbd..69d70cf427f 100644
--- a/python/cudf/cudf/core/column/struct.py
+++ b/python/cudf/cudf/core/column/struct.py
@@ -1,6 +1,8 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 from __future__ import annotations
 
+from functools import cached_property
+
 import pandas as pd
 import pyarrow as pa
 
@@ -65,6 +67,23 @@ def to_pandas(self, index: pd.Index = None, **kwargs) -> "pd.Series":
             pd_series.index = index
         return pd_series
 
+    @cached_property
+    def memory_usage(self) -> int:
+        """Total size reported for this struct column.
+
+        Adds ``bitmask_allocation_size_bytes(self.size)`` when the column
+        is nullable, plus the ``memory_usage`` of every child column.
+        The result is cached on first access by ``cached_property``.
+        """
+        n = 0
+        if self.nullable:
+            n += cudf._lib.null_mask.bitmask_allocation_size_bytes(self.size)
+
+        for child in self.children:
+            n += child.memory_usage
+
+        return n
+
     def element_indexing(self, index: int):
         result = super().element_indexing(index)
         return {
diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py
index 84f528549e9..a56de0cd451 100644
--- a/python/cudf/cudf/core/dtypes.py
+++ b/python/cudf/cudf/core/dtypes.py
@@ -4,6 +4,7 @@
 import operator
 import pickle
 import textwrap
+from functools import cached_property
 from typing import Any, Callable, Dict, List, Tuple, Type, Union
 
 import numpy as np
@@ -627,6 +628,14 @@ def deserialize(cls, header: dict, frames: list):
                 fields[k] = pickle.loads(dtype)
         return cls(fields)
 
+    @cached_property
+    def itemsize(self) -> int:
+        """Sum of the ``itemsize`` of the cudf dtype of every field."""
+        return sum(
+            cudf.utils.dtypes.cudf_dtype_from_pa_type(field.type).itemsize
+            for field in self._typ
+        )
+
 
 decimal_dtype_template = textwrap.dedent(
     """
diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py
index 8ea11382419..4c2a14fc45c 100644
--- a/python/cudf/cudf/tests/test_list.py
+++ b/python/cudf/cudf/tests/test_list.py
@@ -864,6 +864,8 @@ def test_memory_usage():
     assert s1.memory_usage() == 44
     s2 = cudf.Series([[[[1, 2]]], [[[3, 4]]]])
     assert s2.memory_usage() == 68
+    s3 = cudf.Series([[{"b": 1, "a": 10}, {"b": 2, "a": 100}]])
+    assert s3.memory_usage() == 40
 
 
 @pytest.mark.parametrize(
diff --git a/python/cudf/cudf/tests/test_struct.py b/python/cudf/cudf/tests/test_struct.py
index 4c70d20c488..eaee1efcbc8 100644
--- a/python/cudf/cudf/tests/test_struct.py
+++ b/python/cudf/cudf/tests/test_struct.py
@@ -371,3 +371,28 @@ def test_nested_struct_extract_host_scalars(data, idx, expected):
     series = cudf.Series(data)
 
     assert _nested_na_replace(series[idx]) == _nested_na_replace(expected)
+
+
+def test_struct_memory_usage():
+    """A struct series reports the same total as its exploded columns."""
+    s = cudf.Series([{"a": 1, "b": 10}, {"a": 2, "b": 20}, {"a": 3, "b": 30}])
+    df = s.struct.explode()
+
+    assert_eq(s.memory_usage(), df.memory_usage().sum())
+
+
+def test_struct_with_null_memory_usage():
+    """Reported usage grows once rows of the struct series are nulled."""
+    df = cudf.DataFrame(
+        {
+            "a": cudf.Series([1, 2, -1, -1, 3], dtype="int64"),
+            "b": cudf.Series([10, 20, -1, -1, 30], dtype="int64"),
+        }
+    )
+    s = df.to_struct()
+    # 5 rows x 2 int64 fields x 8 bytes each
+    assert s.memory_usage() == 80
+
+    s[2:4] = None
+    # 192 bytes more than above; presumably validity masks -- TODO confirm
+    assert s.memory_usage() == 272