Skip to content

Commit

Permalink
Test of sizeof proxy object (#774)
Browse files Browse the repository at this point in the history
Adding a test of `sizeof` of `ProxyObject`, motivated by rapidsai/cudf#9544, which changes the `__sizeof__` semantics of cudf.  
As far as I can see, `ProxyObject` should continue working after rapidsai/cudf#9544 has been merged.

Authors:
  - Mads R. B. Kristensen (https://github.com/madsbk)

Approvers:
  - Benjamin Zaitlen (https://github.com/quasiben)

URL: #774
  • Loading branch information
madsbk authored Nov 11, 2021
1 parent b659475 commit 5136835
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 2 deletions.
4 changes: 2 additions & 2 deletions dask_cuda/proxy_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -513,7 +513,7 @@ def __class__(self):

@_pxy_cache_wrapper("sizeof")
def __sizeof__(self):
"""Returns either the size of the proxied object
"""Returns the size of the proxy object (serialized or not)
Notice, we cache the result even though the size of proxied object
when serialized or not serialized might slightly differ.
Expand All @@ -523,7 +523,7 @@ def __sizeof__(self):
_, frames = pxy.obj
return sum(map(distributed.utils.nbytes, frames))
else:
return sizeof(self._pxy_deserialize())
return sizeof(pxy.obj)

def __len__(self):
pxy = self._pxy_get(copy=True)
Expand Down
30 changes: 30 additions & 0 deletions dask_cuda/tests/test_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import dask
import dask.array
from dask.dataframe.core import has_parallel_type
from dask.sizeof import sizeof
from distributed import Client
from distributed.protocol.serialize import deserialize, serialize

Expand Down Expand Up @@ -578,3 +579,32 @@ def test_cudf_fillna():
df = cudf.DataFrame({"A": range(10)})
df = proxify_device_objects(df)
df = df.fillna(0)


def test_sizeof_cupy():
    """Check that `sizeof` of a proxied CuPy array matches the plain array.

    The comparison is made three times: right after proxying, after the
    proxy has been serialized with the "dask" serializer, and once more
    after the proxy's cache has been emptied. In the last two phases the
    proxy must still report itself as serialized afterwards.
    """
    cupy = pytest.importorskip("cupy")
    cupy.cuda.set_allocator(None)

    array = cupy.arange(1e7)
    expected_nbytes = sizeof(array)
    proxy = proxy_object.asproxy(array)

    # Phase 1: the proxy as returned by asproxy().
    assert expected_nbytes == pytest.approx(sizeof(proxy))

    # Phase 2: after serialization; asking for the size must leave the
    # proxy serialized.
    proxy._pxy_serialize(serializers=("dask",))
    assert expected_nbytes == pytest.approx(sizeof(proxy))
    assert proxy._pxy_get().is_serialized()

    # Phase 3: with an emptied cache, so no previously stored size can be
    # reused (presumably forcing a recomputation from the serialized form).
    proxy._pxy_cache = {}
    assert expected_nbytes == pytest.approx(sizeof(proxy))
    assert proxy._pxy_get().is_serialized()


def test_sizeof_cudf():
    """Check that `sizeof` of a proxied cuDF dataframe matches the dataframe.

    The comparison is made three times: right after proxying, after the
    proxy has been serialized with the "dask" serializer, and once more
    after the proxy's cache has been emptied. In the last two phases the
    proxy must still report itself as serialized afterwards.
    """
    cudf = pytest.importorskip("cudf")

    frame = cudf.datasets.timeseries().reset_index()
    expected_nbytes = sizeof(frame)
    proxy = proxy_object.asproxy(frame)

    # Phase 1: the proxy as returned by asproxy().
    assert expected_nbytes == pytest.approx(sizeof(proxy))

    # Phase 2: after serialization; asking for the size must leave the
    # proxy serialized.
    proxy._pxy_serialize(serializers=("dask",))
    assert expected_nbytes == pytest.approx(sizeof(proxy))
    assert proxy._pxy_get().is_serialized()

    # Phase 3: with an emptied cache, so no previously stored size can be
    # reused (presumably forcing a recomputation from the serialized form).
    proxy._pxy_cache = {}
    assert expected_nbytes == pytest.approx(sizeof(proxy))
    assert proxy._pxy_get().is_serialized()

0 comments on commit 5136835

Please sign in to comment.