Skip to content

Commit

Permalink
Support no compressor in open_cupy_array() (#312)
Browse files Browse the repository at this point in the history
... also added some more examples.

Authors:
  - Mads R. B. Kristensen (https://github.com/madsbk)

Approvers:
  - Lawrence Mitchell (https://github.com/wence-)

URL: #312
  • Loading branch information
madsbk authored Nov 6, 2023
1 parent d56d88c commit 34f6d8e
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 1 deletion.
30 changes: 30 additions & 0 deletions python/examples/zarr_cupy_nvcomp.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ def main(path):
# E.g., `open_cupy_array()` uses nvCOMP's Snappy GPU compression by default,
# which, as far as we know, isn’t compatible with any CPU compressor. Thus,
# let's re-write our Zarr array using a CPU and GPU compatible compressor.
#
# Warning: it isn't possible to use `CompatCompressor` as a compressor argument
# in Zarr directly. It is only meant for `open_cupy_array()`. However,
# in an example further down, we show how to write using regular Zarr.
z = kvikio.zarr.open_cupy_array(
store=path,
mode="w",
Expand All @@ -53,6 +57,32 @@ def main(path):
assert isinstance(z[:], cupy.ndarray)
assert (cupy.arange(20, 40) == z[:]).all()

# Similarly, we can also open a file written by regular Zarr.
# Let's write the file without any compressor.
ary = numpy.arange(10)
z = zarr.open(store=path, mode="w", shape=ary.shape, compressor=None)
z[:] = ary
# This works as before where the file is read as a CuPy array
z = kvikio.zarr.open_cupy_array(store=path)
assert isinstance(z[:], cupy.ndarray)
assert (z[:] == cupy.asarray(ary)).all()

# Using a compressor is a bit more tricky since not all CPU compressors
# are GPU compatible. To make sure we use a compatible compressor, we use
# the CPU-part of `CompatCompressor.lz4()`.
ary = numpy.arange(10)
z = zarr.open(
store=path,
mode="w",
shape=ary.shape,
compressor=kvikio.zarr.CompatCompressor.lz4().cpu,
)
z[:] = ary
# This works as before where the file is read as a CuPy array
z = kvikio.zarr.open_cupy_array(store=path)
assert isinstance(z[:], cupy.ndarray)
assert (z[:] == cupy.asarray(ary)).all()


if __name__ == "__main__":
main("/tmp/zarr-cupy-nvcomp")
19 changes: 18 additions & 1 deletion python/kvikio/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,22 @@ def get_nvcomp_manager(self):


class CompatCompressor:
"""A pair of compatible compressors one using the CPU and one using the GPU"""
"""A pair of compatible compressors one using the CPU and one using the GPU
Warning
-------
`CompatCompressor` is only supported by KvikIO's `open_cupy_array()` and
cannot be used as a compressor argument in Zarr functions like `open()`
and `open_array()` directly. However, it is possible to use its `.cpu`
like: `open(..., compressor=CompatCompressor.lz4().cpu)`.
Parameters
----------
cpu
The CPU compressor.
gpu
The GPU compressor.
"""

def __init__(self, cpu: Codec, gpu: CudaCodec) -> None:
self.cpu = cpu
Expand Down Expand Up @@ -347,6 +362,8 @@ def open_cupy_array(
if mode in ("r", "r+"):
raise
else:
if ret.compressor is None:
return ret
# If we are reading an LZ4-CPU compressed file, we overwrite the
# metadata on-the-fly to make Zarr use LZ4-GPU for both compression
# and decompression.
Expand Down
16 changes: 16 additions & 0 deletions python/tests/test_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,22 @@ def test_open_cupy_array(tmp_path, write_mode, read_mode):
numpy.testing.assert_array_equal(a.get(), z[:])


@pytest.mark.parametrize("compressor", [None, kvikio_zarr.CompatCompressor.lz4().cpu])
def test_open_cupy_array_written_by_zarr(tmp_path, compressor):
    """Check that an array written by regular Zarr opens via `open_cupy_array()`.

    The array is written either without a compressor or with the CPU half of
    `CompatCompressor.lz4()`, then read back and compared to the source data.
    """
    expected = numpy.arange(100)

    # Write the reference data using plain Zarr.
    writer = zarr.open_array(
        tmp_path, shape=expected.shape, mode="w", compressor=compressor
    )
    writer[:] = expected

    # Re-open read-only through KvikIO; slices must come back as CuPy arrays.
    reader = kvikio_zarr.open_cupy_array(tmp_path, mode="r")
    assert isinstance(reader[:], cupy.ndarray)
    cupy.testing.assert_array_equal(reader[:], expected)


@pytest.mark.parametrize("mode", ["r", "r+", "a"])
def test_open_cupy_array_incompatible_compressor(tmp_path, mode):
zarr.create((10,), store=tmp_path, compressor=numcodecs.Blosc())
Expand Down

0 comments on commit 34f6d8e

Please sign in to comment.