From 5d11f4806f5c9e7134ebe79e0ac712c89c3f30e0 Mon Sep 17 00:00:00 2001 From: Nathan Zimmerman Date: Tue, 9 Jul 2024 14:04:37 -0500 Subject: [PATCH] Future-proof offset and size records in chunkmanifest --- virtualizarr/manifests/manifest.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/virtualizarr/manifests/manifest.py b/virtualizarr/manifests/manifest.py index 0297d17a..cc196e6d 100644 --- a/virtualizarr/manifests/manifest.py +++ b/virtualizarr/manifests/manifest.py @@ -71,8 +71,8 @@ class ChunkManifest: """ _paths: np.ndarray[Any, np.dtypes.StringDType] # type: ignore[name-defined] - _offsets: np.ndarray[Any, np.dtype[np.int32]] - _lengths: np.ndarray[Any, np.dtype[np.int32]] + _offsets: np.ndarray[Any, np.dtype[np.uint64]] + _lengths: np.ndarray[Any, np.dtype[np.uint64]] def __init__(self, entries: dict) -> None: """ @@ -100,8 +100,8 @@ def __init__(self, entries: dict) -> None: # Initializing to empty implies that entries with path='' are treated as missing chunks paths = np.empty(shape=shape, dtype=np.dtypes.StringDType()) # type: ignore[attr-defined] - offsets = np.empty(shape=shape, dtype=np.dtype("int32")) - lengths = np.empty(shape=shape, dtype=np.dtype("int32")) + offsets = np.empty(shape=shape, dtype=np.dtype("uint64")) + lengths = np.empty(shape=shape, dtype=np.dtype("uint64")) # populate the arrays for key, entry in entries.items(): @@ -128,8 +128,8 @@ def __init__(self, entries: dict) -> None: def from_arrays( cls, paths: np.ndarray[Any, np.dtype[np.dtypes.StringDType]], # type: ignore[name-defined] - offsets: np.ndarray[Any, np.dtype[np.int32]], - lengths: np.ndarray[Any, np.dtype[np.int32]], + offsets: np.ndarray[Any, np.dtype[np.uint64]], + lengths: np.ndarray[Any, np.dtype[np.uint64]], ) -> "ChunkManifest": """ Create manifest directly from numpy arrays containing the path and byte range information. @@ -161,13 +161,13 @@ def from_arrays( raise ValueError( f"paths array must have a numpy variable-length string dtype, but got dtype {paths.dtype}" ) - if offsets.dtype != np.dtype("int32"): + if offsets.dtype != np.dtype("uint64"): raise ValueError( - f"offsets array must have 32-bit integer dtype, but got dtype {offsets.dtype}" + f"offsets array must have 64-bit unsigned integer dtype, but got dtype {offsets.dtype}" ) - if lengths.dtype != np.dtype("int32"): + if lengths.dtype != np.dtype("uint64"): raise ValueError( - f"lengths array must have 32-bit integer dtype, but got dtype {lengths.dtype}" + f"lengths array must have 64-bit unsigned integer dtype, but got dtype {lengths.dtype}" ) # check shapes