Skip to content

Commit

Permalink
Validate vlen codec length32 encoding against zarr-python v2
Browse files Browse the repository at this point in the history
  • Loading branch information
LDeakin committed Jul 18, 2024
1 parent db563a2 commit ac7ebc7
Show file tree
Hide file tree
Showing 100 changed files with 88 additions and 4 deletions.
17 changes: 13 additions & 4 deletions tests/cities.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@ use zarrs::{
},
array_subset::ArraySubset,
metadata::v3::codec::vlen::Encoding,
storage::{store::MemoryStore, ListableStorageTraits},
storage::{
store::{FilesystemStore, MemoryStore},
ReadableWritableListableStorage,
},
};

fn read_cities() -> std::io::Result<Vec<String>> {
Expand All @@ -33,7 +36,12 @@ fn cities_impl(
chunk_size: u64,
shard_size: Option<u64>,
) -> Result<u64, Box<dyn Error>> {
let store = std::sync::Arc::new(MemoryStore::default());
let store: ReadableWritableListableStorage =
if encoding == Encoding::Length32 && compression_level.is_none() {
std::sync::Arc::new(FilesystemStore::new("tests/data/v3/cities.zarr")?)
} else {
std::sync::Arc::new(MemoryStore::default())
};

let mut builder = ArrayBuilder::new(
vec![cities.len() as u64], // array shape
Expand All @@ -60,7 +68,8 @@ fn cities_impl(
]);
}

let array = builder.build(store.clone(), "/array")?;
let array = builder.build(store.clone(), "/")?;
array.store_metadata()?;

let subset_all = ArraySubset::new_with_shape(array.shape().to_vec());
array.store_array_subset_elements(&subset_all, &cities)?;
Expand All @@ -71,7 +80,7 @@ fn cities_impl(
let (_bytes, offsets) = last_block.into_variable()?;
assert_eq!(offsets.len() as u64, chunk_size + 1);

Ok(store.size()?)
Ok(store.size_prefix(&"c/".try_into().unwrap())?) // only chunks
}

#[rustfmt::skip]
Expand Down
18 changes: 18 additions & 0 deletions tests/data/v2/cities.zarr/.zarray
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"chunks": [
1000
],
"compressor": null,
"dtype": "|O",
"fill_value": "",
"filters": [
{
"id": "vlen-utf8"
}
],
"order": "C",
"shape": [
47868
],
"zarr_format": 2
}
Binary file added tests/data/v2/cities.zarr/0
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/1
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/10
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/11
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/12
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/13
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/14
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/15
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/16
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/17
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/18
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/19
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/2
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/20
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/21
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/22
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/23
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/24
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/25
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/26
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/27
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/28
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/29
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/3
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/30
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/31
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/32
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/33
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/34
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/35
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/36
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/37
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/38
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/39
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/4
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/40
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/41
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/42
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/43
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/44
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/45
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/46
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/47
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/5
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/6
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/7
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/8
Binary file not shown.
Binary file added tests/data/v2/cities.zarr/9
Binary file not shown.
19 changes: 19 additions & 0 deletions tests/data/v2_cities.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""Write the cities dataset as a Zarr V2 array and compare its chunks to the V3 store.

Run from the repository root: every path below is relative to it.
The V3 store under tests/data/v3/cities.zarr must already exist; tests/cities.rs
writes to that path for the uncompressed length32 case.
"""
from pathlib import Path

import pandas as pd
import zarr  # v2

print(zarr.__version__)

# Number of elements per chunk; also determines how many chunk files exist.
CHUNK_SIZE = 1000
V2_STORE = 'tests/data/v2/cities.zarr'
V3_STORE = 'tests/data/v3/cities.zarr'

# The CSV has no header row; the first column holds the city names.
df = pd.read_csv("tests/data/cities.csv", header=None)
cities = df[0]

array = zarr.open(V2_STORE, mode='w', dtype=str, shape=(len(cities),), chunks=(CHUNK_SIZE,), compressor=None, fill_value='')
array[:] = cities.values
print(array.info)

# Ceiling division: the final chunk may be only partially filled.
num_chunks = -(-len(cities) // CHUNK_SIZE)

for i in range(num_chunks):
    # V2 chunk keys sit directly in the store root; V3 chunk keys sit under "c/".
    # read_bytes() closes each file as soon as its contents are read.
    v2 = (Path(V2_STORE) / str(i)).read_bytes()
    v3 = (Path(V3_STORE) / "c" / str(i)).read_bytes()
    assert v2 == v3, f"chunk {i} differs between the V2 and V3 stores"

print("V2 and V3 chunks are identical!")
Binary file added tests/data/v3/cities.zarr/c/0
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/1
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/10
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/11
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/12
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/13
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/14
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/15
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/16
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/17
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/18
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/19
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/2
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/20
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/21
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/22
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/23
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/24
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/25
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/26
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/27
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/28
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/29
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/3
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/30
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/31
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/32
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/33
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/34
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/35
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/36
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/37
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/38
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/39
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/4
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/40
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/41
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/42
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/43
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/44
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/45
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/46
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/47
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/5
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/6
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/7
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/8
Binary file not shown.
Binary file added tests/data/v3/cities.zarr/c/9
Binary file not shown.
38 changes: 38 additions & 0 deletions tests/data/v3/cities.zarr/zarr.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{
"zarr_format": 3,
"node_type": "array",
"shape": [
47868
],
"data_type": "string",
"chunk_grid": {
"name": "regular",
"configuration": {
"chunk_shape": [
1000
]
}
},
"chunk_key_encoding": {
"name": "default",
"configuration": {
"separator": "/"
}
},
"fill_value": null,
"codecs": [
{
"name": "vlen",
"configuration": {
"encoding": "length32"
}
}
],
"attributes": {
"_zarrs": {
"description": "This array was created with zarrs",
"repository": "https://github.com/LDeakin/zarrs",
"version": "0.16.0"
}
}
}

0 comments on commit ac7ebc7

Please sign in to comment.