-
Notifications
You must be signed in to change notification settings - Fork 13
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add support for variable length data types Data type sizes are now represented by `DataTypeSize` instead of usize Also adds `ArraySize`. Both are `Fixed` only for now. Need to support `Variable` throughout the codebase. Change codec API in prep for variable sized data types Enable `{Array,DataType}Size::Variable` Implement `CowArrayBytes::validate()` and add `CodecError::InvalidVariableSizedArrayOffsets` Use `CowArrayBytes::validate()` impl `From` for `CowArrayBytes` for various types Array `_element` methods now use `T: Element` Add `vlen` codec metadata Fix codecs bench Implement an experimental vlen codec Use `impl Into<ArrayBytesCow<'a>>` in array methods Use `RawBytesCow` consistently Remove various vlen todo's Cleanup `ArrayBytes` Use `ArrayError::InvalidElementValue` for invalid string encodings Add `ArraySubset::contains()` Add `FillValue::new_empty()` Add remaining vlen support to array `store_` methods and improve vlen validation Add remaining vlen support to array `retrieve_` methods Partial decoding in the vlen filter Fix async vlen errors Sharding codec vlen support Add vlen support to sharding partial decoder vlen support for sharded_readable_ext `offsets_u64_to_usize` handle 32-bit system Minor FillValue doc update Remove unused ArraySubset methods and add related convenience functions Add cities test Add `Arrow32` vlen encoding Add support for Interleave32 (Zarr V2) vlen encoding fmt clippy Set minimum version for num-complex Fix `ArrayBytes` from `&[u8; N]` for rust < 1.77 Add `binary` data type Vlen improve docs and test various encodings. Fix `cities.csv` encoding. `vlen` change encoding names Validate `vlen` codec `length32` encoding against `zarr-python` v2 Don't store `zarrs` metadata in cities test output Split `vlen` into `vlen` and `vlen_interleaved` Vlen supports separate index/dat encoding with full codec chains. 
Fix typesize in vlen `index_codecs` metadata Add support for `String` fill value metadata Add `FillValueMetadata::Unsupported` `ArrayMetadata` can be serialised and deserialised with an unsupported `fill_value`, but `Array` creation will fail. vlen cleanup Change vlen codec identifiers given they are experimental Move duplicate `extract_decoded_regions` fn into `array_bytes` + other minor changes Minor vlen_partial_decoder cleanup Add support for `zarr-python` nonconformant `|O` V2 data type Support conversion of Zarr V2 arrays with `vlen-*` codecs to V3 Update root docs for new vlen related codecs/data types Cleanup `get_vlen_bytes_and_offsets` * Fix store value truncation * Add `ArraySize::new`, fix `ArrayBytes::new_fill_value`, fix `FillValue::equals_all` with vlen data * Add `array_write_read_string` example * Rename `vlen_interleaved` to `vlen_v2` * Fmt pass
- Loading branch information
Showing
183 changed files
with
53,233 additions
and
2,208 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
use itertools::Itertools; | ||
use ndarray::{array, Array2, ArrayD}; | ||
use zarrs::storage::{ | ||
storage_transformer::{StorageTransformerExtension, UsageLogStorageTransformer}, | ||
ReadableWritableListableStorage, | ||
}; | ||
|
||
fn array_write_read() -> Result<(), Box<dyn std::error::Error>> { | ||
use std::sync::Arc; | ||
use zarrs::{ | ||
array::{DataType, FillValue}, | ||
array_subset::ArraySubset, | ||
storage::store, | ||
}; | ||
|
||
// Create a store | ||
// let path = tempfile::TempDir::new()?; | ||
// let mut store: ReadableWritableListableStorage = Arc::new(store::FilesystemStore::new(path.path())?); | ||
// let mut store: ReadableWritableListableStorage = Arc::new(store::FilesystemStore::new( | ||
// "tests/data/array_write_read.zarr", | ||
// )?); | ||
let mut store: ReadableWritableListableStorage = Arc::new(store::MemoryStore::new()); | ||
if let Some(arg1) = std::env::args().collect::<Vec<_>>().get(1) { | ||
if arg1 == "--usage-log" { | ||
let log_writer = Arc::new(std::sync::Mutex::new( | ||
// std::io::BufWriter::new( | ||
std::io::stdout(), | ||
// ) | ||
)); | ||
let usage_log = Arc::new(UsageLogStorageTransformer::new(log_writer, || { | ||
chrono::Utc::now().format("[%T%.3f] ").to_string() | ||
})); | ||
store = usage_log | ||
.clone() | ||
.create_readable_writable_listable_transformer(store); | ||
} | ||
} | ||
|
||
// Create a group | ||
let group_path = "/group"; | ||
let mut group = zarrs::group::GroupBuilder::new().build(store.clone(), group_path)?; | ||
|
||
// Update group metadata | ||
group | ||
.attributes_mut() | ||
.insert("foo".into(), serde_json::Value::String("bar".into())); | ||
|
||
// Write group metadata to store | ||
group.store_metadata()?; | ||
|
||
println!( | ||
"The group metadata is:\n{}\n", | ||
serde_json::to_string_pretty(&group.metadata()).unwrap() | ||
); | ||
|
||
// Create an array | ||
let array_path = "/group/array"; | ||
let array = zarrs::array::ArrayBuilder::new( | ||
vec![4, 4], // array shape | ||
DataType::String, | ||
vec![2, 2].try_into()?, // regular chunk shape | ||
FillValue::from("_"), | ||
) | ||
// .bytes_to_bytes_codecs(vec![]) // uncompressed | ||
.dimension_names(["y", "x"].into()) | ||
// .storage_transformers(vec![].into()) | ||
.build(store.clone(), array_path)?; | ||
|
||
// Write array metadata to store | ||
array.store_metadata()?; | ||
|
||
println!( | ||
"The array metadata is:\n{}\n", | ||
serde_json::to_string_pretty(&array.metadata()).unwrap() | ||
); | ||
|
||
// Write some chunks | ||
array.store_chunk_ndarray( | ||
&[0, 0], | ||
ArrayD::<&str>::from_shape_vec(vec![2, 2], vec!["a", "bb", "ccc", "dddd"]).unwrap(), | ||
)?; | ||
array.store_chunk_ndarray( | ||
&[0, 1], | ||
ArrayD::<&str>::from_shape_vec(vec![2, 2], vec!["4444", "333", "22", "1"]).unwrap(), | ||
)?; | ||
let subset_all = ArraySubset::new_with_shape(array.shape().to_vec()); | ||
let data_all = array.retrieve_array_subset_ndarray::<String>(&subset_all)?; | ||
println!("store_chunk [0, 0] and [0, 1]:\n{data_all}\n"); | ||
|
||
// Write a subset spanning multiple chunks, including updating chunks already written | ||
let ndarray_subset: Array2<&str> = array![["!", "@@"], ["###", "$$$$"]]; | ||
array.store_array_subset_ndarray( | ||
ArraySubset::new_with_ranges(&[1..3, 1..3]).start(), | ||
ndarray_subset, | ||
)?; | ||
let data_all = array.retrieve_array_subset_ndarray::<String>(&subset_all)?; | ||
println!("store_array_subset [1..3, 1..3]:\nndarray::ArrayD<String>\n{data_all}"); | ||
|
||
// Retrieve bytes directly, convert into a single string allocation, create a &str ndarray | ||
// TODO: Add a convenience function for this? | ||
let data_all = array.retrieve_array_subset(&subset_all)?; | ||
let (bytes, offsets) = data_all.into_variable()?; | ||
let string = String::from_utf8(bytes.into_owned())?; | ||
let elements = offsets | ||
.iter() | ||
.tuple_windows() | ||
.map(|(&curr, &next)| &string[curr..next]) | ||
.collect::<Vec<&str>>(); | ||
let ndarray = ArrayD::<&str>::from_shape_vec(subset_all.shape_usize(), elements)?; | ||
println!("ndarray::ArrayD<&str>:\n{ndarray}"); | ||
|
||
Ok(()) | ||
} | ||
|
||
fn main() { | ||
if let Err(err) = array_write_read() { | ||
println!("{:?}", err); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.