From d19119e4985eceab4fc849fd6c9794f90d60fde7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Zemanovi=C4=8D?= Date: Tue, 16 Aug 2022 12:51:19 +0200 Subject: [PATCH] Switch to use storage::KeySeg and add LazySet Also refactored the iterators implementation to take advantage of changes from #335. Note that this requires `'static` lifetime bound on the types of the collections' elements, which means we cannot use non-static references, but we wouldn't do that anyway. --- .../storage_api/collections/lazy_hashmap.rs | 50 ++++---- .../storage_api/collections/lazy_hashset.rs | 119 ++++++++++++++++++ .../storage_api/collections/lazy_map.rs | 58 +++++---- .../storage_api/collections/lazy_set.rs | 70 ++++++----- .../storage_api/collections/lazy_vec.rs | 35 +++--- .../src/ledger/storage_api/collections/mod.rs | 27 +++- 6 files changed, 252 insertions(+), 107 deletions(-) create mode 100644 shared/src/ledger/storage_api/collections/lazy_hashset.rs diff --git a/shared/src/ledger/storage_api/collections/lazy_hashmap.rs b/shared/src/ledger/storage_api/collections/lazy_hashmap.rs index 405262b33d..fbb76beb8b 100644 --- a/shared/src/ledger/storage_api/collections/lazy_hashmap.rs +++ b/shared/src/ledger/storage_api/collections/lazy_hashmap.rs @@ -1,4 +1,4 @@ -//! Lazy hash map +//! Lazy hash map. use std::marker::PhantomData; @@ -12,7 +12,22 @@ use crate::types::storage; /// Subkey corresponding to the data elements of the LazyMap pub const DATA_SUBKEY: &str = "data"; -/// LazyHashmap ! fill in ! +/// Lazy hash map. +/// +/// This can be used as an alternative to `std::collections::HashMap` and +/// `BTreeMap`. In the lazy map, the elements do not reside in memory but are +/// instead read and written to storage sub-keys of the storage `key` given to +/// construct the map. 
+/// +/// In the [`LazyHashMap`], the type of key `K` can be anything that implements +/// [`BorshSerialize`] and [`BorshDeserialize`] and a hex string of sha256 hash +/// over the borsh encoded keys are used as storage key segments. +/// +/// This is different from [`super::LazyMap`], which uses [`storage::KeySeg`] +/// trait. +/// +/// Additionally, [`LazyHashMap`] also writes the unhashed keys into the +/// storage together with the values (using an internal `KeyVal` type). pub struct LazyHashMap { key: storage::Key, phantom_k: PhantomData, @@ -26,10 +41,10 @@ struct KeyVal { val: V, } -impl LazyMap +impl LazyHashMap where - K: BorshDeserialize + BorshSerialize, - V: BorshDeserialize + BorshSerialize, + K: BorshDeserialize + BorshSerialize + 'static, + V: BorshDeserialize + BorshSerialize + 'static, { /// Create or use an existing map with the given storage `key`. pub fn new(key: storage::Key) -> Self { @@ -85,7 +100,7 @@ where key: &K, ) -> Result> { let res = self.get_key_val(storage, key)?; - Ok(res.map(|elem| elem.1)) + Ok(res.map(|(_key, val)| val)) } /// Returns the key-value corresponding to the key, if any. 
@@ -120,23 +135,12 @@ where &self, storage: &'a impl StorageRead, ) -> Result> + 'a> { - let iter = storage.iter_prefix(&self.get_data_prefix())?; - let iter = itertools::unfold(iter, |iter| { - match storage.iter_next(iter) { - Ok(Some((_key, value))) => { - match KeyVal::::try_from_slice(&value[..]) { - Ok(KeyVal { key, val }) => Some(Ok((key, val))), - Err(err) => Some(Err(storage_api::Error::new(err))), - } - } - Ok(None) => None, - Err(err) => { - // Propagate errors into Iterator's Item - Some(Err(err)) - } - } - }); - Ok(iter) + let iter = storage_api::iter_prefix(storage, &self.get_data_prefix())?; + Ok(iter.map(|key_val_res| { + let (_key, val) = key_val_res?; + let KeyVal { key, val } = val; + Ok((key, val)) + })) } /// Reads a key-value from storage diff --git a/shared/src/ledger/storage_api/collections/lazy_hashset.rs b/shared/src/ledger/storage_api/collections/lazy_hashset.rs new file mode 100644 index 0000000000..ae03ff1f0e --- /dev/null +++ b/shared/src/ledger/storage_api/collections/lazy_hashset.rs @@ -0,0 +1,119 @@ +//! Lazy hash set. + +use std::marker::PhantomData; + +use borsh::{BorshDeserialize, BorshSerialize}; + +use super::super::Result; +use super::hasher::hash_for_storage_key; +use crate::ledger::storage_api::{self, StorageRead, StorageWrite}; +use crate::types::storage; + +/// Subkey corresponding to the data elements of the LazySet +pub const DATA_SUBKEY: &str = "data"; + +/// Lazy hash set. +/// +/// This can be used as an alternative to `std::collections::HashSet` and +/// `BTreeSet`. In the lazy set, the elements do not reside in memory but are +/// instead read and written to storage sub-keys of the storage `key` given to +/// construct the set. +/// +/// In the [`LazyHashSet`], the type of value `T` can be anything that +/// [`BorshSerialize`] and [`BorshDeserialize`] and a hex string of sha256 hash +/// over the borsh encoded values are used as storage key segments. 
+/// +/// This is different from [`super::LazySet`], which uses [`storage::KeySeg`] +/// trait. +/// +/// Additionally, [`LazyHashSet`] also writes the unhashed values into the +/// storage. +pub struct LazyHashSet { + key: storage::Key, + phantom: PhantomData, +} + +impl LazyHashSet +where + T: BorshSerialize + BorshDeserialize + 'static, +{ + /// Create or use an existing set with the given storage `key`. + pub fn new(key: storage::Key) -> Self { + Self { + key, + phantom: PhantomData, + } + } + + /// Adds a value to the set. If the set did not have this value present, + /// `Ok(true)` is returned, `Ok(false)` otherwise. + pub fn insert(&self, storage: &mut S, val: &T) -> Result + where + S: StorageWrite + StorageRead, + { + if self.contains(storage, val)? { + Ok(false) + } else { + let data_key = self.get_data_key(val); + storage.write(&data_key, &val)?; + Ok(true) + } + } + + /// Removes a value from the set. Returns whether the value was present in + /// the set. + pub fn remove(&self, storage: &mut S, val: &T) -> Result + where + S: StorageWrite + StorageRead, + { + let data_key = self.get_data_key(val); + let value: Option = storage.read(&data_key)?; + storage.delete(&data_key)?; + Ok(value.is_some()) + } + + /// Returns whether the set contains a value. + pub fn contains( + &self, + storage: &impl StorageRead, + val: &T, + ) -> Result { + let value: Option = storage.read(&self.get_data_key(val))?; + Ok(value.is_some()) + } + + /// Returns whether the set contains no elements. + pub fn is_empty(&self, storage: &impl StorageRead) -> Result { + let mut iter = storage.iter_prefix(&self.get_data_prefix())?; + Ok(storage.iter_next(&mut iter)?.is_none()) + } + + /// An iterator visiting all elements. The iterator element type is + /// `Result`, because iterator's call to `next` may fail with e.g. out of + /// gas or data decoding error. 
+ /// + /// Note that this function shouldn't be used in transactions and VPs code + /// on unbounded sets to avoid gas usage increasing with the length of the + /// set. + pub fn iter<'a>( + &self, + storage: &'a impl StorageRead, + ) -> Result> + 'a> { + let iter = storage_api::iter_prefix(storage, &self.get_data_prefix())?; + Ok(iter.map(|key_val_res| { + let (_key, val) = key_val_res?; + Ok(val) + })) + } + + /// Get the prefix of set's elements storage + fn get_data_prefix(&self) -> storage::Key { + self.key.push(&DATA_SUBKEY.to_owned()).unwrap() + } + + /// Get the sub-key of a given element + fn get_data_key(&self, val: &T) -> storage::Key { + let hash_str = hash_for_storage_key(val); + self.get_data_prefix().push(&hash_str).unwrap() + } +} diff --git a/shared/src/ledger/storage_api/collections/lazy_map.rs b/shared/src/ledger/storage_api/collections/lazy_map.rs index 01af3c5a42..bf7b45324b 100644 --- a/shared/src/ledger/storage_api/collections/lazy_map.rs +++ b/shared/src/ledger/storage_api/collections/lazy_map.rs @@ -1,18 +1,30 @@ -//! Lazy hash map +//! Lazy map. -use std::fmt::Display; use std::marker::PhantomData; use borsh::{BorshDeserialize, BorshSerialize}; use super::super::Result; -use crate::ledger::storage_api::{self, StorageRead, StorageWrite}; -use crate::types::storage; +use super::ReadError; +use crate::ledger::storage_api::{self, ResultExt, StorageRead, StorageWrite}; +use crate::types::storage::{self, KeySeg}; /// Subkey corresponding to the data elements of the LazyMap pub const DATA_SUBKEY: &str = "data"; -/// LazyMap ! fill in ! +/// Lazy map. +/// +/// This can be used as an alternative to `std::collections::HashMap` and +/// `BTreeMap`. In the lazy map, the elements do not reside in memory but are +/// instead read and written to storage sub-keys of the storage `key` used to +/// construct the map. 
+/// +/// In the [`LazyMap`], the type of key `K` can be anything that implements +/// [`storage::KeySeg`] and this trait is used to turn the keys into key +/// segments. +/// +/// This is different from [`super::LazyHashMap`], which hashes borsh encoded +/// key. pub struct LazyMap { key: storage::Key, phantom_k: PhantomData, @@ -21,8 +33,8 @@ pub struct LazyMap { impl LazyMap where - K: BorshDeserialize + BorshSerialize + Display, - V: BorshDeserialize + BorshSerialize, + K: storage::KeySeg, + V: BorshDeserialize + BorshSerialize + 'static, { /// Create or use an existing map with the given storage `key`. pub fn new(key: storage::Key) -> Self { @@ -94,24 +106,17 @@ where pub fn iter<'a>( &self, storage: &'a impl StorageRead, - ) -> Result> + 'a> { - let iter = storage.iter_prefix(&self.get_data_prefix())?; - let iter = itertools::unfold(iter, |iter| { - match storage.iter_next(iter) { - Ok(Some((_key, value))) => { - match V::try_from_slice(&value[..]) { - Ok(decoded_value) => Some(Ok(decoded_value)), - Err(err) => Some(Err(storage_api::Error::new(err))), - } - } - Ok(None) => None, - Err(err) => { - // Propagate errors into Iterator's Item - Some(Err(err)) - } - } - }); - Ok(iter) + ) -> Result> + 'a> { + let iter = storage_api::iter_prefix(storage, &self.get_data_prefix())?; + Ok(iter.map(|key_val_res| { + let (key, val) = key_val_res?; + let last_key_seg = key + .last() + .ok_or(ReadError::UnexpectedlyEmptyStorageKey) + .into_storage_result()?; + let key = K::parse(last_key_seg.raw()).into_storage_result()?; + Ok((key, val)) + })) } /// Reads a value from storage @@ -139,6 +144,7 @@ where /// Get the sub-key of a given element fn get_data_key(&self, key: &K) -> storage::Key { - self.get_data_prefix().push(&key.to_string()).unwrap() + let key_str = key.to_db_key(); + self.get_data_prefix().push(&key_str).unwrap() } } diff --git a/shared/src/ledger/storage_api/collections/lazy_set.rs b/shared/src/ledger/storage_api/collections/lazy_set.rs index 
862485b687..8c1bbd871f 100644 --- a/shared/src/ledger/storage_api/collections/lazy_set.rs +++ b/shared/src/ledger/storage_api/collections/lazy_set.rs @@ -1,18 +1,28 @@ -//! Lazy hash set +//! Lazy set. use std::marker::PhantomData; -use borsh::{BorshDeserialize, BorshSerialize}; - use super::super::Result; -use super::hasher::hash_for_storage_key; -use crate::ledger::storage_api::{self, StorageRead, StorageWrite}; -use crate::types::storage; +use super::ReadError; +use crate::ledger::storage_api::{self, ResultExt, StorageRead, StorageWrite}; +use crate::types::storage::{self, KeySeg}; /// Subkey corresponding to the data elements of the LazySet pub const DATA_SUBKEY: &str = "data"; -/// lazy hash set +/// Lazy set. +/// +/// This can be used as an alternative to `std::collections::HashSet` and +/// `BTreeSet`. In the lazy set, the elements do not reside in memory but are +/// instead read and written to storage sub-keys of the storage `key` used to +/// construct the set. +/// +/// In the [`LazySet`], the type of value `T` can be anything that implements +/// [`storage::KeySeg`] and this trait is used to turn the values into key +/// segments. +/// +/// This is different from [`super::LazyHashSet`], which hashes borsh encoded +/// values. pub struct LazySet { key: storage::Key, phantom: PhantomData, @@ -20,7 +30,7 @@ pub struct LazySet { impl LazySet where - T: BorshSerialize + BorshDeserialize, + T: storage::KeySeg, { /// Create or use an existing set with the given storage `key`. 
pub fn new(key: storage::Key) -> Self { @@ -40,7 +50,9 @@ where Ok(false) } else { let data_key = self.get_data_key(val); - storage.write(&data_key, &val)?; + // The actual value is written into the key, so the value written to + // the storage is empty (unit) + storage.write(&data_key, ())?; Ok(true) } } @@ -52,7 +64,7 @@ where S: StorageWrite + StorageRead, { let data_key = self.get_data_key(val); - let value: Option = storage.read(&data_key)?; + let value: Option<()> = storage.read(&data_key)?; storage.delete(&data_key)?; Ok(value.is_some()) } @@ -63,14 +75,15 @@ where storage: &impl StorageRead, val: &T, ) -> Result { - let value: Option = storage.read(&self.get_data_key(val))?; + let value: Option<()> = storage.read(&self.get_data_key(val))?; Ok(value.is_some()) } /// Returns whether the set contains no elements. pub fn is_empty(&self, storage: &impl StorageRead) -> Result { - let mut iter = storage.iter_prefix(&self.get_data_prefix())?; - Ok(storage.iter_next(&mut iter)?.is_none()) + let mut iter = + storage_api::iter_prefix_bytes(storage, &self.get_data_prefix())?; + Ok(iter.next().is_none()) } /// An iterator visiting all elements. 
The iterator element type is @@ -84,23 +97,16 @@ where &self, storage: &'a impl StorageRead, ) -> Result> + 'a> { - let iter = storage.iter_prefix(&self.get_data_prefix())?; - let iter = itertools::unfold(iter, |iter| { - match storage.iter_next(iter) { - Ok(Some((_key, value))) => { - match T::try_from_slice(&value[..]) { - Ok(decoded_value) => Some(Ok(decoded_value)), - Err(err) => Some(Err(storage_api::Error::new(err))), - } - } - Ok(None) => None, - Err(err) => { - // Propagate errors into Iterator's Item - Some(Err(err)) - } - } - }); - Ok(iter) + let iter = + storage_api::iter_prefix_bytes(storage, &self.get_data_prefix())?; + Ok(iter.map(|key_val_res| { + let (key, _val) = key_val_res?; + let last_key_seg = key + .last() + .ok_or(ReadError::UnexpectedlyEmptyStorageKey) + .into_storage_result()?; + T::parse(last_key_seg.raw()).into_storage_result() + })) } /// Get the prefix of set's elements storage @@ -110,7 +116,7 @@ where /// Get the sub-key of a given element fn get_data_key(&self, val: &T) -> storage::Key { - let hash_str = hash_for_storage_key(val); - self.get_data_prefix().push(&hash_str).unwrap() + let key_str = val.to_db_key(); + self.get_data_prefix().push(&key_str).unwrap() } } diff --git a/shared/src/ledger/storage_api/collections/lazy_vec.rs b/shared/src/ledger/storage_api/collections/lazy_vec.rs index c55c39e516..f57797f35c 100644 --- a/shared/src/ledger/storage_api/collections/lazy_vec.rs +++ b/shared/src/ledger/storage_api/collections/lazy_vec.rs @@ -1,4 +1,4 @@ -//! Lazy vec +//! Lazy dynamically-sized vector. use std::marker::PhantomData; @@ -13,7 +13,12 @@ pub const LEN_SUBKEY: &str = "len"; /// Subkey corresponding to the data elements of the LazyVec pub const DATA_SUBKEY: &str = "data"; -/// LazyVec ! fill in ! +/// Lazy dynamically-sized vector. +/// +/// This can be used as an alternative to `std::vec::Vec`. 
In the lazy +/// vector, the elements do not reside in memory but are instead read and +/// written to storage sub-keys of the storage `key` used to construct the +/// vector. pub struct LazyVec { key: storage::Key, phantom: PhantomData, @@ -21,7 +26,7 @@ pub struct LazyVec { impl LazyVec where - T: BorshSerialize + BorshDeserialize, + T: BorshSerialize + BorshDeserialize + 'static, { /// Create or use an existing vector with the given storage `key`. pub fn new(key: storage::Key) -> Self { @@ -44,7 +49,7 @@ where /// Removes the last element from a vector and returns it, or `Ok(None)` if /// it is empty. - + /// /// Note that an empty vector is completely removed from storage. pub fn pop(&self, storage: &mut S) -> Result> where @@ -99,23 +104,11 @@ where &self, storage: &'a impl StorageRead, ) -> Result> + 'a> { - let iter = storage.iter_prefix(&self.get_data_prefix())?; - let iter = itertools::unfold(iter, |iter| { - match storage.iter_next(iter) { - Ok(Some((_key, value))) => { - match T::try_from_slice(&value[..]) { - Ok(decoded_value) => Some(Ok(decoded_value)), - Err(err) => Some(Err(storage_api::Error::new(err))), - } - } - Ok(None) => None, - Err(err) => { - // Propagate errors into Iterator's Item - Some(Err(err)) - } - } - }); - Ok(iter) + let iter = storage_api::iter_prefix(storage, &self.get_data_prefix())?; + Ok(iter.map(|key_val_res| { + let (_key, val) = key_val_res?; + Ok(val) + })) } /// Get the prefix of set's elements storage diff --git a/shared/src/ledger/storage_api/collections/mod.rs b/shared/src/ledger/storage_api/collections/mod.rs index b0dc43779b..156615b9de 100644 --- a/shared/src/ledger/storage_api/collections/mod.rs +++ b/shared/src/ledger/storage_api/collections/mod.rs @@ -1,13 +1,30 @@ //! Lazy data structures for storage access where elements are not all loaded //! into memory. This serves to minimize gas costs, avoid unbounded iteration -//! in some cases, and ease the validation of storage changes in the VP. +//! 
in some cases, and ease the validation of storage changes in VPs. //! -//! Rather than finding the diff of the state before and after, the VP will -//! just receive the storage sub-keys that have experienced changes. -//! -//! CONTINUE TO UPDATE THE ABOVE +//! Rather than finding the diff of the state before and after (which requires +//! iteration over both of the states that also have to be decoded), VPs will +//! just receive the storage sub-keys that have experienced changes without +//! having to check any of the unchanged elements. + +use thiserror::Error; mod hasher; +pub mod lazy_hashmap; +pub mod lazy_hashset; pub mod lazy_map; pub mod lazy_set; pub mod lazy_vec; + +pub use lazy_hashmap::LazyHashMap; +pub use lazy_hashset::LazyHashSet; +pub use lazy_map::LazyMap; +pub use lazy_set::LazySet; +pub use lazy_vec::LazyVec; + +#[allow(missing_docs)] +#[derive(Error, Debug)] +pub enum ReadError { + #[error("A storage key was unexpectedly empty")] + UnexpectedlyEmptyStorageKey, +}