Skip to content
This repository has been archived by the owner on Nov 15, 2023. It is now read-only.

pallet-mmr: handle forks without collisions in offchain storage #11594

Merged
merged 35 commits into from
Jul 7, 2022
Merged
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
4ded369
pallet-mmr: fix some typos
acatangiu Jun 1, 2022
eb9eea8
pallet-mmr: make the MMR resilient to chain forks
acatangiu Jun 1, 2022
7c8aac0
pallet-mmr: get hash for block that added node
acatangiu Jun 2, 2022
c82ac27
beefy-mmr: add debug logging
acatangiu Jun 21, 2022
98abe4a
add explanatory comment
acatangiu Jun 22, 2022
db41f81
account for block offset of pallet activation
acatangiu Jun 22, 2022
bfd4142
add support for finding all nodes added by leaf
acatangiu Jun 22, 2022
417de11
minor improvements
acatangiu Jun 23, 2022
25468f9
add helper to return all nodes added to mmr with a leaf append
Lederstrumpf Jun 23, 2022
19b35ee
simplify leaf_node_index_to_leaf_index
Lederstrumpf Jun 23, 2022
f9f202a
dead fish: this also doesn't work
acatangiu Jun 23, 2022
05c5f26
simplify rightmost_leaf_node_index_from_pos
Lederstrumpf Jun 23, 2022
e15810d
minor fix
acatangiu Jun 24, 2022
f803ce9
move leaf canonicalization to offchain worker
acatangiu Jun 24, 2022
94e3b61
move storage related code to storage.rs
acatangiu Jun 24, 2022
ecb77a7
on offchain reads use canonic key for old leaves
acatangiu Jun 24, 2022
a17cb65
fix offchain worker write using canon key
acatangiu Jun 24, 2022
e9ab363
fix pallet-mmr tests
acatangiu Jun 24, 2022
b75b0ae
Merge branch 'master' of github.com:paritytech/substrate into mmr-han…
acatangiu Jun 27, 2022
8856db1
add documentation and fix logging
acatangiu Jun 27, 2022
dfc5b45
add offchain mmr canonicalization test
acatangiu Jun 28, 2022
e9edd28
test canon + generate + verify
acatangiu Jun 28, 2022
f472e9b
fix pallet-beefy-mmr tests
acatangiu Jun 28, 2022
5ed95c7
implement review suggestions
acatangiu Jun 28, 2022
2c8b101
improve test
acatangiu Jun 29, 2022
56f2eaa
pallet-mmr: add offchain pruning of forks
acatangiu Jun 30, 2022
e22a936
Merge branch 'master' of github.com:paritytech/substrate into mmr-han…
acatangiu Jun 30, 2022
2629734
pallet-mmr: improve offchain pruning
acatangiu Jul 1, 2022
58d116f
Merge branch 'master' of github.com:paritytech/substrate into mmr-han…
acatangiu Jul 1, 2022
4e6de82
pallet-mmr: improve MMRStore<OffchainStorage>::get()
acatangiu Jul 1, 2022
e72e344
Merge branch 'master' of github.com:paritytech/substrate into mmr-han…
acatangiu Jul 5, 2022
6335512
pallet-mmr: storage: improve logs
acatangiu Jul 5, 2022
eab718c
fix tests: correctly persist overlay
acatangiu Jul 5, 2022
8a8f3d2
pallet-mmr: fix numeric typo in test
Lederstrumpf Jul 6, 2022
ba9fd71
add comment around LeafData requirements
acatangiu Jul 7, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 13 additions & 10 deletions frame/beefy-mmr/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,16 +44,12 @@ pub fn beefy_log(log: ConsensusLog<BeefyId>) -> DigestItem {
DigestItem::Consensus(BEEFY_ENGINE_ID, log.encode())
}

fn offchain_key(pos: usize) -> Vec<u8> {
(<Test as pallet_mmr::Config>::INDEXING_PREFIX, pos as u64).encode()
}

fn read_mmr_leaf(ext: &mut TestExternalities, index: usize) -> MmrLeaf {
fn read_mmr_leaf(ext: &mut TestExternalities, key: Vec<u8>) -> MmrLeaf {
type Node = pallet_mmr::primitives::DataOrHash<Keccak256, MmrLeaf>;
ext.persist_offchain_overlay();
let offchain_db = ext.offchain_db();
offchain_db
.get(&offchain_key(index))
.get(&key)
.map(|d| Node::decode(&mut &*d).unwrap())
.map(|n| match n {
Node::Data(d) => d,
Expand Down Expand Up @@ -105,12 +101,17 @@ fn should_contain_mmr_digest() {

#[test]
fn should_contain_valid_leaf_data() {
fn offchain_key(parent_hash: H256, pos: usize) -> Vec<u8> {
(<Test as pallet_mmr::Config>::INDEXING_PREFIX, parent_hash, pos as u64).encode()
}

let mut ext = new_test_ext(vec![1, 2, 3, 4]);
ext.execute_with(|| {
let parent_hash = ext.execute_with(|| {
init_block(1);
<frame_system::Pallet<Test>>::parent_hash()
});

let mmr_leaf = read_mmr_leaf(&mut ext, 0);
let mmr_leaf = read_mmr_leaf(&mut ext, offchain_key(parent_hash, 0));
assert_eq!(
mmr_leaf,
MmrLeaf {
Expand All @@ -128,11 +129,13 @@ fn should_contain_valid_leaf_data() {
);

// build second block on top
ext.execute_with(|| {
let parent_hash = ext.execute_with(|| {
init_block(2);
<frame_system::Pallet<Test>>::parent_hash()
});

let mmr_leaf = read_mmr_leaf(&mut ext, 1);
// let key =
acatangiu marked this conversation as resolved.
Show resolved Hide resolved
let mmr_leaf = read_mmr_leaf(&mut ext, offchain_key(parent_hash, 1));
assert_eq!(
mmr_leaf,
MmrLeaf {
Expand Down
64 changes: 59 additions & 5 deletions frame/merkle-mountain-range/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,10 @@

use codec::Encode;
use frame_support::weights::Weight;
use sp_runtime::traits::{self, One, Saturating};
use sp_runtime::{
traits::{self, One, Saturating},
SaturatedConversion,
};

#[cfg(any(feature = "runtime-benchmarks", test))]
mod benchmarking;
Expand Down Expand Up @@ -116,12 +119,12 @@ pub mod pallet {
/// Prefix for elements stored in the Off-chain DB via Indexing API.
///
/// Each node of the MMR is inserted both on-chain and off-chain via Indexing API.
/// The former does not store full leaf content, just it's compact version (hash),
/// The former does not store full leaf content, just its compact version (hash),
/// and some of the inner mmr nodes might be pruned from on-chain storage.
/// The latter will contain all the entries in their full form.
///
/// Each node is stored in the Off-chain DB under key derived from the
/// [`Self::INDEXING_PREFIX`] and it's in-tree index (MMR position).
/// [`Self::INDEXING_PREFIX`] and its in-tree index (MMR position).
const INDEXING_PREFIX: &'static [u8];

/// A hasher type for MMR.
Expand Down Expand Up @@ -215,8 +218,26 @@ pub mod pallet {
<RootHash<T, I>>::put(root);

let peaks_after = mmr::utils::NodesUtils::new(leaves).number_of_peaks();

T::WeightInfo::on_initialize(peaks_before.max(peaks_after))
}

fn offchain_worker(_n: T::BlockNumber) {
use mmr::storage::{OffchainStorage, Storage};
// MMR pallet uses offchain storage to hold full MMR and leaves.
// The leaves are saved under fork-unique keys `(parent_hash, pos)`.
// MMR Runtime depends on `frame_system::block_hash(block_num)` mappings to find
// parent hashes for particular nodes or leaves.
// This MMR offchain worker function moves a rolling window of the same size
// as `frame_system::block_hash` map, where nodes/leaves added by blocks that are just
// about to exit the window are "canonicalized" so that their offchain key no longer
// depends on `parent_hash` therefore on access to `frame_system::block_hash`.
//
// This approach works to eliminate fork-induced leaf collisions in offchain db,
// under the assumption that no fork will be deeper than `frame_system::BlockHashCount`
// blocks (2400 blocks on Polkadot, Kusama, Rococo, etc).
cheme marked this conversation as resolved.
Show resolved Hide resolved
Storage::<OffchainStorage, T, I, LeafOf<T, I>>::canonicalize_offchain();
}
}
}

Expand Down Expand Up @@ -254,17 +275,50 @@ where
}

impl<T: Config<I>, I: 'static> Pallet<T, I> {
fn offchain_key(pos: NodeIndex) -> sp_std::prelude::Vec<u8> {
/// Derive the fork-aware offchain key for MMR node `pos`.
///
/// The key is the encoded tuple `(INDEXING_PREFIX, parent_hash, pos)`, where `parent_hash`
/// is the parent hash of the block that originally added the node. Mixing the hash into
/// the key keeps entries written on different chain forks from overwriting each other.
fn offchain_key(
    parent_hash: <T as frame_system::Config>::Hash,
    pos: NodeIndex,
) -> sp_std::prelude::Vec<u8> {
    let key_parts = (T::INDEXING_PREFIX, parent_hash, pos);
    key_parts.encode()
}

/// Derive the canonical (fork-independent) offchain key for MMR node `pos`.
///
/// The key is the encoded tuple `(INDEXING_PREFIX, pos)`; unlike the fork-aware key it
/// carries no parent hash. Used for nodes added by now finalized blocks.
fn canon_offchain_key(pos: NodeIndex) -> sp_std::prelude::Vec<u8> {
    let key_parts = (T::INDEXING_PREFIX, pos);
    key_parts.encode()
}

/// Look up the parent hash of the block that appended leaf `leaf_index` to the MMR.
///
/// `leaves_count` is the current number of leaves in the MMR; together with the current
/// block number it is used to work out which block added the leaf.
///
/// Should only be called for blocks still available in `<frame_system::Pallet<T>>::block_hash`.
fn parent_hash_of_leaf(
    leaf_index: LeafIndex,
    leaves_count: LeafIndex,
) -> <T as frame_system::Config>::Hash {
    // Leaves are zero-indexed and one leaf was added per block since pallet activation,
    // while block numbers are one-indexed. Hence the block that added `leaf_index` is
    //   `block_num = block_num_when_pallet_activated + leaf_index + 1`
    // and its parent is
    //   `parent_block_num = (current_block_num - leaves_count) + leaf_index`.
    let current_block_num = <frame_system::Pallet<T>>::block_number();
    let activation_block_num = current_block_num.saturating_sub(leaves_count.saturated_into());
    let parent_block_num: <T as frame_system::Config>::BlockNumber =
        activation_block_num.saturating_add(leaf_index.saturated_into());
    <frame_system::Pallet<T>>::block_hash(parent_block_num)
}

/// Generate a MMR proof for the given `leaf_indices`.
///
/// Note this method can only be used from an off-chain context
/// (Offchain Worker or Runtime API call), since it requires
/// all the leaves to be present.
/// It may return an error or panic if used incorrectly.
acatangiu marked this conversation as resolved.
Show resolved Hide resolved
pub fn generate_batch_proof(
leaf_indices: Vec<NodeIndex>,
leaf_indices: Vec<LeafIndex>,
) -> Result<
(Vec<LeafOf<T, I>>, primitives::BatchProof<<T as Config<I>>::Hash>),
primitives::Error,
Expand Down
109 changes: 105 additions & 4 deletions frame/merkle-mountain-range/src/mmr/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
//! A MMR storage implementations.

use codec::Encode;
use frame_support::traits::Get;
use mmr_lib::helper;
use sp_io::offchain_index;
use sp_std::iter::Peekable;
Expand Down Expand Up @@ -58,16 +59,101 @@ impl<StorageType, T, I, L> Default for Storage<StorageType, T, I, L> {
}
}

impl<T, I, L> Storage<OffchainStorage, T, I, L>
where
T: Config<I>,
I: 'static,
L: primitives::FullLeaf,
{
/// Move nodes and leaves added by block `N` in offchain db from _fork-aware key_ to
/// _canonical key_,
/// where `N` is `frame_system::BlockHashCount` blocks behind current block number.
///
/// This "canonicalization" process is required because the _fork-aware key_ value depends
/// on `frame_system::block_hash(block_num)` map which only holds the last
/// `frame_system::BlockHashCount` blocks.
///
/// Should only be called from offchain context, because it requires both read and write
/// access to offchain db.
pub fn canonicalize_offchain() {
use sp_core::offchain::StorageKind;
use sp_io::offchain;
use sp_runtime::traits::UniqueSaturatedInto;

// Effectively move a rolling window of fork-unique leaves. Once out of the window, leaves
// are "canonicalized" in offchain db by moving them under `Pallet::canon_offchain_key`.
let leaves = NumberOfLeaves::<T, I>::get();
let window_size =
<T as frame_system::Config>::BlockHashCount::get().unique_saturated_into();
if leaves >= window_size {
// move the rolling window towards the end of `block_num->hash` mappings available
// in the runtime: we "canonicalize" the leaf at the end.
let leaf_to_canon = leaves.saturating_sub(window_size);
let parent_hash_of_leaf = Pallet::<T, I>::parent_hash_of_leaf(leaf_to_canon, leaves);
cheme marked this conversation as resolved.
Show resolved Hide resolved
let nodes_to_canon = NodesUtils::right_branch_ending_in_leaf(leaf_to_canon);
sp_std::if_std! {
acatangiu marked this conversation as resolved.
Show resolved Hide resolved
frame_support::log::debug!(
target: "runtime::mmr", "Nodes to canon for leaf {}: {:?}",
leaf_to_canon, nodes_to_canon
);
}
for pos in nodes_to_canon {
let key = Pallet::<T, I>::offchain_key(parent_hash_of_leaf, pos);
let canon_key = Pallet::<T, I>::canon_offchain_key(pos);
// Retrieve the element from Off-chain DB under fork-aware key.
if let Some(elem) = offchain::local_storage_get(StorageKind::PERSISTENT, &key) {
// Delete entry with old key.
offchain::local_storage_clear(StorageKind::PERSISTENT, &key);
// Add under new canon key.
offchain::local_storage_set(StorageKind::PERSISTENT, &canon_key, &elem);
sp_std::if_std! {
frame_support::log::debug!(
target: "runtime::mmr",
"Moved elem at pos {} from key {:?} to canon key {:?}",
pos, key, canon_key
);
}
} else {
sp_std::if_std! {
frame_support::log::debug!(
target: "runtime::mmr",
"Offchain: could not get elem at pos {} using key {:?}",
pos, key
);
}
}
}
}
}
}

impl<T, I, L> mmr_lib::MMRStore<NodeOf<T, I, L>> for Storage<OffchainStorage, T, I, L>
where
T: Config<I>,
I: 'static,
L: primitives::FullLeaf + codec::Decode,
{
fn get_elem(&self, pos: NodeIndex) -> mmr_lib::Result<Option<NodeOf<T, I, L>>> {
let key = Pallet::<T, I>::offchain_key(pos);
let leaves_count = NumberOfLeaves::<T, I>::get();
// Get the parent hash of the ancestor block that added node at index `pos`.
// Use the hash as extra identifier to differentiate between various `pos` entries
// in offchain DB coming from various chain forks.
let ancestor_leaf_idx = NodesUtils::leaf_index_that_added_node(pos);
let parent_hash_of_ancestor =
Pallet::<T, I>::parent_hash_of_leaf(ancestor_leaf_idx, leaves_count);
let key = Pallet::<T, I>::offchain_key(parent_hash_of_ancestor, pos);
sp_std::if_std! {
frame_support::log::debug!(
target: "runtime::mmr", "offchain get {}: leaf idx {:?}, hash {:?}, key {:?}",
pos, ancestor_leaf_idx, parent_hash_of_ancestor, key
);
}
// Retrieve the element from Off-chain DB.
Ok(sp_io::offchain::local_storage_get(sp_core::offchain::StorageKind::PERSISTENT, &key)
.or_else(|| {
let key = Pallet::<T, I>::canon_offchain_key(pos);
cheme marked this conversation as resolved.
Show resolved Hide resolved
sp_io::offchain::local_storage_get(sp_core::offchain::StorageKind::PERSISTENT, &key)
})
.and_then(|v| codec::Decode::decode(&mut &*v).ok()))
}

Expand Down Expand Up @@ -112,11 +198,26 @@ where
let mut leaf_index = leaves;
let mut node_index = size;

// Use parent hash of block adding new nodes (this block) as extra identifier
// in offchain DB to avoid DB collisions and overwrites in case of forks.
let parent_hash = <frame_system::Pallet<T>>::parent_hash();
for elem in elems {
// For now we store this leaf offchain keyed by `(parent_hash, node_index)`
// to make it fork-resistant.
// Offchain worker task will "canonicalize" it `frame_system::BlockHashCount` blocks
// later when we are not worried about forks anymore (highly unlikely to have a fork
// in the chain that deep).
// "Canonicalization" in this case means moving this leaf under a new key based
// only on the leaf's `node_index`.
let key = Pallet::<T, I>::offchain_key(parent_hash, node_index);
sp_std::if_std! {
frame_support::log::debug!(
target: "runtime::mmr", "offchain set: pos {} parent_hash {:?} key {:?}",
node_index, parent_hash, key
);
}
// Indexing API is used to store the full node content (both leaf and inner).
elem.using_encoded(|elem| {
offchain_index::set(&Pallet::<T, I>::offchain_key(node_index), elem)
});
elem.using_encoded(|elem| offchain_index::set(&key, elem));

// On-chain we are going to only store new peaks.
if peaks_to_store.next_if_eq(&node_index).is_some() {
Expand Down
68 changes: 68 additions & 0 deletions frame/merkle-mountain-range/src/mmr/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
//! Merkle Mountain Range utilities.

use crate::primitives::{LeafIndex, NodeIndex};
use mmr_lib::helper;

/// MMR nodes & size -related utilities.
pub struct NodesUtils {
Expand Down Expand Up @@ -53,11 +54,78 @@ impl NodesUtils {

64 - self.no_of_leaves.next_power_of_two().leading_zeros()
}

/// Return the `LeafIndex` of the leaf whose insertion added `node_index` to the MMR.
///
/// The node position is first mapped to the position of the leaf responsible for it,
/// and that leaf position is then translated into a leaf index.
pub fn leaf_index_that_added_node(node_index: NodeIndex) -> LeafIndex {
    Self::leaf_node_index_to_leaf_index(Self::rightmost_leaf_node_index_from_pos(node_index))
}

// Translate a _leaf_ `NodeIndex` to its `LeafIndex`.
fn leaf_node_index_to_leaf_index(pos: NodeIndex) -> LeafIndex {
Lederstrumpf marked this conversation as resolved.
Show resolved Hide resolved
if pos == 0 {
return 0
}
let peaks = helper::get_peaks(pos);
(pos + peaks.len() as u64) >> 1
}

// Map any node position to the position of the rightmost leaf beneath it, which is the
// leaf whose insertion caused node `pos` to be added. The leaf sits exactly
// `height(pos)` positions before `pos`.
fn rightmost_leaf_node_index_from_pos(pos: NodeIndex) -> NodeIndex {
    let height = helper::pos_height_in_tree(pos) as u64;
    pos - height
}

/// List the positions of all nodes added to the MMR when leaf `leaf_index` was appended.
///
/// The result starts with the position of the leaf itself and continues with one
/// consecutive position for every trailing one bit in `leaf_index` (the inner nodes
/// created by that append), in ascending order.
pub fn right_branch_ending_in_leaf(leaf_index: LeafIndex) -> crate::Vec<u64> {
    let leaf_pos = helper::leaf_index_to_pos(leaf_index);
    let last_parent_pos = leaf_pos + leaf_index.trailing_ones() as u64;
    (leaf_pos..=last_parent_pos).collect()
}
}

#[cfg(test)]
mod tests {
use super::*;
use mmr_lib::helper::leaf_index_to_pos;

#[test]
fn should_calculate_node_index_from_leaf_index() {
    // Round trip: leaf index -> node position (via `mmr_lib`) -> leaf index.
    for leaf_index in 0..100000 {
        let leaf_pos = leaf_index_to_pos(leaf_index);
        let recovered = NodesUtils::leaf_node_index_to_leaf_index(leaf_pos);
        assert_eq!(recovered, leaf_index);
    }
}

#[test]
fn should_calculate_right_branch_correctly() {
    // Reference implementation: start at the leaf's position and keep walking to the next
    // position for as long as that position is an inner node (height > 0).
    fn left_jump_sequence(leaf_index: LeafIndex) -> Vec<u64> {
        let pos = leaf_index_to_pos(leaf_index);
        let mut right_branch_ending_in_leaf = vec![pos];
        let mut next_pos = pos + 1;
        while mmr_lib::helper::pos_height_in_tree(next_pos) > 0 {
            right_branch_ending_in_leaf.push(next_pos);
            next_pos += 1;
        }
        right_branch_ending_in_leaf
    }

    // Both functions take a *leaf index*, so feed them the leaf index itself. Converting it
    // to a node position first would only exercise the sparse subset of leaf indices that
    // happen to equal a leaf position.
    for leaf_index in 0..100000 {
        assert_eq!(
            NodesUtils::right_branch_ending_in_leaf(leaf_index),
            left_jump_sequence(leaf_index)
        );
    }
}

#[test]
fn should_calculate_rightmost_leaf_node_index_from_pos() {
    // Every node position must appear in the branch of nodes added by the leaf that
    // `rightmost_leaf_node_index_from_pos` attributes it to.
    for pos in 0..100000 {
        let branch = NodesUtils::right_branch_ending_in_leaf(
            NodesUtils::leaf_node_index_to_leaf_index(
                NodesUtils::rightmost_leaf_node_index_from_pos(pos),
            ),
        );
        assert!(branch.contains(&pos));
    }
}

#[test]
fn should_calculate_number_of_leaves_correctly() {
Expand Down
Loading