diff --git a/firewood/src/stream.rs b/firewood/src/stream.rs index 0df5ed62f..f03d689ff 100644 --- a/firewood/src/stream.rs +++ b/firewood/src/stream.rs @@ -84,7 +84,7 @@ impl FusedStream for MerkleNodeStream<'_, T> { impl<'a, T: TrieReader> MerkleNodeStream<'a, T> { /// Returns a new iterator that will iterate over all the nodes in `merkle` /// with keys greater than or equal to `key`. - pub(super) fn new(merkle: &'a T, key: Key) -> Self { + pub fn new(merkle: &'a T, key: Key) -> Self { Self { state: NodeStreamState::from(key), merkle, @@ -105,7 +105,7 @@ impl Stream for MerkleNodeStream<'_, T> { match state { NodeStreamState::StartFromKey(key) => { - self.state = get_iterator_intial_state(*merkle, key)?; + self.state = get_iterator_initial_state(*merkle, key)?; self.poll_next(_cx) } NodeStreamState::Iterating { iter_stack } => { @@ -173,7 +173,7 @@ impl Stream for MerkleNodeStream<'_, T> { } /// Returns the initial state for an iterator over the given `merkle` which starts at `key`. -fn get_iterator_intial_state( +fn get_iterator_initial_state( merkle: &T, key: &[u8], ) -> Result { diff --git a/fwdctl/Cargo.toml b/fwdctl/Cargo.toml index b65f646d5..ba74a2ca5 100644 --- a/fwdctl/Cargo.toml +++ b/fwdctl/Cargo.toml @@ -5,6 +5,7 @@ edition = "2021" [dependencies] firewood = { version = "0.0.4", path = "../firewood" } +storage = { version = "0.0.4", path = "../storage" } clap = { version = "4.5.0", features = ["cargo", "derive"] } env_logger = "0.11.2" log = "0.4.20" diff --git a/fwdctl/src/check.rs b/fwdctl/src/check.rs new file mode 100644 index 000000000..44b4e3feb --- /dev/null +++ b/fwdctl/src/check.rs @@ -0,0 +1,135 @@ +// Copyright (C) 2023, Ava Labs, Inc. All rights reserved. +// See the file LICENSE.md for licensing terms. 
+ +use clap::Args; +use log::warn; +use std::collections::BTreeMap; +use std::io::{Error, ErrorKind}; +use std::ops::Bound; +use std::str; +use std::sync::Arc; + +use firewood::db::{Db, DbConfig}; +use firewood::v2::api::{self, Db as _}; +use storage::{Committed, HashedNodeReader as _, LinearAddress, Node, NodeStore, ReadableStorage}; + +#[derive(Debug, Args)] +pub struct Options { + /// The database path. Defaults to firewood. + #[arg( + long, + required = false, + value_name = "DB_NAME", + default_value_t = String::from("firewood.db"), + help = "Name of the database" + )] + pub db: String, +} + +/// Check the database named by `opts.db`: walk every node reachable from the current root and +/// warn about gaps or out-of-order areas between the allocated nodes. +pub(super) async fn run(opts: &Options) -> Result<(), api::Error> { + let cfg = DbConfig::builder().truncate(false); + + let db = Db::new(opts.db.clone(), cfg.build()).await?; + + let hash = db.root_hash().await?; + + let Some(hash) = hash else { + println!("Database is empty"); + return Ok(()); + }; + + let rev = db.revision(hash).await?; + + // walk the nodes + + let addr = rev.root_address_and_hash()?.expect("was not empty").0; + let mut allocated = BTreeMap::new(); + + visitor(rev.clone(), addr, &mut allocated)?; + + let mut expected = 2048; + for (addr, size) in allocated.iter() { + match addr.get().cmp(&expected) { + std::cmp::Ordering::Less => { + warn!( + "Node at {:?} is before the expected address {}", + addr, expected + ); + } + std::cmp::Ordering::Greater => { + warn!("{} bytes missing at {}", addr.get() - expected, expected); + } + std::cmp::Ordering::Equal => {} + } + expected = addr.get() + rev.size_from_area_index(*size); + } + + Ok(()) +} + +/// Record the node at `addr` in `allocated` (address -> area size index) and recurse into the +/// children of a branch node. Returns an error if the node overlaps a previously recorded node +/// or does not fit below the size recorded in the header. +fn visitor( + rev: Arc>, + addr: LinearAddress, + allocated: &mut BTreeMap, +) -> Result<(), Error> { + // find the node before this one, check if it overlaps + if let Some((found_addr, found_size)) = allocated + .range((Bound::Unbounded, Bound::Included(addr))) + .next_back() + { + match found_addr + .get() + .checked_add(rev.size_from_area_index(*found_size)) + { + None => warn!("Node at {:?} overflows a u64", 
found_addr), + Some(end) => { + if end > addr.get() { + warn!( + "Node at {:?} overlaps with another node at {:?} (size: {})", + addr, found_addr, found_size + ); + return Err(Error::new(ErrorKind::Other, "Overlapping nodes")); + } + } + } + } + if addr.get() > rev.header().size() { + warn!( + "Node at {:?} starts past the database high water mark", + addr + ); + return Err(Error::new(ErrorKind::Other, "Node overflows database")); + } + + let (node, size) = rev.uncached_read_node_and_size(addr)?; + // compare by subtraction: `addr <= header size` holds here, while `addr + area size` could overflow + if rev.size_from_area_index(size) > rev.header().size() - addr.get() { + warn!( + "Node at {:?} extends past the database high water mark", + addr + ); + return Err(Error::new(ErrorKind::Other, "Node overflows database")); + } + + allocated.insert(addr, size); + + if let Node::Branch(branch) = node.as_ref() { + for child in branch.children.iter() { + match child { + None => {} + Some(child) => match child { + storage::Child::Node(_) => unreachable!(), + storage::Child::AddressWithHash(addr, _hash) => { + visitor(rev.clone(), *addr, allocated)?; + } + }, + } + } + } + Ok(()) +} diff --git a/fwdctl/src/graph.rs b/fwdctl/src/graph.rs index e070169e4..1307cba66 100644 --- a/fwdctl/src/graph.rs +++ b/fwdctl/src/graph.rs @@ -18,7 +18,7 @@ pub struct Options { } pub(super) async fn run(opts: &Options) -> Result<(), api::Error> { - log::debug!("dump database {:?}", opts); + log::debug!("graph database {:?}", opts); let cfg = DbConfig::builder().truncate(false); let db = Db::new(opts.db.clone(), cfg.build()).await?; diff --git a/fwdctl/src/main.rs b/fwdctl/src/main.rs index 1c80537e7..f5e2a29f3 100644 --- a/fwdctl/src/main.rs +++ b/fwdctl/src/main.rs @@ -4,6 +4,7 @@ use clap::{Parser, Subcommand}; use firewood::v2::api; +pub mod check; pub mod create; pub mod delete; pub mod dump; @@ -45,6 +46,8 @@ enum Commands { Root(root::Options), /// Dump contents of key/value store Dump(dump::Options), + /// Check a database + Check(check::Options), /// Produce a dot file of the database 
Graph(graph::Options), } @@ -65,6 +68,7 @@ async fn main() -> Result<(), api::Error> { Commands::Delete(opts) => delete::run(opts).await, Commands::Root(opts) => root::run(opts).await, Commands::Dump(opts) => dump::run(opts).await, + Commands::Check(opts) => check::run(opts).await, Commands::Graph(opts) => graph::run(opts).await, } } diff --git a/storage/src/nodestore.rs b/storage/src/nodestore.rs index 56e0f65ff..98daa1e9b 100644 --- a/storage/src/nodestore.rs +++ b/storage/src/nodestore.rs @@ -192,8 +192,7 @@ struct StoredArea { impl NodeStore { /// Returns (index, area_size) for the [StoredArea] at `addr`. /// `index` is the index of `area_size` in [AREA_SIZES]. - #[allow(dead_code)] - fn area_index_and_size(&self, addr: LinearAddress) -> Result<(AreaIndex, u64), Error> { + pub fn area_index_and_size(&self, addr: LinearAddress) -> Result<(AreaIndex, u64), Error> { let mut area_stream = self.storage.stream_from(addr.get())?; let index: AreaIndex = serializer() @@ -240,6 +239,30 @@ impl NodeStore { } Ok(node) } + + /// Read the [Node] stored at `addr` along with the single byte that precedes it (its area size index). 
+ /// This is an uncached read, primarily used by check utilities + pub fn uncached_read_node_and_size( + &self, + addr: LinearAddress, + ) -> Result<(SharedNode, u8), Error> { + let mut area_stream = self.storage.stream_from(addr.get())?; + let mut size = [0u8]; + area_stream.read_exact(&mut size)?; + let node: SharedNode = Node::from_reader(area_stream)?.into(); + Ok((node, size[0])) + } + + /// Get a reference to the header of this nodestore + pub fn header(&self) -> &NodeStoreHeader { + &self.header + } + + /// Get the area size for `index`, an index into [AREA_SIZES] (used by the checker). + /// Panics if `index` is out of range. + pub fn size_from_area_index(&self, index: AreaIndex) -> u64 { + AREA_SIZES[index as usize] + } } impl NodeStore { @@ -325,7 +348,7 @@ impl Parentable for Arc { impl NodeStore, S> { /// When an immutable proposal commits, we need to reparent any proposal that /// has the committed proposal as it's parent - pub fn commit_reparent(&self, other: &Arc, S>>) -> bool { + pub fn commit_reparent(&self, other: &Arc, S>>) { match *other.kind.parent.load() { NodeStoreParent::Proposed(ref parent) => { if Arc::ptr_eq(&self.kind, parent) { @@ -333,12 +356,9 @@ impl NodeStore, S> { .kind .parent .store(NodeStoreParent::Committed(self.kind.root_hash()).into()); - true - } else { - false } } - NodeStoreParent::Committed(_) => false, + NodeStoreParent::Committed(_) => {} } } } @@ -602,7 +622,7 @@ pub type FreeLists = [Option; NUM_AREA_SIZES]; /// The [NodeStoreHeader] is at the start of the ReadableStorage. #[derive(Copy, Debug, PartialEq, Eq, Serialize, Deserialize, Clone, NoUninit, AnyBitPattern)] #[repr(C)] -struct NodeStoreHeader { +pub struct NodeStoreHeader { /// Identifies the version of firewood used to create this [NodeStore]. 
version: Version, /// always "1"; verifies endianness @@ -634,6 +654,11 @@ impl NodeStoreHeader { free_lists: Default::default(), } } + + /// Returns the size recorded in this header (the checker uses it as the high water mark) + pub fn size(&self) -> u64 { + self.size + } } /// A [FreeArea] is stored at the start of the area that contained a node that