From 9117d4de9872548252993fe825b62dcb7b97e1dc Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Tue, 24 May 2022 13:51:11 -0700 Subject: [PATCH] fix: make flush match lotus as much as possible (#586) * fix: make flush match lotus as much as possible 1. Reject anything we don't expect. 2. Properly handle identity cids. * fix: typos. Co-authored-by: Aayush Rajasekaran Co-authored-by: Aayush Rajasekaran --- fvm/src/blockstore/buffered.rs | 71 ++++++++++++++++++++++++---------- 1 file changed, 50 insertions(+), 21 deletions(-) diff --git a/fvm/src/blockstore/buffered.rs b/fvm/src/blockstore/buffered.rs index 29049ac0c..4efb45511 100644 --- a/fvm/src/blockstore/buffered.rs +++ b/fvm/src/blockstore/buffered.rs @@ -5,6 +5,7 @@ use anyhow::{anyhow, Result}; use byteorder::{BigEndian, ByteOrder, ReadBytesExt}; use cid::Cid; use fvm_ipld_blockstore::{Blockstore, Buffered}; +use fvm_shared::commcid::{FIL_COMMITMENT_SEALED, FIL_COMMITMENT_UNSEALED}; // TODO: figure out where to put this. const DAG_CBOR: u64 = 0x71; @@ -171,31 +172,59 @@ fn copy_rec<'a>( root: Cid, buffer: &mut Vec<(Cid, &'a [u8])>, ) -> Result<()> { - // TODO: Make this non-recursive. - // Skip identity and Filecoin commitment Cids - if root.codec() != DAG_CBOR { - return Ok(()); - } - - let block = &*cache - .get(&root) - .ok_or_else(|| anyhow!("Invalid link ({}) in flushing buffered store", root))?; - - scan_for_links(&mut Cursor::new(block), |link| { - if link.codec() != DAG_CBOR { - return Ok(()); + const DAG_RAW: u64 = 0x55; + const BLAKE2B_256: u64 = 0xb220; + const BLAKE2B_LEN: u8 = 32; + const IDENTITY: u64 = 0x0; + + // Differences from lotus (vm.Copy): + // 1. We assume that if we don't have a block in our buffer, it must already be in the client. + // don't check. This should only happen if the lotus node is missing state. + // 2. We always write-back new blocks, even if lotus already has them. We haven't noticed a perf + // impact. + + match (root.codec(), root.hash().code(), root.hash().size()) { + // Allow non-truncated blake2b-256 raw/cbor (code/state) + (DAG_RAW | DAG_CBOR, BLAKE2B_256, BLAKE2B_LEN) => (), + // Ignore raw identity cids (fake code cids) + (DAG_RAW, IDENTITY, _) => return Ok(()), + // Copy links from cbor identity cids. + // We shouldn't be creating these at the moment, but lotus' vm.Copy supports them. + (DAG_CBOR, IDENTITY, _) => { + return scan_for_links(&mut Cursor::new(root.hash().digest()), |link| { + copy_rec(cache, link, buffer) + }) } - - // DB reads are expensive. So we check if it exists in the cache. - // If it doesnt exist in the DB, which is likely, we proceed with using the cache. - if !cache.contains_key(&link) { - return Ok(()); + // Ignore commitments (not even going to check the hash function. + (FIL_COMMITMENT_UNSEALED | FIL_COMMITMENT_SEALED, _, _) => return Ok(()), + // Fail on anything else. We usually want to continue on error, but there's really no going + // back from here. + (codec, hash, length) => { + return Err(anyhow!( + "cid {root} has unexpected codec ({codec}), hash ({hash}), or length ({length})" + )) } + } - // Recursively find more links under the links we're iterating over. - copy_rec(cache, link, buffer) - })?; + // If we don't have the block, we assume it's already in the datastore. + // + // The alternative would be to check if it's in the datastore, but that's likely even more + // expensive. And there wouldn't be much we could do at that point but abort the block. + let block = match cache.get(&root) { + Some(blk) => blk, + None => return Ok(()), + }; + + // At the moment, we only expect dag-cbor and raw. + // In M2, we'll need to copy explicitly. + if root.codec() == DAG_CBOR { + // TODO: Make this non-recursive. + scan_for_links(&mut Cursor::new(block), |link| { + copy_rec(cache, link, buffer) + })?; + } + // Finally, push the block. We do this _last_ so that we always include write before parents. buffer.push((root, block)); Ok(())