diff --git a/.golangci.yml b/.golangci.yml index 2b746fe31f..e7d4e36d70 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -51,9 +51,6 @@ issues: - path: core/state/metrics.go linters: - unused - - path: core/state/statedb_fuzz_test.go - linters: - - unused - path: core/txpool/legacypool/list.go linters: - staticcheck diff --git a/core/blockchain.go b/core/blockchain.go index 94c54fc996..a9de1aabd3 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -1180,8 +1180,7 @@ func (bc *BlockChain) Stop() { // - HEAD-127: So we have a hard limit on the number of blocks reexecuted if !bc.cacheConfig.TrieDirtyDisabled { triedb := bc.triedb - var once sync.Once - + var once sync.Once for _, offset := range []uint64{0, 1, TriesInMemory - 1} { if number := bc.CurrentBlock().Number.Uint64(); number > offset { recent := bc.GetBlockByNumber(number - offset) @@ -1191,9 +1190,9 @@ func (bc *BlockChain) Stop() { } else { rawdb.WriteSafePointBlockNumber(bc.db, recent.NumberU64()) once.Do(func() { - rawdb.WriteHeadBlockHash(bc.db, recent.Hash()) + rawdb.WriteHeadBlockHash(bc.db, recent.Hash()) }) - } + } } } if snapBase != (common.Hash{}) { diff --git a/core/rawdb/accessors_trie.go b/core/rawdb/accessors_trie.go index f5c2f8899a..ea437b8114 100644 --- a/core/rawdb/accessors_trie.go +++ b/core/rawdb/accessors_trie.go @@ -89,6 +89,16 @@ func HasAccountTrieNode(db ethdb.KeyValueReader, path []byte, hash common.Hash) return h.hash(data) == hash } +// ExistsAccountTrieNode checks the presence of the account trie node with the +// specified node path, regardless of the node hash. +func ExistsAccountTrieNode(db ethdb.KeyValueReader, path []byte) bool { + has, err := db.Has(accountTrieNodeKey(path)) + if err != nil { + return false + } + return has +} + // WriteAccountTrieNode writes the provided account trie node into database. func WriteAccountTrieNode(db ethdb.KeyValueWriter, path []byte, node []byte) { if err := db.Put(accountTrieNodeKey(path), node); err != nil { @@ -127,6 +137,16 @@ func HasStorageTrieNode(db ethdb.KeyValueReader, accountHash common.Hash, path [ return h.hash(data) == hash } +// ExistsStorageTrieNode checks the presence of the storage trie node with the +// specified account hash and node path, regardless of the node hash. +func ExistsStorageTrieNode(db ethdb.KeyValueReader, accountHash common.Hash, path []byte) bool { + has, err := db.Has(storageTrieNodeKey(accountHash, path)) + if err != nil { + return false + } + return has +} + // WriteStorageTrieNode writes the provided storage trie node into database. 
func WriteStorageTrieNode(db ethdb.KeyValueWriter, accountHash common.Hash, path []byte, node []byte) { if err := db.Put(storageTrieNodeKey(accountHash, path), node); err != nil { diff --git a/core/rawdb/freezer_table.go b/core/rawdb/freezer_table.go index 76293a2da2..87b33240b8 100644 --- a/core/rawdb/freezer_table.go +++ b/core/rawdb/freezer_table.go @@ -220,6 +220,9 @@ func (t *freezerTable) repair() error { } // Ensure the index is a multiple of indexEntrySize bytes if overflow := stat.Size() % indexEntrySize; overflow != 0 { + if t.readonly { + return fmt.Errorf("index file(path: %s, name: %s) size is not a multiple of %d", t.path, t.name, indexEntrySize) + } truncateFreezerFile(t.index, stat.Size()-overflow) // New file can't trigger this path } // Retrieve the file sizes and prepare for truncation @@ -278,6 +281,9 @@ func (t *freezerTable) repair() error { // Keep truncating both files until they come in sync contentExp = int64(lastIndex.offset) for contentExp != contentSize { + if t.readonly { + return fmt.Errorf("freezer table(path: %s, name: %s, num: %d) is corrupted", t.path, t.name, lastIndex.filenum) + } verbose = true // Truncate the head file to the last offset pointer if contentExp < contentSize { diff --git a/core/rawdb/schema.go b/core/rawdb/schema.go index 13a00d795a..9e7f8500cb 100644 --- a/core/rawdb/schema.go +++ b/core/rawdb/schema.go @@ -215,7 +215,11 @@ func accountSnapshotKey(hash common.Hash) []byte { // storageSnapshotKey = SnapshotStoragePrefix + account hash + storage hash func storageSnapshotKey(accountHash, storageHash common.Hash) []byte { - return append(append(SnapshotStoragePrefix, accountHash.Bytes()...), storageHash.Bytes()...) + buf := make([]byte, len(SnapshotStoragePrefix)+common.HashLength+common.HashLength) + n := copy(buf, SnapshotStoragePrefix) + n += copy(buf[n:], accountHash.Bytes()) + copy(buf[n:], storageHash.Bytes()) + return buf } // storageSnapshotsKey = SnapshotStoragePrefix + account hash + storage hash @@ -274,7 +278,11 @@ func accountTrieNodeKey(path []byte) []byte { // storageTrieNodeKey = trieNodeStoragePrefix + accountHash + nodePath. func storageTrieNodeKey(accountHash common.Hash, path []byte) []byte { - return append(append(trieNodeStoragePrefix, accountHash.Bytes()...), path...) + buf := make([]byte, len(trieNodeStoragePrefix)+common.HashLength+len(path)) + n := copy(buf, trieNodeStoragePrefix) + n += copy(buf[n:], accountHash.Bytes()) + copy(buf[n:], path) + return buf } // IsLegacyTrieNode reports whether a provided database entry is a legacy trie diff --git a/core/state/statedb.go b/core/state/statedb.go index 62397b083e..8c1b7eaea0 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -42,7 +42,12 @@ import ( "github.com/ethereum/go-ethereum/trie/triestate" ) -const defaultNumOfSlots = 100 +const ( + defaultNumOfSlots = 100 + // storageDeleteLimit denotes the highest permissible memory allocation + // employed for contract storage deletion. + storageDeleteLimit = 512 * 1024 * 1024 +) type revision struct { id int @@ -1342,63 +1347,134 @@ func (s *StateDB) clearJournalAndRefund() { s.validRevisions = s.validRevisions[:0] // Snapshots can be created without journal entries } -// deleteStorage iterates the storage trie belongs to the account and mark all -// slots inside as deleted. 
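Note on the key-construction change in core/rawdb/schema.go above: the old chained append re-copies the prefix on every call and, if the prefix slice ever carried spare capacity, could write into its shared backing array. The replacement sizes the buffer once and copies each component into it. A minimal, hypothetical helper (not part of the diff) showing the same pattern:

func prefixedKey(prefix []byte, parts ...[]byte) []byte {
	size := len(prefix)
	for _, part := range parts {
		size += len(part)
	}
	buf := make([]byte, size)
	n := copy(buf, prefix)
	for _, part := range parts {
		n += copy(buf[n:], part)
	}
	return buf
}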
-func (s *StateDB) deleteStorage(addr common.Address, addrHash common.Hash, root common.Hash) (bool, map[common.Hash][]byte, *trienode.NodeSet, error) { - start := time.Now() +// fastDeleteStorage is the function that efficiently deletes the storage trie +// of a specific account. It leverages the associated state snapshot for fast +// storage iteration and constructs trie node deletion markers by creating +// stack trie with iterated slots. +func (s *StateDB) fastDeleteStorage(addrHash common.Hash, root common.Hash) (bool, common.StorageSize, map[common.Hash][]byte, *trienode.NodeSet, error) { + iter, err := s.snaps.StorageIterator(s.originalRoot, addrHash, common.Hash{}) + if err != nil { + return false, 0, nil, nil, err + } + defer iter.Release() + + var ( + size common.StorageSize + nodes = trienode.NewNodeSet(addrHash) + slots = make(map[common.Hash][]byte) + ) + stack := trie.NewStackTrie(func(owner common.Hash, path []byte, hash common.Hash, blob []byte) { + nodes.AddNode(path, trienode.NewDeleted()) + size += common.StorageSize(len(path)) + }) + for iter.Next() { + if size > storageDeleteLimit { + return true, size, nil, nil, nil + } + slot := common.CopyBytes(iter.Slot()) + if err := iter.Error(); err != nil { // error might occur after Slot function + return false, 0, nil, nil, err + } + size += common.StorageSize(common.HashLength + len(slot)) + slots[iter.Hash()] = slot + + if err := stack.Update(iter.Hash().Bytes(), slot); err != nil { + return false, 0, nil, nil, err + } + } + if err := iter.Error(); err != nil { // error might occur during iteration + return false, 0, nil, nil, err + } + if stack.Hash() != root { + return false, 0, nil, nil, fmt.Errorf("snapshot is not matched, exp %x, got %x", root, stack.Hash()) + } + return false, size, slots, nodes, nil +} + +// slowDeleteStorage serves as a less-efficient alternative to "fastDeleteStorage," +// employed when the associated state snapshot is not available. It iterates the +// storage slots along with all internal trie nodes via trie directly. 
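// The fast path above needs a generated state snapshot covering the account,
// while this fallback only needs the storage trie itself, at the cost of
// resolving every trie node from the database during iteration.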
+func (s *StateDB) slowDeleteStorage(addr common.Address, addrHash common.Hash, root common.Hash) (bool, common.StorageSize, map[common.Hash][]byte, *trienode.NodeSet, error) { tr, err := s.db.OpenStorageTrie(s.originalRoot, addr, root) if err != nil { - return false, nil, nil, fmt.Errorf("failed to open storage trie, err: %w", err) + return false, 0, nil, nil, fmt.Errorf("failed to open storage trie, err: %w", err) } // skip deleting storages for EmptyTrie if _, ok := tr.(*trie.EmptyTrie); ok { - return false, nil, nil, nil + return false, 0, nil, nil, nil } it, err := tr.NodeIterator(nil) if err != nil { - return false, nil, nil, fmt.Errorf("failed to open storage iterator, err: %w", err) + return false, 0, nil, nil, fmt.Errorf("failed to open storage iterator, err: %w", err) } var ( - set = trienode.NewNodeSet(addrHash) - slots = make(map[common.Hash][]byte) - stateSize common.StorageSize - nodeSize common.StorageSize + size common.StorageSize + nodes = trienode.NewNodeSet(addrHash) + slots = make(map[common.Hash][]byte) ) for it.Next(true) { - // arbitrary stateSize limit, make it configurable - if stateSize+nodeSize > 512*1024*1024 { - log.Info("Skip large storage deletion", "address", addr.Hex(), "states", stateSize, "nodes", nodeSize) - if metrics.EnabledExpensive { - slotDeletionSkip.Inc(1) - } - return true, nil, nil, nil + if size > storageDeleteLimit { + return true, size, nil, nil, nil } if it.Leaf() { slots[common.BytesToHash(it.LeafKey())] = common.CopyBytes(it.LeafBlob()) - stateSize += common.StorageSize(common.HashLength + len(it.LeafBlob())) + size += common.StorageSize(common.HashLength + len(it.LeafBlob())) continue } if it.Hash() == (common.Hash{}) { continue } - nodeSize += common.StorageSize(len(it.Path())) - set.AddNode(it.Path(), trienode.NewDeleted()) + size += common.StorageSize(len(it.Path())) + nodes.AddNode(it.Path(), trienode.NewDeleted()) } if err := it.Error(); err != nil { + return false, 0, nil, nil, err + } + return false, size, slots, nodes, nil +} + +// deleteStorage is designed to delete the storage trie of a designated account. +// It could potentially be terminated if the storage size is excessively large, +// potentially leading to an out-of-memory panic. The function will make an attempt +// to utilize an efficient strategy if the associated state snapshot is reachable; +// otherwise, it will resort to a less-efficient approach. +func (s *StateDB) deleteStorage(addr common.Address, addrHash common.Hash, root common.Hash) (bool, map[common.Hash][]byte, *trienode.NodeSet, error) { + var ( + start = time.Now() + err error + aborted bool + size common.StorageSize + slots map[common.Hash][]byte + nodes *trienode.NodeSet + ) + // The fast approach can be failed if the snapshot is not fully + // generated, or it's internally corrupted. Fallback to the slow + // one just in case. 
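// Selection logic, as implemented below: with a snapshot available the fast
// path (snapshot iteration + stack trie) is attempted first; if no snapshot
// exists, or the fast path returned an error (for example the snapshot is
// still being generated or is corrupted), the slow trie-iterator path runs
// instead. Both paths report aborted=true once the tracked size exceeds
// storageDeleteLimit (512 MiB).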
+ if s.snap != nil { + aborted, size, slots, nodes, err = s.fastDeleteStorage(addrHash, root) + } + if s.snap == nil || err != nil { + aborted, size, slots, nodes, err = s.slowDeleteStorage(addr, addrHash, root) + } + if err != nil { return false, nil, nil, err } if metrics.EnabledExpensive { - if int64(len(slots)) > slotDeletionMaxCount.Value() { - slotDeletionMaxCount.Update(int64(len(slots))) + if aborted { + slotDeletionSkip.Inc(1) + } + n := int64(len(slots)) + if n > slotDeletionMaxCount.Value() { + slotDeletionMaxCount.Update(n) } - if int64(stateSize+nodeSize) > slotDeletionMaxSize.Value() { - slotDeletionMaxSize.Update(int64(stateSize + nodeSize)) + if int64(size) > slotDeletionMaxSize.Value() { + slotDeletionMaxSize.Update(int64(size)) } slotDeletionTimer.UpdateSince(start) - slotDeletionCount.Mark(int64(len(slots))) - slotDeletionSize.Mark(int64(stateSize + nodeSize)) + slotDeletionCount.Mark(n) + slotDeletionSize.Mark(int64(size)) } - return false, slots, set, nil + return aborted, slots, nodes, nil } // handleDestruction processes all destruction markers and deletes the account diff --git a/core/state/statedb_fuzz_test.go b/core/state/statedb_fuzz_test.go index 8ef6c02e36..26ca9b5e7d 100644 --- a/core/state/statedb_fuzz_test.go +++ b/core/state/statedb_fuzz_test.go @@ -31,10 +31,12 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/state/snapshot" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/triedb/pathdb" "github.com/ethereum/go-ethereum/trie/triestate" ) @@ -179,16 +181,28 @@ func (test *stateTest) run() bool { storageList = append(storageList, copy2DSet(states.Storages)) } disk = rawdb.NewMemoryDatabase() - tdb = trie.NewDatabase(disk, &trie.Config{OnCommit: onCommit}) + tdb = trie.NewDatabase(disk, &trie.Config{OnCommit: onCommit, PathDB: pathdb.Defaults}) sdb = NewDatabaseWithNodeDB(disk, tdb) byzantium = rand.Intn(2) == 0 ) + defer disk.Close() + defer tdb.Close() + + var snaps *snapshot.Tree + if rand.Intn(3) == 0 { + snaps, _ = snapshot.New(snapshot.Config{ + CacheSize: 1, + Recovery: false, + NoBuild: false, + AsyncBuild: false, + }, disk, tdb, types.EmptyRootHash, 128, false) + } for i, actions := range test.actions { root := types.EmptyRootHash if i != 0 { root = roots[len(roots)-1] } - state, err := New(root, sdb, nil) + state, err := New(root, sdb, snaps) if err != nil { panic(err) } @@ -365,8 +379,7 @@ func (test *stateTest) verify(root common.Hash, next common.Hash, db *trie.Datab return nil } -// TODO(Nathan): enable this case after enabling pbss -func testStateChanges(t *testing.T) { +func TestStateChanges(t *testing.T) { config := &quick.Config{MaxCount: 1000} err := quick.Check((*stateTest).run, config) if cerr, ok := err.(*quick.CheckError); ok { diff --git a/core/state/statedb_test.go b/core/state/statedb_test.go index c61f63198a..eed905a87e 100644 --- a/core/state/statedb_test.go +++ b/core/state/statedb_test.go @@ -37,6 +37,8 @@ import ( "github.com/ethereum/go-ethereum/trie" "github.com/ethereum/go-ethereum/trie/triedb/hashdb" "github.com/ethereum/go-ethereum/trie/triedb/pathdb" + "github.com/ethereum/go-ethereum/trie/trienode" + "github.com/holiman/uint256" ) // Tests that updating a state trie does 
not leak any database writes prior to @@ -1112,3 +1114,57 @@ func TestResetObject(t *testing.T) { t.Fatalf("Unexpected storage slot value %v", slot) } } + +func TestDeleteStorage(t *testing.T) { + var ( + disk = rawdb.NewMemoryDatabase() + tdb = trie.NewDatabase(disk, nil) + db = NewDatabaseWithNodeDB(disk, tdb) + snaps, _ = snapshot.New(snapshot.Config{CacheSize: 10}, disk, tdb, types.EmptyRootHash, 128, false) + state, _ = New(types.EmptyRootHash, db, snaps) + addr = common.HexToAddress("0x1") + ) + // Initialize account and populate storage + state.SetBalance(addr, big.NewInt(1)) + state.CreateAccount(addr) + for i := 0; i < 1000; i++ { + slot := common.Hash(uint256.NewInt(uint64(i)).Bytes32()) + value := common.Hash(uint256.NewInt(uint64(10 * i)).Bytes32()) + state.SetState(addr, slot, value) + } + root, _, _ := state.Commit(0, nil) + // Init phase done, create two states, one with snap and one without + fastState, _ := New(root, db, snaps) + slowState, _ := New(root, db, nil) + + obj := fastState.GetOrNewStateObject(addr) + storageRoot := obj.data.Root + + _, _, fastNodes, err := fastState.deleteStorage(addr, crypto.Keccak256Hash(addr[:]), storageRoot) + if err != nil { + t.Fatal(err) + } + + _, _, slowNodes, err := slowState.deleteStorage(addr, crypto.Keccak256Hash(addr[:]), storageRoot) + if err != nil { + t.Fatal(err) + } + check := func(set *trienode.NodeSet) string { + var a []string + set.ForEachWithOrder(func(path string, n *trienode.Node) { + if n.Hash != (common.Hash{}) { + t.Fatal("delete should have empty hashes") + } + if len(n.Blob) != 0 { + t.Fatal("delete should have have empty blobs") + } + a = append(a, fmt.Sprintf("%x", path)) + }) + return strings.Join(a, ",") + } + slowRes := check(slowNodes) + fastRes := check(fastNodes) + if slowRes != fastRes { + t.Fatalf("difference found:\nfast: %v\nslow: %v\n", fastRes, slowRes) + } +} diff --git a/eth/protocols/eth/handler.go b/eth/protocols/eth/handler.go index 7f51d4f5cd..87ed1874e9 100644 --- a/eth/protocols/eth/handler.go +++ b/eth/protocols/eth/handler.go @@ -23,6 +23,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core" + "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/txpool" "github.com/ethereum/go-ethereum/metrics" "github.com/ethereum/go-ethereum/p2p" @@ -95,11 +96,15 @@ type TxPool interface { // MakeProtocols constructs the P2P protocol definitions for `eth`. 
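// When the backend's trie database runs the path scheme, protocol versions up
// to eth/66 are skipped below, since eth/66 still advertises GetNodeData and
// hash-keyed node lookups cannot be served from a path-keyed node store.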
func MakeProtocols(backend Backend, network uint64, dnsdisc enode.Iterator) []p2p.Protocol { - protocols := make([]p2p.Protocol, len(ProtocolVersions)) - for i, version := range ProtocolVersions { + protocols := make([]p2p.Protocol, 0, len(ProtocolVersions)) + for _, version := range ProtocolVersions { version := version // Closure - protocols[i] = p2p.Protocol{ + // Path scheme does not support GetNodeData, don't advertise eth66 on it + if version <= ETH66 && backend.Chain().TrieDB().Scheme() == rawdb.PathScheme { + continue + } + protocols = append(protocols, p2p.Protocol{ Name: ProtocolName, Version: version, Length: protocolLengths[version], @@ -119,7 +124,7 @@ func MakeProtocols(backend Backend, network uint64, dnsdisc enode.Iterator) []p2 }, Attributes: []enr.Entry{currentENREntry(backend.Chain())}, DialCandidates: dnsdisc, - } + }) } return protocols } diff --git a/trie/sync.go b/trie/sync.go index 4f55845991..9da0706075 100644 --- a/trie/sync.go +++ b/trie/sync.go @@ -27,6 +27,7 @@ import ( "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/metrics" ) // ErrNotRequested is returned by the trie sync when it's requested to process a @@ -42,6 +43,16 @@ var ErrAlreadyProcessed = errors.New("already processed") // memory if the node was configured with a significant number of peers. const maxFetchesPerDepth = 16384 +var ( + // deletionGauge is the metric to track how many trie node deletions + // are performed in total during the sync process. + deletionGauge = metrics.NewRegisteredGauge("trie/sync/delete", nil) + + // lookupGauge is the metric to track how many trie node lookups are + // performed to determine if node needs to be deleted. + lookupGauge = metrics.NewRegisteredGauge("trie/sync/lookup", nil) +) + // SyncPath is a path tuple identifying a particular trie node either in a single // trie (account) or a layered trie (account -> storage). // @@ -93,9 +104,10 @@ type LeafCallback func(keys [][]byte, path []byte, leaf []byte, parent common.Ha // nodeRequest represents a scheduled or already in-flight trie node retrieval request. type nodeRequest struct { - hash common.Hash // Hash of the trie node to retrieve - path []byte // Merkle path leading to this node for prioritization - data []byte // Data content of the node, cached until all subtrees complete + hash common.Hash // Hash of the trie node to retrieve + path []byte // Merkle path leading to this node for prioritization + data []byte // Data content of the node, cached until all subtrees complete + deletes [][]byte // List of internal path segments for trie nodes to delete parent *nodeRequest // Parent state node referencing this entry deps int // Number of dependencies before allowed to commit this node @@ -125,18 +137,20 @@ type CodeSyncResult struct { // syncMemBatch is an in-memory buffer of successfully downloaded but not yet // persisted data items. type syncMemBatch struct { - nodes map[string][]byte // In-memory membatch of recently completed nodes - hashes map[string]common.Hash // Hashes of recently completed nodes - codes map[common.Hash][]byte // In-memory membatch of recently completed codes - size uint64 // Estimated batch-size of in-memory data. 
+ nodes map[string][]byte // In-memory membatch of recently completed nodes + hashes map[string]common.Hash // Hashes of recently completed nodes + deletes map[string]struct{} // List of paths for trie node to delete + codes map[common.Hash][]byte // In-memory membatch of recently completed codes + size uint64 // Estimated batch-size of in-memory data. } // newSyncMemBatch allocates a new memory-buffer for not-yet persisted trie nodes. func newSyncMemBatch() *syncMemBatch { return &syncMemBatch{ - nodes: make(map[string][]byte), - hashes: make(map[string]common.Hash), - codes: make(map[common.Hash][]byte), + nodes: make(map[string][]byte), + hashes: make(map[string]common.Hash), + deletes: make(map[string]struct{}), + codes: make(map[common.Hash][]byte), } } @@ -347,16 +361,23 @@ func (s *Sync) ProcessNode(result NodeSyncResult) error { // Commit flushes the data stored in the internal membatch out to persistent // storage, returning any occurred error. func (s *Sync) Commit(dbw ethdb.Batch) error { - // Dump the membatch into a database dbw + // Flush the pending node writes into database batch. for path, value := range s.membatch.nodes { owner, inner := ResolvePath([]byte(path)) rawdb.WriteTrieNode(dbw, owner, inner, s.membatch.hashes[path], value, s.scheme) } + // Flush the pending node deletes into the database batch. + // Please note that each written and deleted node has a + // unique path, ensuring no duplication occurs. + for path := range s.membatch.deletes { + owner, inner := ResolvePath([]byte(path)) + rawdb.DeleteTrieNode(dbw, owner, inner, common.Hash{} /* unused */, s.scheme) + } + // Flush the pending code writes into database batch. for hash, value := range s.membatch.codes { rawdb.WriteCode(dbw, hash, value) } - // Drop the membatch data and return - s.membatch = newSyncMemBatch() + s.membatch = newSyncMemBatch() // reset the batch return nil } @@ -425,6 +446,39 @@ func (s *Sync) children(req *nodeRequest, object node) ([]*nodeRequest, error) { node: node.Val, path: append(append([]byte(nil), req.path...), key...), }} + // Mark all internal nodes between shortNode and its **in disk** + // child as invalid. This is essential in the case of path mode + // scheme; otherwise, state healing might overwrite existing child + // nodes silently while leaving a dangling parent node within the + // range of this internal path on disk. This would break the + // guarantee for state healing. + // + // While it's possible for this shortNode to overwrite a previously + // existing full node, the other branches of the fullNode can be + // retained as they remain untouched and complete. + // + // This step is only necessary for path mode, as there is no deletion + // in hash mode at all. + if _, ok := node.Val.(hashNode); ok && s.scheme == rawdb.PathScheme { + owner, inner := ResolvePath(req.path) + for i := 1; i < len(key); i++ { + // While checking for a non-existent item in Pebble can be less efficient + // without a bloom filter, the relatively low frequency of lookups makes + // the performance impact negligible. 
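// The probes below use the Exists*TrieNode helpers added in core/rawdb, which
// test key presence by path alone (a plain db.Has on the path-scheme key) and
// deliberately ignore the node hash. For a short node at path P with key
// extension K, only the strictly internal paths P+K[:1] .. P+K[:len(K)-1] are
// probed (never the child path P+K itself); anything found there is a
// leftover node from an older trie shape and gets queued for deletion.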
+ var exists bool + if owner == (common.Hash{}) { + exists = rawdb.ExistsAccountTrieNode(s.database, append(inner, key[:i]...)) + } else { + exists = rawdb.ExistsStorageTrieNode(s.database, owner, append(inner, key[:i]...)) + } + if exists { + req.deletes = append(req.deletes, key[:i]) + deletionGauge.Inc(1) + log.Debug("Detected dangling node", "owner", owner, "path", append(inner, key[:i]...)) + } + } + lookupGauge.Inc(int64(len(key) - 1)) + } case *fullNode: for i := 0; i < 17; i++ { if node.Children[i] != nil { @@ -509,10 +563,19 @@ func (s *Sync) commitNodeRequest(req *nodeRequest) error { // Write the node content to the membatch s.membatch.nodes[string(req.path)] = req.data s.membatch.hashes[string(req.path)] = req.hash + // The size tracking refers to the db-batch, not the in-memory data. - // Therefore, we ignore the req.path, and account only for the hash+data - // which eventually is written to db. - s.membatch.size += common.HashLength + uint64(len(req.data)) + if s.scheme == rawdb.PathScheme { + s.membatch.size += uint64(len(req.path) + len(req.data)) + } else { + s.membatch.size += common.HashLength + uint64(len(req.data)) + } + // Delete the internal nodes which are marked as invalid + for _, segment := range req.deletes { + path := append(req.path, segment...) + s.membatch.deletes[string(path)] = struct{}{} + s.membatch.size += uint64(len(path)) + } delete(s.nodeReqs, string(req.path)) s.fetches[len(req.path)]-- diff --git a/trie/sync_test.go b/trie/sync_test.go index dd3506559d..3b7986ef67 100644 --- a/trie/sync_test.go +++ b/trie/sync_test.go @@ -70,31 +70,53 @@ func makeTestTrie(scheme string) (ethdb.Database, *Database, *StateTrie, map[str // checkTrieContents cross references a reconstructed trie with an expected data // content map. -func checkTrieContents(t *testing.T, db ethdb.Database, scheme string, root []byte, content map[string][]byte) { +func checkTrieContents(t *testing.T, db ethdb.Database, scheme string, root []byte, content map[string][]byte, rawTrie bool) { // Check root availability and trie contents ndb := newTestDatabase(db, scheme) - trie, err := NewStateTrie(TrieID(common.BytesToHash(root)), ndb) - if err != nil { - t.Fatalf("failed to create trie at %x: %v", root, err) - } - if err := checkTrieConsistency(db, scheme, common.BytesToHash(root)); err != nil { + if err := checkTrieConsistency(db, scheme, common.BytesToHash(root), rawTrie); err != nil { t.Fatalf("inconsistent trie at %x: %v", root, err) } + type reader interface { + MustGet(key []byte) []byte + } + var r reader + if rawTrie { + trie, err := New(TrieID(common.BytesToHash(root)), ndb) + if err != nil { + t.Fatalf("failed to create trie at %x: %v", root, err) + } + r = trie + } else { + trie, err := NewStateTrie(TrieID(common.BytesToHash(root)), ndb) + if err != nil { + t.Fatalf("failed to create trie at %x: %v", root, err) + } + r = trie + } for key, val := range content { - if have := trie.MustGet([]byte(key)); !bytes.Equal(have, val) { + if have := r.MustGet([]byte(key)); !bytes.Equal(have, val) { t.Errorf("entry %x: content mismatch: have %x, want %x", key, have, val) } } } // checkTrieConsistency checks that all nodes in a trie are indeed present. 
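// The new rawTrie flag (also threaded through checkTrieContents above) picks
// between the raw Trie and the key-hashing StateTrie wrapper, so the same
// helpers can verify both the secure-trie sync tests and the raw tries built
// by the pivot-move tests below.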
-func checkTrieConsistency(db ethdb.Database, scheme string, root common.Hash) error { +func checkTrieConsistency(db ethdb.Database, scheme string, root common.Hash, rawTrie bool) error { ndb := newTestDatabase(db, scheme) - trie, err := NewStateTrie(TrieID(root), ndb) - if err != nil { - return nil // Consider a non existent state consistent + var it NodeIterator + if rawTrie { + trie, err := New(TrieID(root), ndb) + if err != nil { + return nil // Consider a non existent state consistent + } + it = trie.MustNodeIterator(nil) + } else { + trie, err := NewStateTrie(TrieID(root), ndb) + if err != nil { + return nil // Consider a non existent state consistent + } + it = trie.MustNodeIterator(nil) } - it := trie.MustNodeIterator(nil) for it.Next(true) { } return it.Error() @@ -205,7 +227,7 @@ func testIterativeSync(t *testing.T, count int, bypath bool, scheme string) { } } // Cross check that the two tries are in sync - checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), srcData) + checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), srcData, false) } // Tests that the trie scheduler can correctly reconstruct the state even if only @@ -271,7 +293,7 @@ func testIterativeDelayedSync(t *testing.T, scheme string) { } } // Cross check that the two tries are in sync - checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), srcData) + checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), srcData, false) } // Tests that given a root hash, a trie can sync iteratively on a single thread, @@ -341,7 +363,7 @@ func testIterativeRandomSync(t *testing.T, count int, scheme string) { } } // Cross check that the two tries are in sync - checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), srcData) + checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), srcData, false) } // Tests that the trie scheduler can correctly reconstruct the state even if only @@ -413,7 +435,7 @@ func testIterativeRandomDelayedSync(t *testing.T, scheme string) { } } // Cross check that the two tries are in sync - checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), srcData) + checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), srcData, false) } // Tests that a trie sync will not request nodes multiple times, even if they @@ -484,7 +506,7 @@ func testDuplicateAvoidanceSync(t *testing.T, scheme string) { } } // Cross check that the two tries are in sync - checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), srcData) + checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), srcData, false) } // Tests that at any point in time during a sync, only complete sub-tries are in @@ -569,7 +591,7 @@ func testIncompleteSync(t *testing.T, scheme string) { nodeHash := addedHashes[i] value := rawdb.ReadTrieNode(diskdb, owner, inner, nodeHash, scheme) rawdb.DeleteTrieNode(diskdb, owner, inner, nodeHash, scheme) - if err := checkTrieConsistency(diskdb, srcDb.Scheme(), root); err == nil { + if err := checkTrieConsistency(diskdb, srcDb.Scheme(), root, false); err == nil { t.Fatalf("trie inconsistency not caught, missing: %x", path) } rawdb.WriteTrieNode(diskdb, owner, inner, nodeHash, value, scheme) @@ -643,7 +665,7 @@ func testSyncOrdering(t *testing.T, scheme string) { } } // Cross check that the two tries are in sync - checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), srcData) + checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), srcData, false) // Check that the trie 
nodes have been requested path-ordered for i := 0; i < len(reqs)-1; i++ { @@ -664,7 +686,7 @@ func syncWith(t *testing.T, root common.Hash, db ethdb.Database, srcDb *Database // The code requests are ignored here since there is no code // at the testing trie. - paths, nodes, _ := sched.Missing(1) + paths, nodes, _ := sched.Missing(0) var elements []trieElement for i := 0; i < len(paths); i++ { elements = append(elements, trieElement{ @@ -698,7 +720,7 @@ func syncWith(t *testing.T, root common.Hash, db ethdb.Database, srcDb *Database } batch.Write() - paths, nodes, _ = sched.Missing(1) + paths, nodes, _ = sched.Missing(0) elements = elements[:0] for i := 0; i < len(paths); i++ { elements = append(elements, trieElement{ @@ -724,7 +746,7 @@ func testSyncMovingTarget(t *testing.T, scheme string) { // Create a destination trie and sync with the scheduler diskdb := rawdb.NewMemoryDatabase() syncWith(t, srcTrie.Hash(), diskdb, srcDb) - checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), srcData) + checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), srcData, false) // Push more modifications into the src trie, to see if dest trie can still // sync with it(overwrite stale states) @@ -748,7 +770,7 @@ func testSyncMovingTarget(t *testing.T, scheme string) { srcTrie, _ = NewStateTrie(TrieID(root), srcDb) syncWith(t, srcTrie.Hash(), diskdb, srcDb) - checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), diff) + checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), diff, false) // Revert added modifications from the src trie, to see if dest trie can still // sync with it(overwrite reverted states) @@ -772,5 +794,98 @@ func testSyncMovingTarget(t *testing.T, scheme string) { srcTrie, _ = NewStateTrie(TrieID(root), srcDb) syncWith(t, srcTrie.Hash(), diskdb, srcDb) - checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), reverted) + checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), reverted, false) +} + +// Tests if state syncer can correctly catch up the pivot move. 
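// The test re-syncs one destination database against three successive roots
// of the same source trie, first collapsing and then re-expanding its shape;
// in path mode this is the kind of reshaping that can leave stale nodes
// behind on disk, which the deletion markers introduced in trie/sync.go are
// meant to clean up. Each round is verified with checkTrieContents in
// raw-trie mode.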
+func TestPivotMove(t *testing.T) { + testPivotMove(t, rawdb.HashScheme, true) + testPivotMove(t, rawdb.HashScheme, false) + testPivotMove(t, rawdb.PathScheme, true) + testPivotMove(t, rawdb.PathScheme, false) +} + +func testPivotMove(t *testing.T, scheme string, tiny bool) { + var ( + srcDisk = rawdb.NewMemoryDatabase() + srcTrieDB = newTestDatabase(srcDisk, scheme) + srcTrie, _ = New(TrieID(types.EmptyRootHash), srcTrieDB) + + deleteFn = func(key []byte, tr *Trie, states map[string][]byte) { + tr.Delete(key) + delete(states, string(key)) + } + writeFn = func(key []byte, val []byte, tr *Trie, states map[string][]byte) { + if val == nil { + if tiny { + val = randBytes(4) + } else { + val = randBytes(32) + } + } + tr.Update(key, val) + states[string(key)] = common.CopyBytes(val) + } + copyStates = func(states map[string][]byte) map[string][]byte { + cpy := make(map[string][]byte) + for k, v := range states { + cpy[k] = v + } + return cpy + } + ) + stateA := make(map[string][]byte) + writeFn([]byte{0x01, 0x23}, nil, srcTrie, stateA) + writeFn([]byte{0x01, 0x24}, nil, srcTrie, stateA) + writeFn([]byte{0x12, 0x33}, nil, srcTrie, stateA) + writeFn([]byte{0x12, 0x34}, nil, srcTrie, stateA) + writeFn([]byte{0x02, 0x34}, nil, srcTrie, stateA) + writeFn([]byte{0x13, 0x44}, nil, srcTrie, stateA) + + rootA, nodesA, _ := srcTrie.Commit(false) + if err := srcTrieDB.Update(rootA, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodesA), nil); err != nil { + panic(err) + } + if err := srcTrieDB.Commit(rootA, false); err != nil { + panic(err) + } + // Create a destination trie and sync with the scheduler + destDisk := rawdb.NewMemoryDatabase() + syncWith(t, rootA, destDisk, srcTrieDB) + checkTrieContents(t, destDisk, scheme, srcTrie.Hash().Bytes(), stateA, true) + + // Delete element to collapse trie + stateB := copyStates(stateA) + srcTrie, _ = New(TrieID(rootA), srcTrieDB) + deleteFn([]byte{0x02, 0x34}, srcTrie, stateB) + deleteFn([]byte{0x13, 0x44}, srcTrie, stateB) + writeFn([]byte{0x01, 0x24}, nil, srcTrie, stateB) + + rootB, nodesB, _ := srcTrie.Commit(false) + if err := srcTrieDB.Update(rootB, rootA, 0, trienode.NewWithNodeSet(nodesB), nil); err != nil { + panic(err) + } + if err := srcTrieDB.Commit(rootB, false); err != nil { + panic(err) + } + syncWith(t, rootB, destDisk, srcTrieDB) + checkTrieContents(t, destDisk, scheme, srcTrie.Hash().Bytes(), stateB, true) + + // Add elements to expand trie + stateC := copyStates(stateB) + srcTrie, _ = New(TrieID(rootB), srcTrieDB) + + writeFn([]byte{0x01, 0x24}, stateA[string([]byte{0x01, 0x24})], srcTrie, stateC) + writeFn([]byte{0x02, 0x34}, nil, srcTrie, stateC) + writeFn([]byte{0x13, 0x44}, nil, srcTrie, stateC) + + rootC, nodesC, _ := srcTrie.Commit(false) + if err := srcTrieDB.Update(rootC, rootB, 0, trienode.NewWithNodeSet(nodesC), nil); err != nil { + panic(err) + } + if err := srcTrieDB.Commit(rootC, false); err != nil { + panic(err) + } + syncWith(t, rootC, destDisk, srcTrieDB) + checkTrieContents(t, destDisk, scheme, srcTrie.Hash().Bytes(), stateC, true) } diff --git a/trie/triedb/pathdb/difflayer.go b/trie/triedb/pathdb/difflayer.go index d25ac1c601..10567715d2 100644 --- a/trie/triedb/pathdb/difflayer.go +++ b/trie/triedb/pathdb/difflayer.go @@ -114,7 +114,7 @@ func (dl *diffLayer) node(owner common.Hash, path []byte, hash common.Hash, dept if n.Hash != hash { dirtyFalseMeter.Mark(1) log.Error("Unexpected trie node in diff layer", "owner", owner, "path", path, "expect", hash, "got", n.Hash) - return nil, newUnexpectedNodeError("diff", hash, 
n.Hash, owner, path) + return nil, newUnexpectedNodeError("diff", hash, n.Hash, owner, path, n.Blob) } dirtyHitMeter.Mark(1) dirtyNodeHitDepthHist.Update(int64(depth)) diff --git a/trie/triedb/pathdb/disklayer.go b/trie/triedb/pathdb/disklayer.go index 87718290f9..d3b6419cc5 100644 --- a/trie/triedb/pathdb/disklayer.go +++ b/trie/triedb/pathdb/disklayer.go @@ -150,7 +150,7 @@ func (dl *diskLayer) Node(owner common.Hash, path []byte, hash common.Hash) ([]b if nHash != hash { diskFalseMeter.Mark(1) log.Error("Unexpected trie node in disk", "owner", owner, "path", path, "expect", hash, "got", nHash) - return nil, newUnexpectedNodeError("disk", hash, nHash, owner, path) + return nil, newUnexpectedNodeError("disk", hash, nHash, owner, path, nBlob) } if dl.cleans != nil && len(nBlob) > 0 { dl.cleans.Set(key, nBlob) diff --git a/trie/triedb/pathdb/errors.go b/trie/triedb/pathdb/errors.go index f503a9c49d..f6ac0ec4a0 100644 --- a/trie/triedb/pathdb/errors.go +++ b/trie/triedb/pathdb/errors.go @@ -21,6 +21,7 @@ import ( "fmt" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/common/hexutil" ) var ( @@ -46,6 +47,10 @@ var ( errUnexpectedNode = errors.New("unexpected node") ) -func newUnexpectedNodeError(loc string, expHash common.Hash, gotHash common.Hash, owner common.Hash, path []byte) error { - return fmt.Errorf("%w, loc: %s, node: (%x %v), %x!=%x", errUnexpectedNode, loc, owner, path, expHash, gotHash) +func newUnexpectedNodeError(loc string, expHash common.Hash, gotHash common.Hash, owner common.Hash, path []byte, blob []byte) error { + blobHex := "nil" + if len(blob) > 0 { + blobHex = hexutil.Encode(blob) + } + return fmt.Errorf("%w, loc: %s, node: (%x %v), %x!=%x, blob: %s", errUnexpectedNode, loc, owner, path, expHash, gotHash, blobHex) } diff --git a/trie/triedb/pathdb/journal.go b/trie/triedb/pathdb/journal.go index d8c7d39fb9..ea90207f29 100644 --- a/trie/triedb/pathdb/journal.go +++ b/trie/triedb/pathdb/journal.go @@ -21,6 +21,7 @@ import ( "errors" "fmt" "io" + "time" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" @@ -341,6 +342,14 @@ func (db *Database) Journal(root common.Hash) error { if l == nil { return fmt.Errorf("triedb layer [%#x] missing", root) } + disk := db.tree.bottom() + if l, ok := l.(*diffLayer); ok { + log.Info("Persisting dirty state to disk", "head", l.block, "root", root, "layers", l.id-disk.id+disk.buffer.layers) + } else { // disk layer only on noop runs (likely) or deep reorgs (unlikely) + log.Info("Persisting dirty state to disk", "root", root, "layers", disk.buffer.layers) + } + start := time.Now() + // Run the journaling db.lock.Lock() defer db.lock.Unlock() @@ -373,6 +382,6 @@ func (db *Database) Journal(root common.Hash) error { // Set the db in read only mode to reject all following mutations db.readOnly = true - log.Info("Stored journal in triedb", "disk", diskroot, "size", common.StorageSize(journal.Len())) + log.Info("Persisted dirty state to disk", "size", common.StorageSize(journal.Len()), "elapsed", common.PrettyDuration(time.Since(start))) return nil } diff --git a/trie/triedb/pathdb/nodebuffer.go b/trie/triedb/pathdb/nodebuffer.go index 67de225b04..4a7d328b9a 100644 --- a/trie/triedb/pathdb/nodebuffer.go +++ b/trie/triedb/pathdb/nodebuffer.go @@ -71,7 +71,7 @@ func (b *nodebuffer) node(owner common.Hash, path []byte, hash common.Hash) (*tr if n.Hash != hash { dirtyFalseMeter.Mark(1) log.Error("Unexpected trie node in node buffer", "owner", owner, "path", 
path, "expect", hash, "got", n.Hash) - return nil, newUnexpectedNodeError("dirty", hash, n.Hash, owner, path) + return nil, newUnexpectedNodeError("dirty", hash, n.Hash, owner, path, n.Blob) } return n, nil }