From 3813e135450f4c21829a13bf94a0743d493c001f Mon Sep 17 00:00:00 2001 From: VM <112189277+sysvm@users.noreply.github.com> Date: Tue, 9 Apr 2024 17:15:04 +0800 Subject: [PATCH] cmd/geth: add hbss to pbss convert tool (#79) Co-authored-by: Fynn Co-authored-by: VM --- cmd/geth/dbcmd.go | 292 +++++++++++++++++++++++++++++++++++++++-- core/rawdb/database.go | 22 ++++ trie/hbss2pbss.go | 247 ++++++++++++++++++++++++++++++++++ trie/node.go | 5 + 4 files changed, 554 insertions(+), 12 deletions(-) create mode 100644 trie/hbss2pbss.go diff --git a/cmd/geth/dbcmd.go b/cmd/geth/dbcmd.go index 6f802716c5..8a5c6e0fc5 100644 --- a/cmd/geth/dbcmd.go +++ b/cmd/geth/dbcmd.go @@ -19,6 +19,7 @@ package main import ( "bytes" "fmt" + "math" "os" "os/signal" "path/filepath" @@ -69,6 +70,10 @@ Remove blockchain and state databases`, dbExportCmd, dbMetadataCmd, dbCheckStateContentCmd, + dbHbss2PbssCmd, + dbPruneHashTrieCmd, + dbTrieGetCmd, + dbTrieDeleteCmd, }, } dbInspectCmd = &cli.Command{ @@ -91,6 +96,54 @@ Remove blockchain and state databases`, For each trie node encountered, it checks that the key corresponds to the keccak256(value). 
If this is not true, this indicates a data corruption.`, } + dbHbss2PbssCmd = &cli.Command{ + Action: hbss2pbss, + Name: "hbss-to-pbss", + ArgsUsage: "", + Flags: []cli.Flag{ + utils.DataDirFlag, + utils.SyncModeFlag, + }, + Usage: "Convert Hash-Base to Path-Base trie node.", + Description: `This command iterates the entire trie node database and convert the hash-base node to path-base node.`, + } + dbTrieGetCmd = &cli.Command{ + Action: dbTrieGet, + Name: "trie-get", + Usage: "Show the value of a trie node path key", + ArgsUsage: "[trie owner] ", + Flags: []cli.Flag{ + utils.DataDirFlag, + utils.SyncModeFlag, + utils.MainnetFlag, + utils.StateSchemeFlag, + }, + Description: "This command looks up the specified trie node key from the database.", + } + dbTrieDeleteCmd = &cli.Command{ + Action: dbTrieDelete, + Name: "trie-delete", + Usage: "delete the specify trie node", + ArgsUsage: "[trie owner] | ", + Flags: []cli.Flag{ + utils.DataDirFlag, + utils.SyncModeFlag, + utils.MainnetFlag, + utils.StateSchemeFlag, + }, + Description: "This command delete the specify trie node from the database.", + } + dbPruneHashTrieCmd = &cli.Command{ + Action: pruneHashTrie, + Name: "prune-hash-trie", + ArgsUsage: "", + Flags: []cli.Flag{ + utils.DataDirFlag, + utils.SyncModeFlag, + }, + Usage: "[Caution]Prune all the hash trie node in diskdb", + Description: `This command iterates the entrie kv in leveldb and delete all the hash trie node.`, + } dbStatCmd = &cli.Command{ Action: dbStats, Name: "stats", @@ -410,6 +463,134 @@ func dbGet(ctx *cli.Context) error { return nil } +// dbTrieGet shows the value of a given database key +func dbTrieGet(ctx *cli.Context) error { + if ctx.NArg() < 1 || ctx.NArg() > 2 { + return fmt.Errorf("required arguments: %v", ctx.Command.ArgsUsage) + } + stack, _ := makeConfigNode(ctx) + defer stack.Close() + + db := utils.MakeChainDatabase(ctx, stack, false) + defer db.Close() + + scheme := ctx.String(utils.StateSchemeFlag.Name) + if scheme == "" { + scheme 
= rawdb.HashScheme + } + + if scheme == rawdb.PathScheme { + var ( + pathKey []byte + owner []byte + err error + ) + if ctx.NArg() == 1 { + pathKey, err = hexutil.Decode(ctx.Args().Get(0)) + if err != nil { + log.Error("Could not decode the value", "error", err) + return err + } + nodeVal, hash := rawdb.ReadAccountTrieNode(db, pathKey) + log.Info("TrieGet result", "path key", common.Bytes2Hex(pathKey), "hash", hash, "node", trie.NodeString(hash.Bytes(), nodeVal)) + } else if ctx.NArg() == 2 { + owner, err = hexutil.Decode(ctx.Args().Get(0)) + if err != nil { + log.Error("Could not decode the value", "error", err) + return err + } + pathKey, err = hexutil.Decode(ctx.Args().Get(1)) + if err != nil { + log.Error("Could not decode the value", "error", err) + return err + } + + nodeVal, hash := rawdb.ReadStorageTrieNode(db, common.BytesToHash(owner), pathKey) + log.Info("TrieGet result", "path key", common.Bytes2Hex(pathKey), "owner", common.BytesToHash(owner), + "hash", hash, "node", trie.NodeString(hash.Bytes(), nodeVal)) + } + } else if scheme == rawdb.HashScheme { + if ctx.NArg() == 1 { + hashKey, err := hexutil.Decode(ctx.Args().Get(0)) + if err != nil { + log.Error("Could not decode the value", "error", err) + return err + } + val, err := db.Get(hashKey) + if err != nil { + log.Error("Failed to get value from db, ", "error", err) + return err + } + log.Info("TrieGet result", "hash key", common.BytesToHash(hashKey), "node", trie.NodeString(hashKey, val)) + } else { + return fmt.Errorf("hash scheme takes exactly one argument, got %d", ctx.NArg()) + } + } + + return nil +} + +// dbTrieDelete delete the trienode of a given database key +func dbTrieDelete(ctx *cli.Context) error { + if ctx.NArg() < 1 || ctx.NArg() > 2 { + return fmt.Errorf("required arguments: %v", ctx.Command.ArgsUsage) + } + stack, _ := makeConfigNode(ctx) + defer stack.Close() + + db := utils.MakeChainDatabase(ctx, stack, false) + defer db.Close() + + scheme := ctx.String(utils.StateSchemeFlag.Name) + if scheme == "" { + scheme = rawdb.HashScheme + } 
+ + if scheme == rawdb.PathScheme { + var ( + pathKey []byte + owner []byte + err error + ) + if ctx.NArg() == 1 { + pathKey, err = hexutil.Decode(ctx.Args().Get(0)) + if err != nil { + log.Error("Could not decode the value", "error", err) + return err + } + rawdb.DeleteAccountTrieNode(db, pathKey) + } else if ctx.NArg() == 2 { + owner, err = hexutil.Decode(ctx.Args().Get(0)) + if err != nil { + log.Error("Could not decode the value", "error", err) + return err + } + pathKey, err = hexutil.Decode(ctx.Args().Get(1)) + if err != nil { + log.Error("Could not decode the value", "error", err) + return err + } + rawdb.DeleteStorageTrieNode(db, common.BytesToHash(owner), pathKey) + } + } else if scheme == rawdb.HashScheme { + if ctx.NArg() == 1 { + hashKey, err := hexutil.Decode(ctx.Args().Get(0)) + if err != nil { + log.Error("Could not decode the value", "error", err) + return err + } + err = db.Delete(hashKey) + if err != nil { + log.Error("Failed to delete data in db", "err", err) + return err + } + } else { + return fmt.Errorf("hash scheme takes exactly one argument, got %d", ctx.NArg()) + } + } + return nil +} + // dbDelete deletes a key from the database func dbDelete(ctx *cli.Context) error { if ctx.NArg() != 1 { @@ -423,7 +604,7 @@ func dbDelete(ctx *cli.Context) error { key, err := common.ParseHexOrString(ctx.Args().Get(0)) if err != nil { - log.Info("Could not decode the key", "error", err) + log.Error("Could not decode the key", "error", err) return err } data, err := db.Get(key) @@ -431,7 +612,7 @@ func dbDelete(ctx *cli.Context) error { fmt.Printf("Previous value: %#x\n", data) } if err = db.Delete(key); err != nil { - log.Info("Delete operation returned an error", "key", fmt.Sprintf("%#x", key), "error", err) + log.Error("Failed to delete value in db", "key", fmt.Sprintf("%#x", key), "error", err) return err } return nil @@ -456,12 +637,12 @@ func dbPut(ctx *cli.Context) error { ) key, err = common.ParseHexOrString(ctx.Args().Get(0)) if err != nil { - log.Info("Could not decode the key", "error", err) + 
log.Error("Could not decode the key", "error", err) return err } value, err = hexutil.Decode(ctx.Args().Get(1)) if err != nil { - log.Info("Could not decode the value", "error", err) + log.Error("Could not decode the value", "error", err) return err } data, err = db.Get(key) @@ -490,30 +671,30 @@ func dbDumpTrie(ctx *cli.Context) error { storage []byte account []byte start []byte - max = int64(-1) + maxVal = int64(-1) err error ) if state, err = hexutil.Decode(ctx.Args().Get(0)); err != nil { - log.Info("Could not decode the state root", "error", err) + log.Error("Could not decode the state root", "error", err) return err } if account, err = hexutil.Decode(ctx.Args().Get(1)); err != nil { - log.Info("Could not decode the account hash", "error", err) + log.Error("Could not decode the account hash", "error", err) return err } if storage, err = hexutil.Decode(ctx.Args().Get(2)); err != nil { - log.Info("Could not decode the storage trie root", "error", err) + log.Error("Could not decode the storage trie root", "error", err) return err } if ctx.NArg() > 3 { if start, err = hexutil.Decode(ctx.Args().Get(3)); err != nil { - log.Info("Could not decode the seek position", "error", err) + log.Error("Could not decode the seek position", "error", err) return err } } if ctx.NArg() > 4 { - if max, err = strconv.ParseInt(ctx.Args().Get(4), 10, 64); err != nil { - log.Info("Could not decode the max count", "error", err) + if maxVal, err = strconv.ParseInt(ctx.Args().Get(4), 10, 64); err != nil { + log.Error("Could not decode the max count", "error", err) return err } } @@ -529,7 +710,7 @@ func dbDumpTrie(ctx *cli.Context) error { var count int64 it := trie.NewIterator(trieIt) for it.Next() { - if max > 0 && count == max { + if maxVal > 0 && count == maxVal { fmt.Printf("Exiting after %d values\n", count) break } @@ -724,3 +905,96 @@ func showMetaData(ctx *cli.Context) error { table.Render() return nil } + +// hbss2pbss converts the hash-based state trie of the head header into +// path-based trie nodes. An optional first argument overrides the default job +// number (1000) that is handed to trie.NewHbss2Pbss. +func hbss2pbss(ctx *cli.Context) error { + if ctx.NArg() > 1 { + return 
fmt.Errorf("required arguments: %v", ctx.Command.ArgsUsage) + } + + var jobNum uint64 + var err error + if ctx.NArg() == 1 { + jobNum, err = strconv.ParseUint(ctx.Args().Get(0), 10, 64) + if err != nil { + return fmt.Errorf("failed to parse job num, Args[0]: %v, err: %w", ctx.Args().Get(0), err) + } + } else { + // by default + jobNum = 1000 + } + + stack, _ := makeConfigNode(ctx) + defer stack.Close() + + db := utils.MakeChainDatabase(ctx, stack, false) + db.Sync() + defer db.Close() + + config := trie.HashDefaults + triedb := trie.NewDatabase(db, config) + triedb.Cap(0) + log.Info("hbss2pbss triedb", "scheme", triedb.Scheme()) + defer triedb.Close() + + headerHash := rawdb.ReadHeadHeaderHash(db) + blockNumber := rawdb.ReadHeaderNumber(db, headerHash) + if blockNumber == nil { + log.Error("Failed to read header number") + return fmt.Errorf("failed to read header number") + } + + log.Info("hbss2pbss converting", "HeaderHash", headerHash.String(), "blockNumber", *blockNumber) + + var headerBlockHash common.Hash + var trieRootHash common.Hash + + if *blockNumber != math.MaxUint64 { + headerBlockHash = rawdb.ReadCanonicalHash(db, *blockNumber) + if headerBlockHash == (common.Hash{}) { + return fmt.Errorf("ReadHeadBlockHash empty hash") + } + blockHeader := rawdb.ReadHeader(db, headerBlockHash, *blockNumber) + if blockHeader == nil { + return fmt.Errorf("failed to read header, hash: %s, number: %d", headerBlockHash.String(), *blockNumber) + } + trieRootHash = blockHeader.Root + fmt.Println("Canonical Hash: ", headerBlockHash.String(), ", TrieRootHash: ", trieRootHash.String()) + } + if (trieRootHash == common.Hash{}) { + log.Error("Empty root hash") + return fmt.Errorf("empty root hash") + } + + id := trie.StateTrieID(trieRootHash) + theTrie, err := trie.New(id, triedb) + if err != nil { + log.Error("Failed to new trie tree", "err", err, "root hash", trieRootHash.String()) + return err + } + + h2p, err := trie.NewHbss2Pbss(theTrie, triedb, trieRootHash, *blockNumber, jobNum) + if err != nil { + log.Error("Failed to new hash2pbss", "err", err, "root hash", trieRootHash.String()) + return err + } + 
h2p.Run() + + return nil +} + +func pruneHashTrie(ctx *cli.Context) error { + if ctx.NArg() != 0 { + return fmt.Errorf("required none argument") + } + + stack, _ := makeConfigNode(ctx) + defer stack.Close() + + db := utils.MakeChainDatabase(ctx, stack, false) + defer db.Close() + + return rawdb.PruneHashTrieNodeInDatabase(db) +} diff --git a/core/rawdb/database.go b/core/rawdb/database.go index 1d7b7d1ca8..641688476a 100644 --- a/core/rawdb/database.go +++ b/core/rawdb/database.go @@ -449,6 +449,32 @@ func (s *stat) Count() string { return s.count.String() } +// PruneHashTrieNodeInDatabase iterates over every key-value pair in db and +// deletes each entry that IsLegacyTrieNode reports as a legacy trie node. It +// returns the first delete error or, once iteration ends, any error recorded +// by the iterator. +func PruneHashTrieNodeInDatabase(db ethdb.Database) error { + it := db.NewIterator([]byte{}, []byte{}) + defer it.Release() + + totalNum := 0 + for it.Next() { + key := it.Key() + if !IsLegacyTrieNode(key, it.Value()) { + continue + } + if err := db.Delete(key); err != nil { + return err + } + totalNum++ + if totalNum%100000 == 0 { + log.Info("Pruning hash-base trie nodes", "complete progress", totalNum) + } + } + log.Info("Pruning hash-base trie nodes", "complete progress", totalNum) + return it.Error() +} + // InspectDatabase traverses the entire database and checks the size // of all different categories of data. 
func InspectDatabase(db ethdb.Database, keyPrefix, keyStart []byte) error { diff --git a/trie/hbss2pbss.go b/trie/hbss2pbss.go new file mode 100644 index 0000000000..1c6e842c7f --- /dev/null +++ b/trie/hbss2pbss.go @@ -0,0 +1,256 @@ +package trie + +import ( + "bytes" + "errors" + "fmt" + "runtime" + "sync" + "sync/atomic" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie/trienode" +) + +type Hbss2Pbss struct { + trie *Trie // traverse trie + db *Database + blockNum uint64 + root node // root of triedb + stateRootHash common.Hash + concurrentQueue chan struct{} + totalNum uint64 + wg sync.WaitGroup +} + +const ( + DEFAULT_TRIEDBCACHE_SIZE = 1024 * 1024 * 1024 +) + +// NewHbss2Pbss returns a converter that walks tr and writes its nodes into the +// disk database of db under path-based keys. jobNum is the capacity of the +// queue that bounds concurrently running traversal goroutines; a value of 0 +// is raised to 1. It returns an error if tr or its root is nil. +func NewHbss2Pbss(tr *Trie, db *Database, stateRootHash common.Hash, blockNum uint64, jobNum uint64) (*Hbss2Pbss, error) { + if tr == nil { + return nil, errors.New("trie is nil") + } + + if tr.root == nil { + return nil, errors.New("trie root is nil") + } + + // An unbuffered queue would block every traversal goroutine on its first + // send with nobody receiving, so always keep at least one slot. + if jobNum == 0 { + jobNum = 1 + } + + ins := &Hbss2Pbss{ + trie: tr, + blockNum: blockNum, + db: db, + stateRootHash: stateRootHash, + root: tr.root, + concurrentQueue: make(chan struct{}, jobNum), + wg: sync.WaitGroup{}, + } + + return ins, nil +} + +func (t *Trie) resolveWithoutTrack(n node, prefix []byte) (node, error) { + if n, ok := n.(hashNode); ok { + blob, err := t.reader.node(prefix, common.BytesToHash(n)) + if err != nil { + return nil, err + } + return mustDecodeNode(n, blob), nil + } + return n, nil +} + +func (h2p *Hbss2Pbss) writeNode(pathKey []byte, n *trienode.Node, owner common.Hash) { + if owner == (common.Hash{}) { + rawdb.WriteAccountTrieNode(h2p.db.diskdb, pathKey, n.Blob) + log.Debug("Write account trie node", "path", common.Bytes2Hex(pathKey), "hash", n.Hash, + "blob hash", 
crypto.Keccak256Hash(n.Blob)) + } else { + rawdb.WriteStorageTrieNode(h2p.db.diskdb, owner, pathKey, n.Blob) + log.Debug("Write storage trie node", "path", common.Bytes2Hex(pathKey), "owner", owner.String(), + "hash", n.Hash, "blob hash", crypto.Keccak256Hash(n.Blob)) + } +} + +// Run statistics, external call +func (h2p *Hbss2Pbss) Run() { + log.Debug("Find account trie tree", "rootHash", h2p.trie.Hash().String(), "block num", h2p.blockNum) + + h2p.ConcurrentTraversal(h2p.trie, h2p.root, []byte{}) + h2p.wg.Wait() + + log.Info("Hbss to pbss statistics", "total complete", h2p.totalNum, "go routines num", runtime.NumGoroutine(), + "h2p concurrent queue", len(h2p.concurrentQueue)) + + rawdb.WritePersistentStateID(h2p.db.diskdb, h2p.blockNum) + rawdb.WriteStateID(h2p.db.diskdb, h2p.stateRootHash, h2p.blockNum) +} + +func (h2p *Hbss2Pbss) SubConcurrentTraversal(theTrie *Trie, theNode node, path []byte) { + h2p.concurrentQueue <- struct{}{} + h2p.ConcurrentTraversal(theTrie, theNode, path) + <-h2p.concurrentQueue + h2p.wg.Done() +} + +func (h2p *Hbss2Pbss) ConcurrentTraversal(theTrie *Trie, theNode node, path []byte) { + totalNum := uint64(0) + // nil node + if theNode == nil { + return + } + + switch current := (theNode).(type) { + case *shortNode: + collapsed := current.copy() + collapsed.Key = hexToCompact(current.Key) + var hash, _ = current.cache() + h2p.writeNode(path, trienode.New(common.BytesToHash(hash), nodeToBytes(collapsed)), theTrie.owner) + + h2p.ConcurrentTraversal(theTrie, current.Val, append(path, current.Key...)) + + case *fullNode: + // copy from trie/Committer (*committer).commit + collapsed := current.copy() + var hash, _ = collapsed.cache() + collapsed.Children = h2p.commitChildren(path, current) + + nodeBytes := nodeToBytes(collapsed) + if common.BytesToHash(hash) != common.BytesToHash(crypto.Keccak256(nodeBytes)) { + log.Error("Hash is inconsistent", "hash", common.BytesToHash(hash), + "node hash", common.BytesToHash(crypto.Keccak256(nodeBytes)), 
"node", collapsed.fstring("")) + panic("inconsistent hash") + } + + h2p.writeNode(path, trienode.New(common.BytesToHash(hash), nodeToBytes(collapsed)), theTrie.owner) + + for idx, child := range current.Children { + if child == nil { + continue + } + childPath := append(path, byte(idx)) + if len(h2p.concurrentQueue)*2 < cap(h2p.concurrentQueue) { + h2p.wg.Add(1) + dst := make([]byte, len(childPath)) + copy(dst, childPath) + go h2p.SubConcurrentTraversal(theTrie, child, dst) + } else { + h2p.ConcurrentTraversal(theTrie, child, childPath) + } + } + case hashNode: + n, err := theTrie.resolveWithoutTrack(current, path) + if err != nil { + log.Error("Failed to resolve hash node", "error", err, "trie root", theTrie.Hash(), "path", path) + return + } + h2p.ConcurrentTraversal(theTrie, n, path) + totalNum = atomic.AddUint64(&h2p.totalNum, 1) + if totalNum%100000 == 0 { + log.Info("Converting", "complete progress", totalNum, "go routines num", runtime.NumGoroutine(), + "h2p concurrentQueue", len(h2p.concurrentQueue)) + } + return + case valueNode: + if !hasTerm(path) { + log.Info("ValueNode misses path term", "path", common.Bytes2Hex(path)) + break + } + var account types.StateAccount + if err := rlp.Decode(bytes.NewReader(current), &account); err != nil { + // log.Error("Failed to decode rlp account", "err", err) + break + } + if account.Root == (common.Hash{}) || account.Root == types.EmptyRootHash { + // log.Info("Not a storage trie", "account", common.BytesToHash(path).String()) + break + } + + ownerAddress := common.BytesToHash(hexToCompact(path)) + tr, err := New(StorageTrieID(h2p.stateRootHash, ownerAddress, account.Root), h2p.db) + if err != nil { + log.Error("Failed to new Storage trie", "err", err, "root", account.Root.String(), "owner", ownerAddress.String()) + break + } + log.Debug("Find Contract Trie Tree", "rootHash", tr.Hash().String()) + h2p.wg.Add(1) + go h2p.SubConcurrentTraversal(tr, tr.root, []byte{}) + default: + panic(errors.New("invalid node type to 
traverse")) + } +} + +// copy from trie/Commiter (*committer).commit +func (h2p *Hbss2Pbss) commitChildren(path []byte, n *fullNode) [17]node { + var children [17]node + for i := 0; i < 16; i++ { + child := n.Children[i] + if child == nil { + continue + } + // If it's the hashed child, save the hash value directly. + // Note: it's impossible that the child in range [0, 15] + // is a valueNode. + if hn, ok := child.(hashNode); ok { + children[i] = hn + continue + } + + children[i] = h2p.commit(append(path, byte(i)), child) + } + // For the 17th child, it's possible the type is valuenode. + if n.Children[16] != nil { + children[16] = n.Children[16] + } + return children +} + +// commit collapses a node down into a hash node and returns it. +func (h2p *Hbss2Pbss) commit(path []byte, n node) node { + // if this path is clean, use available cached data + hash, dirty := n.cache() + if hash != nil && !dirty { + return hash + } + // Commit children, then parent, and remove the dirty flag. + switch cn := n.(type) { + case *shortNode: + // Commit child + collapsed := cn.copy() + + // If the child is fullNode, recursively commit, + // otherwise it can only be hashNode or valueNode. + if _, ok := cn.Val.(*fullNode); ok { + collapsed.Val = h2p.commit(append(path, cn.Key...), cn.Val) + } + // The key needs to be copied, since we're adding it to the + // modified nodeset. 
+ collapsed.Key = hexToCompact(cn.Key) + return collapsed + case *fullNode: + hashedKids := h2p.commitChildren(path, cn) + collapsed := cn.copy() + collapsed.Children = hashedKids + return collapsed + case hashNode: + return cn + default: + // nil, valuenode shouldn't be committed + panic(fmt.Sprintf("%T: invalid node: %v", n, n)) + } +} diff --git a/trie/node.go b/trie/node.go index 15bbf62f1c..d78ed5c569 100644 --- a/trie/node.go +++ b/trie/node.go @@ -112,6 +112,17 @@ func (n rawNode) EncodeRLP(w io.Writer) error { return err } +// NodeString decodes buf as the trie node identified by hash and returns its +// formatted string representation. If buf cannot be decoded, a description of +// the decode error is returned instead of panicking. +func NodeString(hash, buf []byte) string { + node, err := decodeNode(hash, buf) + if err != nil { + return "NodeString: <invalid node: " + err.Error() + ">" + } + return node.fstring("NodeString: ") +} + // mustDecodeNode is a wrapper of decodeNode and panic if any error is encountered. func mustDecodeNode(hash, buf []byte) node { n, err := decodeNode(hash, buf)