From 2d927543e2c85c24887e0623da97cb1ea89b9ece Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Wed, 1 Apr 2020 13:18:30 +0200 Subject: [PATCH 01/24] trie: binary trie + snapbin command --- cmd/geth/chaincmd.go | 45 +++++++ core/state/snapshot/hextrie_generator.go | 28 +++++ trie/binary.go | 154 +++++++++++++++++++++++ trie/binary_test.go | 97 ++++++++++++++ 4 files changed, 324 insertions(+) create mode 100644 trie/binary.go create mode 100644 trie/binary_test.go diff --git a/cmd/geth/chaincmd.go b/cmd/geth/chaincmd.go index 219996d46611..6bd43fefd9fe 100644 --- a/cmd/geth/chaincmd.go +++ b/cmd/geth/chaincmd.go @@ -218,6 +218,22 @@ Use "ethereum dump 0" to dump the genesis block.`, }, Category: "BLOCKCHAIN COMMANDS", } + generateBinaryTrieCommand = cli.Command{ + Action: utils.MigrateFlags(snapToBin), + Name: "bintrie", + Usage: "Convert the snapshot DB to a binary trie", + ArgsUsage: " ", + Flags: []cli.Flag{ + utils.DataDirFlag, + utils.AncientFlag, + utils.CacheFlag, + utils.TestnetFlag, + utils.RinkebyFlag, + utils.GoerliFlag, + utils.SyncModeFlag, + }, + Category: "BLOCKCHAIN COMMANDS", + } ) // initGenesis will initialise the given JSON format genesis file and writes it as @@ -627,6 +643,35 @@ func snapToHash(ctx *cli.Context) error { return nil } +func snapToBin(ctx *cli.Context) error { + node, _ := makeConfigNode(ctx) + chain, chainDb := utils.MakeChain(ctx, node) + + defer func() { + node.Close() + chain.Stop() + chainDb.Close() + }() + + snapTree := chain.Snapshot() + if snapTree == nil { + return fmt.Errorf("No snapshot tree available") + } + block := chain.CurrentBlock() + if block == nil { + return fmt.Errorf("no blocks present") + } + root := block.Root() + it, err := snapTree.AccountIterator(root, common.Hash{}) + if err != nil { + return fmt.Errorf("Could not create iterator for root %x: %v", root, err) + } + log.Info("Generating binary trie", "root", root) + generatedRoot := snapshot.GenerateBinaryTree(it) + log.Info("Generation done", 
"root", root, "binary root", generatedRoot) + return nil +} + // hashish returns true for strings that look like hashes. func hashish(x string) bool { _, err := strconv.Atoi(x) diff --git a/core/state/snapshot/hextrie_generator.go b/core/state/snapshot/hextrie_generator.go index aff6e00aa254..229864353d09 100644 --- a/core/state/snapshot/hextrie_generator.go +++ b/core/state/snapshot/hextrie_generator.go @@ -17,10 +17,12 @@ package snapshot import ( + "fmt" "sync" "time" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/ethdb/memorydb" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/trie" @@ -33,6 +35,32 @@ type leaf struct { type trieGeneratorFn func(in chan (leaf), out chan (common.Hash)) +func GenerateBinaryTree(it AccountIterator) common.Hash { + db, err := rawdb.NewLevelDBDatabase("./bintrie", 128, 1024, "") + if err != nil { + panic(fmt.Sprintf("error opening bintrie db, err=%v", err)) + } + btrie, err := trie.NewBinary(db) + if err != nil { + panic(fmt.Sprintf("error creating binary trie, err=%v", err)) + } + counter := 0 + for it.Next() { + counter++ + // Don't get the entire expanded account at this + // stage - NOTE + btrie.TryUpdate(it.Hash().Bytes(), it.Account()) + } + log.Info("Inserted all leaves", "count", counter) + + h, err := btrie.Commit() + if err != nil { + panic(fmt.Sprintf("error committing trie, err=%v", err)) + } + + return common.BytesToHash(h) +} + // GenerateTrieRoot takes an account iterator and reproduces the root hash. func GenerateTrieRoot(it AccountIterator) common.Hash { //return generateTrieRoot(it, StackGenerate) diff --git a/trie/binary.go b/trie/binary.go new file mode 100644 index 000000000000..b0a5ad0dc00b --- /dev/null +++ b/trie/binary.go @@ -0,0 +1,154 @@ +// Copyright 2020 The go-ethereum Authors +// This file is part of the go-ethereum library. 
+// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package trie + +import ( + "fmt" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" + "golang.org/x/crypto/sha3" +) + +type BinaryTrie struct { + left *BinaryTrie + right *BinaryTrie + value []byte + db ethdb.Database +} + +func NewBinary(db ethdb.Database) (*BinaryTrie, error) { + if db == nil { + return nil, fmt.Errorf("trie.NewBinary called without a database") + } + + return &BinaryTrie{db: db}, nil +} + +func (t *BinaryTrie) Get(key []byte) []byte { + res, err := t.TryGet(key) + if err != nil { + log.Error(fmt.Sprintf("Unhandled trie error: %v", err)) + } + return res +} + +func (t *BinaryTrie) TryGet(key []byte) ([]byte, error) { + value, err := t.tryGet(key, 0) + return value, err +} + +func (t *BinaryTrie) tryGet(key []byte, depth int) ([]byte, error) { + by := key[depth/8] + bi := (by >> uint(depth%8)) & 1 + if bi == 0 { + if t.left == nil { + if depth < len(key)*8-1 || t.value == nil { + return nil, fmt.Errorf("could not find key %s in trie", common.ToHex(key)) + } + return t.value, nil + } + return t.left.tryGet(key, depth+1) + } else { + if t.right == nil { + if depth < len(key)*8-1 || t.value == nil { + return nil, fmt.Errorf("could not find key %s in trie", common.ToHex(key)) + } + return t.value, nil + 
} + return t.left.tryGet(key, depth+1) + } +} + +func (t *BinaryTrie) Update(key, value []byte) { + if err := t.TryUpdate(key, value); err != nil { + log.Error(fmt.Sprintf("Unhandled trie error: %v", err)) + } +} + +func (t *BinaryTrie) TryUpdate(key, value []byte) error { + // TODO check key depth + err := t.insert(0, key, value) + return err +} + +func (t *BinaryTrie) insert(depth int, key, value []byte) error { + // TODO hash intermediate nodes + by := key[depth/8] + bi := (by >> uint(depth%8)) & 1 + if bi == 0 { + if t.left == nil { + if depth == len(key)*8-2 { + t.left = &BinaryTrie{nil, nil, value, t.db} + return nil + } else { + t.left = &BinaryTrie{nil, nil, nil, t.db} + } + } + return t.left.insert(depth+1, key, value) + } else { + if t.right == nil { + if depth == len(key)*8-2 { + t.right = &BinaryTrie{nil, nil, value, t.db} + return nil + } else { + t.right = &BinaryTrie{nil, nil, nil, t.db} + } + } + return t.right.insert(depth+1, key, value) + } +} + +func (t *BinaryTrie) Commit() ([]byte, error) { + var payload [3][]byte + var err error + if t.left != nil { + payload[0], err = t.left.Commit() + if err != nil { + return nil, err + } + } + if t.right != nil { + payload[1], err = t.right.Commit() + if err != nil { + return nil, err + } + } + hasher := sha3.NewLegacyKeccak256() + if t.value != nil { + hasher.Write(t.value) + hasher.Sum(payload[2][:]) + } + + hasher.Reset() + hasher.Write(payload[0]) + hasher.Write(payload[1]) + hasher.Write(payload[2]) + h := hasher.Sum(nil) + data := make([]byte, len(payload[0])+len(payload[1])+len(payload[2])+3) + data[0] = byte(len(payload[0])) + copy(data[1:], payload[0]) + data[len(payload[0])+1] = byte(len(payload[1])) + copy(data[2+len(payload[0]):], payload[1]) + data[len(payload[0])+len(payload[1])+2] = byte(len(payload[2])) + copy(data[2+len(payload[0])+len(payload[1]):], payload[2]) + + t.db.Put(h, data) + + return h, err +} diff --git a/trie/binary_test.go b/trie/binary_test.go new file mode 100644 index 
000000000000..2661ec2e261e --- /dev/null +++ b/trie/binary_test.go @@ -0,0 +1,97 @@ +// Copyright 2020 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package trie + +import ( + "bytes" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/ethdb/memorydb" +) + +func TestBinaryLeafReadEmpty(t *testing.T) { + trie, err := NewBinary(NewDatabase(memorydb.New())) + if err != nil { + t.Fatalf("error creating binary trie: %v", err) + } + + _, err = trie.TryGet(common.FromHex("00")) + if err == nil { + t.Fatalf("should have returned an error trying to get from an empty binry trie, err=%v", err) + } +} + +func TestBinaryLeafInsert(t *testing.T) { + trie, err := NewBinary(NewDatabase(memorydb.New())) + if err != nil { + t.Fatalf("error creating binary trie: %v", err) + } + + err = trie.TryUpdate(common.FromHex("00"), common.FromHex("00")) + if err != nil { + t.Fatalf("could not insert (0x00, 0x00) into an empty binary trie, err=%v", err) + } + +} + +func TestBinaryLeafInsertRead(t *testing.T) { + trie, err := NewBinary(NewDatabase(memorydb.New())) + if err != nil { + t.Fatalf("error creating binary trie: %v", err) + } + + err = trie.TryUpdate(common.FromHex("00"), common.FromHex("01")) + if err != nil { + t.Fatalf("could not insert (0x00, 
0x01) into an empty binary trie, err=%v", err) + } + + v, err := trie.TryGet(common.FromHex("00")) + if err != nil { + t.Fatalf("could not read data back from simple binary trie, err=%v", err) + } + + if !bytes.Equal(v, common.FromHex("01")) { + t.Fatalf("Invalid value read from the binary trie: %s != %s", common.ToHex(v), "01") + } +} + +func TestBinaryForkInsertRead(t *testing.T) { + trie, err := NewBinary(NewDatabase(memorydb.New())) + if err != nil { + t.Fatalf("error creating binary trie: %v", err) + } + + for i := byte(0); i < 10; i++ { + err = trie.TryUpdate([]byte{i}, common.FromHex("01")) + if err != nil { + t.Fatalf("could not insert (%#x, 0x01) into an empty binary trie, err=%v", i, err) + } + } + + for i := byte(0); i < 10; i++ { + v, err := trie.TryGet([]byte{i}) + if err != nil { + t.Fatalf("could not read data back from simple binary trie, err=%v", err) + } + + if !bytes.Equal(v, common.FromHex("01")) { + t.Fatalf("Invalid value read from the binary trie: %s != %s", common.ToHex(v), "01") + } + } + +} From 93375a1dfa1e7af7c2b7ad43fe6264bd38f4a60c Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Wed, 1 Apr 2020 13:23:50 +0200 Subject: [PATCH 02/24] Update trie/binary.go Co-Authored-By: Martin Holst Swende --- trie/binary.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trie/binary.go b/trie/binary.go index b0a5ad0dc00b..c64bc7d76b51 100644 --- a/trie/binary.go +++ b/trie/binary.go @@ -67,7 +67,7 @@ func (t *BinaryTrie) tryGet(key []byte, depth int) ([]byte, error) { } else { if t.right == nil { if depth < len(key)*8-1 || t.value == nil { - return nil, fmt.Errorf("could not find key %s in trie", common.ToHex(key)) + return nil, fmt.Errorf("could not find key 0x%x in trie", key) } return t.value, nil } From d40815b73b4fa130083ab41b576a8f4a9fc8ba85 Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Wed, 1 Apr 2020 13:28:36 +0200 Subject: [PATCH 03/24] Update trie/binary.go Co-Authored-By: Martin Holst Swende --- trie/binary.go 
| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trie/binary.go b/trie/binary.go index c64bc7d76b51..0222541640d8 100644 --- a/trie/binary.go +++ b/trie/binary.go @@ -71,7 +71,7 @@ func (t *BinaryTrie) tryGet(key []byte, depth int) ([]byte, error) { } return t.value, nil } - return t.left.tryGet(key, depth+1) + return t.right.tryGet(key, depth+1) } } From 2a1629d040075bb147e5df4ec41dd74ec65d2bc5 Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Wed, 1 Apr 2020 18:35:22 +0200 Subject: [PATCH 04/24] Fix bintrie command --- cmd/geth/main.go | 1 + 1 file changed, 1 insertion(+) diff --git a/cmd/geth/main.go b/cmd/geth/main.go index 5056c514c4b7..5c85870e0f13 100644 --- a/cmd/geth/main.go +++ b/cmd/geth/main.go @@ -211,6 +211,7 @@ func init() { dumpGenesisCommand, inspectCommand, generateTrieCommand, + generateBinaryTrieCommand, // See accountcmd.go: accountCommand, walletCommand, From 62370f04846a6dff9537cc787d8c5569f1e3472c Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Wed, 1 Apr 2020 18:38:53 +0200 Subject: [PATCH 05/24] Fix a couple bugs in insert and add a corresponding test --- trie/binary.go | 26 ++++++++++++++++++++------ trie/binary_test.go | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 6 deletions(-) diff --git a/trie/binary.go b/trie/binary.go index 0222541640d8..682f5ba582d7 100644 --- a/trie/binary.go +++ b/trie/binary.go @@ -88,25 +88,39 @@ func (t *BinaryTrie) TryUpdate(key, value []byte) error { } func (t *BinaryTrie) insert(depth int, key, value []byte) error { - // TODO hash intermediate nodes by := key[depth/8] - bi := (by >> uint(depth%8)) & 1 + bi := (by >> uint(7-depth%8)) & 1 if bi == 0 { if t.left == nil { - if depth == len(key)*8-2 { + switch depth { + case len(key)*8 - 1: + t.value = value + return nil + case len(key)*8 - 2: t.left = &BinaryTrie{nil, nil, value, t.db} return nil - } else { + default: t.left = &BinaryTrie{nil, nil, nil, t.db} } } return t.left.insert(depth+1, 
key, value) } else { if t.right == nil { - if depth == len(key)*8-2 { + // Free the space taken by left branch as insert + // will no longer visit it. + if t.left != nil { + h := t.left.Hash() + t.left = hashBinaryNode(h) + } + + switch depth { + case len(key)*8 - 1: + t.value = value + return nil + case len(key)*8 - 2: t.right = &BinaryTrie{nil, nil, value, t.db} return nil - } else { + default: t.right = &BinaryTrie{nil, nil, nil, t.db} } } diff --git a/trie/binary_test.go b/trie/binary_test.go index 2661ec2e261e..6c31fd5f7418 100644 --- a/trie/binary_test.go +++ b/trie/binary_test.go @@ -95,3 +95,47 @@ func TestBinaryForkInsertRead(t *testing.T) { } } + +func TestBinaryInsertLeftRight(t *testing.T) { + trie, err := NewBinary(nil) + if err != nil { + t.Fatalf("error creating binary trie: %v", err) + } + + trie.TryUpdate([]byte{0}, []byte{0}) + trie.TryUpdate([]byte{128}, []byte{1}) + + // Trie is expected to look like this: + // /\ + // / / + // / / + // / / + // / / + // / / + // / / + // / / + + // Check there is a left branch + if trie.left == nil { + t.Fatal("empty left branch") + } + + // Check that the left branch has already been hashed + if _, ok := trie.left.(hashBinaryNode); !ok { + t.Fatalf("left branch should have been hashed!") + } + + // Check there is a right branch + if trie.right == nil { + t.Fatal("empty right branch") + } + + // Check that the right branch has only lefts after the + // first right. 
+ for i, tr := 1, trie.right; i < 8; i++ { + if tr == nil { + t.Fatal("invalid trie structure") + } + tr = tr.(*BinaryTrie).left + } +} From 60cdf15a99f67d0a6e92470a717061bec17e7d86 Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Wed, 1 Apr 2020 18:40:05 +0200 Subject: [PATCH 06/24] Fix tests broken after rebase --- trie/binary_test.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/trie/binary_test.go b/trie/binary_test.go index 6c31fd5f7418..cd1b3c5a2909 100644 --- a/trie/binary_test.go +++ b/trie/binary_test.go @@ -21,11 +21,10 @@ import ( "testing" "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/ethdb/memorydb" ) func TestBinaryLeafReadEmpty(t *testing.T) { - trie, err := NewBinary(NewDatabase(memorydb.New())) + trie, err := NewBinary(nil) if err != nil { t.Fatalf("error creating binary trie: %v", err) } @@ -37,7 +36,7 @@ func TestBinaryLeafReadEmpty(t *testing.T) { } func TestBinaryLeafInsert(t *testing.T) { - trie, err := NewBinary(NewDatabase(memorydb.New())) + trie, err := NewBinary(nil) if err != nil { t.Fatalf("error creating binary trie: %v", err) } @@ -50,7 +49,7 @@ func TestBinaryLeafInsert(t *testing.T) { } func TestBinaryLeafInsertRead(t *testing.T) { - trie, err := NewBinary(NewDatabase(memorydb.New())) + trie, err := NewBinary(nil) if err != nil { t.Fatalf("error creating binary trie: %v", err) } @@ -71,7 +70,7 @@ func TestBinaryLeafInsertRead(t *testing.T) { } func TestBinaryForkInsertRead(t *testing.T) { - trie, err := NewBinary(NewDatabase(memorydb.New())) + trie, err := NewBinary(nil) if err != nil { t.Fatalf("error creating binary trie: %v", err) } From 4936cc3d2f75d18a0b76e8c292411989a5304c3b Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Wed, 1 Apr 2020 18:43:41 +0200 Subject: [PATCH 07/24] fix a fix in tryGet tests --- trie/binary_test.go | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/trie/binary_test.go b/trie/binary_test.go index 
cd1b3c5a2909..9a34c4ee355e 100644 --- a/trie/binary_test.go +++ b/trie/binary_test.go @@ -82,15 +82,13 @@ func TestBinaryForkInsertRead(t *testing.T) { } } - for i := byte(0); i < 10; i++ { - v, err := trie.TryGet([]byte{i}) - if err != nil { - t.Fatalf("could not read data back from simple binary trie, err=%v", err) - } + v, err := trie.TryGet([]byte{9}) + if err != nil { + t.Fatalf("could not read data back from simple binary trie, err=%v", err) + } - if !bytes.Equal(v, common.FromHex("01")) { - t.Fatalf("Invalid value read from the binary trie: %s != %s", common.ToHex(v), "01") - } + if !bytes.Equal(v, common.FromHex("01")) { + t.Fatalf("Invalid value read from the binary trie: %s != %s", common.ToHex(v), "01") } } From cbe3620abca79df166c8dcf62bac1147893aea17 Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Wed, 1 Apr 2020 18:48:48 +0200 Subject: [PATCH 08/24] Prune the left subtree when inserting into the right subtree --- core/state/snapshot/hextrie_generator.go | 4 +- trie/binary.go | 116 +++++++++++++++++------ 2 files changed, 90 insertions(+), 30 deletions(-) diff --git a/core/state/snapshot/hextrie_generator.go b/core/state/snapshot/hextrie_generator.go index 229864353d09..c645db2494bf 100644 --- a/core/state/snapshot/hextrie_generator.go +++ b/core/state/snapshot/hextrie_generator.go @@ -53,12 +53,12 @@ func GenerateBinaryTree(it AccountIterator) common.Hash { } log.Info("Inserted all leaves", "count", counter) - h, err := btrie.Commit() + err = btrie.Commit() if err != nil { panic(fmt.Sprintf("error committing trie, err=%v", err)) } - return common.BytesToHash(h) + return common.Hash{} } // GenerateTrieRoot takes an account iterator and reproduces the root hash. 
diff --git a/trie/binary.go b/trie/binary.go index 682f5ba582d7..dbf5ac90f8d2 100644 --- a/trie/binary.go +++ b/trie/binary.go @@ -17,7 +17,9 @@ package trie import ( + "bytes" "fmt" + "io" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/ethdb" @@ -25,17 +27,24 @@ import ( "golang.org/x/crypto/sha3" ) -type BinaryTrie struct { - left *BinaryTrie - right *BinaryTrie - value []byte - db ethdb.Database +type binaryNode interface { + Hash() []byte + Commit() error + insert(depth int, key, value []byte) error + tryGet(key []byte, depth int) ([]byte, error) } -func NewBinary(db ethdb.Database) (*BinaryTrie, error) { - if db == nil { - return nil, fmt.Errorf("trie.NewBinary called without a database") +type ( + BinaryTrie struct { + left binaryNode + right binaryNode + value []byte + db ethdb.Database } + hashBinaryNode []byte +) + +func NewBinary(db ethdb.Database) (*BinaryTrie, error) { return &BinaryTrie{db: db}, nil } @@ -55,7 +64,7 @@ func (t *BinaryTrie) TryGet(key []byte) ([]byte, error) { func (t *BinaryTrie) tryGet(key []byte, depth int) ([]byte, error) { by := key[depth/8] - bi := (by >> uint(depth%8)) & 1 + bi := (by >> uint(7-depth%8)) & 1 if bi == 0 { if t.left == nil { if depth < len(key)*8-1 || t.value == nil { @@ -81,6 +90,35 @@ func (t *BinaryTrie) Update(key, value []byte) { } } +func (t *BinaryTrie) Hash() []byte { + var payload bytes.Buffer + + var lh []byte + if t.left != nil { + lh = t.left.Hash() + } + payload.Write(lh) + t.left = hashBinaryNode(lh) + + var rh []byte + if t.right != nil { + rh = t.right.Hash() + } + payload.Write(rh) + t.right = hashBinaryNode(rh) + + hasher := sha3.NewLegacyKeccak256() + if t.value != nil { + hasher.Write(t.value) + hv := hasher.Sum(nil) + payload.Write(hv) + } + + hasher.Reset() + io.Copy(hasher, &payload) + return hasher.Sum(nil) +} + func (t *BinaryTrie) TryUpdate(key, value []byte) error { // TODO check key depth err := t.insert(0, key, value) @@ -128,41 +166,63 @@ func (t *BinaryTrie) 
insert(depth int, key, value []byte) error { } } -func (t *BinaryTrie) Commit() ([]byte, error) { - var payload [3][]byte +func (t *BinaryTrie) Commit() error { + var payload bytes.Buffer var err error + + var lh []byte if t.left != nil { - payload[0], err = t.left.Commit() + lh = t.left.Hash() + err := t.left.Commit() if err != nil { - return nil, err + return err } } + payload.Write(lh) + t.left = hashBinaryNode(lh) + + var rh []byte if t.right != nil { - payload[1], err = t.right.Commit() + rh = t.right.Hash() + err := t.right.Commit() if err != nil { - return nil, err + return err } } + payload.Write(rh) + t.right = hashBinaryNode(rh) + hasher := sha3.NewLegacyKeccak256() if t.value != nil { hasher.Write(t.value) - hasher.Sum(payload[2][:]) + hv := hasher.Sum(nil) + payload.Write(hv) } hasher.Reset() - hasher.Write(payload[0]) - hasher.Write(payload[1]) - hasher.Write(payload[2]) + io.Copy(hasher, &payload) h := hasher.Sum(nil) - data := make([]byte, len(payload[0])+len(payload[1])+len(payload[2])+3) - data[0] = byte(len(payload[0])) - copy(data[1:], payload[0]) - data[len(payload[0])+1] = byte(len(payload[1])) - copy(data[2+len(payload[0]):], payload[1]) - data[len(payload[0])+len(payload[1])+2] = byte(len(payload[2])) - copy(data[2+len(payload[0])+len(payload[1]):], payload[2]) - t.db.Put(h, data) + err = t.db.Put(h, payload.Bytes()) + + return err +} + +func (h hashBinaryNode) Commit() error { + return nil +} + +func (h hashBinaryNode) Hash() []byte { + return h +} + +func (h hashBinaryNode) insert(depth int, key, value []byte) error { + return fmt.Errorf("trying to insert into a hash") +} - return h, err +func (h hashBinaryNode) tryGet(key []byte, depth int) ([]byte, error) { + if depth == 2*len(key) { + return []byte(h), nil + } + return nil, fmt.Errorf("reached an empty branch") } From b8c27d5bc2c2a3b6dac5276aba9547fb98f3b70f Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Fri, 3 Apr 2020 14:12:07 +0200 Subject: [PATCH 09/24] Benchmarking code for 
bintrie --- core/state/snapshot/trie_generator_test.go | 94 ++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/core/state/snapshot/trie_generator_test.go b/core/state/snapshot/trie_generator_test.go index c79ea73af047..f3eba2967c2a 100644 --- a/core/state/snapshot/trie_generator_test.go +++ b/core/state/snapshot/trie_generator_test.go @@ -366,3 +366,97 @@ func BenchmarkSlimToFullHash(b *testing.B) { } } + +func BenchmarkBinTrieInsert(b *testing.B) { + // Get a fairly large trie + // Create a custom account factory to recreate the same addresses + makeAccounts := func(num int) map[common.Hash][]byte { + accounts := make(map[common.Hash][]byte) + for i := 0; i < num; i++ { + h := common.Hash{} + binary.BigEndian.PutUint64(h[:], uint64(i+1)) + accounts[h] = randomAccountWithSmall() + } + return accounts + } + // Build up a large stack of snapshots + base := &diskLayer{ + diskdb: rawdb.NewMemoryDatabase(), + root: common.HexToHash("0x01"), + cache: fastcache.New(1024 * 500), + } + snaps := &Tree{ + layers: map[common.Hash]snapshot{ + base.root: base, + }, + } + b.Run("4K", func(b *testing.B) { + // 4K accounts + snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), nil, makeAccounts(4000), nil) + head := snaps.Snapshot(common.HexToHash("0x02")) + // Call it once to make it create the lists before test starts + it := head.(*diffLayer).AccountIterator(common.HexToHash("0x00")) + + b.Run("recursive", func(b *testing.B) { + b.Run("noext", func(b *testing.B) { + b.ResetTimer() + trie, _ := trie.NewBinary(nil) + b.ReportAllocs() + for n := 0; n < b.N; n++ { + for it.Next() { + trie.TryUpdate(it.Hash().Bytes(), it.Account()) + } + } + b.StopTimer() + }) + }) + }) +} + +func BenchmarkBinTrieHash(b *testing.B) { + // Get a fairly large trie + // Create a custom account factory to recreate the same addresses + makeAccounts := func(num int) map[common.Hash][]byte { + accounts := make(map[common.Hash][]byte) + for i := 0; i < num; i++ { + h := 
common.Hash{} + binary.BigEndian.PutUint64(h[:], uint64(i+1)) + accounts[h] = randomAccountWithSmall() + } + return accounts + } + // Build up a large stack of snapshots + base := &diskLayer{ + diskdb: rawdb.NewMemoryDatabase(), + root: common.HexToHash("0x01"), + cache: fastcache.New(1024 * 500), + } + snaps := &Tree{ + layers: map[common.Hash]snapshot{ + base.root: base, + }, + } + b.Run("4K", func(b *testing.B) { + // 4K accounts + snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), nil, makeAccounts(4000), nil) + head := snaps.Snapshot(common.HexToHash("0x02")) + // Call it once to make it create the lists before test starts + it := head.(*diffLayer).AccountIterator(common.HexToHash("0x00")) + trie, _ := trie.NewBinary(nil) + + for it.Next() { + trie.TryUpdate(it.Hash().Bytes(), it.Account()) + } + + b.Run("recursive", func(b *testing.B) { + b.Run("noext", func(b *testing.B) { + b.ResetTimer() + b.ReportAllocs() + for n := 0; n < b.N; n++ { + trie.Hash() + } + b.StopTimer() + }) + }) + }) +} From 96d4e08065dc626bfcb005e8fd25a4c07dad483c Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Fri, 17 Apr 2020 17:11:44 +0200 Subject: [PATCH 10/24] trie: introduce extension nodes in search --- trie/binary.go | 83 +++++++++++++++++++++++-------- trie/binary_test.go | 118 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 181 insertions(+), 20 deletions(-) diff --git a/trie/binary.go b/trie/binary.go index dbf5ac90f8d2..5f92b0bc9748 100644 --- a/trie/binary.go +++ b/trie/binary.go @@ -40,6 +40,15 @@ type ( right binaryNode value []byte db ethdb.Database + + // This is the binary equivalent of "extension nodes": + // binary nodes can have a prefix that is common to all + // subtrees. The prefix is defined by a series of bytes, + // and two offsets marking the start bit and the end bit + // of the range. 
+ prefix []byte + startBit int + endBit int } hashBinaryNode []byte ) @@ -62,26 +71,60 @@ func (t *BinaryTrie) TryGet(key []byte) ([]byte, error) { return value, err } +func (t *BinaryTrie) getPrefixLen() int { + if t.endBit > t.startBit { + return t.endBit - t.startBit + } + return 0 +} + +func getBit(key []byte, off int) bool { + mask := byte(1) << (7 - uint(off)%8) + + return byte(key[uint(off)/8])&mask != byte(0) +} + +func (t *BinaryTrie) getPrefixBit(bitnum int) bool { + if bitnum > t.getPrefixLen() { + panic(fmt.Sprintf("Trying to get bit #%d in a %d bit-long bitfield", bitnum, t.getPrefixLen())) + } + return getBit(t.prefix, t.startBit+bitnum) +} + func (t *BinaryTrie) tryGet(key []byte, depth int) ([]byte, error) { - by := key[depth/8] - bi := (by >> uint(7-depth%8)) & 1 - if bi == 0 { - if t.left == nil { - if depth < len(key)*8-1 || t.value == nil { - return nil, fmt.Errorf("could not find key %s in trie", common.ToHex(key)) - } - return t.value, nil + // Compare the key and the prefix. If they represent the + // same bitfield, recurse. Otherwise, raise an error as + // the value isn't present in this trie. + var i int + for i = 0; i < t.getPrefixLen(); i++ { + if getBit(key, depth+i) != t.getPrefixBit(i) { + return nil, fmt.Errorf("Key %v isn't present in this trie", key) } - return t.left.tryGet(key, depth+1) - } else { - if t.right == nil { - if depth < len(key)*8-1 || t.value == nil { - return nil, fmt.Errorf("could not find key 0x%x in trie", key) - } - return t.value, nil + } + + // Exit condition: has the length of the key been reached? + if depth+i == 8*len(key) { + if t.value == nil { + return nil, fmt.Errorf("Key %v isn't present in this trie", key) + } + return t.value, nil + } + + // End of the key hasn't been reached, recurse into left or right + // if the corresponding node is available. 
+ child := t.left + isRight := getBit(key, depth+i) + if isRight { + child = t.right + } + + if child == nil { + if depth+i < len(key)*8-1 || t.value == nil { + return nil, fmt.Errorf("could not find key 0x%s in trie %v %v %v", common.ToHex(key), depth+i, len(key), t.value) } - return t.right.tryGet(key, depth+1) + return t.value, nil } + return child.tryGet(key, depth+i+1) } func (t *BinaryTrie) Update(key, value []byte) { @@ -135,10 +178,10 @@ func (t *BinaryTrie) insert(depth int, key, value []byte) error { t.value = value return nil case len(key)*8 - 2: - t.left = &BinaryTrie{nil, nil, value, t.db} + t.left = &BinaryTrie{nil, nil, value, t.db, nil, 0, 0} return nil default: - t.left = &BinaryTrie{nil, nil, nil, t.db} + t.left = &BinaryTrie{nil, nil, nil, t.db, nil, 0, 0} } } return t.left.insert(depth+1, key, value) @@ -156,10 +199,10 @@ func (t *BinaryTrie) insert(depth int, key, value []byte) error { t.value = value return nil case len(key)*8 - 2: - t.right = &BinaryTrie{nil, nil, value, t.db} + t.right = &BinaryTrie{nil, nil, value, t.db, nil, 0, 0} return nil default: - t.right = &BinaryTrie{nil, nil, nil, t.db} + t.right = &BinaryTrie{nil, nil, nil, t.db, nil, 0, 0} } } return t.right.insert(depth+1, key, value) diff --git a/trie/binary_test.go b/trie/binary_test.go index 9a34c4ee355e..13a454fb2605 100644 --- a/trie/binary_test.go +++ b/trie/binary_test.go @@ -35,6 +35,63 @@ func TestBinaryLeafReadEmpty(t *testing.T) { } } +func TestBinaryReadPrefix(t *testing.T) { + trieLeaf := &BinaryTrie{ + prefix: []byte("croissants"), + startBit: 0, + endBit: 8 * len("croissants"), + left: nil, + right: nil, + value: []byte("baguette"), + } + + res, err := trieLeaf.TryGet([]byte("croissants")) + if !bytes.Equal(res, []byte("baguette")) { + t.Fatalf("should have returned an error trying to get from an empty binry trie, err=%v", err) + } + + trieExtLeaf := &BinaryTrie{ + prefix: []byte("crois"), + startBit: 0, + endBit: 8 * len("crois"), + left: &BinaryTrie{ + prefix: 
[]byte("sants"), + startBit: 1, + endBit: 8 * len("sants"), + value: []byte("baguette"), + left: nil, + right: nil, + }, + right: nil, + } + + res, err = trieExtLeaf.TryGet([]byte("croissants")) + if !bytes.Equal(res, []byte("baguette")) { + t.Fatalf("should not have returned err=%v", err) + } + + // Same test as above but the break isn't on a byte boundary + trieExtLeaf = &BinaryTrie{ + prefix: []byte("crois"), + startBit: 0, + endBit: 8*len("crois") - 3, + left: &BinaryTrie{ + prefix: []byte("ssants"), + startBit: 6, + endBit: 8 * len("ssants"), + value: []byte("baguette"), + left: nil, + right: nil, + }, + right: nil, + } + + res, err = trieExtLeaf.TryGet([]byte("croissants")) + if !bytes.Equal(res, []byte("baguette")) { + t.Fatalf("should not have returned err=%v", err) + } +} + func TestBinaryLeafInsert(t *testing.T) { trie, err := NewBinary(nil) if err != nil { @@ -136,3 +193,64 @@ func TestBinaryInsertLeftRight(t *testing.T) { tr = tr.(*BinaryTrie).left } } + +func TestPrefixBitLen(t *testing.T) { + btrie := new(BinaryTrie) + + got := btrie.getPrefixLen() + if got != 0 { + t.Fatalf("Invalid prefix length, got %d != exp %d", got, 0) + } + + btrie.prefix = []byte("croissants") + got = btrie.getPrefixLen() + if got != 0 { + t.Fatalf("Invalid prefix length, got %d != exp %d", got, 0) + } + + btrie.endBit = 5 + got = btrie.getPrefixLen() + if got != 5 { + t.Fatalf("Invalid prefix length, got %d != exp %d", got, 5) + } + + btrie.endBit = 12 + got = btrie.getPrefixLen() + if got != 12 { + t.Fatalf("Invalid prefix length, got %d != exp %d", got, 12) + } + + btrie.endBit = 27 + got = btrie.getPrefixLen() + if got != 27 { + t.Fatalf("Invalid prefix length, got %d != exp %d", got, 27) + } + + btrie.startBit = 25 + got = btrie.getPrefixLen() + if got != 2 { + t.Fatalf("Invalid prefix length, got %d != exp %d", got, 2) + } + + btrie.endBit = 33 + got = btrie.getPrefixLen() + if got != 8 { + t.Fatalf("Invalid prefix length, got %d != exp %d", got, 8) + } +} + +func 
TestPrefixBitAccess(t *testing.T) { + btrie := new(BinaryTrie) + btrie.prefix = []byte{0x55, 0x55} + btrie.startBit = 0 + btrie.endBit = 15 + + for i := 0; i < btrie.getPrefixLen(); i += 2 { + if btrie.getPrefixBit(i) != false { + t.Fatal("Got the wrong bit value") + } + if btrie.getPrefixBit(i+1) != true { + t.Fatal("Got the wrong bit value") + } + } +} From 15e9a106cea8b2759bfda816351878be5a8227bf Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Fri, 17 Apr 2020 17:19:42 +0200 Subject: [PATCH 11/24] trie: cleanup and one more extension read test --- trie/binary.go | 2 +- trie/binary_test.go | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/trie/binary.go b/trie/binary.go index 5f92b0bc9748..5749e83408e2 100644 --- a/trie/binary.go +++ b/trie/binary.go @@ -120,7 +120,7 @@ func (t *BinaryTrie) tryGet(key []byte, depth int) ([]byte, error) { if child == nil { if depth+i < len(key)*8-1 || t.value == nil { - return nil, fmt.Errorf("could not find key 0x%s in trie %v %v %v", common.ToHex(key), depth+i, len(key), t.value) + return nil, fmt.Errorf("could not find key %s in trie depth=%d keylen=%d value=%v", common.ToHex(key), depth+i, len(key), t.value) } return t.value, nil } diff --git a/trie/binary_test.go b/trie/binary_test.go index 13a454fb2605..8b6d251634e5 100644 --- a/trie/binary_test.go +++ b/trie/binary_test.go @@ -90,6 +90,28 @@ func TestBinaryReadPrefix(t *testing.T) { if !bytes.Equal(res, []byte("baguette")) { t.Fatalf("should not have returned err=%v", err) } + + // Same test as above but the break is the last byte + // of the boundary + trieExtLeaf = &BinaryTrie{ + prefix: []byte("crois"), + startBit: 0, + endBit: 8*len("crois") - 1, + right: &BinaryTrie{ + prefix: []byte("ssants"), + startBit: 8, + endBit: 8 * len("ssants"), + value: []byte("baguette"), + left: nil, + right: nil, + }, + left: nil, + } + + res, err = trieExtLeaf.TryGet([]byte("croissants")) + if !bytes.Equal(res, []byte("baguette")) { + 
t.Fatalf("should not have returned err=%v", err) + } } func TestBinaryLeafInsert(t *testing.T) { From e954eb7fd42c3d6f4206da674e68403e2f99d256 Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Sat, 18 Apr 2020 15:40:39 +0200 Subject: [PATCH 12/24] trie: Add helper to represent bintries in dot format --- trie/binary.go | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/trie/binary.go b/trie/binary.go index 5749e83408e2..b7a36cb35464 100644 --- a/trie/binary.go +++ b/trie/binary.go @@ -20,6 +20,7 @@ import ( "bytes" "fmt" "io" + "strings" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/ethdb" @@ -204,9 +205,37 @@ func (t *BinaryTrie) insert(depth int, key, value []byte) error { default: t.right = &BinaryTrie{nil, nil, nil, t.db, nil, 0, 0} } + +func dotHelper(prefix string, t *BinaryTrie) ([]string, []string) { + p := []byte{} + for i := 0; i < t.getPrefixLen(); i++ { + if t.getPrefixBit(i) { + p = append(p, []byte("1")...) + } else { + p = append(p, []byte("0")...) } - return t.right.insert(depth+1, key, value) } + nodeName := fmt.Sprintf("binNode%s%s", p, prefix) + nodes := []string{nodeName} + links := []string{} + if t.left != nil { + n, l := dotHelper(fmt.Sprintf("%s%s%d", p, prefix, 0), t.left.(*BinaryTrie)) + nodes = append(nodes, n...) + links = append(links, fmt.Sprintf("%s -> %s", nodeName, n[0])) + links = append(links, l...) + } + if t.right != nil { + n, l := dotHelper(fmt.Sprintf("%s%s%d", p, prefix, 1), t.right.(*BinaryTrie)) + nodes = append(nodes, n...) + links = append(links, fmt.Sprintf("%s -> %s", nodeName, n[0])) + links = append(links, l...) 
+ } + return nodes, links +} + +func (t *BinaryTrie) toDot() string { + nodes, links := dotHelper("", t) + return fmt.Sprintf("digraph D {\n%s\n%s\n}", strings.Join(nodes, "\n"), strings.Join(links, "\n")) } func (t *BinaryTrie) Commit() error { From 0369446c93812ef55cdd692bcaea859b20b06507 Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Mon, 20 Apr 2020 08:38:57 +0200 Subject: [PATCH 13/24] trie: extension support in binary trie --- trie/binary.go | 186 ++++++++++++++++++++++++++++++++------------ trie/binary_test.go | 103 ++++++++++++++++++++---- 2 files changed, 223 insertions(+), 66 deletions(-) diff --git a/trie/binary.go b/trie/binary.go index b7a36cb35464..a25fc505b4e0 100644 --- a/trie/binary.go +++ b/trie/binary.go @@ -18,6 +18,7 @@ package trie import ( "bytes" + "errors" "fmt" "io" "strings" @@ -31,7 +32,7 @@ import ( type binaryNode interface { Hash() []byte Commit() error - insert(depth int, key, value []byte) error + insert(depth int, key, value []byte, hashLeft bool) error tryGet(key []byte, depth int) ([]byte, error) } @@ -49,11 +50,16 @@ type ( // of the range. 
prefix []byte startBit int - endBit int + endBit int // Technically, this is the "1st bit past the end" } hashBinaryNode []byte ) +var ( + errInsertIntoHash = errors.New("trying to insert into a hash") + errReadFromEmptyTree = errors.New("reached an empty subtree") +) + func NewBinary(db ethdb.Database) (*BinaryTrie, error) { return &BinaryTrie{db: db}, nil @@ -165,46 +171,112 @@ func (t *BinaryTrie) Hash() []byte { func (t *BinaryTrie) TryUpdate(key, value []byte) error { // TODO check key depth - err := t.insert(0, key, value) + err := t.insert(0, key, value, true) return err } -func (t *BinaryTrie) insert(depth int, key, value []byte) error { - by := key[depth/8] - bi := (by >> uint(7-depth%8)) & 1 - if bi == 0 { - if t.left == nil { - switch depth { - case len(key)*8 - 1: - t.value = value - return nil - case len(key)*8 - 2: - t.left = &BinaryTrie{nil, nil, value, t.db, nil, 0, 0} - return nil - default: - t.left = &BinaryTrie{nil, nil, nil, t.db, nil, 0, 0} +func (t *BinaryTrie) insert(depth int, key, value []byte, hashLeft bool) error { + // Special case: the trie is empty + if depth == 0 && t.left == nil && t.right == nil && len(t.prefix) == 0 { + t.prefix = key + t.value = value + t.startBit = 0 + t.endBit = 8 * len(key) + return nil + } + + // Compare the current segment of the key with the prefix, + // create an intermediate node if they are different. + var i int + for i = 0; i < t.getPrefixLen(); i++ { + if getBit(key, depth+i) != t.getPrefixBit(i) { + // Starting from the following context: + // + // ... + // parent < child1 + // [ a b c d e ... ] < + // ^ child2 + // | + // cut-off + // + // This needs to be turned into: + // + // ... child1 + // parent < [ d e ... ] < + // [ a b ] < child2 + // child3 + // + // where `c` determines which child is left + // or right. + // + // Both [ a b ] and [ d e ... ] can be empty + // prefixes. + + // Create the [ d e ... 
] part + oldChild := new(BinaryTrie) + oldChild.prefix = t.prefix + oldChild.startBit = depth + i + 1 + oldChild.endBit = t.endBit + oldChild.left = t.left + oldChild.right = t.right + + // Create the child3 part + newChild := new(BinaryTrie) + newChild.prefix = key + newChild.startBit = depth + i + 1 + newChild.endBit = len(key) * 8 + newChild.value = value + + // reconfigure the [ a b ] part by just specifying + // which one is the endbit (which could lead to a + // 0-length [ a b ] part) and also which one of the + // two children are left and right. + t.endBit = depth + i + if t.getPrefixBit(i) { + // if the prefix is 1 then the new + // child goes left and the old one + // goes right. + t.left = newChild + t.right = oldChild + } else { + if hashLeft { + t.left = hashBinaryNode(oldChild.Hash()) + } else { + t.left = oldChild + } + t.right = newChild } + + return nil } - return t.left.insert(depth+1, key, value) - } else { - if t.right == nil { - // Free the space taken by left branch as insert - // will no longer visit it. - if t.left != nil { - h := t.left.Hash() - t.left = hashBinaryNode(h) - } + } - switch depth { - case len(key)*8 - 1: - t.value = value - return nil - case len(key)*8 - 2: - t.right = &BinaryTrie{nil, nil, value, t.db, nil, 0, 0} - return nil - default: - t.right = &BinaryTrie{nil, nil, nil, t.db, nil, 0, 0} - } + if depth+i >= 8*len(key)-1 { + t.value = value + return nil + } + + // No break in the middle of the extension prefix, + // recurse into one of the children. + child := &t.left + isRight := getBit(key, depth+i) + if isRight { + child = &t.right + + // Free the space taken by the left branch as insert + // will no longer visit it, this will free memory. 
+ if t.left != nil { + t.left = hashBinaryNode(t.left.Hash()) + } + } + + // Create the child if it doesn't exist, otherwise recurse + if *child == nil { + *child = &BinaryTrie{nil, nil, value, t.db, key, depth + i + 1, 8 * len(key)} + return nil + } + return (*child).insert(depth+1+i, key, value, hashLeft) +} func dotHelper(prefix string, t *BinaryTrie) ([]string, []string) { p := []byte{} @@ -215,33 +287,47 @@ func dotHelper(prefix string, t *BinaryTrie) ([]string, []string) { p = append(p, []byte("0")...) } } - nodeName := fmt.Sprintf("binNode%s%s", p, prefix) + typ := "node" + if t.left == nil && t.right == nil { + typ = "leaf" + } + nodeName := fmt.Sprintf("bin%s%s_%s", typ, prefix, p) nodes := []string{nodeName} links := []string{} if t.left != nil { - n, l := dotHelper(fmt.Sprintf("%s%s%d", p, prefix, 0), t.left.(*BinaryTrie)) - nodes = append(nodes, n...) - links = append(links, fmt.Sprintf("%s -> %s", nodeName, n[0])) - links = append(links, l...) + if left, ok := t.left.(*BinaryTrie); ok { + n, l := dotHelper(fmt.Sprintf("%s%s%d", prefix, p, 0), left) + nodes = append(nodes, n...) + links = append(links, fmt.Sprintf("%s -> %s", nodeName, n[0])) + links = append(links, l...) + } else { + nodes = append(nodes, fmt.Sprintf("hash%s", prefix)) + } } if t.right != nil { - n, l := dotHelper(fmt.Sprintf("%s%s%d", p, prefix, 1), t.right.(*BinaryTrie)) - nodes = append(nodes, n...) - links = append(links, fmt.Sprintf("%s -> %s", nodeName, n[0])) - links = append(links, l...) + if right, ok := t.right.(*BinaryTrie); ok { + n, l := dotHelper(fmt.Sprintf("%s%s%d", prefix, p, 1), right) + nodes = append(nodes, n...) + links = append(links, fmt.Sprintf("%s -> %s", nodeName, n[0])) + links = append(links, l...) 
+ } else { + nodes = append(nodes, fmt.Sprintf("hash%s", prefix)) + } } return nodes, links } func (t *BinaryTrie) toDot() string { nodes, links := dotHelper("", t) - return fmt.Sprintf("digraph D {\n%s\n%s\n}", strings.Join(nodes, "\n"), strings.Join(links, "\n")) + return fmt.Sprintf("digraph D {\nnode [shape=rect]\n%s\n%s\n}", strings.Join(nodes, "\n"), strings.Join(links, "\n")) } func (t *BinaryTrie) Commit() error { var payload bytes.Buffer var err error + payload.Write(t.prefix) + var lh []byte if t.left != nil { lh = t.left.Hash() @@ -288,13 +374,13 @@ func (h hashBinaryNode) Hash() []byte { return h } -func (h hashBinaryNode) insert(depth int, key, value []byte) error { - return fmt.Errorf("trying to insert into a hash") +func (h hashBinaryNode) insert(depth int, key, value []byte, hashLeft bool) error { + return errInsertIntoHash } func (h hashBinaryNode) tryGet(key []byte, depth int) ([]byte, error) { - if depth == 2*len(key) { + if depth >= 8*len(key) { return []byte(h), nil } - return nil, fmt.Errorf("reached an empty branch") + return nil, errReadFromEmptyTree } diff --git a/trie/binary_test.go b/trie/binary_test.go index 8b6d251634e5..4d91d616e2b3 100644 --- a/trie/binary_test.go +++ b/trie/binary_test.go @@ -154,8 +154,8 @@ func TestBinaryForkInsertRead(t *testing.T) { t.Fatalf("error creating binary trie: %v", err) } - for i := byte(0); i < 10; i++ { - err = trie.TryUpdate([]byte{i}, common.FromHex("01")) + for i := byte(0); i <= 10; i++ { + err = trie.insert(0, []byte{i}, common.FromHex("01"), false) if err != nil { t.Fatalf("could not insert (%#x, 0x01) into an empty binary trie, err=%v", i, err) } @@ -183,13 +183,17 @@ func TestBinaryInsertLeftRight(t *testing.T) { // Trie is expected to look like this: // /\ - // / / - // / / - // / / - // / / - // / / - // / / - // / / + // H / + // / + // / + // / + // / + // / + // / + // + // i.e. the left branch is hashed and the + // right branch only contains lefts after + // the first right. 
// Check there is a left branch if trie.left == nil { @@ -198,7 +202,7 @@ func TestBinaryInsertLeftRight(t *testing.T) { // Check that the left branch has already been hashed if _, ok := trie.left.(hashBinaryNode); !ok { - t.Fatalf("left branch should have been hashed!") + t.Fatalf("left branch should have been hashed! %v", trie.left) } // Check there is a right branch @@ -206,13 +210,80 @@ func TestBinaryInsertLeftRight(t *testing.T) { t.Fatal("empty right branch") } - // Check that the right branch has only lefts after the + right := trie.right.(*BinaryTrie) + + // Check that the right branch has only 0s after the // first right. - for i, tr := 1, trie.right; i < 8; i++ { - if tr == nil { - t.Fatal("invalid trie structure") - } - tr = tr.(*BinaryTrie).left + if !bytes.Equal(right.prefix, []byte{128}) { + t.Fatalf("invalid right prefix %v", right.prefix) + } + if right.startBit != 1 { + t.Fatalf("invalid right start bit 1 != %d", right.startBit) + } + if right.endBit != 8 { + t.Fatalf("invalid right end bit 8 != %d", right.endBit) + } +} + +func TestInsertEnd(t *testing.T) { + btrie := &BinaryTrie{ + right: &BinaryTrie{}, + prefix: []byte{1}, + startBit: 0, + endBit: 7, + } + + err := btrie.insert(0, []byte{0}, []byte{1}, false) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestInsertLeft(t *testing.T) { + btrie := &BinaryTrie{ + prefix: []byte{1}, + startBit: 0, + endBit: 8, + } + + err := btrie.insert(0, []byte{0}, []byte{1}, false) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestInsertIntoNil(t *testing.T) { + btrie := &BinaryTrie{ + right: new(BinaryTrie), + left: nil, + } + err := btrie.insert(0, []byte{0}, []byte{0}, false) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestInsertIntoHash(t *testing.T) { + btrie := hashBinaryNode([]byte("croissantscroissantscroissantscr")) + err := btrie.insert(0, common.Hex2Bytes("01"), common.Hex2Bytes("01"), false) + if err != 
errInsertIntoHash { + t.Fatalf("unexpected error %v", err) + } +} + +func TestReadFromHash(t *testing.T) { + btrie := hashBinaryNode([]byte("croissantscroissantscroissantscr")) + _, err := btrie.tryGet(common.Hex2Bytes("01"), 0) + if err != errReadFromEmptyTree { + t.Fatalf("unexpected error: %v", err) + } + + data, err := btrie.tryGet(common.Hex2Bytes("01"), 8) + if err != nil { + t.Fatalf("unexpected error %v", err) + } + if !bytes.Equal(data, []byte(btrie)) { + t.Fatalf("unexpected value returned %v", data) } } From 65ff453253c3a8450b5b4eaa37e4816500712f41 Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Mon, 20 Apr 2020 19:47:55 +0200 Subject: [PATCH 14/24] Change the node hash format to something hexary-like --- trie/binary.go | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/trie/binary.go b/trie/binary.go index a25fc505b4e0..ee2fb878c301 100644 --- a/trie/binary.go +++ b/trie/binary.go @@ -26,6 +26,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/rlp" "golang.org/x/crypto/sha3" ) @@ -140,31 +141,31 @@ func (t *BinaryTrie) Update(key, value []byte) { } } +// Hash calculates the hash of an expanded (i.e. not already +// hashed) node. func (t *BinaryTrie) Hash() []byte { + return t.hash() +} + +func (t *BinaryTrie) hash() []byte { var payload bytes.Buffer - var lh []byte + // Calculate the hash of both subtrees + var lh, rh []byte if t.left != nil { lh = t.left.Hash() } - payload.Write(lh) t.left = hashBinaryNode(lh) - - var rh []byte if t.right != nil { rh = t.right.Hash() } - payload.Write(rh) t.right = hashBinaryNode(rh) - hasher := sha3.NewLegacyKeccak256() - if t.value != nil { - hasher.Write(t.value) - hv := hasher.Sum(nil) - payload.Write(hv) - } + // Create the "bitprefix" which indicates which are the start and + // end bit inside the prefix value. 
+ rlp.Encode(&payload, []interface{}{t.bitPrefix(), lh, rh, t.value}) - hasher.Reset() + hasher := sha3.NewLegacyKeccak256() io.Copy(hasher, &payload) return hasher.Sum(nil) } @@ -213,7 +214,7 @@ func (t *BinaryTrie) insert(depth int, key, value []byte, hashLeft bool) error { // prefixes. // Create the [ d e ... ] part - oldChild := new(BinaryTrie) + oldChild, _ := NewBinary(t.db) oldChild.prefix = t.prefix oldChild.startBit = depth + i + 1 oldChild.endBit = t.endBit @@ -221,7 +222,7 @@ func (t *BinaryTrie) insert(depth int, key, value []byte, hashLeft bool) error { oldChild.right = t.right // Create the child3 part - newChild := new(BinaryTrie) + newChild, _ := NewBinary(t.db) newChild.prefix = key newChild.startBit = depth + i + 1 newChild.endBit = len(key) * 8 @@ -240,6 +241,7 @@ func (t *BinaryTrie) insert(depth int, key, value []byte, hashLeft bool) error { t.right = oldChild } else { if hashLeft { + oldChild.Commit() t.left = hashBinaryNode(oldChild.Hash()) } else { t.left = oldChild From 3691a07ef737706bf426622b5e9b538996d4074f Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Tue, 21 Apr 2020 16:09:15 +0200 Subject: [PATCH 15/24] trie: offload db writes to goroutine + remove dependency on DB in BinaryTrie --- core/state/snapshot/hextrie_generator.go | 20 ++++- trie/binary.go | 103 +++++++++++------------ 2 files changed, 67 insertions(+), 56 deletions(-) diff --git a/core/state/snapshot/hextrie_generator.go b/core/state/snapshot/hextrie_generator.go index c645db2494bf..f08a36b7a557 100644 --- a/core/state/snapshot/hextrie_generator.go +++ b/core/state/snapshot/hextrie_generator.go @@ -40,10 +40,21 @@ func GenerateBinaryTree(it AccountIterator) common.Hash { if err != nil { panic(fmt.Sprintf("error opening bintrie db, err=%v", err)) } - btrie, err := trie.NewBinary(db) + btrie, err := trie.NewBinary(true) if err != nil { panic(fmt.Sprintf("error creating binary trie, err=%v", err)) } + + var nodeCount uint64 + var wg sync.WaitGroup + wg.Add(1) + go 
func() { + defer wg.Done() + for kv := range btrie.CommitCh { + nodeCount++ + db.Put(kv.Key, kv.Value) + } + }() counter := 0 for it.Next() { counter++ @@ -57,8 +68,13 @@ func GenerateBinaryTree(it AccountIterator) common.Hash { if err != nil { panic(fmt.Sprintf("error committing trie, err=%v", err)) } + close(btrie.CommitCh) + wg.Wait() + btrie.CommitCh = nil + log.Info("Done writing nodes to the DB", "count", nodeCount) + log.Info("Calculated binary hash", "hash", common.ToHex(btrie.Hash())) - return common.Hash{} + return common.BytesToHash(btrie.Hash()) } // GenerateTrieRoot takes an account iterator and reproduces the root hash. diff --git a/trie/binary.go b/trie/binary.go index ee2fb878c301..fd5aca23fef5 100644 --- a/trie/binary.go +++ b/trie/binary.go @@ -24,7 +24,6 @@ import ( "strings" "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/rlp" "golang.org/x/crypto/sha3" @@ -37,12 +36,17 @@ type binaryNode interface { tryGet(key []byte, depth int) ([]byte, error) } +type binKeyVal struct { + Key []byte + Value []byte +} + type ( BinaryTrie struct { - left binaryNode - right binaryNode - value []byte - db ethdb.Database + left binaryNode + right binaryNode + value []byte + CommitCh chan binKeyVal // This is the binary equivalent of "extension nodes": // binary nodes can have a prefix that is common to all @@ -61,9 +65,12 @@ var ( errReadFromEmptyTree = errors.New("reached an empty subtree") ) -func NewBinary(db ethdb.Database) (*BinaryTrie, error) { - - return &BinaryTrie{db: db}, nil +func NewBinary(committing bool) (*BinaryTrie, error) { + var cch chan binKeyVal + if committing { + cch = make(chan binKeyVal, 100) + } + return &BinaryTrie{CommitCh: cch}, nil } func (t *BinaryTrie) Get(key []byte) []byte { @@ -141,6 +148,19 @@ func (t *BinaryTrie) Update(key, value []byte) { } } +func (t *BinaryTrie) bitPrefix() []byte { + bp := make([]byte, 
1+(t.getPrefixLen()+7)/8) + for i := 0; i < t.getPrefixLen(); i++ { + if t.getPrefixBit(i) { + by := i / 8 + bi := 1 << uint(7-i%8) + bp[1+by] |= byte(bi) + } + } + + return bp +} + // Hash calculates the hash of an expanded (i.e. not already // hashed) node. func (t *BinaryTrie) Hash() []byte { @@ -154,12 +174,12 @@ func (t *BinaryTrie) hash() []byte { var lh, rh []byte if t.left != nil { lh = t.left.Hash() + t.left = hashBinaryNode(lh) } - t.left = hashBinaryNode(lh) if t.right != nil { rh = t.right.Hash() + t.right = hashBinaryNode(rh) } - t.right = hashBinaryNode(rh) // Create the "bitprefix" which indicates which are the start and // end bit inside the prefix value. @@ -167,7 +187,11 @@ func (t *BinaryTrie) hash() []byte { hasher := sha3.NewLegacyKeccak256() io.Copy(hasher, &payload) - return hasher.Sum(nil) + h := hasher.Sum(nil) + if t.CommitCh != nil { + t.CommitCh <- binKeyVal{Key: h, Value: payload.Bytes()} + } + return h } func (t *BinaryTrie) TryUpdate(key, value []byte) error { @@ -214,19 +238,21 @@ func (t *BinaryTrie) insert(depth int, key, value []byte, hashLeft bool) error { // prefixes. // Create the [ d e ... 
] part - oldChild, _ := NewBinary(t.db) + oldChild, _ := NewBinary(false) oldChild.prefix = t.prefix oldChild.startBit = depth + i + 1 oldChild.endBit = t.endBit oldChild.left = t.left oldChild.right = t.right + oldChild.CommitCh = t.CommitCh // Create the child3 part - newChild, _ := NewBinary(t.db) + newChild, _ := NewBinary(false) newChild.prefix = key newChild.startBit = depth + i + 1 newChild.endBit = len(key) * 8 newChild.value = value + newChild.CommitCh = t.CommitCh // reconfigure the [ a b ] part by just specifying // which one is the endbit (which could lead to a @@ -274,7 +300,7 @@ func (t *BinaryTrie) insert(depth int, key, value []byte, hashLeft bool) error { // Create the child if it doesn't exist, otherwise recurse if *child == nil { - *child = &BinaryTrie{nil, nil, value, t.db, key, depth + i + 1, 8 * len(key)} + *child = &BinaryTrie{nil, nil, value, nil, key, depth + i + 1, 8 * len(key)} return nil } return (*child).insert(depth+1+i, key, value, hashLeft) @@ -324,48 +350,17 @@ func (t *BinaryTrie) toDot() string { return fmt.Sprintf("digraph D {\nnode [shape=rect]\n%s\n%s\n}", strings.Join(nodes, "\n"), strings.Join(links, "\n")) } +// Commit stores all the values in the binary trie into the database. +// This version does not perform any caching, it is intended to perform +// the conversion from hexary to binary. +// It basically performs a hash, except that it makes sure that there is +// a channel to stream the intermediate (hash, preimage) values to. 
func (t *BinaryTrie) Commit() error { - var payload bytes.Buffer - var err error - - payload.Write(t.prefix) - - var lh []byte - if t.left != nil { - lh = t.left.Hash() - err := t.left.Commit() - if err != nil { - return err - } + if t.CommitCh == nil { + return fmt.Errorf("commit channel missing") } - payload.Write(lh) - t.left = hashBinaryNode(lh) - - var rh []byte - if t.right != nil { - rh = t.right.Hash() - err := t.right.Commit() - if err != nil { - return err - } - } - payload.Write(rh) - t.right = hashBinaryNode(rh) - - hasher := sha3.NewLegacyKeccak256() - if t.value != nil { - hasher.Write(t.value) - hv := hasher.Sum(nil) - payload.Write(hv) - } - - hasher.Reset() - io.Copy(hasher, &payload) - h := hasher.Sum(nil) - - err = t.db.Put(h, payload.Bytes()) - - return err + t.hash() + return nil } func (h hashBinaryNode) Commit() error { From 50505ee8b9ff870352e8da3bc0666ae8db17df36 Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Wed, 22 Apr 2020 11:25:28 +0200 Subject: [PATCH 16/24] trie: refactor binary node creation --- core/state/snapshot/hextrie_generator.go | 6 +-- trie/binary.go | 55 ++++++++++++++++-------- trie/binary_test.go | 34 ++++----------- 3 files changed, 48 insertions(+), 47 deletions(-) diff --git a/core/state/snapshot/hextrie_generator.go b/core/state/snapshot/hextrie_generator.go index f08a36b7a557..b6bf6d1e2dbe 100644 --- a/core/state/snapshot/hextrie_generator.go +++ b/core/state/snapshot/hextrie_generator.go @@ -40,10 +40,8 @@ func GenerateBinaryTree(it AccountIterator) common.Hash { if err != nil { panic(fmt.Sprintf("error opening bintrie db, err=%v", err)) } - btrie, err := trie.NewBinary(true) - if err != nil { - panic(fmt.Sprintf("error creating binary trie, err=%v", err)) - } + btrie := new(trie.BinaryTrie) + btrie.CommitCh = make(chan trie.BinaryHashPreimage) var nodeCount uint64 var wg sync.WaitGroup diff --git a/trie/binary.go b/trie/binary.go index fd5aca23fef5..3ecff23b3b85 100644 --- a/trie/binary.go +++ 
b/trie/binary.go @@ -36,17 +36,24 @@ type binaryNode interface { tryGet(key []byte, depth int) ([]byte, error) } -type binKeyVal struct { +// BinaryHashPreimage represents a tuple of a hash and its preimage +type BinaryHashPreimage struct { Key []byte Value []byte } +// All known implementations of binaryNode type ( + // BinaryTrie is a node with two children ("left" and "right") + // It can be prefixed by bits that are common to all subtrie + // keys and it can also hold a value. BinaryTrie struct { - left binaryNode - right binaryNode - value []byte - CommitCh chan binKeyVal + left binaryNode + right binaryNode + value []byte + + // Used to send (hash, preimage) pairs when hashing + CommitCh chan BinaryHashPreimage // This is the binary equivalent of "extension nodes": // binary nodes can have a prefix that is common to all @@ -65,14 +72,6 @@ var ( errReadFromEmptyTree = errors.New("reached an empty subtree") ) -func NewBinary(committing bool) (*BinaryTrie, error) { - var cch chan binKeyVal - if committing { - cch = make(chan binKeyVal, 100) - } - return &BinaryTrie{CommitCh: cch}, nil -} - func (t *BinaryTrie) Get(key []byte) []byte { res, err := t.TryGet(key) if err != nil { @@ -86,6 +85,7 @@ func (t *BinaryTrie) TryGet(key []byte) ([]byte, error) { return value, err } +// getPrefixLen returns the bit length of the current node's prefix func (t *BinaryTrie) getPrefixLen() int { if t.endBit > t.startBit { return t.endBit - t.startBit @@ -93,12 +93,16 @@ func (t *BinaryTrie) getPrefixLen() int { return 0 } +// getBit returns the boolean value of bit at offset `off` in +// the byte key `key` func getBit(key []byte, off int) bool { mask := byte(1) << (7 - uint(off)%8) return byte(key[uint(off)/8])&mask != byte(0) } +// getPrefixBit returns the boolean value of bit number `bitnum` +// in the prefix of the current node. 
func (t *BinaryTrie) getPrefixBit(bitnum int) bool { if bitnum > t.getPrefixLen() { panic(fmt.Sprintf("Trying to get bit #%d in a %d bit-long bitfield", bitnum, t.getPrefixLen())) @@ -148,6 +152,7 @@ func (t *BinaryTrie) Update(key, value []byte) { } } +// bitPrefix add the "bit prefix" to a key / extension node func (t *BinaryTrie) bitPrefix() []byte { bp := make([]byte, 1+(t.getPrefixLen()+7)/8) for i := 0; i < t.getPrefixLen(); i++ { @@ -167,6 +172,9 @@ func (t *BinaryTrie) Hash() []byte { return t.hash() } +// hash is a a helper function that is shared between Hash and +// Commit. If t.CommitCh is not nil, then its behavior will be +// that of Commit, and that of Hash otherwise. func (t *BinaryTrie) hash() []byte { var payload bytes.Buffer @@ -189,17 +197,23 @@ func (t *BinaryTrie) hash() []byte { io.Copy(hasher, &payload) h := hasher.Sum(nil) if t.CommitCh != nil { - t.CommitCh <- binKeyVal{Key: h, Value: payload.Bytes()} + t.CommitCh <- BinaryHashPreimage{Key: h, Value: payload.Bytes()} } return h } +// TryUpdate inserts a (key, value) pair into the binary trie, +// and expects values to be inserted in order as inserting to +// the right of a node will cause the left node to be hashed. func (t *BinaryTrie) TryUpdate(key, value []byte) error { // TODO check key depth err := t.insert(0, key, value, true) return err } +// insert is a recursive helper function that inserts a (key, value) pair at +// a given depth. If hashLeft is true, inserting a key into a right subnode +// will cause the left subnode to be hashed. func (t *BinaryTrie) insert(depth int, key, value []byte, hashLeft bool) error { // Special case: the trie is empty if depth == 0 && t.left == nil && t.right == nil && len(t.prefix) == 0 { @@ -238,7 +252,7 @@ func (t *BinaryTrie) insert(depth int, key, value []byte, hashLeft bool) error { // prefixes. // Create the [ d e ... 
] part - oldChild, _ := NewBinary(false) + oldChild := new(BinaryTrie) oldChild.prefix = t.prefix oldChild.startBit = depth + i + 1 oldChild.endBit = t.endBit @@ -247,7 +261,7 @@ func (t *BinaryTrie) insert(depth int, key, value []byte, hashLeft bool) error { oldChild.CommitCh = t.CommitCh // Create the child3 part - newChild, _ := NewBinary(false) + newChild := new(BinaryTrie) newChild.prefix = key newChild.startBit = depth + i + 1 newChild.endBit = len(key) * 8 @@ -266,9 +280,10 @@ func (t *BinaryTrie) insert(depth int, key, value []byte, hashLeft bool) error { t.left = newChild t.right = oldChild } else { + // if asked to, hash the left subtrie to free + // up memory. if hashLeft { - oldChild.Commit() - t.left = hashBinaryNode(oldChild.Hash()) + t.left = hashBinaryNode(oldChild.hash()) } else { t.left = oldChild } @@ -345,6 +360,7 @@ func dotHelper(prefix string, t *BinaryTrie) ([]string, []string) { return nodes, links } +// toDot creates a graphviz representation of the binary trie func (t *BinaryTrie) toDot() string { nodes, links := dotHelper("", t) return fmt.Sprintf("digraph D {\nnode [shape=rect]\n%s\n%s\n}", strings.Join(nodes, "\n"), strings.Join(links, "\n")) @@ -363,10 +379,13 @@ func (t *BinaryTrie) Commit() error { return nil } +// Commit does not commit anything, because a hash doesn't have +// its accompanying preimage. 
func (h hashBinaryNode) Commit() error { return nil } +// Hash returns itself func (h hashBinaryNode) Hash() []byte { return h } diff --git a/trie/binary_test.go b/trie/binary_test.go index 4d91d616e2b3..bb5c75646604 100644 --- a/trie/binary_test.go +++ b/trie/binary_test.go @@ -24,12 +24,9 @@ import ( ) func TestBinaryLeafReadEmpty(t *testing.T) { - trie, err := NewBinary(nil) - if err != nil { - t.Fatalf("error creating binary trie: %v", err) - } + trie := new(BinaryTrie) - _, err = trie.TryGet(common.FromHex("00")) + _, err := trie.TryGet(common.FromHex("00")) if err == nil { t.Fatalf("should have returned an error trying to get from an empty binry trie, err=%v", err) } @@ -115,12 +112,9 @@ func TestBinaryReadPrefix(t *testing.T) { } func TestBinaryLeafInsert(t *testing.T) { - trie, err := NewBinary(nil) - if err != nil { - t.Fatalf("error creating binary trie: %v", err) - } + trie := new(BinaryTrie) - err = trie.TryUpdate(common.FromHex("00"), common.FromHex("00")) + err := trie.TryUpdate(common.FromHex("00"), common.FromHex("00")) if err != nil { t.Fatalf("could not insert (0x00, 0x00) into an empty binary trie, err=%v", err) } @@ -128,12 +122,9 @@ func TestBinaryLeafInsert(t *testing.T) { } func TestBinaryLeafInsertRead(t *testing.T) { - trie, err := NewBinary(nil) - if err != nil { - t.Fatalf("error creating binary trie: %v", err) - } + trie := new(BinaryTrie) - err = trie.TryUpdate(common.FromHex("00"), common.FromHex("01")) + err := trie.TryUpdate(common.FromHex("00"), common.FromHex("01")) if err != nil { t.Fatalf("could not insert (0x00, 0x01) into an empty binary trie, err=%v", err) } @@ -149,13 +140,10 @@ func TestBinaryLeafInsertRead(t *testing.T) { } func TestBinaryForkInsertRead(t *testing.T) { - trie, err := NewBinary(nil) - if err != nil { - t.Fatalf("error creating binary trie: %v", err) - } + trie := new(BinaryTrie) for i := byte(0); i <= 10; i++ { - err = trie.insert(0, []byte{i}, common.FromHex("01"), false) + err := trie.insert(0, []byte{i}, 
common.FromHex("01"), false) if err != nil { t.Fatalf("could not insert (%#x, 0x01) into an empty binary trie, err=%v", i, err) } @@ -173,11 +161,7 @@ func TestBinaryForkInsertRead(t *testing.T) { } func TestBinaryInsertLeftRight(t *testing.T) { - trie, err := NewBinary(nil) - if err != nil { - t.Fatalf("error creating binary trie: %v", err) - } - + trie := new(BinaryTrie) trie.TryUpdate([]byte{0}, []byte{0}) trie.TryUpdate([]byte{128}, []byte{1}) From 8f092fdaae4a1a338bb0e3a8a786f505681a3322 Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Wed, 22 Apr 2020 14:14:06 +0200 Subject: [PATCH 17/24] trie: store binary trie in datadir --- cmd/geth/chaincmd.go | 2 +- core/state/snapshot/hextrie_generator.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd/geth/chaincmd.go b/cmd/geth/chaincmd.go index 6bd43fefd9fe..6c153c4e1889 100644 --- a/cmd/geth/chaincmd.go +++ b/cmd/geth/chaincmd.go @@ -667,7 +667,7 @@ func snapToBin(ctx *cli.Context) error { return fmt.Errorf("Could not create iterator for root %x: %v", root, err) } log.Info("Generating binary trie", "root", root) - generatedRoot := snapshot.GenerateBinaryTree(it) + generatedRoot := snapshot.GenerateBinaryTree(ctx.GlobalString(utils.DataDirFlag.Name), it) log.Info("Generation done", "root", root, "binary root", generatedRoot) return nil } diff --git a/core/state/snapshot/hextrie_generator.go b/core/state/snapshot/hextrie_generator.go index b6bf6d1e2dbe..4192c6be92ab 100644 --- a/core/state/snapshot/hextrie_generator.go +++ b/core/state/snapshot/hextrie_generator.go @@ -35,8 +35,8 @@ type leaf struct { type trieGeneratorFn func(in chan (leaf), out chan (common.Hash)) -func GenerateBinaryTree(it AccountIterator) common.Hash { - db, err := rawdb.NewLevelDBDatabase("./bintrie", 128, 1024, "") +func GenerateBinaryTree(path string, it AccountIterator) common.Hash { + db, err := rawdb.NewLevelDBDatabase(path+"/bintrie", 128, 1024, "") if err != nil { panic(fmt.Sprintf("error opening bintrie 
db, err=%v", err)) } From cf0990f11dfd161a7c8ff91f976a0f8e898608d3 Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Tue, 5 May 2020 16:24:12 +0200 Subject: [PATCH 18/24] bugfixes and helpers needed to read from the bintrie --- cmd/geth/chaincmd.go | 2 +- trie/binary.go | 37 ++++++++++++++++++++++++++++++------- 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/cmd/geth/chaincmd.go b/cmd/geth/chaincmd.go index 6c153c4e1889..d225d75e4c96 100644 --- a/cmd/geth/chaincmd.go +++ b/cmd/geth/chaincmd.go @@ -19,7 +19,6 @@ package main import ( "encoding/json" "fmt" - "github.com/ethereum/go-ethereum/core/state/snapshot" "os" "path/filepath" "runtime" @@ -33,6 +32,7 @@ import ( "github.com/ethereum/go-ethereum/core" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/state" + "github.com/ethereum/go-ethereum/core/state/snapshot" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/eth/downloader" "github.com/ethereum/go-ethereum/event" diff --git a/trie/binary.go b/trie/binary.go index 3ecff23b3b85..e73996a519b1 100644 --- a/trie/binary.go +++ b/trie/binary.go @@ -162,16 +162,35 @@ func (t *BinaryTrie) bitPrefix() []byte { bp[1+by] |= byte(bi) } } + if t.getPrefixLen() > 0 { + bp[0] = byte(t.endBit+t.startBit) / 8 + } return bp } +func Prefix2Bitfield(payload []byte) *BinaryTrie { + return &BinaryTrie{ + prefix: payload[1:], + endBit: int(payload[0]), + } +} + // Hash calculates the hash of an expanded (i.e. not already // hashed) node. func (t *BinaryTrie) Hash() []byte { return t.hash() } +// BinaryDBNode represents a binary node as it is stored +// inside the DB. +type BinaryDBNode struct { + Bitprefix []byte + Left []byte + Right []byte + Value []byte +} + // hash is a a helper function that is shared between Hash and // Commit. If t.CommitCh is not nil, then its behavior will be // that of Commit, and that of Hash otherwise. 
@@ -179,25 +198,29 @@ func (t *BinaryTrie) hash() []byte { var payload bytes.Buffer // Calculate the hash of both subtrees - var lh, rh []byte + var dbnode BinaryDBNode if t.left != nil { - lh = t.left.Hash() - t.left = hashBinaryNode(lh) + dbnode.Left = t.left.Hash() + t.left = hashBinaryNode(dbnode.Left) } if t.right != nil { - rh = t.right.Hash() - t.right = hashBinaryNode(rh) + dbnode.Right = t.right.Hash() + t.right = hashBinaryNode(dbnode.Right) } + dbnode.Value = t.value + dbnode.Bitprefix = t.bitPrefix() + // Create the "bitprefix" which indicates which are the start and // end bit inside the prefix value. - rlp.Encode(&payload, []interface{}{t.bitPrefix(), lh, rh, t.value}) + rlp.Encode(&payload, dbnode) + value := payload.Bytes() hasher := sha3.NewLegacyKeccak256() io.Copy(hasher, &payload) h := hasher.Sum(nil) if t.CommitCh != nil { - t.CommitCh <- BinaryHashPreimage{Key: h, Value: payload.Bytes()} + t.CommitCh <- BinaryHashPreimage{Key: h, Value: value} } return h } From 309e0ad89ea77d4c43d466f63ba51573ceb3a5fc Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Tue, 5 May 2020 17:04:53 +0200 Subject: [PATCH 19/24] Code to read back from the binary trie DB For some reason the correct value can't be retrieved from the DB, this will be investigated later. 
--- cmd/geth/chaincmd.go | 13 ++++++++ core/state/snapshot/hextrie_generator.go | 8 +++-- go.sum | 1 + trie/binary.go | 42 +++++++++++++++++++++++- 4 files changed, 60 insertions(+), 4 deletions(-) diff --git a/cmd/geth/chaincmd.go b/cmd/geth/chaincmd.go index d225d75e4c96..e2ad711590f1 100644 --- a/cmd/geth/chaincmd.go +++ b/cmd/geth/chaincmd.go @@ -38,6 +38,7 @@ import ( "github.com/ethereum/go-ethereum/event" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/trie" + "github.com/syndtr/goleveldb/leveldb" "gopkg.in/urfave/cli.v1" ) @@ -669,6 +670,18 @@ func snapToBin(ctx *cli.Context) error { log.Info("Generating binary trie", "root", root) generatedRoot := snapshot.GenerateBinaryTree(ctx.GlobalString(utils.DataDirFlag.Name), it) log.Info("Generation done", "root", root, "binary root", generatedRoot) + + db, err := leveldb.OpenFile(ctx.GlobalString(utils.DataDirFlag.Name)+"/bintrie", nil) + it, _ = snapTree.AccountIterator(root, common.Hash{}) + found := 0 + total := 0 + for it.Next() { + if trie.CheckKey(db, it.Hash().Bytes(), generatedRoot[:], 0, it.Account()) { + found++ + } + } + log.Info("Read check finished", "total", total, "found", found) + db.Close() return nil } diff --git a/core/state/snapshot/hextrie_generator.go b/core/state/snapshot/hextrie_generator.go index 4192c6be92ab..def0bf99320a 100644 --- a/core/state/snapshot/hextrie_generator.go +++ b/core/state/snapshot/hextrie_generator.go @@ -22,10 +22,10 @@ import ( "time" "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/ethdb/memorydb" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/trie" + "github.com/syndtr/goleveldb/leveldb" ) type leaf struct { @@ -36,10 +36,11 @@ type leaf struct { type trieGeneratorFn func(in chan (leaf), out chan (common.Hash)) func GenerateBinaryTree(path string, it AccountIterator) common.Hash { - db, err := rawdb.NewLevelDBDatabase(path+"/bintrie", 128, 1024, 
"") + db, err := leveldb.OpenFile(path+"/bintrie", nil) if err != nil { panic(fmt.Sprintf("error opening bintrie db, err=%v", err)) } + defer db.Close() btrie := new(trie.BinaryTrie) btrie.CommitCh = make(chan trie.BinaryHashPreimage) @@ -50,7 +51,8 @@ func GenerateBinaryTree(path string, it AccountIterator) common.Hash { defer wg.Done() for kv := range btrie.CommitCh { nodeCount++ - db.Put(kv.Key, kv.Value) + log.Info("inserting key", "count", nodeCount, "key", common.ToHex(kv.Key), "value", common.ToHex(kv.Value)) + db.Put(kv.Key, kv.Value, nil) } }() counter := 0 diff --git a/go.sum b/go.sum index 2a823e15cfba..aa31d7e75e98 100644 --- a/go.sum +++ b/go.sum @@ -185,6 +185,7 @@ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXf github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/syndtr/goleveldb v1.0.0 h1:fBdIW9lB4Iz0n9khmH8w27SJ3QEJ7+IgjPEwGSZiFdE= github.com/syndtr/goleveldb v1.0.1-0.20190923125748-758128399b1d h1:gZZadD8H+fF+n9CmNhYL1Y0dJB+kLOmKd7FbPJLeGHs= github.com/syndtr/goleveldb v1.0.1-0.20190923125748-758128399b1d/go.mod h1:9OrXJhf154huy1nPWmuSrkgjPUtUNhA+Zmy+6AESzuA= github.com/tyler-smith/go-bip39 v1.0.1-0.20181017060643-dbb3b84ba2ef h1:wHSqTBrZW24CsNJDfeh9Ex6Pm0Rcpc7qrgKBiL44vF4= diff --git a/trie/binary.go b/trie/binary.go index e73996a519b1..f7f0103bdbaf 100644 --- a/trie/binary.go +++ b/trie/binary.go @@ -26,6 +26,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/rlp" + "github.com/syndtr/goleveldb/leveldb" "golang.org/x/crypto/sha3" ) @@ -163,7 +164,7 @@ func (t *BinaryTrie) bitPrefix() []byte { } } if t.getPrefixLen() > 0 { - bp[0] = byte(t.endBit+t.startBit) / 8 + bp[0] = byte(t.endBit-t.startBit) / 8 } return bp @@ -176,6 
+177,45 @@ func Prefix2Bitfield(payload []byte) *BinaryTrie { } } +func CheckKey(db *leveldb.DB, key, root []byte, depth int, value []byte) bool { + node, err := db.Get(root, nil) + if err != nil { + log.Error("could not find the node!", "error", err) + return false + } + + var out BinaryDBNode + err = rlp.DecodeBytes(node, &out) + + bt := Prefix2Bitfield(out.Bitprefix) + fulldepth := depth + if len(bt.prefix) > 0 { + fulldepth += 8*len(bt.prefix) - (8-bt.endBit)%8 + } + + if fulldepth < 8*len(key) { + by := key[fulldepth/8] + bi := (by>>uint(7-(fulldepth%8)))&1 == 0 + if bi { + if len(out.Left) == 0 { + log.Error("key could not be found !") + return false + } + + return CheckKey(db, key, out.Left, fulldepth+1, value) + } else { + if len(out.Right) == 0 { + log.Error("key could not be found ?") + return false + } + + return CheckKey(db, key, out.Right, fulldepth+1, value) + } + } + + return true // bytes.Equal(out.Value, value) +} + // Hash calculates the hash of an expanded (i.e. not already // hashed) node. 
func (t *BinaryTrie) Hash() []byte { From b8c749d9fe6bcd564328b21992ba013f8761ca23 Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Fri, 5 Jun 2020 16:18:39 +0200 Subject: [PATCH 20/24] fix: bit overflow in binary prefix's first byte --- trie/binary.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trie/binary.go b/trie/binary.go index f7f0103bdbaf..195de571204d 100644 --- a/trie/binary.go +++ b/trie/binary.go @@ -164,7 +164,7 @@ func (t *BinaryTrie) bitPrefix() []byte { } } if t.getPrefixLen() > 0 { - bp[0] = byte(t.endBit-t.startBit) / 8 + bp[0] = byte(t.endBit-t.startBit) % 8 } return bp From c69534454b982eefbf72843b7b99e57bb101922e Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Fri, 5 Jun 2020 16:49:32 +0200 Subject: [PATCH 21/24] fix: move value to left of node if right is inserted --- trie/binary.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/trie/binary.go b/trie/binary.go index 195de571204d..4fe3e9e658c6 100644 --- a/trie/binary.go +++ b/trie/binary.go @@ -321,6 +321,7 @@ func (t *BinaryTrie) insert(depth int, key, value []byte, hashLeft bool) error { oldChild.endBit = t.endBit oldChild.left = t.left oldChild.right = t.right + oldChild.value = t.value oldChild.CommitCh = t.CommitCh // Create the child3 part @@ -352,6 +353,7 @@ func (t *BinaryTrie) insert(depth int, key, value []byte, hashLeft bool) error { } t.right = newChild } + t.value = nil return nil } From 9d88014b28b178c861e34620394beed97cb135d6 Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Fri, 5 Jun 2020 19:17:03 +0200 Subject: [PATCH 22/24] bug: values were overwritten before it hit the DB --- trie/binary.go | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/trie/binary.go b/trie/binary.go index 4fe3e9e658c6..55963e36c35d 100644 --- a/trie/binary.go +++ b/trie/binary.go @@ -213,7 +213,11 @@ func CheckKey(db *leveldb.DB, key, root []byte, depth int, value []byte) bool { } } - return true // bytes.Equal(out.Value, value) + if 
!bytes.Equal(out.Value, value) { + log.Error("invalid value found", "got", out.Value, "expected", value, "fulldepth", fulldepth, "depth", depth, "key", key) + return false + } + return true } // Hash calculates the hash of an expanded (i.e. not already @@ -281,7 +285,8 @@ func (t *BinaryTrie) insert(depth int, key, value []byte, hashLeft bool) error { // Special case: the trie is empty if depth == 0 && t.left == nil && t.right == nil && len(t.prefix) == 0 { t.prefix = key - t.value = value + t.value = make([]byte, len(value)) + copy(t.value, value) t.startBit = 0 t.endBit = 8 * len(key) return nil @@ -329,7 +334,8 @@ func (t *BinaryTrie) insert(depth int, key, value []byte, hashLeft bool) error { newChild.prefix = key newChild.startBit = depth + i + 1 newChild.endBit = len(key) * 8 - newChild.value = value + newChild.value = make([]byte, len(value)) + copy(newChild.value, value) newChild.CommitCh = t.CommitCh // reconfigure the [ a b ] part by just specifying From 684f5fb7ab7b131185b18cea6f124cdc2e069935 Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Mon, 8 Jun 2020 15:24:21 +0200 Subject: [PATCH 23/24] Increase count for report on number of read keys --- cmd/geth/chaincmd.go | 1 + core/state/snapshot/hextrie_generator.go | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/cmd/geth/chaincmd.go b/cmd/geth/chaincmd.go index e2ad711590f1..3b75dbac6892 100644 --- a/cmd/geth/chaincmd.go +++ b/cmd/geth/chaincmd.go @@ -676,6 +676,7 @@ func snapToBin(ctx *cli.Context) error { found := 0 total := 0 for it.Next() { + total++ if trie.CheckKey(db, it.Hash().Bytes(), generatedRoot[:], 0, it.Account()) { found++ } diff --git a/core/state/snapshot/hextrie_generator.go b/core/state/snapshot/hextrie_generator.go index def0bf99320a..93296fefda7e 100644 --- a/core/state/snapshot/hextrie_generator.go +++ b/core/state/snapshot/hextrie_generator.go @@ -51,7 +51,7 @@ func GenerateBinaryTree(path string, it AccountIterator) common.Hash { defer wg.Done() for kv := 
range btrie.CommitCh { nodeCount++ - log.Info("inserting key", "count", nodeCount, "key", common.ToHex(kv.Key), "value", common.ToHex(kv.Value)) + log.Debug("inserting key", "count", nodeCount, "key", common.ToHex(kv.Key), "value", common.ToHex(kv.Value)) db.Put(kv.Key, kv.Value, nil) } }() From 50e685582274347c035cbf342b00404a33d5b247 Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Mon, 6 Jul 2020 17:02:44 +0200 Subject: [PATCH 24/24] trie: change bintrie structure to keep keys at the bottom --- trie/binary.go | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/trie/binary.go b/trie/binary.go index 55963e36c35d..5b670cd17628 100644 --- a/trie/binary.go +++ b/trie/binary.go @@ -48,23 +48,27 @@ type ( // BinaryTrie is a node with two children ("left" and "right") // It can be prefixed by bits that are common to all subtrie // keys and it can also hold a value. - BinaryTrie struct { + binBranchNode struct { left binaryNode right binaryNode + + // "Extension" part: a pointer to the leaf, as well as a + // start offset into the key and its length. + leafPtr *binLeafNode + startBit int + prefixLen int + } + + binLeafNode struct { + key []byte value []byte // Used to send (hash, preimage) pairs when hashing CommitCh chan BinaryHashPreimage - // This is the binary equivalent of "extension nodes": - // binary nodes can have a prefix that is common to all - // subtrees. The prefix is defined by a series of bytes, - // and two offsets marking the start bit and the end bit - // of the range. - prefix []byte startBit int - endBit int // Technically, this is the "1st bit past the end" } + hashBinaryNode []byte )