Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PoC] Snapshot to bintrie #12

Closed
wants to merge 24 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
2d92754
trie: binary trie + snapbin command
gballet Apr 1, 2020
93375a1
Update trie/binary.go
gballet Apr 1, 2020
d40815b
Update trie/binary.go
gballet Apr 1, 2020
2a1629d
Fix bintrie command
gballet Apr 1, 2020
62370f0
Fix a couple bugs in insert and add a corresponding test
gballet Apr 1, 2020
60cdf15
Fix tests broken after rebase
gballet Apr 1, 2020
4936cc3
fix a fix in tryGet tests
gballet Apr 1, 2020
cbe3620
Prune the left subtree when inserting into the right subtree
gballet Apr 1, 2020
b8c27d5
Benchmarking code for bintrie
gballet Apr 3, 2020
96d4e08
trie: introduce extension nodes in search
gballet Apr 17, 2020
15e9a10
trie: cleanup and one more extension read test
gballet Apr 17, 2020
e954eb7
trie: Add helper to represent bintries in dot format
gballet Apr 18, 2020
0369446
trie: extension support in binary trie
gballet Apr 20, 2020
65ff453
Change the node hash format to something hexary-like
gballet Apr 20, 2020
3691a07
trie: offload db writes to goroutine + remove dependency on DB in Bin…
gballet Apr 21, 2020
50505ee
trie: refactor binary node creation
gballet Apr 22, 2020
8f092fd
trie: store binary trie in datadir
gballet Apr 22, 2020
cf0990f
bugfixes and helpers needed to read from the bintrie
gballet May 5, 2020
309e0ad
Code to read back from the binary trie DB
gballet May 5, 2020
b8c749d
fix: bit overflow in binary prefix's first byte
gballet Jun 5, 2020
c695344
fix: move value to left of node if right is inserted
gballet Jun 5, 2020
9d88014
bug: values were overwritten before it hit the DB
gballet Jun 5, 2020
684f5fb
Increase count for report on number of read keys
gballet Jun 8, 2020
50e6855
trie: change bintrie structure to keep keys at the bottom
gballet Jul 6, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 60 additions & 1 deletion cmd/geth/chaincmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ package main
import (
"encoding/json"
"fmt"
"github.com/ethereum/go-ethereum/core/state/snapshot"
"os"
"path/filepath"
"runtime"
Expand All @@ -33,11 +32,13 @@ import (
"github.com/ethereum/go-ethereum/core"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/core/state"
"github.com/ethereum/go-ethereum/core/state/snapshot"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/eth/downloader"
"github.com/ethereum/go-ethereum/event"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/trie"
"github.com/syndtr/goleveldb/leveldb"
"gopkg.in/urfave/cli.v1"
)

Expand Down Expand Up @@ -218,6 +219,22 @@ Use "ethereum dump 0" to dump the genesis block.`,
},
Category: "BLOCKCHAIN COMMANDS",
}
// generateBinaryTrieCommand declares the "bintrie" subcommand. Its action is
// snapToBin (wrapped in utils.MigrateFlags), which converts the snapshot DB
// to a binary trie.
generateBinaryTrieCommand = cli.Command{
Action: utils.MigrateFlags(snapToBin),
Name: "bintrie",
Usage: "Convert the snapshot DB to a binary trie",
// The command takes no positional arguments.
ArgsUsage: " ",
// Flags accepted by the command: data locations, cache size, network
// selection and sync mode.
Flags: []cli.Flag{
utils.DataDirFlag,
utils.AncientFlag,
utils.CacheFlag,
utils.TestnetFlag,
utils.RinkebyFlag,
utils.GoerliFlag,
utils.SyncModeFlag,
},
Category: "BLOCKCHAIN COMMANDS",
}
)

// initGenesis will initialise the given JSON format genesis file and writes it as
Expand Down Expand Up @@ -627,6 +644,48 @@ func snapToHash(ctx *cli.Context) error {
return nil
}

func snapToBin(ctx *cli.Context) error {
node, _ := makeConfigNode(ctx)
chain, chainDb := utils.MakeChain(ctx, node)

defer func() {
node.Close()
chain.Stop()
chainDb.Close()
}()

snapTree := chain.Snapshot()
if snapTree == nil {
return fmt.Errorf("No snapshot tree available")
}
block := chain.CurrentBlock()
if block == nil {
return fmt.Errorf("no blocks present")
}
root := block.Root()
it, err := snapTree.AccountIterator(root, common.Hash{})
if err != nil {
return fmt.Errorf("Could not create iterator for root %x: %v", root, err)
}
log.Info("Generating binary trie", "root", root)
generatedRoot := snapshot.GenerateBinaryTree(ctx.GlobalString(utils.DataDirFlag.Name), it)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does that resolve itself correctly? Like, what if --goerli is specified, is the GlobalString (utils.DataDir... really correct?

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While you're at it above, doing log.Info, maybe add

dbPath := ctx.GlobalString(utils.DataDirFlag.Name)
log.Info("Generating binary trie", "root", root, "database", dbPath)
generatedRoot := snapshot.GenerateBinaryTree(dbPath, it)

log.Info("Generation done", "root", root, "binary root", generatedRoot)

db, err := leveldb.OpenFile(ctx.GlobalString(utils.DataDirFlag.Name)+"/bintrie", nil)
it, _ = snapTree.AccountIterator(root, common.Hash{})
found := 0
total := 0
for it.Next() {
total++
if trie.CheckKey(db, it.Hash().Bytes(), generatedRoot[:], 0, it.Account()) {
found++
}
}
log.Info("Read check finished", "total", total, "found", found)
db.Close()
return nil
}

// hashish returns true for strings that look like hashes.
func hashish(x string) bool {
_, err := strconv.Atoi(x)
Expand Down
1 change: 1 addition & 0 deletions cmd/geth/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ func init() {
dumpGenesisCommand,
inspectCommand,
generateTrieCommand,
generateBinaryTrieCommand,
// See accountcmd.go:
accountCommand,
walletCommand,
Expand Down
44 changes: 44 additions & 0 deletions core/state/snapshot/hextrie_generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,15 @@
package snapshot

import (
"fmt"
"sync"
"time"

"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/ethdb/memorydb"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/trie"
"github.com/syndtr/goleveldb/leveldb"
)

type leaf struct {
Expand All @@ -33,6 +35,48 @@ type leaf struct {

type trieGeneratorFn func(in chan (leaf), out chan (common.Hash))

func GenerateBinaryTree(path string, it AccountIterator) common.Hash {
db, err := leveldb.OpenFile(path+"/bintrie", nil)
if err != nil {
panic(fmt.Sprintf("error opening bintrie db, err=%v", err))
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whoa, please don't . You have a live iterator on the original trie db, please back out carefully and close it nicely

}
defer db.Close()
btrie := new(trie.BinaryTrie)
btrie.CommitCh = make(chan trie.BinaryHashPreimage)

var nodeCount uint64
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
for kv := range btrie.CommitCh {
nodeCount++
log.Debug("inserting key", "count", nodeCount, "key", common.ToHex(kv.Key), "value", common.ToHex(kv.Value))
db.Put(kv.Key, kv.Value, nil)
}
}()
counter := 0
for it.Next() {
counter++
// Don't get the entire expanded account at this
// stage - NOTE
btrie.TryUpdate(it.Hash().Bytes(), it.Account())
}
log.Info("Inserted all leaves", "count", counter)

err = btrie.Commit()
if err != nil {
panic(fmt.Sprintf("error committing trie, err=%v", err))
}
close(btrie.CommitCh)
wg.Wait()
btrie.CommitCh = nil
log.Info("Done writing nodes to the DB", "count", nodeCount)
log.Info("Calculated binary hash", "hash", common.ToHex(btrie.Hash()))

return common.BytesToHash(btrie.Hash())
}

// GenerateTrieRoot takes an account iterator and reproduces the root hash.
func GenerateTrieRoot(it AccountIterator) common.Hash {
//return generateTrieRoot(it, StackGenerate)
Expand Down
94 changes: 94 additions & 0 deletions core/state/snapshot/trie_generator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -366,3 +366,97 @@ func BenchmarkSlimToFullHash(b *testing.B) {
}

}

// BenchmarkBinTrieInsert measures inserting 4000 snapshot accounts into an
// empty binary trie.
//
// Every benchmark iteration gets its own account iterator and its own empty
// trie, both created while the timer is stopped. A single shared iterator
// would be exhausted after the first pass, leaving all later iterations to
// time an empty loop.
func BenchmarkBinTrieInsert(b *testing.B) {
	// Get a fairly large trie
	// Create a custom account factory to recreate the same addresses
	makeAccounts := func(num int) map[common.Hash][]byte {
		accounts := make(map[common.Hash][]byte)
		for i := 0; i < num; i++ {
			h := common.Hash{}
			binary.BigEndian.PutUint64(h[:], uint64(i+1))
			accounts[h] = randomAccountWithSmall()
		}
		return accounts
	}
	// Build up a large stack of snapshots
	base := &diskLayer{
		diskdb: rawdb.NewMemoryDatabase(),
		root:   common.HexToHash("0x01"),
		cache:  fastcache.New(1024 * 500),
	}
	snaps := &Tree{
		layers: map[common.Hash]snapshot{
			base.root: base,
		},
	}
	b.Run("4K", func(b *testing.B) {
		// 4K accounts
		snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), nil, makeAccounts(4000), nil)
		head := snaps.Snapshot(common.HexToHash("0x02"))
		// Call it once to make it create the lists before test starts
		_ = head.(*diffLayer).AccountIterator(common.HexToHash("0x00"))

		b.Run("recursive", func(b *testing.B) {
			b.Run("noext", func(b *testing.B) {
				b.ReportAllocs()
				b.ResetTimer()
				for n := 0; n < b.N; n++ {
					// Per-iteration setup is excluded from the measurement.
					b.StopTimer()
					it := head.(*diffLayer).AccountIterator(common.HexToHash("0x00"))
					bt, _ := trie.NewBinary(nil)
					b.StartTimer()

					for it.Next() {
						bt.TryUpdate(it.Hash().Bytes(), it.Account())
					}
				}
				b.StopTimer()
			})
		})
	})
}

// BenchmarkBinTrieHash measures calling Hash on a binary trie that was
// populated once, outside the timed region, with 4000 snapshot accounts.
func BenchmarkBinTrieHash(b *testing.B) {
	// newAccountSet returns count accounts keyed 1..count, with the key
	// written big-endian into the first eight bytes of the hash, so the same
	// addresses are recreated on every run.
	newAccountSet := func(count int) map[common.Hash][]byte {
		set := make(map[common.Hash][]byte)
		for idx := 1; idx <= count; idx++ {
			var key common.Hash
			binary.BigEndian.PutUint64(key[:], uint64(idx))
			set[key] = randomAccountWithSmall()
		}
		return set
	}

	var (
		diskRoot = common.HexToHash("0x01")
		diffRoot = common.HexToHash("0x02")
	)
	// A disk layer at the bottom, registered as the only layer of the tree.
	disk := &diskLayer{
		diskdb: rawdb.NewMemoryDatabase(),
		root:   diskRoot,
		cache:  fastcache.New(1024 * 500),
	}
	tree := &Tree{
		layers: map[common.Hash]snapshot{
			disk.root: disk,
		},
	}

	b.Run("4K", func(b *testing.B) {
		// Stack one diff layer holding 4K accounts on top of the disk layer.
		tree.Update(diffRoot, diskRoot, nil, newAccountSet(4000), nil)
		layer := tree.Snapshot(diffRoot).(*diffLayer)

		// Fill the trie up front; only hashing is measured below.
		accounts := layer.AccountIterator(common.HexToHash("0x00"))
		bt, _ := trie.NewBinary(nil)
		for accounts.Next() {
			bt.TryUpdate(accounts.Hash().Bytes(), accounts.Account())
		}

		b.Run("recursive", func(b *testing.B) {
			b.Run("noext", func(b *testing.B) {
				b.ReportAllocs()
				b.ResetTimer()
				for i := 0; i < b.N; i++ {
					bt.Hash()
				}
				b.StopTimer()
			})
		})
	})
}
1 change: 1 addition & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXf
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/syndtr/goleveldb v1.0.0 h1:fBdIW9lB4Iz0n9khmH8w27SJ3QEJ7+IgjPEwGSZiFdE=
github.com/syndtr/goleveldb v1.0.1-0.20190923125748-758128399b1d h1:gZZadD8H+fF+n9CmNhYL1Y0dJB+kLOmKd7FbPJLeGHs=
github.com/syndtr/goleveldb v1.0.1-0.20190923125748-758128399b1d/go.mod h1:9OrXJhf154huy1nPWmuSrkgjPUtUNhA+Zmy+6AESzuA=
github.com/tyler-smith/go-bip39 v1.0.1-0.20181017060643-dbb3b84ba2ef h1:wHSqTBrZW24CsNJDfeh9Ex6Pm0Rcpc7qrgKBiL44vF4=
Expand Down
Loading