Skip to content

Commit

Permalink
Optimize merkledb hashing (#2886)
Browse files Browse the repository at this point in the history
  • Loading branch information
StephenButtolph authored Mar 29, 2024
1 parent d046f29 commit 2dbc9ba
Show file tree
Hide file tree
Showing 4 changed files with 198 additions and 29 deletions.
62 changes: 41 additions & 21 deletions x/merkledb/codec.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package merkledb

import (
"bytes"
"crypto/sha256"
"encoding/binary"
"errors"
"io"
Expand All @@ -28,11 +29,6 @@ const (
minByteSliceLen = minVarIntLen
minDBNodeLen = minMaybeByteSliceLen + minVarIntLen
minChildLen = minVarIntLen + minKeyLen + ids.IDLen + boolLen

estimatedKeyLen = 64
estimatedValueLen = 64
// Child index, child ID
hashValuesChildLen = minVarIntLen + ids.IDLen
)

var (
Expand All @@ -47,8 +43,8 @@ var (
errIntOverflow = errors.New("value overflows int")
)

Note that sha256.Write and bytes.Buffer.Write always return a nil error, so we
ignore their return values.

func childSize(index byte, childEntry *child) int {
// * index
Expand Down Expand Up @@ -107,30 +103,54 @@ func encodeDBNode(n *dbNode) []byte {
return buf.Bytes()
}

// Returns the bytes that will be hashed to generate [n]'s ID.
// Returns the canonical hash of [n].
//
// Assumes [n] is non-nil.
func encodeHashValues(n *node) []byte {
// This method is performance critical. It is not expected to perform any memory
// allocations.
func hashNode(n *node) ids.ID {
var (
numChildren = len(n.children)
// Estimate size [hv] to prevent memory allocations
estimatedLen = minVarIntLen + numChildren*hashValuesChildLen + estimatedValueLen + estimatedKeyLen
buf = bytes.NewBuffer(make([]byte, 0, estimatedLen))
sha = sha256.New()
hash ids.ID
// The hash length is larger than the maximum Uvarint length. This
// ensures binary.AppendUvarint doesn't perform any memory allocations.
emptyHashBuffer = hash[:0]
)

encodeUint(buf, uint64(numChildren))
// By directly calling sha.Write rather than passing sha around as an
// io.Writer, the compiler can perform sufficient escape analysis to avoid
// allocating buffers on the heap.
_, _ = sha.Write(binary.AppendUvarint(emptyHashBuffer, uint64(len(n.children))))

// ensure that the order of entries is consistent
keys := maps.Keys(n.children)
// By allocating BranchFactorLargest rather than len(n.children), this slice
// is allocated on the stack rather than the heap. BranchFactorLargest is
// at least len(n.children) which avoids memory allocations.
keys := make([]byte, 0, BranchFactorLargest)
for k := range n.children {
keys = append(keys, k)
}

// Ensure that the order of entries is correct.
slices.Sort(keys)
for _, index := range keys {
entry := n.children[index]
encodeUint(buf, uint64(index))
_, _ = buf.Write(entry.id[:])
_, _ = sha.Write(binary.AppendUvarint(emptyHashBuffer, uint64(index)))
_, _ = sha.Write(entry.id[:])
}
encodeMaybeByteSlice(buf, n.valueDigest)
encodeKeyToBuffer(buf, n.key)

return buf.Bytes()
if n.valueDigest.HasValue() {
_, _ = sha.Write(trueBytes)
value := n.valueDigest.Value()
_, _ = sha.Write(binary.AppendUvarint(emptyHashBuffer, uint64(len(value))))
_, _ = sha.Write(value)
} else {
_, _ = sha.Write(falseBytes)
}

_, _ = sha.Write(binary.AppendUvarint(emptyHashBuffer, uint64(n.key.length)))
_, _ = sha.Write(n.key.Bytes())
sha.Sum(emptyHashBuffer)
return hash
}

// Assumes [n] is non-nil.
Expand Down
160 changes: 154 additions & 6 deletions x/merkledb/codec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,8 @@ func TestCodecDecodeDBNode_TooShort(t *testing.T) {
require.ErrorIs(err, io.ErrUnexpectedEOF)
}

// Ensure that encodeHashValues is deterministic
func FuzzEncodeHashValues(f *testing.F) {
// Ensure that hashNode is deterministic
func FuzzHashNode(f *testing.F) {
f.Fuzz(
func(
t *testing.T,
Expand Down Expand Up @@ -222,17 +222,94 @@ func FuzzEncodeHashValues(f *testing.F) {
},
}

// Serialize hv multiple times
hvBytes1 := encodeHashValues(hv)
hvBytes2 := encodeHashValues(hv)
// Hash hv multiple times
hash1 := hashNode(hv)
hash2 := hashNode(hv)

// Make sure they're the same
require.Equal(hvBytes1, hvBytes2)
require.Equal(hash1, hash2)
}
},
)
}

// TestHashNode pins hashNode's output for several representative node shapes
// against golden hash strings, so any change to the hash preimage format is
// detected by a golden-value mismatch.
func TestHashNode(t *testing.T) {
	cases := []struct {
		name    string
		subject *node
		want    string
	}{
		{
			name:    "empty node",
			subject: newNode(Key{}),
			want:    "rbhtxoQ1DqWHvb6w66BZdVyjmPAneZUSwQq9uKj594qvFSdav",
		},
		{
			name: "has value",
			subject: func() *node {
				root := newNode(Key{})
				root.setValue(maybe.Some([]byte("value1")))
				return root
			}(),
			want: "2vx2xueNdWoH2uB4e8hbMU5jirtZkZ1c3ePCWDhXYaFRHpCbnQ",
		},
		{
			name:    "has key",
			subject: newNode(ToKey([]byte{0, 1, 2, 3, 4, 5, 6, 7})),
			want:    "2vA8ggXajhFEcgiF8zHTXgo8T2ALBFgffp1xfn48JEni1Uj5uK",
		},
		{
			name: "1 child",
			subject: func() *node {
				root := newNode(Key{})
				leaf := newNode(ToKey([]byte{255}))
				leaf.setValue(maybe.Some([]byte("value1")))
				root.addChildWithID(leaf, 4, hashNode(leaf))
				return root
			}(),
			want: "YfJRufqUKBv9ez6xZx6ogpnfDnw9fDsyebhYDaoaH57D3vRu3",
		},
		{
			name: "2 children",
			subject: func() *node {
				root := newNode(Key{})

				leafA := newNode(ToKey([]byte{255}))
				leafA.setValue(maybe.Some([]byte("value1")))

				leafB := newNode(ToKey([]byte{237}))
				leafB.setValue(maybe.Some([]byte("value2")))

				root.addChildWithID(leafA, 4, hashNode(leafA))
				root.addChildWithID(leafB, 4, hashNode(leafB))
				return root
			}(),
			want: "YVmbx5MZtSKuYhzvHnCqGrswQcxmozAkv7xE1vTA2EiGpWUkv",
		},
		{
			name: "16 children",
			subject: func() *node {
				root := newNode(Key{})
				for i := byte(0); i < 16; i++ {
					leaf := newNode(ToKey([]byte{i << 4}))
					leaf.setValue(maybe.Some([]byte("some value")))
					root.addChildWithID(leaf, 4, hashNode(leaf))
				}
				return root
			}(),
			want: "5YiFLL7QV3f441See9uWePi3wVKsx9fgvX5VPhU8PRxtLqhwY",
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := hashNode(tc.subject)
			require.Equal(t, tc.want, got.String())
		})
	}
}

func TestCodecDecodeKeyLengthOverflowRegression(t *testing.T) {
_, err := decodeKey(binary.AppendUvarint(nil, math.MaxInt))
require.ErrorIs(t, err, io.ErrUnexpectedEOF)
Expand All @@ -258,6 +335,77 @@ func TestUintSize(t *testing.T) {
}
}

// Benchmark_HashNode measures hashNode across the same node shapes exercised
// by TestHashNode; run with -benchmem to verify the zero-allocation goal.
func Benchmark_HashNode(b *testing.B) {
	cases := []struct {
		name    string
		subject *node
	}{
		{
			name:    "empty node",
			subject: newNode(Key{}),
		},
		{
			name: "has value",
			subject: func() *node {
				root := newNode(Key{})
				root.setValue(maybe.Some([]byte("value1")))
				return root
			}(),
		},
		{
			name:    "has key",
			subject: newNode(ToKey([]byte{0, 1, 2, 3, 4, 5, 6, 7})),
		},
		{
			name: "1 child",
			subject: func() *node {
				root := newNode(Key{})
				leaf := newNode(ToKey([]byte{255}))
				leaf.setValue(maybe.Some([]byte("value1")))
				root.addChildWithID(leaf, 4, hashNode(leaf))
				return root
			}(),
		},
		{
			name: "2 children",
			subject: func() *node {
				root := newNode(Key{})

				leafA := newNode(ToKey([]byte{255}))
				leafA.setValue(maybe.Some([]byte("value1")))

				leafB := newNode(ToKey([]byte{237}))
				leafB.setValue(maybe.Some([]byte("value2")))

				root.addChildWithID(leafA, 4, hashNode(leafA))
				root.addChildWithID(leafB, 4, hashNode(leafB))
				return root
			}(),
		},
		{
			name: "16 children",
			subject: func() *node {
				root := newNode(Key{})
				for i := byte(0); i < 16; i++ {
					leaf := newNode(ToKey([]byte{i << 4}))
					leaf.setValue(maybe.Some([]byte("some value")))
					root.addChildWithID(leaf, 4, hashNode(leaf))
				}
				return root
			}(),
		},
	}
	for _, bm := range cases {
		b.Run(bm.name, func(b *testing.B) {
			for i := 0; i < b.N; i++ {
				hashNode(bm.subject)
			}
		})
	}
}

func Benchmark_EncodeUint(b *testing.B) {
var dst bytes.Buffer
dst.Grow(binary.MaxVarintLen64)
Expand Down
2 changes: 2 additions & 0 deletions x/merkledb/key.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ const (
BranchFactor4 = BranchFactor(4)
BranchFactor16 = BranchFactor(16)
BranchFactor256 = BranchFactor(256)

BranchFactorLargest = BranchFactor256
)

// Valid checks if BranchFactor [b] is one of the predefined valid options for BranchFactor
Expand Down
3 changes: 1 addition & 2 deletions x/merkledb/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,7 @@ func (n *node) bytes() []byte {
// Returns the ID (hash) of this node and records the hash computation in
// [metrics].
//
// NOTE(review): the pre-existing comment said this "caches" the ID, but the
// visible body does not cache — hashNode recomputes on every call; any caching
// must happen in the caller. Confirm against callers before relying on it.
func (n *node) calculateID(metrics merkleMetrics) ids.ID {
	metrics.HashCalculated()
	return hashNode(n)
}

// Set [n]'s value to [val].
Expand Down

0 comments on commit 2dbc9ba

Please sign in to comment.