Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: use fast unsafe bytes->string convertion #525

Merged
merged 9 commits into from
Aug 3, 2022
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## Unreleased

### Improvements

- [#525](https://github.com/cosmos/iavl/pull/525) Optimization: use fast unsafe bytes->string convertion.
robert-zaremba marked this conversation as resolved.
Show resolved Hide resolved

### Bug Fixes

- [#524](https://github.com/cosmos/iavl/pull/524) Fix: `MutableTree.Get`.
Expand Down
14 changes: 8 additions & 6 deletions cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package cache

import (
"container/list"

ibytes "github.com/cosmos/iavl/internal/bytes"
)

// Node represents a node eligible for caching.
Expand Down Expand Up @@ -59,15 +61,15 @@ func New(maxElementCount int) Cache {
}

func (c *lruCache) Add(node Node) Node {
if e, exists := c.dict[string(node.GetKey())]; exists {
if e, exists := c.dict[ibytes.UnsafeBytesToStr(node.GetKey())]; exists {
c.ll.MoveToFront(e)
old := e.Value
e.Value = node
return old.(Node)
}

elem := c.ll.PushFront(node)
c.dict[string(node.GetKey())] = elem
c.dict[ibytes.UnsafeBytesToStr(node.GetKey())] = elem

if c.ll.Len() > c.maxElementCount {
oldest := c.ll.Back()
Expand All @@ -78,15 +80,15 @@ func (c *lruCache) Add(node Node) Node {
}

func (nc *lruCache) Get(key []byte) Node {
if ele, hit := nc.dict[string(key)]; hit {
if ele, hit := nc.dict[ibytes.UnsafeBytesToStr(key)]; hit {
nc.ll.MoveToFront(ele)
return ele.Value.(Node)
}
return nil
}

func (c *lruCache) Has(key []byte) bool {
_, exists := c.dict[string(key)]
_, exists := c.dict[ibytes.UnsafeBytesToStr(key)]
return exists
}

Expand All @@ -95,14 +97,14 @@ func (nc *lruCache) Len() int {
}

func (c *lruCache) Remove(key []byte) Node {
if elem, exists := c.dict[string(key)]; exists {
if elem, exists := c.dict[ibytes.UnsafeBytesToStr(key)]; exists {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm surprised that this is helpful because, from my understanding, copying during []byte to string conversion when accessing a map by key should be optimized by the Go compiler.

Sources:

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm wondering if the benchmark would remain the same as it is right now if the map[string] changes are reverted

Copy link
Collaborator Author

@robert-zaremba robert-zaremba Aug 2, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can check, however for consistency I prefer to keep the casting here.

return c.remove(elem)
}
return nil
}

func (c *lruCache) remove(e *list.Element) Node {
removed := c.ll.Remove(e).(Node)
delete(c.dict, string(removed.GetKey()))
delete(c.dict, ibytes.UnsafeBytesToStr(removed.GetKey()))
return removed
}
3 changes: 2 additions & 1 deletion cmd/iaviewer/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
dbm "github.com/tendermint/tm-db"

"github.com/cosmos/iavl"
ibytes "github.com/cosmos/iavl/internal/bytes"
)

// TODO: make this configurable?
Expand Down Expand Up @@ -94,7 +95,7 @@ func PrintDBStats(db dbm.DB) {

defer itr.Close()
for ; itr.Valid(); itr.Next() {
key := string(itr.Key()[:1])
key := ibytes.UnsafeBytesToStr(itr.Key()[:1])
prefix[key]++
count++
}
Expand Down
6 changes: 4 additions & 2 deletions internal/bytes/bytes.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,13 @@ func (bz *HexBytes) UnmarshalJSON(data []byte) error {
if len(data) < 2 || data[0] != '"' || data[len(data)-1] != '"' {
return fmt.Errorf("invalid hex string: %s", data)
}
bz2, err := hex.DecodeString(string(data[1 : len(data)-1]))
data = data[1 : len(data)-1]
dest := make([]byte, hex.DecodedLen(len(data)))
_, err := hex.Decode(dest, data)
if err != nil {
return err
}
*bz = bz2
*bz = dest
return nil
}

Expand Down
26 changes: 26 additions & 0 deletions internal/bytes/string.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package common

import (
"reflect"
"unsafe"
)

// UnsafeStrToBytes uses unsafe to convert string into byte array. Returned bytes
// must not be altered after this function is called as it will cause a segmentation fault.
func UnsafeStrToBytes(s string) []byte {
var buf []byte
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you think about the following to avoid copying the slice internals by directly casting the string:

func unsafeGetBytes(s string) []byte {
    return (*[0x7fff0000]byte)(unsafe.Pointer(
        (*reflect.StringHeader)(unsafe.Pointer(&s)).Data),
    )[:len(s):len(s)]
}

I'm not sure if there are any constraints preventing that but this could be a further optimization

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is [0x7fff0000]byte? Looks like a big static array. Where did you get that solution from?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sHdr := (*reflect.StringHeader)(unsafe.Pointer(&s))
bufHdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
bufHdr.Data = sHdr.Data
bufHdr.Cap = sHdr.Len
bufHdr.Len = sHdr.Len
return buf
}

// UnsafeBytesToStr is meant to make a zero allocation conversion
// from []byte -> string to speed up operations, it is not meant
// to be used generally, but for a specific pattern to delete keys
// from a map.
func UnsafeBytesToStr(b []byte) string {
return *(*string)(unsafe.Pointer(&b))
}
54 changes: 54 additions & 0 deletions internal/bytes/string_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package common

import (
"runtime"
"strconv"
"testing"
"time"

"github.com/stretchr/testify/suite"
)

func TestStringSuite(t *testing.T) {
suite.Run(t, new(StringSuite))
}

type StringSuite struct{ suite.Suite }

func unsafeConvertStr() []byte {
return UnsafeStrToBytes("abc")
}

func (s *StringSuite) TestUnsafeStrToBytes() {
// we convert in other function to trigger GC. We want to check that
// the underlying array in []bytes is accessible after GC will finish swapping.
for i := 0; i < 5; i++ {
b := unsafeConvertStr()
runtime.GC()
<-time.NewTimer(2 * time.Millisecond).C
b2 := append(b, 'd')
s.Equal("abc", string(b))
s.Equal("abcd", string(b2))
}
}

func unsafeConvertBytes() string {
return UnsafeBytesToStr([]byte("abc"))
}

func (s *StringSuite) TestUnsafeBytesToStr() {
// we convert in other function to trigger GC. We want to check that
// the underlying array in []bytes is accessible after GC will finish swapping.
for i := 0; i < 5; i++ {
str := unsafeConvertBytes()
runtime.GC()
<-time.NewTimer(2 * time.Millisecond).C
s.Equal("abc", str)
}
}

func BenchmarkUnsafeStrToBytes(b *testing.B) {
for i := 0; i < b.N; i++ {
UnsafeStrToBytes(strconv.Itoa(i))
}
}
17 changes: 9 additions & 8 deletions mutable_tree.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
"time"

"github.com/pkg/errors"

dbm "github.com/tendermint/tm-db"

"github.com/cosmos/iavl/internal/logger"
Expand Down Expand Up @@ -147,7 +146,7 @@ func (tree *MutableTree) Get(key []byte) ([]byte, error) {
return nil, nil
}

if fastNode, ok := tree.unsavedFastNodeAdditions[string(key)]; ok {
if fastNode, ok := tree.unsavedFastNodeAdditions[unsafeToStr(key)]; ok {
return fastNode.value, nil
}
// check if node was deleted
Expand Down Expand Up @@ -919,8 +918,9 @@ func (tree *MutableTree) getUnsavedFastNodeRemovals() map[string]interface{} {
}

func (tree *MutableTree) addUnsavedAddition(key []byte, node *FastNode) {
delete(tree.unsavedFastNodeRemovals, string(key))
tree.unsavedFastNodeAdditions[string(key)] = node
skey := unsafeToStr(key)
delete(tree.unsavedFastNodeRemovals, skey)
tree.unsavedFastNodeAdditions[skey] = node
}

func (tree *MutableTree) saveFastNodeAdditions() error {
Expand All @@ -939,8 +939,9 @@ func (tree *MutableTree) saveFastNodeAdditions() error {
}

func (tree *MutableTree) addUnsavedRemoval(key []byte) {
delete(tree.unsavedFastNodeAdditions, string(key))
tree.unsavedFastNodeRemovals[string(key)] = true
skey := unsafeToStr(key)
delete(tree.unsavedFastNodeAdditions, skey)
tree.unsavedFastNodeRemovals[skey] = true
}

func (tree *MutableTree) saveFastNodeRemovals() error {
Expand All @@ -951,7 +952,7 @@ func (tree *MutableTree) saveFastNodeRemovals() error {
sort.Strings(keysToSort)

for _, key := range keysToSort {
if err := tree.ndb.DeleteFastNode([]byte(key)); err != nil {
if err := tree.ndb.DeleteFastNode(unsafeToBz(key)); err != nil {
return err
}
}
Expand Down Expand Up @@ -1231,7 +1232,7 @@ func (tree *MutableTree) addOrphans(orphans []*Node) error {
if len(node.hash) == 0 {
return fmt.Errorf("expected to find node hash, but was empty")
}
tree.orphans[string(node.hash)] = node.version
tree.orphans[unsafeToStr(node.hash)] = node.version
}
return nil
}
7 changes: 4 additions & 3 deletions nodedb.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@ import (
"strings"
"sync"

"github.com/cosmos/iavl/cache"
"github.com/cosmos/iavl/internal/logger"
"github.com/pkg/errors"
dbm "github.com/tendermint/tm-db"

"github.com/cosmos/iavl/cache"
"github.com/cosmos/iavl/internal/logger"
)

const (
Expand Down Expand Up @@ -86,7 +87,7 @@ func newNodeDB(db dbm.DB, cacheSize int, opts *Options) *nodeDB {
opts = &o
}

storeVersion, err := db.Get(metadataKeyFormat.Key([]byte(storageVersionKey)))
storeVersion, err := db.Get(metadataKeyFormat.Key(unsafeToBz(storageVersionKey)))

if err != nil || storeVersion == nil {
storeVersion = []byte(defaultStorageVersionValue)
Expand Down
4 changes: 2 additions & 2 deletions tree_dotgraph.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,12 @@ func WriteDOTGraph(w io.Writer, tree *ImmutableTree, paths []PathToLeaf) {
}
shortHash := graphNode.Hash[:7]

graphNode.Label = mkLabel(string(node.key), 16, "sans-serif")
graphNode.Label = mkLabel(unsafeToStr(node.key), 16, "sans-serif")
graphNode.Label += mkLabel(shortHash, 10, "monospace")
graphNode.Label += mkLabel(fmt.Sprintf("version=%d", node.version), 10, "monospace")

if node.value != nil {
graphNode.Label += mkLabel(string(node.value), 10, "sans-serif")
graphNode.Label += mkLabel(unsafeToStr(node.value), 10, "sans-serif")
}

if node.height == 0 {
Expand Down
8 changes: 8 additions & 0 deletions unsafe.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package iavl

import ibytes "github.com/cosmos/iavl/internal/bytes"

var (
unsafeToStr = ibytes.UnsafeBytesToStr
unsafeToBz = ibytes.UnsafeStrToBytes
)
38 changes: 14 additions & 24 deletions unsaved_fast_iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,29 +18,19 @@ var (
// it iterates over the latest state via fast nodes,
// taking advantage of keys being located in sequence in the underlying database.
type UnsavedFastIterator struct {
start, end []byte

valid bool

ascending bool

err error

ndb *nodeDB
start, end []byte
valid bool
ascending bool
err error
ndb *nodeDB
nextKey []byte
nextVal []byte
fastIterator dbm.Iterator

nextUnsavedNodeIdx int
unsavedFastNodeAdditions map[string]*FastNode

unsavedFastNodeRemovals map[string]interface{}

unsavedFastNodesToSort []string

nextKey []byte

nextVal []byte

nextUnsavedNodeIdx int

fastIterator dbm.Iterator
unsavedFastNodeRemovals map[string]interface{}
unsavedFastNodesToSort []string
}

var _ dbm.Iterator = (*UnsavedFastIterator)(nil)
Expand Down Expand Up @@ -71,7 +61,7 @@ func NewUnsavedFastIterator(start, end []byte, ascending bool, ndb *nodeDB, unsa
continue
}

iter.unsavedFastNodesToSort = append(iter.unsavedFastNodesToSort, string(fastNode.key))
iter.unsavedFastNodesToSort = append(iter.unsavedFastNodesToSort, unsafeToStr(fastNode.key))
}

sort.Slice(iter.unsavedFastNodesToSort, func(i, j int) bool {
Expand Down Expand Up @@ -142,8 +132,8 @@ func (iter *UnsavedFastIterator) Next() {
return
}

diskKeyStr := unsafeToStr(iter.fastIterator.Key())
if iter.fastIterator.Valid() && iter.nextUnsavedNodeIdx < len(iter.unsavedFastNodesToSort) {
diskKeyStr := string(iter.fastIterator.Key())

if iter.unsavedFastNodeRemovals[diskKeyStr] != nil {
// If next fast node from disk is to be removed, skip it.
Expand Down Expand Up @@ -186,7 +176,7 @@ func (iter *UnsavedFastIterator) Next() {

// if only nodes on disk are left, we return them
if iter.fastIterator.Valid() {
if iter.unsavedFastNodeRemovals[string(iter.fastIterator.Key())] != nil {
if iter.unsavedFastNodeRemovals[diskKeyStr] != nil {
// If next fast node from disk is to be removed, skip it.
iter.fastIterator.Next()
iter.Next()
Expand Down