diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b1671d39..bdc6d3105 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### Improvements +- [#525](https://github.com/cosmos/iavl/pull/525) Optimization: use fast unsafe bytes->string conversion. - [#506](https://github.com/cosmos/iavl/pull/506) Implement cache abstraction. ### Bug Fixes diff --git a/cache/cache.go b/cache/cache.go index c0f36c63d..a4ca88d67 100644 --- a/cache/cache.go +++ b/cache/cache.go @@ -2,6 +2,8 @@ package cache import ( "container/list" + + ibytes "github.com/cosmos/iavl/internal/bytes" ) // Node represents a node eligible for caching. @@ -59,7 +61,8 @@ func New(maxElementCount int) Cache { } func (c *lruCache) Add(node Node) Node { - if e, exists := c.dict[string(node.GetKey())]; exists { + keyStr := ibytes.UnsafeBytesToStr(node.GetKey()) + if e, exists := c.dict[keyStr]; exists { c.ll.MoveToFront(e) old := e.Value e.Value = node @@ -67,18 +70,17 @@ func (c *lruCache) Add(node Node) Node { } elem := c.ll.PushFront(node) - c.dict[string(node.GetKey())] = elem + c.dict[keyStr] = elem if c.ll.Len() > c.maxElementCount { oldest := c.ll.Back() - return c.remove(oldest) } return nil } func (nc *lruCache) Get(key []byte) Node { - if ele, hit := nc.dict[string(key)]; hit { + if ele, hit := nc.dict[ibytes.UnsafeBytesToStr(key)]; hit { nc.ll.MoveToFront(ele) return ele.Value.(Node) } @@ -86,7 +88,7 @@ func (nc *lruCache) Get(key []byte) Node { } func (c *lruCache) Has(key []byte) bool { - _, exists := c.dict[string(key)] + _, exists := c.dict[ibytes.UnsafeBytesToStr(key)] return exists } @@ -95,7 +97,7 @@ func (nc *lruCache) Len() int { } func (c *lruCache) Remove(key []byte) Node { - if elem, exists := c.dict[string(key)]; exists { + if elem, exists := c.dict[ibytes.UnsafeBytesToStr(key)]; exists { return c.remove(elem) } return nil @@ -103,6 +105,6 @@ func (c *lruCache) Remove(key []byte) Node { func (c *lruCache) remove(e *list.Element) Node { removed := c.ll.Remove(e).(Node) - delete(c.dict, string(removed.GetKey())) + delete(c.dict, ibytes.UnsafeBytesToStr(removed.GetKey())) return removed } diff --git a/cmd/iaviewer/main.go b/cmd/iaviewer/main.go index ee881d9ea..476811598 100644 --- a/cmd/iaviewer/main.go +++ b/cmd/iaviewer/main.go @@ -12,6 +12,7 @@ import ( dbm "github.com/tendermint/tm-db" "github.com/cosmos/iavl" + ibytes "github.com/cosmos/iavl/internal/bytes" ) // TODO: make this configurable? @@ -94,7 +95,7 @@ func PrintDBStats(db dbm.DB) { defer itr.Close() for ; itr.Valid(); itr.Next() { - key := string(itr.Key()[:1]) + key := ibytes.UnsafeBytesToStr(itr.Key()[:1]) prefix[key]++ count++ } diff --git a/internal/bytes/bytes.go b/internal/bytes/bytes.go index 310c82eb0..ac3197d8a 100644 --- a/internal/bytes/bytes.go +++ b/internal/bytes/bytes.go @@ -35,11 +35,13 @@ func (bz *HexBytes) UnmarshalJSON(data []byte) error { if len(data) < 2 || data[0] != '"' || data[len(data)-1] != '"' { return fmt.Errorf("invalid hex string: %s", data) } - bz2, err := hex.DecodeString(string(data[1 : len(data)-1])) + data = data[1 : len(data)-1] + dest := make([]byte, hex.DecodedLen(len(data))) + _, err := hex.Decode(dest, data) if err != nil { return err } - *bz = bz2 + *bz = dest return nil } diff --git a/internal/bytes/string.go b/internal/bytes/string.go new file mode 100644 index 000000000..a09422218 --- /dev/null +++ b/internal/bytes/string.go @@ -0,0 +1,26 @@ +package common + +import ( + "reflect" + "unsafe" +) + +// UnsafeStrToBytes uses unsafe to convert string into byte array. Returned bytes +// must not be altered after this function is called as it will cause a segmentation fault. +func UnsafeStrToBytes(s string) []byte { + var buf []byte + sHdr := (*reflect.StringHeader)(unsafe.Pointer(&s)) + bufHdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + bufHdr.Data = sHdr.Data + bufHdr.Cap = sHdr.Len + bufHdr.Len = sHdr.Len + return buf +} + +// UnsafeBytesToStr is meant to make a zero allocation conversion +// from []byte -> string to speed up operations, it is not meant +// to be used generally, but for a specific pattern to delete keys +// from a map. +func UnsafeBytesToStr(b []byte) string { + return *(*string)(unsafe.Pointer(&b)) +} diff --git a/internal/bytes/string_test.go b/internal/bytes/string_test.go new file mode 100644 index 000000000..f704e2f85 --- /dev/null +++ b/internal/bytes/string_test.go @@ -0,0 +1,54 @@ +package common + +import ( + "runtime" + "strconv" + "testing" + "time" + + "github.com/stretchr/testify/suite" +) + +func TestStringSuite(t *testing.T) { + suite.Run(t, new(StringSuite)) +} + +type StringSuite struct{ suite.Suite } + +func unsafeConvertStr() []byte { + return UnsafeStrToBytes("abc") +} + +func (s *StringSuite) TestUnsafeStrToBytes() { + // we convert in other function to trigger GC. We want to check that + // the underlying array in []bytes is accessible after GC will finish swapping. + for i := 0; i < 5; i++ { + b := unsafeConvertStr() + runtime.GC() + <-time.NewTimer(2 * time.Millisecond).C + b2 := append(b, 'd') + s.Equal("abc", string(b)) + s.Equal("abcd", string(b2)) + } +} + +func unsafeConvertBytes() string { + return UnsafeBytesToStr([]byte("abc")) +} + +func (s *StringSuite) TestUnsafeBytesToStr() { + // we convert in other function to trigger GC. We want to check that + // the underlying array in []bytes is accessible after GC will finish swapping. + for i := 0; i < 5; i++ { + str := unsafeConvertBytes() + runtime.GC() + <-time.NewTimer(2 * time.Millisecond).C + s.Equal("abc", str) + } +} + +func BenchmarkUnsafeStrToBytes(b *testing.B) { + for i := 0; i < b.N; i++ { + UnsafeStrToBytes(strconv.Itoa(i)) + } +} diff --git a/mutable_tree.go b/mutable_tree.go index d3eff68bb..5579ef591 100644 --- a/mutable_tree.go +++ b/mutable_tree.go @@ -10,7 +10,6 @@ import ( "time" "github.com/pkg/errors" - dbm "github.com/tendermint/tm-db" "github.com/cosmos/iavl/internal/logger" @@ -147,7 +146,7 @@ func (tree *MutableTree) Get(key []byte) ([]byte, error) { return nil, nil } - if fastNode, ok := tree.unsavedFastNodeAdditions[string(key)]; ok { + if fastNode, ok := tree.unsavedFastNodeAdditions[unsafeToStr(key)]; ok { return fastNode.value, nil } // check if node was deleted @@ -919,8 +918,9 @@ func (tree *MutableTree) getUnsavedFastNodeRemovals() map[string]interface{} { } func (tree *MutableTree) addUnsavedAddition(key []byte, node *FastNode) { - delete(tree.unsavedFastNodeRemovals, string(key)) - tree.unsavedFastNodeAdditions[string(key)] = node + skey := unsafeToStr(key) + delete(tree.unsavedFastNodeRemovals, skey) + tree.unsavedFastNodeAdditions[skey] = node } func (tree *MutableTree) saveFastNodeAdditions() error { @@ -939,8 +939,9 @@ func (tree *MutableTree) saveFastNodeAdditions() error { } func (tree *MutableTree) addUnsavedRemoval(key []byte) { - delete(tree.unsavedFastNodeAdditions, string(key)) - tree.unsavedFastNodeRemovals[string(key)] = true + skey := unsafeToStr(key) + delete(tree.unsavedFastNodeAdditions, skey) + tree.unsavedFastNodeRemovals[skey] = true } func (tree *MutableTree) saveFastNodeRemovals() error { @@ -951,7 +952,7 @@ func (tree *MutableTree) saveFastNodeRemovals() error { sort.Strings(keysToSort) for _, key := range keysToSort { - if err := tree.ndb.DeleteFastNode([]byte(key)); err != nil { + if err := tree.ndb.DeleteFastNode(unsafeToBz(key)); err != nil { return err } } @@ -1231,7 +1232,7 @@ func (tree *MutableTree) addOrphans(orphans []*Node) error { if len(node.hash) == 0 { return fmt.Errorf("expected to find node hash, but was empty") } - tree.orphans[string(node.hash)] = node.version + tree.orphans[unsafeToStr(node.hash)] = node.version } return nil } diff --git a/nodedb.go b/nodedb.go index c64b89ac1..2d4686fe6 100644 --- a/nodedb.go +++ b/nodedb.go @@ -10,10 +10,11 @@ import ( "strings" "sync" - "github.com/cosmos/iavl/cache" - "github.com/cosmos/iavl/internal/logger" "github.com/pkg/errors" dbm "github.com/tendermint/tm-db" + + "github.com/cosmos/iavl/cache" + "github.com/cosmos/iavl/internal/logger" ) const ( @@ -86,7 +87,7 @@ func newNodeDB(db dbm.DB, cacheSize int, opts *Options) *nodeDB { opts = &o } - storeVersion, err := db.Get(metadataKeyFormat.Key([]byte(storageVersionKey))) + storeVersion, err := db.Get(metadataKeyFormat.Key(unsafeToBz(storageVersionKey))) if err != nil || storeVersion == nil { storeVersion = []byte(defaultStorageVersionValue) diff --git a/tree_dotgraph.go b/tree_dotgraph.go index 38df20539..9aa1e8630 100644 --- a/tree_dotgraph.go +++ b/tree_dotgraph.go @@ -56,12 +56,12 @@ func WriteDOTGraph(w io.Writer, tree *ImmutableTree, paths []PathToLeaf) { } shortHash := graphNode.Hash[:7] - graphNode.Label = mkLabel(string(node.key), 16, "sans-serif") + graphNode.Label = mkLabel(unsafeToStr(node.key), 16, "sans-serif") graphNode.Label += mkLabel(shortHash, 10, "monospace") graphNode.Label += mkLabel(fmt.Sprintf("version=%d", node.version), 10, "monospace") if node.value != nil { - graphNode.Label += mkLabel(string(node.value), 10, "sans-serif") + graphNode.Label += mkLabel(unsafeToStr(node.value), 10, "sans-serif") } if node.height == 0 { diff --git a/unsafe.go b/unsafe.go new file mode 100644 index 000000000..2d00540d7 --- /dev/null +++ b/unsafe.go @@ -0,0 +1,8 @@ +package iavl + +import ibytes "github.com/cosmos/iavl/internal/bytes" + +var ( + unsafeToStr = ibytes.UnsafeBytesToStr + unsafeToBz = ibytes.UnsafeStrToBytes +) diff --git a/unsaved_fast_iterator.go b/unsaved_fast_iterator.go index 69483156f..cbbff85fe 100644 --- a/unsaved_fast_iterator.go +++ b/unsaved_fast_iterator.go @@ -18,29 +18,19 @@ var ( // it iterates over the latest state via fast nodes, // taking advantage of keys being located in sequence in the underlying database. type UnsavedFastIterator struct { - start, end []byte - - valid bool - - ascending bool - - err error - - ndb *nodeDB + start, end []byte + valid bool + ascending bool + err error + ndb *nodeDB + nextKey []byte + nextVal []byte + fastIterator dbm.Iterator + nextUnsavedNodeIdx int unsavedFastNodeAdditions map[string]*FastNode - - unsavedFastNodeRemovals map[string]interface{} - - unsavedFastNodesToSort []string - - nextKey []byte - - nextVal []byte - - nextUnsavedNodeIdx int - - fastIterator dbm.Iterator + unsavedFastNodeRemovals map[string]interface{} + unsavedFastNodesToSort []string } var _ dbm.Iterator = (*UnsavedFastIterator)(nil) @@ -71,7 +61,7 @@ func NewUnsavedFastIterator(start, end []byte, ascending bool, ndb *nodeDB, unsa continue } - iter.unsavedFastNodesToSort = append(iter.unsavedFastNodesToSort, string(fastNode.key)) + iter.unsavedFastNodesToSort = append(iter.unsavedFastNodesToSort, unsafeToStr(fastNode.key)) } sort.Slice(iter.unsavedFastNodesToSort, func(i, j int) bool { @@ -142,8 +132,8 @@ func (iter *UnsavedFastIterator) Next() { return } + diskKeyStr := unsafeToStr(iter.fastIterator.Key()) if iter.fastIterator.Valid() && iter.nextUnsavedNodeIdx < len(iter.unsavedFastNodesToSort) { - diskKeyStr := string(iter.fastIterator.Key()) if iter.unsavedFastNodeRemovals[diskKeyStr] != nil { // If next fast node from disk is to be removed, skip it. @@ -186,7 +176,7 @@ func (iter *UnsavedFastIterator) Next() { // if only nodes on disk are left, we return them if iter.fastIterator.Valid() { - if iter.unsavedFastNodeRemovals[string(iter.fastIterator.Key())] != nil { + if iter.unsavedFastNodeRemovals[diskKeyStr] != nil { // If next fast node from disk is to be removed, skip it. iter.fastIterator.Next() iter.Next()