Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: math: make Int.Size() faster by computation not len(MarshalledBytes) #16263

Merged
merged 3 commits into from
Jun 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions math/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ Ref: https://github.com/commitizen/conventional-commit-types/blob/v3.0.0/index.j

### Improvements

* [#16263](https://github.com/cosmos/cosmos-sdk/pull/16263) Improved math/Int.Size by computing the decimal digits count instead of firstly invoking .Marshal() then checking the length

* [#15768](https://github.com/cosmos/cosmos-sdk/pull/15768) Removed the second call to the `init` method for the global variable `grand`.
* [#16141](https://github.com/cosmos/cosmos-sdk/pull/16141) Speedup `LegacyDec.ApproxRoot` and `LegacyDec.ApproxSqrt`.

Expand Down
87 changes: 84 additions & 3 deletions math/int.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"encoding"
"encoding/json"
"fmt"
stdmath "math"
"math/big"
"strings"
"sync"
Expand Down Expand Up @@ -421,9 +422,89 @@ func (i *Int) Unmarshal(data []byte) error {
}

// Size implements the gogo proto custom type interface.
func (i *Int) Size() int {
bz, _ := i.Marshal()
return len(bz)
// big10Pow19 is 10^19 = 10000000000000000000, the smallest power of ten that
// has as many decimal digits (20) as the largest uint64:
//
//	18446744073709551615 == (1<<64)-1
//
// It is also the largest power of ten that still fits in a uint64, which is
// why it can be built directly with SetUint64 instead of parsing a string and
// discarding the parse status.
//
// log10Of2 is log10(2); multiplying a bit length by it estimates the number of
// decimal digits of a value with that many bits.
var (
	big10Pow19 = new(big.Int).SetUint64(1e19)
	log10Of2   = stdmath.Log10(2)
)

// Size implements the gogo proto custom type interface by reporting the length
// of the decimal text form of i without actually marshalling it.
//
// Zero has length 1. A negative value contributes one byte for the leading "-"
// plus the digit count of its magnitude; a positive value contributes only its
// digit count. The digit counting itself is delegated to sizeBigInt.
func (i *Int) Size() (size int) {
	switch sign := i.Sign(); {
	case sign == 0:
		// log10(0) is undefined, so zero is answered directly.
		return 1

	case sign < 0:
		// The sign is accounted for here exactly once so that sizeBigInt only
		// ever sees non-negative input. Abs writes into a fresh big.Int, which
		// sizeBigInt is then free to use as scratch space (hence "true").
		magnitude := new(big.Int).Abs(i.i)
		return 1 + sizeBigInt(magnitude, true)

	default:
		// Positive: hand over the backing value itself and tell sizeBigInt it
		// has not been copied yet.
		return sizeBigInt(i.i, false)
	}
}

func sizeBigInt(i *big.Int, alreadyMadeCopy bool) (size int) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am a bit confused, since this feels like we could solve this with more pre-computation. E.g. for every bitlen, store "markers" in a map for values of that bitlength that correspond to different sizes.

(And then we'd have 0 alloc's per size call)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nah, it isn't as simple. I had independently tried this approach out, there are a bunch of numbers which are in the boundary of bit lengths but require you to search between the closest values: and roughly I also checked for the allocations reduction and it wasn't significant. I shall refine it in the future, but for purpose of clear wins this approach as is will work great: the approach to get accuracy is to:

  • while precomputing the lookup table, for the same bit length B: store []*big.Int{startingBitLenValue, ...endBitLenValue}
  • when you lookup by bit length, perform a binary search (range search) in the retrieved list and see if the value falls within that range

This was the code for my prior experiment which I'll refine after this PR lands:

// savings is one entry of the bit-length lookup table built by computeLUT.
type savings struct {
	// bi is the reference value for the entry; computeLUT stores a power of two here.
	bi *big.Int
	// bl is the number of decimal digits of bi (computeLUT stores len(bi.String())).
	bl int
}

// bLenMap maps a bit length to the savings entry recorded for it.
// It is nil until computeLUT assigns it.
var bLenMap map[int]*savings

func computeLUT() {
	// 1. Compute the tables on which we have a divergence in digits
	// for log values.
	// Goal to lookup the number of digits given a bit length
	bLenMap = map[int]*savings{
		0: {new(big.Int).SetUint64(1), 1},
	}
	for i := uint(0); i <= 258; i++ {
		ii := new(big.Int).SetUint64(1)
		ii = ii.Lsh(ii, i)
		len10Digits := len(ii.String())
		if false {
			fmt.Printf("%-79s bitLen: %-4d %d\n", ii, ii.BitLen(), len10Digits)
		}
		bLenMap[ii.BitLen()] = &savings{bl: len10Digits, bi: ii}
	}
}

// init builds the bit-length lookup table once, when the package is loaded.
func init() {
	computeLUT()
}

// lookup returns the number of decimal digits in the absolute value of ii
// (the sign is not counted; zero has one digit).
//
// The argument is never modified: big.Int.BitLen and big.Int.CmpAbs both work
// on the absolute value, so no in-place Abs (which would flip the caller's
// negative value to positive) and no defensive copy are needed.
//
// bLenMap[bits] holds the digit count d of the smallest value with that bit
// length. All values sharing a bit length lie within a factor of two of each
// other, so at most one power of ten can fall inside the range; the answer is
// therefore d, or d+1 when |ii| has reached 10^d. Bit lengths missing from the
// table fall back to formatting the value.
func lookup(ii *big.Int) int {
	entry, ok := bLenMap[ii.BitLen()]
	if !ok {
		// Outside the precomputed range (or the table has not been built).
		return len(new(big.Int).Abs(ii).String())
	}

	// 10^d is the smallest value that needs d+1 digits.
	nextPow10 := new(big.Int).Exp(big.NewInt(10), big.NewInt(int64(entry.bl)), nil)
	if ii.CmpAbs(nextPow10) >= 0 {
		return entry.bl + 1
	}
	return entry.bl
}

// This code assumes that non-0, non-negative values have been passed in.
bitLen := i.BitLen()

res := float64(bitLen) * log10Of2
ires := int(res)
if diff := res - float64(ires); diff == 0.0 {
return size + ires
} else if diff >= 0.3 { // There are other digits past the bitLen, this is a heuristic.
return size + ires + 1
}

// Use Log10(x) for values less than (1<<64)-1, given it is only defined for [1, (1<<64)-1]
if bitLen <= 64 {
return size + 1 + int(stdmath.Log10(float64(i.Uint64())))
}
// Past this point, the value is greater than (1<<64)-1 and 10^19.

// The prior above computation of i.BitLen() * log10Of2 is inaccurate for powers of 10
// and values like "9999999999999999999999999999"; that computation always overshoots by 1
// hence our next alternative is to just go old school and keep dividing the value by:
// 10^19 aka "10000000000000000000" while incrementing size += 19

// At this point we should just keep reducing by 10^19, as that is the smallest power
// of 10 that has the same number of decimal digits (20) as (1<<64)-1.
var ri *big.Int
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we pass this in as a scratch variable each loop, to avoid re-allocations?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ping @odeke-em

if alreadyMadeCopy {
ri = i
} else {
ri = new(big.Int).Set(i)
alreadyMadeCopy = true
}

for ri.Cmp(big10Pow19) >= 0 { // Keep reducing the value by 10^19 and increment size by 19
ri = ri.Quo(ri, big10Pow19)
size += 19
}

if ri.Sign() == 0 { // if the value is zero, no need for the recursion, just return immediately
return size
}

// Otherwise we already know how many times we reduced the value, so its
// remnants less than 10^19 and those can be computed by again calling sizeBigInt.
return size + sizeBigInt(ri, alreadyMadeCopy)
}

// Override Amino binary serialization by proxying to protobuf.
Expand Down
58 changes: 58 additions & 0 deletions math/int_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -513,3 +513,61 @@ func TestFormatIntCorrectness(t *testing.T) {
})
}
}

// sizeTests pairs a decimal integer string (s) with the value that Size is
// expected to report for it (want). The expected value is the length of the
// canonical decimal text: a leading "-" counts as one, and "-0" is treated as
// zero and therefore expects 1.
var sizeTests = []struct {
s string
want int
}{
{"0", 1},
{"-0", 1},
{"-10", 3},
{"-10000", 6},
{"10000", 5},
{"100000", 6},
{"99999", 5},
{"10000000000", 11},
{"18446744073709551616", 20},
{"18446744073709551618", 20},
{"184467440737095516181", 21},
{"100000000000000000000000", 24},
{"1000000000000000000000000000", 28},
{"9000000000099999999999999999", 28},
{"9999999999999999999999999999", 28},
{"9903520314283042199192993792", 28},
{"340282366920938463463374607431768211456", 39},
{"3402823669209384634633746074317682114569999", 43},
{"9999999999999999999999999999999999999999999", 43},
{"99999999999999999999999999999999999999999999", 44},
{"999999999999999999999999999999999999999999999", 45},
{"90000000000999999999999999999000000000099999999999999999", 56},
{"-90000000000999999999999999999000000000099999999999999999", 57},
{"9000000000099999999999999999900000000009999999999999999990", 58},
{"990000000009999999999999999990000000000999999999999999999999", 60},
{"99000000000999999999999999999000000000099999999999999999999919", 62},
{"90000000000999999990000000000000000000000000000000000000000000", 62},
{"99999999999999999999999999990000000000000000000000000000000000", 62},
{"11111111111111119999999999990000000000000000000000000000000000", 62},
// NOTE(review): the next entry repeats the "...999919" case listed four lines above.
{"99000000000999999999999999999000000000099999999999999999999919", 62},
{"10000000000000000000000000000000000000000000000000000000000000", 62},
{"10000000000000000000000000000000000000000000000000000000000000000000000000000", 77},
{"99999999999999999999999999999999999999999999999999999999999999999999999999999", 77},
{"110000000000000000000000000000000000000000000000000000000000000000000000000009", 78},
}

func BenchmarkIntSize(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
for _, st := range sizeTests {
ii, _ := math.NewIntFromString(st.s)
got := ii.Size()
if got != st.want {
b.Errorf("%q:: got=%d, want=%d", st.s, got, st.want)
}
sink = got
}
}
if sink == nil {
b.Fatal("Benchmark did not run!")
}
sink = nil
}