From 70cbe2b1be5cce003461928361311f2aca64d991 Mon Sep 17 00:00:00 2001
From: Aaron Chen <aaronchen.lisp@gmail.com>
Date: Wed, 22 May 2024 00:20:11 +0800
Subject: [PATCH] uint256: optimize div-related functions by removing
 unnecessary computation (#169)

* uint256: optimize div-related functions by removing unnecessary computation

* small fix
---
 binary_test.go  |  6 ++---
 decimal.go      |  8 +++----
 uint256.go      | 60 +++++++++++++++++++++++++++----------------------
 uint256_test.go |  3 ++-
 4 files changed, 42 insertions(+), 35 deletions(-)

diff --git a/binary_test.go b/binary_test.go
index 57a7a55e..b5d5186c 100644
--- a/binary_test.go
+++ b/binary_test.go
@@ -246,7 +246,7 @@ func divModMod(z, x, y *Int) *Int {
 func udivremDiv(z, x, y *Int) *Int {
 	var quot Int
 	if !y.IsZero() {
-		udivrem(quot[:], x[:], y)
+		udivrem(quot[:], x[:], y, nil)
 	}
 	return z.Set(&quot)
 }
@@ -256,8 +256,8 @@ func udivremMod(z, x, y *Int) *Int {
 	if y.IsZero() {
 		return z.Clear()
 	}
-	var quot Int
-	rem := udivrem(quot[:], x[:], y)
+	var quot, rem Int
+	udivrem(quot[:], x[:], y, &rem)
 	return z.Set(&rem)
 }
 
diff --git a/decimal.go b/decimal.go
index b8efbc5f..0a268f3c 100644
--- a/decimal.go
+++ b/decimal.go
@@ -45,8 +45,8 @@ func (z *Int) Dec() string {
 	)
 	for {
 		// Obtain Q and R for divisor
-		var quot Int
-		rem := udivrem(quot[:], y[:], divisor)
+		var quot, rem Int
+		udivrem(quot[:], y[:], divisor, &rem)
 		y.Set(&quot) // Set Q for next loop
 		// Convert the R to ascii representation
 		buf = strconv.AppendUint(buf[:0], rem.Uint64(), 10)
@@ -79,8 +79,8 @@ func (z *Int) PrettyDec(separator byte) string {
 		comma   = 0
 	)
 	for {
-		var quot Int
-		rem := udivrem(quot[:], y[:], divisor)
+		var quot, rem Int
+		udivrem(quot[:], y[:], divisor, &rem)
 		y.Set(&quot) // Set Q for next loop
 		buf = strconv.AppendUint(buf[:0], rem.Uint64(), 10)
 		for j := len(buf) - 1; j >= 0; j-- {
diff --git a/uint256.go b/uint256.go
index e5a91239..1f4de336 100644
--- a/uint256.go
+++ b/uint256.go
@@ -259,7 +259,8 @@ func (z *Int) AddMod(x, y, m *Int) *Int {
 	if _, overflow := z.AddOverflow(x, y); overflow {
 		sum := [5]uint64{z[0], z[1], z[2], z[3], 1}
 		var quot [5]uint64
-		rem := udivrem(quot[:], sum[:], m)
+		var rem Int
+		udivrem(quot[:], sum[:], m, &rem)
 		return z.Set(&rem)
 	}
 	return z.Mod(z, m)
@@ -499,7 +500,7 @@ func udivremKnuth(quot, u, d []uint64) {
 // The quotient is stored in provided quot - len(u)-len(d)+1 words.
 // It loosely follows the Knuth's division algorithm (sometimes referenced as "schoolbook" division) using 64-bit words.
 // See Knuth, Volume 2, section 4.3.1, Algorithm D.
-func udivrem(quot, u []uint64, d *Int) (rem Int) {
+func udivrem(quot, u []uint64, d, rem *Int) {
 	var dLen int
 	for i := len(d) - 1; i >= 0; i-- {
 		if d[i] != 0 {
@@ -526,8 +527,10 @@ func udivrem(quot, u []uint64, d *Int) (rem Int) {
 	}
 
 	if uLen < dLen {
-		copy(rem[:], u)
-		return rem
+		if rem != nil {
+			copy(rem[:], u)
+		}
+		return
 	}
 
 	var unStorage [9]uint64
@@ -542,18 +545,20 @@ func udivrem(quot, u []uint64, d *Int) (rem Int) {
 
 	if dLen == 1 {
 		r := udivremBy1(quot, un, dn[0])
-		rem.SetUint64(r >> shift)
-		return rem
+		if rem != nil {
+			rem.SetUint64(r >> shift)
+		}
+		return
 	}
 
 	udivremKnuth(quot, un, dn)
 
-	for i := 0; i < dLen-1; i++ {
-		rem[i] = (un[i] >> shift) | (un[i+1] << (64 - shift))
+	if rem != nil {
+		for i := 0; i < dLen-1; i++ {
+			rem[i] = (un[i] >> shift) | (un[i+1] << (64 - shift))
+		}
+		rem[dLen-1] = un[dLen-1] >> shift
 	}
-	rem[dLen-1] = un[dLen-1] >> shift
-
-	return rem
 }
 
 // Div sets z to the quotient x/y for returns z.
@@ -574,7 +579,7 @@ func (z *Int) Div(x, y *Int) *Int {
 	// x/y ; x > y > 0
 
 	var quot Int
-	udivrem(quot[:], x[:], y)
+	udivrem(quot[:], x[:], y, nil)
 	return z.Set(&quot)
 }
 
@@ -603,9 +608,9 @@ func (z *Int) Mod(x, y *Int) *Int {
 		return z.SetUint64(x.Uint64() % y.Uint64())
 	}
 
-	var quot Int
-	*z = udivrem(quot[:], x[:], y)
-	return z
+	var quot, rem Int
+	udivrem(quot[:], x[:], y, &rem)
+	return z.Set(&rem)
 }
 
 // DivMod sets z to the quotient x div y and m to the modulus x mod y and returns the pair (z, m) for y != 0.
@@ -635,10 +640,9 @@ func (z *Int) DivMod(x, y, m *Int) (*Int, *Int) {
 		return z.SetUint64(x0 / y0), m.SetUint64(x0 % y0)
 	}
 
-	var quot Int
-	*m = udivrem(quot[:], x[:], y)
-	*z = quot
-	return z, m
+	var quot, rem Int
+	udivrem(quot[:], x[:], y, &rem)
+	return z.Set(&quot), m.Set(&rem)
 }
 
 // SMod interprets x and y as two's complement signed integers,
@@ -683,8 +687,8 @@ func (z *Int) MulModWithReciprocal(x, y, m *Int, mu *[5]uint64) *Int {
 		pl Int
 		ph Int
 	)
-	copy(pl[:], p[:4])
-	copy(ph[:], p[4:])
+	pl[0], pl[1], pl[2], pl[3] = p[0], p[1], p[2], p[3]
+	ph[0], ph[1], ph[2], ph[3] = p[4], p[5], p[6], p[7]
 
 	// If the multiplication is within 256 bits use Mod().
 	if ph.IsZero() {
@@ -692,7 +696,8 @@ func (z *Int) MulModWithReciprocal(x, y, m *Int, mu *[5]uint64) *Int {
 	}
 
 	var quot [8]uint64
-	rem := udivrem(quot[:], p[:], m)
+	var rem Int
+	udivrem(quot[:], p[:], m, &rem)
 	return z.Set(&rem)
 }
 
@@ -716,8 +721,8 @@ func (z *Int) MulMod(x, y, m *Int) *Int {
 		pl Int
 		ph Int
 	)
-	copy(pl[:], p[:4])
-	copy(ph[:], p[4:])
+	pl[0], pl[1], pl[2], pl[3] = p[0], p[1], p[2], p[3]
+	ph[0], ph[1], ph[2], ph[3] = p[4], p[5], p[6], p[7]
 
 	// If the multiplication is within 256 bits use Mod().
 	if ph.IsZero() {
@@ -725,7 +730,8 @@ func (z *Int) MulMod(x, y, m *Int) *Int {
 	}
 
 	var quot [8]uint64
-	rem := udivrem(quot[:], p[:], m)
+	var rem Int
+	udivrem(quot[:], p[:], m, &rem)
 	return z.Set(&rem)
 }
 
@@ -739,9 +745,9 @@ func (z *Int) MulDivOverflow(x, y, d *Int) (*Int, bool) {
 	umul(x, y, &p)
 
 	var quot [8]uint64
-	udivrem(quot[:], p[:], d)
+	udivrem(quot[:], p[:], d, nil)
 
-	copy(z[:], quot[:4])
+	z[0], z[1], z[2], z[3] = quot[0], quot[1], quot[2], quot[3]
 
 	return z, (quot[4] | quot[5] | quot[6] | quot[7]) != 0
 }
diff --git a/uint256_test.go b/uint256_test.go
index 8b75c711..81bf5ff6 100644
--- a/uint256_test.go
+++ b/uint256_test.go
@@ -479,8 +479,9 @@ func TestUdivremQuick(t *testing.T) {
 	var (
 		u        = []uint64{1, 0, 0, 0, 0}
 		expected = new(Int)
+		rem Int
 	)
-	rem := udivrem([]uint64{}, u, &Int{0, 1, 0, 0})
+	udivrem([]uint64{}, u, &Int{0, 1, 0, 0}, &rem)
 	copy(expected[:], u)
 	if !rem.Eq(expected) {
 		t.Errorf("Wrong remainder: %x, expected %x", rem, expected)