Fix floatMandE to properly determine necessary precision.

Add DecodeFloat and flesh out the missing decodeSmallNumber. Handling floating point precision is difficult, so these routines leverage the standard library's floating point parsing and formatting. Fixes #31
cockroachdb · Mar 3, 2015 · a9799c3 · a9799c3
1 parent 2ba0d21
commit a9799c3
Show file tree

Hide file tree

Showing 2 changed files with 222 additions and 40 deletions.
diff --git a/util/encoding/key_encoding.go b/util/encoding/key_encoding.go
@@ -25,6 +25,7 @@ import (
 	"bytes"
 	"fmt"
 	"math"
+	"strconv"
 	"unicode/utf8"
 )
 
@@ -391,6 +392,43 @@ func EncodeFloat(b []byte, f float64) []byte {
 	return nil
 }
 
+// DecodeFloat decodes a float64 from the front of buf and returns the bytes
+// remaining after the encoded value together with the decoded float64.
+//
+// The first byte of buf selects the decoding: 0x15 is zero; 0x08, 0x09-0x13
+// and 0x14 are negative large, medium and small numbers; 0x16, 0x17-0x21 and
+// 0x22 are positive small, medium and large numbers. A non-zero encoding runs
+// up to and including the first orderedEncodingTerminator byte.
+//
+// DecodeFloat panics if buf is empty, starts with an unknown prefix, or holds
+// a non-zero encoding that has no terminator.
+func DecodeFloat(buf []byte) ([]byte, float64) {
+	if len(buf) == 0 {
+		panic("insufficient bytes to decode float")
+	}
+	prefix := buf[0]
+	if prefix == 0x15 {
+		// Zero is encoded as the prefix byte alone.
+		return buf[1:], 0
+	}
+	if prefix < 0x08 || prefix > 0x22 {
+		panic(fmt.Sprintf("unknown prefix of the encoded byte slice: %q", buf))
+	}
+
+	// Without this check a missing terminator would hand an empty slice to the
+	// decode helpers and fail with an unrelated slice-bounds panic.
+	idx := bytes.IndexByte(buf, orderedEncodingTerminator)
+	if idx < 0 {
+		panic(fmt.Sprintf("missing terminator in the encoded byte slice: %q", buf))
+	}
+	enc, rest := buf[:idx+1], buf[idx+1:]
+
+	// Prefixes below the zero marker (0x15) encode negative values.
+	negative := prefix < 0x15
+	var e int
+	var m []byte
+	switch prefix {
+	case 0x08, 0x22:
+		// Negative large, positive large.
+		e, m = decodeLargeNumber(negative, enc)
+	case 0x14, 0x16:
+		// Negative small, positive small.
+		e, m = decodeSmallNumber(negative, enc)
+	default:
+		// Negative medium (0x09-0x13), positive medium (0x17-0x21).
+		e, m = decodeMediumNumber(negative, enc)
+	}
+	return rest, makeFloatFromMandE(negative, e, m)
+}
+
 // floatMandE computes and returns the mantissa M and exponent E for f.
 //
 // The mantissa is a base-100 representation of the value. The exponent
@@ -410,44 +448,102 @@ func floatMandE(f float64) (int, []byte) {
 	if f < 0 {
 		f = -f
 	}
-	i := 0
-	if f >= 1 {
-		for t := f; t >= 1; t /= 100 {
-			i++
-		}
+
+	// Use strconv.FormatFloat to handle the intricacies of determining how much
+	// precision is necessary to precisely represent f. The 'e' format is
+	// d.dddde±dd.
+	b := strconv.AppendFloat(nil, f, 'e', -1, 64)
+	if len(b) < 5 {
+		// The formatted float must be at least 5 bytes or something unexpected has
+		// occurred.
+		panic(fmt.Errorf("malformed float: %v -> %s", f, b))
+	}
+
+	// Parse the exponent.
+	e10 := 10*int(b[len(b)-2]-'0') + int(b[len(b)-1]-'0')
+	if b[len(b)-3] == '-' {
+		e10 = -e10
+	}
+
+	// Strip off the exponent.
+	b = b[:len(b)-4]
+
+	// Move all of the digits after the decimal and prepend a leading 0.
+	if len(b) > 1 {
+		// "d.dddd" -> "dddddd"
+		b[1] = b[0]
 	} else {
-		for t := f; t < 0.01; t *= 100 {
-			i--
-		}
+		// "d" -> "dd"
+		b = append(b, b[0])
+	}
+	b[0] = '0' // "0ddddd"
+	e10++
+
+	// Convert the power-10 exponent to a power of 100 exponent.
+	var e100 int
+	if e10 >= 0 {
+		e100 = (e10 + 1) / 2
+	} else {
+		e100 = e10 / 2
+	}
+	// Strip the leading 0 if the conversion to e100 did not add a factor of 10,
+	// i.e. e10 is even and 100^e100 equals 10^e10.
+	if e100*2 == e10 {
+		b = b[1:]
+	}
+
+	// Ensure that the number of digits is even.
+	if len(b)%2 != 0 {
+		b = append(b, '0')
+	}
+
+	// Convert the base-10 'b' slice to a base-100 'm' slice. We do this
+	// conversion in place to avoid an allocation.
+	m := b[:len(b)/2]
+	for i := 0; i < len(b); i += 2 {
+		accum := 10*int(b[i]-'0') + int(b[i+1]-'0')
+		// The bytes are encoded as 2n+1.
+		m[i/2] = byte(2*accum + 1)
 	}
-	// Iterate through the centimal digits of the
-	// mantissa and add them to the buffer.
-	// For a number like 9999.00001, start with
-	// 99.9900001, write 99 to the buffer, then
-	// multiply by 100 until there is no fractional
-	// portion left.
-	d := f * math.Pow(100, float64(-i+1))
-	var buf bytes.Buffer
-	n, frac := math.Modf(d)
-	buf.WriteByte(byte(2*n + 1))
-	for frac != 0 {
-		// Remove the integral portion and shift to the left.
-		// So given the above example:
-		// 99.9900001 -> [-99] -> 00.9900001 -> [*100] -> 99.00001
-		// 99.00001 -> [-99] -> 00.00001 -> [*100] -> 00.001
-		// 00.001 -> [-00] -> 00.001 -> [*100] -> 00.1
-		// 00.1 -> [-00] -> 00.1 -> [*100] -> 10
-		// Done as frac == 0
-		d = (d - n) * 100
-		n, frac = math.Modf(d)
-		buf.WriteByte(byte(2*n + 1)) // Write the integral to the buf.
-	}
-	b := buf.Bytes()
 	// The last byte is encoded as 2n+0.
-	b[len(b)-1]--
+	m[len(m)-1]--
 
-	// Trailing X==0 digits are omitted.
-	return i, removeTrailingZeros(b)
+	return e100, m
+}
+
+// makeFloatFromMandE reconstructs the float from the mantissa M and exponent
+// E. Properly handling floating point rounding is tough, so we take the
+// approach of converting the base-100 mantissa into a base-10 mantissa,
+// formatting the floating point number to a string and then using the standard
+// library to parse it.
+//
+// negative selects the sign of the result, e is the power-of-100 exponent and
+// m holds the base-100 mantissa digits, each encoded as 2n+1 except for the
+// last which is encoded as 2n. The parsed value is ±0.M * 100^e.
+//
+// It panics if the formatted string is not a valid float, which happens when
+// m is empty or holds a byte that does not decode to a base-100 digit. A
+// value outside the float64 range is returned as produced by
+// strconv.ParseFloat (±Inf on overflow).
+func makeFloatFromMandE(negative bool, e int, m []byte) float64 {
+	// ±.dddde±ddd.
+	b := make([]byte, 0, len(m)*2+7)
+	if negative {
+		b = append(b, '-')
+	}
+	b = append(b, '.')
+	for _, v := range m {
+		// Digits are stored as 2n+1, or 2n for the final digit. Integer division
+		// by 2 recovers n in both cases, so the last byte needs no special case.
+		t := int(v) / 2
+		b = append(b, byte(t/10)+'0', byte(t%10)+'0')
+	}
+	b = append(b, 'e')
+	// The power-of-10 exponent can need three digits (float64 reaches 1e308),
+	// so let strconv emit the sign and however many digits are required.
+	b = strconv.AppendInt(b, int64(2*e), 10)
+
+	f, err := strconv.ParseFloat(string(b), 64)
+	if err != nil {
+		// A range error still yields a usable, saturated result; anything else
+		// means the mantissa bytes were corrupt.
+		if ne, ok := err.(*strconv.NumError); !ok || ne.Err != strconv.ErrRange {
+			panic(fmt.Sprintf("malformed float mantissa/exponent: %q", b))
+		}
+	}
+	return f
 }
 
 // onesComplement inverts each byte in buf from index start to end.
@@ -499,6 +595,26 @@ func encodeLargeNumber(negative bool, e int, m []byte, buf []byte) []byte {
 	return buf[:l+1]
 }
 
+// decodeSmallNumber splits buf, a small-number encoding made of a prefix
+// byte, a varint exponent, the mantissa bytes and a trailing terminator, into
+// its exponent and mantissa. The varint holds the exponent's magnitude, so
+// the returned exponent is its negation. For negative numbers the exponent
+// byte and the mantissa are stored ones-complemented and are inverted back
+// here. The returned mantissa is a copy and does not alias buf.
+func decodeSmallNumber(negative bool, buf []byte) (int, []byte) {
+	// Pick the bytes holding the varint exponent, which follow the prefix.
+	expBytes := buf[1:]
+	if negative {
+		// NOTE(review): only the first exponent byte is inverted, so this
+		// presumably relies on the exponent always fitting in a one-byte
+		// varint -- confirm against the encoder.
+		expBytes = []byte{^buf[1]}
+	}
+	var (
+		exp uint64
+		n   int
+	)
+	exp, n = GetUVarint(expBytes)
+
+	// Copy out everything between the exponent and the terminator.
+	mantissa := make([]byte, len(buf)-(2+n))
+	copy(mantissa, buf[1+n:len(buf)-1])
+	if negative {
+		onesComplement(mantissa, 0, len(mantissa))
+	}
+	return int(-exp), mantissa
+}
+
 func decodeMediumNumber(negative bool, buf []byte) (int, []byte) {
 	// We don't need the prefix and last terminator.
 	m := make([]byte, len(buf)-2)

diff --git a/util/encoding/key_encoding_test.go b/util/encoding/key_encoding_test.go
@@ -207,10 +207,12 @@ func TestIntMandE(t *testing.T) {
 	}
 	for _, c := range testCases {
 		if e, m := intMandE(c.Value); e != c.E || !bytes.Equal(m, c.M) {
-			t.Errorf("unexpected mismatch in E/M for %v. expected E=%v | M=%+v, got E=%v | M=%+v", c.Value, c.E, prettyBytes(c.M), e, prettyBytes(m))
+			t.Errorf("unexpected mismatch in E/M for %v. expected E=%v | M=%+v, got E=%v | M=%+v",
+				c.Value, c.E, prettyBytes(c.M), e, prettyBytes(m))
 		}
 		if v := makeIntFromMandE(c.Value < 0, c.E, c.M); v != c.Value {
-			t.Errorf("unexpected mismatch in Value for E=%v and M=%+v. expected value=%v, got value=%v", c.E, prettyBytes(c.M), c.Value, v)
+			t.Errorf("unexpected mismatch in Value for E=%v and M=%+v. expected value=%v, got value=%v",
+				c.E, prettyBytes(c.M), c.Value, v)
 		}
 	}
 }
@@ -245,7 +247,8 @@ func TestEncodeInt(t *testing.T) {
 	for i, c := range testCases {
 		enc := EncodeInt([]byte{}, c.Value)
 		if !bytes.Equal(enc, c.Encoding) {
-			t.Errorf("unexpected mismatch for %v. expected %v, got %v", c.Value, prettyBytes(c.Encoding), prettyBytes(enc))
+			t.Errorf("unexpected mismatch for %v. expected %v, got %v",
+				c.Value, prettyBytes(c.Encoding), prettyBytes(enc))
 		}
 		if i > 0 {
 			if bytes.Compare(testCases[i-1].Encoding, enc) >= 0 {
@@ -259,7 +262,7 @@ func TestEncodeInt(t *testing.T) {
 	}
 }
 
-func disabledTestFloatMandE(t *testing.T) {
+func TestFloatMandE(t *testing.T) {
 	testCases := []struct {
 		Value float64
 		E     int
@@ -291,13 +294,76 @@ func disabledTestFloatMandE(t *testing.T) {
 		{1234.5, 2, []byte{0x19, 0x45, 0x64}},
 		{12.345, 1, []byte{0x19, 0x45, 0x64}},
 		{0.123, 0, []byte{0x19, 0x3c}},
+		{-0.123, 0, []byte{0x19, 0x3c}},
 		{0.0123, 0, []byte{0x03, 0x2e}},
 		{0.00123, -1, []byte{0x19, 0x3c}},
-		{9223372036854775807, 10, []byte{0x13, 0x2d, 0x43, 0x91, 0x07, 0x89, 0x6d, 0x9b, 0x75, 0x0e}},
+		// The following value cannot be precisely represented as a float.
+		// {9223372036854775807, 10, []byte{0x13, 0x2d, 0x43, 0x91, 0x07, 0x89, 0x6d, 0x9b, 0x75, 0x0e}},
 	}
 	for _, c := range testCases {
 		if e, m := floatMandE(c.Value); e != c.E || !bytes.Equal(m, c.M) {
-			t.Errorf("unexpected mismatch in E/M for %v. expected E=%v | M=%+v, got E=%v | M=%+v", c.Value, c.E, prettyBytes(c.M), e, prettyBytes(m))
+			t.Errorf("unexpected mismatch in E/M for %v. expected E=%v | M=%+v, got E=%v | M=%+v",
+				c.Value, c.E, prettyBytes(c.M), e, prettyBytes(m))
+		}
+	}
+}
+
+// TestEncodeFloat verifies that EncodeFloat produces the expected bytes for
+// each value, that the encodings of the ascending test values sort in
+// ascending byte order, and that DecodeFloat round-trips every encoding.
+func TestEncodeFloat(t *testing.T) {
+	testCases := []struct {
+		Value    float64
+		Encoding []byte
+	}{
+		{-10000.0, []byte{0x10, 0xfd, 0x0}},
+		{-9999.0, []byte{0x11, 0x38, 0x39, 0x00}},
+		{-100.0, []byte{0x11, 0xfd, 0x00}},
+		{-99.0, []byte{0x12, 0x39, 0x00}},
+		{-1.0, []byte{0x12, 0xfd, 0x0}},
+		{-0.00123, []byte{0x14, 0xfe, 0xe6, 0xc3, 0x0}},
+		{0, []byte{0x15}},
+		{0.00123, []byte{0x16, 0x1, 0x19, 0x3c, 0x0}},
+		{0.0123, []byte{0x17, 0x03, 0x2e, 0x0}},
+		{0.123, []byte{0x17, 0x19, 0x3c, 0x0}},
+		{1.0, []byte{0x18, 0x02, 0x0}},
+		{10.0, []byte{0x18, 0x14, 0x0}},
+		{12.345, []byte{0x18, 0x19, 0x45, 0x64, 0x0}},
+		{99.0, []byte{0x18, 0xc6, 0x0}},
+		{99.0001, []byte{0x18, 0xc7, 0x01, 0x02, 0x0}},
+		{99.01, []byte{0x18, 0xc7, 0x02, 0x0}},
+		{100.0, []byte{0x19, 0x02, 0x0}},
+		{100.01, []byte{0x19, 0x03, 0x01, 0x02, 0x0}},
+		{100.1, []byte{0x19, 0x03, 0x01, 0x14, 0x0}},
+		{1234, []byte{0x19, 0x19, 0x44, 0x0}},
+		{1234.5, []byte{0x19, 0x19, 0x45, 0x64, 0x0}},
+		{9999, []byte{0x19, 0xc7, 0xc6, 0x0}},
+		{9999.000001, []byte{0x19, 0xc7, 0xc7, 0x01, 0x01, 0x02, 0x0}},
+		{9999.000009, []byte{0x19, 0xc7, 0xc7, 0x01, 0x01, 0x12, 0x0}},
+		{9999.00001, []byte{0x19, 0xc7, 0xc7, 0x01, 0x01, 0x14, 0x0}},
+		{9999.00009, []byte{0x19, 0xc7, 0xc7, 0x01, 0x01, 0xb4, 0x0}},
+		{9999.000099, []byte{0x19, 0xc7, 0xc7, 0x01, 0x01, 0xc6, 0x0}},
+		{9999.0001, []byte{0x19, 0xc7, 0xc7, 0x01, 0x02, 0x0}},
+		{9999.001, []byte{0x19, 0xc7, 0xc7, 0x01, 0x14, 0x0}},
+		{9999.01, []byte{0x19, 0xc7, 0xc7, 0x02, 0x0}},
+		{9999.1, []byte{0x19, 0xc7, 0xc7, 0x14, 0x0}},
+		{10000, []byte{0x1a, 0x02, 0x0}},
+		{10001, []byte{0x1a, 0x03, 0x01, 0x02, 0x0}},
+		{12345, []byte{0x1a, 0x03, 0x2f, 0x5a, 0x0}},
+		{123450, []byte{0x1a, 0x19, 0x45, 0x64, 0x0}},
+	}
+	for i, tc := range testCases {
+		got := EncodeFloat([]byte{}, tc.Value)
+		if !bytes.Equal(got, tc.Encoding) {
+			t.Errorf("unexpected mismatch for %v. expected %v, got %v",
+				tc.Value, prettyBytes(tc.Encoding), prettyBytes(got))
+		}
+		// The table is listed in ascending value order, so each encoding must
+		// sort strictly after the expected encoding of the previous entry.
+		if i > 0 && bytes.Compare(testCases[i-1].Encoding, got) >= 0 {
+			t.Errorf("%v: expected %v to be less than %v",
+				tc.Value, prettyBytes(testCases[i-1].Encoding), prettyBytes(got))
+		}
+		// Decoding the encoded bytes must give back the original value.
+		if _, decoded := DecodeFloat(got); decoded != tc.Value {
+			t.Errorf("unexpected mismatch for %v. got %v", tc.Value, decoded)
 		}
 	}
 }