Skip to content

Commit

Permalink
Fix floatMandE to properly determine necessary precision.
Browse files Browse the repository at this point in the history
Add DecodeFloat and flesh out the missing decodeSmallNumber.

Handling floating point precision is difficult so these routines
leverage the standard library floating point parsing and formatting.

Fixes #31
  • Loading branch information
petermattis committed Mar 3, 2015
1 parent 2ba0d21 commit a9799c3
Show file tree
Hide file tree
Showing 2 changed files with 222 additions and 40 deletions.
184 changes: 150 additions & 34 deletions util/encoding/key_encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"bytes"
"fmt"
"math"
"strconv"
"unicode/utf8"
)

Expand Down Expand Up @@ -391,6 +392,43 @@ func EncodeFloat(b []byte, f float64) []byte {
return nil
}

// DecodeFloat returns the remaining byte slice after decoding and the decoded
// float64 from buf.
//
// The first byte of buf selects the encoding class:
//
//	0x08        negative large
//	0x09..0x13  negative medium
//	0x14        negative small
//	0x15        zero (no mantissa and no terminator follow)
//	0x16        positive small
//	0x17..0x21  positive medium
//	0x22        positive large
//
// Every non-zero encoding runs up to and including the first
// orderedEncodingTerminator byte; the bytes after it are returned untouched.
//
// DecodeFloat panics if buf is empty, if a non-zero encoding has no
// terminator, or if the prefix byte is not one of the values listed above.
func DecodeFloat(buf []byte) ([]byte, float64) {
	if len(buf) == 0 {
		panic("insufficient bytes to decode float: empty byte slice")
	}
	if buf[0] == 0x15 {
		return buf[1:], 0
	}
	idx := bytes.IndexByte(buf, orderedEncodingTerminator)
	if idx == -1 {
		// Without a terminator the helpers below would be handed an empty
		// slice and fail with an unrelated index or allocation panic.
		panic(fmt.Sprintf("missing terminator in the encoded byte slice: %q", buf))
	}
	// enc is the complete encoded value: prefix, payload and terminator.
	enc, rest := buf[:idx+1], buf[idx+1:]
	switch prefix := buf[0]; {
	case prefix == 0x08:
		// Negative large.
		e, m := decodeLargeNumber(true, enc)
		return rest, makeFloatFromMandE(true, e, m)
	case prefix > 0x08 && prefix <= 0x13:
		// Negative medium.
		e, m := decodeMediumNumber(true, enc)
		return rest, makeFloatFromMandE(true, e, m)
	case prefix == 0x14:
		// Negative small.
		e, m := decodeSmallNumber(true, enc)
		return rest, makeFloatFromMandE(true, e, m)
	case prefix == 0x22:
		// Positive large.
		e, m := decodeLargeNumber(false, enc)
		return rest, makeFloatFromMandE(false, e, m)
	case prefix >= 0x17 && prefix < 0x22:
		// Positive medium.
		e, m := decodeMediumNumber(false, enc)
		return rest, makeFloatFromMandE(false, e, m)
	case prefix == 0x16:
		// Positive small.
		e, m := decodeSmallNumber(false, enc)
		return rest, makeFloatFromMandE(false, e, m)
	default:
		panic(fmt.Sprintf("unknown prefix of the encoded byte slice: %q", buf))
	}
}

// floatMandE computes and returns the mantissa M and exponent E for f.
//
// The mantissa is a base-100 representation of the value. The exponent
Expand All @@ -410,44 +448,102 @@ func floatMandE(f float64) (int, []byte) {
if f < 0 {
f = -f
}
i := 0
if f >= 1 {
for t := f; t >= 1; t /= 100 {
i++
}

// Use strconv.FormatFloat to handle the intricacies of determining how much
// precision is necessary to precisely represent f. The 'e' format is
// d.dddde±dd.
b := strconv.AppendFloat(nil, f, 'e', -1, 64)
if len(b) < 5 {
// The formatted float must be at least 5 bytes or something unexpected has
// occurred.
panic(fmt.Errorf("malformed float: %v -> %s", f, b))
}

// Parse the exponent.
e10 := 10*int(b[len(b)-2]-'0') + int(b[len(b)-1]-'0')
if b[len(b)-3] == '-' {
e10 = -e10
}

// Strip off the exponent.
b = b[:len(b)-4]

// Move all of the digits after the decimal and prepend a leading 0.
if len(b) > 1 {
// "d.dddd" -> "dddddd"
b[1] = b[0]
} else {
for t := f; t < 0.01; t *= 100 {
i--
}
// "d" -> "dd"
b = append(b, b[0])
}
b[0] = '0' // "0ddddd"
e10++

// Convert the power-10 exponent to a power of 100 exponent.
var e100 int
if e10 >= 0 {
e100 = (e10 + 1) / 2
} else {
e100 = e10 / 2
}
// Strip the leading 0 if the conversion to e100 did not add a multiple of
// 10.
if e100*2 == e10 {
b = b[1:]
}

// Ensure that the number of digits is even.
if len(b)%2 != 0 {
b = append(b, '0')
}

// Convert the base-10 'b' slice to a base-100 'm' slice. We do this
// conversion in place to avoid an allocation.
m := b[:len(b)/2]
for i := 0; i < len(b); i += 2 {
accum := 10*int(b[i]-'0') + int(b[i+1]-'0')
// The bytes are encoded as 2n+1.
m[i/2] = byte(2*accum + 1)
}
// Iterate through the centimal digits of the
// mantissa and add them to the buffer.
// For a number like 9999.00001, start with
// 99.9900001, write 99 to the buffer, then
// multiply by 100 until there is no fractional
// portion left.
d := f * math.Pow(100, float64(-i+1))
var buf bytes.Buffer
n, frac := math.Modf(d)
buf.WriteByte(byte(2*n + 1))
for frac != 0 {
// Remove the integral portion and shift to the left.
// So given the above example:
// 99.9900001 -> [-99] -> 00.9900001 -> [*100] -> 99.00001
// 99.00001 -> [-99] -> 00.00001 -> [*100] -> 00.001
// 00.001 -> [-00] -> 00.001 -> [*100] -> 00.1
// 00.1 -> [-00] -> 00.1 -> [*100] -> 10
// Done as frac == 0
d = (d - n) * 100
n, frac = math.Modf(d)
buf.WriteByte(byte(2*n + 1)) // Write the integral to the buf.
}
b := buf.Bytes()
// The last byte is encoded as 2n+0.
b[len(b)-1]--
m[len(m)-1]--

// Trailing X==0 digits are omitted.
return i, removeTrailingZeros(b)
return e100, m
}

// makeFloatFromMandE reconstructs the float from the mantissa M and exponent
// E. Properly handling floating point rounding is tough, so we take the
// approach of converting the base-100 mantissa into a base-10 mantissa,
// formatting the floating point number to a string and then using the standard
// library to parse it.
//
// negative selects the sign of the result, e is the power-of-100 exponent and
// m holds the base-100 mantissa digits, each stored as 2n+1 except the last,
// which is stored as 2n. The value produced is ±0.d1d2d3... * 100^e.
//
// If m is empty or contains a byte that does not decode to a base-100 digit,
// the formatted string is not a valid float and 0 is returned.
func makeFloatFromMandE(negative bool, e int, m []byte) float64 {
	// ±.dddde±ddd
	b := make([]byte, 0, len(m)*2+8)
	if negative {
		b = append(b, '-')
	}
	b = append(b, '.')
	for _, v := range m {
		// Integer division recovers n from both the 2n+1 form used by the
		// leading bytes and the 2n form used by the final byte, so no
		// special-casing of the last byte is needed.
		t := int(v) / 2
		b = append(b, byte(t/10)+'0', byte(t%10)+'0')
	}
	b = append(b, 'e')
	// Convert the power-of-100 exponent to a power-of-10 exponent.
	e10 := 2 * e
	if e10 < 0 {
		b = append(b, '-')
		e10 = -e10
	} else {
		b = append(b, '+')
	}
	// The decimal exponent of a float64 can need three digits (up to 310
	// here), so format it generically instead of assuming exactly two.
	b = strconv.AppendInt(b, int64(e10), 10)
	// The error is deliberately ignored: on a range error ParseFloat still
	// returns the nearest representable value (±Inf), and on malformed input
	// it returns 0, which is the documented fallback.
	f, _ := strconv.ParseFloat(string(b), 64)
	return f
}

// onesComplement inverts each byte in buf from index start to end.
Expand Down Expand Up @@ -499,6 +595,26 @@ func encodeLargeNumber(negative bool, e int, m []byte, buf []byte) []byte {
return buf[:l+1]
}

// decodeSmallNumber extracts the exponent and mantissa from buf, which holds
// a prefix byte, a uvarint exponent, the mantissa bytes and a trailing
// terminator byte. When negative is set, the exponent byte and the mantissa
// are ones'-complemented before use. The returned exponent is the negation of
// the decoded uvarint; the returned mantissa is a fresh copy, so buf is left
// unmodified.
func decodeSmallNumber(negative bool, buf []byte) (int, []byte) {
	// Pick the bytes holding the exponent.
	// NOTE(review): for negative values only the first exponent byte is
	// complemented, so this presumably relies on the uvarint fitting in a
	// single byte — confirm against GetUVarint and the encoder.
	expBytes := buf[1:]
	if negative {
		expBytes = []byte{^buf[1]}
	}
	exp, width := GetUVarint(expBytes)

	// Copy everything between the exponent and the terminator.
	mantissa := make([]byte, len(buf)-(2+width))
	copy(mantissa, buf[1+width:len(buf)-1])
	if negative {
		onesComplement(mantissa, 0, len(mantissa))
	}

	// Negating the unsigned value wraps around; converting the result to int
	// yields the negated exponent.
	return int(-exp), mantissa
}

func decodeMediumNumber(negative bool, buf []byte) (int, []byte) {
// We don't need the prefix and last terminator.
m := make([]byte, len(buf)-2)
Expand Down
78 changes: 72 additions & 6 deletions util/encoding/key_encoding_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,10 +207,12 @@ func TestIntMandE(t *testing.T) {
}
for _, c := range testCases {
if e, m := intMandE(c.Value); e != c.E || !bytes.Equal(m, c.M) {
t.Errorf("unexpected mismatch in E/M for %v. expected E=%v | M=%+v, got E=%v | M=%+v", c.Value, c.E, prettyBytes(c.M), e, prettyBytes(m))
t.Errorf("unexpected mismatch in E/M for %v. expected E=%v | M=%+v, got E=%v | M=%+v",
c.Value, c.E, prettyBytes(c.M), e, prettyBytes(m))
}
if v := makeIntFromMandE(c.Value < 0, c.E, c.M); v != c.Value {
t.Errorf("unexpected mismatch in Value for E=%v and M=%+v. expected value=%v, got value=%v", c.E, prettyBytes(c.M), c.Value, v)
t.Errorf("unexpected mismatch in Value for E=%v and M=%+v. expected value=%v, got value=%v",
c.E, prettyBytes(c.M), c.Value, v)
}
}
}
Expand Down Expand Up @@ -245,7 +247,8 @@ func TestEncodeInt(t *testing.T) {
for i, c := range testCases {
enc := EncodeInt([]byte{}, c.Value)
if !bytes.Equal(enc, c.Encoding) {
t.Errorf("unexpected mismatch for %v. expected %v, got %v", c.Value, prettyBytes(c.Encoding), prettyBytes(enc))
t.Errorf("unexpected mismatch for %v. expected %v, got %v",
c.Value, prettyBytes(c.Encoding), prettyBytes(enc))
}
if i > 0 {
if bytes.Compare(testCases[i-1].Encoding, enc) >= 0 {
Expand All @@ -259,7 +262,7 @@ func TestEncodeInt(t *testing.T) {
}
}

func disabledTestFloatMandE(t *testing.T) {
func TestFloatMandE(t *testing.T) {
testCases := []struct {
Value float64
E int
Expand Down Expand Up @@ -291,13 +294,76 @@ func disabledTestFloatMandE(t *testing.T) {
{1234.5, 2, []byte{0x19, 0x45, 0x64}},
{12.345, 1, []byte{0x19, 0x45, 0x64}},
{0.123, 0, []byte{0x19, 0x3c}},
{-0.123, 0, []byte{0x19, 0x3c}},
{0.0123, 0, []byte{0x03, 0x2e}},
{0.00123, -1, []byte{0x19, 0x3c}},
{9223372036854775807, 10, []byte{0x13, 0x2d, 0x43, 0x91, 0x07, 0x89, 0x6d, 0x9b, 0x75, 0x0e}},
// The following value cannot be precisely represented as a float.
// {9223372036854775807, 10, []byte{0x13, 0x2d, 0x43, 0x91, 0x07, 0x89, 0x6d, 0x9b, 0x75, 0x0e}},
}
for _, c := range testCases {
if e, m := floatMandE(c.Value); e != c.E || !bytes.Equal(m, c.M) {
t.Errorf("unexpected mismatch in E/M for %v. expected E=%v | M=%+v, got E=%v | M=%+v", c.Value, c.E, prettyBytes(c.M), e, prettyBytes(m))
t.Errorf("unexpected mismatch in E/M for %v. expected E=%v | M=%+v, got E=%v | M=%+v",
c.Value, c.E, prettyBytes(c.M), e, prettyBytes(m))
}
}
}

// TestEncodeFloat verifies three properties for every table entry: EncodeFloat
// produces the expected bytes, the encoding sorts strictly after the expected
// encoding of the previous entry (the table is listed in ascending numeric
// order), and DecodeFloat round-trips the encoding back to the original value.
func TestEncodeFloat(t *testing.T) {
	type floatCase struct {
		Value    float64
		Encoding []byte
	}
	testCases := []floatCase{
		{-10000.0, []byte{0x10, 0xfd, 0x0}},
		{-9999.0, []byte{0x11, 0x38, 0x39, 0x00}},
		{-100.0, []byte{0x11, 0xfd, 0x00}},
		{-99.0, []byte{0x12, 0x39, 0x00}},
		{-1.0, []byte{0x12, 0xfd, 0x0}},
		{-0.00123, []byte{0x14, 0xfe, 0xe6, 0xc3, 0x0}},
		{0, []byte{0x15}},
		{0.00123, []byte{0x16, 0x1, 0x19, 0x3c, 0x0}},
		{0.0123, []byte{0x17, 0x03, 0x2e, 0x0}},
		{0.123, []byte{0x17, 0x19, 0x3c, 0x0}},
		{1.0, []byte{0x18, 0x02, 0x0}},
		{10.0, []byte{0x18, 0x14, 0x0}},
		{12.345, []byte{0x18, 0x19, 0x45, 0x64, 0x0}},
		{99.0, []byte{0x18, 0xc6, 0x0}},
		{99.0001, []byte{0x18, 0xc7, 0x01, 0x02, 0x0}},
		{99.01, []byte{0x18, 0xc7, 0x02, 0x0}},
		{100.0, []byte{0x19, 0x02, 0x0}},
		{100.01, []byte{0x19, 0x03, 0x01, 0x02, 0x0}},
		{100.1, []byte{0x19, 0x03, 0x01, 0x14, 0x0}},
		{1234, []byte{0x19, 0x19, 0x44, 0x0}},
		{1234.5, []byte{0x19, 0x19, 0x45, 0x64, 0x0}},
		{9999, []byte{0x19, 0xc7, 0xc6, 0x0}},
		{9999.000001, []byte{0x19, 0xc7, 0xc7, 0x01, 0x01, 0x02, 0x0}},
		{9999.000009, []byte{0x19, 0xc7, 0xc7, 0x01, 0x01, 0x12, 0x0}},
		{9999.00001, []byte{0x19, 0xc7, 0xc7, 0x01, 0x01, 0x14, 0x0}},
		{9999.00009, []byte{0x19, 0xc7, 0xc7, 0x01, 0x01, 0xb4, 0x0}},
		{9999.000099, []byte{0x19, 0xc7, 0xc7, 0x01, 0x01, 0xc6, 0x0}},
		{9999.0001, []byte{0x19, 0xc7, 0xc7, 0x01, 0x02, 0x0}},
		{9999.001, []byte{0x19, 0xc7, 0xc7, 0x01, 0x14, 0x0}},
		{9999.01, []byte{0x19, 0xc7, 0xc7, 0x02, 0x0}},
		{9999.1, []byte{0x19, 0xc7, 0xc7, 0x14, 0x0}},
		{10000, []byte{0x1a, 0x02, 0x0}},
		{10001, []byte{0x1a, 0x03, 0x01, 0x02, 0x0}},
		{12345, []byte{0x1a, 0x03, 0x2f, 0x5a, 0x0}},
		{123450, []byte{0x1a, 0x19, 0x45, 0x64, 0x0}},
	}
	for idx := range testCases {
		tc := testCases[idx]

		// The encoding must match the expected bytes exactly.
		got := EncodeFloat([]byte{}, tc.Value)
		if !bytes.Equal(got, tc.Encoding) {
			t.Errorf("unexpected mismatch for %v. expected %v, got %v",
				tc.Value, prettyBytes(tc.Encoding), prettyBytes(got))
		}

		// Byte-wise ordering must follow numeric ordering.
		if idx > 0 && bytes.Compare(testCases[idx-1].Encoding, got) >= 0 {
			t.Errorf("%v: expected %v to be less than %v",
				tc.Value, prettyBytes(testCases[idx-1].Encoding), prettyBytes(got))
		}

		// Decoding must give back the original value.
		if _, dec := DecodeFloat(got); dec != tc.Value {
			t.Errorf("unexpected mismatch for %v. got %v", tc.Value, dec)
		}
	}
}

0 comments on commit a9799c3

Please sign in to comment.