Skip to content

Commit

Permalink
size: stop using regexp
Browse files Browse the repository at this point in the history
Using a regular expression brings in the whole regexp and regexp/syntax
packages, which increase the resulting binary size by about 130K
(Go 1.18.1, Linux/amd64).

Besides, regular expressions are generally slow and incur some
initialization overhead (to compile all global regexp.MustComplile
variables). This, unlike the size difference, is not the main motivation
for this commit, but it feels like it should have also been mentioned.

A quick benchmark comparison shows a huge improvement (again, this is
not why this is done, nevertheless it pleases the eye):

name         old time/op    new time/op    delta
ParseSize-4    10.6µs ± 3%     2.6µs ±29%  -75.10%  (p=0.002 n=6+6)

name         old alloc/op   new alloc/op   delta
ParseSize-4    3.26kB ± 0%    0.20kB ± 0%  -93.75%  (p=0.000 n=7+6)

name         old allocs/op  new allocs/op  delta
ParseSize-4      72.0 ± 0%      26.0 ± 0%  -63.89%  (p=0.000 n=7+6)

Compatibility note: As a result, we are now a but more relaxed to the
input, allowing e.g. ".4 Gb", or "-0", or "234. B", following the rules
of strconv.ParseFloat. It seems that those were previously rejected as
a result of a regex being used, not deliberately.

Signed-off-by: Kir Kolyshkin <[email protected]>
  • Loading branch information
kolyshkin committed May 16, 2022
1 parent cec4960 commit 7375726
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 27 deletions.
70 changes: 58 additions & 12 deletions size.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package units

import (
"fmt"
"regexp"
"strconv"
"strings"
)
Expand All @@ -26,16 +25,17 @@ const (
PiB = 1024 * TiB
)

type unitMap map[string]int64
type unitMap map[byte]int64

var (
decimalMap = unitMap{"k": KB, "m": MB, "g": GB, "t": TB, "p": PB}
binaryMap = unitMap{"k": KiB, "m": MiB, "g": GiB, "t": TiB, "p": PiB}
sizeRegex = regexp.MustCompile(`^(\d+(\.\d+)*) ?([kKmMgGtTpP])?[iI]?[bB]?$`)
decimalMap = unitMap{'k': KB, 'm': MB, 'g': GB, 't': TB, 'p': PB}
binaryMap = unitMap{'k': KiB, 'm': MiB, 'g': GiB, 't': TiB, 'p': PiB}
)

var decimapAbbrs = []string{"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"}
var binaryAbbrs = []string{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"}
var (
decimapAbbrs = []string{"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"}
binaryAbbrs = []string{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"}
)

func getSizeAndUnit(size float64, base float64, _map []string) (float64, string) {
i := 0
Expand Down Expand Up @@ -89,20 +89,66 @@ func RAMInBytes(size string) (int64, error) {

// Parses the human-readable size string into the amount it represents.
func parseSize(sizeStr string, uMap unitMap) (int64, error) {
matches := sizeRegex.FindStringSubmatch(sizeStr)
if len(matches) != 4 {
// TODO: rewrite to use strings.Cut if there's a space
// once Go < 1.18 is deprecated.
sep := strings.LastIndexAny(sizeStr, "01234567890. ")
if sep == -1 {
// There should be at least a digit.
return -1, fmt.Errorf("invalid size: '%s'", sizeStr)
}
var num, sfx string
if sizeStr[sep] != ' ' {
num = sizeStr[:sep+1]
sfx = sizeStr[sep+1:]
} else {
// Omit the space separator.
num = sizeStr[:sep]
sfx = sizeStr[sep+1:]
}

size, err := strconv.ParseFloat(matches[1], 64)
size, err := strconv.ParseFloat(num, 64)
if err != nil {
return -1, err
}
// Backward compatibility: reject negative sizes.
if size < 0 {
return -1, fmt.Errorf("invalid size: '%s'", sizeStr)
}

if len(sfx) == 0 {
return int64(size), nil
}

unitPrefix := strings.ToLower(matches[3])
if mul, ok := uMap[unitPrefix]; ok {
// Process the suffix.

if len(sfx) > 3 { // Too long.
goto badSuffix
}
sfx = strings.ToLower(sfx)
// Trivial case: b suffix.
if sfx[0] == 'b' {
if len(sfx) > 1 { // no extra characters allowed after b.
goto badSuffix
}
return int64(size), nil
}
// A suffix from the map.
if mul, ok := uMap[sfx[0]]; ok {
size *= float64(mul)
} else {
goto badSuffix
}

// The suffix may have extra "b" or "ib" (e.g. KiB or MB).
switch {
case len(sfx) == 2 && sfx[1] != 'b':
goto badSuffix
case len(sfx) == 3 && sfx[1:] != "ib":
goto badSuffix
}

return int64(size), nil

badSuffix:
return -1, fmt.Errorf("invalid suffix: '%s'", sfx)
}
31 changes: 16 additions & 15 deletions size_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,22 @@ func TestFromHumanSize(t *testing.T) {
assertSuccessEquals(t, 32.5*KB, FromHumanSize, "32.5 kB")
assertSuccessEquals(t, 32, FromHumanSize, "32.5 B")
assertSuccessEquals(t, 300, FromHumanSize, "0.3 K")
assertSuccessEquals(t, 300, FromHumanSize, ".3kB")

assertSuccessEquals(t, 0, FromHumanSize, "0.")
assertSuccessEquals(t, 0, FromHumanSize, "0. ")
assertSuccessEquals(t, 0, FromHumanSize, "0.b")
assertSuccessEquals(t, 0, FromHumanSize, "0.B")
assertSuccessEquals(t, 0, FromHumanSize, "-0")
assertSuccessEquals(t, 0, FromHumanSize, "-0b")
assertSuccessEquals(t, 0, FromHumanSize, "-0B")
assertSuccessEquals(t, 0, FromHumanSize, "-0 b")
assertSuccessEquals(t, 0, FromHumanSize, "-0 B")
assertSuccessEquals(t, 32, FromHumanSize, "32.")
assertSuccessEquals(t, 32, FromHumanSize, "32.b")
assertSuccessEquals(t, 32, FromHumanSize, "32.B")
assertSuccessEquals(t, 32, FromHumanSize, "32. b")
assertSuccessEquals(t, 32, FromHumanSize, "32. B")

// We do not tolerate extra leading or trailing spaces
// (except for a space after the number and a missing suffix).
Expand All @@ -124,26 +140,11 @@ func TestFromHumanSize(t *testing.T) {
assertError(t, FromHumanSize, " ")
assertError(t, FromHumanSize, " .")
assertError(t, FromHumanSize, " . ")
assertError(t, FromHumanSize, "0.")
assertError(t, FromHumanSize, "0. ")
assertError(t, FromHumanSize, "0.b")
assertError(t, FromHumanSize, "0.B")
assertError(t, FromHumanSize, "-0")
assertError(t, FromHumanSize, "-0b")
assertError(t, FromHumanSize, "-0B")
assertError(t, FromHumanSize, "-0 b")
assertError(t, FromHumanSize, "-0 B")
assertError(t, FromHumanSize, "-32")
assertError(t, FromHumanSize, ".3kB")
assertError(t, FromHumanSize, "-32b")
assertError(t, FromHumanSize, "-32B")
assertError(t, FromHumanSize, "-32 b")
assertError(t, FromHumanSize, "-32 B")
assertError(t, FromHumanSize, "32.")
assertError(t, FromHumanSize, "32.b")
assertError(t, FromHumanSize, "32.B")
assertError(t, FromHumanSize, "32. b")
assertError(t, FromHumanSize, "32. B")
assertError(t, FromHumanSize, "32b.")
assertError(t, FromHumanSize, "32B.")
assertError(t, FromHumanSize, "32 b.")
Expand Down

0 comments on commit 7375726

Please sign in to comment.