Skip to content

Commit

Permalink
perf: replace char comparison with array lookup (#16)
Browse files Browse the repository at this point in the history
* perf: replace char comparison with array lookup

isNotEscapedSingleChar is a hotspot that was taking significant CPU time
during encoding for string-heavy structs.
Replacing the char comparison with an array lookup yields a significant performance
improvement:

                 │   old.txt   │               new.txt
                 │   sec/op    │   sec/op     vs base
String/string-20   35.72n ± 3%   18.61n ± 2%  -47.89% (p=0.000 n=10)

Both variants perform zero allocations.

* refactor: remove unused var

* build: add missing license headers
  • Loading branch information
kruskall authored Jun 19, 2023
1 parent 3f997be commit 485c9d6
Show file tree
Hide file tree
Showing 3 changed files with 154 additions and 15 deletions.
136 changes: 136 additions & 0 deletions tables.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
// Copyright 2018 Elasticsearch BV
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package fastjson provides a library for fast JSON encoding,
// optimised for static code generation.
//
// Fastjson functions and interfaces are structured such that
// all encoding appends to a buffer, enabling buffer reuse
// without forcing specific mechanisms such as sync.Pool. This
// enables zero-allocation encoding without incurring any
// concurrency overhead in certain applications.

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package fastjson

import "unicode/utf8"

// htmlSafeSet reports, for each ASCII code point used as an index, whether
// that byte may be copied verbatim into a JSON string that could end up
// embedded inside an HTML <script> element.
//
// Entries below 0x20 (the ASCII control characters) are not listed and so
// keep the zero value, false. Of the listed entries only the double quote,
// the backslash, '<', '>' and '&' are false; every other one, including
// DEL (0x7f), is true.
var htmlSafeSet = [utf8.RuneSelf]bool{
	// Space and punctuation, 0x20-0x2f.
	' ': true, '!': true, '"': false, '#': true,
	'$': true, '%': true, '&': false, '\'': true,
	'(': true, ')': true, '*': true, '+': true,
	',': true, '-': true, '.': true, '/': true,

	// Digits.
	'0': true, '1': true, '2': true, '3': true, '4': true,
	'5': true, '6': true, '7': true, '8': true, '9': true,

	// Punctuation, 0x3a-0x40.
	':': true, ';': true, '<': false, '=': true,
	'>': false, '?': true, '@': true,

	// Upper-case letters.
	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true,
	'H': true, 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true,
	'O': true, 'P': true, 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true,
	'V': true, 'W': true, 'X': true, 'Y': true, 'Z': true,

	// Punctuation, 0x5b-0x60.
	'[': true, '\\': false, ']': true, '^': true, '_': true, '`': true,

	// Lower-case letters.
	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true,
	'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true,
	'o': true, 'p': true, 'q': true, 'r': true, 's': true, 't': true, 'u': true,
	'v': true, 'w': true, 'x': true, 'y': true, 'z': true,

	// Punctuation, 0x7b-0x7e, followed by DEL.
	'{': true, '|': true, '}': true, '~': true, '\u007f': true,
}
21 changes: 6 additions & 15 deletions writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,29 +109,20 @@ func (w *Writer) String(s string) {

// chars holds the sixteen lowercase hexadecimal digits in ascending order,
// so chars[n] is the digit for a value n in the range 0-15.
const chars = "0123456789abcdef"

// isNotEscapedSingleChar reports whether c is a single-byte (ASCII) character
// that can be written into a JSON string as-is.
//
// Control characters (below 0x20), bytes at or above utf8.RuneSelf, the double
// quote and the backslash are never accepted. When escapeHTML is true, '<',
// '>' and '&' are rejected as well.
func isNotEscapedSingleChar(c byte, escapeHTML bool) bool {
	switch c {
	case '"', '\\':
		// Always escaped, regardless of the HTML setting.
		return false
	case '<', '>', '&':
		// Only escaped when HTML-safe output is requested.
		return !escapeHTML
	}
	// Everything else passes through if it is printable single-byte ASCII.
	return c >= 0x20 && c < utf8.RuneSelf
}

// StringContents is the same as String, but without the surrounding quotes.
func (w *Writer) StringContents(s string) {
// Portions of the string that contain no escapes are appended as byte slices.

p := 0 // last non-escape symbol

for i := 0; i < len(s); {
c := s[i]

if isNotEscapedSingleChar(c, true) {
// single-width character, no escaping is required
i++
continue
} else if c < utf8.RuneSelf {
if c := s[i]; c < utf8.RuneSelf {
if htmlSafeSet[c] {
i++
continue
}

// single-with character, need to escape
w.RawString(s[p:i])
switch c {
Expand Down
12 changes: 12 additions & 0 deletions writer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,15 @@ func TestWriterStringEscapes(t *testing.T) {
w.StringContents(string([]byte{255}))
assertEncoded(t, &w, `\ufffd`)
}

// BenchmarkString times Writer.String on a short ASCII input that contains
// no characters needing an escape. A single Writer, created with a 1024-byte
// capacity buffer, is shared by all iterations and Reset before each call.
func BenchmarkString(b *testing.B) {
	const input = "foobarstring-go-fastjson"

	w := Writer{buf: make([]byte, 0, 1024)}

	b.Run("string", func(b *testing.B) {
		for n := 0; n < b.N; n++ {
			w.Reset()
			w.String(input)
		}
	})
}

0 comments on commit 485c9d6

Please sign in to comment.