Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
87968: tree: Improve performance of tree.AsJSON r=miretskiy a=miretskiy

Improve performance of `tree.AsJSON` method.

These improvements are important for any query that produces
large number of JSON objects, as well as to changefeeds, which
rely on this function when producing JSON encoded feed.

Most of the changes revolved around modifying underlying types
(s.a. date/timestamp types, box2d, etc) to favor using functions
that append to bytes buffer, instead of relying on slower
functions, such as `fmt.Sprintf`.  The conversion
performance improved around 5-10% for most of the types, and
as high as 50% for time types:

```
Benchmark            old t/op      new t/op    delta
AsJSON/box2d-10    578ns ± 3%    414ns ± 2%   -28.49%  (p=0.000 n=10+9)
AsJSON/box2d[]-10  1.64µs ± 3%   1.19µs ± 4%  -27.14%  (p=0.000 n=10+10)
AsJSON/time-10     232ns ± 2%    103ns ± 1%   -55.61%  (p=0.000 n=10+10)
AsJSON/time[]-10   687ns ± 4%    342ns ± 4%   -50.17%  (p=0.000 n=10+10)
```

Note: Some types in the local benchmark show slight slow down in speed.
No changes were made in those types, and in general, the encoding speed
of these types might be too fast to reliable detect changes:
```
Benchmark            old t/op      new t/op       delta
AsJSON/bool[]-10    65.9ns ± 1%   67.7ns ± 2%    +2.79%  (p=0.001 n=8+9)
```
The emphasis was also placed on reducing allocations.
By relying more heavily on a pooled FmtCtx, which contains
bytes buffer, some conversions resulted in amortized
elimination of allocations (time):
```
Benchmark               old B/op      new t/op    delta
AsJSON/timestamp-10    42.1B ± 3%      0.0B      -100.00%  (p=0.000 n=10+10)
AsJSON/timestamp[]-10  174B ± 4%      60B ± 1%   -65.75%  (p=0.000 n=10+10)
```

Release Note: None
Release Justification: performance improvement

Co-authored-by: Yevgeniy Miretskiy <[email protected]>
  • Loading branch information
craig[bot] and Yevgeniy Miretskiy committed Sep 17, 2022
2 parents bd97ad5 + 35cb15e commit 3fa1a16
Show file tree
Hide file tree
Showing 16 changed files with 313 additions and 40 deletions.
4 changes: 4 additions & 0 deletions pkg/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,7 @@ ALL_TESTS = [
"//pkg/util/span:span_test",
"//pkg/util/stop:stop_test",
"//pkg/util/stringarena:stringarena_test",
"//pkg/util/strutil:strutil_test",
"//pkg/util/syncutil/singleflight:singleflight_test",
"//pkg/util/syncutil:syncutil_test",
"//pkg/util/sysutil:sysutil_test",
Expand Down Expand Up @@ -2002,6 +2003,8 @@ GO_TARGETS = [
"//pkg/util/stringarena:stringarena",
"//pkg/util/stringarena:stringarena_test",
"//pkg/util/stringencoding:stringencoding",
"//pkg/util/strutil:strutil",
"//pkg/util/strutil:strutil_test",
"//pkg/util/syncutil/singleflight:singleflight",
"//pkg/util/syncutil/singleflight:singleflight_test",
"//pkg/util/syncutil:syncutil",
Expand Down Expand Up @@ -2951,6 +2954,7 @@ GET_X_DATA_TARGETS = [
"//pkg/util/stop:get_x_data",
"//pkg/util/stringarena:get_x_data",
"//pkg/util/stringencoding:get_x_data",
"//pkg/util/strutil:get_x_data",
"//pkg/util/syncutil:get_x_data",
"//pkg/util/syncutil/singleflight:get_x_data",
"//pkg/util/system:get_x_data",
Expand Down
24 changes: 15 additions & 9 deletions pkg/geo/bbox.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,21 @@ func NewCartesianBoundingBox() *CartesianBoundingBox {

// Repr is the string representation of the CartesianBoundingBox.
func (b *CartesianBoundingBox) Repr() string {
// fmt.Sprintf with %f does not truncate leading zeroes, so use
// FormatFloat instead.
return fmt.Sprintf(
"BOX(%s %s,%s %s)",
strconv.FormatFloat(b.LoX, 'f', -1, 64),
strconv.FormatFloat(b.LoY, 'f', -1, 64),
strconv.FormatFloat(b.HiX, 'f', -1, 64),
strconv.FormatFloat(b.HiY, 'f', -1, 64),
)
return string(b.AppendFormat(nil))
}

// AppendFormat appends string representation of the CartesianBoundingBox
// to the buffer, and returns modified buffer.
func (b *CartesianBoundingBox) AppendFormat(buf []byte) []byte {
buf = append(buf, "BOX("...)
buf = strconv.AppendFloat(buf, b.LoX, 'f', -1, 64)
buf = append(buf, ' ')
buf = strconv.AppendFloat(buf, b.LoY, 'f', -1, 64)
buf = append(buf, ',')
buf = strconv.AppendFloat(buf, b.HiX, 'f', -1, 64)
buf = append(buf, ' ')
buf = strconv.AppendFloat(buf, b.HiY, 'f', -1, 64)
return append(buf, ')')
}

// ParseCartesianBoundingBox parses a box2d string into a bounding box.
Expand Down
4 changes: 4 additions & 0 deletions pkg/geo/bbox_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"fmt"
"math"
"strconv"
"strings"
"testing"

"github.com/cockroachdb/cockroach/pkg/geo/geopb"
Expand Down Expand Up @@ -63,6 +64,9 @@ func TestParseCartesianBoundingBox(t *testing.T) {
} else {
require.NoError(t, err)
require.Equal(t, tc.expected, ret)
// Test Repr/AppendFormat round trip.
require.Equal(t, strings.ToUpper(tc.s), ret.Repr())
require.Equal(t, strings.ToUpper(tc.s), string(ret.AppendFormat(nil)))
}
})
}
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/sem/tree/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ go_test(
"function_name_test.go",
"indexed_vars_test.go",
"interval_test.go",
"json_test.go",
"main_test.go",
"name_part_test.go",
"name_resolution_test.go",
Expand Down
31 changes: 22 additions & 9 deletions pkg/sql/sem/tree/datum.go
Original file line number Diff line number Diff line change
Expand Up @@ -1782,7 +1782,11 @@ func (d *DUuid) Format(ctx *FmtCtx) {
if !bareStrings {
ctx.WriteByte('\'')
}
ctx.WriteString(d.UUID.String())

buf := ctx.scratch[:uuid.RFC4122StrSize]
d.UUID.StringBytes(buf)
ctx.Write(buf)

if !bareStrings {
ctx.WriteByte('\'')
}
Expand Down Expand Up @@ -2344,7 +2348,7 @@ func (d *DTime) Format(ctx *FmtCtx) {
if !bareStrings {
ctx.WriteByte('\'')
}
ctx.WriteString(timeofday.TimeOfDay(*d).String())
ctx.Write(timeofday.TimeOfDay(*d).AppendFormat(ctx.scratch[:0]))
if !bareStrings {
ctx.WriteByte('\'')
}
Expand Down Expand Up @@ -2520,7 +2524,7 @@ func (d *DTimeTZ) Format(ctx *FmtCtx) {
if !bareStrings {
ctx.WriteByte('\'')
}
ctx.WriteString(d.TimeTZ.String())
ctx.Write(d.TimeTZ.AppendFormat(ctx.scratch[:0]))
if !bareStrings {
ctx.WriteByte('\'')
}
Expand Down Expand Up @@ -3603,7 +3607,7 @@ func (d *DBox2D) Format(ctx *FmtCtx) {
if !bareStrings {
ctx.WriteByte('\'')
}
ctx.WriteString(d.CartesianBoundingBox.Repr())
ctx.Write(d.CartesianBoundingBox.AppendFormat(ctx.scratch[:0]))
if !bareStrings {
ctx.WriteByte('\'')
}
Expand Down Expand Up @@ -3731,10 +3735,10 @@ func AsJSON(
// without the T separator. This causes some compatibility problems
// with certain JSON consumers, so we'll use an alternate formatting
// path here to maintain consistency with PostgreSQL.
return json.FromString(t.Time.In(loc).Format(time.RFC3339Nano)), nil
return json.FromString(formatTime(t.Time.In(loc), time.RFC3339Nano)), nil
case *DTimestamp:
// This is RFC3339Nano, but without the TZ fields.
return json.FromString(t.UTC().Format("2006-01-02T15:04:05.999999999")), nil
return json.FromString(formatTime(t.UTC(), "2006-01-02T15:04:05.999999999")), nil
case *DDate, *DUuid, *DOid, *DInterval, *DBytes, *DIPAddr, *DTime, *DTimeTZ, *DBitArray, *DBox2D:
return json.FromString(AsStringWithFlags(t, FmtBareStrings, FmtDataConversionConfig(dcc))), nil
case *DGeometry:
Expand All @@ -3752,6 +3756,16 @@ func AsJSON(
}
}

// formatTime formats time with specified layout.
// TODO(yuzefovich): consider using this function in more places.
func formatTime(t time.Time, layout string) string {
// We only need FmtCtx to access its buffer so
// that we get 0 amortized allocations.
ctx := NewFmtCtx(FmtSimple)
ctx.Write(t.AppendFormat(ctx.scratch[:0], layout))
return ctx.CloseAndGetString()
}

// ResolvedType implements the TypedExpr interface.
func (*DJSON) ResolvedType() *types.T {
return types.Jsonb
Expand Down Expand Up @@ -5060,14 +5074,13 @@ func (d *DOid) CompareError(ctx CompareContext, other Datum) (int, error) {

// Format implements the Datum interface.
func (d *DOid) Format(ctx *FmtCtx) {
s := strconv.FormatUint(uint64(d.Oid), 10)
if d.semanticType.Oid() == oid.T_oid || d.name == "" {
ctx.WriteString(s)
ctx.Write(strconv.AppendUint(ctx.scratch[:0], uint64(d.Oid), 10))
} else if ctx.HasFlags(fmtDisambiguateDatumTypes) {
ctx.WriteString("crdb_internal.create_")
ctx.WriteString(d.semanticType.SQLStandardName())
ctx.WriteByte('(')
ctx.WriteString(s)
ctx.Write(strconv.AppendUint(ctx.scratch[:0], uint64(d.Oid), 10))
ctx.WriteByte(',')
lexbase.EncodeSQLStringWithFlags(&ctx.Buffer, d.name, lexbase.EncNoFlags)
ctx.WriteByte(')')
Expand Down
11 changes: 11 additions & 0 deletions pkg/sql/sem/tree/format.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/sessiondatapb"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util"
"github.com/cockroachdb/cockroach/pkg/util/uuid"
"github.com/cockroachdb/errors"
"github.com/cockroachdb/redact"
)
Expand Down Expand Up @@ -273,6 +274,8 @@ type FmtCtx struct {
// indexedTypeFormatter is an optional interceptor for formatting
// IDTypeReferences differently than normal.
indexedTypeFormatter func(*FmtCtx, *OIDTypeReference)
// small scratch buffer to reduce allocations.
scratch [64]byte
}

// FmtCtxOption is an option to pass into NewFmtCtx.
Expand Down Expand Up @@ -673,3 +676,11 @@ func (ctx *FmtCtx) formatNodeMaybeMarkRedaction(n NodeFormatter) {
n.Format(ctx)
}
}

func init() {
ctx := NewFmtCtx(FmtSimple)
if len(ctx.scratch) < uuid.RFC4122StrSize {
panic(errors.AssertionFailedf("FmtCtx scratch must be at least %d bytes", uuid.RFC4122StrSize))
}
ctx.Close()
}
116 changes: 116 additions & 0 deletions pkg/sql/sem/tree/json_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
// Copyright 2022 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package tree_test

import (
"math/rand"
"testing"
"time"

"github.com/cockroachdb/cockroach/pkg/sql/randgen"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sessiondatapb"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/randutil"
)

func BenchmarkAsJSON(b *testing.B) {
// Use fixed seed so that each invocation of this benchmark
// produces exactly the same types, and datums streams.
// This number can be changed to an arbitrary value; doing so
// would result in new types/datums being produced.
rng := randutil.NewTestRandWithSeed(-4365865412074131521)

const numDatums = 1024
makeDatums := func(typ *types.T) tree.Datums {
const allowNulls = true
res := make(tree.Datums, numDatums)
for i := 0; i < numDatums; i++ {
res[i] = randgen.RandDatum(rng, typ, allowNulls)
}
return res
}

bench := func(b *testing.B, typ *types.T) {
b.ReportAllocs()
b.StopTimer()
datums := makeDatums(typ)
b.StartTimer()

for i := 0; i < b.N; i++ {
_, err := tree.AsJSON(datums[i%numDatums], sessiondatapb.DataConversionConfig{}, time.UTC)
if err != nil {
b.Fatal(err)
}
}
}

for _, typ := range testTypes(rng) {
b.Run(typ.String(), func(b *testing.B) {
bench(b, typ)
})

if randgen.IsAllowedForArray(typ) {
typ = types.MakeArray(typ)
b.Run(typ.String(), func(b *testing.B) {
bench(b, typ)
})
}
}
}

// testTypes returns list of types to test against.
func testTypes(rng *rand.Rand) (typs []*types.T) {
for _, typ := range randgen.SeedTypes {
switch typ {
case types.AnyTuple:
// Ignore AnyTuple -- it's not very interesting; we'll generate test tuples below.
case types.RegClass, types.RegNamespace, types.RegProc, types.RegProcedure, types.RegRole, types.RegType:
// Ignore a bunch of pseudo-OID types (just want regular OID).
case types.Geometry, types.Geography:
// Ignore geometry/geography: these types are insanely inefficient;
// AsJson(Geo) -> MarshalGeo -> go JSON bytes -> ParseJSON -> Go native -> json.JSON
// Benchmarking this generates too much noise.
// TODO(yevgeniy): fix this.
default:
typs = append(typs, typ)
}
}

// Add tuple types.
var tupleTypes []*types.T
makeTupleType := func() *types.T {
contents := make([]*types.T, rng.Intn(6)) // Up to 6 fields
for i := range contents {
contents[i] = randgen.RandTypeFromSlice(rng, typs)
}
candidateTuple := types.MakeTuple(contents)
// Ensure tuple type is unique.
for _, t := range tupleTypes {
if t.Equal(candidateTuple) {
return nil
}
}
tupleTypes = append(tupleTypes, candidateTuple)
return candidateTuple
}

const numTupleTypes = 5
for i := 0; i < numTupleTypes; i++ {
var typ *types.T
for typ == nil {
typ = makeTupleType()
}
typs = append(typs, typ)
}

return typs
}
19 changes: 19 additions & 0 deletions pkg/util/strutil/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
load("//build/bazelutil/unused_checker:unused.bzl", "get_x_data")
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")

go_library(
name = "strutil",
srcs = ["util.go"],
importpath = "github.com/cockroachdb/cockroach/pkg/util/strutil",
visibility = ["//visibility:public"],
)

go_test(
name = "strutil_test",
srcs = ["util_test.go"],
args = ["-test.timeout=295s"],
embed = [":strutil"],
deps = ["@com_github_stretchr_testify//require"],
)

get_x_data(name = "get_x_data")
34 changes: 34 additions & 0 deletions pkg/util/strutil/util.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Copyright 2022 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package strutil

import "strconv"

// AppendInt appends the decimal form of x to b and returns the result.
// If the decimal form is shorter than width, the result is padded with leading 0's.
// If the decimal is longer than width, returns formatted decimal without
// any truncation.
func AppendInt(b []byte, x int, width int) []byte {
if x < 0 {
width--
x = -x
b = append(b, '-')
}

var scratch [16]byte
xb := strconv.AppendInt(scratch[:0], int64(x), 10)

// Add 0-padding.
for w := len(xb); w < width; w++ {
b = append(b, '0')
}
return append(b, xb...)
}
Loading

0 comments on commit 3fa1a16

Please sign in to comment.