Skip to content

Commit

Permalink
chore: Batch writer benchmarks (#1552)
Browse files Browse the repository at this point in the history
https://docs.google.com/spreadsheets/d/1NxKyOtvckW7yCbwLTL4VOu91XPNyd-Q0dYDMxjT-yH8/edit?usp=sharing

Current state (`1000 * N`)
```
GoLand2023.2/tmp/GoLand/___gobench_writers_test_go.test -test.v -test.paniconexit0 -test.bench ^\QBenchmarkWriterMemory\E$ -test.run ^$
goos: darwin
goarch: arm64
pkg: github.com/cloudquery/plugin-sdk/v4/writers
BenchmarkWriterMemory
BenchmarkWriterMemory/BatchWriter_batch10k_bytes100M
BenchmarkWriterMemory/BatchWriter_batch10k_bytes100M-10         	     465	   2556097 ns/op	        35.23 bytes/op
BenchmarkWriterMemory/BatchWriter_defaults
BenchmarkWriterMemory/BatchWriter_defaults-10                   	     469	   2595365 ns/op	        25.22 bytes/op
BenchmarkWriterMemory/BatchWriter_wide_batch10k_bytes100M
BenchmarkWriterMemory/BatchWriter_wide_batch10k_bytes100M-10    	       8	 129711115 ns/op	    178917 bytes/op
BenchmarkWriterMemory/BatchWriter_wide_defaults
BenchmarkWriterMemory/BatchWriter_wide_defaults-10              	       8	 151473391 ns/op	     48066 bytes/op
BenchmarkWriterMemory/MixedBatchWriter_batch10k_bytes100M
BenchmarkWriterMemory/MixedBatchWriter_batch10k_bytes100M-10    	     672	   1719326 ns/op	      1394 bytes/op
BenchmarkWriterMemory/MixedBatchWriter_defaults
BenchmarkWriterMemory/MixedBatchWriter_defaults-10              	     585	   1752100 ns/op	      1394 bytes/op
BenchmarkWriterMemory/MixedBatchWriter_wide_batch10k_bytes100M
BenchmarkWriterMemory/MixedBatchWriter_wide_batch10k_bytes100M-10         	       8	 142323412 ns/op	    185406 bytes/op
BenchmarkWriterMemory/MixedBatchWriter_wide_defaults
BenchmarkWriterMemory/MixedBatchWriter_wide_defaults-10                   	       8	 127524651 ns/op	     96165 bytes/op
BenchmarkWriterMemory/StreamingBatchWriter_bytes100M
BenchmarkWriterMemory/StreamingBatchWriter_bytes100M-10                   	     400	   2812296 ns/op	      1393 bytes/op
BenchmarkWriterMemory/StreamingBatchWriter_defaults
BenchmarkWriterMemory/StreamingBatchWriter_defaults-10                    	     394	   2803509 ns/op	      1393 bytes/op
BenchmarkWriterMemory/StreamingBatchWriter_wide_bytes100M
BenchmarkWriterMemory/StreamingBatchWriter_wide_bytes100M-10              	       9	 120658421 ns/op	     75653 bytes/op
BenchmarkWriterMemory/StreamingBatchWriter_wide_defaults
BenchmarkWriterMemory/StreamingBatchWriter_wide_defaults-10               	       9	 117394778 ns/op	     78724 bytes/op
PASS

Process finished with the exit code 0
```
  • Loading branch information
disq authored Mar 6, 2024
1 parent f565da8 commit 3050443
Show file tree
Hide file tree
Showing 3 changed files with 223 additions and 2 deletions.
6 changes: 4 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@ lint:

.PHONY: benchmark
benchmark:
go test -bench=Benchmark -run="^$$" ./...
go test -bench=Benchmark -run="^$$" ./... | grep -v 'BenchmarkWriterMemory/'
go test -bench=BenchmarkWriterMemory -run="^$$" ./writers/

benchmark-ci:
go install go.bobheadxi.dev/[email protected]
go test -bench . -benchmem ./... -run="^$$" | gobenchdata --json bench.json
{ go test -bench . -benchmem ./... -run="^$$" | grep -v 'BenchmarkWriterMemory/' && \
go test -bench=BenchmarkWriterMemory -benchmem -test.benchtime 10000x ./writers/ -run="^$$"; } | gobenchdata --json bench.json
rm -rf .delta.* && go run scripts/benchmark-delta/main.go bench.json
1 change: 1 addition & 0 deletions scripts/benchmark-delta/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ func main() {
for _, run := range d {
for _, suite := range run.Suites {
for _, bm := range suite.Benchmarks {
bm.Name = strings.ReplaceAll(bm.Name, "/", "_")
if bm.NsPerOp > 0 {
fmt.Println(bm.Name, "ns/op", bm.NsPerOp)
deltaResults = append(deltaResults, deltaResult{
Expand Down
218 changes: 218 additions & 0 deletions writers/writers_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
package writers_test

import (
"context"
"math/rand"
"runtime"
"sort"
"strconv"
"testing"

"github.com/apache/arrow/go/v15/arrow"
"github.com/apache/arrow/go/v15/arrow/array"
"github.com/apache/arrow/go/v15/arrow/memory"
"github.com/cloudquery/plugin-sdk/v4/message"
"github.com/cloudquery/plugin-sdk/v4/schema"
"github.com/cloudquery/plugin-sdk/v4/writers"
"github.com/cloudquery/plugin-sdk/v4/writers/batchwriter"
"github.com/cloudquery/plugin-sdk/v4/writers/mixedbatchwriter"
"github.com/cloudquery/plugin-sdk/v4/writers/streamingbatchwriter"
"golang.org/x/exp/maps"
)

// bCase describes one benchmark scenario: a named, pre-configured writer
// plus a factory that produces the records fed to it.
type bCase struct {
	name string              // sub-benchmark name (writer kind + option-set label)
	wr   writers.Writer      // writer under test
	rec  func() arrow.Record // builds one fresh record per benchmark iteration
}

// BenchmarkWriterMemory measures per-record throughput and allocation volume
// for each writer implementation across several batching configurations.
// Besides the standard ns/op it reports a custom "bytes/op" metric derived
// from runtime.MemStats, which is intentionally different from -benchmem's
// "B/op" (it covers everything allocated between the two ReadMemStats calls,
// including the writer goroutine's work).
func BenchmarkWriterMemory(b *testing.B) {
	batchwriterOpts := map[string][]batchwriter.Option{
		"defaults":           nil,
		"batch10k bytes100M": {batchwriter.WithBatchSizeBytes(100000000), batchwriter.WithBatchSize(10000)},
	}
	mixedbatchwriterOpts := map[string][]mixedbatchwriter.Option{
		"defaults":           nil,
		"batch10k bytes100M": {mixedbatchwriter.WithBatchSizeBytes(100000000), mixedbatchwriter.WithBatchSize(10000)},
	}
	streamingbatchwriterOpts := map[string][]streamingbatchwriter.Option{
		"defaults":  nil,
		"bytes100M": {streamingbatchwriter.WithBatchSizeBytes(100000000)},
	}

	var bCases []bCase
	bCases = append(bCases, writerMatrix("BatchWriter", batchwriter.New, newBatchWriterClient(), makeRecord, batchwriterOpts)...)
	bCases = append(bCases, writerMatrix("BatchWriter wide", batchwriter.New, newBatchWriterClient(), makeWideRecord, batchwriterOpts)...)
	bCases = append(bCases, writerMatrix("MixedBatchWriter", mixedbatchwriter.New, newMixedBatchWriterClient(), makeRecord, mixedbatchwriterOpts)...)
	bCases = append(bCases, writerMatrix("MixedBatchWriter wide", mixedbatchwriter.New, newMixedBatchWriterClient(), makeWideRecord, mixedbatchwriterOpts)...)
	bCases = append(bCases, writerMatrix("StreamingBatchWriter", streamingbatchwriter.New, newStreamingBatchWriterClient(), makeRecord, streamingbatchwriterOpts)...)
	bCases = append(bCases, writerMatrix("StreamingBatchWriter wide", streamingbatchwriter.New, newStreamingBatchWriterClient(), makeWideRecord, streamingbatchwriterOpts)...)

	for _, c := range bCases {
		c := c // capture loop variable (pre-Go 1.22 semantics)
		b.Run(c.name, func(b *testing.B) {
			var (
				mStart runtime.MemStats
				mEnd   runtime.MemStats
			)

			ch := make(chan message.WriteMessage)
			errCh := make(chan error)
			go func() {
				defer close(errCh)
				errCh <- c.wr.Write(context.Background(), ch)
			}()

			runtime.ReadMemStats(&mStart)
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				rec := c.rec()
				ch <- &message.WriteInsert{
					Record: rec,
				}
			}
			// Closing ch makes the writer flush and Write return; the flush
			// time is deliberately inside the timed region.
			close(ch)
			err := <-errCh

			b.StopTimer()

			if err != nil {
				b.Fatal(err)
			}

			runtime.ReadMemStats(&mEnd)

			// Use the monotonic TotalAlloc counter rather than Alloc: Alloc is
			// the *live* heap gauge and can shrink if GC runs mid-benchmark,
			// making the unsigned subtraction underflow to a huge bogus value.
			allocatedBytes := mEnd.TotalAlloc - mStart.TotalAlloc
			b.ReportMetric(float64(allocatedBytes)/float64(b.N), "bytes/op") // this is different from -benchmem result "B/op"
		})
	}
}

// makeRecord returns a factory producing a minimal record with a single
// string column ("col1") holding the value "test".
func makeRecord() func() arrow.Record {
	tbl := &schema.Table{
		Name: "test_table",
		Columns: schema.ColumnList{
			{Name: "col1", Type: arrow.BinaryTypes.String},
		},
	}
	arrowSchema := tbl.ToArrowSchema()

	return func() arrow.Record {
		rb := array.NewRecordBuilder(memory.DefaultAllocator, arrowSchema)
		rb.Field(0).(*array.StringBuilder).Append("test")
		return rb.NewRecord()
	}
}

// makeWideRecord returns a factory producing a "wide" record: one string
// column ("col1") plus 200 int64 columns filled with values randomized once
// at setup time, so every record built by the factory is identical.
func makeWideRecord() func() arrow.Record {
	tbl := &schema.Table{
		Name: "test_wide_table",
		Columns: schema.ColumnList{
			{Name: "col1", Type: arrow.BinaryTypes.String},
		},
	}

	const numWideCols = 200
	vals := make([]int64, numWideCols)
	for i := range vals {
		tbl.Columns = append(tbl.Columns, schema.Column{
			Name: "wide_col" + strconv.Itoa(i),
			Type: arrow.PrimitiveTypes.Int64,
		})
		vals[i] = rand.Int63()
	}
	arrowSchema := tbl.ToArrowSchema()

	return func() arrow.Record {
		rb := array.NewRecordBuilder(memory.DefaultAllocator, arrowSchema)
		rb.Field(0).(*array.StringBuilder).Append("test")
		for i, v := range vals {
			rb.Field(i + 1).(*array.Int64Builder).Append(v)
		}
		return rb.NewRecord()
	}
}

// writerMatrix builds one bCase per option set in optsMatrix, constructing a
// writer for each. Cases are emitted in sorted key order so sub-benchmark
// names are deterministic across runs.
func writerMatrix[T writers.Writer, C any, O ~func(T)](prefix string, constructor func(C, ...O) (T, error), client C, recordMaker func() func() arrow.Record, optsMatrix map[string][]O) []bCase {
	names := maps.Keys(optsMatrix)
	sort.Strings(names)

	out := make([]bCase, 0, len(optsMatrix))
	for _, n := range names {
		w, err := constructor(client, optsMatrix[n]...)
		if err != nil {
			// Benchmark setup: fail loudly rather than run a broken case.
			panic(err)
		}
		out = append(out, bCase{
			name: prefix + " " + n,
			wr:   w,
			rec:  recordMaker(),
		})
	}
	return out
}

// mixedbatchwriterClient is a no-op mixedbatchwriter.Client for benchmarks:
// it releases incoming records and inherits no-op/unimplemented handling for
// everything else.
type mixedbatchwriterClient struct {
	mixedbatchwriter.IgnoreMigrateTableBatch
	mixedbatchwriter.UnimplementedDeleteStaleBatch
	mixedbatchwriter.UnimplementedDeleteRecordsBatch
}

var _ mixedbatchwriter.Client = (*mixedbatchwriterClient)(nil)

// newMixedBatchWriterClient returns a fresh benchmark client.
func newMixedBatchWriterClient() mixedbatchwriter.Client {
	return &mixedbatchwriterClient{}
}

// InsertBatch releases each record so Arrow buffers are returned promptly.
func (mixedbatchwriterClient) InsertBatch(_ context.Context, msgs message.WriteInserts) error {
	for i := range msgs {
		msgs[i].Record.Release()
	}
	return nil
}

// batchwriterClient is a no-op batchwriter.Client for benchmarks: it releases
// incoming records and inherits no-op/unimplemented handling for everything
// else.
type batchwriterClient struct {
	batchwriter.IgnoreMigrateTables
	batchwriter.UnimplementedDeleteStale
	batchwriter.UnimplementedDeleteRecord
}

var _ batchwriter.Client = (*batchwriterClient)(nil)

// newBatchWriterClient returns a fresh benchmark client.
func newBatchWriterClient() batchwriter.Client {
	return &batchwriterClient{}
}

// WriteTableBatch releases each record so Arrow buffers are returned promptly.
func (batchwriterClient) WriteTableBatch(_ context.Context, _ string, msgs message.WriteInserts) error {
	for i := range msgs {
		msgs[i].Record.Release()
	}
	return nil
}

// streamingbatchwriterClient is a no-op streamingbatchwriter.Client for
// benchmarks: it drains the insert channel, releasing each record, and
// inherits no-op/unimplemented handling for everything else.
type streamingbatchwriterClient struct {
	streamingbatchwriter.IgnoreMigrateTable
	streamingbatchwriter.UnimplementedDeleteStale
	streamingbatchwriter.UnimplementedDeleteRecords
}

var _ streamingbatchwriter.Client = (*streamingbatchwriterClient)(nil)

// newStreamingBatchWriterClient returns a fresh benchmark client.
func newStreamingBatchWriterClient() streamingbatchwriter.Client {
	return &streamingbatchwriterClient{}
}

// WriteTable drains ch until it is closed, releasing every record.
func (streamingbatchwriterClient) WriteTable(_ context.Context, ch <-chan *message.WriteInsert) error {
	for {
		m, ok := <-ch
		if !ok {
			return nil
		}
		m.Record.Release()
	}
}

1 comment on commit 3050443

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⏱️ Benchmark results

  • Glob-8 ns/op: 90.87
  • WriterMemory_BatchWriter_batch10k_bytes100M-8 bytes/op: 1,012
  • WriterMemory_BatchWriter_batch10k_bytes100M-8 ns/op: 2,646
  • WriterMemory_BatchWriter_defaults-8 bytes/op: 1,223
  • WriterMemory_BatchWriter_defaults-8 ns/op: 2,861
  • WriterMemory_BatchWriter_wide_batch10k_bytes100M-8 bytes/op: 194,822
  • WriterMemory_BatchWriter_wide_batch10k_bytes100M-8 ns/op: 230,240
  • WriterMemory_BatchWriter_wide_defaults-8 bytes/op: 1,845
  • WriterMemory_BatchWriter_wide_defaults-8 ns/op: 166,912
  • WriterMemory_MixedBatchWriter_batch10k_bytes100M-8 bytes/op: 1,426
  • WriterMemory_MixedBatchWriter_batch10k_bytes100M-8 ns/op: 2,397
  • WriterMemory_MixedBatchWriter_defaults-8 bytes/op: 1,426
  • WriterMemory_MixedBatchWriter_defaults-8 ns/op: 2,412
  • WriterMemory_MixedBatchWriter_wide_batch10k_bytes100M-8 bytes/op: 190,139
  • WriterMemory_MixedBatchWriter_wide_batch10k_bytes100M-8 ns/op: 207,328
  • WriterMemory_MixedBatchWriter_wide_defaults-8 bytes/op: 11,613
  • WriterMemory_MixedBatchWriter_wide_defaults-8 ns/op: 171,197
  • WriterMemory_StreamingBatchWriter_bytes100M-8 bytes/op: 1,394
  • WriterMemory_StreamingBatchWriter_bytes100M-8 ns/op: 2,983
  • WriterMemory_StreamingBatchWriter_defaults-8 bytes/op: 1,393
  • WriterMemory_StreamingBatchWriter_defaults-8 ns/op: 3,110
  • WriterMemory_StreamingBatchWriter_wide_bytes100M-8 bytes/op: 8,482
  • WriterMemory_StreamingBatchWriter_wide_bytes100M-8 ns/op: 158,683
  • WriterMemory_StreamingBatchWriter_wide_defaults-8 bytes/op: 11,588
  • WriterMemory_StreamingBatchWriter_wide_defaults-8 ns/op: 152,582

Please sign in to comment.