Skip to content

Commit

Permalink
more extensive benchmarks comparing with segmentio/topk
Browse files Browse the repository at this point in the history
  • Loading branch information
sgreben committed Sep 26, 2024
1 parent 628159a commit 196f3d7
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 31 deletions.
30 changes: 24 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
[![Go Reference](https://pkg.go.dev/badge/github.com/keilerkonzept/topk.svg)](https://pkg.go.dev/github.com/keilerkonzept/topk)
[![Go Report Card](https://goreportcard.com/badge/github.com/keilerkonzept/topk)](https://goreportcard.com/report/github.com/keilerkonzept/topk)

Sliding-window and regular top-K sketches.
Sliding-window and regular top-K sketches. [Significantly faster (~1.5x)](#comparison-with-segmentiotopk) than [segmentio/topk](https://github.com/segmentio/topk) on small sketches (k <= 1000) and [much faster (10x-90x)](#comparison-with-segmentiotopk) on large sketches (k >= 10000).

- A fast implementation of the [HeavyKeeper top-K sketch](https://www.usenix.org/conference/atc18/presentation/gong) inspired by the [segmentio implementation](https://github.com/segmentio/topk) and [RedisBloom implementation](https://github.com/RedisBloom/RedisBloom/blob/b5916e1b9fba17829c3e329c127b99d706eb31f6/src/topk.c).
- A sliding-window top-K sketch, also based on HeavyKeeper, as described in ["A Sketch Framework for Approximate Data Stream Processing in Sliding Windows"](https://yangtonghome.github.io/uploads/SlidingSketch_TKDE2022_final.pdf)
Expand All @@ -29,7 +29,7 @@ import (
- [Benchmarks](#benchmarks)
- [Top-K Sketch](#top-k-sketch)
- [Sliding-Window Top-K Sketch](#sliding-window-top-k-sketch)
- [Decay LUT impact](#decay-lut-impact)
- [Comparison with segmentio/topk](#comparison-with-segmentiotopk)

## Examples

Expand Down Expand Up @@ -221,16 +221,34 @@ The `Add` benchmark performs random increments in the interval [1,10).
| `Tick` | 100 | 3 | 8192 | 100 | 50 | 31349 ns/op | 0 B/op | 0 allocs/op |
| `Tick` | 100 | 3 | 8192 | 100 | 100 | 87488 ns/op | 0 B/op | 0 allocs/op |

### Decay LUT impact
### Comparison with [segmentio/topk](https://github.com/segmentio/topk)

The size of the look-up table is configured using the `WithDecayLUTSize` option. If the look-up table covers the actual counts involved, the speedup can be significant:
Using [benchstat](https://pkg.go.dev/golang.org/x/perf/cmd/benchstat):
```sh
$ go test -run='^$' -bench=BenchmarkSketchAddForComparison -count=10 | tee new.txt
$ go test -run='^$' -bench=BenchmarkSegmentioTopkSample -count=10 | tee -a old.txt
$ benchstat -row /K,/Depth,/Width,/Decay -col .name old.txt new.txt
```

```
goos: darwin
goarch: arm64
pkg: github.com/keilerkonzept/topk
cpu: Apple M1 Pro
```

BenchmarkSketch_1000_3k_3-10 124646 8450 ns/op
BenchmarkSegmentioTopK_1000_3k_3-10 39903 32345 ns/op
| K | Depth | Width | Decay | `segmentio/topk` (sec/op) | this package (sec/op) | diff |
|--------|-------|---------|-------|---------------------------|-----------------------|------------------------|
| 100 | 4 | 460 | 0.6 | 800.1n ± 5% | 447.9n ± 5% | **-44.03%** (p=0.000 n=10) |
| 100 | 4 | 460 | 0.8 | 753.8n ± 2% | 504.5n ± 3% | **-33.07%** (p=0.000 n=10) |
| 100 | 4 | 460 | 0.9 | 687.4n ± 2% | 476.3n ± 3% | **-30.71%** (p=0.000 n=10) |
| 1000 | 6 | 6907 | 0.6 | 1149.5n ± 2% | 569.4n ± 2% | **-50.46%** (p=0.000 n=10) |
| 1000 | 6 | 6907 | 0.8 | 1086.5n ± 2% | 677.7n ± 2% | **-37.63%** (p=0.000 n=10) |
| 1000 | 6 | 6907 | 0.9 | 966.8n ± 2% | 627.6n ± 2% | **-35.08%** (p=0.000 n=10) |
| 10000 | 9 | 92103 | 0.6 | 10.993µ ± 2% | 1.232µ ± 6% | **-88.79%** (p=0.000 n=10) |
| 10000 | 9 | 92103 | 0.8 | 11.046µ ± 5% | 1.326µ ± 10% | **-88.00%** (p=0.000 n=10) |
| 10000 | 9 | 92103 | 0.9 | 10.859µ ± 2% | 1.280µ ± 5% | **-88.21%** (p=0.000 n=10) |
| 100000 | 1 | 1151292 | 0.6 | 91.304µ ± 1% | 1.893µ ± 4% | **-97.93%** (p=0.000 n=10) |
| 100000 | 1 | 1151292 | 0.8 | 90.494µ ± 1% | 1.888µ ± 1% | **-97.91%** (p=0.000 n=10) |
| 100000 | 1 | 1151292 | 0.9 | 91.129µ ± 1% | 1.910µ ± 1% | **-97.90%** (p=0.000 n=10) |
```
38 changes: 38 additions & 0 deletions sketch_bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@ package topk_test

import (
"fmt"
"math"
"math/rand/v2"
"testing"

"github.com/keilerkonzept/topk"
segmentiotopk "github.com/segmentio/topk"
)

var (
Expand Down Expand Up @@ -112,3 +114,39 @@ func BenchmarkSketchQuery(b *testing.B) {
}
}
}

// BenchmarkSegmentioTopkSample benchmarks the Sample method of a [github.com/segmentio/topk.HeavyKeeper].
func BenchmarkSegmentioTopkSample(b *testing.B) {
for _, k := range []int{100, 1_000, 10_000, 100_000} {
for _, decay := range []float64{0.6, 0.8, 0.9} {
depth := max(3, int(math.Log(float64(k))))
width := max(256, int(float64(k)*math.Log(float64(k))))
b.Run(fmt.Sprintf("K=%d_Depth=%d_Width=%d_Decay=%f", k, depth, width, decay), func(b *testing.B) {
sketch := segmentiotopk.New(k, decay)

b.ResetTimer()
for i := 0; i < b.N; i++ {
sketch.Sample(items[rand.IntN(len(items))], uint32(rand.IntN(10)))
}
})
}
}
}

// BenchmarkSketchAddForComparison benchmarks the Add method of a [topk.Sketch] for comparison with the segmentio implementation.
func BenchmarkSketchAddForComparison(b *testing.B) {
for _, k := range []int{100, 1_000, 10_000, 100_000} {
for _, decay := range []float32{0.6, 0.8, 0.9} {
depth := max(3, int(math.Log(float64(k))))
width := max(256, int(float64(k)*math.Log(float64(k))))
b.Run(fmt.Sprintf("K=%d_Depth=%d_Width=%d_Decay=%f", k, depth, width, decay), func(b *testing.B) {
sketch := topk.New(k, topk.WithDecay(decay), topk.WithDepth(depth), topk.WithWidth(width))

b.ResetTimer()
for i := 0; i < b.N; i++ {
sketch.Add(items[rand.IntN(len(items))], uint32(rand.IntN(10)))
}
})
}
}
}
25 changes: 0 additions & 25 deletions sketch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,33 +8,8 @@ import (
"github.com/keilerkonzept/topk"
"github.com/keilerkonzept/topk/heap"
"github.com/keilerkonzept/topk/internal/sizeof"
segmentio_topk "github.com/segmentio/topk"
)

func BenchmarkSketch_1000_3k_3(b *testing.B) {
sketch := topk.New(1000, topk.WithWidth(3_000), topk.WithDepth(3), topk.WithDecayLUTSize(1024))
items := make([]string, 2048)
for i := range items {
items[i] = fmt.Sprint(i)
}
b.ResetTimer()
for i := 0; i <= b.N; i++ {
sketch.Add(items[i%len(items)], uint32(i%len(items)))
}
}

func BenchmarkSegmentioTopK_1000_3k_3(b *testing.B) {
sketch := segmentio_topk.New(1000, 0.9)
items := make([]string, 2048)
for i := range items {
items[i] = fmt.Sprint(i)
}
b.ResetTimer()
for i := 0; i <= b.N; i++ {
sketch.Sample(items[i%len(items)], uint32(i%len(items)))
}
}

func TestNewSketch_DefaultParameters(t *testing.T) {
k := 10
sketch := topk.New(k)
Expand Down

0 comments on commit 196f3d7

Please sign in to comment.