Skip to content

Commit

Permalink
kvserver: add pebble ingestion/flush metrics
Browse files Browse the repository at this point in the history
This adds:

```
storage.l0-bytes-flushed
storage.lX-num-files
storage.lX-bytes-ingested
```

which are all useful for understanding inverted LSMs.

Release note: None
  • Loading branch information
tbg committed May 11, 2022
1 parent d63a369 commit 8dfd075
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 17 deletions.
94 changes: 79 additions & 15 deletions pkg/kv/kvserver/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ package kvserver

import (
"context"
"fmt"
"runtime/debug"
"sync/atomic"
"time"
Expand Down Expand Up @@ -439,18 +440,41 @@ var (
Measurement: "SSTables",
Unit: metric.Unit_COUNT,
}
metaRdbL0Sublevels = metric.Metadata{
Name: "storage.l0-sublevels",
Help: "Number of Level 0 sublevels",
Measurement: "Storage",
Unit: metric.Unit_COUNT,
}
metaRdbL0NumFiles = metric.Metadata{
Name: "storage.l0-num-files",
Help: "Number of Level 0 files",
Measurement: "Storage",
Unit: metric.Unit_COUNT,
}
// NB: bytes only ever get flushed into L0, so this metric does not
// exist for any other level.
metaRdbL0BytesFlushed = storageLevelMetricMetadata(
"bytes-flushed",
"Number of bytes flushed (from memtables) into Level %d",
"Bytes",
metric.Unit_BYTES,
)[0]

// NB: sublevels is trivial (zero or one) except on L0.
metaRdbL0Sublevels = storageLevelMetricMetadata(
"sublevels",
"Number of Level %d sublevels",
"Sublevels",
metric.Unit_COUNT,
)[0]

// NB: we only expose the file count in L0 because it matters for
// admission control. The other file counts are less interesting.
metaRdbL0NumFiles = storageLevelMetricMetadata(
"num-files",
"Number of SSTables in Level %d",
"SSTables",
metric.Unit_COUNT,
)[0]
)

// metaRdbBytesIngested holds one metadata entry per level index (0 through 6)
// for the "bytes-ingested" metric, as produced by storageLevelMetricMetadata.
// The help text of each entry is the template below with the level index
// substituted for %d.
var metaRdbBytesIngested = storageLevelMetricMetadata(
"bytes-ingested",
"Number of bytes ingested directly into Level %d",
"Bytes",
metric.Unit_BYTES,
)

var (
metaRdbWriteStalls = metric.Metadata{
Name: "storage.write-stalls",
Help: "Number of instances of intentional write stalls to backpressure incoming writes",
Expand Down Expand Up @@ -1402,7 +1426,13 @@ type StoreMetrics struct {
// Server-side transaction metrics.
CommitWaitsBeforeCommitTrigger *metric.Counter

// RocksDB metrics.
// Storage (pebble) metrics. Some are named RocksDB which is what we used
// before pebble, and this name is kept for backwards compatibility despite
// the backing metrics now originating from pebble.
//
// All of these are cumulative values. They are maintained by pebble and
// so we have to expose them as gauges (lest we start tracking deltas from
// the respective last stats we got from pebble).
RdbBlockCacheHits *metric.Gauge
RdbBlockCacheMisses *metric.Gauge
RdbBlockCacheUsage *metric.Gauge
Expand All @@ -1421,8 +1451,10 @@ type StoreMetrics struct {
RdbNumSSTables *metric.Gauge
RdbPendingCompaction *metric.Gauge
RdbMarkedForCompactionFiles *metric.Gauge
RdbL0BytesFlushed *metric.Gauge
RdbL0Sublevels *metric.Gauge
RdbL0NumFiles *metric.Gauge
RdbBytesIngested [7]*metric.Gauge // idx = level
RdbWriteStalls *metric.Gauge

// Disk health metrics.
Expand Down Expand Up @@ -1802,6 +1834,7 @@ func newTenantsStorageMetrics() *TenantsStorageMetrics {

func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics {
storeRegistry := metric.NewRegistry()
rdbBytesIngested := storageLevelGaugeSlice(metaRdbBytesIngested)
sm := &StoreMetrics{
registry: storeRegistry,
TenantsStorageMetrics: newTenantsStorageMetrics(),
Expand Down Expand Up @@ -1842,6 +1875,7 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics {
// Rebalancing metrics.
AverageQueriesPerSecond: metric.NewGaugeFloat64(metaAverageQueriesPerSecond),
AverageWritesPerSecond: metric.NewGaugeFloat64(metaAverageWritesPerSecond),
// TODO(tbg): this histogram seems bogus? What are we tracking here?
L0SubLevelsHistogram: metric.NewHistogram(
metaL0SubLevelHistogram,
allocatorimpl.L0SublevelInterval,
Expand Down Expand Up @@ -1876,8 +1910,10 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics {
RdbNumSSTables: metric.NewGauge(metaRdbNumSSTables),
RdbPendingCompaction: metric.NewGauge(metaRdbPendingCompaction),
RdbMarkedForCompactionFiles: metric.NewGauge(metaRdbMarkedForCompactionFiles),
RdbL0BytesFlushed: metric.NewGauge(metaRdbL0BytesFlushed),
RdbL0Sublevels: metric.NewGauge(metaRdbL0Sublevels),
RdbL0NumFiles: metric.NewGauge(metaRdbL0NumFiles),
RdbBytesIngested: rdbBytesIngested,
RdbWriteStalls: metric.NewGauge(metaRdbWriteStalls),

// Disk health metrics.
Expand Down Expand Up @@ -2108,13 +2144,18 @@ func (sm *StoreMetrics) updateEngineMetrics(m storage.Metrics) {
sm.RdbReadAmplification.Update(int64(m.ReadAmp()))
sm.RdbPendingCompaction.Update(int64(m.Compact.EstimatedDebt))
sm.RdbMarkedForCompactionFiles.Update(int64(m.Compact.MarkedFiles))
sm.RdbL0Sublevels.Update(int64(m.Levels[0].Sublevels))
sm.L0SubLevelsHistogram.RecordValue(int64(m.Levels[0].Sublevels))
sm.RdbL0NumFiles.Update(m.Levels[0].NumFiles)
sm.RdbNumSSTables.Update(m.NumSSTables())
sm.RdbWriteStalls.Update(m.WriteStallCount)
sm.DiskSlow.Update(m.DiskSlowCount)
sm.DiskStalled.Update(m.DiskStallCount)

sm.RdbL0Sublevels.Update(int64(m.Levels[0].Sublevels))
sm.RdbL0NumFiles.Update(m.Levels[0].NumFiles)
sm.RdbL0BytesFlushed.Update(int64(m.Levels[0].BytesFlushed))
for level, stats := range m.Levels {
sm.RdbBytesIngested[level].Update(int64(stats.BytesIngested))
}
}

func (sm *StoreMetrics) updateEnvStats(stats storage.EnvStats) {
Expand Down Expand Up @@ -2145,3 +2186,26 @@ func (sm *StoreMetrics) handleMetricsResult(ctx context.Context, metric result.M
log.Fatalf(ctx, "unhandled fields in metrics result: %+v", metric)
}
}

// storageLevelMetricMetadata returns an array of seven metric.Metadata values,
// one per level index 0 through 6.
//
// For index i, the metric name is "storage.l<i>-<name>" and the help text is
// helpTpl formatted with i as its only argument, so helpTpl is expected to
// contain a single integer verb such as %d. The measurement and unit are
// copied unchanged into every entry.
func storageLevelMetricMetadata(
	name, helpTpl, measurement string, unit metric.Unit,
) [7]metric.Metadata {
	var perLevel [7]metric.Metadata
	for level := 0; level < len(perLevel); level++ {
		// Fill the zero-valued entry in place; fields not set here keep
		// their zero value.
		md := &perLevel[level]
		md.Name = fmt.Sprintf("storage.l%d-%s", level, name)
		md.Help = fmt.Sprintf(helpTpl, level)
		md.Measurement = measurement
		md.Unit = unit
	}
	return perLevel
}

// storageLevelGaugeSlice constructs a new gauge for each of the seven metadata
// entries in sl. The result is a fixed-size array (not a slice, despite the
// name) in which the gauge at index i was created from sl[i].
func storageLevelGaugeSlice(sl [7]metric.Metadata) [7]*metric.Gauge {
	var gauges [7]*metric.Gauge
	for level, md := range sl {
		gauges[level] = metric.NewGauge(md)
	}
	return gauges
}
22 changes: 20 additions & 2 deletions pkg/ts/catalog/chart_catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -2681,8 +2681,26 @@ var charts = []sectionDescription{
Metrics: []string{"storage.l0-sublevels"},
},
{
Title: "L0 Files",
Metrics: []string{"storage.l0-num-files"},
Title: "L0 Files",
Metrics: []string{
"storage.l0-num-files",
},
},
{
Title: "Bytes flushed to Level 0",
Metrics: []string{"storage.l0-bytes-flushed"},
},
{
Title: "Bytes Ingested per Level",
Metrics: []string{
"storage.l0-bytes-ingested",
"storage.l1-bytes-ingested",
"storage.l2-bytes-ingested",
"storage.l3-bytes-ingested",
"storage.l4-bytes-ingested",
"storage.l5-bytes-ingested",
"storage.l6-bytes-ingested",
},
},
{
Title: "Ingestion",
Expand Down

0 comments on commit 8dfd075

Please sign in to comment.