Skip to content

Commit

Permalink
Read, Write and operational dashboard improvements (#2584)
Browse files Browse the repository at this point in the history
1. Add bigtable, boltdb-shipper to read/write dashboards.
2. Add boltdb-shipper to operational dashboard.
3. Record latency for write operation in boltdb-shipper.
  • Loading branch information
sandeepsukhani authored Sep 4, 2020
1 parent 82a8cf4 commit 6dd2e9e
Show file tree
Hide file tree
Showing 3 changed files with 254 additions and 2 deletions.
4 changes: 3 additions & 1 deletion pkg/storage/stores/shipper/shipper_index_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,9 @@ func (s *Shipper) NewWriteBatch() chunk.WriteBatch {
}

// BatchWrite hands batch to the uploads manager and observes the duration of
// that call in the shipper's requestDurationSeconds histogram under the
// "WRITE" operation.
//
// The write itself is still performed by s.uploadsManager.BatchWrite; this
// method only wraps it in instrument.CollectedRequest so the call is measured.
// NOTE(review): instrument.ErrorCode presumably derives the status label from
// the returned error, and CollectedRequest presumably returns the wrapped
// function's error unchanged — confirm against the instrument package.
func (s *Shipper) BatchWrite(ctx context.Context, batch chunk.WriteBatch) error {
	collector := instrument.NewHistogramCollector(s.metrics.requestDurationSeconds)

	// There must be no uninstrumented return ahead of this call: an earlier
	// direct return would make the instrumented path unreachable and leave the
	// "WRITE" series empty.
	return instrument.CollectedRequest(ctx, "WRITE", collector, instrument.ErrorCode, func(ctx context.Context) error {
		return s.uploadsManager.BatchWrite(ctx, batch)
	})
}

func (s *Shipper) QueryPages(ctx context.Context, queries []chunk.IndexQuery, callback func(chunk.IndexQuery, chunk.ReadBatch) (shouldContinue bool)) error {
Expand Down
207 changes: 207 additions & 0 deletions production/loki-mixin/dashboard-loki-operational.json
Original file line number Diff line number Diff line change
Expand Up @@ -6343,6 +6343,213 @@
],
"title": "Cassandra",
"type": "row"
},
{
"collapsed": true,
"datasource": null,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 37
},
"id": 114,
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 10
},
"id": 115,
"interval": "",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"dataLinks": []
},
"percentage": false,
"pointradius": 1,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".99-{{operation}}",
"refId": "A"
},
{
"expr": "histogram_quantile(.9, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
"hide": false,
"legendFormat": ".9-{{operation}}",
"refId": "B"
},
{
"expr": "histogram_quantile(.5, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
"hide": false,
"legendFormat": ".5-{{operation}}",
"refId": "C"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Latency By Operation",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 18
},
"id": 116,
"interval": "",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"dataLinks": []
},
"percentage": false,
"pointradius": 1,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}-{{operation}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Status By Method",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
}
],
"title": "BoltDB Shipper",
"type": "row"
}
],
"refresh": "30s",
Expand Down
45 changes: 44 additions & 1 deletion production/loki-mixin/dashboards.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,28 @@ local utils = import 'mixin-utils/utils.libsonnet';
g.panel('Latency') +
utils.latencyRecordingRulePanel('loki_request_duration_seconds', [utils.selector.re('job', '($namespace)/ingester'), utils.selector.eq('route', '/logproto.Pusher/Push')], extra_selectors=[utils.selector.re('cluster', '$cluster')])
)
),
)
.addRow(
g.row('BigTable')
.addPanel(
g.panel('QPS') +
g.qpsPanel('cortex_bigtable_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/ingester", operation="/google.bigtable.v2.Bigtable/MutateRows"}')
)
.addPanel(
g.panel('Latency') +
utils.latencyRecordingRulePanel('cortex_bigtable_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/ingester')] + [utils.selector.eq('operation', '/google.bigtable.v2.Bigtable/MutateRows')])
)
)
.addRow(
g.row('BoltDB Shipper')
.addPanel(
g.panel('QPS') +
g.qpsPanel('loki_boltdb_shipper_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/ingester", operation="WRITE"}')
)
.addPanel(
g.panel('Latency') +
g.latencyPanel('loki_boltdb_shipper_request_duration_seconds', '{cluster=~"$cluster", job=~"($namespace)/ingester", operation="WRITE"}')
),

local http_routes = 'loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values',
local grpc_routes = '/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series',
Expand Down Expand Up @@ -93,6 +114,28 @@ local utils = import 'mixin-utils/utils.libsonnet';
g.panel('Latency') +
utils.latencyRecordingRulePanel('loki_request_duration_seconds', [utils.selector.eq('job', '$namespace/ingester'), utils.selector.re('route', grpc_routes)], extra_selectors=[utils.selector.eq('cluster', '$cluster')], sum_by=['route'])
)
)
.addRow(
g.row('BigTable')
.addPanel(
g.panel('QPS') +
g.qpsPanel('cortex_bigtable_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/querier", operation="/google.bigtable.v2.Bigtable/ReadRows"}')
)
.addPanel(
g.panel('Latency') +
utils.latencyRecordingRulePanel('cortex_bigtable_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/querier')] + [utils.selector.eq('operation', '/google.bigtable.v2.Bigtable/ReadRows')])
)
)
.addRow(
g.row('BoltDB Shipper')
.addPanel(
g.panel('QPS') +
g.qpsPanel('loki_boltdb_shipper_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/querier", operation="QUERY"}')
)
.addPanel(
g.panel('Latency') +
g.latencyPanel('loki_boltdb_shipper_request_duration_seconds', '{cluster=~"$cluster", job=~"($namespace)/querier", operation="QUERY"}')
)
),


Expand Down

0 comments on commit 6dd2e9e

Please sign in to comment.