From 9f606666daa0cf6c406e1b2330f44bc67400a072 Mon Sep 17 00:00:00 2001
From: Ziqi Zhao
Date: Sat, 4 Jun 2022 23:12:29 +0800
Subject: [PATCH 01/10] add metrics otelcol_exporter_queue_capacity

Signed-off-by: Ziqi Zhao
---
 exporter/exporterhelper/obsreport.go             | 7 +++++++
 exporter/exporterhelper/queued_retry_inmemory.go | 6 ++++++
 2 files changed, 13 insertions(+)

diff --git a/exporter/exporterhelper/obsreport.go b/exporter/exporterhelper/obsreport.go
index 324857f6788..377cc2f911a 100644
--- a/exporter/exporterhelper/obsreport.go
+++ b/exporter/exporterhelper/obsreport.go
@@ -40,6 +40,7 @@ func init() {
 type instruments struct {
 	registry                    *metric.Registry
 	queueSize                   *metric.Int64DerivedGauge
+	queueCapacity               *metric.Int64DerivedGauge
 	failedToEnqueueTraceSpans   *metric.Int64Cumulative
 	failedToEnqueueMetricPoints *metric.Int64Cumulative
 	failedToEnqueueLogRecords   *metric.Int64Cumulative
@@ -55,6 +56,12 @@ func newInstruments(registry *metric.Registry) *instruments {
 		metric.WithLabelKeys(obsmetrics.ExporterKey),
 		metric.WithUnit(metricdata.UnitDimensionless))
 
+	insts.queueCapacity, _ = registry.AddInt64DerivedGauge(
+		obsmetrics.ExporterKey+"/queue_capacity",
+		metric.WithDescription("Current capacity of the retry queue (in batches)"),
+		metric.WithLabelKeys(obsmetrics.ExporterKey),
+		metric.WithUnit(metricdata.UnitDimensionless))
+
 	insts.failedToEnqueueTraceSpans, _ = registry.AddInt64Cumulative(
 		obsmetrics.ExporterKey+"/enqueue_failed_spans",
 		metric.WithDescription("Number of spans failed to be added to the sending queue."),
diff --git a/exporter/exporterhelper/queued_retry_inmemory.go b/exporter/exporterhelper/queued_retry_inmemory.go
index 7349a85de54..6f2f7c7c437 100644
--- a/exporter/exporterhelper/queued_retry_inmemory.go
+++ b/exporter/exporterhelper/queued_retry_inmemory.go
@@ -128,6 +128,12 @@ func (qrs *queuedRetrySender) start(context.Context, component.Host) error {
 		if err != nil {
 			return fmt.Errorf("failed to create retry queue size metric: %w", err)
 		}
+		err = globalInstruments.queueCapacity.UpsertEntry(func() int64 {
+			return int64(qrs.queue.Capacity())
+		}, metricdata.NewLabelValue(qrs.fullName))
+		if err != nil {
+			return fmt.Errorf("failed to create retry queue capacity metric: %w", err)
+		}
 	}
 
 	return nil
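Patch 01 leans on OpenCensus's derived-gauge pattern: the gauge is never set directly; the registry pulls its value from a registered callback every time it is read. Below is a minimal, self-contained sketch of that pattern, not collector code: the "exporter/queue_capacity" name, the "exporter" label key, and the "otlp" label value are illustrative stand-ins.

package main

import (
	"fmt"

	"go.opencensus.io/metric"
	"go.opencensus.io/metric/metricdata"
)

func main() {
	registry := metric.NewRegistry()

	// A derived gauge has no Set(); instead, UpsertEntry registers a
	// callback that the registry invokes on every Read.
	gauge, err := registry.AddInt64DerivedGauge(
		"exporter/queue_capacity",
		metric.WithDescription("Fixed capacity of the retry queue (in batches)"),
		metric.WithLabelKeys("exporter"),
		metric.WithUnit(metricdata.UnitDimensionless))
	if err != nil {
		panic(err)
	}

	queueCapacity := 5000 // stand-in for the sender's cfg.QueueSize
	err = gauge.UpsertEntry(func() int64 {
		return int64(queueCapacity)
	}, metricdata.NewLabelValue("otlp")) // one time series per exporter name
	if err != nil {
		panic(err)
	}

	// Reading the registry runs the callback and materializes a point.
	for _, m := range registry.Read() {
		for _, ts := range m.TimeSeries {
			fmt.Println(m.Descriptor.Name, ts.Points[0].Value)
		}
	}
}

Because the value is computed at read time, the same mechanism serves both the varying queue_size gauge and the fixed queue_capacity gauge; only the callback differs.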
From 06254adca54d9c3fa5f6b5a0be6894606d16aeba Mon Sep 17 00:00:00 2001
From: Ziqi Zhao
Date: Sun, 26 Jun 2022 20:27:39 +0800
Subject: [PATCH 02/10] add unit test for new metric

Signed-off-by: Ziqi Zhao
---
 exporter/exporterhelper/queued_retry_inmemory.go | 2 +-
 exporter/exporterhelper/queued_retry_test.go     | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/exporter/exporterhelper/queued_retry_inmemory.go b/exporter/exporterhelper/queued_retry_inmemory.go
index 6f2f7c7c437..7b76221a78f 100644
--- a/exporter/exporterhelper/queued_retry_inmemory.go
+++ b/exporter/exporterhelper/queued_retry_inmemory.go
@@ -129,7 +129,7 @@ func (qrs *queuedRetrySender) start(context.Context, component.Host) error {
 			return fmt.Errorf("failed to create retry queue size metric: %w", err)
 		}
 		err = globalInstruments.queueCapacity.UpsertEntry(func() int64 {
-			return int64(qrs.queue.Capacity())
+			return int64(qrs.cfg.QueueSize)
 		}, metricdata.NewLabelValue(qrs.fullName))
 		if err != nil {
 			return fmt.Errorf("failed to create retry queue capacity metric: %w", err)
diff --git a/exporter/exporterhelper/queued_retry_test.go b/exporter/exporterhelper/queued_retry_test.go
index 7edf7788e8c..e4a8e4b5a6a 100644
--- a/exporter/exporterhelper/queued_retry_test.go
+++ b/exporter/exporterhelper/queued_retry_test.go
@@ -346,6 +346,7 @@ func TestQueuedRetry_QueueMetricsReported(t *testing.T) {
 		require.NoError(t, be.sender.send(newErrorRequest(context.Background())))
 	}
 	checkValueForGlobalManager(t, defaultExporterTags, int64(7), "exporter/queue_size")
+	checkValueForGlobalManager(t, defaultExporterTags, int64(5000), "exporter/queue_capacity")
 
 	assert.NoError(t, be.Shutdown(context.Background()))
 	checkValueForGlobalManager(t, defaultExporterTags, int64(0), "exporter/queue_size")

From 25195519b47578077f5549d4fe5fe9bbb8b16eae Mon Sep 17 00:00:00 2001
From: Ziqi Zhao
Date: Sun, 26 Jun 2022 20:43:26 +0800
Subject: [PATCH 03/10] add entry to changelog

Signed-off-by: Ziqi Zhao
---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 00caf6ec5df..d39346cf7bc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -85,6 +85,7 @@
 
 ### 💡 Enhancements 💡
 
+- Add `otelcol_exporter_queue_capacity` metric to show the collector's exporter queue capacity (#5475)
 - Deprecate `HTTPClientSettings.ToClient` in favor of `HTTPClientSettings.ToClientWithHost` (#5584)
 - Use OpenCensus `metric` package for process metrics instead of `stats` package (#5486)
 - Update OTLP to v0.18.0 (#5530)

From d49b15293d01350d54190b423fc59765d3465f5f Mon Sep 17 00:00:00 2001
From: Ziqi Zhao
Date: Mon, 27 Jun 2022 06:59:02 +0800
Subject: [PATCH 04/10] fix unittest

Signed-off-by: Ziqi Zhao
---
 exporter/exporterhelper/queued_retry_test.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/exporter/exporterhelper/queued_retry_test.go b/exporter/exporterhelper/queued_retry_test.go
index e4a8e4b5a6a..b7cebb39c8b 100644
--- a/exporter/exporterhelper/queued_retry_test.go
+++ b/exporter/exporterhelper/queued_retry_test.go
@@ -342,11 +342,11 @@ func TestQueuedRetry_QueueMetricsReported(t *testing.T) {
 	be := newBaseExporter(&defaultExporterCfg, componenttest.NewNopExporterCreateSettings(), fromOptions(WithRetry(rCfg), WithQueue(qCfg)), "", nopRequestUnmarshaler())
 	require.NoError(t, be.Start(context.Background(), componenttest.NewNopHost()))
 
+	checkValueForGlobalManager(t, defaultExporterTags, int64(5000), "exporter/queue_capacity")
 	for i := 0; i < 7; i++ {
 		require.NoError(t, be.sender.send(newErrorRequest(context.Background())))
 	}
 	checkValueForGlobalManager(t, defaultExporterTags, int64(7), "exporter/queue_size")
-	checkValueForGlobalManager(t, defaultExporterTags, int64(5000), "exporter/queue_capacity")
 
 	assert.NoError(t, be.Shutdown(context.Background()))
 	checkValueForGlobalManager(t, defaultExporterTags, int64(0), "exporter/queue_size")

From c6da553e5983b702927a6ec8521868a927600201 Mon Sep 17 00:00:00 2001
From: Ziqi Zhao
Date: Thu, 30 Jun 2022 06:51:40 +0800
Subject: [PATCH 05/10] fix for reviews

Signed-off-by: Ziqi Zhao
---
 exporter/exporterhelper/obsreport.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/exporter/exporterhelper/obsreport.go b/exporter/exporterhelper/obsreport.go
index 377cc2f911a..011c3fc99c2 100644
--- a/exporter/exporterhelper/obsreport.go
+++ b/exporter/exporterhelper/obsreport.go
@@ -58,7 +58,7 @@ func newInstruments(registry *metric.Registry) *instruments {
 
 	insts.queueCapacity, _ = registry.AddInt64DerivedGauge(
 		obsmetrics.ExporterKey+"/queue_capacity",
-		metric.WithDescription("Current capacity of the retry queue (in batches)"),
+		metric.WithDescription("Fixed capacity of the retry queue (in batches)"),
 		metric.WithLabelKeys(obsmetrics.ExporterKey),
 		metric.WithUnit(metricdata.UnitDimensionless))
 
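Patch 04 moves the capacity assertion to immediately after Start, which fits the metric's semantics: capacity is fixed for the sender's lifetime, while queue_size varies with load and is re-checked after the sends and after Shutdown. The test goes through the collector's internal checkValueForGlobalManager helper; a standalone equivalent against a plain OpenCensus registry, sketched here as a hypothetical helper rather than a collector API, could look like:

package example

import (
	"testing"

	"go.opencensus.io/metric"
)

// expectGaugeValue fails the test unless the named metric is registered
// in r and its first recorded point equals want. Hypothetical helper,
// not part of the collector's code base.
func expectGaugeValue(t *testing.T, r *metric.Registry, name string, want int64) {
	t.Helper()
	for _, m := range r.Read() {
		if m.Descriptor.Name != name {
			continue
		}
		for _, ts := range m.TimeSeries {
			// Reading the registry invoked the derived gauge's callback,
			// so the point reflects the value at this instant.
			if got := ts.Points[0].Value.(int64); got != want {
				t.Fatalf("%s = %d, want %d", name, got, want)
			}
			return
		}
	}
	t.Fatalf("metric %q not registered", name)
}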
From 73fe1551d0d913f4d13d0106d3bc7b4dde08f9df Mon Sep 17 00:00:00 2001
From: Ziqi Zhao
Date: Sat, 9 Jul 2022 21:39:54 +0800
Subject: [PATCH 06/10] add queue capacity metric for experimental queue setting

Signed-off-by: Ziqi Zhao
---
 exporter/exporterhelper/queued_retry_experimental.go | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/exporter/exporterhelper/queued_retry_experimental.go b/exporter/exporterhelper/queued_retry_experimental.go
index 30aadebfeb3..a808e212c71 100644
--- a/exporter/exporterhelper/queued_retry_experimental.go
+++ b/exporter/exporterhelper/queued_retry_experimental.go
@@ -219,6 +219,12 @@ func (qrs *queuedRetrySender) start(ctx context.Context, host component.Host) er
 		if err != nil {
 			return fmt.Errorf("failed to create retry queue size metric: %w", err)
 		}
+		err = globalInstruments.queueCapacity.UpsertEntry(func() int64 {
+			return int64(qrs.cfg.QueueSize)
+		}, metricdata.NewLabelValue(qrs.fullName()))
+		if err != nil {
+			return fmt.Errorf("failed to create retry queue capacity metric: %w", err)
+		}
 	}
 
 	return nil

From e8353e1bba05e6eacb494dd259c395137fb2439b Mon Sep 17 00:00:00 2001
From: Ziqi Zhao
Date: Tue, 19 Jul 2022 07:21:36 +0800
Subject: [PATCH 07/10] update monitoring doc

Signed-off-by: Ziqi Zhao
---
 docs/monitoring.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/docs/monitoring.md b/docs/monitoring.md
index 90c0810c993..f824ed2aef4 100644
--- a/docs/monitoring.md
+++ b/docs/monitoring.md
@@ -34,9 +34,11 @@ Most exporters offer a [queue/retry mechanism](../exporter/exporterhelper/README
 that is recommended as the retry mechanism for the Collector and as such should
 be used in any production deployment.
 
-**TODO:** Add metric to monitor queue length.
+The `otelcol_exporter_queue_capacity` indicates the capacity of the retry queue (in batches). The `otelcol_exporter_queue_size` indicates the current size of retry queue. So you can check those two metrics to see if queue size is enough for you load.
 
-Currently, the queue/retry mechanism only supports logging for monitoring. Check
+The `otelcol_exporter_enqueue_failed_spans`, `otelcol_exporter_enqueue_failed_metric_points` and `otelcol_exporter_enqueue_failed_log_records` indicate the number of span/metric points/log records failed to be added to the sending queue. This maybe cause by the queue is full of unsettled elements so you should decrease your sending rate or horizontally scale your collector.
+
+The queue/retry mechanism also supports logging for monitoring. Check
 the logs for messages like `"Dropping data because sending_queue is full"`.
 
 ### Receive Failures
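The monitoring guidance patch 07 introduces (and the next two patches polish) comes down to comparing the two gauges. A small sketch of that comparison, assuming the gauge values have already been scraped from the collector's telemetry endpoint; the 80% threshold is an arbitrary example, not a collector default:

package example

// queueUtilization reports how full the sending queue is as a fraction,
// given scraped values of otelcol_exporter_queue_size and
// otelcol_exporter_queue_capacity for a single exporter.
func queueUtilization(size, capacity int64) float64 {
	if capacity <= 0 {
		return 0
	}
	return float64(size) / float64(capacity)
}

// nearlyFull flags a queue above an example 80% threshold. Sustained
// values near 1.0 mean enqueue failures, and therefore dropped data,
// are imminent.
func nearlyFull(size, capacity int64) bool {
	return queueUtilization(size, capacity) > 0.8
}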
From 8d8c14a10936535e1e423538936eca441ebb4f9e Mon Sep 17 00:00:00 2001
From: Ziqi Zhao
Date: Tue, 19 Jul 2022 22:51:54 +0800
Subject: [PATCH 08/10] Update docs/monitoring.md

Co-authored-by: Tyler Helmuth <12352919+TylerHelmuth@users.noreply.github.com>
---
 docs/monitoring.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/monitoring.md b/docs/monitoring.md
index f824ed2aef4..90be9ba51b4 100644
--- a/docs/monitoring.md
+++ b/docs/monitoring.md
@@ -34,7 +34,7 @@ Most exporters offer a [queue/retry mechanism](../exporter/exporterhelper/README
 that is recommended as the retry mechanism for the Collector and as such should
 be used in any production deployment.
 
-The `otelcol_exporter_queue_capacity` indicates the capacity of the retry queue (in batches). The `otelcol_exporter_queue_size` indicates the current size of retry queue. So you can check those two metrics to see if queue size is enough for you load.
+The `otelcol_exporter_queue_capacity` indicates the capacity of the retry queue (in batches). The `otelcol_exporter_queue_size` indicates the current size of the retry queue. So you can use these two metrics to check if the queue capacity is enough for your workload.

From 5765c91b8c03eb9b043c649727d9c37ef630ef1b Mon Sep 17 00:00:00 2001
From: Ziqi Zhao
Date: Tue, 19 Jul 2022 22:52:04 +0800
Subject: [PATCH 09/10] Update docs/monitoring.md

Co-authored-by: Tyler Helmuth <12352919+TylerHelmuth@users.noreply.github.com>
---
 docs/monitoring.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/monitoring.md b/docs/monitoring.md
index 90be9ba51b4..3b73d8f85b1 100644
--- a/docs/monitoring.md
+++ b/docs/monitoring.md
@@ -36,7 +36,7 @@ be used in any production deployment.
 
 The `otelcol_exporter_queue_capacity` indicates the capacity of the retry queue (in batches). The `otelcol_exporter_queue_size` indicates the current size of the retry queue. So you can use these two metrics to check if the queue capacity is enough for your workload.
 
-The `otelcol_exporter_enqueue_failed_spans`, `otelcol_exporter_enqueue_failed_metric_points` and `otelcol_exporter_enqueue_failed_log_records` indicate the number of span/metric points/log records failed to be added to the sending queue. This maybe cause by the queue is full of unsettled elements so you should decrease your sending rate or horizontally scale your collector.
+The `otelcol_exporter_enqueue_failed_spans`, `otelcol_exporter_enqueue_failed_metric_points` and `otelcol_exporter_enqueue_failed_log_records` indicate the number of spans/metric points/log records that failed to be added to the sending queue. This may be caused by a queue full of unsettled elements, so you may need to decrease your sending rate or horizontally scale collectors.
 
 The queue/retry mechanism also supports logging for monitoring. Check
 the logs for messages like `"Dropping data because sending_queue is full"`.

From bc61d2645d1d41e58246d2ca310ab050769d9c5b Mon Sep 17 00:00:00 2001
From: Alex Boten
Date: Tue, 19 Jul 2022 08:17:15 -0700
Subject: [PATCH 10/10] Update CHANGELOG.md

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d39346cf7bc..08d00f8ecfc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@
 - `loggingexporter`: Decouple `loglevel` field from level of logged messages (#5678)
 - Expose `pcommon.NewSliceFromRaw` function (#5679)
 - `loggingexporter`: create the exporter's logger from the service's logger (#5677)
+- Add `otelcol_exporter_queue_capacity` metric to show the collector's exporter queue capacity (#5475)
 
 ### 🧰 Bug fixes 🧰
 
@@ -85,7 +86,6 @@
 
 ### 💡 Enhancements 💡
 
-- Add `otelcol_exporter_queue_capacity` metric to show the collector's exporter queue capacity (#5475)
 - Deprecate `HTTPClientSettings.ToClient` in favor of `HTTPClientSettings.ToClientWithHost` (#5584)
 - Use OpenCensus `metric` package for process metrics instead of `stats` package (#5486)
 - Update OTLP to v0.18.0 (#5530)
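The enqueue-failure metrics documented in patches 07 through 09 are cumulative counters, so the operationally interesting quantity is their change between scrapes rather than their absolute value. A sketch of that bookkeeping, assuming periodic scrapes; the reset handling mirrors what Prometheus-style rate functions do:

package example

// failureDelta returns how many new enqueue failures occurred between two
// consecutive scrapes of a cumulative counter such as
// otelcol_exporter_enqueue_failed_spans.
func failureDelta(prev, curr int64) int64 {
	if curr < prev {
		// The counter went backwards: the collector restarted and the
		// cumulative value reset, so everything seen now is new.
		return curr
	}
	return curr - prev
}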