diff --git a/CHANGELOG.md b/CHANGELOG.md
index 00caf6ec5df..08d00f8ecfc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@
 - `loggingexporter`: Decouple `loglevel` field from level of logged messages (#5678)
 - Expose `pcommon.NewSliceFromRaw` function (#5679)
 - `loggingexporter`: create the exporter's logger from the service's logger (#5677)
+- Add `otelcol_exporter_queue_capacity` metric to show the collector's exporter queue capacity (#5475)
 
 ### 🧰 Bug fixes 🧰
 
diff --git a/docs/monitoring.md b/docs/monitoring.md
index 90c0810c993..3b73d8f85b1 100644
--- a/docs/monitoring.md
+++ b/docs/monitoring.md
@@ -34,9 +34,11 @@ Most exporters offer a [queue/retry mechanism](../exporter/exporterhelper/README
 that is recommended as the retry mechanism for the Collector and as such should
 be used in any production deployment.
 
-**TODO:** Add metric to monitor queue length.
+The `otelcol_exporter_queue_capacity` metric indicates the capacity of the retry queue (in batches), and `otelcol_exporter_queue_size` indicates its current size. You can compare these two metrics to check whether the queue capacity is sufficient for your workload.
 
-Currently, the queue/retry mechanism only supports logging for monitoring. Check
+The `otelcol_exporter_enqueue_failed_spans`, `otelcol_exporter_enqueue_failed_metric_points` and `otelcol_exporter_enqueue_failed_log_records` metrics indicate the number of spans, metric points and log records that failed to be added to the sending queue. This may be caused by a queue full of unsettled elements, so you may need to decrease your sending rate or scale the collectors horizontally.
+
+The queue/retry mechanism also supports logging for monitoring. Check
 the logs for messages like `"Dropping data because sending_queue is full"`.
 
 ### Receive Failures
diff --git a/exporter/exporterhelper/obsreport.go b/exporter/exporterhelper/obsreport.go
index 324857f6788..011c3fc99c2 100644
--- a/exporter/exporterhelper/obsreport.go
+++ b/exporter/exporterhelper/obsreport.go
@@ -40,6 +40,7 @@ func init() {
 type instruments struct {
 	registry                    *metric.Registry
 	queueSize                   *metric.Int64DerivedGauge
+	queueCapacity               *metric.Int64DerivedGauge
 	failedToEnqueueTraceSpans   *metric.Int64Cumulative
 	failedToEnqueueMetricPoints *metric.Int64Cumulative
 	failedToEnqueueLogRecords   *metric.Int64Cumulative
@@ -55,6 +56,12 @@ func newInstruments(registry *metric.Registry) *instruments {
 		metric.WithLabelKeys(obsmetrics.ExporterKey),
 		metric.WithUnit(metricdata.UnitDimensionless))
 
+	insts.queueCapacity, _ = registry.AddInt64DerivedGauge(
+		obsmetrics.ExporterKey+"/queue_capacity",
+		metric.WithDescription("Fixed capacity of the retry queue (in batches)"),
+		metric.WithLabelKeys(obsmetrics.ExporterKey),
+		metric.WithUnit(metricdata.UnitDimensionless))
+
 	insts.failedToEnqueueTraceSpans, _ = registry.AddInt64Cumulative(
 		obsmetrics.ExporterKey+"/enqueue_failed_spans",
 		metric.WithDescription("Number of spans failed to be added to the sending queue."),
diff --git a/exporter/exporterhelper/queued_retry_experimental.go b/exporter/exporterhelper/queued_retry_experimental.go
index 30aadebfeb3..a808e212c71 100644
--- a/exporter/exporterhelper/queued_retry_experimental.go
+++ b/exporter/exporterhelper/queued_retry_experimental.go
@@ -219,6 +219,12 @@ func (qrs *queuedRetrySender) start(ctx context.Context, host component.Host) er
 		if err != nil {
 			return fmt.Errorf("failed to create retry queue size metric: %w", err)
 		}
+		err = globalInstruments.queueCapacity.UpsertEntry(func() int64 {
+			return int64(qrs.cfg.QueueSize)
+		}, metricdata.NewLabelValue(qrs.fullName()))
+		if err != nil {
+			return fmt.Errorf("failed to create retry queue capacity metric: %w", err)
+		}
 	}
 
 	return nil
diff --git a/exporter/exporterhelper/queued_retry_inmemory.go b/exporter/exporterhelper/queued_retry_inmemory.go
index 7349a85de54..7b76221a78f 100644
--- a/exporter/exporterhelper/queued_retry_inmemory.go
+++ b/exporter/exporterhelper/queued_retry_inmemory.go
@@ -128,6 +128,12 @@ func (qrs *queuedRetrySender) start(context.Context, component.Host) error {
 		if err != nil {
 			return fmt.Errorf("failed to create retry queue size metric: %w", err)
 		}
+		err = globalInstruments.queueCapacity.UpsertEntry(func() int64 {
+			return int64(qrs.cfg.QueueSize)
+		}, metricdata.NewLabelValue(qrs.fullName))
+		if err != nil {
+			return fmt.Errorf("failed to create retry queue capacity metric: %w", err)
+		}
 	}
 
 	return nil
diff --git a/exporter/exporterhelper/queued_retry_test.go b/exporter/exporterhelper/queued_retry_test.go
index 7edf7788e8c..b7cebb39c8b 100644
--- a/exporter/exporterhelper/queued_retry_test.go
+++ b/exporter/exporterhelper/queued_retry_test.go
@@ -342,6 +342,7 @@ func TestQueuedRetry_QueueMetricsReported(t *testing.T) {
 	be := newBaseExporter(&defaultExporterCfg, componenttest.NewNopExporterCreateSettings(), fromOptions(WithRetry(rCfg), WithQueue(qCfg)), "", nopRequestUnmarshaler())
 	require.NoError(t, be.Start(context.Background(), componenttest.NewNopHost()))
 
+	checkValueForGlobalManager(t, defaultExporterTags, int64(5000), "exporter/queue_capacity")
 	for i := 0; i < 7; i++ {
 		require.NoError(t, be.sender.send(newErrorRequest(context.Background())))
 	}
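The monitoring documentation above explains how the new queue metrics are meant to be read together. As a rough illustration only (not part of the patch), here is a minimal Go sketch of that check. It assumes the Collector serves its own Prometheus telemetry at the default `http://localhost:8888/metrics` address, that the exposed names carry the `otelcol_` prefix quoted in the docs, that both queue metrics share the same label set, and that sample lines follow the plain Prometheus text format; the 80% threshold is arbitrary.

```go
package main

import (
	"bufio"
	"fmt"
	"net/http"
	"strconv"
	"strings"
)

// scrape fetches a Prometheus text-format endpoint and returns the value of
// every sample whose metric name starts with prefix, keyed by the full
// `name{labels}` part of the sample line.
func scrape(url, prefix string) (map[string]float64, error) {
	resp, err := http.Get(url)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	out := map[string]float64{}
	sc := bufio.NewScanner(resp.Body)
	for sc.Scan() {
		line := sc.Text()
		if strings.HasPrefix(line, "#") || !strings.HasPrefix(line, prefix) {
			continue
		}
		// Assumed sample format: name{label="value"} 1234
		idx := strings.LastIndex(line, " ")
		if idx < 0 {
			continue
		}
		v, err := strconv.ParseFloat(strings.TrimSpace(line[idx+1:]), 64)
		if err != nil {
			continue
		}
		out[line[:idx]] = v
	}
	return out, sc.Err()
}

func main() {
	// Assumption: default address of the Collector's self-telemetry endpoint.
	const endpoint = "http://localhost:8888/metrics"

	capacities, err := scrape(endpoint, "otelcol_exporter_queue_capacity")
	if err != nil {
		panic(err)
	}
	sizes, err := scrape(endpoint, "otelcol_exporter_queue_size")
	if err != nil {
		panic(err)
	}

	// Compare current size against capacity per exporter label set and warn
	// when the queue is more than 80% full (threshold chosen arbitrarily).
	for key, capacity := range capacities {
		sizeKey := strings.Replace(key, "queue_capacity", "queue_size", 1)
		size, ok := sizes[sizeKey]
		if !ok || capacity == 0 {
			continue
		}
		ratio := size / capacity
		fmt.Printf("%s: %.0f/%.0f batches (%.0f%%)\n", key, size, capacity, 100*ratio)
		if ratio > 0.8 {
			fmt.Println("  WARNING: sending queue close to capacity; expect enqueue failures")
		}
	}
}
```

In practice the same check would more likely be expressed as an alert on the ratio of `otelcol_exporter_queue_size` to `otelcol_exporter_queue_capacity` in whatever backend scrapes the Collector, rather than polled by a standalone program.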