Add workqueue prometheus metrics

nginx · Dec 4, 2020 · 6984981 · 6984981
1 parent 4285b29
commit 6984981
Show file tree

Hide file tree

Showing 5 changed files with 133 additions and 17 deletions.
diff --git a/cmd/nginx-ingress/main.go b/cmd/nginx-ingress/main.go
@@ -367,6 +367,7 @@ func main() {
 		managerCollector = collectors.NewLocalManagerMetricsCollector(constLabels)
 		controllerCollector = collectors.NewControllerMetricsCollector(*enableCustomResources, constLabels)
 		processCollector := collectors.NewNginxProcessesMetricsCollector(constLabels)
+		workQueueCollector := collectors.NewWorkQueueMetricsCollector(constLabels)
 
 		err = managerCollector.Register(registry)
 		if err != nil {
@@ -382,6 +383,11 @@ func main() {
 		if err != nil {
 			glog.Errorf("Error registering NginxProcess Prometheus metrics: %v", err)
 		}
+
+		err = workQueueCollector.Register(registry)
+		if err != nil {
+			glog.Errorf("Error registering WorkQueue Prometheus metrics: %v", err)
+		}
 	}
 
 	useFakeNginxManager := *proxyURL != ""

diff --git a/docs-web/logging-and-monitoring/prometheus.md b/docs-web/logging-and-monitoring/prometheus.md
@@ -27,7 +27,7 @@ The Ingress Controller exports the following metrics:
   * Exported by NGINX/NGINX Plus. Refer to the [NGINX Prometheus Exporter developer docs](https://github.com/nginxinc/nginx-prometheus-exporter#exported-metrics) to find more information about the exported metrics.
   * There is a Grafana dashboard for NGINX Plus metrics located in the root repo folder.
   * Calculated by the Ingress Controller:
-    *  `controller_upstream_server_response_latency_ms_count`. Bucketed response times from when NGINX establishes a connection to an upstream server to when the last byte of the response body is received by NGINX. **Note**: The metric for the upstream isn't available until traffic is sent to the upstream. The metric isn't enabled by default. To enable the metric, set the `-enable-latency-metrics` command-line argument.
+    * `controller_upstream_server_response_latency_ms_count`. Bucketed response times from when NGINX establishes a connection to an upstream server to when the last byte of the response body is received by NGINX. **Note**: The metric for the upstream isn't available until traffic is sent to the upstream. The metric isn't enabled by default. To enable the metric, set the `-enable-latency-metrics` command-line argument.
 * Ingress Controller metrics
   * `controller_nginx_reloads_total`. Number of successful NGINX reloads. This includes the label `reason` with 2 possible values `endpoints` (the reason for the reload was an endpoints update) and `other` (the reload was caused by something other than an endpoint update like an ingress update).
   * `controller_nginx_reload_errors_total`. Number of unsuccessful NGINX reloads.
@@ -37,6 +37,11 @@ The Ingress Controller exports the following metrics:
   * `controller_ingress_resources_total`. Number of handled Ingress resources. This metric includes the label type, that groups the Ingress resources by their type (regular, [minion or master](/nginx-ingress-controller/configuration/ingress-resources/cross-namespace-configuration)). **Note**: The metric doesn't count minions without a master.
   * `controller_virtualserver_resources_total`. Number of handled VirtualServer resources.
   * `controller_virtualserverroute_resources_total`. Number of handled VirtualServerRoute resources. **Note**: The metric counts only VirtualServerRoutes that have a reference from a VirtualServer.
+* Kubernetes Cluster metrics
+  * `controller_workqueue_depth`. Current depth of workqueue.
+  * `controller_workqueue_queue_duration_seconds`. How long in seconds an item stays in workqueue before being requested.
+  * `controller_workqueue_work_duration_seconds`. How long in seconds processing an item from workqueue takes.
+
 
 **Note**: all metrics have the namespace nginx_ingress. For example, nginx_ingress_controller_nginx_reloads_total.
 

diff --git a/internal/k8s/task_queue.go b/internal/k8s/task_queue.go
@@ -29,7 +29,7 @@ type taskQueue struct {
 // The sync function is called for every element inserted into the queue.
 func newTaskQueue(syncFn func(task)) *taskQueue {
 	return &taskQueue{
-		queue:      workqueue.New(),
+		queue:      workqueue.NewNamed("taskQueue"),
 		sync:       syncFn,
 		workerDone: make(chan struct{}),
 	}
@@ -55,7 +55,6 @@ func (tq *taskQueue) Enqueue(obj interface{}) {
 	}
 
 	glog.V(3).Infof("Adding an element with a key: %v", task.Key)
-
 	tq.queue.Add(task)
 }
 
@@ -103,30 +102,19 @@ func (tq *taskQueue) Shutdown() {
 // kind represents the kind of the Kubernetes resources of a task
 type kind int
 
+// resources
 const (
-	// ingress resource
 	ingress = iota
-	// endpoints resource
 	endpoints
-	// configMap resource
 	configMap
-	// secret resource
 	secret
-	// service resource
 	service
-	// virtualserver resource
 	virtualserver
-	// virtualServeRoute resource
 	virtualServerRoute
-	// globalConfiguration resource
 	globalConfiguration
-	// transportserver resource
 	transportserver
-	// policy resource
 	policy
-	// appProtectPolicy resource
 	appProtectPolicy
-	// appProtectlogconf resource
 	appProtectLogConf
 )
 
@@ -166,7 +154,7 @@ func newTask(key string, obj interface{}) (task, error) {
 		} else if objectKind == appProtectLogConfGVK.Kind {
 			k = appProtectLogConf
 		} else {
-			return task{}, fmt.Errorf("Unknow unstructured kind: %v", objectKind)
+			return task{}, fmt.Errorf("Unknown unstructured kind: %v", objectKind)
 		}
 	default:
 		return task{}, fmt.Errorf("Unknown type: %v", t)

diff --git a/internal/metrics/collectors/processes.go b/internal/metrics/collectors/processes.go
@@ -10,7 +10,7 @@ import (
 	"github.com/prometheus/client_golang/prometheus"
 )
 
-// NginxProcessesMetricsCollector implements NginxPorcessesCollector interface and prometheus.Collector interface
+// NginxProcessesMetricsCollector implements NginxProcessesCollector interface and prometheus.Collector interface
 type NginxProcessesMetricsCollector struct {
 	// Metrics
 	workerProcessTotal *prometheus.GaugeVec

diff --git a/internal/metrics/collectors/workqueue.go b/internal/metrics/collectors/workqueue.go
@@ -0,0 +1,117 @@
+package collectors
+
+import (
+	"github.com/prometheus/client_golang/prometheus"
+	"k8s.io/client-go/util/workqueue"
+)
+
+const workqueueSubsystem = "workqueue"
+
+// WorkQueueMetricsCollector implements the prometheus.Collector interface and the workqueue.MetricsProvider methods
+type WorkQueueMetricsCollector struct {
+	depth        *prometheus.GaugeVec
+	latency      *prometheus.HistogramVec
+	workDuration *prometheus.HistogramVec
+}
+
+// NewWorkQueueMetricsCollector creates a new WorkQueueMetricsCollector whose depth, queue duration and work duration vectors carry constLabels plus a "name" label
+func NewWorkQueueMetricsCollector(constLabels map[string]string) *WorkQueueMetricsCollector {
+	return &WorkQueueMetricsCollector{
+		depth: prometheus.NewGaugeVec(
+			prometheus.GaugeOpts{
+				Namespace:   metricsNamespace,
+				Subsystem:   workqueueSubsystem,
+				Name:        "depth",
+				Help:        "Current depth of workqueue",
+				ConstLabels: constLabels,
+			},
+			[]string{"name"},
+		),
+		latency: prometheus.NewHistogramVec(
+			prometheus.HistogramOpts{
+				Namespace:   metricsNamespace,
+				Subsystem:   workqueueSubsystem,
+				Name:        "queue_duration_seconds",
+				Help:        "How long in seconds an item stays in workqueue before being requested",
+				Buckets:     prometheus.ExponentialBuckets(10e-9, 10, 10),
+				ConstLabels: constLabels,
+			},
+			[]string{"name"},
+		),
+		workDuration: prometheus.NewHistogramVec(
+			prometheus.HistogramOpts{
+				Namespace:   metricsNamespace,
+				Subsystem:   workqueueSubsystem,
+				Name:        "work_duration_seconds",
+				Help:        "How long in seconds processing an item from workqueue takes",
+				Buckets:     prometheus.ExponentialBuckets(10e-9, 10, 10),
+				ConstLabels: constLabels,
+			},
+			[]string{"name"},
+		),
+	}
+}
+
+// Collect implements the prometheus.Collector interface Collect method by forwarding ch to the depth, latency and workDuration vectors
+func (wqc *WorkQueueMetricsCollector) Collect(ch chan<- prometheus.Metric) {
+	wqc.depth.Collect(ch)
+	wqc.latency.Collect(ch)
+	wqc.workDuration.Collect(ch)
+}
+
+// Describe implements the prometheus.Collector interface Describe method by forwarding ch to the depth, latency and workDuration vectors
+func (wqc *WorkQueueMetricsCollector) Describe(ch chan<- *prometheus.Desc) {
+	wqc.depth.Describe(ch)
+	wqc.latency.Describe(ch)
+	wqc.workDuration.Describe(ch)
+}
+
+// Register sets the collector as the workqueue metrics provider and then registers it with registry, returning the registration error if any
+func (wqc *WorkQueueMetricsCollector) Register(registry *prometheus.Registry) error {
+	workqueue.SetProvider(wqc)
+	return registry.Register(wqc)
+}
+
+// NewDepthMetric implements the workqueue.MetricsProvider interface NewDepthMetric method; it returns the depth gauge labeled with name
+func (wqc *WorkQueueMetricsCollector) NewDepthMetric(name string) workqueue.GaugeMetric {
+	return wqc.depth.WithLabelValues(name)
+}
+
+// NewLatencyMetric implements the workqueue.MetricsProvider interface NewLatencyMetric method; it returns the latency histogram labeled with name
+func (wqc *WorkQueueMetricsCollector) NewLatencyMetric(name string) workqueue.HistogramMetric {
+	return wqc.latency.WithLabelValues(name)
+
+}
+
+// NewWorkDurationMetric implements the workqueue.MetricsProvider interface NewWorkDurationMetric method; it returns the workDuration histogram labeled with name
+func (wqc *WorkQueueMetricsCollector) NewWorkDurationMetric(name string) workqueue.HistogramMetric {
+	return wqc.workDuration.WithLabelValues(name)
+}
+
+// noopMetric discards every update; its Inc, Dec, Set and Observe methods let it be returned as a workqueue.CounterMetric or workqueue.SettableGaugeMetric
+type noopMetric struct{}
+
+func (noopMetric) Inc()            {}
+func (noopMetric) Dec()            {}
+func (noopMetric) Set(float64)     {}
+func (noopMetric) Observe(float64) {}
+
+// NewAddsMetric implements the workqueue.MetricsProvider interface NewAddsMetric method; it ignores the queue name and returns a no-op metric
+func (*WorkQueueMetricsCollector) NewAddsMetric(string) workqueue.CounterMetric {
+	return noopMetric{}
+}
+
+// NewUnfinishedWorkSecondsMetric implements the workqueue.MetricsProvider interface NewUnfinishedWorkSecondsMetric method; it ignores the queue name and returns a no-op metric
+func (*WorkQueueMetricsCollector) NewUnfinishedWorkSecondsMetric(string) workqueue.SettableGaugeMetric {
+	return noopMetric{}
+}
+
+// NewLongestRunningProcessorSecondsMetric implements the workqueue.MetricsProvider interface NewLongestRunningProcessorSecondsMetric method; it ignores the queue name and returns a no-op metric
+func (*WorkQueueMetricsCollector) NewLongestRunningProcessorSecondsMetric(string) workqueue.SettableGaugeMetric {
+	return noopMetric{}
+}
+
+// NewRetriesMetric implements the workqueue.MetricsProvider interface NewRetriesMetric method; it ignores the queue name and returns a no-op metric
+func (*WorkQueueMetricsCollector) NewRetriesMetric(string) workqueue.CounterMetric {
+	return noopMetric{}
+}