Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add stream metrics support #16

Merged
merged 5 commits into from
Oct 15, 2018
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,10 @@ Usage of ./nginx-prometheus-exporter:
* [HTTP](http://nginx.org/en/docs/http/ngx_http_api_module.html#http_).
* [SSL](http://nginx.org/en/docs/http/ngx_http_api_module.html#def_nginx_ssl_object).
* [HTTP Server Zones](http://nginx.org/en/docs/http/ngx_http_api_module.html#def_nginx_http_server_zone).
* [Stream Server Zones](http://nginx.org/en/docs/http/ngx_http_api_module.html#def_nginx_stream_server_zone).
* [HTTP Upstreams](http://nginx.org/en/docs/http/ngx_http_api_module.html#def_nginx_http_upstream). Note: for the `state` metric, the string values are converted to float64 using the following rule: `"up"` -> `1.0`, `"draining"` -> `2.0`, `"down"` -> `3.0`, `"unavail"` -> `4.0`, `"checking"` -> `5.0`, `"unhealthy"` -> `6.0`.
* [Stream Upstreams](http://nginx.org/en/docs/http/ngx_http_api_module.html#def_nginx_stream_upstream). Note: for the `state` metric, the string values are converted to float64 using the following rule: `"up"` -> `1.0`, `"down"` -> `3.0`, `"unavail"` -> `4.0`, `"checking"` -> `5.0`, `"unhealthy"` -> `6.0`.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Upsteams -> Upstreams
please fix for http as well



Connect to the `/metrics` page of the running exporter to see the complete list of metrics along with their descriptions. Note: to see server zones related metrics you must configure [status zones](https://nginx.org/en/docs/http/ngx_http_status_module.html#status_zone) and to see upstream related metrics you must configure upstreams with a [shared memory zone](http://nginx.org/en/docs/http/ngx_http_upstream_module.html#zone).

Expand Down
121 changes: 118 additions & 3 deletions collector/nginx_plus.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,15 @@ import (

// NginxPlusCollector collects NGINX Plus metrics. It implements prometheus.Collector interface.
type NginxPlusCollector struct {
nginxClient *plusclient.NginxClient
totalMetrics, serverZoneMetrics, upstreamMetrics, upstreamServerMetrics map[string]*prometheus.Desc
mutex sync.Mutex
nginxClient *plusclient.NginxClient
totalMetrics map[string]*prometheus.Desc
serverZoneMetrics map[string]*prometheus.Desc
upstreamMetrics map[string]*prometheus.Desc
upstreamServerMetrics map[string]*prometheus.Desc
streamServerZoneMetrics map[string]*prometheus.Desc
streamUpstreamMetrics map[string]*prometheus.Desc
streamUpstreamServerMetrics map[string]*prometheus.Desc
mutex sync.Mutex
}

// NewNginxPlusCollector creates an NginxPlusCollector.
Expand Down Expand Up @@ -42,10 +48,24 @@ func NewNginxPlusCollector(nginxClient *plusclient.NginxClient, namespace string
"received": newServerZoneMetric(namespace, "received", "Bytes received from clients", nil),
"sent": newServerZoneMetric(namespace, "sent", "Bytes sent to clients", nil),
},
streamServerZoneMetrics: map[string]*prometheus.Desc{
"processing": newStreamServerZoneMetric(namespace, "processing", "Client connections that are currently being processed", nil),
"connections": newStreamServerZoneMetric(namespace, "connections", "Total connections", nil),
"sessions_2xx": newStreamServerZoneMetric(namespace, "sessions", "Total sessions completed", prometheus.Labels{"code": "2xx"}),
"sessions_4xx": newStreamServerZoneMetric(namespace, "sessions", "Total sessions completed", prometheus.Labels{"code": "4xx"}),
"sessions_5xx": newStreamServerZoneMetric(namespace, "sessions", "Total sessions completed", prometheus.Labels{"code": "5xx"}),
"sessions_total": newStreamServerZoneMetric(namespace, "sessions", "Total sessions completed", prometheus.Labels{"code": "total"}),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not required. the total number can be obtained in Prometheus by aggregating 2xx,4xx,5xx
Also, see responses metrics for http zones

"discarded": newStreamServerZoneMetric(namespace, "discarded", "Connections completed without creating a session", nil),
"received": newStreamServerZoneMetric(namespace, "received", "Bytes received from clients", nil),
"sent": newStreamServerZoneMetric(namespace, "sent", "Bytes sent to clients", nil),
},
upstreamMetrics: map[string]*prometheus.Desc{
"keepalives": newUpstreamMetric(namespace, "keepalives", "Idle keepalive connections"),
"zombies": newUpstreamMetric(namespace, "zombies", "Servers removed from the group but still processing active client requests"),
},
streamUpstreamMetrics: map[string]*prometheus.Desc{
"zombies": newStreamUpstreamMetric(namespace, "zombies", "Servers removed from the group but still processing active client connections"),
},
upstreamServerMetrics: map[string]*prometheus.Desc{
"state": newUpstreamServerMetric(namespace, "state", "Current state", nil),
"active": newUpstreamServerMetric(namespace, "active", "Active connections", nil),
Expand All @@ -65,6 +85,22 @@ func NewNginxPlusCollector(nginxClient *plusclient.NginxClient, namespace string
"health_checks_fails": newUpstreamServerMetric(namespace, "health_checks_fails", "Failed health checks", nil),
"health_checks_unhealthy": newUpstreamServerMetric(namespace, "health_checks_unhealthy", "How many times the server became unhealthy (state 'unhealthy')", nil),
},
streamUpstreamServerMetrics: map[string]*prometheus.Desc{
"state": newStreamUpstreamServerMetric(namespace, "state", "Current state"),
"active": newStreamUpstreamServerMetric(namespace, "active", "Active connections"),
"sent": newStreamUpstreamServerMetric(namespace, "sent", "Bytes sent to this server"),
"received": newStreamUpstreamServerMetric(namespace, "received", "Bytes received to this server"),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

-> Bytes received from this server.

"fails": newStreamUpstreamServerMetric(namespace, "fails", "Number of unsuccessful attempts to communicate with the server"),
"unavail": newStreamUpstreamServerMetric(namespace, "unavail", "How many times the server became unavailable for client connections (state 'unavail') due to the number of unsuccessful attempts reaching the max_fails threshold"),
"connections": newStreamUpstreamServerMetric(namespace, "connections", "Total number of client connections forwarded to this server"),
"connect_time": newStreamUpstreamServerMetric(namespace, "connect_time", "Average time to connect to the upstream server"),
"first_byte_time": newStreamUpstreamServerMetric(namespace, "first_byte_time", "The average time to receive the first byte of data"),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

-> Average time to receive the first byte of data

"response_time": newStreamUpstreamServerMetric(namespace, "response_time", "Average time to get the full response from the server"),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

-> Average time to receive the last byte of data

"health_checks_checks": newStreamUpstreamServerMetric(namespace, "health_checks_checks", "Total health check requests"),
"health_checks_fails": newStreamUpstreamServerMetric(namespace, "health_checks_fails", "Failed health checks"),
"health_checks_unhealthy": newStreamUpstreamServerMetric(namespace, "health_checks_unhealthy", "How many times the server became unhealthy (state 'unhealthy')"),
"downtime": newStreamUpstreamServerMetric(namespace, "downtime", "Total time the server was in the 'unavail', 'checking', and 'unhealthy' states"),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please remove downtime metric

Copy link
Contributor Author

@Dean-Coakley Dean-Coakley Oct 12, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pleshakov

ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["downtime"],
here too?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes

},
}
}

Expand All @@ -83,6 +119,15 @@ func (c *NginxPlusCollector) Describe(ch chan<- *prometheus.Desc) {
for _, m := range c.upstreamServerMetrics {
ch <- m
}
for _, m := range c.streamServerZoneMetrics {
ch <- m
}
for _, m := range c.streamUpstreamMetrics {
ch <- m
}
for _, m := range c.streamUpstreamServerMetrics {
ch <- m
}
}

// Collect fetches metrics from NGINX Plus and sends them to the provided channel.
Expand Down Expand Up @@ -138,6 +183,27 @@ func (c *NginxPlusCollector) Collect(ch chan<- prometheus.Metric) {
prometheus.CounterValue, float64(zone.Sent), name)
}

for name, zone := range stats.StreamServerZones {
ch <- prometheus.MustNewConstMetric(c.streamServerZoneMetrics["processing"],
prometheus.GaugeValue, float64(zone.Processing), name)
ch <- prometheus.MustNewConstMetric(c.streamServerZoneMetrics["connections"],
prometheus.CounterValue, float64(zone.Connections), name)
ch <- prometheus.MustNewConstMetric(c.streamServerZoneMetrics["sessions_2xx"],
prometheus.CounterValue, float64(zone.Sessions.Sessions2xx), name)
ch <- prometheus.MustNewConstMetric(c.streamServerZoneMetrics["sessions_4xx"],
prometheus.CounterValue, float64(zone.Sessions.Sessions4xx), name)
ch <- prometheus.MustNewConstMetric(c.streamServerZoneMetrics["sessions_5xx"],
prometheus.CounterValue, float64(zone.Sessions.Sessions5xx), name)
ch <- prometheus.MustNewConstMetric(c.streamServerZoneMetrics["sessions_total"],
prometheus.CounterValue, float64(zone.Sessions.Total), name)
ch <- prometheus.MustNewConstMetric(c.streamServerZoneMetrics["discarded"],
prometheus.CounterValue, float64(zone.Discarded), name)
ch <- prometheus.MustNewConstMetric(c.streamServerZoneMetrics["received"],
prometheus.CounterValue, float64(zone.Received), name)
ch <- prometheus.MustNewConstMetric(c.streamServerZoneMetrics["sent"],
prometheus.CounterValue, float64(zone.Sent), name)
}

for name, upstream := range stats.Upstreams {
for _, peer := range upstream.Peers {
ch <- prometheus.MustNewConstMetric(c.upstreamServerMetrics["state"],
Expand Down Expand Up @@ -183,6 +249,43 @@ func (c *NginxPlusCollector) Collect(ch chan<- prometheus.Metric) {
ch <- prometheus.MustNewConstMetric(c.upstreamMetrics["zombies"],
prometheus.GaugeValue, float64(upstream.Zombies), name)
}

for name, upstream := range stats.StreamUpstreams {
for _, peer := range upstream.Peers {
ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["state"],
prometheus.GaugeValue, upstreamServerStates[peer.State], name, peer.Server)
ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["active"],
prometheus.GaugeValue, float64(peer.Active), name, peer.Server)
ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["connections"],
prometheus.GaugeValue, float64(peer.Connections), name, peer.Server)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Counter

ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["connect_time"],
prometheus.GaugeValue, float64(peer.ConnectTime), name, peer.Server)
ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["first_byte_time"],
prometheus.GaugeValue, float64(peer.FirstByteTime), name, peer.Server)
ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["response_time"],
prometheus.GaugeValue, float64(peer.ResponseTime), name, peer.Server)
ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["sent"],
prometheus.CounterValue, float64(peer.Sent), name, peer.Server)
ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["received"],
prometheus.CounterValue, float64(peer.Received), name, peer.Server)
ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["fails"],
prometheus.CounterValue, float64(peer.Fails), name, peer.Server)
ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["unavail"],
prometheus.CounterValue, float64(peer.Unavail), name, peer.Server)
if peer.HealthChecks != (plusclient.HealthChecks{}) {
ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["health_checks_checks"],
prometheus.CounterValue, float64(peer.HealthChecks.Checks), name, peer.Server)
ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["health_checks_fails"],
prometheus.CounterValue, float64(peer.HealthChecks.Fails), name, peer.Server)
ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["health_checks_unhealthy"],
prometheus.CounterValue, float64(peer.HealthChecks.Unhealthy), name, peer.Server)
}
ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["downtime"],
prometheus.GaugeValue, float64(peer.Downtime), name, peer.Server)
}
ch <- prometheus.MustNewConstMetric(c.streamUpstreamMetrics["zombies"],
prometheus.GaugeValue, float64(upstream.Zombies), name)
}
}

var upstreamServerStates = map[string]float64{
Expand All @@ -198,10 +301,22 @@ func newServerZoneMetric(namespace string, metricName string, docString string,
return prometheus.NewDesc(prometheus.BuildFQName(namespace, "server_zone", metricName), docString, []string{"server_zone"}, constLabels)
}

// newStreamServerZoneMetric returns the descriptor for a stream server zone metric.
// The metric name is produced by prometheus.BuildFQName from namespace, the
// "stream_server_zone" subsystem and metricName. The descriptor declares a single
// variable label, "server_zone", and carries constLabels as its constant labels.
func newStreamServerZoneMetric(namespace string, metricName string, docString string, constLabels prometheus.Labels) *prometheus.Desc {
	fqName := prometheus.BuildFQName(namespace, "stream_server_zone", metricName)
	variableLabels := []string{"server_zone"}
	return prometheus.NewDesc(fqName, docString, variableLabels, constLabels)
}

// newUpstreamMetric returns the descriptor for an upstream-level metric.
// The metric name is produced by prometheus.BuildFQName from namespace, the
// "upstream" subsystem and metricName. The descriptor declares a single variable
// label, "upstream", and has no constant labels.
func newUpstreamMetric(namespace string, metricName string, docString string) *prometheus.Desc {
	fqName := prometheus.BuildFQName(namespace, "upstream", metricName)
	variableLabels := []string{"upstream"}
	return prometheus.NewDesc(fqName, docString, variableLabels, nil)
}

// newStreamUpstreamMetric returns the descriptor for a stream upstream-level metric.
// The metric name is produced by prometheus.BuildFQName from namespace, the
// "stream_upstream" subsystem and metricName. The descriptor declares a single
// variable label, "upstream", and has no constant labels.
func newStreamUpstreamMetric(namespace string, metricName string, docString string) *prometheus.Desc {
	fqName := prometheus.BuildFQName(namespace, "stream_upstream", metricName)
	variableLabels := []string{"upstream"}
	return prometheus.NewDesc(fqName, docString, variableLabels, nil)
}

// newUpstreamServerMetric returns the descriptor for a per-server upstream metric.
// The metric name is produced by prometheus.BuildFQName from namespace, the
// "upstream_server" subsystem and metricName. The descriptor declares two variable
// labels, "upstream" and "server" (in that order), and carries constLabels as its
// constant labels.
func newUpstreamServerMetric(namespace string, metricName string, docString string, constLabels prometheus.Labels) *prometheus.Desc {
	fqName := prometheus.BuildFQName(namespace, "upstream_server", metricName)
	variableLabels := []string{"upstream", "server"}
	return prometheus.NewDesc(fqName, docString, variableLabels, constLabels)
}

// newStreamUpstreamServerMetric returns the descriptor for a per-server stream
// upstream metric. The metric name is produced by prometheus.BuildFQName from
// namespace, the "stream_upstream_server" subsystem and metricName. The descriptor
// declares two variable labels, "upstream" and "server" (in that order), and has
// no constant labels.
func newStreamUpstreamServerMetric(namespace string, metricName string, docString string) *prometheus.Desc {
	fqName := prometheus.BuildFQName(namespace, "stream_upstream_server", metricName)
	variableLabels := []string{"upstream", "server"}
	return prometheus.NewDesc(fqName, docString, variableLabels, nil)
}