From 95a5907e0d8c46f9f1db3115092b915a585a0e3d Mon Sep 17 00:00:00 2001 From: Wangchong Zhou Date: Tue, 27 Apr 2021 00:44:04 +0800 Subject: [PATCH] fix(exporter) attach subsystem label to memory stats (#118) Memory stats in HTTP and Stream subsystem are independent, attaching the label so prometheus won't complain about duplicate metrics --- README.md | 18 ++++++- kong/plugins/prometheus/exporter.lua | 19 ++++---- spec/02-access_spec.lua | 73 ++++++++++++++++++++++++++-- spec/04-status_api_spec.lua | 21 ++++++-- spec/05-enterprise-exporter_spec.lua | 4 +- 5 files changed, 115 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 70118ae..8585c33 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,10 @@ This plugin exposes metrics in [Prometheus Exposition format](https://github.com - *DB reachability*: Can the Kong node reach it's Database or not (Guage 0/1). - *Connections*: Various NGINX connection metrics like active, reading, writing, accepted connections. +- *Memory Usage*: + - *Shared dict usage*: Memory usage for each shared dictionary in bytes. + - *Shared dict capacity*: Capacity for each shared dictionary in bytes. + - *Lua VM memory usage*: Memory usage for Lua VM on each worker in bytes. ### Grafana Dashboard @@ -132,7 +136,6 @@ kong_latency_bucket{type="kong",service="google",le="00001.0"} 1 kong_latency_bucket{type="kong",service="google",le="00002.0"} 1 . . -. 
kong_latency_bucket{type="kong",service="google",le="+Inf"} 2 kong_latency_bucket{type="request",service="google",le="00300.0"} 1 kong_latency_bucket{type="request",service="google",le="00400.0"} 1 @@ -159,6 +162,19 @@ kong_nginx_http_current_connections{state="reading"} 0 kong_nginx_http_current_connections{state="total"} 8 kong_nginx_http_current_connections{state="waiting"} 0 kong_nginx_http_current_connections{state="writing"} 1 +# HELP kong_memory_lua_shared_dict_bytes Allocated slabs in bytes in a shared_dict +# TYPE kong_memory_lua_shared_dict_bytes gauge +kong_memory_lua_shared_dict_bytes{shared_dict="kong",kong_subsystem="http"} 40960 +. +. +# HELP kong_memory_lua_shared_dict_total_bytes Total capacity in bytes of a shared_dict +# TYPE kong_memory_lua_shared_dict_total_bytes gauge +kong_memory_lua_shared_dict_total_bytes{shared_dict="kong",kong_subsystem="http"} 5242880 +. +. +# HELP kong_memory_workers_lua_vms_bytes Allocated bytes in worker Lua VM +# TYPE kong_memory_workers_lua_vms_bytes gauge +kong_memory_workers_lua_vms_bytes{pid="7281",kong_subsystem="http"} 41124353 # HELP kong_nginx_metric_errors_total Number of nginx-lua-prometheus errors # TYPE kong_nginx_metric_errors_total counter kong_nginx_metric_errors_total 0 diff --git a/kong/plugins/prometheus/exporter.lua b/kong/plugins/prometheus/exporter.lua index 291816e..8857415 100644 --- a/kong/plugins/prometheus/exporter.lua +++ b/kong/plugins/prometheus/exporter.lua @@ -24,6 +24,7 @@ if pok then enterprise = require("kong.plugins.prometheus.enterprise.exporter") end +local kong_subsystem = ngx.config.subsystem local function init() local shm = "prometheus_metrics" @@ -35,7 +36,7 @@ local function init() prometheus = require("kong.plugins.prometheus.prometheus").init(shm, "kong_") -- global metrics - if ngx.config.subsystem == "http" then + if kong_subsystem == "http" then metrics.connections = prometheus:gauge("nginx_http_current_connections", "Number of HTTP connections", {"state"}) @@ -56,23 
+57,23 @@ local function init() local memory_stats = {} memory_stats.worker_vms = prometheus:gauge("memory_workers_lua_vms_bytes", "Allocated bytes in worker Lua VM", - {"pid"}) + {"pid", "kong_subsystem"}) memory_stats.shms = prometheus:gauge("memory_lua_shared_dict_bytes", "Allocated slabs in bytes in a shared_dict", - {"shared_dict"}) + {"shared_dict", "kong_subsystem"}) memory_stats.shm_capacity = prometheus:gauge("memory_lua_shared_dict_total_bytes", "Total capacity in bytes of a shared_dict", - {"shared_dict"}) + {"shared_dict", "kong_subsystem"}) local res = kong.node.get_memory_stats() for shm_name, value in pairs(res.lua_shared_dicts) do - memory_stats.shm_capacity:set(value.capacity, {shm_name}) + memory_stats.shm_capacity:set(value.capacity, { shm_name, kong_subsystem }) end metrics.memory_stats = memory_stats -- per service/route - if ngx.config.subsystem == "http" then + if kong_subsystem == "http" then metrics.status = prometheus:counter("http_status", "HTTP status codes per service/route in Kong", {"service", "route", "code"}) @@ -129,7 +130,7 @@ end local log -if ngx.config.subsystem == "http" then +if kong_subsystem == "http" then function log(message, serialized) if not metrics then kong.log.err("prometheus: can not log metrics because of an initialization " @@ -324,11 +325,11 @@ local function metric_data() -- memory stats local res = kong.node.get_memory_stats() for shm_name, value in pairs(res.lua_shared_dicts) do - metrics.memory_stats.shms:set(value.allocated_slabs, {shm_name}) + metrics.memory_stats.shms:set(value.allocated_slabs, { shm_name, kong_subsystem }) end for i = 1, #res.workers_lua_vms do metrics.memory_stats.worker_vms:set(res.workers_lua_vms[i].http_allocated_gc, - {res.workers_lua_vms[i].pid}) + { res.workers_lua_vms[i].pid, kong_subsystem }) end if enterprise then diff --git a/spec/02-access_spec.lua b/spec/02-access_spec.lua index 1425240..196b976 100644 --- a/spec/02-access_spec.lua +++ b/spec/02-access_spec.lua @@ -285,7 
+285,10 @@ describe("Plugin: prometheus (access)", function() path = "/metrics", }) local body = assert.res_status(200, res) - assert.matches('kong_memory_workers_lua_vms_bytes', body, nil, true) + assert.matches('kong_memory_workers_lua_vms_bytes{pid="%d+",kong_subsystem="http"} %d+', body) + if stream_available then + assert.matches('kong_memory_workers_lua_vms_bytes{pid="%d+",kong_subsystem="stream"} %d+', body) + end assert.matches('kong_nginx_metric_errors_total 0', body, nil, true) end) @@ -297,9 +300,12 @@ describe("Plugin: prometheus (access)", function() }) local body = assert.res_status(200, res) assert.matches('kong_memory_lua_shared_dict_total_bytes' .. - '{shared_dict="prometheus_metrics"} 5242880', body, nil, true) - assert.matches('kong_memory_lua_shared_dict_bytes' .. - '{shared_dict="prometheus_metrics"}', body, nil, true) + '{shared_dict="prometheus_metrics",kong_subsystem="http"} %d+', body) + -- TODO: uncomment below once the ngx.shared iterator in stream is fixed + -- if stream_available then + -- assert.matches('kong_memory_lua_shared_dict_total_bytes' .. 
+ -- '{shared_dict="stream_prometheus_metrics",kong_subsystem="stream"} %d+', body) + -- end assert.matches('kong_nginx_metric_errors_total 0', body, nil, true) end) @@ -316,6 +322,65 @@ describe("Plugin: prometheus (access)", function() end) end) +local test_f +if stream_available then + test_f = describe +else + test_f = pending +end +test_f("Plugin: prometheus (access) no stream listeners", function() + local admin_client + + setup(function() + local bp = helpers.get_db_utils() + + bp.plugins:insert { + protocols = { "http", "https", "grpc", "grpcs", "tcp", "tls" }, + name = "prometheus" + } + + assert(helpers.start_kong { + plugins = "bundled, prometheus", + stream_listen = "off", + }) + admin_client = helpers.admin_client() + end) + + teardown(function() + if admin_client then + admin_client:close() + end + + helpers.stop_kong() + end) + + it("exposes Lua worker VM stats only for http subsystem", function() + local res = assert(admin_client:send { + method = "GET", + path = "/metrics", + }) + local body = assert.res_status(200, res) + assert.matches('kong_memory_workers_lua_vms_bytes{pid="%d+",kong_subsystem="http"}', body) + assert.not_matches('kong_memory_workers_lua_vms_bytes{pid="%d+",kong_subsystem="stream"}', body) + + assert.matches('kong_nginx_metric_errors_total 0', body, nil, true) + end) + + it("exposes lua_shared_dict metrics only for http subsystem", function() + local res = assert(admin_client:send { + method = "GET", + path = "/metrics", + }) + local body = assert.res_status(200, res) + assert.matches('kong_memory_lua_shared_dict_total_bytes' .. + '{shared_dict="prometheus_metrics",kong_subsystem="http"} %d+', body) + + assert.not_matches('kong_memory_lua_shared_dict_bytes' .. 
+ '{shared_dict="stream_prometheus_metric",kong_subsystem="stream"} %d+', body) + assert.matches('kong_nginx_metric_errors_total 0', body, nil, true) + end) +end) + describe("Plugin: prometheus (access) per-consumer metrics", function() local proxy_client local admin_client diff --git a/spec/04-status_api_spec.lua b/spec/04-status_api_spec.lua index 7a66ffb..c0c6fba 100644 --- a/spec/04-status_api_spec.lua +++ b/spec/04-status_api_spec.lua @@ -1,6 +1,8 @@ local helpers = require "spec.helpers" local pl_file = require "pl.file" +local TCP_PROXY_PORT = 9007 + -- Note: remove the below hack when https://github.com/Kong/kong/pull/6952 is merged local stream_available, _ = pcall(require, "kong.tools.stream_api") @@ -138,6 +140,7 @@ describe("Plugin: prometheus (access via status API)", function() nginx_conf = nginx_conf, plugins = "bundled, prometheus", status_listen = "0.0.0.0:9500", + stream_listen = "127.0.0.1:" .. TCP_PROXY_PORT, }) proxy_client = helpers.proxy_client() status_client = helpers.http_client("127.0.0.1", 9500, 20000) @@ -373,7 +376,12 @@ describe("Plugin: prometheus (access via status API)", function() path = "/metrics", }) local body = assert.res_status(200, res) - assert.matches('kong_memory_workers_lua_vms_bytes', body, nil, true) + assert.matches('kong_memory_workers_lua_vms_bytes{pid="%d+",kong_subsystem="http"}', body) + if stream_available then + assert.matches('kong_memory_workers_lua_vms_bytes{pid="%d+",kong_subsystem="stream"}', body) + end + + assert.matches('kong_nginx_metric_errors_total 0', body, nil, true) end) it("exposes lua_shared_dict metrics", function() @@ -383,8 +391,13 @@ describe("Plugin: prometheus (access via status API)", function() }) local body = assert.res_status(200, res) assert.matches('kong_memory_lua_shared_dict_total_bytes' .. - '{shared_dict="prometheus_metrics"} 5242880', body, nil, true) - assert.matches('kong_memory_lua_shared_dict_bytes' .. 
- '{shared_dict="prometheus_metrics"}', body, nil, true) + '{shared_dict="prometheus_metrics",kong_subsystem="http"} %d+', body) + -- TODO: uncomment below once the ngx.shared iterator in stream is fixed + -- if stream_available then + -- assert.matches('kong_memory_lua_shared_dict_total_bytes' .. + -- '{shared_dict="prometheus_metrics",kong_subsystem="stream"} %d+', body) + -- end + + assert.matches('kong_nginx_metric_errors_total 0', body, nil, true) end) end) diff --git a/spec/05-enterprise-exporter_spec.lua b/spec/05-enterprise-exporter_spec.lua index e2a8ebe..8a0f496 100644 --- a/spec/05-enterprise-exporter_spec.lua +++ b/spec/05-enterprise-exporter_spec.lua @@ -53,8 +53,8 @@ t("Plugin: prometheus (exporter) enterprise licenses", function() }) local body = assert.res_status(200, res) - assert.matches('kong_enterprise_license_signature %d+', body, nil, true) - assert.matches('kong_enterprise_license_expiration %d+', body, nil, true) + assert.matches('kong_enterprise_license_signature %d+', body) + assert.matches('kong_enterprise_license_expiration %d+', body) assert.matches('kong_enterprise_license_features{feature="ee_plugins"}', body, nil, true) assert.matches('kong_enterprise_license_features{feature="write_admin_api"}', body, nil, true)