From 07e7a13d405370e11669deca2f9c672db430aad1 Mon Sep 17 00:00:00 2001 From: Gabriele Bartolini Date: Tue, 12 Sep 2023 17:17:17 +0200 Subject: [PATCH] fix(monitoring): improve accuracy of replica lag Synchronize the metrics' configmap with the current one shipped by the operator, while fixing the `pg_replication` metric's lag issue. Closes #143 Signed-off-by: Gabriele Bartolini --- charts/cloudnative-pg/values.yaml | 476 ++++++++++-------------------- 1 file changed, 158 insertions(+), 318 deletions(-) diff --git a/charts/cloudnative-pg/values.yaml b/charts/cloudnative-pg/values.yaml index 9faf8ec0c5..9747ec0c12 100644 --- a/charts/cloudnative-pg/values.yaml +++ b/charts/cloudnative-pg/values.yaml @@ -144,114 +144,18 @@ monitoringQueriesConfigMap: name: cnpg-default-monitoring # -- A string representation of a YAML defining monitoring queries. queries: | - backends: - query: | - SELECT sa.datname - , sa.usename - , sa.application_name - , states.state - , COALESCE(sa.count, 0) AS total - , COALESCE(sa.max_tx_secs, 0) AS max_tx_duration_seconds - FROM ( VALUES ('active') - , ('idle') - , ('idle in transaction') - , ('idle in transaction (aborted)') - , ('fastpath function call') - , ('disabled') - ) AS states(state) - LEFT JOIN ( - SELECT datname - , state - , usename - , COALESCE(application_name, '') AS application_name - , COUNT(*) - , COALESCE(EXTRACT (EPOCH FROM (max(now() - xact_start))), 0) AS max_tx_secs - FROM pg_catalog.pg_stat_activity - GROUP BY datname, state, usename, application_name - ) sa ON states.state = sa.state - WHERE sa.usename IS NOT NULL - metrics: - - datname: - usage: "LABEL" - description: "Name of the database" - - usename: - usage: "LABEL" - description: "Name of the user" - - application_name: - usage: "LABEL" - description: "Name of the application" - - state: - usage: "LABEL" - description: "State of the backend" - - total: - usage: "GAUGE" - description: "Number of backends" - - max_tx_duration_seconds: - usage: "GAUGE" - description: "Maximum duration of a transaction in seconds" - - backends_waiting: - query: | - SELECT count(*) AS total - FROM pg_catalog.pg_locks blocked_locks - JOIN pg_catalog.pg_locks blocking_locks - ON blocking_locks.locktype = blocked_locks.locktype - AND blocking_locks.database IS NOT DISTINCT FROM blocked_locks.database - AND blocking_locks.relation IS NOT DISTINCT FROM blocked_locks.relation - AND blocking_locks.page IS NOT DISTINCT FROM blocked_locks.page - AND blocking_locks.tuple IS NOT DISTINCT FROM blocked_locks.tuple - AND blocking_locks.virtualxid IS NOT DISTINCT FROM blocked_locks.virtualxid - AND blocking_locks.transactionid IS NOT DISTINCT FROM blocked_locks.transactionid - AND blocking_locks.classid IS NOT DISTINCT FROM blocked_locks.classid - AND blocking_locks.objid IS NOT DISTINCT FROM blocked_locks.objid - AND blocking_locks.objsubid IS NOT DISTINCT FROM blocked_locks.objsubid - AND blocking_locks.pid != blocked_locks.pid - JOIN pg_catalog.pg_stat_activity blocking_activity ON blocking_activity.pid = blocking_locks.pid - WHERE NOT blocked_locks.granted - metrics: - - total: - usage: "GAUGE" - description: "Total number of backends that are currently waiting on other queries" - - pg_database: - query: | - SELECT datname - , pg_catalog.pg_database_size(datname) AS size_bytes - , pg_catalog.age(datfrozenxid) AS xid_age - , pg_catalog.mxid_age(datminmxid) AS mxid_age - FROM pg_catalog.pg_database - metrics: - - datname: - usage: "LABEL" - description: "Name of the database" - - size_bytes: - usage: "GAUGE" - description: "Disk space used by the database" - - xid_age: - usage: "GAUGE" - description: "Number of transactions from the frozen XID to the current one" - - mxid_age: - usage: "GAUGE" - description: "Number of multiple transactions (Multixact) from the frozen XID to the current one" - - pg_postmaster: - query: | - SELECT EXTRACT(EPOCH FROM pg_postmaster_start_time) AS start_time - FROM pg_catalog.pg_postmaster_start_time() - metrics: - - start_time: - usage: "GAUGE" - description: "Time at which postgres started (based on epoch)" - pg_replication: - query: "SELECT CASE WHEN NOT pg_catalog.pg_is_in_recovery() + query: "SELECT CASE WHEN ( + NOT pg_is_in_recovery() + OR pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn()) THEN 0 - ELSE GREATEST (0, - EXTRACT(EPOCH FROM (now() - pg_catalog.pg_last_xact_replay_timestamp()))) - END AS lag, - pg_catalog.pg_is_in_recovery() AS in_recovery, + ELSE GREATEST (0, + EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))) + END AS lag, + pg_is_in_recovery() AS in_recovery, EXISTS (TABLE pg_stat_wal_receiver) AS is_wal_receiver_up, (SELECT count(*) FROM pg_stat_replication) AS streaming_replicas" + metrics: - lag: usage: "GAUGE" @@ -266,252 +170,188 @@ monitoringQueriesConfigMap: usage: "GAUGE" description: "Number of streaming replicas connected to the instance" - pg_replication_slots: - query: | - SELECT slot_name, - slot_type, - database, - active, - pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_current_wal_lsn(), restart_lsn) - FROM pg_catalog.pg_replication_slots - WHERE NOT temporary + pg_postmaster: + query: "SELECT pg_postmaster_start_time as start_time from pg_postmaster_start_time()" + primary: true metrics: - - slot_name: - usage: "LABEL" - description: "Name of the replication slot" - - slot_type: - usage: "LABEL" - description: "Type of the replication slot" - - database: - usage: "LABEL" - description: "Name of the database" - - active: - usage: "GAUGE" - description: "Flag indicating whether the slot is active" - - pg_wal_lsn_diff: + - start_time: usage: "GAUGE" - description: "Replication lag in bytes" + description: "Time at which postgres started" - pg_stat_archiver: + pg_stat_user_tables: + target_databases: + - "*" query: | - SELECT archived_count - , failed_count - , COALESCE(EXTRACT(EPOCH FROM (now() - last_archived_time)), -1) AS seconds_since_last_archival - , COALESCE(EXTRACT(EPOCH FROM (now() - last_failed_time)), -1) AS seconds_since_last_failure - , COALESCE(EXTRACT(EPOCH FROM last_archived_time), -1) AS last_archived_time - , COALESCE(EXTRACT(EPOCH FROM last_failed_time), -1) AS last_failed_time - , COALESCE(CAST(CAST('x'||pg_catalog.right(pg_catalog.split_part(last_archived_wal, '.', 1), 16) AS pg_catalog.bit(64)) AS pg_catalog.int8), -1) AS last_archived_wal_start_lsn - , COALESCE(CAST(CAST('x'||pg_catalog.right(pg_catalog.split_part(last_failed_wal, '.', 1), 16) AS pg_catalog.bit(64)) AS pg_catalog.int8), -1) AS last_failed_wal_start_lsn - , EXTRACT(EPOCH FROM stats_reset) AS stats_reset_time - FROM pg_catalog.pg_stat_archiver + SELECT + current_database() datname, + schemaname, + relname, + seq_scan, + seq_tup_read, + idx_scan, + idx_tup_fetch, + n_tup_ins, + n_tup_upd, + n_tup_del, + n_tup_hot_upd, + n_live_tup, + n_dead_tup, + n_mod_since_analyze, + COALESCE(last_vacuum, '1970-01-01Z') as last_vacuum, + COALESCE(last_autovacuum, '1970-01-01Z') as last_autovacuum, + COALESCE(last_analyze, '1970-01-01Z') as last_analyze, + COALESCE(last_autoanalyze, '1970-01-01Z') as last_autoanalyze, + vacuum_count, + autovacuum_count, + analyze_count, + autoanalyze_count + FROM + pg_stat_user_tables metrics: - - archived_count: + - datname: + usage: "LABEL" + description: "Name of current database" + - schemaname: + usage: "LABEL" + description: "Name of the schema that this table is in" + - relname: + usage: "LABEL" + description: "Name of this table" + - seq_scan: usage: "COUNTER" - description: "Number of WAL files that have been successfully archived" - - failed_count: + description: "Number of sequential scans initiated on this table" + - seq_tup_read: usage: "COUNTER" - description: "Number of failed attempts for archiving WAL files" - - seconds_since_last_archival: + description: "Number of live rows fetched by sequential scans" + - idx_scan: + usage: "COUNTER" + description: "Number of index scans initiated on this table" + - idx_tup_fetch: + usage: "COUNTER" + description: "Number of live rows fetched by index scans" + - n_tup_ins: + usage: "COUNTER" + description: "Number of rows inserted" + - n_tup_upd: + usage: "COUNTER" + description: "Number of rows updated" + - n_tup_del: + usage: "COUNTER" + description: "Number of rows deleted" + - n_tup_hot_upd: + usage: "COUNTER" + description: "Number of rows HOT updated (i.e., with no separate index update required)" + - n_live_tup: usage: "GAUGE" - description: "Seconds since the last successful archival operation" - - seconds_since_last_failure: + description: "Estimated number of live rows" + - n_dead_tup: usage: "GAUGE" - description: "Seconds since the last failed archival operation" - - last_archived_time: + description: "Estimated number of dead rows" + - n_mod_since_analyze: usage: "GAUGE" - description: "Epoch of the last time WAL archiving succeeded" - - last_failed_time: + description: "Estimated number of rows changed since last analyze" + - last_vacuum: usage: "GAUGE" - description: "Epoch of the last time WAL archiving failed" - - last_archived_wal_start_lsn: + description: "Last time at which this table was manually vacuumed (not counting VACUUM FULL)" + - last_autovacuum: usage: "GAUGE" - description: "Archived WAL start LSN" - - last_failed_wal_start_lsn: + description: "Last time at which this table was vacuumed by the autovacuum daemon" + - last_analyze: usage: "GAUGE" - description: "Last failed WAL LSN" - - stats_reset_time: + description: "Last time at which this table was manually analyzed" + - last_autoanalyze: usage: "GAUGE" - description: "Time at which these statistics were last reset" - - pg_stat_bgwriter: - query: | - SELECT checkpoints_timed - , checkpoints_req - , checkpoint_write_time - , checkpoint_sync_time - , buffers_checkpoint - , buffers_clean - , maxwritten_clean - , buffers_backend - , buffers_backend_fsync - , buffers_alloc - FROM pg_catalog.pg_stat_bgwriter - metrics: - - checkpoints_timed: - usage: "COUNTER" - description: "Number of scheduled checkpoints that have been performed" - - checkpoints_req: - usage: "COUNTER" - description: "Number of requested checkpoints that have been performed" - - checkpoint_write_time: + description: "Last time at which this table was analyzed by the autovacuum daemon" + - vacuum_count: usage: "COUNTER" - description: "Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds" - - checkpoint_sync_time: + description: "Number of times this table has been manually vacuumed (not counting VACUUM FULL)" + - autovacuum_count: usage: "COUNTER" - description: "Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds" - - buffers_checkpoint: + description: "Number of times this table has been vacuumed by the autovacuum daemon" + - analyze_count: usage: "COUNTER" - description: "Number of buffers written during checkpoints" - - buffers_clean: + description: "Number of times this table has been manually analyzed" + - autoanalyze_count: usage: "COUNTER" - description: "Number of buffers written by the background writer" - - maxwritten_clean: - usage: "COUNTER" - description: "Number of times the background writer stopped a cleaning scan because it had written too many buffers" - - buffers_backend: - usage: "COUNTER" - description: "Number of buffers written directly by a backend" - - buffers_backend_fsync: - usage: "COUNTER" - description: "Number of times a backend had to execute its own fsync call (normally the background writer handles those even when the backend does its own write)" - - buffers_alloc: - usage: "COUNTER" - description: "Number of buffers allocated" + description: "Number of times this table has been analyzed by the autovacuum daemon" - pg_stat_database: - query: | - SELECT datname - , xact_commit - , xact_rollback - , blks_read - , blks_hit - , tup_returned - , tup_fetched - , tup_inserted - , tup_updated - , tup_deleted - , conflicts - , temp_files - , temp_bytes - , deadlocks - , blk_read_time - , blk_write_time - FROM pg_catalog.pg_stat_database + pg_statio_user_tables: + query: "SELECT current_database() datname, schemaname, relname, heap_blks_read, heap_blks_hit, idx_blks_read, idx_blks_hit, toast_blks_read, toast_blks_hit, tidx_blks_read, tidx_blks_hit FROM pg_statio_user_tables" metrics: - datname: usage: "LABEL" - description: "Name of this database" - - xact_commit: - usage: "COUNTER" - description: "Number of transactions in this database that have been committed" - - xact_rollback: - usage: "COUNTER" - description: "Number of transactions in this database that have been rolled back" - - blks_read: - usage: "COUNTER" - description: "Number of disk blocks read in this database" - - blks_hit: - usage: "COUNTER" - description: "Number of times disk blocks were found already in the buffer cache, so that a read was not necessary (this only includes hits in the PostgreSQL buffer cache, not the operating system's file system cache)" - - tup_returned: - usage: "COUNTER" - description: "Number of rows returned by queries in this database" - - tup_fetched: - usage: "COUNTER" - description: "Number of rows fetched by queries in this database" - - tup_inserted: - usage: "COUNTER" - description: "Number of rows inserted by queries in this database" - - tup_updated: + description: "Name of current database" + - schemaname: + usage: "LABEL" + description: "Name of the schema that this table is in" + - relname: + usage: "LABEL" + description: "Name of this table" + - heap_blks_read: usage: "COUNTER" - description: "Number of rows updated by queries in this database" - - tup_deleted: + description: "Number of disk blocks read from this table" + - heap_blks_hit: usage: "COUNTER" - description: "Number of rows deleted by queries in this database" - - conflicts: + description: "Number of buffer hits in this table" + - idx_blks_read: usage: "COUNTER" - description: "Number of queries canceled due to conflicts with recovery in this database" - - temp_files: + description: "Number of disk blocks read from all indexes on this table" + - idx_blks_hit: usage: "COUNTER" - description: "Number of temporary files created by queries in this database" - - temp_bytes: + description: "Number of buffer hits in all indexes on this table" + - toast_blks_read: usage: "COUNTER" - description: "Total amount of data written to temporary files by queries in this database" - - deadlocks: + description: "Number of disk blocks read from this table's TOAST table (if any)" + - toast_blks_hit: usage: "COUNTER" - description: "Number of deadlocks detected in this database" - - blk_read_time: + description: "Number of buffer hits in this table's TOAST table (if any)" + - tidx_blks_read: usage: "COUNTER" - description: "Time spent reading data file blocks by backends in this database, in milliseconds" - - blk_write_time: + description: "Number of disk blocks read from this table's TOAST table indexes (if any)" + - tidx_blks_hit: usage: "COUNTER" - description: "Time spent writing data file blocks by backends in this database, in milliseconds" + description: "Number of buffer hits in this table's TOAST table indexes (if any)" - pg_stat_replication: - primary: true + pg_stat_activity: query: | - SELECT usename - , COALESCE(application_name, '') AS application_name - , COALESCE(client_addr::text, '') AS client_addr - , EXTRACT(EPOCH FROM backend_start) AS backend_start - , COALESCE(pg_catalog.age(backend_xmin), 0) AS backend_xmin_age - , pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_current_wal_lsn(), sent_lsn) AS sent_diff_bytes - , pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_current_wal_lsn(), write_lsn) AS write_diff_bytes - , pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_current_wal_lsn(), flush_lsn) AS flush_diff_bytes - , COALESCE(pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_current_wal_lsn(), replay_lsn),0) AS replay_diff_bytes - , COALESCE((EXTRACT(EPOCH FROM write_lag)),0)::float AS write_lag_seconds - , COALESCE((EXTRACT(EPOCH FROM flush_lag)),0)::float AS flush_lag_seconds - , COALESCE((EXTRACT(EPOCH FROM replay_lag)),0)::float AS replay_lag_seconds - FROM pg_catalog.pg_stat_replication + WITH + metrics AS ( + SELECT + application_name, + SUM(EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change))::bigint)::float AS process_idle_seconds_sum, + COUNT(*) AS process_idle_seconds_count + FROM pg_stat_activity + WHERE state = 'idle' + GROUP BY application_name + ), + buckets AS ( + SELECT + application_name, + le, + SUM( + CASE WHEN EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change)) <= le + THEN 1 + ELSE 0 + END + )::bigint AS bucket + FROM + pg_stat_activity, + UNNEST(ARRAY[1, 2, 5, 15, 30, 60, 90, 120, 300]) AS le + GROUP BY application_name, le + ORDER BY application_name, le + ) + SELECT + application_name, + process_idle_seconds_sum, + process_idle_seconds_count, + ARRAY_AGG(le) AS process_idle_seconds, + ARRAY_AGG(bucket) AS process_idle_seconds_bucket + FROM metrics JOIN buckets USING (application_name) + GROUP BY 1, 2, 3 metrics: - - usename: - usage: "LABEL" - description: "Name of the replication user" - application_name: usage: "LABEL" - description: "Name of the application" - - client_addr: - usage: "LABEL" - description: "Client IP address" - - backend_start: - usage: "COUNTER" - description: "Time when this process was started" - - backend_xmin_age: - usage: "COUNTER" - description: "The age of this standby's xmin horizon" - - sent_diff_bytes: - usage: "GAUGE" - description: "Difference in bytes from the last write-ahead log location sent on this connection" - - write_diff_bytes: - usage: "GAUGE" - description: "Difference in bytes from the last write-ahead log location written to disk by this standby server" - - flush_diff_bytes: - usage: "GAUGE" - description: "Difference in bytes from the last write-ahead log location flushed to disk by this standby server" - - replay_diff_bytes: - usage: "GAUGE" - description: "Difference in bytes from the last write-ahead log location replayed into the database on this standby server" - - write_lag_seconds: - usage: "GAUGE" - description: "Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written it" - - flush_lag_seconds: - usage: "GAUGE" - description: "Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written and flushed it" - - replay_lag_seconds: - usage: "GAUGE" - description: "Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written, flushed and applied it" - - pg_settings: - query: | - SELECT name, - CASE setting WHEN 'on' THEN '1' WHEN 'off' THEN '0' ELSE setting END AS setting - FROM pg_catalog.pg_settings - WHERE vartype IN ('integer', 'real', 'bool') - ORDER BY 1 - metrics: - - name: - usage: "LABEL" - description: "Name of the setting" - - setting: - usage: "GAUGE" - description: "Setting value" + description: "Application Name" + - process_idle_seconds: + usage: "HISTOGRAM" + description: "Idle time of server processes"