diff --git a/tools/rule_extractor/alerting_rules.csv b/tools/rule_extractor/alerting_rules.csv
index a19ef8fbee..b4f2058dc7 100644
--- a/tools/rule_extractor/alerting_rules.csv
+++ b/tools/rule_extractor/alerting_rules.csv
@@ -80,12 +80,12 @@ KubeVersionMismatch,warning,Different semantic versions of Kubernetes components
 KubeClientErrors,warning,Kubernetes API server client is experiencing errors.
 NodeFilesystemSpaceFillingUp,warning,Filesystem is predicted to run out of space within the next 24 hours.
 NodeFilesystemSpaceFillingUp,critical,Filesystem is predicted to run out of space within the next 4 hours.
-NodeFilesystemAlmostOutOfSpace,warning,Filesystem has less than 5% space left.
-NodeFilesystemAlmostOutOfSpace,critical,Filesystem has less than 3% space left.
+NodeFilesystemAlmostOutOfSpace,warning,Filesystem has less than 20% space left.
+NodeFilesystemAlmostOutOfSpace,critical,Filesystem has less than 12% space left.
 NodeFilesystemFilesFillingUp,warning,Filesystem is predicted to run out of inodes within the next 24 hours.
 NodeFilesystemFilesFillingUp,critical,Filesystem is predicted to run out of inodes within the next 4 hours.
-NodeFilesystemAlmostOutOfFiles,warning,Filesystem has less than 5% inodes left.
-NodeFilesystemAlmostOutOfFiles,critical,Filesystem has less than 3% inodes left.
+NodeFilesystemAlmostOutOfFiles,warning,Filesystem has less than 15% inodes left.
+NodeFilesystemAlmostOutOfFiles,critical,Filesystem has less than 8% inodes left.
 NodeNetworkReceiveErrs,warning,Network interface is reporting many receive errors.
 NodeNetworkTransmitErrs,warning,Network interface is reporting many transmit errors.
 NodeHighNumberConntrackEntriesUsed,warning,Number of conntrack are getting close to the limit
diff --git a/tools/rule_extractor/alerting_rules.json b/tools/rule_extractor/alerting_rules.json
index cd0e6661bd..8a02609ec1 100644
--- a/tools/rule_extractor/alerting_rules.json
+++ b/tools/rule_extractor/alerting_rules.json
@@ -492,15 +492,15 @@
     "severity": "critical"
   },
   {
-    "message": "Filesystem has less than 5% space left.",
+    "message": "Filesystem has less than 20% space left.",
     "name": "NodeFilesystemAlmostOutOfSpace",
-    "query": "(node_filesystem_avail_bytes{fstype!=\"\",job=\"node-exporter\"} / node_filesystem_size_bytes{fstype!=\"\",job=\"node-exporter\"} * 100 < 5 and node_filesystem_readonly{fstype!=\"\",job=\"node-exporter\"} == 0)",
+    "query": "(node_filesystem_avail_bytes{fstype!=\"\",job=\"node-exporter\"} / node_filesystem_size_bytes{fstype!=\"\",job=\"node-exporter\"} * 100 < 20 and node_filesystem_readonly{fstype!=\"\",job=\"node-exporter\"} == 0)",
     "severity": "warning"
   },
   {
-    "message": "Filesystem has less than 3% space left.",
+    "message": "Filesystem has less than 12% space left.",
     "name": "NodeFilesystemAlmostOutOfSpace",
-    "query": "(node_filesystem_avail_bytes{fstype!=\"\",job=\"node-exporter\"} / node_filesystem_size_bytes{fstype!=\"\",job=\"node-exporter\"} * 100 < 3 and node_filesystem_readonly{fstype!=\"\",job=\"node-exporter\"} == 0)",
+    "query": "(node_filesystem_avail_bytes{fstype!=\"\",job=\"node-exporter\"} / node_filesystem_size_bytes{fstype!=\"\",job=\"node-exporter\"} * 100 < 12 and node_filesystem_readonly{fstype!=\"\",job=\"node-exporter\"} == 0)",
     "severity": "critical"
   },
   {
@@ -516,15 +516,15 @@
     "severity": "critical"
   },
   {
-    "message": "Filesystem has less than 5% inodes left.",
+    "message": "Filesystem has less than 15% inodes left.",
     "name": "NodeFilesystemAlmostOutOfFiles",
-    "query":
"(node_filesystem_files_free{fstype!=\"\",job=\"node-exporter\"} / node_filesystem_files{fstype!=\"\",job=\"node-exporter\"} * 100 < 5 and node_filesystem_readonly{fstype!=\"\",job=\"node-exporter\"} == 0)", + "query": "(node_filesystem_files_free{fstype!=\"\",job=\"node-exporter\"} / node_filesystem_files{fstype!=\"\",job=\"node-exporter\"} * 100 < 15 and node_filesystem_readonly{fstype!=\"\",job=\"node-exporter\"} == 0)", "severity": "warning" }, { - "message": "Filesystem has less than 3% inodes left.", + "message": "Filesystem has less than 8% inodes left.", "name": "NodeFilesystemAlmostOutOfFiles", - "query": "(node_filesystem_files_free{fstype!=\"\",job=\"node-exporter\"} / node_filesystem_files{fstype!=\"\",job=\"node-exporter\"} * 100 < 3 and node_filesystem_readonly{fstype!=\"\",job=\"node-exporter\"} == 0)", + "query": "(node_filesystem_files_free{fstype!=\"\",job=\"node-exporter\"} / node_filesystem_files{fstype!=\"\",job=\"node-exporter\"} * 100 < 8 and node_filesystem_readonly{fstype!=\"\",job=\"node-exporter\"} == 0)", "severity": "critical" }, { diff --git a/tools/rule_extractor/rules.json b/tools/rule_extractor/rules.json index 52b51e0d1f..52bc2e3a9d 100644 --- a/tools/rule_extractor/rules.json +++ b/tools/rule_extractor/rules.json @@ -2,10 +2,10 @@ "data": { "groups": [ { - "evaluationTime": 0.002303623, + "evaluationTime": 0.001724433, "file": "/etc/prometheus/rules/prometheus-prometheus-operator-prometheus-rulefiles-0/metalk8s-monitoring-prometheus-operator-alertmanager.rules.yaml", "interval": 30, - "lastEvaluation": "2021-06-25T16:33:00.311326669Z", + "lastEvaluation": "2021-08-05T08:29:30.310652056Z", "name": "alertmanager.rules", "rules": [ { @@ -16,12 +16,12 @@ "summary": "Reloading an Alertmanager configuration has failed." }, "duration": 600, - "evaluationTime": 0.000376087, + "evaluationTime": 0.000325479, "health": "ok", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:33:00.311334126Z", + "lastEvaluation": "2021-08-05T08:29:30.310657753Z", "name": "AlertmanagerFailedReload", "query": "max_over_time(alertmanager_config_last_reload_successful{job=\"prometheus-operator-alertmanager\",namespace=\"metalk8s-monitoring\"}[5m]) == 0", "state": "inactive", @@ -35,12 +35,12 @@ "summary": "A member of an Alertmanager cluster has not found all other cluster members." }, "duration": 600, - "evaluationTime": 0.000328462, + "evaluationTime": 0.000241523, "health": "ok", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:33:00.311711797Z", + "lastEvaluation": "2021-08-05T08:29:30.310984822Z", "name": "AlertmanagerMembersInconsistent", "query": "max_over_time(alertmanager_cluster_members{job=\"prometheus-operator-alertmanager\",namespace=\"metalk8s-monitoring\"}[5m]) < on(namespace, service) group_left() count by(namespace, service) (max_over_time(alertmanager_cluster_members{job=\"prometheus-operator-alertmanager\",namespace=\"metalk8s-monitoring\"}[5m]))", "state": "inactive", @@ -54,12 +54,12 @@ "summary": "An Alertmanager instance failed to send notifications." 
}, "duration": 300, - "evaluationTime": 0.000370533, + "evaluationTime": 0.000139383, "health": "ok", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:00.312041941Z", + "lastEvaluation": "2021-08-05T08:29:30.31122765Z", "name": "AlertmanagerFailedToSendAlerts", "query": "(rate(alertmanager_notifications_failed_total{job=\"prometheus-operator-alertmanager\",namespace=\"metalk8s-monitoring\"}[5m]) / rate(alertmanager_notifications_total{job=\"prometheus-operator-alertmanager\",namespace=\"metalk8s-monitoring\"}[5m])) > 0.01", "state": "inactive", @@ -73,12 +73,12 @@ "summary": "All Alertmanager instances in a cluster failed to send notifications to a critical integration." }, "duration": 300, - "evaluationTime": 0.000358935, + "evaluationTime": 0.000172464, "health": "ok", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:33:00.312413765Z", + "lastEvaluation": "2021-08-05T08:29:30.311367875Z", "name": "AlertmanagerClusterFailedToSendAlerts", "query": "min by(namespace, service, integration) (rate(alertmanager_notifications_failed_total{integration=~\".*\",job=\"prometheus-operator-alertmanager\",namespace=\"metalk8s-monitoring\"}[5m]) / rate(alertmanager_notifications_total{integration=~\".*\",job=\"prometheus-operator-alertmanager\",namespace=\"metalk8s-monitoring\"}[5m])) > 0.01", "state": "inactive", @@ -92,12 +92,12 @@ "summary": "All Alertmanager instances in a cluster failed to send notifications to a non-critical integration." }, "duration": 300, - "evaluationTime": 0.000211621, + "evaluationTime": 0.000153029, "health": "ok", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:00.312773672Z", + "lastEvaluation": "2021-08-05T08:29:30.311541032Z", "name": "AlertmanagerClusterFailedToSendAlerts", "query": "min by(namespace, service, integration) (rate(alertmanager_notifications_failed_total{integration!~\".*\",job=\"prometheus-operator-alertmanager\",namespace=\"metalk8s-monitoring\"}[5m]) / rate(alertmanager_notifications_total{integration!~\".*\",job=\"prometheus-operator-alertmanager\",namespace=\"metalk8s-monitoring\"}[5m])) > 0.01", "state": "inactive", @@ -111,12 +111,12 @@ "summary": "Alertmanager instances within the same cluster have different configurations." }, "duration": 1200, - "evaluationTime": 0.000130729, + "evaluationTime": 9.0759e-05, "health": "ok", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:33:00.312986095Z", + "lastEvaluation": "2021-08-05T08:29:30.311694791Z", "name": "AlertmanagerConfigInconsistent", "query": "count by(namespace, service) (count_values by(namespace, service) (\"config_hash\", alertmanager_config_hash{job=\"prometheus-operator-alertmanager\",namespace=\"metalk8s-monitoring\"})) != 1", "state": "inactive", @@ -130,12 +130,12 @@ "summary": "Half or more of the Alertmanager instances within the same cluster are down." 
}, "duration": 300, - "evaluationTime": 0.000297013, + "evaluationTime": 0.000288587, "health": "ok", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:33:00.31311755Z", + "lastEvaluation": "2021-08-05T08:29:30.311786172Z", "name": "AlertmanagerClusterDown", "query": "(count by(namespace, service) (avg_over_time(up{job=\"prometheus-operator-alertmanager\",namespace=\"metalk8s-monitoring\"}[5m]) < 0.5) / count by(namespace, service) (up{job=\"prometheus-operator-alertmanager\",namespace=\"metalk8s-monitoring\"})) >= 0.5", "state": "inactive", @@ -149,12 +149,12 @@ "summary": "Half or more of the Alertmanager instances within the same cluster are crashlooping." }, "duration": 300, - "evaluationTime": 0.000211421, + "evaluationTime": 0.0002969, "health": "ok", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:33:00.313415761Z", + "lastEvaluation": "2021-08-05T08:29:30.312076764Z", "name": "AlertmanagerClusterCrashlooping", "query": "(count by(namespace, service) (changes(process_start_time_seconds{job=\"prometheus-operator-alertmanager\",namespace=\"metalk8s-monitoring\"}[10m]) > 4) / count by(namespace, service) (up{job=\"prometheus-operator-alertmanager\",namespace=\"metalk8s-monitoring\"})) >= 0.5", "state": "inactive", @@ -163,10 +163,10 @@ ] }, { - "evaluationTime": 0.046230034, + "evaluationTime": 0, "file": "/etc/prometheus/rules/prometheus-prometheus-operator-prometheus-rulefiles-0/metalk8s-monitoring-prometheus-operator-etcd.yaml", "interval": 30, - "lastEvaluation": "2021-06-25T16:33:15.28136954Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "etcd", "rules": [ { @@ -175,12 +175,12 @@ "message": "etcd cluster \"{{ $labels.job }}\": insufficient members ({{ $value }})." }, "duration": 180, - "evaluationTime": 0.000856117, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:33:15.281389549Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "etcdInsufficientMembers", "query": "sum by(job) (up{job=~\".*etcd.*\"} == bool 1) < ((count by(job) (up{job=~\".*etcd.*\"}) + 1) / 2)", "state": "inactive", @@ -192,12 +192,12 @@ "message": "etcd cluster \"{{ $labels.job }}\": member {{ $labels.instance }} has no leader." }, "duration": 60, - "evaluationTime": 0.000157214, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:33:15.282247838Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "etcdNoLeader", "query": "etcd_server_has_leader{job=~\".*etcd.*\"} == 0", "state": "inactive", @@ -209,12 +209,12 @@ "message": "etcd cluster \"{{ $labels.job }}\": instance {{ $labels.instance }} has seen {{ $value }} leader changes within the last hour." }, "duration": 900, - "evaluationTime": 0.000146644, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:15.282405937Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "etcdHighNumberOfLeaderChanges", "query": "rate(etcd_server_leader_changes_seen_total{job=~\".*etcd.*\"}[15m]) > 3", "state": "inactive", @@ -226,12 +226,12 @@ "message": "etcd cluster \"{{ $labels.job }}\": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}." 
}, "duration": 600, - "evaluationTime": 0.024003484, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:15.282553474Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "etcdHighNumberOfFailedGRPCRequests", "query": "100 * sum by(job, instance, grpc_service, grpc_method) (rate(grpc_server_handled_total{grpc_code!=\"OK\",job=~\".*etcd.*\"}[5m])) / sum by(job, instance, grpc_service, grpc_method) (rate(grpc_server_handled_total{job=~\".*etcd.*\"}[5m])) > 1", "state": "inactive", @@ -243,12 +243,12 @@ "message": "etcd cluster \"{{ $labels.job }}\": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}." }, "duration": 300, - "evaluationTime": 0.019425369, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:33:15.306560443Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "etcdHighNumberOfFailedGRPCRequests", "query": "100 * sum by(job, instance, grpc_service, grpc_method) (rate(grpc_server_handled_total{grpc_code!=\"OK\",job=~\".*etcd.*\"}[5m])) / sum by(job, instance, grpc_service, grpc_method) (rate(grpc_server_handled_total{job=~\".*etcd.*\"}[5m])) > 5", "state": "inactive", @@ -260,12 +260,12 @@ "message": "etcd cluster \"{{ $labels.job }}\": gRPC requests to {{ $labels.grpc_method }} are taking {{ $value }}s on etcd instance {{ $labels.instance }}." }, "duration": 600, - "evaluationTime": 0.000256982, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:33:15.325987853Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "etcdGRPCRequestsSlow", "query": "histogram_quantile(0.99, sum by(job, instance, grpc_service, grpc_method, le) (rate(grpc_server_handling_seconds_bucket{grpc_type=\"unary\",job=~\".*etcd.*\"}[5m]))) > 0.15", "state": "inactive", @@ -277,12 +277,12 @@ "message": "etcd cluster \"{{ $labels.job }}\": member communication with {{ $labels.To }} is taking {{ $value }}s on etcd instance {{ $labels.instance }}." }, "duration": 600, - "evaluationTime": 0.000141554, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:15.326245849Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "etcdMemberCommunicationSlow", "query": "histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~\".*etcd.*\"}[5m])) > 0.15", "state": "inactive", @@ -294,12 +294,12 @@ "message": "etcd cluster \"{{ $labels.job }}\": {{ $value }} proposal failures within the last hour on etcd instance {{ $labels.instance }}." }, "duration": 900, - "evaluationTime": 0.000140185, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:15.326388213Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "etcdHighNumberOfFailedProposals", "query": "rate(etcd_server_proposals_failed_total{job=~\".*etcd.*\"}[15m]) > 5", "state": "inactive", @@ -311,12 +311,12 @@ "message": "etcd cluster \"{{ $labels.job }}\": 99th percentile fync durations are {{ $value }}s on etcd instance {{ $labels.instance }}." 
}, "duration": 600, - "evaluationTime": 0.000263882, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:15.326529092Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "etcdHighFsyncDurations", "query": "histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~\".*etcd.*\"}[5m])) > 0.5", "state": "inactive", @@ -328,12 +328,12 @@ "message": "etcd cluster \"{{ $labels.job }}\": 99th percentile commit durations {{ $value }}s on etcd instance {{ $labels.instance }}." }, "duration": 600, - "evaluationTime": 0.00025297, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:15.326793789Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "etcdHighCommitDurations", "query": "histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~\".*etcd.*\"}[5m])) > 0.25", "state": "inactive", @@ -345,12 +345,12 @@ "message": "{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}" }, "duration": 600, - "evaluationTime": 0.00024051, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:15.327047473Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "etcdHighNumberOfFailedHTTPRequests", "query": "sum by(method) (rate(etcd_http_failed_total{code!=\"404\",job=~\".*etcd.*\"}[5m])) / sum by(method) (rate(etcd_http_received_total{job=~\".*etcd.*\"}[5m])) > 0.01", "state": "inactive", @@ -362,12 +362,12 @@ "message": "{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}." }, "duration": 600, - "evaluationTime": 0.000210575, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:33:15.327288787Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "etcdHighNumberOfFailedHTTPRequests", "query": "sum by(method) (rate(etcd_http_failed_total{code!=\"404\",job=~\".*etcd.*\"}[5m])) / sum by(method) (rate(etcd_http_received_total{job=~\".*etcd.*\"}[5m])) > 0.05", "state": "inactive", @@ -379,12 +379,12 @@ "message": "etcd instance {{ $labels.instance }} HTTP requests to {{ $labels.method }} are slow." 
}, "duration": 600, - "evaluationTime": 9.7252e-05, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:15.327500115Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "etcdHTTPRequestsSlow", "query": "histogram_quantile(0.99, rate(etcd_http_successful_duration_seconds_bucket[5m])) > 0.15", "state": "inactive", @@ -393,177 +393,144 @@ ] }, { - "evaluationTime": 0.001261143, + "evaluationTime": 0, "file": "/etc/prometheus/rules/prometheus-prometheus-operator-prometheus-rulefiles-0/metalk8s-monitoring-prometheus-operator-general.rules.yaml", "interval": 30, - "lastEvaluation": "2021-06-25T16:32:55.522004499Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "general.rules", "rules": [ { - "alerts": [ - { - "activeAt": "2021-06-25T16:32:25.520605063Z", - "annotations": { - "description": "100% of the fluent-bit-headless/fluent-bit-headless targets in metalk8s-logging namespace are down.", - "runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-targetdown", - "summary": "One or more targets are unreachable." - }, - "labels": { - "alertname": "TargetDown", - "job": "fluent-bit-headless", - "namespace": "metalk8s-logging", - "service": "fluent-bit-headless", - "severity": "warning" - }, - "state": "pending", - "value": "1e+02" - } - ], + "alerts": [], "annotations": { "description": "{{ printf \"%.4g\" $value }}% of the {{ $labels.job }}/{{ $labels.service }} targets in {{ $labels.namespace }} namespace are down.", "runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-targetdown", "summary": "One or more targets are unreachable." }, "duration": 600, - "evaluationTime": 0.001016897, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:32:55.522011099Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "TargetDown", "query": "100 * (count by(job, namespace, service) (up == 0) / count by(job, namespace, service) (up)) > 10", - "state": "pending", + "state": "inactive", "type": "alerting" }, { - "alerts": [ - { - "activeAt": "2021-06-25T16:32:25.520605063Z", - "annotations": { - "description": "This is an alert meant to ensure that the entire alerting pipeline is functional.\nThis alert is always firing, therefore it should always be firing in Alertmanager\nand always fire against a receiver. There are integrations with various notification\nmechanisms that send a notification when this alert is not firing. For example the\n\"DeadMansSnitch\" integration in PagerDuty.", - "runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-watchdog", - "summary": "An alert that should always be firing to certify that Alertmanager is working properly." - }, - "labels": { - "alertname": "Watchdog", - "severity": "none" - }, - "state": "firing", - "value": "1e+00" - } - ], + "alerts": [], "annotations": { "description": "This is an alert meant to ensure that the entire alerting pipeline is functional.\nThis alert is always firing, therefore it should always be firing in Alertmanager\nand always fire against a receiver. There are integrations with various notification\nmechanisms that send a notification when this alert is not firing. 
For example the\n\"DeadMansSnitch\" integration in PagerDuty.", "runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-watchdog", "summary": "An alert that should always be firing to certify that Alertmanager is working properly." }, "duration": 0, - "evaluationTime": 0.000231836, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "none" }, - "lastEvaluation": "2021-06-25T16:32:55.52303039Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "Watchdog", "query": "vector(1)", - "state": "firing", + "state": "inactive", "type": "alerting" } ] }, { - "evaluationTime": 0.012604872, + "evaluationTime": 0, "file": "/etc/prometheus/rules/prometheus-prometheus-operator-prometheus-rulefiles-0/metalk8s-monitoring-prometheus-operator-k8s.rules.yaml", "interval": 30, - "lastEvaluation": "2021-06-25T16:33:13.316833177Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "k8s.rules", "rules": [ { - "evaluationTime": 0.002263775, - "health": "ok", - "lastEvaluation": "2021-06-25T16:33:13.31685241Z", + "evaluationTime": 0, + "health": "unknown", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate", "query": "sum by(cluster, namespace, pod, container) (irate(container_cpu_usage_seconds_total{image!=\"\",job=\"kubelet\",metrics_path=\"/metrics/cadvisor\"}[5m])) * on(cluster, namespace, pod) group_left(node) topk by(cluster, namespace, pod) (1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\"}))", "type": "recording" }, { - "evaluationTime": 0.002066659, - "health": "ok", - "lastEvaluation": "2021-06-25T16:33:13.319120117Z", + "evaluationTime": 0, + "health": "unknown", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "node_namespace_pod_container:container_memory_working_set_bytes", "query": "container_memory_working_set_bytes{image!=\"\",job=\"kubelet\",metrics_path=\"/metrics/cadvisor\"} * on(namespace, pod) group_left(node) topk by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=\"\"}))", "type": "recording" }, { - "evaluationTime": 0.001920643, - "health": "ok", - "lastEvaluation": "2021-06-25T16:33:13.321189722Z", + "evaluationTime": 0, + "health": "unknown", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "node_namespace_pod_container:container_memory_rss", "query": "container_memory_rss{image!=\"\",job=\"kubelet\",metrics_path=\"/metrics/cadvisor\"} * on(namespace, pod) group_left(node) topk by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=\"\"}))", "type": "recording" }, { - "evaluationTime": 0.001835615, - "health": "ok", - "lastEvaluation": "2021-06-25T16:33:13.32311317Z", + "evaluationTime": 0, + "health": "unknown", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "node_namespace_pod_container:container_memory_cache", "query": "container_memory_cache{image!=\"\",job=\"kubelet\",metrics_path=\"/metrics/cadvisor\"} * on(namespace, pod) group_left(node) topk by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=\"\"}))", "type": "recording" }, { - "evaluationTime": 0.002014422, - "health": "ok", - "lastEvaluation": "2021-06-25T16:33:13.324951665Z", + "evaluationTime": 0, + "health": "unknown", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "node_namespace_pod_container:container_memory_swap", "query": "container_memory_swap{image!=\"\",job=\"kubelet\",metrics_path=\"/metrics/cadvisor\"} * on(namespace, pod) group_left(node) topk 
by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=\"\"}))", "type": "recording" }, { - "evaluationTime": 0.000717357, - "health": "ok", - "lastEvaluation": "2021-06-25T16:33:13.326969543Z", + "evaluationTime": 0, + "health": "unknown", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "namespace_memory:kube_pod_container_resource_requests:sum", "query": "sum by(namespace, cluster) (sum by(namespace, pod, cluster) (max by(namespace, pod, container, cluster) (kube_pod_container_resource_requests{job=\"kube-state-metrics\",resource=\"memory\"}) * on(namespace, pod, cluster) group_left() max by(namespace, pod) (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)))", "type": "recording" }, { - "evaluationTime": 0.000590973, - "health": "ok", - "lastEvaluation": "2021-06-25T16:33:13.327688006Z", + "evaluationTime": 0, + "health": "unknown", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "namespace_cpu:kube_pod_container_resource_requests:sum", "query": "sum by(namespace, cluster) (sum by(namespace, pod, cluster) (max by(namespace, pod, container, cluster) (kube_pod_container_resource_requests{job=\"kube-state-metrics\",resource=\"cpu\"}) * on(namespace, pod, cluster) group_left() max by(namespace, pod) (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)))", "type": "recording" }, { - "evaluationTime": 0.000652392, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "workload_type": "deployment" }, - "lastEvaluation": "2021-06-25T16:33:13.328283963Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "namespace_workload_pod:kube_pod_owner:relabel", "query": "max by(cluster, namespace, workload, pod) (label_replace(label_replace(kube_pod_owner{job=\"kube-state-metrics\",owner_kind=\"ReplicaSet\"}, \"replicaset\", \"$1\", \"owner_name\", \"(.*)\") * on(replicaset, namespace) group_left(owner_name) topk by(replicaset, namespace) (1, max by(replicaset, namespace, owner_name) (kube_replicaset_owner{job=\"kube-state-metrics\"})), \"workload\", \"$1\", \"owner_name\", \"(.*)\"))", "type": "recording" }, { - "evaluationTime": 0.000290128, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "workload_type": "daemonset" }, - "lastEvaluation": "2021-06-25T16:33:13.328938073Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "namespace_workload_pod:kube_pod_owner:relabel", "query": "max by(cluster, namespace, workload, pod) (label_replace(kube_pod_owner{job=\"kube-state-metrics\",owner_kind=\"DaemonSet\"}, \"workload\", \"$1\", \"owner_name\", \"(.*)\"))", "type": "recording" }, { - "evaluationTime": 0.000202837, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "workload_type": "statefulset" }, - "lastEvaluation": "2021-06-25T16:33:13.329229602Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "namespace_workload_pod:kube_pod_owner:relabel", "query": "max by(cluster, namespace, workload, pod) (label_replace(kube_pod_owner{job=\"kube-state-metrics\",owner_kind=\"StatefulSet\"}, \"workload\", \"$1\", \"owner_name\", \"(.*)\"))", "type": "recording" @@ -835,10 +802,10 @@ ] }, { - "evaluationTime": 0.000674826, + "evaluationTime": 0.001224051, "file": "/etc/prometheus/rules/prometheus-prometheus-operator-prometheus-rulefiles-0/metalk8s-monitoring-prometheus-operator-kube-apiserver-slos.yaml", "interval": 30, - "lastEvaluation": "2021-06-25T16:33:02.747523509Z", + "lastEvaluation": "2021-08-05T08:29:32.746513551Z", "name": "kube-apiserver-slos", "rules": [ { @@ -849,14 +816,14 @@ "summary": 
"The API server is burning too much error budget." }, "duration": 120, - "evaluationTime": 0.000365047, + "evaluationTime": 0.000481699, "health": "ok", "labels": { "long": "1h", "severity": "critical", "short": "5m" }, - "lastEvaluation": "2021-06-25T16:33:02.747531837Z", + "lastEvaluation": "2021-08-05T08:29:32.746520738Z", "name": "KubeAPIErrorBudgetBurn", "query": "sum(apiserver_request:burnrate1h) > (14.4 * 0.01) and sum(apiserver_request:burnrate5m) > (14.4 * 0.01)", "state": "inactive", @@ -870,14 +837,14 @@ "summary": "The API server is burning too much error budget." }, "duration": 900, - "evaluationTime": 9.3662e-05, + "evaluationTime": 0.000154742, "health": "ok", "labels": { "long": "6h", "severity": "critical", "short": "30m" }, - "lastEvaluation": "2021-06-25T16:33:02.747898359Z", + "lastEvaluation": "2021-08-05T08:29:32.74700463Z", "name": "KubeAPIErrorBudgetBurn", "query": "sum(apiserver_request:burnrate6h) > (6 * 0.01) and sum(apiserver_request:burnrate30m) > (6 * 0.01)", "state": "inactive", @@ -891,14 +858,14 @@ "summary": "The API server is burning too much error budget." }, "duration": 3600, - "evaluationTime": 0.000104909, + "evaluationTime": 0.000265238, "health": "ok", "labels": { "long": "1d", "severity": "warning", "short": "2h" }, - "lastEvaluation": "2021-06-25T16:33:02.747992685Z", + "lastEvaluation": "2021-08-05T08:29:32.747160445Z", "name": "KubeAPIErrorBudgetBurn", "query": "sum(apiserver_request:burnrate1d) > (3 * 0.01) and sum(apiserver_request:burnrate2h) > (3 * 0.01)", "state": "inactive", @@ -912,14 +879,14 @@ "summary": "The API server is burning too much error budget." }, "duration": 10800, - "evaluationTime": 9.7779e-05, + "evaluationTime": 0.000305324, "health": "ok", "labels": { "long": "3d", "severity": "warning", "short": "6h" }, - "lastEvaluation": "2021-06-25T16:33:02.748098214Z", + "lastEvaluation": "2021-08-05T08:29:32.747427599Z", "name": "KubeAPIErrorBudgetBurn", "query": "sum(apiserver_request:burnrate3d) > (1 * 0.01) and sum(apiserver_request:burnrate6h) > (1 * 0.01)", "state": "inactive", @@ -928,241 +895,241 @@ ] }, { - "evaluationTime": 0.266268361, + "evaluationTime": 0.005920899, "file": "/etc/prometheus/rules/prometheus-prometheus-operator-prometheus-rulefiles-0/metalk8s-monitoring-prometheus-operator-kube-apiserver.rules.yaml", "interval": 30, - "lastEvaluation": "2021-06-25T16:33:01.97703202Z", + "lastEvaluation": "2021-08-05T08:29:31.976279011Z", "name": "kube-apiserver.rules", "rules": [ { - "evaluationTime": 0.004386856, + "evaluationTime": 0.001005022, "health": "ok", "labels": { "verb": "read" }, - "lastEvaluation": "2021-06-25T16:33:01.97703968Z", + "lastEvaluation": "2021-08-05T08:29:31.976285869Z", "name": "apiserver_request:burnrate1d", "query": "((sum by(cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[1d])) - ((sum by(cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"0.1\",scope=~\"resource|\",verb=~\"LIST|GET\"}[1d])) or vector(0)) + sum by(cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"0.5\",scope=\"namespace\",verb=~\"LIST|GET\"}[1d])) + sum by(cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"5\",scope=\"cluster\",verb=~\"LIST|GET\"}[1d])))) + sum by(cluster) (rate(apiserver_request_total{code=~\"5..\",job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))) / sum by(cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))", "type": "recording" }, { - 
"evaluationTime": 0.003758991, + "evaluationTime": 0.00040539, "health": "ok", "labels": { "verb": "read" }, - "lastEvaluation": "2021-06-25T16:33:01.981429432Z", + "lastEvaluation": "2021-08-05T08:29:31.977293169Z", "name": "apiserver_request:burnrate1h", "query": "((sum by(cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[1h])) - ((sum by(cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"0.1\",scope=~\"resource|\",verb=~\"LIST|GET\"}[1h])) or vector(0)) + sum by(cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"0.5\",scope=\"namespace\",verb=~\"LIST|GET\"}[1h])) + sum by(cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"5\",scope=\"cluster\",verb=~\"LIST|GET\"}[1h])))) + sum by(cluster) (rate(apiserver_request_total{code=~\"5..\",job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))) / sum by(cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))", "type": "recording" }, { - "evaluationTime": 0.003518887, + "evaluationTime": 0.00049458, "health": "ok", "labels": { "verb": "read" }, - "lastEvaluation": "2021-06-25T16:33:01.985190381Z", + "lastEvaluation": "2021-08-05T08:29:31.977699527Z", "name": "apiserver_request:burnrate2h", "query": "((sum by(cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[2h])) - ((sum by(cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"0.1\",scope=~\"resource|\",verb=~\"LIST|GET\"}[2h])) or vector(0)) + sum by(cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"0.5\",scope=\"namespace\",verb=~\"LIST|GET\"}[2h])) + sum by(cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"5\",scope=\"cluster\",verb=~\"LIST|GET\"}[2h])))) + sum by(cluster) (rate(apiserver_request_total{code=~\"5..\",job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))) / sum by(cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))", "type": "recording" }, { - "evaluationTime": 0.003261336, + "evaluationTime": 0.000367437, "health": "ok", "labels": { "verb": "read" }, - "lastEvaluation": "2021-06-25T16:33:01.988711009Z", + "lastEvaluation": "2021-08-05T08:29:31.978195499Z", "name": "apiserver_request:burnrate30m", "query": "((sum by(cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[30m])) - ((sum by(cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"0.1\",scope=~\"resource|\",verb=~\"LIST|GET\"}[30m])) or vector(0)) + sum by(cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"0.5\",scope=\"namespace\",verb=~\"LIST|GET\"}[30m])) + sum by(cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"5\",scope=\"cluster\",verb=~\"LIST|GET\"}[30m])))) + sum by(cluster) (rate(apiserver_request_total{code=~\"5..\",job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))) / sum by(cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))", "type": "recording" }, { - "evaluationTime": 0.003201499, + "evaluationTime": 0.000358372, "health": "ok", "labels": { "verb": "read" }, - "lastEvaluation": "2021-06-25T16:33:01.991974324Z", + "lastEvaluation": "2021-08-05T08:29:31.978563828Z", "name": "apiserver_request:burnrate3d", "query": "((sum by(cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[3d])) - ((sum by(cluster) 
(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"0.1\",scope=~\"resource|\",verb=~\"LIST|GET\"}[3d])) or vector(0)) + sum by(cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"0.5\",scope=\"namespace\",verb=~\"LIST|GET\"}[3d])) + sum by(cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"5\",scope=\"cluster\",verb=~\"LIST|GET\"}[3d])))) + sum by(cluster) (rate(apiserver_request_total{code=~\"5..\",job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))) / sum by(cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))", "type": "recording" }, { - "evaluationTime": 0.003445006, + "evaluationTime": 0.000372175, "health": "ok", "labels": { "verb": "read" }, - "lastEvaluation": "2021-06-25T16:33:01.995177659Z", + "lastEvaluation": "2021-08-05T08:29:31.978923137Z", "name": "apiserver_request:burnrate5m", "query": "((sum by(cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[5m])) - ((sum by(cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"0.1\",scope=~\"resource|\",verb=~\"LIST|GET\"}[5m])) or vector(0)) + sum by(cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"0.5\",scope=\"namespace\",verb=~\"LIST|GET\"}[5m])) + sum by(cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"5\",scope=\"cluster\",verb=~\"LIST|GET\"}[5m])))) + sum by(cluster) (rate(apiserver_request_total{code=~\"5..\",job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))) / sum by(cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))", "type": "recording" }, { - "evaluationTime": 0.003002091, + "evaluationTime": 0.00035355, "health": "ok", "labels": { "verb": "read" }, - "lastEvaluation": "2021-06-25T16:33:01.998624471Z", + "lastEvaluation": "2021-08-05T08:29:31.979296207Z", "name": "apiserver_request:burnrate6h", "query": "((sum by(cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[6h])) - ((sum by(cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"0.1\",scope=~\"resource|\",verb=~\"LIST|GET\"}[6h])) or vector(0)) + sum by(cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"0.5\",scope=\"namespace\",verb=~\"LIST|GET\"}[6h])) + sum by(cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"5\",scope=\"cluster\",verb=~\"LIST|GET\"}[6h])))) + sum by(cluster) (rate(apiserver_request_total{code=~\"5..\",job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))) / sum by(cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))", "type": "recording" }, { - "evaluationTime": 0.002084685, + "evaluationTime": 0.000269247, "health": "ok", "labels": { "verb": "write" }, - "lastEvaluation": "2021-06-25T16:33:02.00162839Z", + "lastEvaluation": "2021-08-05T08:29:31.979650672Z", "name": "apiserver_request:burnrate1d", "query": "((sum by(cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d])) - sum by(cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"1\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))) + sum by(cluster) (rate(apiserver_request_total{code=~\"5..\",job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))) / sum by(cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))", "type": "recording" }, { - "evaluationTime": 0.001892339, + 
"evaluationTime": 0.000256352, "health": "ok", "labels": { "verb": "write" }, - "lastEvaluation": "2021-06-25T16:33:02.003714771Z", + "lastEvaluation": "2021-08-05T08:29:31.979920632Z", "name": "apiserver_request:burnrate1h", "query": "((sum by(cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h])) - sum by(cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"1\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))) + sum by(cluster) (rate(apiserver_request_total{code=~\"5..\",job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))) / sum by(cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))", "type": "recording" }, { - "evaluationTime": 0.001724924, + "evaluationTime": 0.000266638, "health": "ok", "labels": { "verb": "write" }, - "lastEvaluation": "2021-06-25T16:33:02.005608521Z", + "lastEvaluation": "2021-08-05T08:29:31.980177647Z", "name": "apiserver_request:burnrate2h", "query": "((sum by(cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h])) - sum by(cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"1\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))) + sum by(cluster) (rate(apiserver_request_total{code=~\"5..\",job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))) / sum by(cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))", "type": "recording" }, { - "evaluationTime": 0.00178116, + "evaluationTime": 0.000322176, "health": "ok", "labels": { "verb": "write" }, - "lastEvaluation": "2021-06-25T16:33:02.007334809Z", + "lastEvaluation": "2021-08-05T08:29:31.980445344Z", "name": "apiserver_request:burnrate30m", "query": "((sum by(cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m])) - sum by(cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"1\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))) + sum by(cluster) (rate(apiserver_request_total{code=~\"5..\",job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))) / sum by(cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))", "type": "recording" }, { - "evaluationTime": 0.001720574, + "evaluationTime": 0.0002526, "health": "ok", "labels": { "verb": "write" }, - "lastEvaluation": "2021-06-25T16:33:02.009117609Z", + "lastEvaluation": "2021-08-05T08:29:31.980768522Z", "name": "apiserver_request:burnrate3d", "query": "((sum by(cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d])) - sum by(cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"1\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))) + sum by(cluster) (rate(apiserver_request_total{code=~\"5..\",job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))) / sum by(cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))", "type": "recording" }, { - "evaluationTime": 0.001786502, + "evaluationTime": 0.000264974, "health": "ok", "labels": { "verb": "write" }, - "lastEvaluation": "2021-06-25T16:33:02.010839476Z", + "lastEvaluation": "2021-08-05T08:29:31.981021778Z", "name": "apiserver_request:burnrate5m", "query": "((sum by(cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m])) - sum by(cluster) 
(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"1\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))) + sum by(cluster) (rate(apiserver_request_total{code=~\"5..\",job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))) / sum by(cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))", "type": "recording" }, { - "evaluationTime": 0.001759515, + "evaluationTime": 0.000234391, "health": "ok", "labels": { "verb": "write" }, - "lastEvaluation": "2021-06-25T16:33:02.012627611Z", + "lastEvaluation": "2021-08-05T08:29:31.981287498Z", "name": "apiserver_request:burnrate6h", "query": "((sum by(cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h])) - sum by(cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"1\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))) + sum by(cluster) (rate(apiserver_request_total{code=~\"5..\",job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))) / sum by(cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))", "type": "recording" }, { - "evaluationTime": 0.001816577, + "evaluationTime": 6.3581e-05, "health": "ok", "labels": { "verb": "read" }, - "lastEvaluation": "2021-06-25T16:33:02.014388594Z", + "lastEvaluation": "2021-08-05T08:29:31.981522665Z", "name": "code_resource:apiserver_request_total:rate5m", "query": "sum by(cluster, code, resource) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))", "type": "recording" }, { - "evaluationTime": 0.001143238, + "evaluationTime": 7.4163e-05, "health": "ok", "labels": { "verb": "write" }, - "lastEvaluation": "2021-06-25T16:33:02.01620878Z", + "lastEvaluation": "2021-08-05T08:29:31.981586778Z", "name": "code_resource:apiserver_request_total:rate5m", "query": "sum by(cluster, code, resource) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))", "type": "recording" }, { - "evaluationTime": 0.03738321, + "evaluationTime": 0.000111271, "health": "ok", "labels": { "quantile": "0.99", "verb": "read" }, - "lastEvaluation": "2021-06-25T16:33:02.017354931Z", + "lastEvaluation": "2021-08-05T08:29:31.981661952Z", "name": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile", "query": "histogram_quantile(0.99, sum by(cluster, le, resource) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))) > 0", "type": "recording" }, { - "evaluationTime": 0.020730983, + "evaluationTime": 9.6269e-05, "health": "ok", "labels": { "quantile": "0.99", "verb": "write" }, - "lastEvaluation": "2021-06-25T16:33:02.054740839Z", + "lastEvaluation": "2021-08-05T08:29:31.981773897Z", "name": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile", "query": "histogram_quantile(0.99, sum by(cluster, le, resource) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))) > 0", "type": "recording" }, { - "evaluationTime": 0.042072609, + "evaluationTime": 0.000110487, "health": "ok", "labels": { "quantile": "0.99" }, - "lastEvaluation": "2021-06-25T16:33:02.075474238Z", + "lastEvaluation": "2021-08-05T08:29:31.981870832Z", "name": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile", "query": "histogram_quantile(0.99, sum without(instance, pod) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT\"}[5m])))", 
"type": "recording" }, { - "evaluationTime": 0.043465144, + "evaluationTime": 9.5591e-05, "health": "ok", "labels": { "quantile": "0.9" }, - "lastEvaluation": "2021-06-25T16:33:02.117552737Z", + "lastEvaluation": "2021-08-05T08:29:31.981982001Z", "name": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile", "query": "histogram_quantile(0.9, sum without(instance, pod) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT\"}[5m])))", "type": "recording" }, { - "evaluationTime": 0.082270209, + "evaluationTime": 0.00011867, "health": "ok", "labels": { "quantile": "0.5" }, - "lastEvaluation": "2021-06-25T16:33:02.161024125Z", + "lastEvaluation": "2021-08-05T08:29:31.982078273Z", "name": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile", "query": "histogram_quantile(0.5, sum without(instance, pod) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT\"}[5m])))", "type": "recording" @@ -1170,24 +1137,24 @@ ] }, { - "evaluationTime": 0.000964664, + "evaluationTime": 0.000347602, "file": "/etc/prometheus/rules/prometheus-prometheus-operator-prometheus-rulefiles-0/metalk8s-monitoring-prometheus-operator-kube-prometheus-general.rules.yaml", "interval": 30, - "lastEvaluation": "2021-06-25T16:32:57.650992974Z", + "lastEvaluation": "2021-08-05T08:29:27.650571925Z", "name": "kube-prometheus-general.rules", "rules": [ { - "evaluationTime": 0.000698536, + "evaluationTime": 0.00028237, "health": "ok", - "lastEvaluation": "2021-06-25T16:32:57.650999795Z", + "lastEvaluation": "2021-08-05T08:29:27.650577515Z", "name": "count:up1", "query": "count without(instance, pod, node) (up == 1)", "type": "recording" }, { - "evaluationTime": 0.000254055, + "evaluationTime": 5.5044e-05, "health": "ok", - "lastEvaluation": "2021-06-25T16:32:57.651700483Z", + "lastEvaluation": "2021-08-05T08:29:27.65086119Z", "name": "count:up0", "query": "count without(instance, pod, node) (up == 0)", "type": "recording" @@ -1195,56 +1162,56 @@ ] }, { - "evaluationTime": 0.001589546, + "evaluationTime": 0, "file": "/etc/prometheus/rules/prometheus-prometheus-operator-prometheus-rulefiles-0/metalk8s-monitoring-prometheus-operator-kube-prometheus-node-recording.rules.yaml", "interval": 30, - "lastEvaluation": "2021-06-25T16:33:13.692158023Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "kube-prometheus-node-recording.rules", "rules": [ { - "evaluationTime": 0.000465859, - "health": "ok", - "lastEvaluation": "2021-06-25T16:33:13.692164065Z", + "evaluationTime": 0, + "health": "unknown", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "instance:node_cpu:rate:sum", "query": "sum by(instance) (rate(node_cpu_seconds_total{mode!=\"idle\",mode!=\"iowait\",mode!=\"steal\"}[3m]))", "type": "recording" }, { - "evaluationTime": 0.000181335, - "health": "ok", - "lastEvaluation": "2021-06-25T16:33:13.692631664Z", + "evaluationTime": 0, + "health": "unknown", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "instance:node_network_receive_bytes:rate:sum", "query": "sum by(instance) (rate(node_network_receive_bytes_total[3m]))", "type": "recording" }, { - "evaluationTime": 0.000190953, - "health": "ok", - "lastEvaluation": "2021-06-25T16:33:13.692813798Z", + "evaluationTime": 0, + "health": "unknown", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "instance:node_network_transmit_bytes:rate:sum", "query": "sum 
by(instance) (rate(node_network_transmit_bytes_total[3m]))", "type": "recording" }, { - "evaluationTime": 0.000377761, - "health": "ok", - "lastEvaluation": "2021-06-25T16:33:13.693005607Z", + "evaluationTime": 0, + "health": "unknown", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "instance:node_cpu:ratio", "query": "sum without(cpu, mode) (rate(node_cpu_seconds_total{mode!=\"idle\",mode!=\"iowait\",mode!=\"steal\"}[5m])) / on(instance) group_left() count by(instance) (sum by(instance, cpu) (node_cpu_seconds_total))", "type": "recording" }, { - "evaluationTime": 0.000187, - "health": "ok", - "lastEvaluation": "2021-06-25T16:33:13.69338742Z", + "evaluationTime": 0, + "health": "unknown", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "cluster:node_cpu:sum_rate5m", "query": "sum(rate(node_cpu_seconds_total{mode!=\"idle\",mode!=\"iowait\",mode!=\"steal\"}[5m]))", "type": "recording" }, { - "evaluationTime": 0.000167105, - "health": "ok", - "lastEvaluation": "2021-06-25T16:33:13.693575285Z", + "evaluationTime": 0, + "health": "unknown", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "cluster:node_cpu:ratio", "query": "cluster:node_cpu_seconds_total:rate5m / count(sum by(instance, cpu) (node_cpu_seconds_total))", "type": "recording" @@ -1252,107 +1219,107 @@ ] }, { - "evaluationTime": 0.002637665, + "evaluationTime": 0.000995042, "file": "/etc/prometheus/rules/prometheus-prometheus-operator-prometheus-rulefiles-0/metalk8s-monitoring-prometheus-operator-kube-scheduler.rules.yaml", "interval": 30, - "lastEvaluation": "2021-06-25T16:33:00.192087905Z", + "lastEvaluation": "2021-08-05T08:29:30.190309316Z", "name": "kube-scheduler.rules", "rules": [ { - "evaluationTime": 0.000790237, + "evaluationTime": 0.000327417, "health": "ok", "labels": { "quantile": "0.99" }, - "lastEvaluation": "2021-06-25T16:33:00.192096481Z", + "lastEvaluation": "2021-08-05T08:29:30.190315048Z", "name": "cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile", "query": "histogram_quantile(0.99, sum without(instance, pod) (rate(scheduler_e2e_scheduling_duration_seconds_bucket{job=\"kube-scheduler\"}[5m])))", "type": "recording" }, { - "evaluationTime": 0.000356793, + "evaluationTime": 0.000106285, "health": "ok", "labels": { "quantile": "0.99" }, - "lastEvaluation": "2021-06-25T16:33:00.192889167Z", + "lastEvaluation": "2021-08-05T08:29:30.190643886Z", "name": "cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile", "query": "histogram_quantile(0.99, sum without(instance, pod) (rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job=\"kube-scheduler\"}[5m])))", "type": "recording" }, { - "evaluationTime": 0.000106151, + "evaluationTime": 8.8025e-05, "health": "ok", "labels": { "quantile": "0.99" }, - "lastEvaluation": "2021-06-25T16:33:00.193247364Z", + "lastEvaluation": "2021-08-05T08:29:30.190751079Z", "name": "cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile", "query": "histogram_quantile(0.99, sum without(instance, pod) (rate(scheduler_binding_duration_seconds_bucket{job=\"kube-scheduler\"}[5m])))", "type": "recording" }, { - "evaluationTime": 0.000369821, + "evaluationTime": 7.8888e-05, "health": "ok", "labels": { "quantile": "0.9" }, - "lastEvaluation": "2021-06-25T16:33:00.193354462Z", + "lastEvaluation": "2021-08-05T08:29:30.19083978Z", "name": "cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile", "query": "histogram_quantile(0.9, sum without(instance, pod) 
(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job=\"kube-scheduler\"}[5m])))", "type": "recording" }, { - "evaluationTime": 0.000241023, + "evaluationTime": 6.8782e-05, "health": "ok", "labels": { "quantile": "0.9" }, - "lastEvaluation": "2021-06-25T16:33:00.193725512Z", + "lastEvaluation": "2021-08-05T08:29:30.190919696Z", "name": "cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile", "query": "histogram_quantile(0.9, sum without(instance, pod) (rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job=\"kube-scheduler\"}[5m])))", "type": "recording" }, { - "evaluationTime": 7.3799e-05, + "evaluationTime": 7.7415e-05, "health": "ok", "labels": { "quantile": "0.9" }, - "lastEvaluation": "2021-06-25T16:33:00.193967565Z", + "lastEvaluation": "2021-08-05T08:29:30.190989102Z", "name": "cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile", "query": "histogram_quantile(0.9, sum without(instance, pod) (rate(scheduler_binding_duration_seconds_bucket{job=\"kube-scheduler\"}[5m])))", "type": "recording" }, { - "evaluationTime": 0.000366698, + "evaluationTime": 9.8382e-05, "health": "ok", "labels": { "quantile": "0.5" }, - "lastEvaluation": "2021-06-25T16:33:00.194041935Z", + "lastEvaluation": "2021-08-05T08:29:30.191067364Z", "name": "cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile", "query": "histogram_quantile(0.5, sum without(instance, pod) (rate(scheduler_e2e_scheduling_duration_seconds_bucket{job=\"kube-scheduler\"}[5m])))", "type": "recording" }, { - "evaluationTime": 0.000230152, + "evaluationTime": 6.5162e-05, "health": "ok", "labels": { "quantile": "0.5" }, - "lastEvaluation": "2021-06-25T16:33:00.194409813Z", + "lastEvaluation": "2021-08-05T08:29:30.191166534Z", "name": "cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile", "query": "histogram_quantile(0.5, sum without(instance, pod) (rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job=\"kube-scheduler\"}[5m])))", "type": "recording" }, { - "evaluationTime": 8.1858e-05, + "evaluationTime": 6.6836e-05, "health": "ok", "labels": { "quantile": "0.5" }, - "lastEvaluation": "2021-06-25T16:33:00.194640939Z", + "lastEvaluation": "2021-08-05T08:29:30.191232375Z", "name": "cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile", "query": "histogram_quantile(0.5, sum without(instance, pod) (rate(scheduler_binding_duration_seconds_bucket{job=\"kube-scheduler\"}[5m])))", "type": "recording" @@ -1360,10 +1327,10 @@ ] }, { - "evaluationTime": 0.001221072, + "evaluationTime": 0, "file": "/etc/prometheus/rules/prometheus-prometheus-operator-prometheus-rulefiles-0/metalk8s-monitoring-prometheus-operator-kube-state-metrics.yaml", "interval": 30, - "lastEvaluation": "2021-06-25T16:33:16.306021059Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "kube-state-metrics", "rules": [ { @@ -1374,12 +1341,12 @@ "summary": "kube-state-metrics is experiencing errors in list operations." 
}, "duration": 900, - "evaluationTime": 0.000553306, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:33:16.306029323Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeStateMetricsListErrors", "query": "(sum(rate(kube_state_metrics_list_total{job=\"kube-state-metrics\",result=\"error\"}[5m])) / sum(rate(kube_state_metrics_list_total{job=\"kube-state-metrics\"}[5m]))) > 0.01", "state": "inactive", @@ -1393,12 +1360,12 @@ "summary": "kube-state-metrics is experiencing errors in watch operations." }, "duration": 900, - "evaluationTime": 0.00030408, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:33:16.306584576Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeStateMetricsWatchErrors", "query": "(sum(rate(kube_state_metrics_watch_total{job=\"kube-state-metrics\",result=\"error\"}[5m])) / sum(rate(kube_state_metrics_watch_total{job=\"kube-state-metrics\"}[5m]))) > 0.01", "state": "inactive", @@ -1412,12 +1379,12 @@ "summary": "kube-state-metrics sharding is misconfigured." }, "duration": 900, - "evaluationTime": 9.3248e-05, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:33:16.306890102Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeStateMetricsShardingMismatch", "query": "stdvar(kube_state_metrics_total_shards{job=\"kube-state-metrics\"}) != 0", "state": "inactive", @@ -1431,12 +1398,12 @@ "summary": "kube-state-metrics shards are missing." }, "duration": 900, - "evaluationTime": 0.000253912, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:33:16.306984042Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeStateMetricsShardsMissing", "query": "2 ^ max(kube_state_metrics_total_shards{job=\"kube-state-metrics\"}) - 1 - sum(2 ^ max by(shard_ordinal) (kube_state_metrics_shard_ordinal{job=\"kube-state-metrics\"})) != 0", "state": "inactive", @@ -1445,41 +1412,41 @@ ] }, { - "evaluationTime": 0.001988233, + "evaluationTime": 0.000828074, "file": "/etc/prometheus/rules/prometheus-prometheus-operator-prometheus-rulefiles-0/metalk8s-monitoring-prometheus-operator-kubelet.rules.yaml", "interval": 30, - "lastEvaluation": "2021-06-25T16:32:56.101867692Z", + "lastEvaluation": "2021-08-05T08:29:26.101935191Z", "name": "kubelet.rules", "rules": [ { - "evaluationTime": 0.000951014, + "evaluationTime": 0.000598713, "health": "ok", "labels": { "quantile": "0.99" }, - "lastEvaluation": "2021-06-25T16:32:56.101877694Z", + "lastEvaluation": "2021-08-05T08:29:26.101952455Z", "name": "node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile", "query": "histogram_quantile(0.99, sum by(instance, le) (rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) * on(instance) group_left(node) kubelet_node_name{job=\"kubelet\",metrics_path=\"/metrics\"})", "type": "recording" }, { - "evaluationTime": 0.00057738, + "evaluationTime": 0.000119529, "health": "ok", "labels": { "quantile": "0.9" }, - "lastEvaluation": "2021-06-25T16:32:56.102831366Z", + "lastEvaluation": "2021-08-05T08:29:26.102554756Z", "name": "node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile", "query": "histogram_quantile(0.9, sum by(instance, le) (rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) * on(instance) group_left(node) 
kubelet_node_name{job=\"kubelet\",metrics_path=\"/metrics\"})", "type": "recording" }, { - "evaluationTime": 0.00043862, + "evaluationTime": 8.4771e-05, "health": "ok", "labels": { "quantile": "0.5" }, - "lastEvaluation": "2021-06-25T16:32:56.103412354Z", + "lastEvaluation": "2021-08-05T08:29:26.102675214Z", "name": "node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile", "query": "histogram_quantile(0.5, sum by(instance, le) (rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) * on(instance) group_left(node) kubelet_node_name{job=\"kubelet\",metrics_path=\"/metrics\"})", "type": "recording" @@ -1487,10 +1454,10 @@ ] }, { - "evaluationTime": 0.005372735, + "evaluationTime": 0, "file": "/etc/prometheus/rules/prometheus-prometheus-operator-prometheus-rulefiles-0/metalk8s-monitoring-prometheus-operator-kubernetes-apps.yaml", "interval": 30, - "lastEvaluation": "2021-06-25T16:33:10.005195877Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "kubernetes-apps", "rules": [ { @@ -1501,12 +1468,12 @@ "summary": "Pod is crash looping." }, "duration": 900, - "evaluationTime": 0.000613444, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:10.005202261Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubePodCrashLooping", "query": "increase(kube_pod_container_status_restarts_total{job=\"kube-state-metrics\",namespace=~\".*\"}[10m]) > 0", "state": "inactive", @@ -1520,12 +1487,12 @@ "summary": "Pod has been in a non-ready state for more than 15 minutes." }, "duration": 900, - "evaluationTime": 0.000949154, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:10.005817241Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubePodNotReady", "query": "sum by(namespace, pod) (max by(namespace, pod) (kube_pod_status_phase{job=\"kube-state-metrics\",namespace=~\".*\",phase=~\"Pending|Unknown\"}) * on(namespace, pod) group_left(owner_kind) topk by(namespace, pod) (1, max by(namespace, pod, owner_kind) (kube_pod_owner{owner_kind!=\"Job\"}))) > 0", "state": "inactive", @@ -1539,12 +1506,12 @@ "summary": "Deployment generation mismatch due to possible roll-back" }, "duration": 900, - "evaluationTime": 0.000284372, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:10.006767601Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeDeploymentGenerationMismatch", "query": "kube_deployment_status_observed_generation{job=\"kube-state-metrics\",namespace=~\".*\"} != kube_deployment_metadata_generation{job=\"kube-state-metrics\",namespace=~\".*\"}", "state": "inactive", @@ -1558,57 +1525,34 @@ "summary": "Deployment has not matched the expected number of replicas." 
}, "duration": 900, - "evaluationTime": 0.000436886, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:10.007053189Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeDeploymentReplicasMismatch", "query": "(kube_deployment_spec_replicas{job=\"kube-state-metrics\",namespace=~\".*\"} != kube_deployment_status_replicas_available{job=\"kube-state-metrics\",namespace=~\".*\"}) and (changes(kube_deployment_status_replicas_updated{job=\"kube-state-metrics\",namespace=~\".*\"}[10m]) == 0)", "state": "inactive", "type": "alerting" }, { - "alerts": [ - { - "activeAt": "2021-06-25T16:32:40.003560608Z", - "annotations": { - "description": "StatefulSet metalk8s-logging/loki has not matched the expected number of replicas for longer than 15 minutes.", - "runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetreplicasmismatch", - "summary": "Deployment has not matched the expected number of replicas." - }, - "labels": { - "alertname": "KubeStatefulSetReplicasMismatch", - "container": "kube-state-metrics", - "endpoint": "http", - "instance": "10.233.132.80:8080", - "job": "kube-state-metrics", - "namespace": "metalk8s-logging", - "pod": "prometheus-operator-kube-state-metrics-56466d4b9c-ttvv7", - "service": "prometheus-operator-kube-state-metrics", - "severity": "warning", - "statefulset": "loki" - }, - "state": "pending", - "value": "0e+00" - } - ], + "alerts": [], "annotations": { "description": "StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched the expected number of replicas for longer than 15 minutes.", "runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetreplicasmismatch", "summary": "Deployment has not matched the expected number of replicas." }, "duration": 900, - "evaluationTime": 0.000542952, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:10.00749102Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeStatefulSetReplicasMismatch", "query": "(kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\",namespace=~\".*\"} != kube_statefulset_status_replicas{job=\"kube-state-metrics\",namespace=~\".*\"}) and (changes(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\",namespace=~\".*\"}[10m]) == 0)", - "state": "pending", + "state": "inactive", "type": "alerting" }, { @@ -1619,12 +1563,12 @@ "summary": "StatefulSet generation mismatch due to possible roll-back" }, "duration": 900, - "evaluationTime": 0.000208543, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:10.008035731Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeStatefulSetGenerationMismatch", "query": "kube_statefulset_status_observed_generation{job=\"kube-state-metrics\",namespace=~\".*\"} != kube_statefulset_metadata_generation{job=\"kube-state-metrics\",namespace=~\".*\"}", "state": "inactive", @@ -1638,12 +1582,12 @@ "summary": "StatefulSet update has not been rolled out." 
}, "duration": 900, - "evaluationTime": 0.000374754, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:10.008245156Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeStatefulSetUpdateNotRolledOut", "query": "(max without(revision) (kube_statefulset_status_current_revision{job=\"kube-state-metrics\",namespace=~\".*\"} unless kube_statefulset_status_update_revision{job=\"kube-state-metrics\",namespace=~\".*\"}) * (kube_statefulset_replicas{job=\"kube-state-metrics\",namespace=~\".*\"} != kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\",namespace=~\".*\"})) and (changes(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\",namespace=~\".*\"}[5m]) == 0)", "state": "inactive", @@ -1657,12 +1601,12 @@ "summary": "DaemonSet rollout is stuck." }, "duration": 900, - "evaluationTime": 0.000608068, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:10.008620817Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeDaemonSetRolloutStuck", "query": "((kube_daemonset_status_current_number_scheduled{job=\"kube-state-metrics\",namespace=~\".*\"} != kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\",namespace=~\".*\"}) or (kube_daemonset_status_number_misscheduled{job=\"kube-state-metrics\",namespace=~\".*\"} != 0) or (kube_daemonset_updated_number_scheduled{job=\"kube-state-metrics\",namespace=~\".*\"} != kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\",namespace=~\".*\"}) or (kube_daemonset_status_number_available{job=\"kube-state-metrics\",namespace=~\".*\"} != kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\",namespace=~\".*\"})) and (changes(kube_daemonset_updated_number_scheduled{job=\"kube-state-metrics\",namespace=~\".*\"}[5m]) == 0)", "state": "inactive", @@ -1676,12 +1620,12 @@ "summary": "Pod container waiting longer than 1 hour" }, "duration": 3600, - "evaluationTime": 0.000103939, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:10.009230081Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeContainerWaiting", "query": "sum by(namespace, pod, container) (kube_pod_container_status_waiting_reason{job=\"kube-state-metrics\",namespace=~\".*\"}) > 0", "state": "inactive", @@ -1695,12 +1639,12 @@ "summary": "DaemonSet pods are not scheduled." }, "duration": 600, - "evaluationTime": 0.000288229, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:10.009334688Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeDaemonSetNotScheduled", "query": "kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\",namespace=~\".*\"} - kube_daemonset_status_current_number_scheduled{job=\"kube-state-metrics\",namespace=~\".*\"} > 0", "state": "inactive", @@ -1714,12 +1658,12 @@ "summary": "DaemonSet pods are misscheduled." 
}, "duration": 900, - "evaluationTime": 0.000148473, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:10.009624263Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeDaemonSetMisScheduled", "query": "kube_daemonset_status_number_misscheduled{job=\"kube-state-metrics\",namespace=~\".*\"} > 0", "state": "inactive", @@ -1733,12 +1677,12 @@ "summary": "Job did not complete in time" }, "duration": 43200, - "evaluationTime": 0.000184406, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:10.009774001Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeJobCompletion", "query": "kube_job_spec_completions{job=\"kube-state-metrics\",namespace=~\".*\"} - kube_job_status_succeeded{job=\"kube-state-metrics\",namespace=~\".*\"} > 0", "state": "inactive", @@ -1752,12 +1696,12 @@ "summary": "Job failed to complete." }, "duration": 900, - "evaluationTime": 8.8449e-05, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:10.009959739Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeJobFailed", "query": "kube_job_failed{job=\"kube-state-metrics\",namespace=~\".*\"} > 0", "state": "inactive", @@ -1771,12 +1715,12 @@ "summary": "HPA has not matched descired number of replicas." }, "duration": 900, - "evaluationTime": 0.000409564, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:10.010049293Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeHpaReplicasMismatch", "query": "(kube_hpa_status_desired_replicas{job=\"kube-state-metrics\",namespace=~\".*\"} != kube_hpa_status_current_replicas{job=\"kube-state-metrics\",namespace=~\".*\"}) and (kube_hpa_status_current_replicas{job=\"kube-state-metrics\",namespace=~\".*\"} > kube_hpa_spec_min_replicas{job=\"kube-state-metrics\",namespace=~\".*\"}) and (kube_hpa_status_current_replicas{job=\"kube-state-metrics\",namespace=~\".*\"} < kube_hpa_spec_max_replicas{job=\"kube-state-metrics\",namespace=~\".*\"}) and changes(kube_hpa_status_current_replicas{job=\"kube-state-metrics\",namespace=~\".*\"}[15m]) == 0", "state": "inactive", @@ -1790,12 +1734,12 @@ "summary": "HPA is running at max replicas" }, "duration": 900, - "evaluationTime": 0.000106, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:10.010460105Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeHpaMaxedOut", "query": "kube_hpa_status_current_replicas{job=\"kube-state-metrics\",namespace=~\".*\"} == kube_hpa_spec_max_replicas{job=\"kube-state-metrics\",namespace=~\".*\"}", "state": "inactive", @@ -1804,10 +1748,10 @@ ] }, { - "evaluationTime": 0.001498127, + "evaluationTime": 0.001995499, "file": "/etc/prometheus/rules/prometheus-prometheus-operator-prometheus-rulefiles-0/metalk8s-monitoring-prometheus-operator-kubernetes-resources.yaml", "interval": 30, - "lastEvaluation": "2021-06-25T16:33:02.941533235Z", + "lastEvaluation": "2021-08-05T08:29:32.941518579Z", "name": "kubernetes-resources", "rules": [ { @@ -1818,12 +1762,12 @@ "summary": "Cluster has overcommitted CPU resource requests." 
}, "duration": 300, - "evaluationTime": 0.000550211, + "evaluationTime": 0.000713714, "health": "ok", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:02.941542444Z", + "lastEvaluation": "2021-08-05T08:29:32.941525512Z", "name": "KubeCPUOvercommit", "query": "sum(namespace_cpu:kube_pod_container_resource_requests:sum) / sum(kube_node_status_allocatable{resource=\"cpu\"}) > ((count(kube_node_status_allocatable{resource=\"cpu\"}) > 1) - 1) / count(kube_node_status_allocatable{resource=\"cpu\"})", "state": "inactive", @@ -1837,12 +1781,12 @@ "summary": "Cluster has overcommitted memory resource requests." }, "duration": 300, - "evaluationTime": 0.000226165, + "evaluationTime": 0.000239363, "health": "ok", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:02.942094383Z", + "lastEvaluation": "2021-08-05T08:29:32.942241329Z", "name": "KubeMemoryOvercommit", "query": "sum(namespace_memory:kube_pod_container_resource_requests:sum) / sum(kube_node_status_allocatable{resource=\"memory\"}) > ((count(kube_node_status_allocatable{resource=\"memory\"}) > 1) - 1) / count(kube_node_status_allocatable{resource=\"memory\"})", "state": "inactive", @@ -1856,12 +1800,12 @@ "summary": "Cluster has overcommitted CPU resource requests." }, "duration": 300, - "evaluationTime": 0.00010338, + "evaluationTime": 0.0001194, "health": "ok", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:02.942321366Z", + "lastEvaluation": "2021-08-05T08:29:32.942481682Z", "name": "KubeCPUQuotaOvercommit", "query": "sum(kube_resourcequota{job=\"kube-state-metrics\",resource=\"cpu\",type=\"hard\"}) / sum(kube_node_status_allocatable{resource=\"cpu\"}) > 1.5", "state": "inactive", @@ -1875,12 +1819,12 @@ "summary": "Cluster has overcommitted memory resource requests." }, "duration": 300, - "evaluationTime": 0.000126511, + "evaluationTime": 0.000119575, "health": "ok", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:02.942425397Z", + "lastEvaluation": "2021-08-05T08:29:32.942601996Z", "name": "KubeMemoryQuotaOvercommit", "query": "sum(kube_resourcequota{job=\"kube-state-metrics\",resource=\"memory\",type=\"hard\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\"}) > 1.5", "state": "inactive", @@ -1894,12 +1838,12 @@ "summary": "Namespace quota is going to be full." }, "duration": 900, - "evaluationTime": 9.3479e-05, + "evaluationTime": 0.000178422, "health": "ok", "labels": { "severity": "info" }, - "lastEvaluation": "2021-06-25T16:33:02.94255254Z", + "lastEvaluation": "2021-08-05T08:29:32.942722358Z", "name": "KubeQuotaAlmostFull", "query": "kube_resourcequota{job=\"kube-state-metrics\",type=\"used\"} / ignoring(instance, job, type) (kube_resourcequota{job=\"kube-state-metrics\",type=\"hard\"} > 0) > 0.9 < 1", "state": "inactive", @@ -1913,12 +1857,12 @@ "summary": "Namespace quota is fully used." }, "duration": 900, - "evaluationTime": 8.4602e-05, + "evaluationTime": 0.000188183, "health": "ok", "labels": { "severity": "info" }, - "lastEvaluation": "2021-06-25T16:33:02.942646497Z", + "lastEvaluation": "2021-08-05T08:29:32.942901763Z", "name": "KubeQuotaFullyUsed", "query": "kube_resourcequota{job=\"kube-state-metrics\",type=\"used\"} / ignoring(instance, job, type) (kube_resourcequota{job=\"kube-state-metrics\",type=\"hard\"} > 0) == 1", "state": "inactive", @@ -1932,12 +1876,12 @@ "summary": "Namespace quota has exceeded the limits." 
}, "duration": 900, - "evaluationTime": 7.7728e-05, + "evaluationTime": 0.000230747, "health": "ok", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:02.942731688Z", + "lastEvaluation": "2021-08-05T08:29:32.943091768Z", "name": "KubeQuotaExceeded", "query": "kube_resourcequota{job=\"kube-state-metrics\",type=\"used\"} / ignoring(instance, job, type) (kube_resourcequota{job=\"kube-state-metrics\",type=\"hard\"} > 0) > 1", "state": "inactive", @@ -1951,12 +1895,12 @@ "summary": "Processes experience elevated CPU throttling." }, "duration": 900, - "evaluationTime": 0.000210346, + "evaluationTime": 0.000187565, "health": "ok", "labels": { "severity": "info" }, - "lastEvaluation": "2021-06-25T16:33:02.942809945Z", + "lastEvaluation": "2021-08-05T08:29:32.943323354Z", "name": "CPUThrottlingHigh", "query": "sum by(container, pod, namespace) (increase(container_cpu_cfs_throttled_periods_total{container!=\"\"}[5m])) / sum by(container, pod, namespace) (increase(container_cpu_cfs_periods_total[5m])) > (25 / 100)", "state": "inactive", @@ -1965,10 +1909,10 @@ ] }, { - "evaluationTime": 0.001591148, + "evaluationTime": 0.001022916, "file": "/etc/prometheus/rules/prometheus-prometheus-operator-prometheus-rulefiles-0/metalk8s-monitoring-prometheus-operator-kubernetes-storage.yaml", "interval": 30, - "lastEvaluation": "2021-06-25T16:32:56.330989607Z", + "lastEvaluation": "2021-08-05T08:29:26.331490963Z", "name": "kubernetes-storage", "rules": [ { @@ -1979,12 +1923,12 @@ "summary": "PersistentVolume is filling up." }, "duration": 60, - "evaluationTime": 0.000562076, + "evaluationTime": 0.000490018, "health": "ok", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:32:56.330997662Z", + "lastEvaluation": "2021-08-05T08:29:26.331497357Z", "name": "KubePersistentVolumeFillingUp", "query": "kubelet_volume_stats_available_bytes{job=\"kubelet\",metrics_path=\"/metrics\",namespace=~\".*\"} / kubelet_volume_stats_capacity_bytes{job=\"kubelet\",metrics_path=\"/metrics\",namespace=~\".*\"} < 0.03", "state": "inactive", @@ -1998,12 +1942,12 @@ "summary": "PersistentVolume is filling up." }, "duration": 3600, - "evaluationTime": 0.000816565, + "evaluationTime": 0.000388512, "health": "ok", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:32:56.331561866Z", + "lastEvaluation": "2021-08-05T08:29:26.331989558Z", "name": "KubePersistentVolumeFillingUp", "query": "(kubelet_volume_stats_available_bytes{job=\"kubelet\",metrics_path=\"/metrics\",namespace=~\".*\"} / kubelet_volume_stats_capacity_bytes{job=\"kubelet\",metrics_path=\"/metrics\",namespace=~\".*\"}) < 0.15 and predict_linear(kubelet_volume_stats_available_bytes{job=\"kubelet\",metrics_path=\"/metrics\",namespace=~\".*\"}[6h], 4 * 24 * 3600) < 0", "state": "inactive", @@ -2017,12 +1961,12 @@ "summary": "PersistentVolume is having issues with provisioning." 
}, "duration": 300, - "evaluationTime": 0.000196356, + "evaluationTime": 0.000130252, "health": "ok", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:32:56.332381124Z", + "lastEvaluation": "2021-08-05T08:29:26.332380443Z", "name": "KubePersistentVolumeErrors", "query": "kube_persistentvolume_status_phase{job=\"kube-state-metrics\",phase=~\"Failed|Pending\"} > 0", "state": "inactive", @@ -2031,10 +1975,10 @@ ] }, { - "evaluationTime": 0.003992803, + "evaluationTime": 0, "file": "/etc/prometheus/rules/prometheus-prometheus-operator-prometheus-rulefiles-0/metalk8s-monitoring-prometheus-operator-kubernetes-system-apiserver.yaml", "interval": 30, - "lastEvaluation": "2021-06-25T16:32:52.147586728Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "kubernetes-system-apiserver", "rules": [ { @@ -2045,12 +1989,12 @@ "summary": "Client certificate is about to expire." }, "duration": 0, - "evaluationTime": 0.000669807, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:32:52.147597093Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeClientCertificateExpiration", "query": "apiserver_client_certificate_expiration_seconds_count{job=\"apiserver\"} > 0 and on(job) histogram_quantile(0.01, sum by(job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job=\"apiserver\"}[5m]))) < 604800", "state": "inactive", @@ -2064,12 +2008,12 @@ "summary": "Client certificate is about to expire." }, "duration": 0, - "evaluationTime": 0.000348185, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:32:52.148269262Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeClientCertificateExpiration", "query": "apiserver_client_certificate_expiration_seconds_count{job=\"apiserver\"} > 0 and on(job) histogram_quantile(0.01, sum by(job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job=\"apiserver\"}[5m]))) < 86400", "state": "inactive", @@ -2083,12 +2027,12 @@ "summary": "An aggregated API has reported errors." }, "duration": 0, - "evaluationTime": 0.000240802, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:32:52.148618123Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "AggregatedAPIErrors", "query": "sum by(name, namespace) (increase(aggregator_unavailable_apiservice_total[10m])) > 4", "state": "inactive", @@ -2102,12 +2046,12 @@ "summary": "An aggregated API is down." }, "duration": 300, - "evaluationTime": 0.000676188, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:32:52.148860895Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "AggregatedAPIDown", "query": "(1 - max by(name, namespace) (avg_over_time(aggregator_unavailable_apiservice[10m]))) * 100 < 85", "state": "inactive", @@ -2121,12 +2065,12 @@ "summary": "Target disappeared from Prometheus target discovery." 
}, "duration": 900, - "evaluationTime": 0.000162829, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:32:52.149538706Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeAPIDown", "query": "absent(up{job=\"apiserver\"} == 1)", "state": "inactive", @@ -2140,12 +2084,12 @@ "summary": "The apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests." }, "duration": 300, - "evaluationTime": 0.001871616, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:32:52.149703049Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeAPITerminatedRequests", "query": "sum(rate(apiserver_request_terminations_total{job=\"apiserver\"}[10m])) / (sum(rate(apiserver_request_total{job=\"apiserver\"}[10m])) + sum(rate(apiserver_request_terminations_total{job=\"apiserver\"}[10m]))) > 0.2", "state": "inactive", @@ -2154,38 +2098,53 @@ ] }, { - "evaluationTime": 0.00029565, + "evaluationTime": 0.000732412, "file": "/etc/prometheus/rules/prometheus-prometheus-operator-prometheus-rulefiles-0/metalk8s-monitoring-prometheus-operator-kubernetes-system-controller-manager.yaml", "interval": 30, - "lastEvaluation": "2021-06-25T16:33:05.677095497Z", + "lastEvaluation": "2021-08-05T08:29:35.675742785Z", "name": "kubernetes-system-controller-manager", "rules": [ { - "alerts": [], + "alerts": [ + { + "activeAt": "2021-08-05T08:29:35.675120003Z", + "annotations": { + "description": "KubeControllerManager has disappeared from Prometheus target discovery.", + "runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontrollermanagerdown", + "summary": "Target disappeared from Prometheus target discovery." + }, + "labels": { + "alertname": "KubeControllerManagerDown", + "severity": "critical" + }, + "state": "pending", + "value": "1e+00" + } + ], "annotations": { "description": "KubeControllerManager has disappeared from Prometheus target discovery.", "runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontrollermanagerdown", "summary": "Target disappeared from Prometheus target discovery." }, "duration": 900, - "evaluationTime": 0.000284452, + "evaluationTime": 0.000720188, "health": "ok", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:33:05.67710294Z", + "lastEvaluation": "2021-08-05T08:29:35.675749807Z", "name": "KubeControllerManagerDown", "query": "absent(up{job=\"kube-controller-manager\"} == 1)", - "state": "inactive", + "state": "pending", "type": "alerting" } ] }, { - "evaluationTime": 0.002286035, + "evaluationTime": 0, "file": "/etc/prometheus/rules/prometheus-prometheus-operator-prometheus-rulefiles-0/metalk8s-monitoring-prometheus-operator-kubernetes-system-kubelet.yaml", "interval": 30, - "lastEvaluation": "2021-06-25T16:33:08.337988481Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "kubernetes-system-kubelet", "rules": [ { @@ -2196,12 +2155,12 @@ "summary": "Node is not ready." 
}, "duration": 900, - "evaluationTime": 0.00044019, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:08.338004078Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeNodeNotReady", "query": "kube_node_status_condition{condition=\"Ready\",job=\"kube-state-metrics\",status=\"true\"} == 0", "state": "inactive", @@ -2215,12 +2174,12 @@ "summary": "Node is unreachable." }, "duration": 900, - "evaluationTime": 0.000199643, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:08.338446048Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeNodeUnreachable", "query": "(kube_node_spec_taint{effect=\"NoSchedule\",job=\"kube-state-metrics\",key=\"node.kubernetes.io/unreachable\"} unless ignoring(key, value) kube_node_spec_taint{job=\"kube-state-metrics\",key=~\"ToBeDeletedByClusterAutoscaler|cloud.google.com/impending-node-termination|aws-node-termination-handler/spot-itn\"}) == 1", "state": "inactive", @@ -2234,12 +2193,12 @@ "summary": "Kubelet is running at capacity." }, "duration": 900, - "evaluationTime": 0.000836037, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:08.33864656Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeletTooManyPods", "query": "count by(node) ((kube_pod_status_phase{job=\"kube-state-metrics\",phase=\"Running\"} == 1) * on(instance, pod, namespace, cluster) group_left(node) topk by(instance, pod, namespace, cluster) (1, kube_pod_info{job=\"kube-state-metrics\"})) / max by(node) (kube_node_status_capacity{job=\"kube-state-metrics\",resource=\"pods\"} != 1) > 0.95", "state": "inactive", @@ -2253,12 +2212,12 @@ "summary": "Node readiness status is flapping." }, "duration": 900, - "evaluationTime": 0.000136276, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:08.339483717Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeNodeReadinessFlapping", "query": "sum by(node) (changes(kube_node_status_condition{condition=\"Ready\",status=\"true\"}[15m])) > 2", "state": "inactive", @@ -2272,12 +2231,12 @@ "summary": "Kubelet Pod Lifecycle Event Generator is taking too long to relist." }, "duration": 300, - "evaluationTime": 5.8296e-05, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:08.339620825Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeletPlegDurationHigh", "query": "node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile=\"0.99\"} >= 10", "state": "inactive", @@ -2291,12 +2250,12 @@ "summary": "Kubelet Pod startup latency is too high." 
}, "duration": 900, - "evaluationTime": 0.000260198, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:08.339679772Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeletPodStartUpLatencyHigh", "query": "histogram_quantile(0.99, sum by(instance, le) (rate(kubelet_pod_worker_duration_seconds_bucket{job=\"kubelet\",metrics_path=\"/metrics\"}[5m]))) * on(instance) group_left(node) kubelet_node_name{job=\"kubelet\",metrics_path=\"/metrics\"} > 60", "state": "inactive", @@ -2310,12 +2269,12 @@ "summary": "Kubelet client certificate is about to expire." }, "duration": 0, - "evaluationTime": 4.3845e-05, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:08.339940769Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeletClientCertificateExpiration", "query": "kubelet_certificate_manager_client_ttl_seconds < 604800", "state": "inactive", @@ -2329,12 +2288,12 @@ "summary": "Kubelet client certificate is about to expire." }, "duration": 0, - "evaluationTime": 3.504e-05, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:33:08.339985154Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeletClientCertificateExpiration", "query": "kubelet_certificate_manager_client_ttl_seconds < 86400", "state": "inactive", @@ -2348,12 +2307,12 @@ "summary": "Kubelet server certificate is about to expire." }, "duration": 0, - "evaluationTime": 4.893e-05, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:08.340020613Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeletServerCertificateExpiration", "query": "kubelet_certificate_manager_server_ttl_seconds < 604800", "state": "inactive", @@ -2367,12 +2326,12 @@ "summary": "Kubelet server certificate is about to expire." }, "duration": 0, - "evaluationTime": 4.2894e-05, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:33:08.340070154Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeletServerCertificateExpiration", "query": "kubelet_certificate_manager_server_ttl_seconds < 86400", "state": "inactive", @@ -2386,12 +2345,12 @@ "summary": "Kubelet has failed to renew its client certificate." }, "duration": 900, - "evaluationTime": 4.9078e-05, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:08.340113564Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeletClientCertificateRenewalErrors", "query": "increase(kubelet_certificate_manager_client_expiration_renew_errors[5m]) > 0", "state": "inactive", @@ -2405,12 +2364,12 @@ "summary": "Kubelet has failed to renew its server certificate." }, "duration": 900, - "evaluationTime": 4.7543e-05, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:08.340163168Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeletServerCertificateRenewalErrors", "query": "increase(kubelet_server_expiration_renew_errors[5m]) > 0", "state": "inactive", @@ -2424,12 +2383,12 @@ "summary": "Target disappeared from Prometheus target discovery." 
}, "duration": 900, - "evaluationTime": 6.1022e-05, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:33:08.340211194Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeletDown", "query": "absent(up{job=\"kubelet\",metrics_path=\"/metrics\"} == 1)", "state": "inactive", @@ -2438,10 +2397,10 @@ ] }, { - "evaluationTime": 0.000352559, + "evaluationTime": 0, "file": "/etc/prometheus/rules/prometheus-prometheus-operator-prometheus-rulefiles-0/metalk8s-monitoring-prometheus-operator-kubernetes-system-scheduler.yaml", "interval": 30, - "lastEvaluation": "2021-06-25T16:33:19.589208816Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "kubernetes-system-scheduler", "rules": [ { @@ -2452,12 +2411,12 @@ "summary": "Target disappeared from Prometheus target discovery." }, "duration": 900, - "evaluationTime": 0.000341974, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:33:19.589215712Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeSchedulerDown", "query": "absent(up{job=\"kube-scheduler\"} == 1)", "state": "inactive", @@ -2466,10 +2425,10 @@ ] }, { - "evaluationTime": 0.001887914, + "evaluationTime": 0, "file": "/etc/prometheus/rules/prometheus-prometheus-operator-prometheus-rulefiles-0/metalk8s-monitoring-prometheus-operator-kubernetes-system.yaml", "interval": 30, - "lastEvaluation": "2021-06-25T16:33:17.925635168Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "kubernetes-system", "rules": [ { @@ -2480,12 +2439,12 @@ "summary": "Different semantic versions of Kubernetes components running." }, "duration": 900, - "evaluationTime": 0.001334861, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:17.925657018Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeVersionMismatch", "query": "count(count by(git_version) (label_replace(kubernetes_build_info{job!~\"kube-dns|coredns\"}, \"git_version\", \"$1\", \"git_version\", \"(v[0-9]*.[0-9]*).*\"))) > 1", "state": "inactive", @@ -2499,12 +2458,12 @@ "summary": "Kubernetes API server client is experiencing errors." 
}, "duration": 900, - "evaluationTime": 0.000518492, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:17.926997341Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "KubeClientErrors", "query": "(sum by(instance, job) (rate(rest_client_requests_total{code=~\"5..\"}[5m])) / sum by(instance, job) (rate(rest_client_requests_total[5m]))) > 0.01", "state": "inactive", @@ -2513,96 +2472,96 @@ ] }, { - "evaluationTime": 0.002806548, + "evaluationTime": 0.001619265, "file": "/etc/prometheus/rules/prometheus-prometheus-operator-prometheus-rulefiles-0/metalk8s-monitoring-prometheus-operator-node-exporter.rules.yaml", "interval": 30, - "lastEvaluation": "2021-06-25T16:32:57.325460671Z", + "lastEvaluation": "2021-08-05T08:29:27.325028126Z", "name": "node-exporter.rules", "rules": [ { - "evaluationTime": 0.000460091, + "evaluationTime": 0.000312794, "health": "ok", - "lastEvaluation": "2021-06-25T16:32:57.325467892Z", + "lastEvaluation": "2021-08-05T08:29:27.325040219Z", "name": "instance:node_num_cpu:sum", "query": "count without(cpu) (count without(mode) (node_cpu_seconds_total{job=\"node-exporter\"}))", "type": "recording" }, { - "evaluationTime": 0.000224068, + "evaluationTime": 0.00015216, "health": "ok", - "lastEvaluation": "2021-06-25T16:32:57.32592962Z", + "lastEvaluation": "2021-08-05T08:29:27.325354973Z", "name": "instance:node_cpu_utilisation:rate5m", "query": "1 - avg without(cpu, mode) (rate(node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}[5m]))", "type": "recording" }, { - "evaluationTime": 0.00024306, + "evaluationTime": 0.000143513, "health": "ok", - "lastEvaluation": "2021-06-25T16:32:57.326154485Z", + "lastEvaluation": "2021-08-05T08:29:27.325507949Z", "name": "instance:node_load1_per_cpu:ratio", "query": "(node_load1{job=\"node-exporter\"} / instance:node_num_cpu:sum{job=\"node-exporter\"})", "type": "recording" }, { - "evaluationTime": 0.000242273, + "evaluationTime": 0.000147387, "health": "ok", - "lastEvaluation": "2021-06-25T16:32:57.326399605Z", + "lastEvaluation": "2021-08-05T08:29:27.32565247Z", "name": "instance:node_memory_utilisation:ratio", "query": "1 - (node_memory_MemAvailable_bytes{job=\"node-exporter\"} / node_memory_MemTotal_bytes{job=\"node-exporter\"})", "type": "recording" }, { - "evaluationTime": 0.000141324, + "evaluationTime": 9.6468e-05, "health": "ok", - "lastEvaluation": "2021-06-25T16:32:57.326643195Z", + "lastEvaluation": "2021-08-05T08:29:27.325801962Z", "name": "instance:node_vmstat_pgmajfault:rate5m", "query": "rate(node_vmstat_pgmajfault{job=\"node-exporter\"}[5m])", "type": "recording" }, { - "evaluationTime": 0.000319107, + "evaluationTime": 0.000128537, "health": "ok", - "lastEvaluation": "2021-06-25T16:32:57.326785406Z", + "lastEvaluation": "2021-08-05T08:29:27.325899322Z", "name": "instance_device:node_disk_io_time_seconds:rate5m", "query": "rate(node_disk_io_time_seconds_total{device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\",job=\"node-exporter\"}[5m])", "type": "recording" }, { - "evaluationTime": 0.000237099, + "evaluationTime": 0.00016345, "health": "ok", - "lastEvaluation": "2021-06-25T16:32:57.3271057Z", + "lastEvaluation": "2021-08-05T08:29:27.326028668Z", "name": "instance_device:node_disk_io_time_weighted_seconds:rate5m", "query": "rate(node_disk_io_time_weighted_seconds_total{device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\",job=\"node-exporter\"}[5m])", "type": "recording" }, { - "evaluationTime": 
0.000268628, + "evaluationTime": 0.000151948, "health": "ok", - "lastEvaluation": "2021-06-25T16:32:57.327343806Z", + "lastEvaluation": "2021-08-05T08:29:27.326193112Z", "name": "instance:node_network_receive_bytes_excluding_lo:rate5m", "query": "sum without(device) (rate(node_network_receive_bytes_total{device!=\"lo\",job=\"node-exporter\"}[5m]))", "type": "recording" }, { - "evaluationTime": 0.000211786, + "evaluationTime": 0.000151124, "health": "ok", - "lastEvaluation": "2021-06-25T16:32:57.327613562Z", + "lastEvaluation": "2021-08-05T08:29:27.326346445Z", "name": "instance:node_network_transmit_bytes_excluding_lo:rate5m", "query": "sum without(device) (rate(node_network_transmit_bytes_total{device!=\"lo\",job=\"node-exporter\"}[5m]))", "type": "recording" }, { - "evaluationTime": 0.000242944, + "evaluationTime": 6.5027e-05, "health": "ok", - "lastEvaluation": "2021-06-25T16:32:57.327826379Z", + "lastEvaluation": "2021-08-05T08:29:27.32649886Z", "name": "instance:node_network_receive_drop_excluding_lo:rate5m", "query": "sum without(device) (rate(node_network_receive_drop_total{device!=\"lo\",job=\"node-exporter\"}[5m]))", "type": "recording" }, { - "evaluationTime": 0.00019397, + "evaluationTime": 7.9169e-05, "health": "ok", - "lastEvaluation": "2021-06-25T16:32:57.328070476Z", + "lastEvaluation": "2021-08-05T08:29:27.326564489Z", "name": "instance:node_network_transmit_drop_excluding_lo:rate5m", "query": "sum without(device) (rate(node_network_transmit_drop_total{device!=\"lo\",job=\"node-exporter\"}[5m]))", "type": "recording" @@ -2610,10 +2569,10 @@ ] }, { - "evaluationTime": 0.012949631, + "evaluationTime": 0.003346354, "file": "/etc/prometheus/rules/prometheus-prometheus-operator-prometheus-rulefiles-0/metalk8s-monitoring-prometheus-operator-node-exporter.yaml", "interval": 30, - "lastEvaluation": "2021-06-25T16:33:05.359432266Z", + "lastEvaluation": "2021-08-05T08:29:35.358810274Z", "name": "node-exporter", "rules": [ { @@ -2624,12 +2583,12 @@ "summary": "Filesystem is predicted to run out of space within the next 24 hours." }, "duration": 3600, - "evaluationTime": 0.002195899, + "evaluationTime": 0.000881355, "health": "ok", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:05.359440247Z", + "lastEvaluation": "2021-08-05T08:29:35.358817782Z", "name": "NodeFilesystemSpaceFillingUp", "query": "(node_filesystem_avail_bytes{fstype!=\"\",job=\"node-exporter\"} / node_filesystem_size_bytes{fstype!=\"\",job=\"node-exporter\"} * 100 < 40 and predict_linear(node_filesystem_avail_bytes{fstype!=\"\",job=\"node-exporter\"}[6h], 24 * 60 * 60) < 0 and node_filesystem_readonly{fstype!=\"\",job=\"node-exporter\"} == 0)", "state": "inactive", @@ -2643,12 +2602,12 @@ "summary": "Filesystem is predicted to run out of space within the next 4 hours." 
}, "duration": 3600, - "evaluationTime": 0.001574088, + "evaluationTime": 0.000264111, "health": "ok", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:33:05.361638008Z", + "lastEvaluation": "2021-08-05T08:29:35.359701487Z", "name": "NodeFilesystemSpaceFillingUp", "query": "(node_filesystem_avail_bytes{fstype!=\"\",job=\"node-exporter\"} / node_filesystem_size_bytes{fstype!=\"\",job=\"node-exporter\"} * 100 < 20 and predict_linear(node_filesystem_avail_bytes{fstype!=\"\",job=\"node-exporter\"}[6h], 4 * 60 * 60) < 0 and node_filesystem_readonly{fstype!=\"\",job=\"node-exporter\"} == 0)", "state": "inactive", @@ -2659,17 +2618,17 @@ "annotations": { "description": "Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf \"%.2f\" $value }}% available space left.", "runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutofspace", - "summary": "Filesystem has less than 5% space left." + "summary": "Filesystem has less than 20% space left." }, "duration": 3600, - "evaluationTime": 0.001165681, + "evaluationTime": 0.000141783, "health": "ok", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:05.363213375Z", + "lastEvaluation": "2021-08-05T08:29:35.359966493Z", "name": "NodeFilesystemAlmostOutOfSpace", - "query": "(node_filesystem_avail_bytes{fstype!=\"\",job=\"node-exporter\"} / node_filesystem_size_bytes{fstype!=\"\",job=\"node-exporter\"} * 100 < 5 and node_filesystem_readonly{fstype!=\"\",job=\"node-exporter\"} == 0)", + "query": "(node_filesystem_avail_bytes{fstype!=\"\",job=\"node-exporter\"} / node_filesystem_size_bytes{fstype!=\"\",job=\"node-exporter\"} * 100 < 20 and node_filesystem_readonly{fstype!=\"\",job=\"node-exporter\"} == 0)", "state": "inactive", "type": "alerting" }, @@ -2678,17 +2637,17 @@ "annotations": { "description": "Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf \"%.2f\" $value }}% available space left.", "runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutofspace", - "summary": "Filesystem has less than 3% space left." + "summary": "Filesystem has less than 12% space left." }, "duration": 3600, - "evaluationTime": 0.001108558, + "evaluationTime": 0.000164005, "health": "ok", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:33:05.364380214Z", + "lastEvaluation": "2021-08-05T08:29:35.360109123Z", "name": "NodeFilesystemAlmostOutOfSpace", - "query": "(node_filesystem_avail_bytes{fstype!=\"\",job=\"node-exporter\"} / node_filesystem_size_bytes{fstype!=\"\",job=\"node-exporter\"} * 100 < 3 and node_filesystem_readonly{fstype!=\"\",job=\"node-exporter\"} == 0)", + "query": "(node_filesystem_avail_bytes{fstype!=\"\",job=\"node-exporter\"} / node_filesystem_size_bytes{fstype!=\"\",job=\"node-exporter\"} * 100 < 12 and node_filesystem_readonly{fstype!=\"\",job=\"node-exporter\"} == 0)", "state": "inactive", "type": "alerting" }, @@ -2700,12 +2659,12 @@ "summary": "Filesystem is predicted to run out of inodes within the next 24 hours." 
}, "duration": 3600, - "evaluationTime": 0.001412834, + "evaluationTime": 0.000296165, "health": "ok", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:05.365489927Z", + "lastEvaluation": "2021-08-05T08:29:35.360274144Z", "name": "NodeFilesystemFilesFillingUp", "query": "(node_filesystem_files_free{fstype!=\"\",job=\"node-exporter\"} / node_filesystem_files{fstype!=\"\",job=\"node-exporter\"} * 100 < 40 and predict_linear(node_filesystem_files_free{fstype!=\"\",job=\"node-exporter\"}[6h], 24 * 60 * 60) < 0 and node_filesystem_readonly{fstype!=\"\",job=\"node-exporter\"} == 0)", "state": "inactive", @@ -2719,12 +2678,12 @@ "summary": "Filesystem is predicted to run out of inodes within the next 4 hours." }, "duration": 3600, - "evaluationTime": 0.00146909, + "evaluationTime": 0.000384795, "health": "ok", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:33:05.366903868Z", + "lastEvaluation": "2021-08-05T08:29:35.360571987Z", "name": "NodeFilesystemFilesFillingUp", "query": "(node_filesystem_files_free{fstype!=\"\",job=\"node-exporter\"} / node_filesystem_files{fstype!=\"\",job=\"node-exporter\"} * 100 < 20 and predict_linear(node_filesystem_files_free{fstype!=\"\",job=\"node-exporter\"}[6h], 4 * 60 * 60) < 0 and node_filesystem_readonly{fstype!=\"\",job=\"node-exporter\"} == 0)", "state": "inactive", @@ -2735,17 +2694,17 @@ "annotations": { "description": "Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf \"%.2f\" $value }}% available inodes left.", "runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutoffiles", - "summary": "Filesystem has less than 5% inodes left." + "summary": "Filesystem has less than 15% inodes left." }, "duration": 3600, - "evaluationTime": 0.001074096, + "evaluationTime": 0.000189107, "health": "ok", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:05.368374251Z", + "lastEvaluation": "2021-08-05T08:29:35.360958094Z", "name": "NodeFilesystemAlmostOutOfFiles", - "query": "(node_filesystem_files_free{fstype!=\"\",job=\"node-exporter\"} / node_filesystem_files{fstype!=\"\",job=\"node-exporter\"} * 100 < 5 and node_filesystem_readonly{fstype!=\"\",job=\"node-exporter\"} == 0)", + "query": "(node_filesystem_files_free{fstype!=\"\",job=\"node-exporter\"} / node_filesystem_files{fstype!=\"\",job=\"node-exporter\"} * 100 < 15 and node_filesystem_readonly{fstype!=\"\",job=\"node-exporter\"} == 0)", "state": "inactive", "type": "alerting" }, @@ -2754,17 +2713,17 @@ "annotations": { "description": "Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf \"%.2f\" $value }}% available inodes left.", "runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutoffiles", - "summary": "Filesystem has less than 3% inodes left." + "summary": "Filesystem has less than 8% inodes left." 
}, "duration": 3600, - "evaluationTime": 0.001152169, + "evaluationTime": 0.000162797, "health": "ok", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:33:05.369449448Z", + "lastEvaluation": "2021-08-05T08:29:35.361148216Z", "name": "NodeFilesystemAlmostOutOfFiles", - "query": "(node_filesystem_files_free{fstype!=\"\",job=\"node-exporter\"} / node_filesystem_files{fstype!=\"\",job=\"node-exporter\"} * 100 < 3 and node_filesystem_readonly{fstype!=\"\",job=\"node-exporter\"} == 0)", + "query": "(node_filesystem_files_free{fstype!=\"\",job=\"node-exporter\"} / node_filesystem_files{fstype!=\"\",job=\"node-exporter\"} * 100 < 8 and node_filesystem_readonly{fstype!=\"\",job=\"node-exporter\"} == 0)", "state": "inactive", "type": "alerting" }, @@ -2776,12 +2735,12 @@ "summary": "Network interface is reporting many receive errors." }, "duration": 3600, - "evaluationTime": 0.000444152, + "evaluationTime": 0.000111343, "health": "ok", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:05.370602929Z", + "lastEvaluation": "2021-08-05T08:29:35.361311978Z", "name": "NodeNetworkReceiveErrs", "query": "increase(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01", "state": "inactive", @@ -2795,12 +2754,12 @@ "summary": "Network interface is reporting many transmit errors." }, "duration": 3600, - "evaluationTime": 0.000459121, + "evaluationTime": 0.000104584, "health": "ok", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:05.37104807Z", + "lastEvaluation": "2021-08-05T08:29:35.36142417Z", "name": "NodeNetworkTransmitErrs", "query": "increase(node_network_transmit_errs_total[2m]) / rate(node_network_transmit_packets_total[2m]) > 0.01", "state": "inactive", @@ -2814,12 +2773,12 @@ "summary": "Number of conntrack are getting close to the limit" }, "duration": 0, - "evaluationTime": 0.000166636, + "evaluationTime": 0.000108123, "health": "ok", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:05.371508103Z", + "lastEvaluation": "2021-08-05T08:29:35.361529541Z", "name": "NodeHighNumberConntrackEntriesUsed", "query": "(node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75", "state": "inactive", @@ -2833,12 +2792,12 @@ "summary": "Clock skew detected." }, "duration": 600, - "evaluationTime": 0.000267282, + "evaluationTime": 0.00019926, "health": "ok", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:05.371676103Z", + "lastEvaluation": "2021-08-05T08:29:35.361638553Z", "name": "NodeClockSkewDetected", "query": "(node_timex_offset_seconds > 0.05 and deriv(node_timex_offset_seconds[5m]) >= 0) or (node_timex_offset_seconds < -0.05 and deriv(node_timex_offset_seconds[5m]) <= 0)", "state": "inactive", @@ -2852,12 +2811,12 @@ "summary": "Clock not synchronising." }, "duration": 600, - "evaluationTime": 0.000159517, + "evaluationTime": 0.000105942, "health": "ok", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:05.371944605Z", + "lastEvaluation": "2021-08-05T08:29:35.361839124Z", "name": "NodeClockNotSynchronising", "query": "min_over_time(node_timex_sync_status[5m]) == 0 and node_timex_maxerror_seconds >= 16", "state": "inactive", @@ -2871,12 +2830,12 @@ "summary": "Node Exporter text file collector failed to scrape." 
}, "duration": 0, - "evaluationTime": 7.6125e-05, + "evaluationTime": 5.6446e-05, "health": "ok", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:05.372105167Z", + "lastEvaluation": "2021-08-05T08:29:35.361945817Z", "name": "NodeTextFileCollectorScrapeError", "query": "node_textfile_scrape_error{job=\"node-exporter\"} == 1", "state": "inactive", @@ -2890,12 +2849,12 @@ "summary": "RAID Array is degraded" }, "duration": 900, - "evaluationTime": 0.000116504, + "evaluationTime": 7.7363e-05, "health": "ok", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:33:05.372182207Z", + "lastEvaluation": "2021-08-05T08:29:35.362002951Z", "name": "NodeRAIDDegraded", "query": "node_md_disks_required - ignoring(state) (node_md_disks{state=\"active\"}) >= 1", "state": "inactive", @@ -2909,12 +2868,12 @@ "summary": "Failed device in RAID array" }, "duration": 0, - "evaluationTime": 7.8971e-05, + "evaluationTime": 7.3091e-05, "health": "ok", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:05.37229987Z", + "lastEvaluation": "2021-08-05T08:29:35.362081016Z", "name": "NodeRAIDDiskFailure", "query": "node_md_disks{state=\"failed\"} >= 1", "state": "inactive", @@ -2923,10 +2882,10 @@ ] }, { - "evaluationTime": 0.000604503, + "evaluationTime": 0, "file": "/etc/prometheus/rules/prometheus-prometheus-operator-prometheus-rulefiles-0/metalk8s-monitoring-prometheus-operator-node-network.yaml", "interval": 30, - "lastEvaluation": "2021-06-25T16:33:16.987520111Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "node-network", "rules": [ { @@ -2936,12 +2895,12 @@ "runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodenetworkinterfaceflapping" }, "duration": 120, - "evaluationTime": 0.000591092, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:16.987529231Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "NodeNetworkInterfaceFlapping", "query": "changes(node_network_up{device!~\"veth.+\",job=\"node-exporter\"}[2m]) > 2", "state": "inactive", @@ -2950,32 +2909,32 @@ ] }, { - "evaluationTime": 0.001877002, + "evaluationTime": 0, "file": "/etc/prometheus/rules/prometheus-prometheus-operator-prometheus-rulefiles-0/metalk8s-monitoring-prometheus-operator-node.rules.yaml", "interval": 30, - "lastEvaluation": "2021-06-25T16:33:18.113123223Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "node.rules", "rules": [ { - "evaluationTime": 0.000989836, - "health": "ok", - "lastEvaluation": "2021-06-25T16:33:18.113131881Z", + "evaluationTime": 0, + "health": "unknown", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "node_namespace_pod:kube_pod_info:", "query": "topk by(namespace, pod) (1, max by(node, namespace, pod) (label_replace(kube_pod_info{job=\"kube-state-metrics\",node!=\"\"}, \"pod\", \"$1\", \"pod\", \"(.*)\")))", "type": "recording" }, { - "evaluationTime": 0.000608485, - "health": "ok", - "lastEvaluation": "2021-06-25T16:33:18.114125187Z", + "evaluationTime": 0, + "health": "unknown", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "node:node_num_cpu:sum", "query": "count by(cluster, node) (sum by(node, cpu) (node_cpu_seconds_total{job=\"node-exporter\"} * on(namespace, pod) group_left(node) topk by(namespace, pod) (1, node_namespace_pod:kube_pod_info:)))", "type": "recording" }, { - "evaluationTime": 0.000261028, - "health": "ok", - "lastEvaluation": 
"2021-06-25T16:33:18.114735396Z", + "evaluationTime": 0, + "health": "unknown", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": ":node_memory_MemAvailable_bytes:sum", "query": "sum by(cluster) (node_memory_MemAvailable_bytes{job=\"node-exporter\"} or (node_memory_Buffers_bytes{job=\"node-exporter\"} + node_memory_Cached_bytes{job=\"node-exporter\"} + node_memory_MemFree_bytes{job=\"node-exporter\"} + node_memory_Slab_bytes{job=\"node-exporter\"}))", "type": "recording" @@ -2983,10 +2942,10 @@ ] }, { - "evaluationTime": 0.001409511, + "evaluationTime": 0, "file": "/etc/prometheus/rules/prometheus-prometheus-operator-prometheus-rulefiles-0/metalk8s-monitoring-prometheus-operator-prometheus-operator.yaml", "interval": 30, - "lastEvaluation": "2021-06-25T16:33:16.674146348Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "prometheus-operator", "rules": [ { @@ -2997,12 +2956,12 @@ "summary": "Errors while performing list operations in controller." }, "duration": 900, - "evaluationTime": 0.000546981, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:16.67415385Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "PrometheusOperatorListErrors", "query": "(sum by(controller, namespace) (rate(prometheus_operator_list_operations_failed_total{job=\"prometheus-operator-operator\",namespace=\"metalk8s-monitoring\"}[10m])) / sum by(controller, namespace) (rate(prometheus_operator_list_operations_total{job=\"prometheus-operator-operator\",namespace=\"metalk8s-monitoring\"}[10m]))) > 0.4", "state": "inactive", @@ -3016,12 +2975,12 @@ "summary": "Errors while performing watch operations in controller." }, "duration": 900, - "evaluationTime": 0.000267678, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:16.674702623Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "PrometheusOperatorWatchErrors", "query": "(sum by(controller, namespace) (rate(prometheus_operator_watch_operations_failed_total{job=\"prometheus-operator-operator\",namespace=\"metalk8s-monitoring\"}[10m])) / sum by(controller, namespace) (rate(prometheus_operator_watch_operations_total{job=\"prometheus-operator-operator\",namespace=\"metalk8s-monitoring\"}[10m]))) > 0.4", "state": "inactive", @@ -3035,12 +2994,12 @@ "summary": "Last controller reconciliation failed" }, "duration": 600, - "evaluationTime": 0.00010106, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:16.674971166Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "PrometheusOperatorSyncFailed", "query": "min_over_time(prometheus_operator_syncs{job=\"prometheus-operator-operator\",namespace=\"metalk8s-monitoring\",status=\"failed\"}[5m]) > 0", "state": "inactive", @@ -3054,12 +3013,12 @@ "summary": "Errors while reconciling controller." 
}, "duration": 600, - "evaluationTime": 0.000208116, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:16.675072897Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "PrometheusOperatorReconcileErrors", "query": "(sum by(controller, namespace) (rate(prometheus_operator_reconcile_errors_total{job=\"prometheus-operator-operator\",namespace=\"metalk8s-monitoring\"}[5m]))) / (sum by(controller, namespace) (rate(prometheus_operator_reconcile_operations_total{job=\"prometheus-operator-operator\",namespace=\"metalk8s-monitoring\"}[5m]))) > 0.1", "state": "inactive", @@ -3073,12 +3032,12 @@ "summary": "Errors while reconciling Prometheus." }, "duration": 600, - "evaluationTime": 7.229e-05, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:16.675281856Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "PrometheusOperatorNodeLookupErrors", "query": "rate(prometheus_operator_node_address_lookup_errors_total{job=\"prometheus-operator-operator\",namespace=\"metalk8s-monitoring\"}[5m]) > 0.1", "state": "inactive", @@ -3092,12 +3051,12 @@ "summary": "Prometheus operator not ready" }, "duration": 300, - "evaluationTime": 0.000102631, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:16.67535468Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "PrometheusOperatorNotReady", "query": "min by(namespace, controller) (max_over_time(prometheus_operator_ready{job=\"prometheus-operator-operator\",namespace=\"metalk8s-monitoring\"}[5m]) == 0)", "state": "inactive", @@ -3111,12 +3070,12 @@ "summary": "Resources rejected by Prometheus operator" }, "duration": 300, - "evaluationTime": 9.5807e-05, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:33:16.675457899Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "PrometheusOperatorRejectedResources", "query": "min_over_time(prometheus_operator_managed_resources{job=\"prometheus-operator-operator\",namespace=\"metalk8s-monitoring\",state=\"rejected\"}[5m]) > 0", "state": "inactive", @@ -3125,10 +3084,10 @@ ] }, { - "evaluationTime": 0.003384193, + "evaluationTime": 0, "file": "/etc/prometheus/rules/prometheus-prometheus-operator-prometheus-rulefiles-0/metalk8s-monitoring-prometheus-operator-prometheus.yaml", "interval": 30, - "lastEvaluation": "2021-06-25T16:32:54.861063973Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "prometheus", "rules": [ { @@ -3139,12 +3098,12 @@ "summary": "Failed Prometheus configuration reload." }, "duration": 600, - "evaluationTime": 0.000364017, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:32:54.861075044Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "PrometheusBadConfig", "query": "max_over_time(prometheus_config_last_reload_successful{job=\"prometheus-operator-prometheus\",namespace=\"metalk8s-monitoring\"}[5m]) == 0", "state": "inactive", @@ -3158,12 +3117,12 @@ "summary": "Prometheus alert notification queue predicted to run full in less than 30m." 
}, "duration": 900, - "evaluationTime": 0.000343364, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:32:54.861440641Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "PrometheusNotificationQueueRunningFull", "query": "(predict_linear(prometheus_notifications_queue_length{job=\"prometheus-operator-prometheus\",namespace=\"metalk8s-monitoring\"}[5m], 60 * 30) > min_over_time(prometheus_notifications_queue_capacity{job=\"prometheus-operator-prometheus\",namespace=\"metalk8s-monitoring\"}[5m]))", "state": "inactive", @@ -3177,12 +3136,12 @@ "summary": "Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager." }, "duration": 900, - "evaluationTime": 0.000208526, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:32:54.861785718Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "PrometheusErrorSendingAlertsToSomeAlertmanagers", "query": "(rate(prometheus_notifications_errors_total{job=\"prometheus-operator-prometheus\",namespace=\"metalk8s-monitoring\"}[5m]) / rate(prometheus_notifications_sent_total{job=\"prometheus-operator-prometheus\",namespace=\"metalk8s-monitoring\"}[5m])) * 100 > 1", "state": "inactive", @@ -3196,12 +3155,12 @@ "summary": "Prometheus is not connected to any Alertmanagers." }, "duration": 600, - "evaluationTime": 9.7234e-05, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:32:54.861995224Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "PrometheusNotConnectedToAlertmanagers", "query": "max_over_time(prometheus_notifications_alertmanagers_discovered{job=\"prometheus-operator-prometheus\",namespace=\"metalk8s-monitoring\"}[5m]) < 1", "state": "inactive", @@ -3215,12 +3174,12 @@ "summary": "Prometheus has issues reloading blocks from disk." }, "duration": 14400, - "evaluationTime": 7.8029e-05, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:32:54.862093265Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "PrometheusTSDBReloadsFailing", "query": "increase(prometheus_tsdb_reloads_failures_total{job=\"prometheus-operator-prometheus\",namespace=\"metalk8s-monitoring\"}[3h]) > 0", "state": "inactive", @@ -3234,12 +3193,12 @@ "summary": "Prometheus has issues compacting blocks." }, "duration": 14400, - "evaluationTime": 6.939e-05, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:32:54.862171896Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "PrometheusTSDBCompactionsFailing", "query": "increase(prometheus_tsdb_compactions_failed_total{job=\"prometheus-operator-prometheus\",namespace=\"metalk8s-monitoring\"}[3h]) > 0", "state": "inactive", @@ -3253,12 +3212,12 @@ "summary": "Prometheus is not ingesting samples." 
}, "duration": 600, - "evaluationTime": 0.000446883, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:32:54.86224187Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "PrometheusNotIngestingSamples", "query": "(rate(prometheus_tsdb_head_samples_appended_total{job=\"prometheus-operator-prometheus\",namespace=\"metalk8s-monitoring\"}[5m]) <= 0 and (sum without(scrape_job) (prometheus_target_metadata_cache_entries{job=\"prometheus-operator-prometheus\",namespace=\"metalk8s-monitoring\"}) > 0 or sum without(rule_group) (prometheus_rule_group_rules{job=\"prometheus-operator-prometheus\",namespace=\"metalk8s-monitoring\"}) > 0))", "state": "inactive", @@ -3272,12 +3231,12 @@ "summary": "Prometheus is dropping samples with duplicate timestamps." }, "duration": 600, - "evaluationTime": 0.000107841, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:32:54.862689624Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "PrometheusDuplicateTimestamps", "query": "rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job=\"prometheus-operator-prometheus\",namespace=\"metalk8s-monitoring\"}[5m]) > 0", "state": "inactive", @@ -3291,12 +3250,12 @@ "summary": "Prometheus drops samples with out-of-order timestamps." }, "duration": 600, - "evaluationTime": 6.8805e-05, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:32:54.862798147Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "PrometheusOutOfOrderTimestamps", "query": "rate(prometheus_target_scrapes_sample_out_of_order_total{job=\"prometheus-operator-prometheus\",namespace=\"metalk8s-monitoring\"}[5m]) > 0", "state": "inactive", @@ -3310,12 +3269,12 @@ "summary": "Prometheus fails to send samples to remote storage." }, "duration": 900, - "evaluationTime": 0.000285968, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:32:54.862867425Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "PrometheusRemoteStorageFailures", "query": "((rate(prometheus_remote_storage_failed_samples_total{job=\"prometheus-operator-prometheus\",namespace=\"metalk8s-monitoring\"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job=\"prometheus-operator-prometheus\",namespace=\"metalk8s-monitoring\"}[5m])) / ((rate(prometheus_remote_storage_failed_samples_total{job=\"prometheus-operator-prometheus\",namespace=\"metalk8s-monitoring\"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job=\"prometheus-operator-prometheus\",namespace=\"metalk8s-monitoring\"}[5m])) + (rate(prometheus_remote_storage_succeeded_samples_total{job=\"prometheus-operator-prometheus\",namespace=\"metalk8s-monitoring\"}[5m]) or rate(prometheus_remote_storage_samples_total{job=\"prometheus-operator-prometheus\",namespace=\"metalk8s-monitoring\"}[5m])))) * 100 > 1", "state": "inactive", @@ -3329,12 +3288,12 @@ "summary": "Prometheus remote write is behind." 
}, "duration": 900, - "evaluationTime": 0.000124324, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:32:54.863154106Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "PrometheusRemoteWriteBehind", "query": "(max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job=\"prometheus-operator-prometheus\",namespace=\"metalk8s-monitoring\"}[5m]) - ignoring(remote_name, url) group_right() max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job=\"prometheus-operator-prometheus\",namespace=\"metalk8s-monitoring\"}[5m])) > 120", "state": "inactive", @@ -3348,12 +3307,12 @@ "summary": "Prometheus remote write desired shards calculation wants to run more than configured max shards." }, "duration": 900, - "evaluationTime": 0.000106282, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:32:54.863278937Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "PrometheusRemoteWriteDesiredShards", "query": "(max_over_time(prometheus_remote_storage_shards_desired{job=\"prometheus-operator-prometheus\",namespace=\"metalk8s-monitoring\"}[5m]) > max_over_time(prometheus_remote_storage_shards_max{job=\"prometheus-operator-prometheus\",namespace=\"metalk8s-monitoring\"}[5m]))", "state": "inactive", @@ -3367,12 +3326,12 @@ "summary": "Prometheus is failing rule evaluations." }, "duration": 900, - "evaluationTime": 0.000177789, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:32:54.863385773Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "PrometheusRuleFailures", "query": "increase(prometheus_rule_evaluation_failures_total{job=\"prometheus-operator-prometheus\",namespace=\"metalk8s-monitoring\"}[5m]) > 0", "state": "inactive", @@ -3386,12 +3345,12 @@ "summary": "Prometheus is missing rule evaluations due to slow rule group evaluation." }, "duration": 900, - "evaluationTime": 0.000179225, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:32:54.863564064Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "PrometheusMissingRuleEvaluations", "query": "increase(prometheus_rule_group_iterations_missed_total{job=\"prometheus-operator-prometheus\",namespace=\"metalk8s-monitoring\"}[5m]) > 0", "state": "inactive", @@ -3405,12 +3364,12 @@ "summary": "Prometheus has dropped targets because some scrape configs have exceeded the targets limit." }, "duration": 900, - "evaluationTime": 7.8072e-05, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:32:54.863743893Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "PrometheusTargetLimitHit", "query": "increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job=\"prometheus-operator-prometheus\",namespace=\"metalk8s-monitoring\"}[5m]) > 0", "state": "inactive", @@ -3424,12 +3383,12 @@ "summary": "Prometheus has dropped targets because some scrape configs have exceeded the labels limit." 
}, "duration": 900, - "evaluationTime": 7.7658e-05, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "warning" }, - "lastEvaluation": "2021-06-25T16:32:54.863822538Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "PrometheusLabelLimitHit", "query": "increase(prometheus_target_scrape_pool_exceeded_label_limits_total{job=\"prometheus-operator-prometheus\",namespace=\"metalk8s-monitoring\"}[5m]) > 0", "state": "inactive", @@ -3443,12 +3402,12 @@ "summary": "Prometheus encounters more than 3% errors sending alerts to any Alertmanager." }, "duration": 900, - "evaluationTime": 0.000540286, - "health": "ok", + "evaluationTime": 0, + "health": "unknown", "labels": { "severity": "critical" }, - "lastEvaluation": "2021-06-25T16:32:54.863900829Z", + "lastEvaluation": "0001-01-01T00:00:00Z", "name": "PrometheusErrorSendingAlertsToAnyAlertmanager", "query": "min without(alertmanager) (rate(prometheus_notifications_errors_total{alertmanager!~\"\",job=\"prometheus-operator-prometheus\",namespace=\"metalk8s-monitoring\"}[5m]) / rate(prometheus_notifications_sent_total{alertmanager!~\"\",job=\"prometheus-operator-prometheus\",namespace=\"metalk8s-monitoring\"}[5m])) * 100 > 3", "state": "inactive",