Skip to content

Commit

Permalink
Disable etcdDatabaseHighFragmentationRatio
Browse files (browse the repository at this point in the history)
  • Loading branch information
eg-ayoub committed May 16, 2023
1 parent a2a616f commit 9dd980b
Show file tree
Hide file tree
Showing 5 changed files with 5 additions and 65 deletions.
4 changes: 4 additions & 0 deletions charts/drop-prometheus-rules.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,7 @@ node-exporter:
- NodeRAIDDiskFailure
- NodeTextFileCollectorScrapeError
- NodeFileDescriptorLimit
# Workaround: the etcdDatabaseHighFragmentationRatio alert fires right after install.
# Revert this entire commit once the fix is merged.
etcd:
- etcdDatabaseHighFragmentationRatio
14 changes: 0 additions & 14 deletions salt/metalk8s/addons/prometheus-operator/deployed/chart.sls
Original file line number Diff line number Diff line change
Expand Up @@ -68514,20 +68514,6 @@ spec:
for: 10m
labels:
severity: warning
- alert: etcdDatabaseHighFragmentationRatio
annotations:
description: 'etcd cluster "{{ $labels.job }}": database size in use on instance
{{ $labels.instance }} is {{ $value | humanizePercentage }} of the actual
allocated disk space, please run defragmentation (e.g. etcdctl defrag) to
retrieve the unused fragmented disk space.'
runbook_url: https://etcd.io/docs/v3.5/op-guide/maintenance/#defragmentation
summary: etcd database size in use is less than 50% of the actual allocated
storage.
expr: (last_over_time(etcd_mvcc_db_total_size_in_use_in_bytes[5m]) / last_over_time(etcd_mvcc_db_total_size_in_bytes[5m]))
< 0.5
for: 10m
labels:
severity: warning
---
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
Expand Down
1 change: 0 additions & 1 deletion tools/lib-alert-tree/metalk8s/platform/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
Existing.warning("etcdHighNumberOfFailedProposals"),
Existing.warning("etcdHighNumberOfLeaderChanges"),
Existing.warning("etcdMemberCommunicationSlow"),
Existing.warning("etcdDatabaseHighFragmentationRatio"),
Existing.warning("etcdExcessiveDatabaseGrowth"),
Existing.warning("KubeCPUOvercommit"),
Existing.warning("KubeCPUQuotaOvercommit"),
Expand Down
8 changes: 1 addition & 7 deletions tools/rule_extractor/alerting_rules.json
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@
{
"message": "The Kubernetes control plane is degraded.",
"name": "KubernetesControlPlaneDegraded",
"query": "sum(ALERTS{alertname=\"KubeAPIErrorBudgetBurn\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"KubeAPITerminatedRequests\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"etcdHighNumberOfFailedGRPCRequests\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"etcdHighCommitDurations\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"etcdHighFsyncDurations\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"etcdHighNumberOfFailedProposals\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"etcdHighNumberOfLeaderChanges\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"etcdMemberCommunicationSlow\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"etcdDatabaseHighFragmentationRatio\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"etcdExcessiveDatabaseGrowth\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"KubeCPUOvercommit\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"KubeCPUQuotaOvercommit\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"KubeMemoryOvercommit\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"KubeMemoryQuotaOvercommit\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"KubeClientCertificateExpiration\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"KubeClientErrors\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"KubeVersionMismatch\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"KubeDeploymentReplicasMismatch\",alertstate=\"firing\",deployment=~\"coredns\",namespace=~\"kube-system\",severity=\"warning\"} or ALERTS{alertname=\"KubeDeploymentGenerationMismatch\",alertstate=\"firing\",deployment=~\"coredns\",namespace=~\"kube-system\",severity=\"warning\"} or ALERTS{alertname=\"KubeDeploymentReplicasMismatch\",alertstate=\"firing\",deployment=~\"prometheus-adapter\",namespace=~\"metalk8s-monitoring\",severity=\"warning\"} or ALERTS{alertname=\"KubeDeploymentGenerationMismatch\",alertstate=\"firing\",deployment=~\"prometheus-adapter\",namespace=~\"metalk8s-monitoring\",severity=\"warning\"} or ALERTS{alertname=\"KubeDeploymentReplicasMismatch\",alertstate=\"firing\",deployment=~\"prometheus-operator-kube-state-metrics\",namespace=~\"metalk8s-monitoring\",severity=\"warning\"} or ALERTS{alertname=\"KubeDeploymentGenerationMismatch\",alertstate=\"firing\",deployment=~\"prometheus-operator-kube-state-metrics\",namespace=~\"metalk8s-monitoring\",severity=\"warning\"}) >= 1",
"query": "sum(ALERTS{alertname=\"KubeAPIErrorBudgetBurn\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"KubeAPITerminatedRequests\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"etcdHighNumberOfFailedGRPCRequests\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"etcdHighCommitDurations\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"etcdHighFsyncDurations\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"etcdHighNumberOfFailedProposals\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"etcdHighNumberOfLeaderChanges\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"etcdMemberCommunicationSlow\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"etcdExcessiveDatabaseGrowth\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"KubeCPUOvercommit\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"KubeCPUQuotaOvercommit\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"KubeMemoryOvercommit\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"KubeMemoryQuotaOvercommit\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"KubeClientCertificateExpiration\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"KubeClientErrors\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"KubeVersionMismatch\",alertstate=\"firing\",severity=\"warning\"} or ALERTS{alertname=\"KubeDeploymentReplicasMismatch\",alertstate=\"firing\",deployment=~\"coredns\",namespace=~\"kube-system\",severity=\"warning\"} or ALERTS{alertname=\"KubeDeploymentGenerationMismatch\",alertstate=\"firing\",deployment=~\"coredns\",namespace=~\"kube-system\",severity=\"warning\"} or ALERTS{alertname=\"KubeDeploymentReplicasMismatch\",alertstate=\"firing\",deployment=~\"prometheus-adapter\",namespace=~\"metalk8s-monitoring\",severity=\"warning\"} or ALERTS{alertname=\"KubeDeploymentGenerationMismatch\",alertstate=\"firing\",deployment=~\"prometheus-adapter\",namespace=~\"metalk8s-monitoring\",severity=\"warning\"} or ALERTS{alertname=\"KubeDeploymentReplicasMismatch\",alertstate=\"firing\",deployment=~\"prometheus-operator-kube-state-metrics\",namespace=~\"metalk8s-monitoring\",severity=\"warning\"} or ALERTS{alertname=\"KubeDeploymentGenerationMismatch\",alertstate=\"firing\",deployment=~\"prometheus-operator-kube-state-metrics\",namespace=~\"metalk8s-monitoring\",severity=\"warning\"}) >= 1",
"severity": "warning"
},
{
Expand Down Expand Up @@ -215,12 +215,6 @@
"query": "max_over_time(reloader_last_reload_successful{namespace=~\".+\"}[5m]) == 0",
"severity": "warning"
},
{
"message": "etcd database size in use is less than 50% of the actual allocated storage.",
"name": "etcdDatabaseHighFragmentationRatio",
"query": "(last_over_time(etcd_mvcc_db_total_size_in_use_in_bytes[5m]) / last_over_time(etcd_mvcc_db_total_size_in_bytes[5m])) < 0.5",
"severity": "warning"
},
{
"message": "etcd cluster database is running full.",
"name": "etcdDatabaseQuotaLowSpace",
Expand Down
43 changes: 0 additions & 43 deletions tools/rule_extractor/rules.json
Original file line number Diff line number Diff line change
Expand Up @@ -774,49 +774,6 @@
"name": "etcd",
"partialResponseStrategy": "ABORT",
"rules": [
{
"alerts": [
{
"activeAt": "2023-03-08T09:49:35.541577744Z",
"annotations": {
"description": "etcd cluster \"kube-etcd\": database size in use on instance 192.168.1.100:2381 is 34.8% of the actual allocated disk space, please run defragmentation (e.g. etcdctl defrag) to retrieve the unused fragmented disk space.",
"runbook_url": "https://etcd.io/docs/v3.5/op-guide/maintenance/#defragmentation",
"summary": "etcd database size in use is less than 50% of the actual allocated storage."
},
"labels": {
"alertname": "etcdDatabaseHighFragmentationRatio",
"container": "etcd",
"endpoint": "http-metrics",
"instance": "192.168.1.100:2381",
"job": "kube-etcd",
"namespace": "kube-system",
"pod": "etcd-bootstrap",
"service": "prometheus-operator-kube-etcd",
"severity": "warning"
},
"partialResponseStrategy": "WARN",
"state": "firing",
"value": "3.480380775481774e-01"
}
],
"annotations": {
"description": "etcd cluster \"{{ $labels.job }}\": database size in use on instance {{ $labels.instance }} is {{ $value | humanizePercentage }} of the actual allocated disk space, please run defragmentation (e.g. etcdctl defrag) to retrieve the unused fragmented disk space.",
"runbook_url": "https://etcd.io/docs/v3.5/op-guide/maintenance/#defragmentation",
"summary": "etcd database size in use is less than 50% of the actual allocated storage."
},
"duration": 600,
"evaluationTime": 0.000454219,
"health": "ok",
"labels": {
"prometheus": "metalk8s-monitoring/prometheus-operator-prometheus",
"severity": "warning"
},
"lastEvaluation": "2023-03-08T13:38:35.560377703Z",
"name": "etcdDatabaseHighFragmentationRatio",
"query": "(last_over_time(etcd_mvcc_db_total_size_in_use_in_bytes[5m]) / last_over_time(etcd_mvcc_db_total_size_in_bytes[5m])) < 0.5",
"state": "firing",
"type": "alerting"
},
{
"alerts": [],
"annotations": {
Expand Down

0 comments on commit 9dd980b

Please sign in to comment.