Add an optional Prometheus alert status check to the deploy playbook.

* environments/generic/configuration.yml: adds two lists,
  prometheus_alert_status_filter_rule_by_name and
  prometheus_alert_status_filter_rule_by_severity (presumably read by the
  prometheus-alert-status play to ignore these alerts; confirm against
  that play).
* environments/kolla/configuration.yml: sets aodh, ceilometer, gnocchi,
  magnum, manila, senlin and skyline to "no", and adds enable_heat and
  enable_ironic_prometheus_exporter, both "no".
* playbooks/deploy.yml: adds the run_prometheus_alert_status flag (false
  unless prometheus_alert_status is set) and a task, gated on that flag,
  that runs the check script on the manager over ssh.
* scripts/check/303-prometheus-alert-status.sh: new script that runs
  "osism apply prometheus-alert-status".

diff --git a/environments/generic/configuration.yml b/environments/generic/configuration.yml
index b420780c9..7d8535811 100644
--- a/environments/generic/configuration.yml
+++ b/environments/generic/configuration.yml
@@ -7,3 +7,10 @@ dotfiles_repo_version: main
 dotfiles_repo_local_destination: "~/dotfiles"
 dotfiles_files:
   - .tmux.conf
+
+prometheus_alert_status_filter_rule_by_name:
+  - PrometheusAlertmanagerE2eDeadManSwitch
+  - HaproxyHttpSlowingDown
+
+prometheus_alert_status_filter_rule_by_severity:
+  - info
diff --git a/environments/kolla/configuration.yml b/environments/kolla/configuration.yml
index 1dba46e25..075892660 100644
--- a/environments/kolla/configuration.yml
+++ b/environments/kolla/configuration.yml
@@ -23,15 +23,16 @@ openstack_cacert: /etc/ssl/certs/ca-certificates.crt
 # openstack
 
 # enable services
-enable_aodh: "yes"
+enable_aodh: "no"
 enable_barbican: "yes"
-enable_ceilometer: "yes"
-enable_gnocchi: "yes"
+enable_ceilometer: "no"
+enable_gnocchi: "no"
 enable_ironic: "yes"
-enable_magnum: "yes"
-enable_manila: "yes"
-enable_senlin: "yes"
-enable_skyline: "yes"
+enable_magnum: "no"
+enable_manila: "no"
+enable_senlin: "no"
+enable_skyline: "no"
+enable_heat: "no"
 
 # generic
 openstack_service_workers: 2
@@ -97,6 +98,7 @@ ironic_dnsmasq_dhcp_range: "192.168.112.50,192.168.112.60"
 ironic_dnsmasq_dhcp_ranges:
   - range: "192.168.112.50,192.168.112.60"
 ironic_cleaning_network: "public"
+enable_ironic_prometheus_exporter: "no"
 
 # ceilometer
 enable_ceilometer_prometheus_pushgateway: "yes"
diff --git a/playbooks/deploy.yml b/playbooks/deploy.yml
index fcb1fb775..a7e4fe580 100644
--- a/playbooks/deploy.yml
+++ b/playbooks/deploy.yml
@@ -30,6 +30,7 @@
     in_a_nutshell: "{{ nutshell | default(false) | bool }}"
     run_refstack: "{{ refstack | default(false) | bool }}"
     run_tempest: "{{ tempest | default(false) | bool }}"
+    run_prometheus_alert_status: "{{ prometheus_alert_status | default(false) | bool }}"
 
   tasks:
     - name: Set facts (Zuul deployment)
@@ -206,3 +207,11 @@
         - not manual_deploy | bool
         - run_tempest | bool
       changed_when: true
+
+    - name: Check prometheus alert status
+      ansible.builtin.command:
+        cmd: "ssh -i {{ terraform_path }}/.id_rsa.{{ cloud_env }} dragon@{{ manager_host }} /opt/configuration/scripts/check/303-prometheus-alert-status.sh"
+      when:
+        - not manual_deploy | bool
+        - run_prometheus_alert_status | bool
+      changed_when: true
diff --git a/scripts/check/303-prometheus-alert-status.sh b/scripts/check/303-prometheus-alert-status.sh
new file mode 100755
index 000000000..3f5664920
--- /dev/null
+++ b/scripts/check/303-prometheus-alert-status.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+set -x
+set -e
+
+echo
+echo "# Checking for active prometheus alerts"
+echo
+
+osism apply prometheus-alert-status