diff --git a/runner/ansible/roles/catalog/1.1.1-156F64.yaml b/runner/ansible/roles/catalog/1.1.1-156F64.yaml new file mode 100644 index 0000000..e1fde1c --- /dev/null +++ b/runner/ansible/roles/catalog/1.1.1-156F64.yaml @@ -0,0 +1,64 @@ +id: 156F64 +name: Corosync `token` timeout +# name: 1.1.1 +group: Corosync +description: | + Corosync `token` timeout is set to `{{ values.expected_token_timeout }}` +# As a user I want the dsl to support interpolation of values/env in the description section +# As a user I want the dsl to support interpolation of values/env in the remediation section +remediation: | + ## Abstract + The value of the Corosync `token` timeout is not set as recommended. + + ## Remediation + + Adjust the corosync `token` timeout as recommended by the best practices, and reload the corosync configuration + + 1. Set the correct `token` timeout in the totem section in the corosync config file `/etc/corosync/corosync.conf`. This action must be repeated in all nodes of the cluster. + ``` + [...] + totem { + token: + } + [...] + ``` + 2. 
Reload the corosync configuration: + `crm corosync reload` + + ## References + Azure: + + - https://docs.microsoft.com/en-us/azure/virtual-machines/workloads/sap/high-availability-guide-suse-pacemaker + + SUSE / KVM: + + - https://documentation.suse.com/sbp/all/single-html/SLES4SAP-hana-sr-guide-PerfOpt-15/#id-example-for-etccorosynccorosync-conf + + GCP: + + - https://cloud.google.com/solutions/sap/docs/sap-hana-ha-config-sles#create_the_corosync_configuration_files + + AWS: + + - https://docs.aws.amazon.com/sap/latest/sap-hana/sap-hana-on-aws-cluster-configuration.html + +facts: + - name: corosync_token_timeout + gatherer: corosync.conf + argument: totem.token + +values: + - name: expected_token_timeout + default: 5000 + conditions: + - value: 30000 + # when: env.provider == "azure" || env.provider == "aws" + when: return env.provider == "azure" or env.provider == "aws" + - value: 20000 + # when: env.provider == "gcp" + when: return env.provider == "gcp" + +expectations: + - name: token_timeout + # expect: facts.corosync_token_timeout == values.expected_token_timeout + expect: return facts.corosync_token_timeout == values.expected_token_timeout diff --git a/runner/ansible/roles/catalog/1.1.2-A1244C.yaml b/runner/ansible/roles/catalog/1.1.2-A1244C.yaml new file mode 100644 index 0000000..337a4de --- /dev/null +++ b/runner/ansible/roles/catalog/1.1.2-A1244C.yaml @@ -0,0 +1,47 @@ +id: A1244C +name: Corosync `consensus` timeout +# name: 1.1.2 +group: Corosync +description: | + Corosync `consensus` timeout is set to `{{ values.expected_consensus_timeout }}` +remediation: | + ## Remediation + Adjust the Corosync `consensus` timeout as recommended by the Azure best practices. 
+ + ## References + Azure: + + - https://docs.microsoft.com/en-us/azure/virtual-machines/workloads/sap/high-availability-guide-suse-pacemaker + + AWS: + + - https://docs.aws.amazon.com/sap/latest/sap-hana/sap-hana-on-aws-cluster-configuration.html + + GCP: + + - https://documentation.suse.com/sbp/all/single-html/SLES4SAP-hana-sr-guide-PerfOpt-15/#id-example-for-etccorosynccorosync-conf + + SUSE / KVM: + + - https://documentation.suse.com/sbp/all/single-html/SLES4SAP-hana-sr-guide-PerfOpt-15/#id-example-for-etccorosynccorosync-conf + +facts: + - name: corosync_consensus_timeout + gatherer: corosync.conf + argument: totem.consensus + +values: + - name: expected_consensus_timeout + default: 6000 + conditions: + - value: 36000 + # when: env.provider == "azure" || env.provider == "aws" + when: return env.provider == "azure" or env.provider == "aws" + - value: 24000 + # when: env.provider == "gcp" + when: return env.provider == "gcp" + +expectations: + - name: consensus_timeout + # expect: facts.corosync_consensus_timeout == values.expected_consensus_timeout + expect: return facts.corosync_consensus_timeout == values.expected_consensus_timeout diff --git a/runner/ansible/roles/catalog/1.1.3-845CC9.yaml b/runner/ansible/roles/catalog/1.1.3-845CC9.yaml new file mode 100644 index 0000000..a9b93a1 --- /dev/null +++ b/runner/ansible/roles/catalog/1.1.3-845CC9.yaml @@ -0,0 +1,40 @@ +id: 845CC9 +name: Corosync `max_messages` +# name: 1.1.3 +group: Corosync +description: | + Corosync `max_messages` is set to `20` +remediation: | + ## Remediation + Adjust the Corosync `max_messages` parameter as recommended by the Azure best practices. 
+ + ## References + AZURE: + + - https://docs.microsoft.com/en-us/azure/virtual-machines/workloads/sap/high-availability-guide-suse-pacemaker#install-the-cluster + + AWS: + + - https://documentation.suse.com/sbp/all/single-html/SLES4SAP-hana-sr-guide-PerfOpt-15/#id-example-for-etccorosynccorosync-conf + + GCP: + + - https://cloud.google.com/solutions/sap/docs/sap-hana-ha-config-sles + + SUSE / KVM: + + - https://documentation.suse.com/sbp/all/single-html/SLES4SAP-hana-sr-guide-PerfOpt-15/#id-example-for-etccorosynccorosync-conf + +facts: + - name: corosync_max_messages + gatherer: corosync.conf + argument: totem.max_messages + +# As a user I want the DSL to support checks without values section +# so that I can reduce the boilerplate code in the DSL itself +# and simply use a hardcoded value that is environment independent + +expectations: + - name: max_messages + # expect: facts.corosync_max_messages == 20 + expect: return facts.corosync_max_messages == 20 diff --git a/runner/ansible/roles/catalog/1.1.4-24ABCB.yaml b/runner/ansible/roles/catalog/1.1.4-24ABCB.yaml new file mode 100644 index 0000000..030e78d --- /dev/null +++ b/runner/ansible/roles/catalog/1.1.4-24ABCB.yaml @@ -0,0 +1,36 @@ +id: 24ABCB +name: Corosync `join` +# name: 1.1.4 +group: Corosync +description: | + Corosync `join` is set to `60` +remediation: | + ## Remediation + Adjust the Corosync `join` parameter as recommended by the Azure best practices. 
+ + ## References + AZURE: + + - https://docs.microsoft.com/en-us/azure/virtual-machines/workloads/sap/high-availability-guide-suse-pacemaker#install-the-cluster + + AWS: + + - https://documentation.suse.com/sbp/all/single-html/SLES4SAP-hana-sr-guide-PerfOpt-15/#id-example-for-etccorosynccorosync-conf + + GCP: + + - https://cloud.google.com/solutions/sap/docs/sap-hana-ha-config-sles + + SUSE / KVM: + + - https://documentation.suse.com/sbp/all/single-html/SLES4SAP-hana-sr-guide-PerfOpt-15/#id-example-for-etccorosynccorosync-conf + +facts: + - name: corosync_join + gatherer: corosync.conf + argument: totem.join + +expectations: + - name: join + # expect: facts.corosync_join == 60 + expect: return facts.corosync_join == 60 diff --git a/runner/ansible/roles/catalog/1.1.5-21FCA6.yaml b/runner/ansible/roles/catalog/1.1.5-21FCA6.yaml new file mode 100644 index 0000000..eca96de --- /dev/null +++ b/runner/ansible/roles/catalog/1.1.5-21FCA6.yaml @@ -0,0 +1,44 @@ +id: 21FCA6 +name: Corosync `token_retransmits_before_loss_const` +# name: 1.1.5 +group: Corosync +description: | + Corosync `token_retransmits_before_loss_const` is set to: `{{ values.expected_token_retransmits_before_loss_const }}` +remediation: | + ## Remediation + Adjust the corosync `token_retransmits_before_loss_const` parameter to `{{ values.expected_token_retransmits_before_loss_const }}` as recommended by the Azure best practices. 
+ + ## References + AZURE: + + - https://docs.microsoft.com/en-us/azure/virtual-machines/workloads/sap/high-availability-guide-suse-pacemaker#install-the-cluster + + AWS: + + - https://docs.aws.amazon.com/sap/latest/sap-hana/sap-hana-on-aws-cluster-configuration.html + + GCP: + + - https://cloud.google.com/solutions/sap/docs/sap-hana-ha-config-sles + + SUSE / KVM: + + - https://documentation.suse.com/sbp/all/single-html/SLES4SAP-hana-sr-guide-PerfOpt-15/#id-example-for-etccorosynccorosync-conf + +facts: + - name: corosync_token_retransmits_before_loss_const + gatherer: corosync.conf + argument: totem.token_retransmits_before_loss_const + +values: + - name: expected_token_retransmits_before_loss_const + default: 10 + conditions: + - value: 6 + # when: env.provider == "aws" + when: return env.provider == "aws" + +expectations: + - name: token_retransmits_before_loss_const + # expect: facts.corosync_token_retransmits_before_loss_const == values.expected_token_retransmits_before_loss_const + expect: return facts.corosync_token_retransmits_before_loss_const == values.expected_token_retransmits_before_loss_const diff --git a/runner/ansible/roles/catalog/1.1.6-33403D.yaml b/runner/ansible/roles/catalog/1.1.6-33403D.yaml new file mode 100644 index 0000000..70b0514 --- /dev/null +++ b/runner/ansible/roles/catalog/1.1.6-33403D.yaml @@ -0,0 +1,68 @@ +id: 33403D +name: Corosync `transport` +# name: 1.1.6 +group: Corosync +description: | + Corosync `transport` is set to `udpu` +remediation: | + ## Remediation + To change the corosync MCAST transport to UCAST edit the /etc/corosync/corosync.conf + as in the example + ``` + max_messages: 20 + interface { + ringnumber: 0 + - bindnetaddr: 10.162.32.167 + - mcastaddr: 239.11.100.41 + mcastport: 5405 + ttl: 1 + } + + transport: udpu + ... + +nodelist { + + node { + + ring0_addr: 10.162.32.167 + + nodeid: 1 + + } + + + + node { + + ring0_addr: 10.162.32.89 + + nodeid: 2 + + } + + + +} + ``` + 1. 
Stop the already running cluster by using **systemctl stop pacemaker** + 2. In the totem section, in the interface subsection remove the + key-value pairs **bindnetaddr** and **mcastaddr** + 3. In the totem section add key-value pair **transport: udpu** + 4. Add section nodelist and subsections node for each node of the + cluster, where the **ring0_addr** is the IP address of the node + + ## References + Azure: + + - https://docs.microsoft.com/en-us/azure/virtual-machines/workloads/sap/high-availability-guide-suse-pacemaker + + AWS: + + - https://docs.aws.amazon.com/sap/latest/sap-hana/sap-hana-on-aws-cluster-configuration.html + + GCP: + + - https://cloud.google.com/solutions/sap/docs/sap-hana-ha-config-sles#create_the_corosync_configuration_files + + SUSE / KVM: + + - https://documentation.suse.com/sbp/all/single-html/SLES4SAP-hana-sr-guide-PerfOpt-15/#id-adapting-the-corosync-and-sbd-configuration + - https://documentation.suse.com/sbp/all/single-html/SLES4SAP-hana-sr-guide-PerfOpt-15/#id-example-for-etccorosynccorosync-conf + +facts: + - name: corosync_transport + gatherer: corosync.conf + argument: totem.transport + +expectations: + - name: transport + # expect: facts.corosync_transport == "udpu" + expect: return facts.corosync_transport == "udpu" diff --git a/runner/ansible/roles/catalog/1.1.7-C620DC.yaml b/runner/ansible/roles/catalog/1.1.7-C620DC.yaml new file mode 100644 index 0000000..9c559db --- /dev/null +++ b/runner/ansible/roles/catalog/1.1.7-C620DC.yaml @@ -0,0 +1,36 @@ +id: C620DC +name: Corosync `expected_votes` +# name: 1.1.7 +group: Corosync +description: | + Corosync `expected_votes` is set to `2` +remediation: | + ## Remediation + Adjust the corosync `expected_votes` parameter to `2` to make sure pacemaker calculates the actions properly for a two-node cluster. 
+ + ## References + Azure: + + - https://docs.microsoft.com/en-us/azure/virtual-machines/workloads/sap/high-availability-guide-suse-pacemaker#install-the-cluster + + AWS: + + - https://docs.aws.amazon.com/sap/latest/sap-hana/sap-hana-on-aws-cluster-configuration.html + + GCP: + + - https://cloud.google.com/solutions/sap/docs/sap-hana-ha-config-sles + + SUSE / KVM: + + - https://documentation.suse.com/sbp/all/single-html/SLES4SAP-hana-sr-guide-PerfOpt-15/#id-example-for-etccorosynccorosync-conf + +facts: + - name: corosync_expected_votes + gatherer: corosync.conf + argument: quorum.expected_votes + +expectations: + - name: expected_votes + # expect: facts.corosync_expected_votes == 2 + expect: return facts.corosync_expected_votes == 2 diff --git a/runner/ansible/roles/catalog/1.1.8-6E9B82.yaml b/runner/ansible/roles/catalog/1.1.8-6E9B82.yaml new file mode 100644 index 0000000..a566716 --- /dev/null +++ b/runner/ansible/roles/catalog/1.1.8-6E9B82.yaml @@ -0,0 +1,39 @@ +id: 6E9B82 +name: Corosync `two_node` +# name: 1.1.8 +group: Corosync +description: | + Corosync `two_node` is set to `1` +remediation: | + ## Abstract + The runtime value of the corosync `two_node` parameter is not set as recommended. + + ## Remediation + Adjust the corosync two_node parameter to `1` to make sure Pacemaker calculates the actions properly for a two-node cluster. 
+ + ## References + Azure: + + - https://docs.microsoft.com/en-us/azure/virtual-machines/workloads/sap/high-availability-guide-suse-pacemaker#install-the-cluster + + AWS: + + - https://docs.aws.amazon.com/sap/latest/sap-hana/sap-hana-on-aws-cluster-configuration.html + + GCP: + + - https://cloud.google.com/solutions/sap/docs/sap-hana-ha-config-sles + + SUSE / KVM: + + - https://documentation.suse.com/sbp/all/single-html/SLES4SAP-hana-sr-guide-PerfOpt-15/#id-example-for-etccorosynccorosync-conf + +facts: + - name: corosync_two_node + gatherer: corosync.conf + argument: quorum.two_node + +expectations: + - name: two_node + # expect: facts.corosync_two_node == 1 + expect: return facts.corosync_two_node == 1 diff --git a/runner/ansible/roles/catalog/1.1.9-DA114A.yaml b/runner/ansible/roles/catalog/1.1.9-DA114A.yaml new file mode 100644 index 0000000..52a3955 --- /dev/null +++ b/runner/ansible/roles/catalog/1.1.9-DA114A.yaml @@ -0,0 +1,90 @@ +id: DA114A +name: Corosync rings +# name: 1.1.9 +group: Corosync +description: | + Corosync has at least 2 rings configured +remediation: | + ## Abstract + It is strongly recommended to add a second ring to the corosync communication. 
+ + ## References + Azure: + + - https://docs.microsoft.com/en-us/azure/virtual-machines/workloads/sap/high-availability-guide-suse-pacemaker + + AWS: + + - https://docs.aws.amazon.com/sap/latest/sap-hana/sap-hana-on-aws-cluster-configuration.html + + GCP: + + - https://cloud.google.com/solutions/sap/docs/sap-hana-ha-config-sles + + SUSE / KVM: + + - https://documentation.suse.com/sbp/all/single-html/SLES4SAP-hana-sr-guide-PerfOpt-15/#id-example-for-etccorosynccorosync-conf + - section 9.1.3 in https://documentation.suse.com/sbp/all/single-html/SLES4SAP-hana-sr-guide-PerfOpt-15/#id-adapting-the-corosync-and-sbd-configuration + +facts: + - name: corosync_nodes + gatherer: corosync.conf + argument: nodelist + # argument: nodelist.nodes + +values: + - name: expected_corosync_nodes + default: 2 + - name: expected_corosync_rings_per_node + default: 2 + conditions: + - value: 1 + # when: env.provider == "aws" || env.provider == "gcp" + when: return env.provider == "aws" or env.provider == "gcp" + +expectations: + - name: corosync_rings + # The current expression evaluation engine, Abacus, does not support any of: len, every (iteration in general), custom functions + expect: | + return + length(facts.corosync_nodes) == values.expected_corosync_nodes and + all(facts.corosync_nodes, function(node_rings) + return values.expected_corosync_rings_per_node == node_rings + end) + + # expect: return length(facts.corosync_nodes) == values.expected_corosync_nodes + # expect: | + # return all(facts.corosync_nodes, function(node_rings) + # return values.expected_corosync_rings_per_node == node_rings + # end) + +# As a user I want the DSL expressions to support a function to count the elements of a list +# As a user I want the DSL to support expressions applied to a list of items + +# expectations: +# - name: corosync_rings +# expect_length: +# fact: facts.corosync_nodes +# value: values.expected_corosync_nodes +# - name: corosync_rings_per_node +# expect_each: +# item: node +# in: 
facts.corosync_nodes +# to_satisfy: +# expect_length: +# fact: node.rings +# value: values.expected_corosync_rings_per_node + +# - name: corosync_rings +# # The current expression evaluation engine, Abacus, does not support any of: len, every (iteration in general), custom functions +# expect: len(facts.corosync_nodes) == values.expected_corosync_nodes && every(facts.corosync_nodes, hasReccomendedRings) +# # expect: len(facts.corosync_nodes) == values.expected_corosync_nodes +# # expect: every(facts.corosync_nodes, fn node -> length(node.rings) == values.expected_corosync_rings_per_node end) +# # expect: len(facts.corosync_nodes) == values.expected_corosync_nodes && every(facts.corosync_nodes, "node", "len(node.rings) == values.expected_corosync_rings_per_node") + +# # As a user I want the DSL expressions to support a function to count the elements of a list +# # As a user I want the DSL to support expressions applied to a list of items + +# - name: corosync_rings +# # The current expression evaluation engine, Abacus, does not support any of: len, every (iteration in general), custom functions +# expect: facts.corosync_nodes == values.expected_corosync_nodes && facts.corosync_rings_per_node == values.expected_corosync_rings_per_node \ No newline at end of file diff --git a/runner/ansible/roles/catalog/1.2.1-205AF7.yaml b/runner/ansible/roles/catalog/1.2.1-205AF7.yaml new file mode 100644 index 0000000..014c700 --- /dev/null +++ b/runner/ansible/roles/catalog/1.2.1-205AF7.yaml @@ -0,0 +1,49 @@ +id: 205AF7 +name: Fencing is enabled +# name: 1.2.1 +group: Pacemaker +description: | + Fencing is enabled in the cluster attributes +remediation: | + ## Abstract + Fencing is mandatory to guarantee data integrity for your SAP Applications. + Running a HA Cluster without fencing is not supported and might cause data loss. 
+ + ## Remediation + Execute the following command to enable it: + ``` + crm configure property stonith-enabled=true + ``` + + ## References + AZURE: + + - https://docs.microsoft.com/en-us/azure/virtual-machines/workloads/sap/high-availability-guide-suse-pacemaker#create-a-stonith-device-on-the-pacemaker-cluster + + AWS: + + - https://docs.aws.amazon.com/sap/latest/sap-hana/sap-hana-on-aws-cluster-resources.html + + GCP: + + - https://cloud.google.com/solutions/sap/docs/sap-hana-ha-config-sles#configure_the_general_cluster_properties + + SUSE / KVM: + + - https://documentation.suse.com/sbp/all/single-html/SLES4SAP-hana-sr-guide-PerfOpt-15/#id-cluster-bootstrap-and-more + - https://documentation.suse.com/sle-ha/15-SP3/html/SLE-HA-all/cha-ha-fencing.html#sec-ha-fencing-recommend + +facts: + # crm_attribute -t crm_config -G -n stonith-enabled --quiet + - name: fencing_enabled + gatherer: crm_config + argument: stonith-enabled + # alternatively we could have a gatherer without arguments + # - name: fencing_enabled + # gatherer: crm_config.stonith-enabled + +expectations: + - name: fencing_enabled + # expect: facts.fencing_enabled == true + expect: return facts.fencing_enabled == true +# As a user I want a crm_config gatherer, so that I can extract facts about crm_attributes like stonith-enabled, stonith-timeout diff --git a/runner/ansible/roles/catalog/1.2.2-373DB8.yaml b/runner/ansible/roles/catalog/1.2.2-373DB8.yaml new file mode 100644 index 0000000..2d18730 --- /dev/null +++ b/runner/ansible/roles/catalog/1.2.2-373DB8.yaml @@ -0,0 +1,96 @@ +id: 373DB8 +name: Fencing timeout +# name: 1.2.2 +group: Pacemaker +description: | + Cluster fencing timeout is configured correctly +remediation: | + ## Abstract + The fencing timeout (`stonith-timeout`) determines the time Pacemaker will wait for fencing to succeed. + The recommended values on Azure are `144` seconds for SBD only or `900` seconds when using SBD combined with the Azure Fence agent. 
+ + ## Remediation + Execute the following command to adjust the timeout for your usecase: + ```crm configure property stonith-timeout=144``` + or + ```crm configure property stonith-timeout=900``` + + ## References + Azure: + + - https://docs.microsoft.com/en-us/azure/virtual-machines/workloads/sap/high-availability-guide-suse-pacemaker#create-a-stonith-device-on-the-pacemaker-cluster + + AWS: + + - https://docs.aws.amazon.com/sap/latest/sap-hana/sap-hana-on-aws-cluster-resources.html + + GCP: + + - https://cloud.google.com/solutions/sap/docs/sap-hana-ha-config-sles#configure_the_general_cluster_properties + + SUSE / KVM: + + - https://documentation.suse.com/sbp/all/single-html/SLES4SAP-hana-sr-guide-PerfOpt-15/#id-cluster-bootstrap-and-more + +facts: + # timeout=$(crm_attribute -t crm_config -G -n stonith-timeout --quiet | sed -e 's/s$//') + # # + # # for aws and gcp we might need additional expected vars (name + fence_aws_...) + # # + # if [[cibadmin -Q --xpath "//primitive[@type='fence_azure_arm']/@type" > /dev/null 2>&1 ]]; then + # exit $([[ "${timeout}" -eq {{ expected[name + '.fence_azure_arm'] }} ]]) + # else + # exit $([[ "${timeout}" -ge {{ expected[name + '.sbd'] }} ]]) + # fi + - name: fencing_timeout + gatherer: crm_config + argument: stonith-timeout + + - name: fence_azure_arm_detected + # would it be enough the information about the provider in the environment? + gatherer: cibadmin + argument: //primitive[@type='fence_azure_arm']/@type + +values: + # default + # "1.2.2.sbd": "150" + # "1.2.2.fence_azure_arm": "" # doesn't apply + + # aws + # "1.2.2.sbd": "600" # doesn't apply for AWS + + # azure + # "1.2.2.sbd": "144" + # "1.2.2.fence_azure_arm": "900" + + # gcp? 
nothing found in the legacy check + + - name: expected_fencing_timeout + default: 150 + conditions: + - value: 600 + # when: env.provider == "aws" + when: return env.provider == "aws" + - value: 900 + # when: env.provider == "azure" && facts.fence_azure_arm_detected + when: return env.provider == "azure" and facts.fence_azure_arm_detected + - value: 144 + # when: env.provider == "azure" + when: return env.provider == "azure" + +expectations: + - name: fencing_timeout_is_correct + expect: | + return when{ + condition = facts.fence_azure_arm_detected, + matches = facts.fencing_timeout == values.expected_fencing_timeout, + otherwise = facts.fencing_timeout >= values.expected_fencing_timeout + } + + --if facts.fence_azure_arm_detected then + -- return facts.fencing_timeout == values.expected_fencing_timeout + --else + -- return facts.fencing_timeout >= values.expected_fencing_timeout + --end + +# As a user I want the expectation section of the DSL to have access to the env (?) so that we can support more complex scenarios diff --git a/runner/ansible/roles/catalog/1.3.1-0B6DB2.yaml b/runner/ansible/roles/catalog/1.3.1-0B6DB2.yaml new file mode 100644 index 0000000..31cde55 --- /dev/null +++ b/runner/ansible/roles/catalog/1.3.1-0B6DB2.yaml @@ -0,0 +1,61 @@ +id: "0B6DB2" +name: SBD Pacemaker +# name: 1.3.1 +group: SBD +description: | + `SBD_PACEMAKER` value is correctly set in SBD configuration + + Not applicable for GCP, AWS +remediation: | + ## Abstract + For proper SBD fencing, make sure that the integration with Pacemaker is enabled. + **IMPORTANT**: Always verify these steps in a testing environment before doing so in production ones! + + The SBD is not used in GCP or AWS environments. + ## Remediation + Run the following commands in order: + + 1. Put cluster into maintenance mode: + ```crm configure property maintenance-mode=true``` + 2. Stop the cluster: + ```crm cluster stop``` + 3. Set the SBD_PACEMAKER parameter to `yes` on `/etc/sysconfig/sbd`: + ``` + [...] 
+ SBD_PACEMAKER="yes" + [...] + ``` + 4. Restart the cluster: + ```crm cluster start``` + 5. Put cluster out of maintenance mode: + ```crm configure property maintenance-mode=false``` + + ## References + Azure: + + - https://docs.microsoft.com/en-us/azure/virtual-machines/workloads/sap/high-availability-guide-suse-pacemaker#set-up-the-iscsi-target-server-sbd-device + + AWS: + + + GCP: + + + SUSE / KVM: + + - https://documentation.suse.com/sle-ha/15-SP3/html/SLE-HA-all/cha-ha-storage-protect.html + - https://documentation.suse.com/sbp/all/single-html/SLES4SAP-hana-sr-guide-PerfOpt-15/#id-1.11.5.5.4.4 + +# run_when: env.provider != "aws" && env.provider != "gcp" +run_when: return env.provider ~= "aws" and env.provider ~= "gcp" + +facts: + - name: sbd_pacemaker + gatherer: sbd_config + argument: SBD_PACEMAKER + +expectations: + - name: sbd_pacemaker_correctly_configured + # expect: facts.sbd_pacemaker == "yes" + expect: return facts.sbd_pacemaker == "yes" +# As a user I want to be able to define when a check should run, so that I can specify the scenarios where the check is supported diff --git a/runner/ansible/roles/catalog/1.3.2-49591F.yaml b/runner/ansible/roles/catalog/1.3.2-49591F.yaml new file mode 100644 index 0000000..4a646b3 --- /dev/null +++ b/runner/ansible/roles/catalog/1.3.2-49591F.yaml @@ -0,0 +1,66 @@ +id: 49591F +name: SBD Startmode +# name: 1.3.2 +group: SBD +description: | + `SBD_STARTMODE` is set to `always` + + Not applicable for GCP, AWS +remediation: | + ## Abstract + If not set to always, SBD will not automatically start if the node was previously fenced as it will expect the cluster in a clean state. + **IMPORTANT**: Always verify these steps in a testing environment before doing so in production ones! + + The SBD is not used in GCP or AWS environments. + ## Remediation + Run the following commands in order: + + 1. Put cluster into maintenance mode: + ```crm configure property maintenance-mode=true``` + 2. 
Stop the cluster: + ```crm cluster stop``` + 3. Set the SBD_STARTMODE parameter to `always` on `/etc/sysconfig/sbd`: + ``` + [...] + SBD_STARTMODE="always" + [...] + ``` + 4. Restart the cluster: + ```crm cluster start``` + 5. Put cluster out of maintenance mode: + ```crm configure property maintenance-mode=false``` + + ## References + Azure: + + - https://docs.microsoft.com/en-us/azure/virtual-machines/workloads/sap/high-availability-guide-suse-pacemaker#set-up-the-iscsi-target-server-sbd-device + + AWS: + + GCP: + + SUSE / KVM: + + - https://documentation.suse.com/sbp/all/single-html/SLES4SAP-hana-sr-guide-PerfOpt-15/#id-1.11.5.5.4.4 + - https://documentation.suse.com/sle-ha/15-SP3/html/SLE-HA-all/cha-ha-storage-protect.html + +# run_when: env.provider != "aws" && env.provider != "gcp" +run_when: return env.provider ~= "aws" and env.provider ~= "gcp" + +facts: + - name: sbd_startmode + gatherer: sbd_config + argument: SBD_STARTMODE + +values: + - name: expected_sbd_startmode + default: "clean" + conditions: + - value: "always" + # when: env.provider == "azure" + when: return env.provider == "azure" + +expectations: + - name: sbd_startmode_correctly_configured + # expect: facts.sbd_startmode == values.expected_sbd_startmode + expect: return facts.sbd_startmode == values.expected_sbd_startmode diff --git a/runner/ansible/roles/catalog/1.3.3-816815.yaml b/runner/ansible/roles/catalog/1.3.3-816815.yaml new file mode 100644 index 0000000..8bee3e1 --- /dev/null +++ b/runner/ansible/roles/catalog/1.3.3-816815.yaml @@ -0,0 +1,47 @@ +id: "816815" +name: SBD enabled +# name: 1.3.3 +group: SBD +description: | + SBD service is enabled + + Not applicable to GCP, AWS +remediation: | + ## Abstract + If not enabled, SBD service will not start automatically after reboots, affecting the correct cluster startup. + + The SBD is not used in GCP or AWS environments. 
+ ## Remediation + To enable the service, run: + ``` + systemctl enable sbd + ``` + + ## References + Azure: + + - https://docs.microsoft.com/en-us/azure/virtual-machines/workloads/sap/high-availability-guide-suse-pacemaker#set-up-the-iscsi-target-server-sbd-device + + AWS: + + GCP: + + SUSE / KVM: + + - https://documentation.suse.com/sle-ha/15-SP3/html/SLE-HA-all/cha-ha-storage-protect.html#pro-ha-storage-protect-sbd-services + +# run_when: env.provider != "aws" && env.provider != "gcp" +run_when: return env.provider ~= "aws" and env.provider ~= "gcp" + +facts: + - name: sbd_service_status + gatherer: systemd + argument: sbd + # - name: sbd_service_enabled + # gatherer: systemd_enabled + # argument: sbd + +expectations: + - name: sbd_service_is_enabled + # expect: facts.sbd_service_status == "active" + expect: return facts.sbd_service_status == "active" diff --git a/runner/ansible/roles/catalog/1.3.4-61451E.yaml b/runner/ansible/roles/catalog/1.3.4-61451E.yaml new file mode 100644 index 0000000..0fbba72 --- /dev/null +++ b/runner/ansible/roles/catalog/1.3.4-61451E.yaml @@ -0,0 +1,38 @@ +id: 61451E +name: SBD required devices +# name: 1.3.4 +group: SBD +description: | + Multiple SBD devices are configured + + Not applicable to GCP, AWS +remediation: | + ## Abstract + It is recommended to configure 3 SBD devices for production environments. + + The SBD is not used in GCP or AWS environments. + + ## References + Azure: + + - https://docs.microsoft.com/en-us/azure/virtual-machines/workloads/sap/high-availability-guide-suse-pacemaker#set-up-sbd-device + + AWS: + + GCP: + + SUSE / KVM: + + - https://documentation.suse.com/sbp/all/single-html/SLES4SAP-hana-sr-guide-PerfOpt-15/#cha.hana-sr.scenario + +run_when: return env.provider ~= "aws" and env.provider ~= "gcp" + +# CFSA-876 +facts: + - name: sbd_devices + gatherer: sbd_devices + +expectations: + - name: sbd_devices_correctly_configured + # expect: len(facts.sbd_devices) == 3 + expect: return length(facts.sbd_devices) == 3