etcd: add EtcdInitialClusterState and reconcile cluster membership

Adds the EtcdInitialClusterState parameter ("new" or "existing") and passes it
to puppet as etcd::initial_cluster_state. When the state is "existing", a
step 3 deploy task runs /etcd_update_members.sh inside the etcd container. The
script removes members that are missing from ETCD_INITIAL_CLUSTER and adds the
listed members that are not yet part of the cluster. The certificate
generation task is unchanged apart from being nested under list_concat.

diff --git a/deployment/etcd/etcd-container-puppet.yaml b/deployment/etcd/etcd-container-puppet.yaml
index cc8238d5dc..78c5896efb 100644
--- a/deployment/etcd/etcd-container-puppet.yaml
+++ b/deployment/etcd/etcd-container-puppet.yaml
@@ -37,6 +37,14 @@ parameters:
     description: Initial cluster token for the etcd cluster during bootstrap.
     type: string
     hidden: true
+  EtcdInitialClusterState:
+    description: Initial cluster state ("new" or "existing"). The default value "new"
+                 needs to be overridden only when an overcloud node is replaced, at
+                 which time the value should be set to "existing".
+    type: string
+    default: 'new'
+    constraints:
+      - allowed_values: ['new', 'existing']
   MonitoringSubscriptionEtcd:
     default: 'overcloud-etcd'
     type: string
@@ -120,6 +128,7 @@ outputs:
             tripleo::profile::base::etcd::peer_port: '2380'
             etcd::debug: {get_param: Debug}
             etcd::initial_cluster_token: {get_param: EtcdInitialClusterToken}
+            etcd::initial_cluster_state: {get_param: EtcdInitialClusterState}
             etcd::manage_package: false
             etcd::manage_service: false
           - if:
@@ -162,6 +171,77 @@ outputs:
               owner: etcd:etcd
             - path: /etc/pki/tls/private/etcd.key
               owner: etcd:etcd
+      container_config_scripts:
+        etcd_update_members.sh:
+          mode: "0700"
+          content:
+            str_replace:
+              template: |
+                #!/bin/bash
+                # Reconcile etcd membership with ETCD_INITIAL_CLUSTER: remove members
+                # that are no longer listed, then add listed members that are missing.
+                echo "####################################"
+                echo "### $(date -u) ###"
+                source /etc/etcd/etcd.conf
+                export ETCDCTL_API=3
+                # Hold the command in an array so it runs without eval and does not
+                # depend on IFS. The TLS options are substituted in by Heat.
+                ETCDCTL=(etcdctl TLS_OPTS --endpoints="${ETCD_LISTEN_CLIENT_URLS}")
+
+                # With an empty ETCD_INITIAL_CLUSTER every current member would look
+                # stale and be removed, so refuse to continue.
+                if [[ -z "${ETCD_INITIAL_CLUSTER}" ]]; then
+                  echo "ETCD_INITIAL_CLUSTER is empty; not changing etcd membership"
+                  exit 1
+                fi
+
+                # Ask etcd for the current list of members. Use a private temporary
+                # file instead of a predictable path under /tmp.
+                MEMBERS_FILE=$(mktemp) || exit 1
+                trap 'rm -f "${MEMBERS_FILE}"' EXIT
+                "${ETCDCTL[@]}" member list | tr -d "," > "${MEMBERS_FILE}"
+
+                # etcdctl doesn't generate reliable error status, so use presence of the
+                # node's own name to determine whether this node is capable of managing
+                # etcd membership. Treat an empty name as "not a member", and match
+                # whole words so "node-1" is not satisfied by a member "node-10".
+                ETCD_NAME=$(hiera -c /etc/puppet/hiera.yaml etcd::etcd_name)
+                if [[ -z "${ETCD_NAME}" ]] || ! grep -qwF -- "${ETCD_NAME}" "${MEMBERS_FILE}"; then
+                  echo "This is a new node that is unable to manage etcd membership"
+                  exit 0
+                fi
+
+                # Remove old members. These are nodes in the current list of members
+                # that are *not* in the ETCD_INITIAL_CLUSTER.
+                while read -r id status name peers clients; do
+                  if [[ "${ETCD_INITIAL_CLUSTER}" != *"${name}=${peers}"* ]]; then
+                    echo "Removing old member ${name} (ID ${id}) from the cluster"
+                    "${ETCDCTL[@]}" member remove "${id}"
+                  fi
+                done < "${MEMBERS_FILE}"
+
+                # Add new members. These are nodes in the ETCD_INITIAL_CLUSTER that are
+                # not in the list of current members. ETCD_INITIAL_CLUSTER is a comma
+                # delimited list of "name=peers" tuples, so iterate over the list.
+                # IFS is changed only for the read, not for the rest of the script.
+                IFS=, read -r -a ETCD_MEMBERS <<< "${ETCD_INITIAL_CLUSTER}"
+                for member in "${ETCD_MEMBERS[@]}"; do
+                  # Split the tuple
+                  IFS='=' read -r name peers <<< "${member}"
+                  if ! grep -qF -- "${name} ${peers}" "${MEMBERS_FILE}"; then
+                    echo "Adding new member ${name} to the cluster"
+                    "${ETCDCTL[@]}" member add "${name}" --peer-urls="${peers}"
+                  fi
+                done
+              params:
+                TLS_OPTS:
+                  if:
+                    - internal_tls_enabled
+                    - str_replace:
+                        template: "--cacert=TLS_CA --cert=/etc/pki/tls/certs/etcd.crt --key=/etc/pki/tls/private/etcd.key"
+                        params:
+                          TLS_CA: {get_param: InternalTLSCAFile}
+                    - ""
       docker_config:
         step_2:
           etcd:
@@ -177,6 +257,7 @@ outputs:
                 - - /var/lib/etcd:/var/lib/etcd
                   - /var/lib/kolla/config_files/etcd.json:/var/lib/kolla/config_files/config.json:ro
                   - /var/lib/config-data/puppet-generated/etcd:/var/lib/kolla/config_files/src:ro
+                  - /var/lib/container-config-scripts/etcd_update_members.sh:/etcd_update_members.sh:ro
                 - if:
                     - internal_tls_enabled
                     - - /etc/pki/tls/certs/etcd.crt:/var/lib/kolla/config_files/src-tls/etc/pki/tls/certs/etcd.crt:ro
@@ -196,54 +277,70 @@ outputs:
             - /var/lib/config-data/puppet-generated/etcd/etc/etcd/:/etc/etcd:ro
             - /var/lib/etcd:/var/lib/etcd:ro
       deploy_steps_tasks:
-        if:
-          - internal_tls_enabled
-          - - name: Certificate generation
-              when: step|int == 1
-              block:
-                - include_role:
-                    name: linux-system-roles.certificate
-                  vars:
-                    certificate_requests:
-                      - name: etcd
-                        dns:
-                          - str_replace:
-                              template: "{{fqdn_$NETWORK}}"
+        list_concat:
+          # Runs only when EtcdInitialClusterState is "existing", i.e. when an
+          # overcloud node is being replaced. Failures are deliberately ignored
+          # (failed_when: false); output is appended to the log for inspection.
+          # No TTY or stdin is requested from exec: the task is non-interactive.
+          - - name: Manage etcd cluster membership
+              vars:
+                initial_cluster_state: {get_param: EtcdInitialClusterState}
+              shell: |
+                "{{ container_cli }}" exec -u root etcd /etcd_update_members.sh 2>&1 | \
+                    tee -a /var/log/containers/stdouts/etcd_update_members.log
+              become: true
+              failed_when: false
+              when:
+                - step|int == 3
+                - initial_cluster_state == "existing"
+          - if:
+            - internal_tls_enabled
+            - - name: Certificate generation
+                when: step|int == 1
+                block:
+                  - include_role:
+                      name: linux-system-roles.certificate
+                    vars:
+                      certificate_requests:
+                        - name: etcd
+                          dns:
+                            - str_replace:
+                                template: "{{fqdn_$NETWORK}}"
+                                params:
+                                  $NETWORK: {get_param: [ServiceNetMap, EtcdNetwork]}
+                            - str_replace:
+                                template: "{{cloud_names.cloud_name_NETWORK}}"
+                                params:
+                                  NETWORK: {get_param: [ServiceNetMap, EtcdNetwork]}
+                          principal:
+                            str_replace:
+                              template: "etcd/{{fqdn_$NETWORK}}@{{idm_realm}}"
                               params:
                                 $NETWORK: {get_param: [ServiceNetMap, EtcdNetwork]}
-                          - str_replace:
-                              template: "{{cloud_names.cloud_name_NETWORK}}"
-                              params:
-                                NETWORK: {get_param: [ServiceNetMap, EtcdNetwork]}
-                        principal:
-                          str_replace:
-                            template: "etcd/{{fqdn_$NETWORK}}@{{idm_realm}}"
-                            params:
-                              $NETWORK: {get_param: [ServiceNetMap, EtcdNetwork]}
-                        run_after: |
-                          # cinder uses etcd, so its containers also need to be refreshed
-                          container_names=$({{container_cli}} ps --format=\{\{.Names\}\} | grep -E 'cinder|etcd')
-                          service_crt="/etc/pki/tls/certs/etcd.crt"
-                          service_key="/etc/pki/tls/private/etcd.key"
-                          kolla_dir="/var/lib/kolla/config_files/src-tls"
-                          # For each container, check whether the cert file needs to be updated.
-                          # The check is necessary because the original THT design directly bind mounted
-                          # the files to their final location, and did not copy them in via $kolla_dir.
-                          # Regardless of whether the container is directly using the files, or a copy,
-                          # there's no need to trigger a reload because the cert is not cached.
-                          for container_name in ${container_names[*]}; do
-                            {{container_cli}} exec -u root "$container_name" bash -c "
-                              [[ -f ${kolla_dir}/${service_crt} ]] && cp ${kolla_dir}/${service_crt} $service_crt;
-                              [[ -f ${kolla_dir}/${service_key} ]] && cp ${kolla_dir}/${service_key} $service_key;
-                              true
-                            "
-                          done
-                        key_size:
-                          if:
-                            - key_size_override_set
-                            - {get_param: EtcdCertificateKeySize}
-                            - {get_param: CertificateKeySize}
-                        ca: ipa
+                          run_after: |
+                            # cinder uses etcd, so its containers also need to be refreshed
+                            container_names=$({{container_cli}} ps --format=\{\{.Names\}\} | grep -E 'cinder|etcd')
+                            service_crt="/etc/pki/tls/certs/etcd.crt"
+                            service_key="/etc/pki/tls/private/etcd.key"
+                            kolla_dir="/var/lib/kolla/config_files/src-tls"
+                            # For each container, check whether the cert file needs to be updated.
+                            # The check is necessary because the original THT design directly bind mounted
+                            # the files to their final location, and did not copy them in via $kolla_dir.
+                            # Regardless of whether the container is directly using the files, or a copy,
+                            # there's no need to trigger a reload because the cert is not cached.
+                            for container_name in ${container_names[*]}; do
+                              {{container_cli}} exec -u root "$container_name" bash -c "
+                                [[ -f ${kolla_dir}/${service_crt} ]] && cp ${kolla_dir}/${service_crt} $service_crt;
+                                [[ -f ${kolla_dir}/${service_key} ]] && cp ${kolla_dir}/${service_key} $service_key;
+                                true
+                              "
+                            done
+                          key_size:
+                            if:
+                              - key_size_override_set
+                              - {get_param: EtcdCertificateKeySize}
+                              - {get_param: CertificateKeySize}
+                          ca: ipa
       host_prep_tasks:
         - name: create /var/lib/etcd
           file: