Skip to content
This repository has been archived by the owner on Feb 29, 2024. It is now read-only.

Commit

Permalink
Etcd: Update cluster membership when replacing a node
Browse files Browse the repository at this point in the history
Add the ability to replace an overcloud node that's running the etcd
service by updating the etcd cluster's membership: the old node (the
one being replaced) is removed and the new replacement node is added.

etcd maintains its cluster membership using each node's name and its
cluster peer address on the internal_api network. This data typically
changes when replacing an overcloud node. The cluster membership is
updated using a shell script that's triggered to run after etcd is
deployed on the new node.

A new EtcdInitialClusterState parameter is introduced. It defaults
to "new," and only needs to be set to "existing" when the cluster
membership changes.

NOTE(stable/victoria):
  The backport dealt with conflicts due to these changes that were
  introduced in wallaby:
  - https://review.opendev.org/c/openstack/tripleo-heat-templates/+/771832
  - https://review.opendev.org/c/openstack/tripleo-heat-templates/+/820111

Change-Id: I8daa5500e7ee722eef64808306dc2aa5ca027a15
(cherry picked from commit d3d20bb)
(cherry picked from commit ba22896)
(cherry picked from commit a9a4d54)
  • Loading branch information
ASBishop committed Mar 25, 2022
1 parent 8b4cb9a commit 13015db
Showing 1 changed file with 77 additions and 0 deletions.
77 changes: 77 additions & 0 deletions deployment/etcd/etcd-container-puppet.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,14 @@ parameters:
description: Initial cluster token for the etcd cluster during bootstrap.
type: string
hidden: true
# Forwarded to puppet as etcd::initial_cluster_state. Operators only need to
# set it when the etcd cluster membership changes (node replacement).
EtcdInitialClusterState:
  type: string
  default: 'new'
  description: >-
    Initial cluster state ("new" or "existing"). The default value "new"
    needs to be overridden only when an overcloud node is replaced, at
    which time the value should be set to "existing".
  constraints:
    - allowed_values:
        - 'new'
        - 'existing'
MonitoringSubscriptionEtcd:
default: 'overcloud-etcd'
type: string
Expand Down Expand Up @@ -113,6 +121,7 @@ outputs:
tripleo::profile::base::etcd::peer_port: '2380'
etcd::debug: {get_param: Debug}
etcd::initial_cluster_token: {get_param: EtcdInitialClusterToken}
etcd::initial_cluster_state: {get_param: EtcdInitialClusterState}
etcd::manage_package: false
etcd::manage_service: false
-
Expand Down Expand Up @@ -184,6 +193,61 @@ outputs:
owner: etcd:etcd
- path: /etc/pki/tls/private/etcd.key
owner: etcd:etcd
container_config_scripts:
  # Script mounted into the etcd container as /etcd_update_members.sh. It
  # reconciles the live etcd membership with the ETCD_INITIAL_CLUSTER value
  # found in /etc/etcd/etcd.conf: members no longer listed are removed and
  # listed-but-missing members are added.
  etcd_update_members.sh:
    mode: "0700"
    content:
      str_replace:
        # NOTE: str_replace substitutes every occurrence of a params key in
        # the template text, so the TLS_OPTS placeholder name must only
        # appear in the script where the etcdctl TLS options belong (never
        # in a script comment).
        template: |
          #!/bin/bash
          echo "####################################"
          echo "### $(date -u) ###"
          source /etc/etcd/etcd.conf
          export ETCDCTL_API=3
          # Hold the etcdctl command line in an array so it can be executed
          # directly: no eval (member data is never re-parsed by the shell)
          # and no dependency on the current IFS value.
          ETCDCTL=(etcdctl TLS_OPTS "--endpoints=${ETCD_LISTEN_CLIENT_URLS}")
          # Without a target membership every current member would look
          # "old" and be removed, so refuse to continue.
          if [[ -z "${ETCD_INITIAL_CLUSTER}" ]]; then
            echo "ETCD_INITIAL_CLUSTER is not set, not touching etcd membership"
            exit 1
          fi
          # Ask etcd for the current list of members
          "${ETCDCTL[@]}" member list | tr -d "," > /tmp/etcd-members
          # etcdctl doesn't generate reliable error status, so use presence of the
          # node's own name to determine whether this node is capable of managing
          # etcd membership. An empty name is treated the same way as a name
          # that is not in the member list.
          ETCD_NAME=$(hiera -c /etc/puppet/hiera.yaml etcd::etcd_name)
          if [[ -z "${ETCD_NAME}" ]] || ! grep -qF -- "${ETCD_NAME}" /tmp/etcd-members; then
            echo "This is a new node that is unable to manage etcd membership"
            exit 0
          fi
          # Remove old members. These are nodes in the current list of members
          # that are *not* in the ETCD_INITIAL_CLUSTER.
          while read -r id status name peers clients; do
            if [[ "${ETCD_INITIAL_CLUSTER}" != *"${name}=${peers}"* ]]; then
              echo "Removing old member ${name} (ID ${id}) from the cluster"
              "${ETCDCTL[@]}" member remove "${id}"
            fi
          done < /tmp/etcd-members
          # Add new members. These are nodes in the ETCD_INITIAL_CLUSTER that are
          # not in the list of current members. ETCD_INITIAL_CLUSTER is a comma
          # delimited list of "name=peers" tuples, so iterate over the list.
          # IFS is set only for the read commands so it does not leak into
          # the rest of the script.
          IFS=, read -r -a ETCD_MEMBERS <<< "${ETCD_INITIAL_CLUSTER}"
          for member in "${ETCD_MEMBERS[@]}"; do
            # Split the tuple
            IFS='=' read -r name peers <<< "${member}"
            if ! grep -qF -- "${name} ${peers}" /tmp/etcd-members; then
              echo "Adding new member ${name} to the cluster"
              "${ETCDCTL[@]}" member add "${name}" "--peer-urls=${peers}"
            fi
          done
        params:
          # etcdctl TLS flags when internal TLS is enabled, otherwise empty.
          TLS_OPTS:
            if:
              - internal_tls_enabled
              - str_replace:
                  template: "--cacert=TLS_CA --cert=/etc/pki/tls/certs/etcd.crt --key=/etc/pki/tls/private/etcd.key"
                  params:
                    TLS_CA: {get_param: InternalTLSCAFile}
              - ""
docker_config:
step_2:
etcd:
Expand All @@ -200,6 +264,7 @@ outputs:
- /var/lib/etcd:/var/lib/etcd
- /var/lib/kolla/config_files/etcd.json:/var/lib/kolla/config_files/config.json:ro
- /var/lib/config-data/puppet-generated/etcd/:/var/lib/kolla/config_files/src:ro
- /var/lib/container-config-scripts/etcd_update_members.sh:/etcd_update_members.sh:ro
-
if:
- internal_tls_enabled
Expand All @@ -220,6 +285,18 @@ outputs:
volumes:
- /var/lib/config-data/etcd/etc/etcd/:/etc/etcd:ro
- /var/lib/etcd:/var/lib/etcd:ro
deploy_steps_tasks:
  # Runs /etcd_update_members.sh inside the etcd container, but only at
  # deployment step 3 and only when EtcdInitialClusterState is "existing".
  - name: Manage etcd cluster membership
    vars:
      initial_cluster_state: {get_param: EtcdInitialClusterState}
    # exec is invoked without -t/-i: this task is run by Ansible, not from an
    # interactive terminal, so requesting a TTY only adds carriage returns to
    # the log (or makes exec fail on CLIs that insist on a real TTY).
    shell: |
      "{{ container_cli }}" exec -u root etcd /etcd_update_members.sh 2>&1 | \
        tee -a /var/log/containers/stdouts/etcd_update_members.log
    become: true
    # Failures are ignored; the script output is appended to the log file
    # named in the command above.
    failed_when: false
    when:
      - step|int == 3
      - initial_cluster_state == "existing"
host_prep_tasks:
- name: create /var/lib/etcd
file:
Expand Down

0 comments on commit 13015db

Please sign in to comment.