From f70905c0589996d504fc53ea902e5fafb1181144 Mon Sep 17 00:00:00 2001
From: Teddy Andrieux
Date: Tue, 10 Dec 2019 12:31:37 +0100
Subject: [PATCH 1/4] salt: Add a `refresh_pillar` in upgrade and downgrade
 orchestrate

When deploying a node we need an up-to-date pillar. We already have a
function checking for pillar values in deploy_node, but since
`refresh_pillar` is always asynchronous, we first trigger a refresh
everywhere before calling any deploy_node orchestrate.
---
 salt/metalk8s/orchestrate/downgrade/init.sls | 16 +++++++++++++++-
 salt/metalk8s/orchestrate/upgrade/init.sls   | 16 +++++++++++++++-
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/salt/metalk8s/orchestrate/downgrade/init.sls b/salt/metalk8s/orchestrate/downgrade/init.sls
index 1740c444af..5d0accb569 100644
--- a/salt/metalk8s/orchestrate/downgrade/init.sls
+++ b/salt/metalk8s/orchestrate/downgrade/init.sls
@@ -10,6 +10,20 @@ Execute the downgrade prechecks:
         orchestrate:
           dest_version: {{ dest_version }}
 
+# In Salt 2018.3 we cannot do a synchronous pillar refresh, so add a sleep,
+# see https://github.com/saltstack/salt/issues/20590
+Wait for pillar refresh to complete:
+  salt.function:
+    - name: saltutil.refresh_pillar
+    - tgt: '*'
+    - require:
+      - salt: Execute the downgrade prechecks
+  module.run:
+    - test.sleep:
+      - length: 20
+    - require:
+      - salt: Wait for pillar refresh to complete
+
 {%- set cp_nodes = salt.metalk8s.minions_by_role('master') | sort %}
 {%- set other_nodes = pillar.metalk8s.nodes.keys() | difference(cp_nodes) | sort %}
 
@@ -39,7 +53,7 @@ Wait for API server to be available on {{ node }}:
       - status: 200
       - verify_ssl: false
     - require:
-      - salt: Execute the downgrade prechecks
+      - module: Wait for pillar refresh to complete
 {%- if previous_node is defined %}
       - salt: Deploy node {{ previous_node }}
 {%- endif %}
diff --git a/salt/metalk8s/orchestrate/upgrade/init.sls b/salt/metalk8s/orchestrate/upgrade/init.sls
index dc1ef028e1..f29ca75137 100644
--- a/salt/metalk8s/orchestrate/upgrade/init.sls
+++ b/salt/metalk8s/orchestrate/upgrade/init.sls
@@ -22,6 +22,20 @@ Upgrade etcd cluster:
     - require:
       - salt: Execute the upgrade prechecks
 
+# In Salt 2018.3 we cannot do a synchronous pillar refresh, so add a sleep,
+# see https://github.com/saltstack/salt/issues/20590
+Wait for pillar refresh to complete:
+  salt.function:
+    - name: saltutil.refresh_pillar
+    - tgt: '*'
+    - require:
+      - salt: Upgrade etcd cluster
+  module.run:
+    - test.sleep:
+      - length: 20
+    - require:
+      - salt: Wait for pillar refresh to complete
+
 {%- set cp_nodes = salt.metalk8s.minions_by_role('master') | sort %}
 {%- set other_nodes = pillar.metalk8s.nodes.keys() | difference(cp_nodes) | sort %}
 
@@ -46,7 +60,7 @@ Wait for API server to be available on {{ node }}:
       - status: 200
       - verify_ssl: false
     - require:
-      - salt: Upgrade etcd cluster
+      - module: Wait for pillar refresh to complete
 {%- if previous_node is defined %}
       - salt: Deploy node {{ previous_node }}
 {%- endif %}
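
Note: the workaround above is reusable by any orchestrate that needs fresh
pillar data on Salt 2018.3. A minimal standalone sketch of the same pattern
(the state IDs, the `'*'` target, and the 20-second delay are illustrative
assumptions, not taken from the patch):

    # Fire the asynchronous pillar refresh on every minion.
    Refresh pillar everywhere:
      salt.function:
        - name: saltutil.refresh_pillar
        - tgt: '*'

    # refresh_pillar returns before the minions finish re-rendering their
    # pillar, so sleep on the master as a crude synchronization barrier.
    Give minions time to re-render their pillar:
      module.run:
        - test.sleep:
          - length: 20
        - require:
          - salt: Refresh pillar everywhere

Any later state can then require `module: Give minions time to re-render
their pillar` to be reasonably sure it runs against refreshed pillar data.
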
From b8b528be9e302ad52ea229b1c6a6051f4bb3a56e Mon Sep 17 00:00:00 2001
From: Teddy Andrieux
Date: Mon, 9 Dec 2019 15:50:17 +0100
Subject: [PATCH 2/4] script,salt: Downgrade `bootstrap` before other nodes

Some ext_pillar need a salt-master already running the dest_version in
order to be computed, so we must first downgrade the salt-master (on the
bootstrap node) before applying the highstate on the other nodes.

Fixes: #2101
---
 salt/metalk8s/orchestrate/downgrade/init.sls |  5 ---
 scripts/downgrade.sh.in                      | 38 ++++++--------------
 2 files changed, 10 insertions(+), 33 deletions(-)

diff --git a/salt/metalk8s/orchestrate/downgrade/init.sls b/salt/metalk8s/orchestrate/downgrade/init.sls
index 5d0accb569..7717c1e93d 100644
--- a/salt/metalk8s/orchestrate/downgrade/init.sls
+++ b/salt/metalk8s/orchestrate/downgrade/init.sls
@@ -39,11 +39,6 @@ Wait for pillar refresh to complete:
 Skip node {{ node }}, already in {{ node_version }} older than {{ dest_version }}:
   test.succeed_without_changes
 
-  {%- elif 'bootstrap' in pillar.metalk8s.nodes[node].roles %}
-
-Skip node {{ node }}, bootstrap node downgrade should be done later:
-  test.succeed_without_changes
-
 {%- else %}
 
 Wait for API server to be available on {{ node }}:
diff --git a/scripts/downgrade.sh.in b/scripts/downgrade.sh.in
index fdd5c60fea..97f9d14e53 100755
--- a/scripts/downgrade.sh.in
+++ b/scripts/downgrade.sh.in
@@ -176,6 +176,15 @@ precheck_downgrade () {
 launch_downgrade () {
     SALT_MASTER_CALL=(crictl exec -i "$(get_salt_container)")
 
+    "${SALT_MASTER_CALL[@]}" salt-run saltutil.sync_all \
+        saltenv="metalk8s-$DESTINATION_VERSION"
+
+    "${SALT_MASTER_CALL[@]}" salt-run metalk8s_saltutil.sync_auth \
+        saltenv="metalk8s-$DESTINATION_VERSION"
+
+    "${SALT_MASTER_CALL[@]}" salt-run saltutil.sync_roster \
+        saltenv="metalk8s-$DESTINATION_VERSION"
+
     "${SALT_MASTER_CALL[@]}" salt-run state.orchestrate \
         metalk8s.orchestrate.downgrade \
         saltenv="$SALTENV"
@@ -192,33 +201,6 @@ downgrade_bootstrap () {
         pillar="{'metalk8s': {'endpoints': {'salt-master': $saltmaster_endpoint, \
         'repositories': $repo_endpoint}}}" \
         --retcode-passthrough
-
-    SALT_MASTER_CALL=(crictl exec -i "$(get_salt_container)")
-    "${SALT_MASTER_CALL[@]}" salt-run saltutil.sync_all \
-        saltenv="metalk8s-$DESTINATION_VERSION"
-    "${SALT_MASTER_CALL[@]}" salt-run metalk8s_saltutil.sync_auth \
-        saltenv="metalk8s-$DESTINATION_VERSION"
-    "${SALT_MASTER_CALL[@]}" salt-run saltutil.sync_roster \
-        saltenv="metalk8s-$DESTINATION_VERSION"
-
-    local bootstrap_id
-    bootstrap_id=$(
-        $SALT_CALL grains.get id --out txt \
-        | awk '/^local\: /{ print $2 }')
-    [ -z "$bootstrap_id" ] && die "Cannot retrieve bootstrap id"
-
-    "${SALT_MASTER_CALL[@]}" salt-run saltutil.sync_all \
-        saltenv="metalk8s-@@VERSION"
-
-    "${SALT_MASTER_CALL[@]}" salt-run state.orchestrate_single \
-        metalk8s_kubernetes.object_updated \
-        "$bootstrap_id" \
-        kind=Node apiVersion=v1 \
-        patch="{'metadata': {'labels': \
-        {'metalk8s.scality.com/version': '$DESTINATION_VERSION'}}}"
-    "${SALT_MASTER_CALL[@]}" salt-run state.sls \
-        metalk8s.orchestrate.deploy_node saltenv="$SALTENV" \
-        pillar="{'orchestrate': {'node_name': '$bootstrap_id'}}"
 }
 
 # patch the kube-system namespace annotation with input
@@ -252,5 +234,5 @@ fi
 run "Performing Pre-Downgrade checks" precheck_downgrade
 [ $DRY_RUN -eq 1 ] && exit 0
 run "Setting cluster version to $DESTINATION_VERSION" patch_kubesystem_namespace
-run "Launching the downgrade" launch_downgrade
 run "Downgrading bootstrap" downgrade_bootstrap
+run "Launching the downgrade" launch_downgrade
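
Note: the three pre-sync runner calls moved into launch_downgrade could also
be expressed as orchestrate states. A sketch using the `salt.runner`
orchestrate state (the destination saltenv value is an assumed example;
`metalk8s_saltutil.sync_auth` is a MetalK8s custom runner, shown in the
script above, not a stock Salt one):

    Sync all modules to the destination version:
      salt.runner:
        - name: saltutil.sync_all
        - saltenv: metalk8s-2.4.0  # assumed destination version

    Sync the salt-ssh roster modules to the destination version:
      salt.runner:
        - name: saltutil.sync_roster
        - saltenv: metalk8s-2.4.0  # assumed destination version
        - require:
          - salt: Sync all modules to the destination version

Keeping the calls in the shell script instead guarantees they complete
before the downgrade orchestrate is even rendered, which is the point of
the patch, since ext_pillar is computed at render time.
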
From 9c221cf32ea0c2e1cfe87fadd01d998c8dcd790f Mon Sep 17 00:00:00 2001
From: Teddy Andrieux
Date: Wed, 11 Dec 2019 11:08:04 +0100
Subject: [PATCH 3/4] salt: Add ability to `skip_roles` in salt top.sls

We may sometimes want to run only a part of the highstate; for example,
when downgrading we want to be able to apply the highstate for
everything except the `etcd` role.
---
 salt/top.sls.in | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/salt/top.sls.in b/salt/top.sls.in
index d7ba3b465d..6552b72507 100644
--- a/salt/top.sls.in
+++ b/salt/top.sls.in
@@ -71,6 +71,12 @@
 #
 # So, the `metalk8s.roles.node.absent` state is *only* applied when a
 # minion/node has no other known role but the `ca` one.
+#
+# Add a `skip_roles` feature to skip specific roles when we only want to
+# apply a part of the highstate.
+# NOTE: When we skip a role we also skip its `absent` state, meaning that
+# if a node does not have the `etcd` role and `etcd` is in its `skip_roles`,
+# then the `metalk8s.roles.etcd.absent` state will not be called.
 
 {%- set roles = ['bootstrap', 'ca', 'etcd', 'master', 'node', 'infra'] -%}
 {%- set node_role = 'node' -%}
@@ -81,6 +87,10 @@
 {%- macro role_match(name) -%}
 I@metalk8s:nodes:{{ grains['id'] }}:roles:{{ name }}
 {%- endmacro %}
+{%- macro skip_role_match(name) -%}
+I@metalk8s:nodes:{{ grains['id'] }}:skip_roles:{{ name }}
+{%- endmacro %}
+
 
 metalk8s-{{ version }}:
   # 'Default' role applicable to all nodes
@@ -91,19 +101,19 @@ metalk8s-{{ version }}:
 
 {% for role in roles %}
   # {{ role }} role
-  {{ version_match }} and {{ role_match(role) }}:
+  {{ version_match }} and {{ role_match(role) }} and not {{ skip_role_match(role) }}:
    - match: compound
    - metalk8s.roles.{{ role }}
 
 {# See comment on top #}
 {%- if role != node_role %}
-  {{ version_match }} and not {{ role_match(role) }}:
+  {{ version_match }} and not {{ role_match(role) }} and not {{ skip_role_match(role) }}:
    - match: compound
    - metalk8s.roles.{{ role }}.absent
 {%- endif %}
 {% endfor %}
 
 # See comment on top
-  {{ version_match }} {% for role in node_roles %} and not {{ role_match(role) }} {% endfor %}:
+  {{ version_match }} {% for role in node_roles %} and not {{ role_match(role) }} {% endfor %} and not {{ skip_role_match(node_role) }}:
    - match: compound
    - metalk8s.roles.node.absent
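
Note: for a single minion, the Jinja above renders into plain compound
matchers. A sketch of the resulting top file entry for the `etcd` role
(the minion id `node-1` and the rendered form of `version_match` are
assumed for illustration, since that macro is defined outside the shown
hunks):

    metalk8s-2.4.0:
      # etcd role: applied only when present in roles AND not in skip_roles
      'I@metalk8s:nodes:node-1:version:2.4.0 and I@metalk8s:nodes:node-1:roles:etcd and not I@metalk8s:nodes:node-1:skip_roles:etcd':
        - match: compound
        - metalk8s.roles.etcd
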
From 2429dfa51d073e45c7c9980908c3bfb7445aebb7 Mon Sep 17 00:00:00 2001
From: Teddy Andrieux
Date: Mon, 9 Dec 2019 15:53:13 +0100
Subject: [PATCH 4/4] salt: Skip `etcd` role when calling highstate during
 downgrade

When downgrading we need to first downgrade all the nodes and only
afterwards take care of the etcd cluster; that is why we skip the
`etcd` role when calling the highstate on the nodes.
---
 salt/metalk8s/orchestrate/deploy_node.sls    | 11 +++++++++++
 salt/metalk8s/orchestrate/downgrade/init.sls |  6 ++++++
 2 files changed, 17 insertions(+)

diff --git a/salt/metalk8s/orchestrate/deploy_node.sls b/salt/metalk8s/orchestrate/deploy_node.sls
index 8888ceb45e..8a45c121ab 100644
--- a/salt/metalk8s/orchestrate/deploy_node.sls
+++ b/salt/metalk8s/orchestrate/deploy_node.sls
@@ -1,6 +1,8 @@
 {%- set node_name = pillar.orchestrate.node_name %}
 {%- set version = pillar.metalk8s.nodes[node_name].version %}
 
+{%- set skip_roles = pillar.metalk8s.nodes[node_name].get('skip_roles', []) %}
+
 {%- if node_name not in salt.saltutil.runner('manage.up') %}
 Deploy salt-minion on a new node:
   salt.state:
@@ -116,6 +118,15 @@ Run the highstate:
     - tgt: {{ node_name }}
     - highstate: True
     - saltenv: metalk8s-{{ version }}
+    {#- Allow skipping some node roles so we do not apply the whole
+        highstate, e.g. skipping `etcd` when downgrading #}
+    {%- if skip_roles %}
+    - pillar:
+        metalk8s:
+          nodes:
+            {{ node_name }}:
+              skip_roles: {{ skip_roles }}
+    {%- endif %}
     - require:
       - salt: Set grains
       - salt: Refresh the mine
diff --git a/salt/metalk8s/orchestrate/downgrade/init.sls b/salt/metalk8s/orchestrate/downgrade/init.sls
index 7717c1e93d..0049905035 100644
--- a/salt/metalk8s/orchestrate/downgrade/init.sls
+++ b/salt/metalk8s/orchestrate/downgrade/init.sls
@@ -78,6 +78,12 @@ Deploy node {{ node }}:
         {#- Do not drain if we are in single node cluster #}
           skip_draining: True
 {%- endif %}
+        metalk8s:
+          nodes:
+            {{ node }}:
+              # Skip the `etcd` role, as we take care of the etcd cluster afterwards
+              skip_roles:
+                - etcd
     - require:
       - metalk8s_kubernetes: Set node {{ node }} version to {{ dest_version }}
     - require_in:
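
Note: with the two hunks above combined, any orchestrate (or a manual
salt-run call) can deploy a node while masking some of its roles, since
deploy_node forwards the `skip_roles` pillar override to the highstate.
A sketch of such an invocation as an orchestrate state (node name, version
and saltenv are assumed examples):

    Deploy node-1 without applying the etcd role:
      salt.runner:
        - name: state.orchestrate
        - mods:
          - metalk8s.orchestrate.deploy_node
        - saltenv: metalk8s-2.4.0  # assumed saltenv
        - pillar:
            orchestrate:
              node_name: node-1    # assumed node name
            metalk8s:
              nodes:
                node-1:
                  skip_roles:
                    - etcd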