From f70905c0589996d504fc53ea902e5fafb1181144 Mon Sep 17 00:00:00 2001
From: Teddy Andrieux
Date: Tue, 10 Dec 2019 12:31:37 +0100
Subject: [PATCH 1/4] salt: Add a `refresh_pillar` in upgrade and downgrade
 orchestrate

When deploying a node we need an up-to-date pillar. We already have a
function checking for pillar values in deploy_node, but since
`refresh_pillar` is always asynchronous, we first trigger a refresh
everywhere before calling any deploy_node orchestrate.
---
 salt/metalk8s/orchestrate/downgrade/init.sls | 16 +++++++++++++++-
 salt/metalk8s/orchestrate/upgrade/init.sls   | 16 +++++++++++++++-
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/salt/metalk8s/orchestrate/downgrade/init.sls b/salt/metalk8s/orchestrate/downgrade/init.sls
index 1740c444af..5d0accb569 100644
--- a/salt/metalk8s/orchestrate/downgrade/init.sls
+++ b/salt/metalk8s/orchestrate/downgrade/init.sls
@@ -10,6 +10,20 @@ Execute the downgrade prechecks:
         orchestrate:
           dest_version: {{ dest_version }}
 
+# In Salt 2018.3 we cannot do a synchronous pillar refresh, so add a sleep,
+# see https://github.com/saltstack/salt/issues/20590
+Wait for pillar refresh to complete:
+  salt.function:
+    - name: saltutil.refresh_pillar
+    - tgt: '*'
+    - require:
+      - salt: Execute the downgrade prechecks
+  module.run:
+    - test.sleep:
+      - length: 20
+    - require:
+      - salt: Wait for pillar refresh to complete
+
 {%- set cp_nodes = salt.metalk8s.minions_by_role('master') | sort %}
 {%- set other_nodes = pillar.metalk8s.nodes.keys() | difference(cp_nodes) | sort %}
 
@@ -39,7 +53,7 @@ Wait for API server to be available on {{ node }}:
       - status: 200
       - verify_ssl: false
     - require:
-      - salt: Execute the downgrade prechecks
+      - module: Wait for pillar refresh to complete
 {%- if previous_node is defined %}
       - salt: Deploy node {{ previous_node }}
 {%- endif %}
diff --git a/salt/metalk8s/orchestrate/upgrade/init.sls b/salt/metalk8s/orchestrate/upgrade/init.sls
index dc1ef028e1..f29ca75137 100644
--- a/salt/metalk8s/orchestrate/upgrade/init.sls
+++ b/salt/metalk8s/orchestrate/upgrade/init.sls
@@ -22,6 +22,20 @@ Upgrade etcd cluster:
     - require:
       - salt: Execute the upgrade prechecks
 
+# In Salt 2018.3 we cannot do a synchronous pillar refresh, so add a sleep,
+# see https://github.com/saltstack/salt/issues/20590
+Wait for pillar refresh to complete:
+  salt.function:
+    - name: saltutil.refresh_pillar
+    - tgt: '*'
+    - require:
+      - salt: Upgrade etcd cluster
+  module.run:
+    - test.sleep:
+      - length: 20
+    - require:
+      - salt: Wait for pillar refresh to complete
+
 {%- set cp_nodes = salt.metalk8s.minions_by_role('master') | sort %}
 {%- set other_nodes = pillar.metalk8s.nodes.keys() | difference(cp_nodes) | sort %}
 
@@ -46,7 +60,7 @@ Wait for API server to be available on {{ node }}:
       - status: 200
       - verify_ssl: false
     - require:
-      - salt: Upgrade etcd cluster
+      - module: Wait for pillar refresh to complete
 {%- if previous_node is defined %}
       - salt: Deploy node {{ previous_node }}
 {%- endif %}
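
Note: the workaround above is reusable by any orchestrate that needs fresh
pillar data on Salt 2018.3. A minimal standalone sketch of the same pattern
(the state IDs, the `'*'` target, and the 20-second delay are illustrative
assumptions, not taken from the patch):

    # Fire the asynchronous pillar refresh on every minion.
    Refresh pillar everywhere:
      salt.function:
        - name: saltutil.refresh_pillar
        - tgt: '*'

    # refresh_pillar returns before the minions finish re-rendering their
    # pillar, so sleep on the master as a crude synchronization barrier.
    Give minions time to re-render their pillar:
      module.run:
        - test.sleep:
          - length: 20
        - require:
          - salt: Refresh pillar everywhere

Any later state can then require `module: Give minions time to re-render
their pillar` to be reasonably sure it runs against refreshed pillar data.
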
From b8b528be9e302ad52ea229b1c6a6051f4bb3a56e Mon Sep 17 00:00:00 2001
From: Teddy Andrieux
Date: Mon, 9 Dec 2019 15:50:17 +0100
Subject: [PATCH 2/4] script,salt: Downgrade `bootstrap` before other nodes

Some ext_pillar need a salt-master already running the dest_version in
order to be computed, so we must first downgrade the salt-master (on the
bootstrap node) before applying the highstate on the other nodes.

Fixes: #2101
---
 salt/metalk8s/orchestrate/downgrade/init.sls |  5 ---
 scripts/downgrade.sh.in                      | 38 ++++++--------------
 2 files changed, 10 insertions(+), 33 deletions(-)

diff --git a/salt/metalk8s/orchestrate/downgrade/init.sls b/salt/metalk8s/orchestrate/downgrade/init.sls
index 5d0accb569..7717c1e93d 100644
--- a/salt/metalk8s/orchestrate/downgrade/init.sls
+++ b/salt/metalk8s/orchestrate/downgrade/init.sls
@@ -39,11 +39,6 @@ Wait for pillar refresh to complete:
 Skip node {{ node }}, already in {{ node_version }} older than {{ dest_version }}:
   test.succeed_without_changes
 
-  {%- elif 'bootstrap' in pillar.metalk8s.nodes[node].roles %}
-
-Skip node {{ node }}, bootstrap node downgrade should be done later:
-  test.succeed_without_changes
-
 {%- else %}
 
 Wait for API server to be available on {{ node }}:
diff --git a/scripts/downgrade.sh.in b/scripts/downgrade.sh.in
index fdd5c60fea..97f9d14e53 100755
--- a/scripts/downgrade.sh.in
+++ b/scripts/downgrade.sh.in
@@ -176,6 +176,15 @@ precheck_downgrade () {
 launch_downgrade () {
     SALT_MASTER_CALL=(crictl exec -i "$(get_salt_container)")
 
+    "${SALT_MASTER_CALL[@]}" salt-run saltutil.sync_all \
+        saltenv="metalk8s-$DESTINATION_VERSION"
+
+    "${SALT_MASTER_CALL[@]}" salt-run metalk8s_saltutil.sync_auth \
+        saltenv="metalk8s-$DESTINATION_VERSION"
+
+    "${SALT_MASTER_CALL[@]}" salt-run saltutil.sync_roster \
+        saltenv="metalk8s-$DESTINATION_VERSION"
+
     "${SALT_MASTER_CALL[@]}" salt-run state.orchestrate \
         metalk8s.orchestrate.downgrade \
         saltenv="$SALTENV"
@@ -192,33 +201,6 @@ downgrade_bootstrap () {
         pillar="{'metalk8s': {'endpoints': {'salt-master': $saltmaster_endpoint, \
         'repositories': $repo_endpoint}}}" \
         --retcode-passthrough
-
-    SALT_MASTER_CALL=(crictl exec -i "$(get_salt_container)")
-    "${SALT_MASTER_CALL[@]}" salt-run saltutil.sync_all \
-        saltenv="metalk8s-$DESTINATION_VERSION"
-    "${SALT_MASTER_CALL[@]}" salt-run metalk8s_saltutil.sync_auth \
-        saltenv="metalk8s-$DESTINATION_VERSION"
-    "${SALT_MASTER_CALL[@]}" salt-run saltutil.sync_roster \
-        saltenv="metalk8s-$DESTINATION_VERSION"
-
-    local bootstrap_id
-    bootstrap_id=$(
-        $SALT_CALL grains.get id --out txt \
-        | awk '/^local\: /{ print $2 }')
-    [ -z "$bootstrap_id" ] && die "Cannot retrieve bootstrap id"
-
-    "${SALT_MASTER_CALL[@]}" salt-run saltutil.sync_all \
-        saltenv="metalk8s-@@VERSION"
-
-    "${SALT_MASTER_CALL[@]}" salt-run state.orchestrate_single \
-        metalk8s_kubernetes.object_updated \
-        "$bootstrap_id" \
-        kind=Node apiVersion=v1 \
-        patch="{'metadata': {'labels': \
-        {'metalk8s.scality.com/version': '$DESTINATION_VERSION'}}}"
-    "${SALT_MASTER_CALL[@]}" salt-run state.sls \
-        metalk8s.orchestrate.deploy_node saltenv="$SALTENV" \
-        pillar="{'orchestrate': {'node_name': '$bootstrap_id'}}"
 }
 
 # patch the kube-system namespace annotation with input
@@ -252,5 +234,5 @@ fi
 run "Performing Pre-Downgrade checks" precheck_downgrade
 [ $DRY_RUN -eq 1 ] && exit 0
 run "Setting cluster version to $DESTINATION_VERSION" patch_kubesystem_namespace
-run "Launching the downgrade" launch_downgrade
 run "Downgrading bootstrap" downgrade_bootstrap
+run "Launching the downgrade" launch_downgrade
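
Note: the three pre-sync runner calls moved into launch_downgrade could also
be expressed as orchestrate states. A sketch using the `salt.runner`
orchestrate state (the destination saltenv value is an assumed example;
`metalk8s_saltutil.sync_auth` is a MetalK8s custom runner, shown in the
script above, not a stock Salt one):

    Sync all modules to the destination version:
      salt.runner:
        - name: saltutil.sync_all
        - saltenv: metalk8s-2.4.0  # assumed destination version

    Sync the salt-ssh roster modules to the destination version:
      salt.runner:
        - name: saltutil.sync_roster
        - saltenv: metalk8s-2.4.0  # assumed destination version
        - require:
          - salt: Sync all modules to the destination version

Keeping the calls in the shell script instead guarantees they complete
before the downgrade orchestrate is even rendered, which is the point of
the patch, since ext_pillar is computed at render time.
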
From 9c221cf32ea0c2e1cfe87fadd01d998c8dcd790f Mon Sep 17 00:00:00 2001
From: Teddy Andrieux
Date: Wed, 11 Dec 2019 11:08:04 +0100
Subject: [PATCH 3/4] salt: Add ability to `skip_roles` in salt top.sls

We may sometimes want to run only a part of the highstate; for example,
when downgrading we want to be able to apply the highstate for
everything except the `etcd` role.
---
 salt/top.sls.in | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/salt/top.sls.in b/salt/top.sls.in
index d7ba3b465d..6552b72507 100644
--- a/salt/top.sls.in
+++ b/salt/top.sls.in
@@ -71,6 +71,12 @@
 #
 # So, the `metalk8s.roles.node.absent` state is *only* applied when a
 # minion/node has no other known role but the `ca` one.
+#
+# Add a `skip_roles` feature to skip specific roles when we only want to
+# apply a part of the highstate.
+# NOTE: When we skip a role we also skip its `absent` state, meaning that
+# if a node does not have the `etcd` role and `etcd` is in its `skip_roles`,
+# then the `metalk8s.roles.etcd.absent` state will not be called.
 
 {%- set roles = ['bootstrap', 'ca', 'etcd', 'master', 'node', 'infra'] -%}
 {%- set node_role = 'node' -%}
@@ -81,6 +87,10 @@
 {%- macro role_match(name) -%}
 I@metalk8s:nodes:{{ grains['id'] }}:roles:{{ name }}
 {%- endmacro %}
+{%- macro skip_role_match(name) -%}
+I@metalk8s:nodes:{{ grains['id'] }}:skip_roles:{{ name }}
+{%- endmacro %}
+
 
 metalk8s-{{ version }}:
   # 'Default' role applicable to all nodes
@@ -91,19 +101,19 @@ metalk8s-{{ version }}:
 
 {% for role in roles %}
   # {{ role }} role
-  {{ version_match }} and {{ role_match(role) }}:
+  {{ version_match }} and {{ role_match(role) }} and not {{ skip_role_match(role) }}:
    - match: compound
    - metalk8s.roles.{{ role }}
 
 {# See comment on top #}
 {%- if role != node_role %}
-  {{ version_match }} and not {{ role_match(role) }}:
+  {{ version_match }} and not {{ role_match(role) }} and not {{ skip_role_match(role) }}:
    - match: compound
    - metalk8s.roles.{{ role }}.absent
 {%- endif %}
 {% endfor %}
 
 # See comment on top
-  {{ version_match }} {% for role in node_roles %} and not {{ role_match(role) }} {% endfor %}:
+  {{ version_match }} {% for role in node_roles %} and not {{ role_match(role) }} {% endfor %} and not {{ skip_role_match(node_role) }}:
    - match: compound
    - metalk8s.roles.node.absent
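
Note: for a single minion, the Jinja above renders into plain compound
matchers. A sketch of the resulting top file entry for the `etcd` role
(the minion id `node-1` and the rendered form of `version_match` are
assumed for illustration, since that macro is defined outside the shown
hunks):

    metalk8s-2.4.0:
      # etcd role: applied only when present in roles AND not in skip_roles
      'I@metalk8s:nodes:node-1:version:2.4.0 and I@metalk8s:nodes:node-1:roles:etcd and not I@metalk8s:nodes:node-1:skip_roles:etcd':
        - match: compound
        - metalk8s.roles.etcd
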
From 2429dfa51d073e45c7c9980908c3bfb7445aebb7 Mon Sep 17 00:00:00 2001
From: Teddy Andrieux
Date: Mon, 9 Dec 2019 15:53:13 +0100
Subject: [PATCH 4/4] salt: Skip `etcd` role when calling highstate during
 downgrade

When downgrading we need to first downgrade all the nodes and only
afterwards take care of the etcd cluster; that is why we skip the
`etcd` role when calling the highstate on the nodes.
---
 salt/metalk8s/orchestrate/deploy_node.sls    | 11 +++++++++++
 salt/metalk8s/orchestrate/downgrade/init.sls |  6 ++++++
 2 files changed, 17 insertions(+)

diff --git a/salt/metalk8s/orchestrate/deploy_node.sls b/salt/metalk8s/orchestrate/deploy_node.sls
index 8888ceb45e..8a45c121ab 100644
--- a/salt/metalk8s/orchestrate/deploy_node.sls
+++ b/salt/metalk8s/orchestrate/deploy_node.sls
@@ -1,6 +1,8 @@
 {%- set node_name = pillar.orchestrate.node_name %}
 {%- set version = pillar.metalk8s.nodes[node_name].version %}
 
+{%- set skip_roles = pillar.metalk8s.nodes[node_name].get('skip_roles', []) %}
+
 {%- if node_name not in salt.saltutil.runner('manage.up') %}
 Deploy salt-minion on a new node:
   salt.state:
@@ -116,6 +118,15 @@ Run the highstate:
     - tgt: {{ node_name }}
     - highstate: True
     - saltenv: metalk8s-{{ version }}
+    {#- Allow skipping some node roles so we do not apply the whole
+        highstate, e.g. skipping `etcd` when downgrading #}
+    {%- if skip_roles %}
+    - pillar:
+        metalk8s:
+          nodes:
+            {{ node_name }}:
+              skip_roles: {{ skip_roles }}
+    {%- endif %}
     - require:
       - salt: Set grains
       - salt: Refresh the mine
diff --git a/salt/metalk8s/orchestrate/downgrade/init.sls b/salt/metalk8s/orchestrate/downgrade/init.sls
index 7717c1e93d..0049905035 100644
--- a/salt/metalk8s/orchestrate/downgrade/init.sls
+++ b/salt/metalk8s/orchestrate/downgrade/init.sls
@@ -78,6 +78,12 @@ Deploy node {{ node }}:
         {#- Do not drain if we are in single node cluster #}
           skip_draining: True
 {%- endif %}
+        metalk8s:
+          nodes:
+            {{ node }}:
+              # Skip the `etcd` role, as we take care of the etcd cluster afterwards
+              skip_roles:
+                - etcd
     - require:
       - metalk8s_kubernetes: Set node {{ node }} version to {{ dest_version }}
     - require_in:
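
Note: with the two hunks above combined, any orchestrate (or a manual
salt-run call) can deploy a node while masking some of its roles, since
deploy_node forwards the `skip_roles` pillar override to the highstate.
A sketch of such an invocation as an orchestrate state (node name, version
and saltenv are assumed examples):

    Deploy node-1 without applying the etcd role:
      salt.runner:
        - name: state.orchestrate
        - mods:
          - metalk8s.orchestrate.deploy_node
        - saltenv: metalk8s-2.4.0  # assumed saltenv
        - pillar:
            orchestrate:
              node_name: node-1    # assumed node name
            metalk8s:
              nodes:
                node-1:
                  skip_roles:
                    - etcd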