salt/volumes: Detach devices before (up|down)grade
Since the management of loop devices changed between 2.6 and 2.7
(introduction of systemd units for this purpose), we want to ensure
there are no duplicate devices pointing to the same sparse file.
To do this, we introduce a "cleanup" formula, which can operate in two
modes, 'upgrade' and 'downgrade' (controlled via pillar).
For upgrade, we run this cleanup from the `deploy_node` orchestrate,
while the node is drained.
For downgrade, we cannot change the `deploy_node` orchestrate, so we
manually drain and clean up from the `downgrade` orchestrate.

See: #2982
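
As a quick illustration of the pillar-driven modes, the formula can be
applied directly with an explicit mode; a minimal sketch, assuming a
hypothetical minion ID ('node-1') and saltenv version:

salt 'node-1' state.sls metalk8s.volumes.cleanup \
    saltenv=metalk8s-2.7.0 \
    pillar='{"cleanup_mode": "upgrade"}'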
gdemonet committed Dec 16, 2020
1 parent a523e8f commit 8858b28
Showing 5 changed files with 136 additions and 2 deletions.
1 change: 1 addition & 0 deletions buildchain/buildchain/salt_tree.py
@@ -667,6 +667,7 @@ def _get_parts(self) -> Iterator[str]:
Path('salt/metalk8s/utils/httpd-tools/init.sls'),
Path('salt/metalk8s/utils/httpd-tools/installed.sls'),

Path('salt/metalk8s/volumes/cleanup.sls'),
Path('salt/metalk8s/volumes/init.sls'),
Path('salt/metalk8s/volumes/installed.sls'),
Path('salt/metalk8s/volumes/prepared.sls'),
51 changes: 49 additions & 2 deletions salt/metalk8s/orchestrate/deploy_node.sls
@@ -1,4 +1,6 @@
{%- set node_name = pillar.orchestrate.node_name %}
{%- set skip_draining = pillar.orchestrate.get('skip_draining', False) %}
{%- set cleanup_loop_devices = pillar.orchestrate.get('cleanup_loop_devices', False) %}
{%- set version = pillar.metalk8s.nodes[node_name].version %}

{%- set skip_roles = pillar.metalk8s.nodes[node_name].get('skip_roles', []) %}
@@ -70,7 +72,29 @@ Cordon the node:
metalk8s_cordon.node_cordoned:
- name: {{ node_name }}

{%- if not pillar.orchestrate.get('skip_draining', False) %}
{%- if not skip_draining or cleanup_loop_devices %}
{%- set run_drain = not skip_draining %}

{%- if skip_draining %}
{# Check if we can avoid the drain (if no loop device needs cleaning up) #}
{%- set volumes = salt.metalk8s_kubernetes.list_objects(
kind="Volume", apiVersion="storage.metalk8s.scality.com/v1alpha1",
) %}
{%- set volumes_to_clean = volumes
| selectattr('spec.nodeName', 'equalto', node_name)
| selectattr('spec.sparseLoopDevice', 'defined')
| list
%}
{%- if volumes_to_clean %}
{%- do salt.log.warning(
'Forcing drain to clean up loop devices for the following Volumes: '
~ volumes_to_clean | map(attribute='metadata.name') | join(', ')
) %}
{%- set run_drain = True %}
{%- endif %}
{%- endif %}

{%- if run_drain %}

Drain the node:
metalk8s_drain.node_drained:
@@ -83,6 +107,7 @@ Drain the node:
- require_in:
- salt: Run the highstate

{%- endif %}
{%- endif %}

{%- if node_name in salt.saltutil.runner('manage.up') %}
@@ -171,7 +196,7 @@ Install etcd node:
Register the node into etcd cluster:
salt.runner:
- name: state.orchestrate
- pillar: {{ pillar | json }}
- mods:
- metalk8s.orchestrate.register_etcd
- require:
@@ -207,6 +232,28 @@ Check pillar before highstate:
- salt: Sync module on the node
- http: Wait for API server to be available before highstate


{#- Nodes in 2.6 or lower rely on manual provisioning of loop devices, which
we need to clean up to avoid duplicates generated by the systemd
units introduced in 2.7 #}
{%- if cleanup_loop_devices %}

Cleanup existing loop devices:
salt.state:
- tgt: {{ node_name }}
- saltenv: metalk8s-{{ version }}
- sls:
- metalk8s.volumes.cleanup
- pillar:
cleanup_mode: upgrade
- require:
- salt: Check pillar before highstate
- metalk8s_drain: Drain the node
- require_in:
- salt: Run the highstate

{%- endif %}

Run the highstate:
salt.state:
- tgt: {{ node_name }}
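The forced-drain check in the hunk above can be reproduced by hand. A
hedged sketch, assuming kubectl and jq are available, 'node-1' is a
placeholder node name, and the resource name follows from the
kind/apiVersion used in the orchestrate:

kubectl get volumes.storage.metalk8s.scality.com -o json \
  | jq -r '.items[]
      | select(.spec.nodeName == "node-1"
               and (.spec | has("sparseLoopDevice")))
      | .metadata.name'

Any name printed here means the drain cannot be skipped for that node.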
30 changes: 30 additions & 0 deletions salt/metalk8s/orchestrate/downgrade/init.sls
@@ -73,8 +73,38 @@ Upgrade salt-minion on {{ node }}:
node_name: {{ node }}
- require:
- metalk8s_kubernetes: Set node {{ node }} version to {{ dest_version }}

# We force a node drain manually to handle cleanup of loop devices outside
# of the `deploy_node` orchestrate, since it comes from `dest_version`.
# The `deploy_node` orchestrate will take care of uncordoning.
Cordon node {{ node }}:
metalk8s_cordon.node_cordoned:
- name: {{ node }}
- require:
- salt: Upgrade salt-minion on {{ node }}

Drain node {{ node }}:
metalk8s_drain.node_drained:
- name: {{ node }}
- ignore_daemonset: True
- delete_local_data: True
- force: True
- require:
- metalk8s_cordon: Cordon node {{ node }}

Cleanup loop devices from node {{ node }}:
salt.state:
- tgt: {{ node }}
- saltenv: {{ saltenv }} {#- Use current version of this formula #}
- sls:
- metalk8s.volumes.cleanup
- pillar:
cleanup_mode: downgrade
- require:
- metalk8s_drain: Drain node {{ node }}
- require_in:
- salt: Deploy node {{ node }}

{%- endif %}

Deploy node {{ node }}:
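For intuition, the cordon/drain/cleanup sequence added above corresponds
roughly to these manual steps (a sketch only: the orchestrate uses Salt's
metalk8s_cordon/metalk8s_drain modules rather than kubectl, and 'node-1'
is a placeholder):

kubectl cordon node-1
kubectl drain node-1 --ignore-daemonsets --delete-local-data --force
salt 'node-1' state.sls metalk8s.volumes.cleanup \
    pillar='{"cleanup_mode": "downgrade"}'

The subsequent `deploy_node` run takes care of uncordoning, as the
comment above notes.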
5 changes: 5 additions & 0 deletions salt/metalk8s/orchestrate/upgrade/init.sls
@@ -108,6 +108,11 @@ Deploy node {{ node }}:
{#- Do not drain if we are in single node cluster #}
skip_draining: True
{%- endif %}
{%- if salt.pkg.version_cmp(node_version, '2.7.0') < 0 %}
{#- Nodes in 2.6 or lower rely on manual provisioning of loop
devices, which we need to clean up while the node is drained #}
cleanup_loop_devices: True
{%- endif %}
- require:
- metalk8s_kubernetes: Set node {{ node }} version to {{ dest_version }}
- require_in:
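This gate relies on pkg.version_cmp returning a negative value when the
first version is older, which is standard Salt behaviour ('node-1' and
the versions are placeholders):

salt 'node-1' pkg.version_cmp 2.6.1 2.7.0   # -1: node predates 2.7, cleanup requested
salt 'node-1' pkg.version_cmp 2.7.0 2.7.0   #  0: nothing to clean up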
51 changes: 51 additions & 0 deletions salt/metalk8s/volumes/cleanup.sls
@@ -0,0 +1,51 @@
# Clean already attached loop devices before applying the highstate, to avoid
# creating duplicates for the same sparse file.
# This state can clean up either "manually", for upgrade, or by disabling the
# systemd units, for downgrade.
{%- set mode = pillar.get('cleanup_mode', 'upgrade') %}

include:
- .installed

{%- set sparse_volumes = [] %}
{%- for volume in pillar.metalk8s.volumes %}
{%- if 'sparseLoopDevice' in volume.spec %}
{%- do sparse_volumes.append(volume) %}
{%- endif %}
{%- endfor %}

{%- if not sparse_volumes %}

Nothing to cleanup:
test.succeed_without_changes: []

{%- else %}
{%- for volume in sparse_volumes %}
{%- set volume_name = volume.metadata.name %}
{%- set volume_id = volume.metadata.uid %}
{%- if mode == 'upgrade' %}

Cleanup loop device for Volume {{ volume_name }}:
cmd.run:
- cmd: /usr/local/libexec/metalk8s-sparse-volume-cleanup "{{ volume_id }}"
# Only clean up if the systemd unit doesn't exist yet (this command exits
# with retcode 3 if the service is dead, 4 if the template does not exist)
- unless: systemctl status metalk8s-sparse-volume@{{ volume_id }}
- require:
- test: Ensure Python 3 is available
- file: Install clean-up script
{%- else %} {# mode == 'downgrade' #}
Disable systemd unit for Volume {{ volume_name }}:
service.dead:
- name: metalk8s-sparse-volume@{{ volume_id }}
- enable: false
- require:
- test: Ensure Python 3 is available
- file: Set up systemd template unit for sparse loop device provisioning
- file: Install clean-up script
{%- endif %}
{%- endfor %}
{%- endif %}
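
The 'unless' guard above relies on systemctl's documented exit codes (see
systemctl(1)): 0 when the unit is active, 3 when it is inactive/dead, 4
when no such unit exists. Since Salt runs the state only when the 'unless'
command fails, a sketch of the outcomes (placeholder UID):

systemctl status 'metalk8s-sparse-volume@<volume-uid>'; echo $?
# 0: unit active, device already managed by systemd -> cleanup skipped
# 3 or 4: unit dead or not installed (e.g. a 2.6 node) -> cleanup script runs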
