Merge branch 'bugfix/GH-2444-gather-pillar-instead-of-refresh' into q/2.0
bert-e committed Apr 23, 2020
2 parents c026db2 + a900a46 commit 3d8d17e
Showing 11 changed files with 155 additions and 77 deletions.
2 changes: 1 addition & 1 deletion buildchain/buildchain/salt_tree.py
@@ -285,9 +285,9 @@ def _get_parts(self) -> Iterator[str]:
Path('salt/metalk8s/kubernetes/etcd/certs/peer.sls'),
Path('salt/metalk8s/kubernetes/etcd/certs/server.sls'),
Path('salt/metalk8s/kubernetes/etcd/files/manifest.yaml'),
Path('salt/metalk8s/kubernetes/etcd/healthy.sls'),
Path('salt/metalk8s/kubernetes/etcd/init.sls'),
Path('salt/metalk8s/kubernetes/etcd/installed.sls'),
Path('salt/metalk8s/kubernetes/etcd/prepared.sls'),

Path('salt/metalk8s/kubernetes/kubelet/configured.sls'),
Path('salt/metalk8s/kubernetes/kubelet/files/kubeadm.env'),
12 changes: 11 additions & 1 deletion salt/_modules/metalk8s.py
@@ -8,6 +8,7 @@
import socket
import time

from salt.pillar import get_pillar
from salt.exceptions import CommandExecutionError
import salt.utils.files

@@ -233,7 +234,16 @@ def check_pillar_keys(keys, refresh=True, pillar=None, raise_error=True):
"""
# Ignore `refresh` if pillar is provided
if not pillar and refresh:
__salt__['saltutil.refresh_pillar']()
# Do not use `saltutil.refresh_pillar`: in Salt 2018.3 we cannot do a
# synchronous pillar refresh
# See https://github.com/saltstack/salt/issues/20590
pillar = get_pillar(
__opts__,
__grains__,
__grains__['id'],
saltenv=__opts__.get('saltenv'),
pillarenv=__opts__.get('pillarenv')
).compile_pillar()

if not pillar:
pillar = __pillar__
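
For reference, a minimal standalone sketch of the same pattern (compiling the pillar in-process instead of firing the asynchronous `saltutil.refresh_pillar`), assuming a standard minion config at /etc/salt/minion:

# Sketch only: synchronous pillar compilation with salt.pillar.get_pillar,
# mirroring the change above. The config path and loader calls are assumptions.
import salt.config
import salt.loader
from salt.pillar import get_pillar

opts = salt.config.minion_config('/etc/salt/minion')  # assumed location
grains = salt.loader.grains(opts)

pillar = get_pillar(
    opts,
    grains,
    grains['id'],
    saltenv=opts.get('saltenv'),
    pillarenv=opts.get('pillarenv'),
).compile_pillar()

print(sorted(pillar))  # freshly compiled top-level pillar keys
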
22 changes: 0 additions & 22 deletions salt/metalk8s/kubernetes/etcd/healthy.sls

This file was deleted.

3 changes: 2 additions & 1 deletion salt/metalk8s/kubernetes/etcd/init.sls
@@ -4,8 +4,9 @@
# Available states
# ================
#
# * prepared -> setup etcd dependencies
# * installed -> deploy etcd manifest
# * healthy -> check health of etcd node
#
include:
- .prepared
- .installed
27 changes: 27 additions & 0 deletions salt/metalk8s/kubernetes/etcd/installed.sls
@@ -79,3 +79,30 @@ Create local etcd Pod manifest:
- require:
- file: Create etcd database directory
- file: Ensure etcd CA cert is present

{#- In some cases we may want to deploy the etcd manifest even though etcd
    does not yet work properly, so we need to skip this health check
    (e.g. when we deploy a new etcd node whose member is not yet registered). #}
{%- if not pillar.get('metalk8s', {}).get('skip_etcd_healthcheck', False) %}
Delay after etcd pod deployment:
module.wait:
- test.sleep:
- length: 10
- watch:
- metalk8s: Create local etcd Pod manifest

Waiting for etcd running:
http.wait_for_successful_query:
- name: https://127.0.0.1:2379/health
- verify_ssl: True
- ca_bundle: /etc/kubernetes/pki/etcd/ca.crt
- cert:
- /etc/kubernetes/pki/etcd/server.crt
- /etc/kubernetes/pki/etcd/server.key
- status: 200
- match: '{"health": "true"}'
- require:
- module: Delay after etcd pod deployment
{%- endif %}
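
As a rough illustration, the check above can be reproduced by hand with a small Python poller. The endpoint, certificate paths, expected status and body come from the state itself; the retry cadence is an assumption:

# Sketch: poll the etcd /health endpoint the way the state above does.
import time

import requests

URL = 'https://127.0.0.1:2379/health'
CA_BUNDLE = '/etc/kubernetes/pki/etcd/ca.crt'
CLIENT_CERT = (
    '/etc/kubernetes/pki/etcd/server.crt',
    '/etc/kubernetes/pki/etcd/server.key',
)

def wait_for_etcd(attempts=30, delay=2):
    """Return True once etcd reports healthy, False after all attempts."""
    for _ in range(attempts):
        try:
            resp = requests.get(URL, verify=CA_BUNDLE, cert=CLIENT_CERT,
                                timeout=5)
            if resp.status_code == 200 and '"health": "true"' in resp.text:
                return True
        except requests.RequestException:
            pass  # etcd not reachable yet, retry
        time.sleep(delay)
    return False
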
9 changes: 9 additions & 0 deletions salt/metalk8s/kubernetes/etcd/prepared.sls
@@ -0,0 +1,9 @@
{%- from "metalk8s/repo/macro.sls" import build_image_name with context %}
include:
- metalk8s.kubernetes.ca.etcd.advertised
- .certs
Pre-pull the etcd image:
containerd.image_managed:
- name: {{ build_image_name('etcd') }}
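
By way of illustration, the manual equivalent of that pre-pull, assuming `crictl` is installed and pointed at the containerd socket (the image reference is a hypothetical stand-in for what build_image_name('etcd') resolves to):

# Sketch: pre-pull an image into containerd by hand through crictl.
import subprocess

image = 'registry.local/metalk8s/etcd:3.3.11'  # hypothetical image name
subprocess.run(['crictl', 'pull', image], check=True)
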
102 changes: 82 additions & 20 deletions salt/metalk8s/orchestrate/deploy_node.sls
@@ -3,6 +3,8 @@

{%- set skip_roles = pillar.metalk8s.nodes[node_name].get('skip_roles', []) %}

{%- set roles = pillar.get('metalk8s', {}).get('nodes', {}).get(node_name, {}).get('roles', []) %}

{%- if node_name not in salt.saltutil.runner('manage.up') %}
Deploy salt-minion on a new node:
salt.state:
@@ -71,7 +73,9 @@ Sync module on the node:
- kwarg:
saltenv: {{ saltenv }}

Refresh and check pillar before highstate:
{%- if node_name in salt.saltutil.runner('manage.up') %}

Check pillar before salt-minion configuration:
salt.function:
- name: metalk8s.check_pillar_keys
- tgt: {{ node_name }}
@@ -89,8 +93,6 @@ Refresh and check pillar before highstate:
- require:
- salt: Sync module on the node

{%- if node_name in salt.saltutil.runner('manage.up') %}

Reconfigure salt-minion:
salt.state:
- tgt: {{ node_name }}
@@ -100,7 +102,7 @@ Reconfigure salt-minion:
- require:
- salt: Set grains
- salt: Refresh the mine
- salt: Refresh and check pillar before highstate
- salt: Check pillar before salt-minion configuration

Wait minion available:
salt.runner:
@@ -109,29 +111,101 @@ Wait minion available:
- require:
- salt: Reconfigure salt-minion
- require_in:
- salt: Run the highstate
- http: Wait for API server to be available before highstate

{%- endif %}

{%- if 'etcd' in roles and 'etcd' not in skip_roles %}

Check pillar before etcd deployment:
salt.function:
- name: metalk8s.check_pillar_keys
- tgt: {{ node_name }}
- kwarg:
keys:
- metalk8s.endpoints.salt-master.ip
- metalk8s.endpoints.repositories.ip
- metalk8s.endpoints.repositories.ports.http
# We cannot raise when using `salt.function`: the module must return
# `False` for the state to be reported as failed
# https://github.com/saltstack/salt/issues/55503
raise_error: False
- retry:
attempts: 5
- require:
- salt: Sync module on the node

Install etcd node:
salt.state:
- tgt: {{ node_name }}
- saltenv: metalk8s-{{ version }}
- sls:
- metalk8s.roles.etcd
- pillar:
metalk8s:
# Skip the etcd healthcheck as we register the etcd member just after
skip_etcd_healthcheck: True
- require:
- salt: Check pillar before etcd deployment

Register the node into etcd cluster:
salt.runner:
- name: state.orchestrate
- pillar: {{ pillar | json }}
- mods:
- metalk8s.orchestrate.register_etcd
- require:
- salt: Install etcd node
- require_in:
- http: Wait for API server to be available before highstate

{%- endif %}

Wait for API server to be available before highstate:
http.wait_for_successful_query:
- name: https://{{ pillar.metalk8s.api_server.host }}:6443/healthz
- match: 'ok'
- status: 200
- verify_ssl: false

Check pillar before highstate:
salt.function:
- name: metalk8s.check_pillar_keys
- tgt: {{ node_name }}
- kwarg:
keys:
- metalk8s.endpoints.salt-master.ip
- metalk8s.endpoints.repositories.ip
- metalk8s.endpoints.repositories.ports.http
# We cannot raise when using `salt.function`: the module must return
# `False` for the state to be reported as failed
# https://github.com/saltstack/salt/issues/55503
raise_error: False
- retry:
attempts: 5
- require:
- salt: Sync module on the node
- http: Wait for API server to be available before highstate

Run the highstate:
salt.state:
- tgt: {{ node_name }}
- highstate: True
- saltenv: metalk8s-{{ version }}
{#- Add the ability to skip node roles so the highstate is not fully applied,
    e.g. skipping etcd when downgrading #}
{%- if skip_roles %}
- pillar:
metalk8s:
nodes:
{{ node_name }}:
skip_roles: {{ skip_roles }}
skip_roles: {{ skip_roles | unique | tojson }}
{%- endif %}
- require:
- salt: Set grains
- salt: Refresh the mine
- metalk8s_cordon: Cordon the node
- salt: Refresh and check pillar before highstate
- salt: Check pillar before highstate

Wait for API server to be available:
http.wait_for_successful_query:
@@ -161,15 +235,3 @@ Kill kube-controller-manager on all master nodes:
- require:
- salt: Run the highstate

{%- if 'etcd' in pillar.get('metalk8s', {}).get('nodes', {}).get(node_name, {}).get('roles', []) %}

Register the node into etcd cluster:
salt.runner:
- name: state.orchestrate
- pillar: {{ pillar | json }}
- mods:
- metalk8s.orchestrate.register_etcd
- require:
- salt: Run the highstate

{%- endif %}
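
A note on the `raise_error: False` knob used throughout this file: because of saltstack/salt#55503, an exception raised inside a module called through `salt.function` does not mark the orchestration state as failed, so the module has to return `False` instead. A simplified sketch of that contract (the key-walking helper is illustrative, not the actual module code):

from salt.exceptions import CommandExecutionError

def _get_nested(data, dotted_key):
    """Walk a dotted key such as 'metalk8s.endpoints.salt-master.ip'."""
    for part in dotted_key.split('.'):
        if not isinstance(data, dict) or part not in data:
            return None
        data = data[part]
    return data

def check_pillar_keys(keys, pillar, raise_error=True):
    """Return True if all keys resolve, otherwise raise or return False."""
    missing = [key for key in keys if _get_nested(pillar, key) is None]
    if not missing:
        return True
    error = 'Missing pillar keys: {}'.format(', '.join(missing))
    if raise_error:
        raise CommandExecutionError(error)  # fine for direct module calls
    return False  # required when invoked through salt.function
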
16 changes: 1 addition & 15 deletions salt/metalk8s/orchestrate/downgrade/init.sls
@@ -10,20 +10,6 @@ Execute the downgrade prechecks:
orchestrate:
dest_version: {{ dest_version }}

# In Salt 2018.3 we cannot do a synchronous pillar refresh, so add a sleep
# see https://github.com/saltstack/salt/issues/20590
Wait for pillar refresh to complete:
salt.function:
- name: saltutil.refresh_pillar
- tgt: '*'
- require:
- salt: Execute the downgrade prechecks
module.run:
- test.sleep:
- length: 20
- require:
- salt: Wait for pillar refresh to complete

{%- set cp_nodes = salt.metalk8s.minions_by_role('master') | sort %}
{%- set other_nodes = pillar.metalk8s.nodes.keys() | difference(cp_nodes) | sort %}

@@ -48,7 +34,7 @@ Wait for API server to be available on {{ node }}:
- status: 200
- verify_ssl: false
- require:
- module: Wait for pillar refresh to complete
- salt: Execute the downgrade prechecks
{%- if previous_node is defined %}
- salt: Deploy node {{ previous_node }}
{%- endif %}
21 changes: 19 additions & 2 deletions salt/metalk8s/orchestrate/etcd.sls
@@ -14,14 +14,31 @@ Sync {{ node }} minion:
- kwarg:
saltenv: metalk8s-{{ dest_version }}

Check pillar on {{ node }}:
salt.function:
- name: metalk8s.check_pillar_keys
- tgt: {{ node }}
- kwarg:
keys:
- metalk8s.endpoints.repositories.ip
- metalk8s.endpoints.repositories.ports.http
# We cannot raise when using `salt.function`: the module must return
# `False` for the state to be reported as failed
# https://github.com/saltstack/salt/issues/55503
raise_error: False
- retry:
attempts: 5
- require:
- salt: Sync {{ node }} minion

Deploy etcd {{ node }} to {{ dest_version }}:
salt.state:
- tgt: {{ node }}
- sls:
- metalk8s.kubernetes.etcd.healthy
- metalk8s.kubernetes.etcd
- saltenv: metalk8s-{{ dest_version }}
- require:
- salt: Sync {{ node }} minion
- salt: Check pillar on {{ node }}
- module: Check etcd cluster health
{%- if previous_node is defined %}
- metalk8s: Check etcd cluster health for {{ previous_node }}
2 changes: 2 additions & 0 deletions salt/metalk8s/orchestrate/register_etcd.sls
@@ -1,4 +1,6 @@
{# Should be run by the orchestrator runner #}
# This state cannot run on a minion since it depends on the etcd3 Python
# lib, which is only available by default on the salt-master.

{%- set nodename = pillar.orchestrate.node_name -%}

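
Since the note above points at the etcd3 Python lib, here is a minimal sketch of what the registration boils down to; the host, certificate paths and peer URL are illustrative assumptions:

# Sketch: add a new member to the etcd cluster with python-etcd3.
import etcd3

client = etcd3.client(
    host='127.0.0.1',
    port=2379,
    ca_cert='/etc/kubernetes/pki/etcd/ca.crt',
    cert_cert='/etc/kubernetes/pki/etcd/server.crt',
    cert_key='/etc/kubernetes/pki/etcd/server.key',
)

# The new node joins as an unstarted member until its local etcd pod
# comes up and syncs with the cluster.
member = client.add_member(['https://new-node.example.com:2380'])  # hypothetical peer URL
print(member.id, member.peer_urls)
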
16 changes: 1 addition & 15 deletions salt/metalk8s/orchestrate/upgrade/init.sls
@@ -22,20 +22,6 @@ Upgrade etcd cluster:
- require:
- salt: Execute the upgrade prechecks

# In Salt 2018.3 we cannot do a synchronous pillar refresh, so add a sleep
# see https://github.com/saltstack/salt/issues/20590
Wait for pillar refresh to complete:
salt.function:
- name: saltutil.refresh_pillar
- tgt: '*'
- require:
- salt: Upgrade etcd cluster
module.run:
- test.sleep:
- length: 20
- require:
- salt: Wait for pillar refresh to complete

{%- set cp_nodes = salt.metalk8s.minions_by_role('master') | sort %}
{%- set other_nodes = pillar.metalk8s.nodes.keys() | difference(cp_nodes) | sort %}

@@ -60,7 +46,7 @@ Wait for API server to be available on {{ node }}:
- status: 200
- verify_ssl: false
- require:
- module: Wait for pillar refresh to complete
- salt: Upgrade etcd cluster
{%- if previous_node is defined %}
- salt: Deploy node {{ previous_node }}
{%- endif %}
