Skip to content

Commit

Permalink
Merge branch 'bugfix/wait-for-apiserver-or-restart-kubelet' into q/123.0
Browse files Browse the repository at this point in the history
  • Loading branch information
bert-e committed Jul 13, 2022
2 parents 5eeb339 + af995d4 commit 59a8b2c
Show file tree
Hide file tree
Showing 5 changed files with 379 additions and 60 deletions.
126 changes: 115 additions & 11 deletions salt/_modules/cri.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
Various functions to interact with a CRI daemon (through :program:`crictl`).
"""

import re
import logging
import re
import time

from salt.exceptions import CommandExecutionError
Expand Down Expand Up @@ -229,25 +229,129 @@ def stop_pod(labels):
This uses the :command:`crictl` command, which should be configured
correctly on the system, e.g. in :file:`/etc/crictl.yaml`.
"""
selector = ",".join([f"{key}={value}" for key, value in labels.items()])
pod_ids = get_pod_id(labels=labels, ignore_not_found=True, multiple=True)
if not pod_ids:
return "No pods to stop"

out = __salt__["cmd.run_all"](f"crictl stopp {' '.join(pod_ids)}")

if out["retcode"] != 0:
selector = ",".join([f"{key}={value}" for key, value in labels.items()])
raise CommandExecutionError(
f"Unable to stop pods with labels '{selector}':\n"
f"IDS: {' '.join(pod_ids)}\nSTDERR: {out['stderr']}\nSTDOUT: {out['stdout']}"
)

return out["stdout"]


pod_ids_out = __salt__["cmd.run_all"](f"crictl pods --quiet --label={selector}")
def get_pod_id(
    name=None, labels=None, state=None, multiple=False, ignore_not_found=False
):
    """Retrieve the pod(s) ID(s) in CRI.

    .. note::

       This uses the :command:`crictl` command, which should be configured
       correctly on the system, e.g. in :file:`/etc/crictl.yaml`.

    name (str, optional)
        Name of the target pod
    labels (dict, optional)
        Labels of the target pod(s)
    state (str, optional)
        The state in which we want to find the target pod(s) (`None` if all
        states are acceptable)
    multiple (bool)
        Whether to accept multiple pods returned (raise otherwise)
    ignore_not_found (bool)
        Whether to raise if no target pod can be found

    Returns the list of matching pod IDs when ``multiple`` is true, the single
    matching pod ID otherwise, or ``None`` when nothing matches and
    ``ignore_not_found`` is true.

    Raises :class:`CommandExecutionError` when the :command:`crictl` call
    fails, when no pod matches (unless ``ignore_not_found``), or when several
    pods match (unless ``multiple``).
    """
    pod_ids_cmd = "crictl pods --quiet"
    info_parts = []
    if name is not None:
        pod_ids_cmd += f" --name {name}"
        info_parts.append(f"name '{name}'")
    if labels is not None:
        selector = ",".join(f"{key}={value}" for key, value in labels.items())
        pod_ids_cmd += f" --label {selector}"
        info_parts.append(f"labels '{selector}'")
    if state is not None:
        pod_ids_cmd += f" --state {state}"
        info_parts.append(f"state '{state}'")

    # Human-readable description of the query, reused in every error message.
    # Without any filter, avoid ending messages with a dangling "with ".
    if info_parts:
        info = f"with {' and '.join(info_parts)}"
    else:
        info = "without any filter"

    pod_ids_out = __salt__["cmd.run_all"](pod_ids_cmd)
    if pod_ids_out["retcode"] != 0:
        raise CommandExecutionError(
            f"Unable to get pod {info}:\n"
            f"STDERR: {pod_ids_out['stderr']}\nSTDOUT: {pod_ids_out['stdout']}"
        )

    # `--quiet` prints one pod ID per line; drop blank lines so that stray
    # whitespace in the output is never mistaken for a pod ID.
    stripped_lines = (line.strip() for line in pod_ids_out["stdout"].splitlines())
    pod_ids = [pod_id for pod_id in stripped_lines if pod_id]

    if not pod_ids:
        if ignore_not_found:
            return None
        raise CommandExecutionError(f"No pod found {info}")

    if multiple:
        return pod_ids

    if len(pod_ids) > 1:
        raise CommandExecutionError(f"More than one pod found {info}")

    return pod_ids[0]


def wait_pod(
    name, state="ready", last_id=None, timeout=60, sleep=5, raise_on_timeout=True
):
    """Wait until the pod has been created/updated.

    .. note::

       This uses the :command:`crictl` command, which should be configured
       correctly on the system, e.g. in :file:`/etc/crictl.yaml`.

    name (str)
        Name of the target pod
    state (str, optional)
        The state in which we want to find the target pod (`None` if all
        states are acceptable)
    last_id (str, optional)
        ID of the pod before it was updated (set to `None` if just waiting
        for a new pod)
    timeout (int)
        Number of seconds to wait before bailing out
    sleep (int)
        Number of seconds to wait between two checks
    raise_on_timeout (bool)
        Whether to raise if the timeout period is exceeded (otherwise,
        return False)

    Returns ``True`` as soon as a pod ID is found that differs from
    ``last_id``. Errors raised by :func:`get_pod_id` are not caught here.
    """
    began = time.time()

    while time.time() - began < timeout:
        pod_id = get_pod_id(name=name, state=state, ignore_not_found=True)
        if pod_id and pod_id != last_id:
            return True
        # Don't sleep if we know the next check would be past the deadline.
        time_left = timeout + began - time.time()
        if time_left < sleep:
            break
        time.sleep(sleep)

    if not raise_on_timeout:
        return False

    action = "updated" if last_id else "created"
    elapsed = time.time() - began
    raise CommandExecutionError(
        f"Pod {name} was not {action} after {elapsed:.0f} seconds"
    )
28 changes: 21 additions & 7 deletions salt/metalk8s/kubernetes/apiserver/installed.sls
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ include:
}) %}
{%- endif %}
{%- set pod_name = "kube-apiserver-" ~ grains.id %}
{%- set last_pod_id = salt.cri.get_pod_id(
name=pod_name, state="ready", ignore_not_found=True
) %}
Create kube-apiserver Pod manifest:
metalk8s.static_pod_managed:
- name: /etc/kubernetes/manifests/kube-apiserver.yaml
Expand Down Expand Up @@ -144,12 +149,23 @@ Create kube-apiserver Pod manifest:
- file: Ensure SA pub key is present
- file: Ensure Ingress CA cert is present
Delay after apiserver pod deployment:
module.run:
- test.sleep:
- length: 10
- onchanges:
{%- if last_pod_id %}
Restart kubelet to make it pick up the manifest changes:
service.running:
- name: kubelet
- watch:
- metalk8s: Create kube-apiserver Pod manifest
- unless:
# Do not restart kubelet if we see the Pod was updated
- fun: cri.wait_pod
name: {{ pod_name }}
last_id: {{ last_pod_id }}
timeout: 120
raise_on_timeout: false
- require_in:
- module: Make sure kube-apiserver container is up and ready
{%- endif %}
Make sure kube-apiserver container is up and ready:
module.run:
Expand All @@ -159,8 +175,6 @@ Make sure kube-apiserver container is up and ready:
- timeout: 120
- onchanges:
- metalk8s: Create kube-apiserver Pod manifest
- require:
- module: Delay after apiserver pod deployment
http.wait_for_successful_query:
- name: https://{{ host }}:6443/healthz
- verify_ssl: True
Expand Down
1 change: 1 addition & 0 deletions salt/tests/unit/formulas/fixtures/salt.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,7 @@ def slsutil_renderer(salt_mock: SaltMock, source: str, **_kwargs: Any) -> Any:

# Static mocks {{{

register_basic("cri.get_pod_id")(MagicMock(return_value="abcd1234"))
register_basic("file.find")(MagicMock(return_value=[]))
register_basic("file.join")(lambda *args: "/".join(args))
register_basic("file.read")(MagicMock(return_value="<file contents>"))
Expand Down
Loading

0 comments on commit 59a8b2c

Please sign in to comment.