From 55161eb55f63e9f685330307227692b1b7d91004 Mon Sep 17 00:00:00 2001 From: Carlos Segarra Date: Wed, 18 Dec 2024 19:28:35 +0000 Subject: [PATCH] coco: issue seems to be coming from an over-running coco install --- tasks/operator.py | 28 +++++++++++++++++++++++++++- tasks/util/containerd.py | 27 ++++++++++++++++++++++++++- tasks/util/registry.py | 7 +++++-- tasks/util/toml.py | 9 ++++++++- 4 files changed, 66 insertions(+), 5 deletions(-) diff --git a/tasks/operator.py b/tasks/operator.py index 32006e0..27d2365 100644 --- a/tasks/operator.py +++ b/tasks/operator.py @@ -1,10 +1,11 @@ from invoke import task from os.path import join -from tasks.util.env import print_dotted_line +from tasks.util.env import CONTAINERD_CONFIG_FILE, KATA_CONFIG_DIR, print_dotted_line from tasks.util.kubeadm import ( run_kubectl_command, wait_for_pods_in_ns, ) +from tasks.util.toml import read_value_from_toml from tasks.util.versions import COCO_VERSION from time import sleep @@ -85,6 +86,31 @@ def install_cc_runtime(ctx, debug=False): " " ) + # We must also wait until we are done configuring the nydus snapshotter + sleep(10) + for runtime in expected_runtime_classes[1:]: + runtime_no_kata = runtime[5:] + expected_config_path = ( + f"{KATA_CONFIG_DIR}//configuration-{runtime_no_kata}.toml" + ) + toml_path = ( + f'plugins."io.containerd.grpc.v1.cri".containerd.runtimes' + f".{runtime}.options.ConfigPath" + ) + + while expected_config_path != read_value_from_toml( + CONTAINERD_CONFIG_FILE, toml_path, tolerate_missing=True + ): + if debug: + print( + ( + f"Waiting for operator to populate containerd " + f"entry for runtime: {runtime}..." 
+ ) + ) + + sleep(2) + print("Success!") diff --git a/tasks/util/containerd.py b/tasks/util/containerd.py index 65bd709..89ffe65 100644 --- a/tasks/util/containerd.py +++ b/tasks/util/containerd.py @@ -1,6 +1,26 @@ from json import loads as json_loads +from os.path import exists +from socket import AF_UNIX, SOCK_STREAM, error as socket_error, socket from subprocess import run -from time import sleep +from time import sleep, time + + +def wait_for_socket(): + timeout = 10 + interval = 1 + socket_path = "/run/containerd/containerd.sock" + + start_time = time() + while time() - start_time < timeout: + if exists(socket_path): + try: + with socket(AF_UNIX, SOCK_STREAM) as s: + s.connect(socket_path) + return True + except socket_error: + pass + sleep(interval) + return False def is_containerd_active(): @@ -19,12 +39,17 @@ def restart_containerd(debug=False): """ run("sudo service containerd restart", shell=True, check=True) + # First wait for systemd to report containerd as active while not is_containerd_active(): if debug: print("Waiting for containerd to be active...") sleep(2) + # Then make sure we can dial the socket + if not wait_for_socket(): + raise RuntimeError("Error dialing containerd socket!") + def get_journalctl_containerd_logs(timeout_mins=1): """ diff --git a/tasks/util/registry.py b/tasks/util/registry.py index 33a51be..b930dba 100644 --- a/tasks/util/registry.py +++ b/tasks/util/registry.py @@ -1,6 +1,6 @@ from os import makedirs from os.path import exists, join -from subprocess import run +from subprocess import CalledProcessError, run from tasks.util.docker import is_ctr_running from tasks.util.env import ( CONF_FILES_DIR, @@ -273,7 +273,10 @@ def stop(debug=False): # For Knative, we only need to delete the secret, as the other bit is a # patch to the controller deployment that can be applied again kube_cmd = "-n knative-serving delete secret {}".format(K8S_SECRET_NAME) - run_kubectl_command(kube_cmd, capture_output=not debug) + try: + 
run_kubectl_command(kube_cmd, capture_output=not debug) + except CalledProcessError: + print("WARNING: deleting knative-serving secret failed") # For Kata and containerd, all configuration is reversible, so we only # need to sop the container image diff --git a/tasks/util/toml.py b/tasks/util/toml.py index ac1fd39..22c2bdf 100644 --- a/tasks/util/toml.py +++ b/tasks/util/toml.py @@ -99,12 +99,19 @@ def join_dot_preserve_quote(toml_levels): return ".".join(toml_path) -def read_value_from_toml(toml_file_path, toml_path): +def read_value_from_toml(toml_file_path, toml_path, tolerate_missing=False): """ Return the value in a TOML specified by a "." delimited TOML path """ toml_file = toml_load(toml_file_path) for toml_level in split_dot_preserve_quotes(toml_path): + if toml_level not in toml_file: + if tolerate_missing: + return "" + + raise RuntimeError( + f"{toml_level} is not an entry in TOML file {toml_file_path}" + ) toml_file = toml_file[toml_level] if isinstance(toml_file, dict):