Skip to content

Commit

Permalink
coco: issue seems to be coming from an over-running coco install
Browse files Browse the repository at this point in the history
  • Loading branch information
csegarragonz committed Dec 18, 2024
1 parent 26d46c8 commit 55161eb
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 5 deletions.
28 changes: 27 additions & 1 deletion tasks/operator.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from invoke import task
from os.path import join
from tasks.util.env import print_dotted_line
from tasks.util.env import CONTAINERD_CONFIG_FILE, KATA_CONFIG_DIR, print_dotted_line
from tasks.util.kubeadm import (
run_kubectl_command,
wait_for_pods_in_ns,
)
from tasks.util.toml import read_value_from_toml
from tasks.util.versions import COCO_VERSION
from time import sleep

Expand Down Expand Up @@ -85,6 +86,31 @@ def install_cc_runtime(ctx, debug=False):
" "
)

# We must also wait until we are done configuring the nydus snapshotter
sleep(10)
for runtime in expected_runtime_classes[1:]:
runtime_no_kata = runtime[5:]
expected_config_path = (
f"{KATA_CONFIG_DIR}//configuration-{runtime_no_kata}.toml"
)
toml_path = (
f'plugins."io.containerd.grpc.v1.cri".containerd.runtimes'
f".{runtime}.options.ConfigPath"
)

while expected_config_path != read_value_from_toml(
CONTAINERD_CONFIG_FILE, toml_path, tolerate_missing=True
):
if debug:
print(
(
f"Waiting for operator to populate containerd "
f"entry for runtime: {runtime}..."
)
)

sleep(2)

print("Success!")


Expand Down
27 changes: 26 additions & 1 deletion tasks/util/containerd.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,26 @@
from json import loads as json_loads
from os.path import exists
from socket import AF_UNIX, SOCK_STREAM, error as socket_error, socket
from subprocess import run
from time import sleep
from time import sleep, time


def wait_for_socket(
    socket_path="/run/containerd/containerd.sock", timeout=10, interval=1
):
    """
    Wait until a UNIX stream socket accepts connections

    Repeatedly tries to connect to `socket_path` until a connection succeeds
    or `timeout` seconds have elapsed, sleeping `interval` seconds between
    attempts.

    Parameters:
    - socket_path: filesystem path of the UNIX socket to dial (defaults to
      the containerd socket)
    - timeout: maximum time, in seconds, to keep retrying
    - interval: time, in seconds, to sleep between attempts

    Returns True as soon as a connection succeeds, and False if the timeout
    expires first.
    """
    start_time = time()
    while time() - start_time < timeout:
        if exists(socket_path):
            try:
                with socket(AF_UNIX, SOCK_STREAM) as s:
                    s.connect(socket_path)
                return True
            except socket_error:
                # The path exists but nothing is accepting connections yet,
                # so fall through and retry
                pass

        # NOTE: `time` is the function imported from the `time` module, not
        # the module itself, so we must call the imported `sleep` directly
        sleep(interval)

    return False


def is_containerd_active():
Expand All @@ -19,12 +39,17 @@ def restart_containerd(debug=False):
"""
run("sudo service containerd restart", shell=True, check=True)

# First wait for systemd to report containerd as active
while not is_containerd_active():
if debug:
print("Waiting for containerd to be active...")

sleep(2)

# Then make sure we can dial the socket
if not wait_for_socket():
raise RuntimeError("Error dialing containerd socket!")


def get_journalctl_containerd_logs(timeout_mins=1):
"""
Expand Down
7 changes: 5 additions & 2 deletions tasks/util/registry.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from os import makedirs
from os.path import exists, join
from subprocess import run
from subprocess import CalledProcessError, run
from tasks.util.docker import is_ctr_running
from tasks.util.env import (
CONF_FILES_DIR,
Expand Down Expand Up @@ -273,7 +273,10 @@ def stop(debug=False):
# For Knative, we only need to delete the secret, as the other bit is a
# patch to the controller deployment that can be applied again
kube_cmd = "-n knative-serving delete secret {}".format(K8S_SECRET_NAME)
run_kubectl_command(kube_cmd, capture_output=not debug)
try:
run_kubectl_command(kube_cmd, capture_output=not debug)
except CalledProcessError:
print("WARNING: deleting knative-serving secret failed")

# For Kata and containerd, all configuration is reversible, so we only
# need to stop the container image
Expand Down
9 changes: 8 additions & 1 deletion tasks/util/toml.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,19 @@ def join_dot_preserve_quote(toml_levels):
return ".".join(toml_path)


def read_value_from_toml(toml_file_path, toml_path):
def read_value_from_toml(toml_file_path, toml_path, tolerate_missing=False):
"""
Return the value in a TOML specified by a "." delimited TOML path
"""
toml_file = toml_load(toml_file_path)
for toml_level in split_dot_preserve_quotes(toml_path):
if toml_level not in toml_file:
if tolerate_missing:
return ""

raise RuntimeError(
f"{toml_level} is not an entry in TOML file {toml_file_path}"
)
toml_file = toml_file[toml_level]

if isinstance(toml_file, dict):
Expand Down

0 comments on commit 55161eb

Please sign in to comment.