Skip to content

Commit

Permalink
sc2: fix containerd toml population (#120)
Browse files Browse the repository at this point in the history
  • Loading branch information
csegarragonz authored Dec 19, 2024
1 parent 74c13b9 commit e9ac038
Show file tree
Hide file tree
Showing 21 changed files with 585 additions and 449 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
# Rust formatting checks
- name: "Run cargo lints"
run: |
for dir in "./vm-cache/" "./tools/check-kata-hashes" "./tools/tee-detect"; do
for dir in "./vm-cache/" "./tools/check-kata-hashes" "./tools/tee-detect" "./tools/purge-containerd" "./tools/purge-k8s"; do
pushd ${dir} >> /dev/null
cargo fmt --all -- --check
cargo clippy -- -D warnings
Expand Down
16 changes: 3 additions & 13 deletions tasks/coconut/ovmf.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from invoke import task
from os.path import join
from tasks.util.env import BIN_DIR, PROJ_ROOT
from tasks.util.docker import copy_from_container, build_image_and_run, stop_container
from tasks.util.env import BIN_DIR
from tasks.util.docker import copy_from_ctr_image

# refer to
# https://github.com/coconut-svsm/svsm/blob/main/Documentation/docs/installation/INSTALL.md
Expand All @@ -11,16 +11,6 @@

@task
def build(ctx):
tmp_ctr_name = "tmp-ovmf-svsm-run"

build_image_and_run(
OVMF_IMAGE_TAG,
join(PROJ_ROOT, "docker", "coconut", "ovmf.dockerfile"),
tmp_ctr_name,
)

ctr_path = "/root/edk2/Build/OvmfX64/DEBUG_GCC5/FV/OVMF.fd"
host_path = join(BIN_DIR, "ovmf-svsm.fd")
copy_from_container(tmp_ctr_name, ctr_path, host_path)

stop_container(tmp_ctr_name)
copy_from_ctr_image(OVMF_IMAGE_TAG, ctr_path, host_path)
11 changes: 5 additions & 6 deletions tasks/coconut/qemu.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from os.path import join
from subprocess import run
from tasks.util.env import BIN_DIR, PROJ_ROOT, KATA_ROOT
from tasks.util.docker import copy_from_container, build_image_and_run, stop_container
from tasks.util.docker import copy_from_ctr_image, build_image_and_run

# refer to
# https://github.com/coconut-svsm/svsm/blob/main/Documentation/docs/installation/INSTALL.md
Expand All @@ -15,21 +15,20 @@
def build(ctx):
tmp_ctr_name = "tmp-qemu-igvm-run"

# TODO: fix me
build_image_and_run(
QEMU_IMAGE_TAG,
join(PROJ_ROOT, "docker", "coconut", "qemu.dockerfile"),
tmp_ctr_name,
{"QEMU_DATADIR": DATA_DIR},
)

copy_from_container(
tmp_ctr_name,
copy_from_ctr_image(
QEMU_IMAGE_TAG,
"/root/bin/qemu-svsm/bin/qemu-system-x86_64",
join(BIN_DIR, "qemu-system-x86_64-igvm"),
)
copy_from_container(tmp_ctr_name, f"{DATA_DIR}/.", DATA_DIR)

stop_container(tmp_ctr_name)
# copy_from_container(tmp_ctr_name, f"{DATA_DIR}/.", DATA_DIR)


@task
Expand Down
7 changes: 4 additions & 3 deletions tasks/coconut/svsm.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from invoke import task
from os.path import join
from tasks.util.env import BIN_DIR, PROJ_ROOT
from tasks.util.docker import build_image_and_run, copy_from_container, stop_container
from tasks.util.docker import build_image_and_run, copy_from_ctr_image, stop_container

# refer to
# https://github.com/coconut-svsm/svsm/blob/main/Documentation/docs/installation/INSTALL.md
Expand All @@ -27,9 +27,10 @@ def build(ctx):
"coconut-qemu.igvm",
"../target/x86_64-unknown-none/debug/svsm",
]
# FIXME: sure it is the right tag?
for file_name in files_to_copy:
copy_from_container(
tmp_ctr_name, join(ctr_path, file_name), join(host_path, file_name)
copy_from_ctr_image(
QEMU_IMAGE_TAG, join(ctr_path, file_name), join(host_path, file_name)
)

stop_container(tmp_ctr_name)
167 changes: 21 additions & 146 deletions tasks/containerd.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from invoke import task
from os import makedirs
from os import stat
from os.path import join
from subprocess import CalledProcessError, run
from tasks.util.docker import is_ctr_running
from subprocess import run
from tasks.util.containerd import is_containerd_active, restart_containerd
from tasks.util.docker import copy_from_ctr_image, is_ctr_running
from tasks.util.env import (
CONF_FILES_DIR,
CONTAINERD_CONFIG_FILE,
CONTAINERD_CONFIG_ROOT,
PROJ_ROOT,
print_dotted_line,
)
Expand All @@ -16,13 +18,6 @@
CONTAINERD_IMAGE_TAG = "containerd-build"


def restart_containerd():
"""
Utility function to gracefully restart the containerd service
"""
run("sudo service containerd restart", shell=True, check=True)


def do_build(debug=False):
docker_cmd = "docker build -t {} --build-arg CONTAINERD_VERSION={} -f {} .".format(
CONTAINERD_IMAGE_TAG,
Expand Down Expand Up @@ -62,105 +57,6 @@ def cli(ctx):
run("docker exec -it {} bash".format(CONTAINERD_CTR_NAME), shell=True, check=True)


def configure_devmapper_snapshotter():
"""
Configure the devmapper snapshotter in containerd's config file
This method was included at the beginning, when we thought that we needed
the devmapper snapshotter to get containerd to work. In the end it turned
out that we did not, but we keep this method here for completeness.
"""
data_dir = "/var/lib/containerd/devmapper"
pool_name = "containerd-pool"

# --------------------------
# Thin Pool device configuration
# --------------------------

# First, remove the device if it already exists
try:
run("sudo dmsetup remove --force {}".format(pool_name), shell=True, check=True)
except CalledProcessError:
print("Ignoring errors when removing device if it doesn't exist...")

# Create data and metadata files
makedirs(data_dir, exist_ok=True)
data_file = join(data_dir, "data")
meta_file = join(data_dir, "meta")
run("sudo touch {}".format(data_file), shell=True, check=True)
run("sudo truncate -s 100G {}".format(data_file), shell=True, check=True)
run("sudo touch {}".format(meta_file), shell=True, check=True)
run("sudo truncate -s 10G {}".format(meta_file), shell=True, check=True)

# Allocate loop devices
data_dev = (
run(
"sudo losetup --find --show {}".format(data_file),
shell=True,
capture_output=True,
)
.stdout.decode("utf-8")
.strip()
)
meta_dev = (
run(
"sudo losetup --find --show {}".format(meta_file),
shell=True,
capture_output=True,
)
.stdout.decode("utf-8")
.strip()
)

# Define thin-pool parameters:
# https://www.kernel.org/doc/Documentation/device-mapper/thin-provisioning.txt
sector_size = 512
data_size = int(
run(
"sudo blockdev --getsize64 -q {}".format(data_dev),
shell=True,
capture_output=True,
)
.stdout.decode("utf-8")
.strip()
)
data_block_size = 128
low_water_mark = 32768

# Create a thin-pool device
dmsetup_cmd = [
"sudo dmsetup",
"create {}".format(pool_name),
"--table",
"'0 {} thin-pool {} {} {} {}'".format(
int(data_size / sector_size),
meta_dev,
data_dev,
data_block_size,
low_water_mark,
),
]
dmsetup_cmd = " ".join(dmsetup_cmd)
run(dmsetup_cmd, shell=True, check=True)

# --------------------------
# Update containerd's config file to use the devmapper snapshotter
# --------------------------

# Note: we currently don't use the devmapper snapshot, so this just
# _configures_ it (but doesn't select it as snapshotter)
updated_toml_str = """
[plugins."io.containerd.snapshotter.v1.devmapper"]
root_path = "{root_path}"
pool_name = "{pool_name}"
base_image_size = "8192MB"
discard_blocks = true
""".format(
root_path=data_dir, pool_name=pool_name
)
update_toml(CONTAINERD_CONFIG_FILE, updated_toml_str)


@task
def set_log_level(ctx, log_level):
"""
Expand Down Expand Up @@ -192,23 +88,11 @@ def install(ctx, debug=False, clean=False):
Install (and build) containerd from source
"""
print_dotted_line(f"Installing containerd (v{CONTAINERD_VERSION})")
do_build(debug=debug)

tmp_ctr_name = "tmp_containerd_build"
docker_cmd = "docker run -td --name {} {} bash".format(
tmp_ctr_name, CONTAINERD_IMAGE_TAG
)
result = run(docker_cmd, capture_output=True, shell=True)
assert result.returncode == 0, print(result.stderr.decode("utf-8").strip())
if debug:
print(result.stdout.decode("utf-8").strip())
if is_containerd_active():
run("sudo service containerd stop", shell=True, check=True)

def cleanup():
docker_cmd = "docker rm -f {}".format(tmp_ctr_name)
result = run(docker_cmd, shell=True, capture_output=True)
assert result.returncode == 0, print(result.stderr.decode("utf-8").strip())
if debug:
print(result.stdout.decode("utf-8").strip())
do_build(debug=debug)

binary_names = [
"containerd",
Expand All @@ -218,27 +102,12 @@ def cleanup():
]
ctr_base_path = "/go/src/github.com/containerd/containerd/bin"
host_base_path = "/usr/bin"
for binary in binary_names:
if clean:
run(
"sudo rm -f {}".format(join(host_base_path, binary)),
shell=True,
check=True,
)

docker_cmd = "sudo docker cp {}:{}/{} {}/{}".format(
tmp_ctr_name, ctr_base_path, binary, host_base_path, binary
)
try:
result = run(docker_cmd, shell=True, capture_output=True)
assert result.returncode == 0, print(result.stderr.decode("utf-8").strip())
if debug:
print(result.stdout.decode("utf-8").strip())
except CalledProcessError as e:
cleanup()
raise e

cleanup()
host_binaries = [join(host_base_path, binary) for binary in binary_names]
ctr_binaries = [join(ctr_base_path, binary) for binary in binary_names]
copy_from_ctr_image(
CONTAINERD_IMAGE_TAG, ctr_binaries, host_binaries, requires_sudo=True
)

# Clean-up all runtime files for a clean start
if clean:
Expand All @@ -247,17 +116,23 @@ def cleanup():
# Configure the CNI (see containerd/scripts/setup/install-cni)
cni_conf_file = "10-containerd-net.conflist"
cni_dir = "/etc/cni/net.d"
run("sudo mkdir -p {}".format(cni_dir), shell=True, check=True)
run(f"sudo mkdir -p {cni_dir}", shell=True, check=True)
cp_cmd = "sudo cp {} {}".format(
join(CONF_FILES_DIR, cni_conf_file), join(cni_dir, cni_conf_file)
)
run(cp_cmd, shell=True, check=True)

# Populate the default config file
run(f"sudo mkdir -p {CONTAINERD_CONFIG_ROOT}", shell=True, check=True)
config_cmd = "containerd config default > {}".format(CONTAINERD_CONFIG_FILE)
config_cmd = "sudo bash -c '{}'".format(config_cmd)
run(config_cmd, shell=True, check=True)

# Restart containerd service
restart_containerd()
run("sudo service containerd start", shell=True, check=True)

# Sanity check
if stat(CONTAINERD_CONFIG_FILE).st_size == 0:
raise RuntimeError("containerd config file is empty!")

print("Success!")
3 changes: 3 additions & 0 deletions tasks/kata.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from invoke import task
from os.path import abspath, join
from subprocess import run
from tasks.util.containerd import restart_containerd
from tasks.util.env import (
KATA_CONFIG_DIR,
KATA_IMAGE_TAG,
Expand Down Expand Up @@ -132,3 +133,5 @@ def replace_shim(ctx, runtime="qemu-snp-sc2"):
),
sc2=runtime in SC2_RUNTIMES,
)

restart_containerd()
32 changes: 31 additions & 1 deletion tasks/operator.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from invoke import task
from os.path import join
from tasks.util.env import print_dotted_line
from tasks.util.env import CONTAINERD_CONFIG_FILE, KATA_CONFIG_DIR, print_dotted_line
from tasks.util.kubeadm import (
run_kubectl_command,
wait_for_pods_in_ns,
)
from tasks.util.toml import read_value_from_toml
from tasks.util.versions import COCO_VERSION
from time import sleep

Expand Down Expand Up @@ -85,6 +86,35 @@ def install_cc_runtime(ctx, debug=False):
" "
)

# The operator may report all runtime classes as created, but still be in
# the process of modifying the config files. If we make progress without
# the config files being ready, we will have a race condition on
# containerd's config file. Therefore here we wait until all runtime
# classes have been persisted to the config file.
# See: sc2-sys/deploy/pull/120
for runtime in expected_runtime_classes[1:]:
runtime_no_kata = runtime[5:]
expected_config_path = (
f"{KATA_CONFIG_DIR}//configuration-{runtime_no_kata}.toml"
)
toml_path = (
f'plugins."io.containerd.grpc.v1.cri".containerd.runtimes'
f".{runtime}.options.ConfigPath"
)

while expected_config_path != read_value_from_toml(
CONTAINERD_CONFIG_FILE, toml_path, tolerate_missing=True
):
if debug:
print(
(
f"Waiting for operator to populate containerd "
f"entry for runtime: {runtime}..."
)
)

sleep(2)

print("Success!")


Expand Down
Loading

0 comments on commit e9ac038

Please sign in to comment.