Skip to content
This repository has been archived by the owner on Jun 28, 2024. It is now read-only.

Vagrant: boot q35 machine and other improvements #3965

Merged
merged 8 commits into from
Nov 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .ci/setup_env_fedora.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ declare -A packages=( \
[redis]="redis" \
[versionlock]="python3-dnf-plugin-versionlock" \
[agent_shutdown_test]="tmux" \
[vfio_test]="pciutils driverctl" \
)

if [ "$(uname -m)" == "x86_64" ] || ([ "$(uname -m)" == "ppc64le" ] && [ "${VERSION_ID}" -ge "32" ]); then
Expand Down
2 changes: 2 additions & 0 deletions .ci/setup_env_ubuntu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ declare -A packages=( \

if [ "${NAME}" == "Ubuntu" ] && [ "$(echo "${VERSION_ID} >= 20.04" | bc -q)" == "1" ]; then
packages[cri-containerd_dependencies]+=" libbtrfs-dev"
# driverctl is unavailable on older Ubuntu like 18.04
packages[vfio_test]="pciutils driverctl"
fi

if [ "$(uname -m)" == "x86_64" ] && [ "${NAME}" == "Ubuntu" ] && [ "$(echo "${VERSION_ID} >= 18.04" | bc -q)" == "1" ]; then
Expand Down
161 changes: 161 additions & 0 deletions .ci/vagrant-cleaner.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
#!/bin/bash
#
# Copyright (c) 2021 Red Hat, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# Absolute path of the directory holding this script (symlinks resolved).
# The inner command substitution is quoted so that a script path containing
# whitespace is not word-split before it reaches dirname.
cidir="$(dirname "$(readlink -f "$0")")"

# Report a fatal error and terminate the script with status 1.
# All arguments are joined with spaces, prefixed with 'ERROR:' and written
# to stderr; backslash escapes (e.g. \n) in the text are interpreted.
die() {
	local text="ERROR: $*"
	echo -e "${text}" 1>&2
	exit 1
}

# Print the help text to stdout.
#
# The text is a here-document fed to cat; "$0" is expanded inside it, so the
# "Use:" line shows the name the script was invoked with.
#
# NOTE(review): the lines from "Copy link" through "It's kind of horrible ..."
# sit inside the here-document and look like pasted review-page text rather
# than help content - confirm and drop them from the real script.
usage() {
cat <<-EOF
This script helps you to clean up the VMs and Vagrant's control
directories.
By default it will attempt to destroy all VMs. For additionally
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's kind of horrible that this isn't built into vagrant :(.

remove control directories use one or more of the flags shown
below.

Use: $0 [-g] [-h] [-l] [-n VM_NAME], where
-g: also remove the vagrant's global directory (~/.vagrant.d)
Implies -l.
-h: print this help.
-l: also remove the vagrant's local directory.
-n: destroy the VM. Do not remove any vagrant's directories.

Caution: only use the -g flag if you know what you are doing.
EOF
}

# Return the VM names.
#
# Reads `vagrant status --machine-readable` and keeps the second
# comma-separated field (the machine name) of every line that matches
# 'metadata,provider'.
#
# Outputs: the names on a single line, separated by spaces (an empty line
#          when no machine is reported).
get_vms() {
	local vms=()
	# Keep the loop variable local so it does not leak into the caller.
	local name

	# A single awk pass does both the filtering and the field extraction.
	while IFS= read -r name; do
		if [ -n "$name" ]; then
			vms+=("$name")
		fi
	done < <(vagrant status --machine-readable | \
		awk -F, '/metadata,provider/ { print $2 }')

	# "${vms[*]}" joins with a space without exposing names to globbing.
	printf '%s\n' "${vms[*]}"
}

# Use vagrant to gently destroy the VM.
#
# Runs `vagrant destroy --force` on the VM and prints an OK/FAILED status
# line to stdout.
#
# Parameters:
# $1 - the VM name
#
# Returns:
# 1 when `vagrant destroy` fails.
vagrant_destroy() {
local vm="$1"
if vagrant destroy --force "$vm"; then
echo "VM '$vm' destroyed: OK"
else
echo "VM '$vm' destroyed: FAILED"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s/echo/perror/ ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here I just want to info the user.

# Report the failure to the caller, which decides what to do next.
return 1
fi
}

# Remove the domain from libvirt.
#
# The libvirt domain name is the VM name with the Vagrantfile's
# 'default_prefix' prepended; every virsh call (including the existence
# check) must therefore use the prefixed name.
#
# Parameters:
#	$1 - the VM name
#
# Returns:
#	0 when the domain does not exist or was destroyed and undefined,
#	1 when virsh failed to destroy or undefine it.
libvirt_cleanup() {
	local vm="$1"
	# Note: the prefix here should equal to the 'default_prefix'
	# property in the Vagrantfile.
	local prefix="kata_containers_test-"
	local domain="${prefix}${vm}"

	# Look up the prefixed domain, not the bare VM name, otherwise an
	# existing domain is always reported as missing.
	if ! virsh dominfo "$domain" &> /dev/null; then
		echo "Domain '$domain' does not exist. Nothing to do."
	elif virsh destroy "$domain" && virsh undefine "$domain"; then
		echo "Domain '$domain' cleaned on libvirt: OK"
	else
		echo "Domain '$domain' cleaned on libvirt: FAILED"
		return 1
	fi
}

# Top-level removal of a single VM: vagrant is tried first and, when it
# cannot destroy the VM, the domain is removed straight from libvirt.
#
# Parameters:
#	$1 - the VM name
vm_wipeout() {
	local target="$1"

	if vagrant status "$target" &>/dev/null; then
		if ! vagrant_destroy "$target"; then
			echo "WARN: Attempt to clean up the domain on libvirt"
			libvirt_cleanup "$target"
		fi
	else
		echo "VM '$target' does not exist. Nothing to do."
		return 0
	fi
}

# Delete vagrant's control directories. The local one (next to the
# Vagrantfile, one level above this script's directory) is always removed;
# the global one under $HOME only on request.
#
# Parameters:
#	$1 - set to 1 to remove the global directory as well. Defaults to 0.
cfgs_wipeout() {
	local also_global="${1:-0}"
	local project_dir="${cidir}/../.vagrant"
	local home_dir="${HOME}/.vagrant.d"

	echo "Remove vagrant's local directory: $project_dir"
	rm -rf "$project_dir"

	# Nothing else to do unless the global directory was requested.
	[ "$also_global" -eq 1 ] || return 0

	echo "Remove vagrant's global directory: $home_dir"
	rm -rf "$home_dir"
}

# Script entry point: parse the flags, check that vagrant is usable, then
# destroy either the VM given with -n or every VM reported by get_vms.
#
# Flags:
#	-g: also remove vagrant's global directory (implies -l)
#	-h: print the help and exit 0
#	-l: also remove vagrant's local directory
#	-n VM_NAME: destroy only that VM; no directory is removed
#
# Returns:
#	0 on success, otherwise the status of the last failed step. Exits 1
#	(through die) when vagrant is missing, the Vagrantfile cannot be
#	validated, or some VM could not be removed in the "all VMs" mode.
main() {
	local ret=0
	local single_vm=""
	local local_cfg=0
	local global_cfg=0
	local vm
	# Keep getopts' state private to this function.
	local OPT OPTARG
	local OPTIND=1

	while getopts "ghln:" OPT; do
		case "$OPT" in
			g) global_cfg=1;;
			h) usage; exit 0;;
			l) local_cfg=1;;
			n) single_vm="$OPTARG";;
			*) usage; exit 1;;
		esac
	done

	command -v vagrant &>/dev/null || \
		die "missing 'vagrant' command. Run $0 -h for help."

	# If there is any inconsistency on Vagrantfile then it exits here.
	vagrant validate || \
		die "Vagrantfile cannot be validated. Bailing out.\n" \
			"Tip: ensure that GOPATH is exported in your environment."

	if [ -n "$single_vm" ]; then
		vm_wipeout "$single_vm" || ret=$?
		return "$ret"
	fi

	# get_vms prints a space-separated list, so word splitting is intended.
	# shellcheck disable=SC2046
	for vm in $(get_vms); do
		vm_wipeout "$vm" || ret=$?
	done

	# Only allow the removal of vagrant's configuration files in
	# case the user wants to destroy all VMs and everything went
	# well, so to avoid leaking resources.
	if [ "$ret" -ne 0 ]; then
		# Tell the user the requested directories were left in place
		# before bailing out.
		if [ "$local_cfg" -eq 1 ] || [ "$global_cfg" -eq 1 ]; then
			echo "WARN: vagrant's directories are kept."
		fi
		die "Failed the removal of some VM."
	fi

	if [ "$local_cfg" -eq 1 ] || [ "$global_cfg" -eq 1 ]; then
		cfgs_wipeout "$global_cfg" || ret=$?
	fi
	return "$ret"
}

# Forward the command-line arguments untouched; quoting "$@" keeps an
# argument such as a VM name with spaces as a single word.
main "$@"
22 changes: 20 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -294,12 +294,23 @@ the [vagrant](https://www.vagrantup.com) tool to create a VM with the setup as c
as possible to the environments where CI jobs will run the tests. Thus, allowing to
reproduce a CI job locally.

Currently it is only able to create a *Fedora 32* or *Ubuntu 20.04* VM. And your workstation
must be capable of running VMs with:
Your workstation must be capable of running VMs with:
* 8GB of system memory
* ~45GB and ~20GB of disk space for the VM images (Fedora and Ubuntu, respectively) on
the Libvirt's storage pool

Currently it supports the creation of *Fedora 32* and *Ubuntu 20.04* VMs, as shown in the table
below. The `Vagrantfile` was tested on Fedora 33 and Ubuntu 20.04 hosts, and it is
[known to fail](https://github.com/kata-containers/tests/issues/3942) to boot the Fedora VM on
an Ubuntu host. If you need to test on a different guest, or it fails to work
on your host's distro, then please [open an issue](https://github.com/kata-containers/tests/issues/new/choose)
to let us know.

|Host | Fedora 32 guest | Ubuntu 20.04 guest |
| --- | --- | --- |
| Fedora 33 | Yes | Yes |
| Ubuntu 20.04 | No | Yes |

Besides having vagrant installed on your host, you also need the [vagrant libvirt plug-in](https://github.com/vagrant-libvirt/vagrant-libvirt) (Libvirt is the provider currently used), QEMU and `rsync` (needed to copy files between
the host and guest).

Expand Down Expand Up @@ -347,3 +358,10 @@ same VM (`vagrant provision [fedora|ubuntu]`), however this is not recommended b
our CI scripts are meant for a single-shot execution. So if you need to run a different
job locally, you should destroy the VM with the `vagrant destroy [fedora|ubuntu]` command
then start the process again.

The Vagrant configuration can sometimes get into an inconsistent state. That may happen, for
instance, when the domain on Libvirt was created by the framework but it thinks the box
is not initialized yet. Also, you may want to stop using Vagrant and simply
wipe out all Vagrant control files and resources from your workstation. For those purposes you
should consider using the `.ci/vagrant-cleaner.sh` script; run `.ci/vagrant-cleaner.sh -h` for
further information.
41 changes: 38 additions & 3 deletions Vagrantfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ guest_home_dir = '/home/vagrant'
# The file on the guest where environment variables are going to be set
# to export.
guest_env_file = guest_home_dir + '/ci_job_env'
host_arch = `uname -m`.strip

# All Vagrant configuration is done below. The "2" in Vagrant.configure
# configures the configuration version (we support older styles for
Expand All @@ -40,6 +41,28 @@ Vagrant.configure("2") do |config|
lv.driver = "kvm"
lv.cpus = "4"
lv.memory = "8192"
# Domains on Libvirt will be created with the following prefix.
lv.default_prefix = "kata_containers_test-"
if host_arch == "x86_64"
lv.machine_type = "q35"
end
# The VM needs one additional virtio-net device and iommu enabled
# for the vfio tests.
if host_arch == "x86_64"
lv.qemuargs :value => "-machine"
lv.qemuargs :value => "kernel-irqchip=split"
lv.qemuargs :value => "-device"
lv.qemuargs :value => "intel-iommu,intremap=on,caching-mode=on,device-iotlb=on"
# Currently the vfio test picks the last virtio-net device from lspci's
# output. Here we add the device in a PCIe root port with higher slot
# number on the hope it will be the last in the list.
lv.qemuargs :value => "-device"
lv.qemuargs :value => "pcie-root-port,port=0x16,chassis=7,id=pcie.7,multifunction=on,bus=pcie.0,addr=0xF"
lv.qemuargs :value => "-netdev"
lv.qemuargs :value => "user,id=vfio1"
lv.qemuargs :value => "-device"
lv.qemuargs :value => "virtio-net-pci,netdev=vfio1,bus=pcie.7,disable-legacy=on,disable-modern=off,iommu_platform=on,ats=on"
end
end

# Shared provision script.
Expand Down Expand Up @@ -74,13 +97,17 @@ EOF
SHELL

config.vm.define "fedora", autostart: false do |fedora|
fedora.vm.box = "fedora/32-cloud-base"
fedora.vm.box = "generic/fedora32"
# Fedora is required to reboot so that the change to cgroups v1
# makes effect.
# and kernel arguments make effect.
fedora.vm.provision "shell", reboot: true, inline: <<-SHELL
sudo dnf install -y grubby
# Set the kernel parameter to use cgroups v1.
sudo grubby --update-kernel=ALL --args="systemd.unified_cgroup_hierarchy=0"
# Set iommu's kernel parameters for vfio tests.
source "#{guest_env_file}"
if [ "${CI_JOB}" == "VFIO" ]; then
grubby --update-kernel=ALL --args="intel_iommu=on iommu=pt"
fi
SHELL

fedora.vm.provision "shell", inline: <<-SHELL
Expand All @@ -94,6 +121,14 @@ EOF

config.vm.define "ubuntu", autostart: false do |ubuntu|
ubuntu.vm.box = "generic/ubuntu2004"
if job == "VFIO"
ubuntu.vm.provision "shell", reboot: true, inline: <<-SHELL
# Set iommu's kernel parameters for vfio tests. That requires a reboot.
sed -i 's/\\(GRUB_CMDLINE_LINUX_DEFAULT\\)="\\(.*\\)"/\\1="\\2 intel_iommu=on iommu=pt"/' /etc/default/grub
update-grub
SHELL
end

ubuntu.vm.provision "shell", inline: <<-SHELL
source "#{guest_env_file}"
cd "${GOPATH}/src/github.com/kata-containers/tests"
Expand Down