From 776dce338d06c2d3d739e5aa29d3166f97b3b558 Mon Sep 17 00:00:00 2001 From: tu1h Date: Mon, 25 Sep 2023 14:40:41 +0800 Subject: [PATCH] Add container checkpoint support Signed-off-by: tu1h --- docs/vars.md | 2 ++ roles/container-engine/cri-o/tasks/main.yaml | 7 +++++++ roles/container-engine/cri-o/templates/options.conf.j2 | 2 ++ .../control-plane/templates/kubeadm-config.v1beta3.yaml.j2 | 5 ++++- .../node/templates/kubelet-config.v1beta1.yaml.j2 | 5 ++++- roles/kubespray-defaults/defaults/main.yaml | 3 +++ 6 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 roles/container-engine/cri-o/templates/options.conf.j2 diff --git a/docs/vars.md b/docs/vars.md index 3431d519499..e681933ba1f 100644 --- a/docs/vars.md +++ b/docs/vars.md @@ -272,6 +272,8 @@ node_taints: * `audit_webhook_batch_max_size`: 100 * `audit_webhook_batch_max_wait`: 1s +* *container_checkpoint_enabled* - When set to `true`, enables the Checkpoint API on the kubelet and the container runtime (currently only CRI-O is supported). [CRIU](https://criu.org/Installation) must be installed on the host in order to dump checkpoints. See the [Kubernetes documentation](https://kubernetes.io/blog/2022/12/05/forensic-container-checkpointing-alpha/) for details. **Note**: This feature is still experimental and is currently intended only for forensic container analysis. + ### Custom flags for Kube Components For all kube components, custom flags can be passed in. This allows for edge cases where users need changes to the default deployment that may not be applicable to all deployments. 
diff --git a/roles/container-engine/cri-o/tasks/main.yaml b/roles/container-engine/cri-o/tasks/main.yaml index f5df97467af..dc94d4d505a 100644 --- a/roles/container-engine/cri-o/tasks/main.yaml +++ b/roles/container-engine/cri-o/tasks/main.yaml @@ -175,6 +175,13 @@ notify: Restart crio when: http_proxy is defined or https_proxy is defined +- name: Cri-o | write cri-o options drop-in + template: + src: options.conf.j2 + dest: /etc/systemd/system/crio.service.d/options.conf + mode: "0644" + notify: Restart crio + - name: Cri-o | configure the uid/gid space for user namespaces lineinfile: path: '{{ item.path }}' diff --git a/roles/container-engine/cri-o/templates/options.conf.j2 b/roles/container-engine/cri-o/templates/options.conf.j2 new file mode 100644 index 00000000000..caddd2965a4 --- /dev/null +++ b/roles/container-engine/cri-o/templates/options.conf.j2 @@ -0,0 +1,2 @@ +[Service] +Environment=CRIO_CONFIG_OPTIONS={% if container_checkpoint_enabled is defined and container_checkpoint_enabled %}--enable-criu-support=true{% endif %} diff --git a/roles/kubernetes/control-plane/templates/kubeadm-config.v1beta3.yaml.j2 b/roles/kubernetes/control-plane/templates/kubeadm-config.v1beta3.yaml.j2 index 64105719bf2..383af834d44 100644 --- a/roles/kubernetes/control-plane/templates/kubeadm-config.v1beta3.yaml.j2 +++ b/roles/kubernetes/control-plane/templates/kubeadm-config.v1beta3.yaml.j2 @@ -447,10 +447,13 @@ clusterDNS: {% for dns_address in kubelet_cluster_dns %} - {{ dns_address }} {% endfor %} +{% if container_checkpoint_enabled is defined and container_checkpoint_enabled -%} +{# Start from the kube_feature_gates fallback so that enabling checkpointing does not drop the inherited gates. #}{% set kubelet_feature_gates = (kubelet_feature_gates | default(kube_feature_gates, true)) + ["ContainerCheckpoint=true"] -%} +{% endif -%} {% if kubelet_feature_gates or kube_feature_gates %} {% set feature_gates = ( kubelet_feature_gates | default(kube_feature_gates, true) ) %} featureGates: -{% for feature in feature_gates %} +{% for feature in (feature_gates | unique) %} {{ feature | replace("=", ": ") }} {% endfor %} {% endif %} 
diff --git a/roles/kubernetes/node/templates/kubelet-config.v1beta1.yaml.j2 b/roles/kubernetes/node/templates/kubelet-config.v1beta1.yaml.j2 index f54d1f8b3ee..8dad05840d7 100644 --- a/roles/kubernetes/node/templates/kubelet-config.v1beta1.yaml.j2 +++ b/roles/kubernetes/node/templates/kubelet-config.v1beta1.yaml.j2 @@ -120,9 +120,12 @@ resolvConf: "{{ kube_resolv_conf }}" {% if inventory_hostname in groups['kube_node'] and kubelet_node_config_extra_args %} {{ kubelet_node_config_extra_args | to_nice_yaml(indent=2) }} {% endif %} +{% if container_checkpoint_enabled is defined and container_checkpoint_enabled -%} +{# Start from the kube_feature_gates fallback so that enabling checkpointing does not drop the inherited gates. #}{% set kubelet_feature_gates = (kubelet_feature_gates | default(kube_feature_gates, true)) + ["ContainerCheckpoint=true"] -%} +{% endif -%} {% if kubelet_feature_gates or kube_feature_gates %} featureGates: -{% for feature in (kubelet_feature_gates | default(kube_feature_gates, true)) %} +{% for feature in (kubelet_feature_gates | default(kube_feature_gates, true) | unique) %} {{ feature | replace("=", ": ") }} {% endfor %} {% endif %} diff --git a/roles/kubespray-defaults/defaults/main.yaml b/roles/kubespray-defaults/defaults/main.yaml index 23c7a0e1bd8..20fe441a7af 100644 --- a/roles/kubespray-defaults/defaults/main.yaml +++ b/roles/kubespray-defaults/defaults/main.yaml @@ -675,3 +675,6 @@ sysctl_file_path: "/etc/sysctl.d/99-sysctl.conf" system_upgrade: false system_upgrade_reboot: on-upgrade # never, always + +# Enable forensic container checkpointing (kubelet Checkpoint API). Experimental. See https://kubernetes.io/docs/reference/node/kubelet-checkpoint-api/ +container_checkpoint_enabled: false