From 6b72d3fdff4d00d00271283fd82e493e6974f970 Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Mon, 6 Nov 2023 14:43:07 +0100 Subject: [PATCH 1/2] Validate systemd unit files This ensures that we fail early if we have a bad systemd unit file (syntax error, using an option not available in the local systemd version, etc.) --- roles/container-engine/containerd/tasks/main.yml | 1 + roles/container-engine/cri-dockerd/tasks/main.yml | 1 + roles/etcd/tasks/configure.yml | 2 ++ roles/kubernetes/control-plane/tasks/main.yml | 1 + roles/kubernetes/node/tasks/kubelet.yml | 1 + 5 files changed, 6 insertions(+) diff --git a/roles/container-engine/containerd/tasks/main.yml b/roles/container-engine/containerd/tasks/main.yml index 43aa689526d..afec44f56fe 100644 --- a/roles/container-engine/containerd/tasks/main.yml +++ b/roles/container-engine/containerd/tasks/main.yml @@ -61,6 +61,7 @@ src: containerd.service.j2 dest: /etc/systemd/system/containerd.service mode: 0644 + validate: "systemd-analyze verify %s:containerd.service" notify: Restart containerd - name: Containerd | Ensure containerd directories exist diff --git a/roles/container-engine/cri-dockerd/tasks/main.yml b/roles/container-engine/cri-dockerd/tasks/main.yml index f8965fd041e..ca8dd3fde98 100644 --- a/roles/container-engine/cri-dockerd/tasks/main.yml +++ b/roles/container-engine/cri-dockerd/tasks/main.yml @@ -18,6 +18,7 @@ src: "{{ item }}.j2" dest: "/etc/systemd/system/{{ item }}" mode: 0644 + validate: "systemd-analyze verify %s:{{ item }}" with_items: - cri-dockerd.service - cri-dockerd.socket diff --git a/roles/etcd/tasks/configure.yml b/roles/etcd/tasks/configure.yml index f1d6a487211..23dd9b6ec96 100644 --- a/roles/etcd/tasks/configure.yml +++ b/roles/etcd/tasks/configure.yml @@ -51,6 +51,7 @@ dest: /etc/systemd/system/etcd.service backup: yes mode: 0644 + validate: "systemd-analyze verify %s:etcd-{{ etcd_deployment_type }}.service" when: is_etcd_master and etcd_cluster_setup - name: Configure | Copy etcd-events.service 
systemd file @@ -59,6 +60,7 @@ dest: /etc/systemd/system/etcd-events.service backup: yes mode: 0644 + validate: "systemd-analyze verify %s:etcd-events-{{ etcd_deployment_type }}.service" when: is_etcd_master and etcd_events_cluster_setup - name: Configure | reload systemd diff --git a/roles/kubernetes/control-plane/tasks/main.yml b/roles/kubernetes/control-plane/tasks/main.yml index 8f57a04b41e..185a481fb27 100644 --- a/roles/kubernetes/control-plane/tasks/main.yml +++ b/roles/kubernetes/control-plane/tasks/main.yml @@ -113,6 +113,7 @@ src: "{{ item }}.j2" dest: "/etc/systemd/system/{{ item }}" mode: 0644 + validate: "systemd-analyze verify %s:{{item}}" with_items: - k8s-certs-renew.service - k8s-certs-renew.timer diff --git a/roles/kubernetes/node/tasks/kubelet.yml b/roles/kubernetes/node/tasks/kubelet.yml index ee01d06cffb..aeb1b94becc 100644 --- a/roles/kubernetes/node/tasks/kubelet.yml +++ b/roles/kubernetes/node/tasks/kubelet.yml @@ -34,6 +34,7 @@ dest: "/etc/systemd/system/kubelet.service" backup: "yes" mode: 0600 + validate: "systemd-analyze verify %s:kubelet.service" notify: Node | restart kubelet tags: - kubelet From a4096aed91dd62befd20e18ab9824fa2b6b9c9a4 Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Fri, 17 Nov 2023 16:25:06 +0100 Subject: [PATCH 2/2] Hack to check systemd version for service files validation factory-reset.target was introduced in systemd 250, the same version as the aliasing feature we need for verifying systemd services with ansible. So we only actually execute the validation if that target is present. This is a horrible hack which should be reverted as soon as we drop support for distributions with systemd<250. 
--- roles/container-engine/containerd/tasks/main.yml | 4 +++- roles/container-engine/cri-dockerd/tasks/main.yml | 4 +++- roles/etcd/tasks/configure.yml | 8 ++++++-- roles/kubernetes/control-plane/tasks/main.yml | 4 +++- roles/kubernetes/node/tasks/kubelet.yml | 4 +++- 5 files changed, 18 insertions(+), 6 deletions(-) diff --git a/roles/container-engine/containerd/tasks/main.yml b/roles/container-engine/containerd/tasks/main.yml index afec44f56fe..f1b97771762 100644 --- a/roles/container-engine/containerd/tasks/main.yml +++ b/roles/container-engine/containerd/tasks/main.yml @@ -61,7 +61,9 @@ src: containerd.service.j2 dest: /etc/systemd/system/containerd.service mode: 0644 - validate: "systemd-analyze verify %s:containerd.service" + validate: "sh -c '[ -f /usr/lib/systemd/system/factory-reset.target ] || exit 0 && systemd-analyze verify %s:containerd.service'" + # FIXME: the factory-reset.target test is a proxy for systemd >= 250 (the target was introduced in that release) + # Remove once we drop support for systemd < 250 notify: Restart containerd - name: Containerd | Ensure containerd directories exist diff --git a/roles/container-engine/cri-dockerd/tasks/main.yml b/roles/container-engine/cri-dockerd/tasks/main.yml index ca8dd3fde98..730e379eb63 100644 --- a/roles/container-engine/cri-dockerd/tasks/main.yml +++ b/roles/container-engine/cri-dockerd/tasks/main.yml @@ -18,7 +18,9 @@ src: "{{ item }}.j2" dest: "/etc/systemd/system/{{ item }}" mode: 0644 - validate: "systemd-analyze verify %s:{{ item }}" + validate: "sh -c '[ -f /usr/lib/systemd/system/factory-reset.target ] || exit 0 && systemd-analyze verify %s:{{ item }}'" + # FIXME: the factory-reset.target test is a proxy for systemd >= 250 (the target was introduced in that release) + # Remove once we drop support for systemd < 250 with_items: - cri-dockerd.service - cri-dockerd.socket diff --git a/roles/etcd/tasks/configure.yml b/roles/etcd/tasks/configure.yml index 23dd9b6ec96..438dbc7df78 100644 --- a/roles/etcd/tasks/configure.yml +++ 
b/roles/etcd/tasks/configure.yml @@ -51,7 +51,9 @@ dest: /etc/systemd/system/etcd.service backup: yes mode: 0644 - validate: "systemd-analyze verify %s:etcd-{{ etcd_deployment_type }}.service" + # FIXME: the factory-reset.target test is a proxy for systemd >= 250 (the target was introduced in that release) + # Remove once we drop support for systemd < 250 + validate: "sh -c '[ -f /usr/lib/systemd/system/factory-reset.target ] || exit 0 && systemd-analyze verify %s:etcd-{{ etcd_deployment_type }}.service'" when: is_etcd_master and etcd_cluster_setup - name: Configure | Copy etcd-events.service systemd file @@ -60,7 +62,9 @@ dest: /etc/systemd/system/etcd-events.service backup: yes mode: 0644 - validate: "systemd-analyze verify %s:etcd-events-{{ etcd_deployment_type }}.service" + validate: "sh -c '[ -f /usr/lib/systemd/system/factory-reset.target ] || exit 0 && systemd-analyze verify %s:etcd-events-{{ etcd_deployment_type }}.service'" + # FIXME: the factory-reset.target test is a proxy for systemd >= 250 (the target was introduced in that release) + # Remove once we drop support for systemd < 250 when: is_etcd_master and etcd_events_cluster_setup - name: Configure | reload systemd diff --git a/roles/kubernetes/control-plane/tasks/main.yml b/roles/kubernetes/control-plane/tasks/main.yml index 185a481fb27..50eccbd0735 100644 --- a/roles/kubernetes/control-plane/tasks/main.yml +++ b/roles/kubernetes/control-plane/tasks/main.yml @@ -113,7 +113,9 @@ src: "{{ item }}.j2" dest: "/etc/systemd/system/{{ item }}" mode: 0644 - validate: "systemd-analyze verify %s:{{item}}" + validate: "sh -c '[ -f /usr/lib/systemd/system/factory-reset.target ] || exit 0 && systemd-analyze verify %s:{{ item }}'" + # FIXME: the factory-reset.target test is a proxy for systemd >= 250 (the target was introduced in that release) + # Remove once we drop support for systemd < 250 with_items: - k8s-certs-renew.service - k8s-certs-renew.timer diff --git a/roles/kubernetes/node/tasks/kubelet.yml b/roles/kubernetes/node/tasks/kubelet.yml index 
aeb1b94becc..d8ff9e23070 100644 --- a/roles/kubernetes/node/tasks/kubelet.yml +++ b/roles/kubernetes/node/tasks/kubelet.yml @@ -34,7 +34,9 @@ dest: "/etc/systemd/system/kubelet.service" backup: "yes" mode: 0600 - validate: "systemd-analyze verify %s:kubelet.service" + validate: "sh -c '[ -f /usr/lib/systemd/system/factory-reset.target ] || exit 0 && systemd-analyze verify %s:kubelet.service'" + # FIXME: the factory-reset.target test is a proxy for systemd >= 250 (the target was introduced in that release) + # Remove once we drop support for systemd < 250 notify: Node | restart kubelet tags: - kubelet