From 8c68b60e2f2aee1b8635b286e0bc477da8566f42 Mon Sep 17 00:00:00 2001 From: "jbarrick@mesosphere.com" Date: Thu, 23 Jul 2020 13:23:44 -0700 Subject: [PATCH 1/4] Use cgroup root for kubelet See #1614 and https://d2iq.com/blog/running-kind-inside-a-kubernetes-cluster-for-continuous-integration --- images/base/files/usr/local/bin/entrypoint | 28 ++++++++++++++++++- .../internal/create/actions/config/config.go | 1 + pkg/cluster/internal/kubeadm/config.go | 7 +++++ .../internal/providers/docker/provision.go | 2 ++ .../internal/providers/podman/provision.go | 2 ++ pkg/internal/apis/config/default.go | 3 ++ pkg/internal/apis/config/types.go | 3 ++ 7 files changed, 45 insertions(+), 1 deletion(-) diff --git a/images/base/files/usr/local/bin/entrypoint b/images/base/files/usr/local/bin/entrypoint index 13839718a6..5a6fb2a26f 100755 --- a/images/base/files/usr/local/bin/entrypoint +++ b/images/base/files/usr/local/bin/entrypoint @@ -62,6 +62,30 @@ fix_mount() { mount --make-rshared / } +mount_kubelet_cgroup_root() { + cgroup_root=$1 + subsystem=$2 + + if [ ! -z ${cgroup_root} ]; then + # This is because we set Kubelet's cgroup-root to `/kubelet` by + # default. We have to do that because otherwise, it'll collide + # with the cgroups used by the Kubelet running on the host if we + # run kind cluster within a Kubernetes pod, resulting in random + # processes to be killed. + mkdir -p "${subsystem}/${cgroup_root}" + if [ "${subsystem}" == "/sys/fs/cgroup/cpuset" ]; then + # This is needed. Otherwise, assigning process to the cgroup + # (or any nested cgroup) would result in ENOSPC. + cat "${subsystem}/cpuset.cpus" > "${subsystem}/${cgroup_root}/cpuset.cpus" + cat "${subsystem}/cpuset.mems" > "${subsystem}/${cgroup_root}/cpuset.mems" + fi + # We need to perform a self bind mount here because otherwise, + # systemd might delete the cgroup unintentionally before the + # kubelet starts. + mount --bind "${subsystem}/${cgroup_root}" "${subsystem}/${cgroup_root}" + fi +} + fix_cgroup() { echo 'INFO: fix cgroup mounts for all subsystems' # For each cgroup subsystem, Docker does a bind mount from the current @@ -85,10 +109,11 @@ fix_cgroup() { while IFS= read -r subsystem; do mkdir -p "${subsystem}${docker_cgroup}" mount --bind "${subsystem}" "${subsystem}${docker_cgroup}" + mount_kubelet_cgroup_root "${CGROUP_ROOT}" "${subsystem}" done fi local podman_cgroup_mounts - podman_cgroup_mounts=$(grep /sys/fs/cgroup /proc/self/mountinfo | grep libpod_parent || true) + podman_cgroup_mounts=$(grep /sys/fs/cgroup /proc/self/mountinfo | grep libpod || true) if [[ -n "${podman_cgroup_mounts}" ]]; then local podman_cgroup cgroup_subsystems subsystem podman_cgroup=$(echo "${podman_cgroup_mounts}" | head -n 1 | cut -d' ' -f 4) @@ -97,6 +122,7 @@ fix_cgroup() { while IFS= read -r subsystem; do mkdir -p "${subsystem}${podman_cgroup}" mount --bind "${subsystem}" "${subsystem}${podman_cgroup}" + mount_kubelet_cgroup_root "${CGROUP_ROOT}" "${subsystem}" done fi } diff --git a/pkg/cluster/internal/create/actions/config/config.go b/pkg/cluster/internal/create/actions/config/config.go index 1d273b1e85..9495cfcf24 100644 --- a/pkg/cluster/internal/create/actions/config/config.go +++ b/pkg/cluster/internal/create/actions/config/config.go @@ -74,6 +74,7 @@ func (a *Action) Execute(ctx *actions.ActionContext) error { IPv6: ctx.Config.Networking.IPFamily == "ipv6", FeatureGates: ctx.Config.FeatureGates, RuntimeConfig: ctx.Config.RuntimeConfig, + CgroupRoot: ctx.Config.CgroupRoot, } kubeadmConfigPlusPatches := func(node nodes.Node, data kubeadm.ConfigData) func() error { diff --git a/pkg/cluster/internal/kubeadm/config.go b/pkg/cluster/internal/kubeadm/config.go index 4eeb8a4bfd..28188eace6 100644 --- a/pkg/cluster/internal/kubeadm/config.go +++ b/pkg/cluster/internal/kubeadm/config.go @@ -70,6 +70,9 @@ type ConfigData struct { // IPv4 values take precedence over IPv6 by default, if true set IPv6 default values IPv6 bool + // CgroupRoot specifies the cgroup root to configure the kubelet to use + CgroupRoot string + // DerivedConfigData is populated by Derive() // These auto-generated fields are available to Config templates, // but not meant to be set by hand @@ -195,6 +198,7 @@ nodeRegistration: fail-swap-on: "false" node-ip: "{{ .NodeAddress }}" provider-id: "kind://{{.NodeProvider}}/{{.ClusterName}}/{{.NodeName}}" + cgroup-root: "{{ .CgroupRoot }}" --- # no-op entry that exists solely so it can be patched apiVersion: kubeadm.k8s.io/v1beta1 @@ -213,6 +217,7 @@ nodeRegistration: fail-swap-on: "false" node-ip: "{{ .NodeAddress }}" provider-id: "kind://{{.NodeProvider}}/{{.ClusterName}}/{{.NodeName}}" + cgroup-root: "{{ .CgroupRoot }}" discovery: bootstrapToken: apiServerEndpoint: "{{ .ControlPlaneEndpoint }}" @@ -315,6 +320,7 @@ nodeRegistration: fail-swap-on: "false" node-ip: "{{ .NodeAddress }}" provider-id: "kind://{{.NodeProvider}}/{{.ClusterName}}/{{.NodeName}}" + cgroup-root: "{{ .CgroupRoot }}" --- # no-op entry that exists solely so it can be patched apiVersion: kubeadm.k8s.io/v1beta2 @@ -333,6 +339,7 @@ nodeRegistration: fail-swap-on: "false" node-ip: "{{ .NodeAddress }}" provider-id: "kind://{{.NodeProvider}}/{{.ClusterName}}/{{.NodeName}}" + cgroup-root: "{{ .CgroupRoot }}" discovery: bootstrapToken: apiServerEndpoint: "{{ .ControlPlaneEndpoint }}" diff --git a/pkg/cluster/internal/providers/docker/provision.go b/pkg/cluster/internal/providers/docker/provision.go index 8bd62c048f..3fab6000d0 100644 --- a/pkg/cluster/internal/providers/docker/provision.go +++ b/pkg/cluster/internal/providers/docker/provision.go @@ -199,6 +199,8 @@ func commonArgs(cluster string, cfg *config.Cluster, networkName string, nodeNam args = append(args, "-e", fmt.Sprintf("%s=%s", key, val)) } + args = append(args, "-e", fmt.Sprintf("CGROUP_ROOT=%s", cfg.CgroupRoot)) + // handle hosts that have user namespace remapping enabled if usernsRemap() { args = append(args, "--userns=host") diff --git a/pkg/cluster/internal/providers/podman/provision.go b/pkg/cluster/internal/providers/podman/provision.go index 47577d3e56..85c43f72bc 100644 --- a/pkg/cluster/internal/providers/podman/provision.go +++ b/pkg/cluster/internal/providers/podman/provision.go @@ -159,6 +159,8 @@ func commonArgs(cfg *config.Cluster, networkName string) ([]string, error) { args = append(args, "-e", fmt.Sprintf("%s=%s", key, val)) } + args = append(args, "-e", fmt.Sprintf("CGROUP_ROOT=%s", cfg.CgroupRoot)) + return args, nil } diff --git a/pkg/internal/apis/config/default.go b/pkg/internal/apis/config/default.go index b4486e9293..05f8b6091b 100644 --- a/pkg/internal/apis/config/default.go +++ b/pkg/internal/apis/config/default.go @@ -84,6 +84,9 @@ func SetDefaultsCluster(obj *Cluster) { if obj.Networking.KubeProxyMode == "" { obj.Networking.KubeProxyMode = IPTablesMode } + + // set the default cgroup root + obj.CgroupRoot = "/kubelet" } // SetDefaultsNode sets uninitialized fields to their default value. diff --git a/pkg/internal/apis/config/types.go b/pkg/internal/apis/config/types.go index 25b364044b..3f84862e7a 100644 --- a/pkg/internal/apis/config/types.go +++ b/pkg/internal/apis/config/types.go @@ -69,6 +69,9 @@ type Cluster struct { // in the order listed. // These should be YAML or JSON formatting RFC 6902 JSON patches ContainerdConfigPatchesJSON6902 []string + + // CgroupRoot specifies the cgroup root for kubelet to use. + CgroupRoot string } // Node contains settings for a node in the `kind` Cluster. From 8101c7b9c9cbd4430896a4a7b55f7cfd0d90964c Mon Sep 17 00:00:00 2001 From: "jbarrick@mesosphere.com" Date: Mon, 30 Nov 2020 10:24:17 -0800 Subject: [PATCH 2/4] Fix shellcheck and linter issues --- images/base/files/usr/local/bin/entrypoint | 4 +++- pkg/cluster/internal/kubeadm/config.go | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/images/base/files/usr/local/bin/entrypoint b/images/base/files/usr/local/bin/entrypoint index 5a6fb2a26f..8a72f8b96a 100755 --- a/images/base/files/usr/local/bin/entrypoint +++ b/images/base/files/usr/local/bin/entrypoint @@ -66,7 +66,7 @@ mount_kubelet_cgroup_root() { cgroup_root=$1 subsystem=$2 - if [ ! -z ${cgroup_root} ]; then + if [ -n "${cgroup_root}" ]; then # This is because we set Kubelet's cgroup-root to `/kubelet` by # default. We have to do that because otherwise, it'll collide # with the cgroups used by the Kubelet running on the host if we @@ -109,6 +109,8 @@ fix_cgroup() { while IFS= read -r subsystem; do mkdir -p "${subsystem}${docker_cgroup}" mount --bind "${subsystem}" "${subsystem}${docker_cgroup}" + # shellcheck disable=SC2153 + # CGROUP_ROOT is set in environment mount_kubelet_cgroup_root "${CGROUP_ROOT}" "${subsystem}" done fi diff --git a/pkg/cluster/internal/kubeadm/config.go b/pkg/cluster/internal/kubeadm/config.go index 28188eace6..3c046a7266 100644 --- a/pkg/cluster/internal/kubeadm/config.go +++ b/pkg/cluster/internal/kubeadm/config.go @@ -71,7 +71,7 @@ type ConfigData struct { IPv6 bool // CgroupRoot specifies the cgroup root to configure the kubelet to use - CgroupRoot string + CgroupRoot string // DerivedConfigData is populated by Derive() // These auto-generated fields are available to Config templates, From 6ed8f56ac939e5632b326d6a835d79ca4032e4c0 Mon Sep 17 00:00:00 2001 From: "jbarrick@mesosphere.com" Date: Mon, 30 Nov 2020 12:26:50 -0800 Subject: [PATCH 3/4] Move cgroup-root configuration entirely into the image to ensure backwards compatibility --- .../etc/systemd/system/kubelet.service.d/10-kubeadm.conf | 2 +- images/base/files/usr/local/bin/entrypoint | 6 ++---- pkg/cluster/internal/create/actions/config/config.go | 1 - pkg/cluster/internal/kubeadm/config.go | 7 ------- pkg/cluster/internal/providers/docker/provision.go | 2 -- pkg/cluster/internal/providers/podman/provision.go | 2 -- pkg/internal/apis/config/default.go | 3 --- pkg/internal/apis/config/types.go | 3 --- 8 files changed, 3 insertions(+), 23 deletions(-) diff --git a/images/base/files/etc/systemd/system/kubelet.service.d/10-kubeadm.conf b/images/base/files/etc/systemd/system/kubelet.service.d/10-kubeadm.conf index 5a6904f363..17d6239f34 100644 --- a/images/base/files/etc/systemd/system/kubelet.service.d/10-kubeadm.conf +++ b/images/base/files/etc/systemd/system/kubelet.service.d/10-kubeadm.conf @@ -9,4 +9,4 @@ EnvironmentFile=-/var/lib/kubelet/kubeadm-flags.env # the .NodeRegistration.KubeletExtraArgs object in the configuration files instead. KUBELET_EXTRA_ARGS should be sourced from this file. EnvironmentFile=-/etc/default/kubelet ExecStart= -ExecStart=/usr/bin/kubelet $KUBELET_KUBECONFIG_ARGS $KUBELET_CONFIG_ARGS $KUBELET_KUBEADM_ARGS $KUBELET_EXTRA_ARGS \ No newline at end of file +ExecStart=/usr/bin/kubelet $KUBELET_KUBECONFIG_ARGS $KUBELET_CONFIG_ARGS $KUBELET_KUBEADM_ARGS $KUBELET_EXTRA_ARGS --cgroup-root=/kubelet diff --git a/images/base/files/usr/local/bin/entrypoint b/images/base/files/usr/local/bin/entrypoint index 8a72f8b96a..4187402d29 100755 --- a/images/base/files/usr/local/bin/entrypoint +++ b/images/base/files/usr/local/bin/entrypoint @@ -109,9 +109,7 @@ fix_cgroup() { while IFS= read -r subsystem; do mkdir -p "${subsystem}${docker_cgroup}" mount --bind "${subsystem}" "${subsystem}${docker_cgroup}" - # shellcheck disable=SC2153 - # CGROUP_ROOT is set in environment - mount_kubelet_cgroup_root "${CGROUP_ROOT}" "${subsystem}" + mount_kubelet_cgroup_root "/kubelet" "${subsystem}" done fi local podman_cgroup_mounts @@ -124,7 +122,7 @@ fix_cgroup() { while IFS= read -r subsystem; do mkdir -p "${subsystem}${podman_cgroup}" mount --bind "${subsystem}" "${subsystem}${podman_cgroup}" - mount_kubelet_cgroup_root "${CGROUP_ROOT}" "${subsystem}" + mount_kubelet_cgroup_root "/kubelet" "${subsystem}" done fi } diff --git a/pkg/cluster/internal/create/actions/config/config.go b/pkg/cluster/internal/create/actions/config/config.go index 9495cfcf24..1d273b1e85 100644 --- a/pkg/cluster/internal/create/actions/config/config.go +++ b/pkg/cluster/internal/create/actions/config/config.go @@ -74,7 +74,6 @@ func (a *Action) Execute(ctx *actions.ActionContext) error { IPv6: ctx.Config.Networking.IPFamily == "ipv6", FeatureGates: ctx.Config.FeatureGates, RuntimeConfig: ctx.Config.RuntimeConfig, - CgroupRoot: ctx.Config.CgroupRoot, } kubeadmConfigPlusPatches := func(node nodes.Node, data kubeadm.ConfigData) func() error { diff --git a/pkg/cluster/internal/kubeadm/config.go b/pkg/cluster/internal/kubeadm/config.go index 3c046a7266..4eeb8a4bfd 100644 --- a/pkg/cluster/internal/kubeadm/config.go +++ b/pkg/cluster/internal/kubeadm/config.go @@ -70,9 +70,6 @@ type ConfigData struct { // IPv4 values take precedence over IPv6 by default, if true set IPv6 default values IPv6 bool - // CgroupRoot specifies the cgroup root to configure the kubelet to use - CgroupRoot string - // DerivedConfigData is populated by Derive() // These auto-generated fields are available to Config templates, // but not meant to be set by hand @@ -198,7 +195,6 @@ nodeRegistration: fail-swap-on: "false" node-ip: "{{ .NodeAddress }}" provider-id: "kind://{{.NodeProvider}}/{{.ClusterName}}/{{.NodeName}}" - cgroup-root: "{{ .CgroupRoot }}" --- # no-op entry that exists solely so it can be patched apiVersion: kubeadm.k8s.io/v1beta1 @@ -217,7 +213,6 @@ nodeRegistration: fail-swap-on: "false" node-ip: "{{ .NodeAddress }}" provider-id: "kind://{{.NodeProvider}}/{{.ClusterName}}/{{.NodeName}}" - cgroup-root: "{{ .CgroupRoot }}" discovery: bootstrapToken: apiServerEndpoint: "{{ .ControlPlaneEndpoint }}" @@ -320,7 +315,6 @@ nodeRegistration: fail-swap-on: "false" node-ip: "{{ .NodeAddress }}" provider-id: "kind://{{.NodeProvider}}/{{.ClusterName}}/{{.NodeName}}" - cgroup-root: "{{ .CgroupRoot }}" --- # no-op entry that exists solely so it can be patched apiVersion: kubeadm.k8s.io/v1beta2 @@ -339,7 +333,6 @@ nodeRegistration: fail-swap-on: "false" node-ip: "{{ .NodeAddress }}" provider-id: "kind://{{.NodeProvider}}/{{.ClusterName}}/{{.NodeName}}" - cgroup-root: "{{ .CgroupRoot }}" discovery: bootstrapToken: apiServerEndpoint: "{{ .ControlPlaneEndpoint }}" diff --git a/pkg/cluster/internal/providers/docker/provision.go b/pkg/cluster/internal/providers/docker/provision.go index 3fab6000d0..8bd62c048f 100644 --- a/pkg/cluster/internal/providers/docker/provision.go +++ b/pkg/cluster/internal/providers/docker/provision.go @@ -199,8 +199,6 @@ func commonArgs(cluster string, cfg *config.Cluster, networkName string, nodeNam args = append(args, "-e", fmt.Sprintf("%s=%s", key, val)) } - args = append(args, "-e", fmt.Sprintf("CGROUP_ROOT=%s", cfg.CgroupRoot)) - // handle hosts that have user namespace remapping enabled if usernsRemap() { args = append(args, "--userns=host") diff --git a/pkg/cluster/internal/providers/podman/provision.go b/pkg/cluster/internal/providers/podman/provision.go index 85c43f72bc..47577d3e56 100644 --- a/pkg/cluster/internal/providers/podman/provision.go +++ b/pkg/cluster/internal/providers/podman/provision.go @@ -159,8 +159,6 @@ func commonArgs(cfg *config.Cluster, networkName string) ([]string, error) { args = append(args, "-e", fmt.Sprintf("%s=%s", key, val)) } - args = append(args, "-e", fmt.Sprintf("CGROUP_ROOT=%s", cfg.CgroupRoot)) - return args, nil } diff --git a/pkg/internal/apis/config/default.go b/pkg/internal/apis/config/default.go index 05f8b6091b..b4486e9293 100644 --- a/pkg/internal/apis/config/default.go +++ b/pkg/internal/apis/config/default.go @@ -84,9 +84,6 @@ func SetDefaultsCluster(obj *Cluster) { if obj.Networking.KubeProxyMode == "" { obj.Networking.KubeProxyMode = IPTablesMode } - - // set the default cgroup root - obj.CgroupRoot = "/kubelet" } // SetDefaultsNode sets uninitialized fields to their default value. diff --git a/pkg/internal/apis/config/types.go b/pkg/internal/apis/config/types.go index 3f84862e7a..25b364044b 100644 --- a/pkg/internal/apis/config/types.go +++ b/pkg/internal/apis/config/types.go @@ -69,9 +69,6 @@ type Cluster struct { // in the order listed. // These should be YAML or JSON formatting RFC 6902 JSON patches ContainerdConfigPatchesJSON6902 []string - - // CgroupRoot specifies the cgroup root for kubelet to use. - CgroupRoot string } // Node contains settings for a node in the `kind` Cluster. From 08b61481bb9636b3875599bf39527e7b5ef21452 Mon Sep 17 00:00:00 2001 From: Benjamin Elder Date: Wed, 2 Dec 2020 09:55:06 -0800 Subject: [PATCH 4/4] bump base image --- pkg/build/nodeimage/defaults.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/build/nodeimage/defaults.go b/pkg/build/nodeimage/defaults.go index 5d319fa6ac..d500d09408 100644 --- a/pkg/build/nodeimage/defaults.go +++ b/pkg/build/nodeimage/defaults.go @@ -20,7 +20,7 @@ package nodeimage const DefaultImage = "kindest/node:latest" // DefaultBaseImage is the default base image used -const DefaultBaseImage = "kindest/base:v20201112-cc74d297" +const DefaultBaseImage = "kindest/base:v20201130-23777eca" // DefaultMode is the default kubernetes build mode for the built image // see pkg/build/kube.Bits