Skip to content

Commit

Permalink
Injector stability improvements (#510)
Browse files Browse the repository at this point in the history
- Switch to default image pull policy to simplify connected-cloud deployments
- Add more code comments to clarify podspec in the code
- Add node affinity for more reliable image mounting with multi-node clusters
- Extend the configmap push interval 100ms -> 250ms to reduce mount failures/control plane pressure (cost is slightly slower configmap apply)
* Remove unused function k8s.GetImages()
  • Loading branch information
jeff-mccoy authored Jun 14, 2022
1 parent d96a8cf commit 5a7c9be
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 31 deletions.
33 changes: 20 additions & 13 deletions src/internal/k8s/images.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,10 @@ import (
)

type ImageMap map[string]bool
type ImageNodeMap map[string][]string

// GetAllImages returns a list of images found in pods in the cluster.
func GetAllImages() ([]string, error) {
var images []string
var err error
// GetAllImages returns a list of images and their nodes found in pods in the cluster.
func GetAllImages() (ImageNodeMap, error) {
timeout := time.After(5 * time.Minute)

for {
Expand All @@ -24,13 +23,12 @@ func GetAllImages() ([]string, error) {

// on timeout abort
case <-timeout:
message.Debug("get image list timed-out")
return images, nil
return nil, fmt.Errorf("get image list timed-out")

// after delay, try running
default:
// If no images or an error, log and loop
if images, err = GetImages(corev1.NamespaceAll); len(images) < 1 || err != nil {
if images, err := GetImagesWithNodes(corev1.NamespaceAll); len(images) < 1 || err != nil {
message.Debug(err)
} else {
// Otherwise, return the image list
Expand All @@ -40,20 +38,29 @@ func GetAllImages() ([]string, error) {
}
}

// GetImages returns all images for in pods in a given namespace.
func GetImages(namespace string) ([]string, error) {
images := make(ImageMap)
// GetImagesWithNodes returns all images and their nodes in a given namespace.
func GetImagesWithNodes(namespace string) (ImageNodeMap, error) {
result := make(ImageNodeMap)

pods, err := GetPods(namespace)
if err != nil {
return []string{}, fmt.Errorf("unable to get the list of pods in the cluster")
return nil, fmt.Errorf("unable to get the list of pods in the cluster")
}

for _, pod := range pods.Items {
images = BuildImageMap(images, pod.Spec)
node := pod.Spec.NodeName
for _, container := range pod.Spec.InitContainers {
result[container.Image] = append(result[container.Image], node)
}
for _, container := range pod.Spec.Containers {
result[container.Image] = append(result[container.Image], node)
}
for _, container := range pod.Spec.EphemeralContainers {
result[container.Image] = append(result[container.Image], node)
}
}

return SortImages(images, nil), nil
return result, nil
}

// BuildImageMap looks for init container, ephemeral and regular container images.
Expand Down
55 changes: 37 additions & 18 deletions src/internal/packager/injector.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ func runInjectionMadness(tempPath tempPaths) {
defer spinner.Stop()

var err error
var images []string
var images k8s.ImageNodeMap
var envVars []corev1.EnvVar
var payloadConfigmaps []string
var sha256sum string
Expand Down Expand Up @@ -72,19 +72,19 @@ func runInjectionMadness(tempPath tempPaths) {
zarfImageRegex := regexp.MustCompile(`(?m)^127\.0\.0\.1:`)

// Try to create an injector pod using an existing image in the cluster
for _, image := range images {
for image, node := range images {
// Don't try to run against the seed image if this is a secondary zarf init run
if zarfImageRegex.MatchString(image) {
continue
}

spinner.Updatef("Attempting to bootstrap with the %s", image)
spinner.Updatef("Attempting to bootstrap with the %s/%s", node, image)

// Make sure the pod is not there first
_ = k8s.DeletePod(k8s.ZarfNamespace, "injector")

// Update the podspec image path
pod := buildInjectionPod(image, envVars, payloadConfigmaps, sha256sum)
// Update the podspec image path and use the first node found
pod := buildInjectionPod(node[0], image, envVars, payloadConfigmaps, sha256sum)

// Create the pod in the cluster
pod, err = k8s.CreatePod(pod)
Expand Down Expand Up @@ -177,8 +177,8 @@ func createPayloadConfigmaps(tempPath tempPaths, spinner *message.Spinner) ([]st
// Add the configmap to the configmaps slice for later usage in the pod
configMaps = append(configMaps, fileName)

// Give the control plane a slight buffeer
time.Sleep(100 * time.Millisecond)
// Give the control plane a 250ms buffer between each configmap
time.Sleep(250 * time.Millisecond)
}

return configMaps, sha256sum, nil
Expand Down Expand Up @@ -301,21 +301,30 @@ func buildEnvVars(tempPath tempPaths) ([]corev1.EnvVar, error) {
}

// buildInjectionPod returns a pod for injection with the appropriate containers to perform the injection
func buildInjectionPod(image string, envVars []corev1.EnvVar, payloadConfigmaps []string, payloadShasum string) *corev1.Pod {
func buildInjectionPod(node, image string, envVars []corev1.EnvVar, payloadConfigmaps []string, payloadShasum string) *corev1.Pod {
pod := k8s.GeneratePod("injector", k8s.ZarfNamespace)
executeMode := int32(0777)
seedImage := config.GetSeedImage()

pod.Labels["app"] = "zarf-injector"

// Bind the pod to the node the image was found on
pod.Spec.NodeSelector = map[string]string{"kubernetes.io/hostname": node}

// Do not try to restart the pod as it will be deleted/re-created instead
pod.Spec.RestartPolicy = corev1.RestartPolicyNever

// Init container used to combine and decompress the split tarball into the stage2 directory for use in the main container
pod.Spec.InitContainers = []corev1.Container{
{
Name: "init-injector",
Image: image,
ImagePullPolicy: corev1.PullNever,
WorkingDir: "/zarf-stage1",
Command: []string{"/zarf-stage1/zarf-injector", payloadShasum},
Name: "init-injector",
// An existing image already present on the cluster
Image: image,
// PullIfNotPresent because some distros provide a way (even in airgap) to pull images from local or direct-connected registries
ImagePullPolicy: corev1.PullIfNotPresent,
// This directory is filled via the configmap injections
WorkingDir: "/zarf-stage1",
Command: []string{"/zarf-stage1/zarf-injector", payloadShasum},

VolumeMounts: []corev1.VolumeMount{
{
Expand All @@ -329,6 +338,7 @@ func buildInjectionPod(image string, envVars []corev1.EnvVar, payloadConfigmaps
},
},

// Keep resources as light as possible as we aren't actually running the container's other binaries
Resources: corev1.ResourceRequirements{
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse(".5"),
Expand All @@ -344,26 +354,32 @@ func buildInjectionPod(image string, envVars []corev1.EnvVar, payloadConfigmaps
},
}

// Container definition for the injector pod
pod.Spec.Containers = []corev1.Container{
{
Name: "injector",
Image: image,
ImagePullPolicy: corev1.PullNever,
WorkingDir: "/zarf-stage2",
Name: "injector",
// An existing image already present on the cluster
Image: image,
// PullIfNotPresent because some distros provide a way (even in airgap) to pull images from local or direct-connected registries
ImagePullPolicy: corev1.PullIfNotPresent,
// This directory's contents come from the init container output
WorkingDir: "/zarf-stage2",
Command: []string{
"/zarf-stage2/zarf-registry",
"/zarf-stage2/seed-image.tar",
seedImage,
utils.SwapHost(seedImage, "127.0.0.1:5001"),
},

// Shared mount between the init and regular containers
VolumeMounts: []corev1.VolumeMount{
{
Name: "stage2",
MountPath: "/zarf-stage2",
},
},

// Keep resources as light as possible as we aren't actually running the container's other binaries
Resources: corev1.ResourceRequirements{
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse(".5"),
Expand All @@ -380,7 +396,7 @@ func buildInjectionPod(image string, envVars []corev1.EnvVar, payloadConfigmaps
}

pod.Spec.Volumes = []corev1.Volume{
// Bin volume hosts the injector binary and init script
// Stage1 contains the rust binary and collection of configmaps from the tarball (go binary + seed image)
{
Name: "stage1",
VolumeSource: corev1.VolumeSource{
Expand All @@ -392,6 +408,7 @@ func buildInjectionPod(image string, envVars []corev1.EnvVar, payloadConfigmaps
},
},
},
// Stage2 is an empty directory shared between the containers
{
Name: "stage2",
VolumeSource: corev1.VolumeSource{
Expand All @@ -402,6 +419,7 @@ func buildInjectionPod(image string, envVars []corev1.EnvVar, payloadConfigmaps

// Iterate over all the payload configmaps and add their mounts
for _, filename := range payloadConfigmaps {
// Create the configmap volume from the given filename
pod.Spec.Volumes = append(pod.Spec.Volumes, corev1.Volume{
Name: filename,
VolumeSource: corev1.VolumeSource{
Expand All @@ -413,6 +431,7 @@ func buildInjectionPod(image string, envVars []corev1.EnvVar, payloadConfigmaps
},
})

// Create the volume mount to place the new volume in the stage1 directory
pod.Spec.InitContainers[0].VolumeMounts = append(pod.Spec.InitContainers[0].VolumeMounts, corev1.VolumeMount{
Name: filename,
MountPath: fmt.Sprintf("/zarf-stage1/%s", filename),
Expand Down

0 comments on commit 5a7c9be

Please sign in to comment.