Skip to content

Commit

Permalink
fix: avoid injector pod name collisions (#2620)
Browse files Browse the repository at this point in the history
## Description
This PR updates the injector pod naming by appending a UUID. This helps
avoid collisions with pods in a terminating state from previous zarf
init runs.

---------

Co-authored-by: Austin Abro <[email protected]>
Signed-off-by: Austin Abro <[email protected]>
  • Loading branch information
Lucas Rodriguez and AustinAbro321 committed Jul 23, 2024
1 parent 55ccc6d commit d09fdf6
Show file tree
Hide file tree
Showing 5 changed files with 104 additions and 111 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ require (
github.com/google/pprof v0.0.0-20231023181126-ff6d637d2a7b // indirect
github.com/google/s2a-go v0.1.7 // indirect
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/google/uuid v1.6.0
github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect
github.com/googleapis/gax-go/v2 v2.12.4 // indirect
github.com/gookit/color v1.5.4 // indirect
Expand Down
195 changes: 91 additions & 104 deletions src/pkg/cluster/injector.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,26 +7,29 @@ package cluster
import (
"context"
"fmt"
"net/http"
"os"
"path/filepath"
"regexp"
"time"

"github.com/defenseunicorns/pkg/helpers/v2"
pkgkubernetes "github.com/defenseunicorns/pkg/kubernetes"
"github.com/defenseunicorns/zarf/src/config"
"github.com/defenseunicorns/zarf/src/pkg/k8s"
"github.com/defenseunicorns/zarf/src/pkg/layout"
"github.com/defenseunicorns/zarf/src/pkg/message"
"github.com/defenseunicorns/zarf/src/pkg/transform"
"github.com/defenseunicorns/zarf/src/pkg/utils"
"github.com/google/go-containerregistry/pkg/crane"
"github.com/google/uuid"
"github.com/mholt/archiver/v3"
corev1 "k8s.io/api/core/v1"
kerrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/intstr"
"sigs.k8s.io/cli-utils/pkg/object"
)

// The chunk size for the tarball chunks.
Expand All @@ -43,7 +46,7 @@ var (
type imageNodeMap map[string][]string

// StartInjectionMadness initializes a Zarf injection into the cluster.
func (c *Cluster) StartInjectionMadness(ctx context.Context, tmpDir string, imagesDir string, injectorSeedSrcs []string) {
func (c *Cluster) StartInjectionMadness(ctx context.Context, tmpDir string, imagesDir string, injectorSeedSrcs []string) error {
spinner := message.NewProgressSpinner("Attempting to bootstrap the seed image into the cluster")
defer spinner.Stop()

Expand All @@ -56,118 +59,138 @@ func (c *Cluster) StartInjectionMadness(ctx context.Context, tmpDir string, imag
}

if err := helpers.CreateDirectory(tmp.SeedImagesDir, helpers.ReadWriteExecuteUser); err != nil {
spinner.Fatalf(err, "Unable to create the seed images directory")
return fmt.Errorf("unable to create the seed images directory: %w", err)
}

var err error
var images imageNodeMap
var payloadConfigmaps []string
var sha256sum string
var seedImages []transform.Image

// Get all the images from the cluster
spinner.Updatef("Getting the list of existing cluster images")
findImagesCtx, cancel := context.WithTimeout(ctx, 5*time.Minute)
defer cancel()
if images, err = c.getImagesAndNodesForInjection(findImagesCtx); err != nil {
spinner.Fatalf(err, "Unable to generate a list of candidate images to perform the registry injection")
images, err = c.getImagesAndNodesForInjection(findImagesCtx)
if err != nil {
return err
}

spinner.Updatef("Creating the injector configmap")
if err = c.createInjectorConfigMap(ctx, tmp.InjectionBinary); err != nil {
spinner.Fatalf(err, "Unable to create the injector configmap")
return fmt.Errorf("unable to create the injector configmap: %w", err)
}

spinner.Updatef("Creating the injector service")
if service, err := c.createService(ctx); err != nil {
spinner.Fatalf(err, "Unable to create the injector service")
} else {
config.ZarfSeedPort = fmt.Sprintf("%d", service.Spec.Ports[0].NodePort)
service, err := c.createService(ctx)
if err != nil {
return fmt.Errorf("unable to create the injector service: %w", err)
}
config.ZarfSeedPort = fmt.Sprintf("%d", service.Spec.Ports[0].NodePort)

spinner.Updatef("Loading the seed image from the package")
if seedImages, err = c.loadSeedImages(imagesDir, tmp.SeedImagesDir, injectorSeedSrcs, spinner); err != nil {
spinner.Fatalf(err, "Unable to load the injector seed image from the package")
_, err = c.loadSeedImages(imagesDir, tmp.SeedImagesDir, injectorSeedSrcs)
if err != nil {
return fmt.Errorf("unable to load the injector seed image from the package: %w", err)
}

spinner.Updatef("Loading the seed registry configmaps")
if payloadConfigmaps, sha256sum, err = c.createPayloadConfigMaps(ctx, tmp.SeedImagesDir, tmp.InjectorPayloadTarGz, spinner); err != nil {
spinner.Fatalf(err, "Unable to generate the injector payload configmaps")
return fmt.Errorf("unable to generate the injector payload configmaps: %w", err)
}

// https://regex101.com/r/eLS3at/1
zarfImageRegex := regexp.MustCompile(`(?m)^127\.0\.0\.1:`)

var injectorImage string
var injectorNode string
// Try to create an injector pod using an existing image in the cluster
for image, node := range images {
// Don't try to run against the seed image if this is a secondary zarf init run
if zarfImageRegex.MatchString(image) {
continue
}

spinner.Updatef("Attempting to bootstrap with the %s/%s", node, image)
injectorImage = image
injectorNode = node[0]
}
// Make sure the pod is not there first
// TODO: Explain why no grace period is given.
deleteGracePeriod := int64(0)
deletePolicy := metav1.DeletePropagationForeground
deleteOpts := metav1.DeleteOptions{
GracePeriodSeconds: &deleteGracePeriod,
PropagationPolicy: &deletePolicy,
}
selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{
MatchLabels: map[string]string{
"app": "zarf-injector",
},
})
if err != nil {
return err
}
listOpts := metav1.ListOptions{
LabelSelector: selector.String(),
}
err = c.Clientset.CoreV1().Pods(ZarfNamespaceName).DeleteCollection(ctx, deleteOpts, listOpts)
if err != nil {
return err
}

// Make sure the pod is not there first
// TODO: Explain why no grace period is given.
deleteGracePeriod := int64(0)
deletePolicy := metav1.DeletePropagationForeground
deleteOpts := metav1.DeleteOptions{
GracePeriodSeconds: &deleteGracePeriod,
PropagationPolicy: &deletePolicy,
}
err := c.Clientset.CoreV1().Pods(ZarfNamespaceName).Delete(ctx, "injector", deleteOpts)
if err != nil {
message.Debug("could not delete pod injector:", err)
}

// Update the podspec image path and use the first node found

pod, err := c.buildInjectionPod(node[0], image, payloadConfigmaps, sha256sum)
if err != nil {
// Just debug log the output because failures just result in trying the next image
message.Debug("error making injection pod:", err)
continue
}

// Create the pod in the cluster
pod, err = c.Clientset.CoreV1().Pods(pod.Namespace).Create(ctx, pod, metav1.CreateOptions{})
if err != nil {
// Just debug log the output because failures just result in trying the next image
message.Debug("error creating pod in cluster:", pod, err)
continue
}
pod, err := c.buildInjectionPod(injectorNode, injectorImage, payloadConfigmaps, sha256sum)
if err != nil {
return fmt.Errorf("error making injection pod: %w", err)
}

// if no error, try and wait for a seed image to be present, return if successful
if c.injectorIsReady(ctx, seedImages, spinner) {
spinner.Success()
return
}
pod, err = c.Clientset.CoreV1().Pods(pod.Namespace).Create(ctx, pod, metav1.CreateOptions{})
if err != nil {
return fmt.Errorf("error creating pod in cluster: %w", err)
}

// Otherwise just continue to try next image
objs := []object.ObjMetadata{
{
GroupKind: schema.GroupKind{
Kind: "Pod",
},
Namespace: ZarfNamespaceName,
Name: pod.Name,
},
}
waitCtx, waitCancel := context.WithTimeout(ctx, 60*time.Second)
defer waitCancel()
err = pkgkubernetes.WaitForReady(waitCtx, c.Watcher, objs)
if err != nil {
return err
}
spinner.Success()
return nil

// All images were exhausted and still no happiness
spinner.Fatalf(nil, "Unable to perform the injection")
}

// StopInjectionMadness handles cleanup once the seed registry is up.
func (c *Cluster) StopInjectionMadness(ctx context.Context) error {
// Try to kill the injector pod now
err := c.Clientset.CoreV1().Pods(ZarfNamespaceName).Delete(ctx, "injector", metav1.DeleteOptions{})
selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{
MatchLabels: map[string]string{
"app": "zarf-injector",
},
})
if err != nil {
return err
}
listOpts := metav1.ListOptions{
LabelSelector: selector.String(),
}
err = c.Clientset.CoreV1().Pods(ZarfNamespaceName).DeleteCollection(ctx, metav1.DeleteOptions{}, listOpts)
if err != nil {
return err
}

// Remove the configmaps
selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{
selector, err = metav1.LabelSelectorAsSelector(&metav1.LabelSelector{
MatchLabels: map[string]string{
"zarf-injector": "payload",
},
})
if err != nil {
return err
}
listOpts := metav1.ListOptions{
listOpts = metav1.ListOptions{
LabelSelector: selector.String(),
}
err = c.Clientset.CoreV1().ConfigMaps(ZarfNamespaceName).DeleteCollection(ctx, metav1.DeleteOptions{}, listOpts)
Expand All @@ -183,13 +206,12 @@ func (c *Cluster) StopInjectionMadness(ctx context.Context) error {
return nil
}

func (c *Cluster) loadSeedImages(imagesDir, seedImagesDir string, injectorSeedSrcs []string, spinner *message.Spinner) ([]transform.Image, error) {
func (c *Cluster) loadSeedImages(imagesDir, seedImagesDir string, injectorSeedSrcs []string) ([]transform.Image, error) {
seedImages := []transform.Image{}
localReferenceToDigest := make(map[string]string)

// Load the injector-specific images and save them as seed-images
for _, src := range injectorSeedSrcs {
spinner.Updatef("Loading the seed image '%s' from the package", src)
ref, err := transform.ParseImageRef(src)
if err != nil {
return nil, fmt.Errorf("failed to create ref for image %s: %w", src, err)
Expand Down Expand Up @@ -230,7 +252,6 @@ func (c *Cluster) createPayloadConfigMaps(ctx context.Context, seedImagesDir, ta
return configMaps, "", err
}

spinner.Updatef("Creating the seed registry archive to send to the cluster")
// Create a tar archive of the injector payload
if err := archiver.Archive(tarFileList, tarPath); err != nil {
return configMaps, "", err
Expand All @@ -241,8 +262,6 @@ func (c *Cluster) createPayloadConfigMaps(ctx context.Context, seedImagesDir, ta
return configMaps, "", err
}

spinner.Updatef("Splitting the archive into binary configmaps")

chunkCount := len(chunks)

// Loop over all chunks and generate configmaps
Expand Down Expand Up @@ -282,43 +301,6 @@ func (c *Cluster) createPayloadConfigMaps(ctx context.Context, seedImagesDir, ta
return configMaps, sha256sum, nil
}

// Test for pod readiness and seed image presence.
func (c *Cluster) injectorIsReady(ctx context.Context, seedImages []transform.Image, spinner *message.Spinner) bool {
tunnel, err := c.NewTunnel(ZarfNamespaceName, SvcResource, ZarfInjectorName, "", 0, ZarfInjectorPort)
if err != nil {
return false
}

_, err = tunnel.Connect(ctx)
if err != nil {
return false
}
defer tunnel.Close()

spinner.Updatef("Testing the injector for seed image availability")

for _, seedImage := range seedImages {
seedRegistry := fmt.Sprintf("%s/v2/%s/manifests/%s", tunnel.HTTPEndpoint(), seedImage.Path, seedImage.Tag)

var resp *http.Response
var err error
err = tunnel.Wrap(func() error {
message.Debug("getting seed registry %v", seedRegistry)
resp, err = http.Get(seedRegistry)
return err
})

if err != nil || resp.StatusCode != 200 {
// Just debug log the output because failures just result in trying the next image
message.Debug(resp, err)
return false
}
}

spinner.Updatef("Seed image found, injector is ready")
return true
}

func (c *Cluster) createInjectorConfigMap(ctx context.Context, binaryPath string) error {
name := "rust-binary"
// TODO: Replace with a create or update.
Expand Down Expand Up @@ -384,13 +366,18 @@ func (c *Cluster) createService(ctx context.Context) (*corev1.Service, error) {
// buildInjectionPod returns a pod for injection with the appropriate containers to perform the injection.
func (c *Cluster) buildInjectionPod(node, image string, payloadConfigmaps []string, payloadShasum string) (*corev1.Pod, error) {
executeMode := int32(0777)

// Generate a UUID to append to the pod name.
// This prevents collisions where `zarf init` is run back to back and a previous injector pod still exists.
uuid := uuid.New().String()[:16]

pod := &corev1.Pod{
TypeMeta: metav1.TypeMeta{
APIVersion: corev1.SchemeGroupVersion.String(),
Kind: "Pod",
},
ObjectMeta: metav1.ObjectMeta{
Name: "injector",
Name: fmt.Sprintf("injector-%s", uuid),
Namespace: ZarfNamespaceName,
Labels: map[string]string{
"app": "zarf-injector",
Expand Down
3 changes: 3 additions & 0 deletions src/pkg/cluster/injector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ func TestBuildInjectionPod(t *testing.T) {
c := &Cluster{}
pod, err := c.buildInjectionPod("injection-node", "docker.io/library/ubuntu:latest", []string{"foo", "bar"}, "shasum")
require.NoError(t, err)
require.Contains(t, pod.Name, "injector-")
// Replace the random UUID in the pod name with a fixed placeholder for consistent comparison.
pod.ObjectMeta.Name = "injector-UUID"
b, err := json.Marshal(pod)
require.NoError(t, err)
expected, err := os.ReadFile("./testdata/expected-injection-pod.json")
Expand Down
2 changes: 1 addition & 1 deletion src/pkg/cluster/testdata/expected-injection-pod.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"kind":"Pod","apiVersion":"v1","metadata":{"name":"injector","namespace":"zarf","creationTimestamp":null,"labels":{"app":"zarf-injector","zarf.dev/agent":"ignore"}},"spec":{"volumes":[{"name":"init","configMap":{"name":"rust-binary","defaultMode":511}},{"name":"seed","emptyDir":{}},{"name":"foo","configMap":{"name":"foo"}},{"name":"bar","configMap":{"name":"bar"}}],"containers":[{"name":"injector","image":"docker.io/library/ubuntu:latest","command":["/zarf-init/zarf-injector","shasum"],"workingDir":"/zarf-init","resources":{"limits":{"cpu":"1","memory":"256Mi"},"requests":{"cpu":"500m","memory":"64Mi"}},"volumeMounts":[{"name":"init","mountPath":"/zarf-init/zarf-injector","subPath":"zarf-injector"},{"name":"seed","mountPath":"/zarf-seed"},{"name":"foo","mountPath":"/zarf-init/foo","subPath":"foo"},{"name":"bar","mountPath":"/zarf-init/bar","subPath":"bar"}],"readinessProbe":{"httpGet":{"path":"/v2/","port":5000},"periodSeconds":2,"successThreshold":1,"failureThreshold":10},"imagePullPolicy":"IfNotPresent"}],"restartPolicy":"Never","nodeName":"injection-node"},"status":{}}
{"kind":"Pod","apiVersion":"v1","metadata":{"name":"injector-UUID","namespace":"zarf","creationTimestamp":null,"labels":{"app":"zarf-injector","zarf.dev/agent":"ignore"}},"spec":{"volumes":[{"name":"init","configMap":{"name":"rust-binary","defaultMode":511}},{"name":"seed","emptyDir":{}},{"name":"foo","configMap":{"name":"foo"}},{"name":"bar","configMap":{"name":"bar"}}],"containers":[{"name":"injector","image":"docker.io/library/ubuntu:latest","command":["/zarf-init/zarf-injector","shasum"],"workingDir":"/zarf-init","resources":{"limits":{"cpu":"1","memory":"256Mi"},"requests":{"cpu":"500m","memory":"64Mi"}},"volumeMounts":[{"name":"init","mountPath":"/zarf-init/zarf-injector","subPath":"zarf-injector"},{"name":"seed","mountPath":"/zarf-seed"},{"name":"foo","mountPath":"/zarf-init/foo","subPath":"foo"},{"name":"bar","mountPath":"/zarf-init/bar","subPath":"bar"}],"readinessProbe":{"httpGet":{"path":"/v2/","port":5000},"periodSeconds":2,"successThreshold":1,"failureThreshold":10},"imagePullPolicy":"IfNotPresent"}],"restartPolicy":"Never","nodeName":"injection-node"},"status":{}}
13 changes: 8 additions & 5 deletions src/pkg/packager/deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -237,13 +237,13 @@ func (p *Packager) deployInitComponent(ctx context.Context, component types.Zarf
if component.RequiresCluster() && p.state == nil {
err = p.cluster.InitZarfState(ctx, p.cfg.InitOpts)
if err != nil {
return charts, fmt.Errorf("unable to initialize Zarf state: %w", err)
return nil, fmt.Errorf("unable to initialize Zarf state: %w", err)
}
}

if hasExternalRegistry && (isSeedRegistry || isInjector || isRegistry) {
message.Notef("Not deploying the component (%s) since external registry information was provided during `zarf init`", component.Name)
return charts, nil
return nil, nil
}

if isRegistry {
Expand All @@ -253,18 +253,21 @@ func (p *Packager) deployInitComponent(ctx context.Context, component types.Zarf

// Before deploying the seed registry, start the injector
if isSeedRegistry {
p.cluster.StartInjectionMadness(ctx, p.layout.Base, p.layout.Images.Base, component.Images)
err := p.cluster.StartInjectionMadness(ctx, p.layout.Base, p.layout.Images.Base, component.Images)
if err != nil {
return nil, err
}
}

charts, err = p.deployComponent(ctx, component, isAgent /* skip img checksum if isAgent */, isSeedRegistry /* skip image push if isSeedRegistry */)
if err != nil {
return charts, err
return nil, err
}

// Do cleanup for when we inject the seed registry during initialization
if isSeedRegistry {
if err := p.cluster.StopInjectionMadness(ctx); err != nil {
return charts, fmt.Errorf("unable to seed the Zarf Registry: %w", err)
return nil, fmt.Errorf("unable to seed the Zarf Registry: %w", err)
}
}

Expand Down

0 comments on commit d09fdf6

Please sign in to comment.