Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: avoid injector pod name collisions #2620

Merged
merged 6 commits into from
Jun 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ require (
github.com/google/pprof v0.0.0-20231023181126-ff6d637d2a7b // indirect
github.com/google/s2a-go v0.1.7 // indirect
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/google/uuid v1.6.0
github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect
github.com/googleapis/gax-go/v2 v2.12.4 // indirect
github.com/gookit/color v1.5.4 // indirect
Expand Down
195 changes: 91 additions & 104 deletions src/pkg/cluster/injector.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,26 +7,29 @@ package cluster
import (
"context"
"fmt"
"net/http"
"os"
"path/filepath"
"regexp"
"time"

"github.com/defenseunicorns/pkg/helpers/v2"
pkgkubernetes "github.com/defenseunicorns/pkg/kubernetes"
"github.com/defenseunicorns/zarf/src/config"
"github.com/defenseunicorns/zarf/src/pkg/k8s"
"github.com/defenseunicorns/zarf/src/pkg/layout"
"github.com/defenseunicorns/zarf/src/pkg/message"
"github.com/defenseunicorns/zarf/src/pkg/transform"
"github.com/defenseunicorns/zarf/src/pkg/utils"
"github.com/google/go-containerregistry/pkg/crane"
"github.com/google/uuid"
"github.com/mholt/archiver/v3"
corev1 "k8s.io/api/core/v1"
kerrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/intstr"
"sigs.k8s.io/cli-utils/pkg/object"
)

// The chunk size for the tarball chunks.
Expand All @@ -43,7 +46,7 @@ var (
type imageNodeMap map[string][]string

// StartInjectionMadness initializes a Zarf injection into the cluster.
func (c *Cluster) StartInjectionMadness(ctx context.Context, tmpDir string, imagesDir string, injectorSeedSrcs []string) {
func (c *Cluster) StartInjectionMadness(ctx context.Context, tmpDir string, imagesDir string, injectorSeedSrcs []string) error {
spinner := message.NewProgressSpinner("Attempting to bootstrap the seed image into the cluster")
defer spinner.Stop()

Expand All @@ -56,118 +59,138 @@ func (c *Cluster) StartInjectionMadness(ctx context.Context, tmpDir string, imag
}

if err := helpers.CreateDirectory(tmp.SeedImagesDir, helpers.ReadWriteExecuteUser); err != nil {
spinner.Fatalf(err, "Unable to create the seed images directory")
return fmt.Errorf("unable to create the seed images directory: %w", err)
}

var err error
var images imageNodeMap
var payloadConfigmaps []string
var sha256sum string
var seedImages []transform.Image

// Get all the images from the cluster
spinner.Updatef("Getting the list of existing cluster images")
AustinAbro321 marked this conversation as resolved.
Show resolved Hide resolved
findImagesCtx, cancel := context.WithTimeout(ctx, 5*time.Minute)
defer cancel()
if images, err = c.getImagesAndNodesForInjection(findImagesCtx); err != nil {
spinner.Fatalf(err, "Unable to generate a list of candidate images to perform the registry injection")
images, err = c.getImagesAndNodesForInjection(findImagesCtx)
if err != nil {
return err
}

spinner.Updatef("Creating the injector configmap")
if err = c.createInjectorConfigMap(ctx, tmp.InjectionBinary); err != nil {
spinner.Fatalf(err, "Unable to create the injector configmap")
return fmt.Errorf("unable to create the injector configmap: %w", err)
}

spinner.Updatef("Creating the injector service")
if service, err := c.createService(ctx); err != nil {
spinner.Fatalf(err, "Unable to create the injector service")
} else {
config.ZarfSeedPort = fmt.Sprintf("%d", service.Spec.Ports[0].NodePort)
service, err := c.createService(ctx)
if err != nil {
return fmt.Errorf("unable to create the injector service: %w", err)
}
config.ZarfSeedPort = fmt.Sprintf("%d", service.Spec.Ports[0].NodePort)

spinner.Updatef("Loading the seed image from the package")
if seedImages, err = c.loadSeedImages(imagesDir, tmp.SeedImagesDir, injectorSeedSrcs, spinner); err != nil {
spinner.Fatalf(err, "Unable to load the injector seed image from the package")
_, err = c.loadSeedImages(imagesDir, tmp.SeedImagesDir, injectorSeedSrcs)
if err != nil {
return fmt.Errorf("unable to load the injector seed image from the package: %w", err)
}

spinner.Updatef("Loading the seed registry configmaps")
if payloadConfigmaps, sha256sum, err = c.createPayloadConfigMaps(ctx, tmp.SeedImagesDir, tmp.InjectorPayloadTarGz, spinner); err != nil {
spinner.Fatalf(err, "Unable to generate the injector payload configmaps")
return fmt.Errorf("unable to generate the injector payload configmaps: %w", err)
}

// https://regex101.com/r/eLS3at/1
zarfImageRegex := regexp.MustCompile(`(?m)^127\.0\.0\.1:`)

var injectorImage string
var injectorNode string
// Try to create an injector pod using an existing image in the cluster
for image, node := range images {
// Don't try to run against the seed image if this is a secondary zarf init run
if zarfImageRegex.MatchString(image) {
continue
}

spinner.Updatef("Attempting to bootstrap with the %s/%s", node, image)
injectorImage = image
injectorNode = node[0]
}
// Make sure the pod is not there first
// TODO: Explain why no grace period is given.
deleteGracePeriod := int64(0)
deletePolicy := metav1.DeletePropagationForeground
deleteOpts := metav1.DeleteOptions{
GracePeriodSeconds: &deleteGracePeriod,
PropagationPolicy: &deletePolicy,
}
selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{
MatchLabels: map[string]string{
"app": "zarf-injector",
},
})
if err != nil {
return err
}
listOpts := metav1.ListOptions{
LabelSelector: selector.String(),
}
err = c.Clientset.CoreV1().Pods(ZarfNamespaceName).DeleteCollection(ctx, deleteOpts, listOpts)
if err != nil {
return err
}

// Make sure the pod is not there first
// TODO: Explain why no grace period is given.
deleteGracePeriod := int64(0)
deletePolicy := metav1.DeletePropagationForeground
deleteOpts := metav1.DeleteOptions{
GracePeriodSeconds: &deleteGracePeriod,
PropagationPolicy: &deletePolicy,
}
err := c.Clientset.CoreV1().Pods(ZarfNamespaceName).Delete(ctx, "injector", deleteOpts)
if err != nil {
message.Debug("could not delete pod injector:", err)
}

// Update the podspec image path and use the first node found

pod, err := c.buildInjectionPod(node[0], image, payloadConfigmaps, sha256sum)
if err != nil {
// Just debug log the output because failures just result in trying the next image
message.Debug("error making injection pod:", err)
continue
}

// Create the pod in the cluster
pod, err = c.Clientset.CoreV1().Pods(pod.Namespace).Create(ctx, pod, metav1.CreateOptions{})
if err != nil {
// Just debug log the output because failures just result in trying the next image
message.Debug("error creating pod in cluster:", pod, err)
continue
}
pod, err := c.buildInjectionPod(injectorNode, injectorImage, payloadConfigmaps, sha256sum)
if err != nil {
return fmt.Errorf("error making injection pod: %w", err)
}

// if no error, try and wait for a seed image to be present, return if successful
if c.injectorIsReady(ctx, seedImages, spinner) {
spinner.Success()
return
}
pod, err = c.Clientset.CoreV1().Pods(pod.Namespace).Create(ctx, pod, metav1.CreateOptions{})
if err != nil {
return fmt.Errorf("error creating pod in cluster: %w", err)
}

// Otherwise just continue to try next image
objs := []object.ObjMetadata{
{
GroupKind: schema.GroupKind{
Kind: "Pod",
},
Namespace: ZarfNamespaceName,
Name: pod.Name,
},
}
waitCtx, waitCancel := context.WithTimeout(ctx, 60*time.Second)
defer waitCancel()
err = pkgkubernetes.WaitForReady(waitCtx, c.Watcher, objs)
if err != nil {
return err
}
spinner.Success()
return nil

// All images were exhausted and still no happiness
spinner.Fatalf(nil, "Unable to perform the injection")
AustinAbro321 marked this conversation as resolved.
Show resolved Hide resolved
}

// StopInjectionMadness handles cleanup once the seed registry is up.
func (c *Cluster) StopInjectionMadness(ctx context.Context) error {
// Try to kill the injector pod now
err := c.Clientset.CoreV1().Pods(ZarfNamespaceName).Delete(ctx, "injector", metav1.DeleteOptions{})
selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{
MatchLabels: map[string]string{
"app": "zarf-injector",
},
})
if err != nil {
return err
}
listOpts := metav1.ListOptions{
LabelSelector: selector.String(),
}
err = c.Clientset.CoreV1().Pods(ZarfNamespaceName).DeleteCollection(ctx, metav1.DeleteOptions{}, listOpts)
if err != nil {
return err
}

// Remove the configmaps
selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{
selector, err = metav1.LabelSelectorAsSelector(&metav1.LabelSelector{
MatchLabels: map[string]string{
"zarf-injector": "payload",
},
})
if err != nil {
return err
}
listOpts := metav1.ListOptions{
listOpts = metav1.ListOptions{
LabelSelector: selector.String(),
}
err = c.Clientset.CoreV1().ConfigMaps(ZarfNamespaceName).DeleteCollection(ctx, metav1.DeleteOptions{}, listOpts)
Expand All @@ -183,13 +206,12 @@ func (c *Cluster) StopInjectionMadness(ctx context.Context) error {
return nil
}

func (c *Cluster) loadSeedImages(imagesDir, seedImagesDir string, injectorSeedSrcs []string, spinner *message.Spinner) ([]transform.Image, error) {
func (c *Cluster) loadSeedImages(imagesDir, seedImagesDir string, injectorSeedSrcs []string) ([]transform.Image, error) {
seedImages := []transform.Image{}
localReferenceToDigest := make(map[string]string)

// Load the injector-specific images and save them as seed-images
for _, src := range injectorSeedSrcs {
spinner.Updatef("Loading the seed image '%s' from the package", src)
ref, err := transform.ParseImageRef(src)
if err != nil {
return nil, fmt.Errorf("failed to create ref for image %s: %w", src, err)
Expand Down Expand Up @@ -230,7 +252,6 @@ func (c *Cluster) createPayloadConfigMaps(ctx context.Context, seedImagesDir, ta
return configMaps, "", err
}

spinner.Updatef("Creating the seed registry archive to send to the cluster")
// Create a tar archive of the injector payload
if err := archiver.Archive(tarFileList, tarPath); err != nil {
return configMaps, "", err
Expand All @@ -241,8 +262,6 @@ func (c *Cluster) createPayloadConfigMaps(ctx context.Context, seedImagesDir, ta
return configMaps, "", err
}

spinner.Updatef("Splitting the archive into binary configmaps")

chunkCount := len(chunks)

// Loop over all chunks and generate configmaps
Expand Down Expand Up @@ -282,43 +301,6 @@ func (c *Cluster) createPayloadConfigMaps(ctx context.Context, seedImagesDir, ta
return configMaps, sha256sum, nil
}

// injectorIsReady reports whether the injector is serving every seed image.
//
// It opens a tunnel to the injector service and, for each entry in seedImages,
// requests that image's manifest from the registry endpoint exposed through the
// tunnel. It returns true only when every request succeeds with HTTP 200.
// It returns false if the tunnel cannot be created or connected, if any request
// fails, or if any response has a non-200 status. Failures are only debug-logged
// because the caller treats false as "try the next candidate image".
func (c *Cluster) injectorIsReady(ctx context.Context, seedImages []transform.Image, spinner *message.Spinner) bool {
	tunnel, err := c.NewTunnel(ZarfNamespaceName, SvcResource, ZarfInjectorName, "", 0, ZarfInjectorPort)
	if err != nil {
		return false
	}

	_, err = tunnel.Connect(ctx)
	if err != nil {
		return false
	}
	defer tunnel.Close()

	spinner.Updatef("Testing the injector for seed image availability")

	for _, seedImage := range seedImages {
		seedRegistry := fmt.Sprintf("%s/v2/%s/manifests/%s", tunnel.HTTPEndpoint(), seedImage.Path, seedImage.Tag)

		// Only the status code is needed outside the closure; keeping the
		// response inside lets the body be closed on every iteration instead of
		// being leaked (or deferred until the whole function returns).
		var statusCode int
		err := tunnel.Wrap(func() error {
			message.Debug("getting seed registry %v", seedRegistry)

			// Bind the request to ctx so cancellation/timeouts interrupt the probe.
			req, err := http.NewRequestWithContext(ctx, http.MethodGet, seedRegistry, nil)
			if err != nil {
				return err
			}
			resp, err := http.DefaultClient.Do(req)
			if err != nil {
				return err
			}
			defer resp.Body.Close()

			statusCode = resp.StatusCode
			return nil
		})

		if err != nil || statusCode != http.StatusOK {
			// Just debug log the output because failures just result in trying the next image
			message.Debug(statusCode, err)
			return false
		}
	}

	spinner.Updatef("Seed image found, injector is ready")
	return true
}

func (c *Cluster) createInjectorConfigMap(ctx context.Context, binaryPath string) error {
name := "rust-binary"
// TODO: Replace with a create or update.
Expand Down Expand Up @@ -384,13 +366,18 @@ func (c *Cluster) createService(ctx context.Context) (*corev1.Service, error) {
// buildInjectionPod return a pod for injection with the appropriate containers to perform the injection.
func (c *Cluster) buildInjectionPod(node, image string, payloadConfigmaps []string, payloadShasum string) (*corev1.Pod, error) {
executeMode := int32(0777)

// Generate a UUID to append to the pod name.
// This prevents collisions where `zarf init` is run back to back and a previous injector pod still exists.
uuid := uuid.New().String()[:16]

pod := &corev1.Pod{
TypeMeta: metav1.TypeMeta{
APIVersion: corev1.SchemeGroupVersion.String(),
Kind: "Pod",
},
ObjectMeta: metav1.ObjectMeta{
Name: "injector",
Name: fmt.Sprintf("injector-%s", uuid),
Namespace: ZarfNamespaceName,
Labels: map[string]string{
"app": "zarf-injector",
Expand Down
3 changes: 3 additions & 0 deletions src/pkg/cluster/injector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ func TestBuildInjectionPod(t *testing.T) {
c := &Cluster{}
pod, err := c.buildInjectionPod("injection-node", "docker.io/library/ubuntu:latest", []string{"foo", "bar"}, "shasum")
require.NoError(t, err)
require.Contains(t, pod.Name, "injector-")
// Replace the random UUID in the pod name with a fixed placeholder for consistent comparison.
pod.ObjectMeta.Name = "injector-UUID"
b, err := json.Marshal(pod)
require.NoError(t, err)
expected, err := os.ReadFile("./testdata/expected-injection-pod.json")
Expand Down
2 changes: 1 addition & 1 deletion src/pkg/cluster/testdata/expected-injection-pod.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"kind":"Pod","apiVersion":"v1","metadata":{"name":"injector","namespace":"zarf","creationTimestamp":null,"labels":{"app":"zarf-injector","zarf.dev/agent":"ignore"}},"spec":{"volumes":[{"name":"init","configMap":{"name":"rust-binary","defaultMode":511}},{"name":"seed","emptyDir":{}},{"name":"foo","configMap":{"name":"foo"}},{"name":"bar","configMap":{"name":"bar"}}],"containers":[{"name":"injector","image":"docker.io/library/ubuntu:latest","command":["/zarf-init/zarf-injector","shasum"],"workingDir":"/zarf-init","resources":{"limits":{"cpu":"1","memory":"256Mi"},"requests":{"cpu":"500m","memory":"64Mi"}},"volumeMounts":[{"name":"init","mountPath":"/zarf-init/zarf-injector","subPath":"zarf-injector"},{"name":"seed","mountPath":"/zarf-seed"},{"name":"foo","mountPath":"/zarf-init/foo","subPath":"foo"},{"name":"bar","mountPath":"/zarf-init/bar","subPath":"bar"}],"readinessProbe":{"httpGet":{"path":"/v2/","port":5000},"periodSeconds":2,"successThreshold":1,"failureThreshold":10},"imagePullPolicy":"IfNotPresent"}],"restartPolicy":"Never","nodeName":"injection-node"},"status":{}}
{"kind":"Pod","apiVersion":"v1","metadata":{"name":"injector-UUID","namespace":"zarf","creationTimestamp":null,"labels":{"app":"zarf-injector","zarf.dev/agent":"ignore"}},"spec":{"volumes":[{"name":"init","configMap":{"name":"rust-binary","defaultMode":511}},{"name":"seed","emptyDir":{}},{"name":"foo","configMap":{"name":"foo"}},{"name":"bar","configMap":{"name":"bar"}}],"containers":[{"name":"injector","image":"docker.io/library/ubuntu:latest","command":["/zarf-init/zarf-injector","shasum"],"workingDir":"/zarf-init","resources":{"limits":{"cpu":"1","memory":"256Mi"},"requests":{"cpu":"500m","memory":"64Mi"}},"volumeMounts":[{"name":"init","mountPath":"/zarf-init/zarf-injector","subPath":"zarf-injector"},{"name":"seed","mountPath":"/zarf-seed"},{"name":"foo","mountPath":"/zarf-init/foo","subPath":"foo"},{"name":"bar","mountPath":"/zarf-init/bar","subPath":"bar"}],"readinessProbe":{"httpGet":{"path":"/v2/","port":5000},"periodSeconds":2,"successThreshold":1,"failureThreshold":10},"imagePullPolicy":"IfNotPresent"}],"restartPolicy":"Never","nodeName":"injection-node"},"status":{}}
13 changes: 8 additions & 5 deletions src/pkg/packager/deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -237,13 +237,13 @@ func (p *Packager) deployInitComponent(ctx context.Context, component types.Zarf
if component.RequiresCluster() && p.state == nil {
err = p.cluster.InitZarfState(ctx, p.cfg.InitOpts)
if err != nil {
return charts, fmt.Errorf("unable to initialize Zarf state: %w", err)
return nil, fmt.Errorf("unable to initialize Zarf state: %w", err)
}
}

if hasExternalRegistry && (isSeedRegistry || isInjector || isRegistry) {
message.Notef("Not deploying the component (%s) since external registry information was provided during `zarf init`", component.Name)
return charts, nil
return nil, nil
}

if isRegistry {
Expand All @@ -253,18 +253,21 @@ func (p *Packager) deployInitComponent(ctx context.Context, component types.Zarf

// Before deploying the seed registry, start the injector
if isSeedRegistry {
p.cluster.StartInjectionMadness(ctx, p.layout.Base, p.layout.Images.Base, component.Images)
err := p.cluster.StartInjectionMadness(ctx, p.layout.Base, p.layout.Images.Base, component.Images)
if err != nil {
return nil, err
}
}

charts, err = p.deployComponent(ctx, component, isAgent /* skip img checksum if isAgent */, isSeedRegistry /* skip image push if isSeedRegistry */)
if err != nil {
return charts, err
return nil, err
}

// Do cleanup for when we inject the seed registry during initialization
if isSeedRegistry {
if err := p.cluster.StopInjectionMadness(ctx); err != nil {
return charts, fmt.Errorf("unable to seed the Zarf Registry: %w", err)
return nil, fmt.Errorf("unable to seed the Zarf Registry: %w", err)
}
}

Expand Down
Loading