diff --git a/api/v1beta2/gitrepository_types.go b/api/v1beta2/gitrepository_types.go index 9b9948b0e..f230560dc 100644 --- a/api/v1beta2/gitrepository_types.go +++ b/api/v1beta2/gitrepository_types.go @@ -211,6 +211,17 @@ type GitRepositoryStatus struct { // +optional IncludedArtifacts []*Artifact `json:"includedArtifacts,omitempty"` + // ContentConfigChecksum is a checksum of all the configurations related to + // the content of the source artifact: + // - .spec.ignore + // - .spec.recurseSubmodules + // - .spec.included and the checksum of the included artifacts + // observed in .status.observedGeneration version of the object. This can + // be used to determine if the content of the + // It has the format of `:`, for example: `sha256:`. + // +optional + ContentConfigChecksum string `json:"contentConfigChecksum,omitempty"` + meta.ReconcileRequestStatus `json:",inline"` } diff --git a/config/crd/bases/source.toolkit.fluxcd.io_gitrepositories.yaml b/config/crd/bases/source.toolkit.fluxcd.io_gitrepositories.yaml index e4e6b97e6..e315bee1c 100644 --- a/config/crd/bases/source.toolkit.fluxcd.io_gitrepositories.yaml +++ b/config/crd/bases/source.toolkit.fluxcd.io_gitrepositories.yaml @@ -653,6 +653,14 @@ spec: - type type: object type: array + contentConfigChecksum: + description: 'ContentConfigChecksum is a checksum of all the configurations + related to the content of the source artifact: - .spec.ignore - + .spec.recurseSubmodules - .spec.included and the checksum of the + included artifacts observed in .status.observedGeneration version + of the object. This can be used to determine if the content of the + It has the format of `:`, for example: `sha256:`.' + type: string includedArtifacts: description: IncludedArtifacts contains a list of the last successfully included Artifacts as instructed by GitRepositorySpec.Include. diff --git a/controllers/gitrepository_controller.go b/controllers/gitrepository_controller.go index 0a7ef4384..e93511c68 100644 --- a/controllers/gitrepository_controller.go +++ b/controllers/gitrepository_controller.go @@ -18,10 +18,12 @@ package controllers import ( "context" + "crypto/sha256" "errors" "fmt" "os" "path/filepath" + "strconv" "strings" "time" @@ -289,11 +291,11 @@ func (r *GitRepositoryReconciler) reconcile(ctx context.Context, obj *sourcev1.G return res, resErr } -// notify emits notification related to the reconciliation. +// notify emits notification related to the result of reconciliation. func (r *GitRepositoryReconciler) notify(oldObj, newObj *sourcev1.GitRepository, commit git.Commit, res sreconcile.Result, resErr error) { - // Notify successful reconciliation for new artifact and recovery from any - // failure. - if resErr == nil && res == sreconcile.ResultSuccess && newObj.Status.Artifact != nil { + // Notify successful reconciliation for new artifact, no-op reconciliation + // and recovery from any failure. + if r.shouldNotify(oldObj, newObj, res, resErr) { annotations := map[string]string{ sourcev1.GroupVersion.Group + "/revision": newObj.Status.Artifact.Revision, sourcev1.GroupVersion.Group + "/checksum": newObj.Status.Artifact.Checksum, @@ -304,7 +306,14 @@ func (r *GitRepositoryReconciler) notify(oldObj, newObj *sourcev1.GitRepository, oldChecksum = oldObj.GetArtifact().Checksum } - message := fmt.Sprintf("stored artifact for commit '%s'", commit.ShortMessage()) + // A partial commit due to no-op clone doesn't contain the commit + // message information. Have separate message for it. + var message string + if git.IsConcreteCommit(commit) { + message = fmt.Sprintf("stored artifact for commit '%s'", commit.ShortMessage()) + } else { + message = fmt.Sprintf("stored artifact for commit '%s'", commit.String()) + } // Notify on new artifact and failure recovery. if oldChecksum != newObj.GetArtifact().Checksum { @@ -319,6 +328,25 @@ func (r *GitRepositoryReconciler) notify(oldObj, newObj *sourcev1.GitRepository, } } +// shouldNotify analyzes the result of subreconcilers and determines if a +// notification should be sent. It decides about the final informational +// notifications after the reconciliation. Failure notification and in-line +// notifications are not handled here. +func (r *GitRepositoryReconciler) shouldNotify(oldObj, newObj *sourcev1.GitRepository, res sreconcile.Result, resErr error) bool { + // Notify for successful reconciliation. + if resErr == nil && res == sreconcile.ResultSuccess && newObj.Status.Artifact != nil { + return true + } + // Notify for no-op reconciliation with ignore error. + if resErr != nil && res == sreconcile.ResultEmpty && newObj.Status.Artifact != nil { + // Convert to Generic error and check for ignore. + if ge, ok := resErr.(*serror.Generic); ok { + return ge.Ignore == true + } + } + return false +} + // reconcileStorage ensures the current state of the storage matches the // desired and previously observed state. // @@ -361,8 +389,15 @@ func (r *GitRepositoryReconciler) reconcileStorage(ctx context.Context, // reconcileSource ensures the upstream Git repository and reference can be // cloned and checked out using the specified configuration, and observes its -// state. +// state. It also checks if the included repositories are available for use. // +// The included repositories are fetched and their metadata are stored. In case +// one of the included repositories isn't ready, it records +// v1beta2.IncludeUnavailableCondition=True and returns early. When all the +// included repositories are ready, it removes +// v1beta2.IncludeUnavailableCondition from the object. +// When the included artifactSet differs from the current set in the Status of +// the object, it marks the object with v1beta2.ArtifactOutdatedCondition=True. // The repository is cloned to the given dir, using the specified configuration // to check out the reference. In case of an error during this process // (including transient errors), it records v1beta2.FetchFailedCondition=True @@ -377,8 +412,13 @@ func (r *GitRepositoryReconciler) reconcileStorage(ctx context.Context, // it records v1beta2.SourceVerifiedCondition=True. // When all the above is successful, the given Commit pointer is set to the // commit of the checked out Git repository. +// +// If the optimized git clone feature is enabled, it checks if the remote repo +// and the local artifact are on the same revision, and no other source content +// related configurations have changed since last reconciliation and +// short-circuits the whole reconciliation with an early return. func (r *GitRepositoryReconciler) reconcileSource(ctx context.Context, - obj *sourcev1.GitRepository, commit *git.Commit, _ *artifactSet, dir string) (sreconcile.Result, error) { + obj *sourcev1.GitRepository, commit *git.Commit, includes *artifactSet, dir string) (sreconcile.Result, error) { // Configure authentication strategy to access the source var authOpts *git.AuthOptions var err error @@ -415,37 +455,6 @@ func (r *GitRepositoryReconciler) reconcileSource(ctx context.Context, return sreconcile.ResultEmpty, e } - // Configure checkout strategy - checkoutOpts := git.CheckoutOptions{RecurseSubmodules: obj.Spec.RecurseSubmodules} - if ref := obj.Spec.Reference; ref != nil { - checkoutOpts.Branch = ref.Branch - checkoutOpts.Commit = ref.Commit - checkoutOpts.Tag = ref.Tag - checkoutOpts.SemVer = ref.SemVer - } - - if val, ok := r.features[features.OptimizedGitClones]; ok && val { - // Only if the object is ready, use the last revision to attempt - // short-circuiting clone operation. - if conditions.IsTrue(obj, meta.ReadyCondition) { - if artifact := obj.GetArtifact(); artifact != nil { - checkoutOpts.LastRevision = artifact.Revision - } - } - } - - checkoutStrategy, err := strategy.CheckoutStrategyForImplementation(ctx, - git.Implementation(obj.Spec.GitImplementation), checkoutOpts) - if err != nil { - e := &serror.Stalling{ - Err: fmt.Errorf("failed to configure checkout strategy for Git implementation '%s': %w", obj.Spec.GitImplementation, err), - Reason: sourcev1.GitOperationFailedReason, - } - conditions.MarkTrue(obj, sourcev1.FetchFailedCondition, e.Reason, e.Err.Error()) - // Do not return err as recovery without changes is impossible - return sreconcile.ResultEmpty, e - } - repositoryURL := obj.Spec.URL // managed GIT transport only affects the libgit2 implementation if managed.Enabled() && obj.Spec.GitImplementation == sourcev1.LibGit2Implementation { @@ -473,32 +482,77 @@ func (r *GitRepositoryReconciler) reconcileSource(ctx context.Context, } } - // Checkout HEAD of reference in object - gitCtx, cancel := context.WithTimeout(ctx, obj.Spec.Timeout.Duration) - defer cancel() - c, err := checkoutStrategy.Checkout(gitCtx, dir, repositoryURL, authOpts) + // Fetch the included artifact metadata. + artifacts, err := r.fetchIncludes(ctx, obj) if err != nil { - var v git.NoChangesError - if errors.As(err, &v) { - // Create generic error without notification. Since it's a no-op - // error, ignore (no runtime error), normal event. - ge := serror.NewGeneric(v, sourcev1.GitOperationSucceedReason) - ge.Notification = false - ge.Ignore = true - ge.Event = corev1.EventTypeNormal - return sreconcile.ResultEmpty, ge - } + return sreconcile.ResultEmpty, err + } + // Observe if the artifacts still match the previous included ones + if artifacts.Diff(obj.Status.IncludedArtifacts) { + message := fmt.Sprintf("included artifacts differ from last observed includes") + conditions.MarkTrue(obj, sourcev1.ArtifactOutdatedCondition, "IncludeChange", message) + conditions.MarkReconciling(obj, "IncludeChange", message) + } + + // Persist the ArtifactSet. + *includes = *artifacts + + var optimizedClone bool + if val, ok := r.features[features.OptimizedGitClones]; ok && val { + optimizedClone = true + } + + c, err := r.gitCheckout(ctx, obj, repositoryURL, authOpts, dir, optimizedClone) + if err != nil { e := serror.NewGeneric( fmt.Errorf("failed to checkout and determine revision: %w", err), sourcev1.GitOperationFailedReason, ) conditions.MarkTrue(obj, sourcev1.FetchFailedCondition, e.Reason, e.Err.Error()) - // Coin flip on transient or persistent error, return error and hope for the best return sreconcile.ResultEmpty, e } // Assign the commit to the shared commit reference. *commit = *c + + // If it's a partial commit obtained from an existing artifact, check if the + // reconciliation can be skipped if other configurations have not changed. + if !git.IsConcreteCommit(*commit) { + // Calculate content configuration checksum. + if r.calculateContentConfigChecksum(obj, includes) == obj.Status.ContentConfigChecksum { + ge := serror.NewGeneric( + fmt.Errorf("no changes since last reconcilation: observed revision '%s'", + commit.String()), sourcev1.GitOperationSucceedReason, + ) + ge.Notification = false + ge.Ignore = true + ge.Event = corev1.EventTypeNormal + // Remove any stale fetch failed condition. + conditions.Delete(obj, sourcev1.FetchFailedCondition) + // IMPORTANT: This must be set to ensure that the observed + // generation of this condition is updated. In case of full + // reconciliation reconcileArtifact() ensures that it's set at the + // very end. + conditions.MarkTrue(obj, sourcev1.ArtifactInStorageCondition, meta.SucceededReason, + "stored artifact for revision '%s'", commit.String()) + // TODO: Find out if such condition setting is needed when commit + // signature verification is enabled. + return sreconcile.ResultEmpty, ge + } + + // If we can't skip the reconciliation, checkout again without any + // optimization. + c, err := r.gitCheckout(ctx, obj, repositoryURL, authOpts, dir, false) + if err != nil { + e := serror.NewGeneric( + fmt.Errorf("failed to checkout and determine revision: %w", err), + sourcev1.GitOperationFailedReason, + ) + conditions.MarkTrue(obj, sourcev1.FetchFailedCondition, e.Reason, e.Err.Error()) + return sreconcile.ResultEmpty, e + } + *commit = *c + } ctrl.LoggerFrom(ctx).V(logger.DebugLevel).Info("git repository checked out", "url", obj.Spec.URL, "revision", commit.String()) conditions.Delete(obj, sourcev1.FetchFailedCondition) @@ -521,21 +575,27 @@ func (r *GitRepositoryReconciler) reconcileSource(ctx context.Context, // // The inspection of the given data to the object is differed, ensuring any // stale observations like v1beta2.ArtifactOutdatedCondition are removed. -// If the given Artifact and/or artifactSet (includes) do not differ from the -// object's current, it returns early. +// If the given Artifact and/or artifactSet (includes) and the content config +// checksum do not differ from the object's current, it returns early. // Source ignore patterns are loaded, and the given directory is archived while // taking these patterns into account. -// On a successful archive, the Artifact and Includes in the Status of the -// object are set, and the symlink in the Storage is updated to its path. +// On a successful archive, the Artifact, Includes and new content config +// checksum in the Status of the object are set, and the symlink in the Storage +// is updated to its path. func (r *GitRepositoryReconciler) reconcileArtifact(ctx context.Context, obj *sourcev1.GitRepository, commit *git.Commit, includes *artifactSet, dir string) (sreconcile.Result, error) { // Create potential new artifact with current available metadata artifact := r.Storage.NewArtifactFor(obj.Kind, obj.GetObjectMeta(), commit.String(), fmt.Sprintf("%s.tar.gz", commit.Hash.String())) + // Calculate the content config checksum. + ccc := r.calculateContentConfigChecksum(obj, includes) + // Set the ArtifactInStorageCondition if there's no drift. defer func() { - if obj.GetArtifact().HasRevision(artifact.Revision) && !includes.Diff(obj.Status.IncludedArtifacts) { + if obj.GetArtifact().HasRevision(artifact.Revision) && + !includes.Diff(obj.Status.IncludedArtifacts) && + obj.Status.ContentConfigChecksum == ccc { conditions.Delete(obj, sourcev1.ArtifactOutdatedCondition) conditions.MarkTrue(obj, sourcev1.ArtifactInStorageCondition, meta.SucceededReason, "stored artifact for revision '%s'", artifact.Revision) @@ -543,7 +603,9 @@ func (r *GitRepositoryReconciler) reconcileArtifact(ctx context.Context, }() // The artifact is up-to-date - if obj.GetArtifact().HasRevision(artifact.Revision) && !includes.Diff(obj.Status.IncludedArtifacts) { + if obj.GetArtifact().HasRevision(artifact.Revision) && + !includes.Diff(obj.Status.IncludedArtifacts) && + obj.Status.ContentConfigChecksum == ccc { r.eventLogf(ctx, obj, events.EventTypeTrace, sourcev1.ArtifactUpToDateReason, "artifact up-to-date with remote revision: '%s'", artifact.Revision) return sreconcile.ResultSuccess, nil } @@ -609,6 +671,7 @@ func (r *GitRepositoryReconciler) reconcileArtifact(ctx context.Context, // Record it on the object obj.Status.Artifact = artifact.DeepCopy() obj.Status.IncludedArtifacts = *includes + obj.Status.ContentConfigChecksum = ccc // Update symlink on a "best effort" basis url, err := r.Storage.Symlink(artifact, "latest.tar.gz") @@ -636,7 +699,6 @@ func (r *GitRepositoryReconciler) reconcileArtifact(ctx context.Context, func (r *GitRepositoryReconciler) reconcileInclude(ctx context.Context, obj *sourcev1.GitRepository, _ *git.Commit, includes *artifactSet, dir string) (sreconcile.Result, error) { - artifacts := make(artifactSet, len(obj.Spec.Include)) for i, incl := range obj.Spec.Include { // Do this first as it is much cheaper than copy operations toPath, err := securejoin.SecureJoin(dir, incl.GetToPath()) @@ -645,56 +707,142 @@ func (r *GitRepositoryReconciler) reconcileInclude(ctx context.Context, fmt.Errorf("path calculation for include '%s' failed: %w", incl.GitRepositoryRef.Name, err), "IllegalPath", ) - conditions.MarkTrue(obj, sourcev1.IncludeUnavailableCondition, e.Reason, e.Err.Error()) + conditions.MarkTrue(obj, sourcev1.StorageOperationFailedCondition, e.Reason, e.Err.Error()) + return sreconcile.ResultEmpty, e + } + + // Get artifact at the same include index. The artifactSet is created + // such that the index of artifactSet matches with the index of Include. + // Hence, index is used here to pick the associated artifact from + // includes. + var artifact *sourcev1.Artifact + for j, art := range *includes { + if i == j { + artifact = art + } + } + + // Copy artifact (sub)contents to configured directory. + if err := r.Storage.CopyToPath(artifact, incl.GetFromPath(), toPath); err != nil { + e := &serror.Event{ + Err: fmt.Errorf("failed to copy '%s' include from %s to %s: %w", incl.GitRepositoryRef.Name, incl.GetFromPath(), incl.GetToPath(), err), + Reason: "CopyFailure", + } + conditions.MarkTrue(obj, sourcev1.StorageOperationFailedCondition, e.Reason, e.Err.Error()) return sreconcile.ResultEmpty, e } + } + conditions.Delete(obj, sourcev1.IncludeUnavailableCondition) + return sreconcile.ResultSuccess, nil +} + +// gitCheckout builds checkout options with the given configurations and +// performs a git checkout. +func (r *GitRepositoryReconciler) gitCheckout(ctx context.Context, + obj *sourcev1.GitRepository, repoURL string, authOpts *git.AuthOptions, dir string, optimized bool) (*git.Commit, error) { + // Configure checkout strategy. + checkoutOpts := git.CheckoutOptions{RecurseSubmodules: obj.Spec.RecurseSubmodules} + if ref := obj.Spec.Reference; ref != nil { + checkoutOpts.Branch = ref.Branch + checkoutOpts.Commit = ref.Commit + checkoutOpts.Tag = ref.Tag + checkoutOpts.SemVer = ref.SemVer + } + + // Only if the object has an existing artifact in storage, attempt to + // short-circuit clone operation. reconcileStorage has already verified + // that the artifact exists. + if optimized && conditions.IsTrue(obj, sourcev1.ArtifactInStorageCondition) { + if artifact := obj.GetArtifact(); artifact != nil { + checkoutOpts.LastRevision = artifact.Revision + } + } + + checkoutStrategy, err := strategy.CheckoutStrategyForImplementation(ctx, + git.Implementation(obj.Spec.GitImplementation), checkoutOpts) + if err != nil { + e := &serror.Stalling{ + Err: fmt.Errorf("failed to configure checkout strategy for Git implementation '%s': %w", obj.Spec.GitImplementation, err), + Reason: sourcev1.GitOperationFailedReason, + } + conditions.MarkTrue(obj, sourcev1.FetchFailedCondition, e.Reason, e.Err.Error()) + // Do not return err as recovery without changes is impossible. + return nil, e + } + + // Checkout HEAD of reference in object + gitCtx, cancel := context.WithTimeout(ctx, obj.Spec.Timeout.Duration) + defer cancel() + return checkoutStrategy.Checkout(gitCtx, dir, repoURL, authOpts) +} - // Retrieve the included GitRepository +// fetchIncludes fetches artifact metadata of all the included repos. +func (r *GitRepositoryReconciler) fetchIncludes(ctx context.Context, obj *sourcev1.GitRepository) (*artifactSet, error) { + artifacts := make(artifactSet, len(obj.Spec.Include)) + for i, incl := range obj.Spec.Include { + // Retrieve the included GitRepository. dep := &sourcev1.GitRepository{} if err := r.Get(ctx, types.NamespacedName{Namespace: obj.Namespace, Name: incl.GitRepositoryRef.Name}, dep); err != nil { - e := serror.NewGeneric( + e := serror.NewWaiting( fmt.Errorf("could not get resource for include '%s': %w", incl.GitRepositoryRef.Name, err), "NotFound", ) + e.RequeueAfter = r.requeueDependency conditions.MarkTrue(obj, sourcev1.IncludeUnavailableCondition, e.Reason, e.Err.Error()) - return sreconcile.ResultEmpty, e + return nil, e } // Confirm include has an artifact if dep.GetArtifact() == nil { - e := serror.NewGeneric( + e := serror.NewWaiting( fmt.Errorf("no artifact available for include '%s'", incl.GitRepositoryRef.Name), "NoArtifact", ) + e.RequeueAfter = r.requeueDependency conditions.MarkTrue(obj, sourcev1.IncludeUnavailableCondition, e.Reason, e.Err.Error()) - return sreconcile.ResultEmpty, e + return nil, e } - // Copy artifact (sub)contents to configured directory - if err := r.Storage.CopyToPath(dep.GetArtifact(), incl.GetFromPath(), toPath); err != nil { - e := serror.NewGeneric( - fmt.Errorf("failed to copy '%s' include from %s to %s: %w", incl.GitRepositoryRef.Name, incl.GetFromPath(), incl.GetToPath(), err), - "CopyFailure", - ) - conditions.MarkTrue(obj, sourcev1.IncludeUnavailableCondition, e.Reason, e.Err.Error()) - return sreconcile.ResultEmpty, e - } artifacts[i] = dep.GetArtifact().DeepCopy() } - // We now know all includes are available + // We now know all the includes are available. conditions.Delete(obj, sourcev1.IncludeUnavailableCondition) - // Observe if the artifacts still match the previous included ones - if artifacts.Diff(obj.Status.IncludedArtifacts) { - message := fmt.Sprintf("included artifacts differ from last observed includes") - conditions.MarkTrue(obj, sourcev1.ArtifactOutdatedCondition, "IncludeChange", message) - conditions.MarkReconciling(obj, "IncludeChange", message) + return &artifacts, nil +} + +// calculateContentConfigChecksum calculates a checksum of all the +// configurations that result in a change in the source artifact. It can be used +// to decide if further reconciliation is needed when an artifact already exists +// for a set of configurations. +func (r *GitRepositoryReconciler) calculateContentConfigChecksum(obj *sourcev1.GitRepository, includes *artifactSet) string { + c := []byte{} + // Consider the ignore rules and recurse submodules. + if obj.Spec.Ignore != nil { + c = append(c, []byte(*obj.Spec.Ignore)...) } + c = append(c, []byte(strconv.FormatBool(obj.Spec.RecurseSubmodules))...) - // Persist the artifactSet. - *includes = artifacts - return sreconcile.ResultSuccess, nil + // Consider the included repository attributes. + for _, incl := range obj.Spec.Include { + c = append(c, []byte(incl.GitRepositoryRef.Name+incl.FromPath+incl.ToPath)...) + } + + // Consider the checksum and revision of all the included remote artifact. + // This ensures that if the included repos get updated, this checksum changes. + // NOTE: The content of artifact may change at the same revision if the + // ignore rules change. Hence, consider both checksum and revision to + // capture changes in artifact checksum as well. + // TODO: Fix artifactSet.Diff() to consider checksum as well. + if includes != nil { + for _, incl := range *includes { + c = append(c, []byte(incl.Checksum)...) + c = append(c, []byte(incl.Revision)...) + } + } + + return fmt.Sprintf("sha256:%x", sha256.Sum256(c)) } // verifyCommitSignature verifies the signature of the given Git commit, if a diff --git a/controllers/gitrepository_controller_test.go b/controllers/gitrepository_controller_test.go index b88f2e014..fd78abcde 100644 --- a/controllers/gitrepository_controller_test.go +++ b/controllers/gitrepository_controller_test.go @@ -57,6 +57,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" sourcev1 "github.com/fluxcd/source-controller/api/v1beta2" + serror "github.com/fluxcd/source-controller/internal/error" "github.com/fluxcd/source-controller/internal/features" sreconcile "github.com/fluxcd/source-controller/internal/reconcile" "github.com/fluxcd/source-controller/internal/reconcile/summarize" @@ -141,6 +142,7 @@ Oomb3gD/TRf/nAdVED+k81GdLzciYdUGtI71/qI47G0nMBluLRE= =/4e+ -----END PGP PUBLIC KEY BLOCK----- ` + emptyContentConfigChecksum = "sha256:fcbcf165908dd18a9e49f7ff27810176db8e9f63b4352213741664245224f8aa" ) var ( @@ -551,27 +553,31 @@ func TestGitRepositoryReconciler_reconcileSource_checkoutStrategy(t *testing.T) want sreconcile.Result wantErr bool wantRevision string + wantArtifactOutdated bool }{ { - name: "Nil reference (default branch)", - want: sreconcile.ResultSuccess, - wantRevision: "master/", + name: "Nil reference (default branch)", + want: sreconcile.ResultSuccess, + wantRevision: "master/", + wantArtifactOutdated: true, }, { name: "Branch", reference: &sourcev1.GitRepositoryRef{ Branch: "staging", }, - want: sreconcile.ResultSuccess, - wantRevision: "staging/", + want: sreconcile.ResultSuccess, + wantRevision: "staging/", + wantArtifactOutdated: true, }, { name: "Tag", reference: &sourcev1.GitRepositoryRef{ Tag: "v0.1.0", }, - want: sreconcile.ResultSuccess, - wantRevision: "v0.1.0/", + want: sreconcile.ResultSuccess, + wantRevision: "v0.1.0/", + wantArtifactOutdated: true, }, { name: "Branch commit", @@ -580,8 +586,9 @@ func TestGitRepositoryReconciler_reconcileSource_checkoutStrategy(t *testing.T) Branch: "staging", Commit: "", }, - want: sreconcile.ResultSuccess, - wantRevision: "staging/", + want: sreconcile.ResultSuccess, + wantRevision: "staging/", + wantArtifactOutdated: true, }, { name: "Branch commit", @@ -590,60 +597,81 @@ func TestGitRepositoryReconciler_reconcileSource_checkoutStrategy(t *testing.T) Branch: "staging", Commit: "", }, - want: sreconcile.ResultSuccess, - wantRevision: "HEAD/", + want: sreconcile.ResultSuccess, + wantRevision: "HEAD/", + wantArtifactOutdated: true, }, { name: "SemVer", reference: &sourcev1.GitRepositoryRef{ SemVer: "*", }, - want: sreconcile.ResultSuccess, - wantRevision: "v2.0.0/", + want: sreconcile.ResultSuccess, + wantRevision: "v2.0.0/", + wantArtifactOutdated: true, }, { name: "SemVer range", reference: &sourcev1.GitRepositoryRef{ SemVer: "", + want: sreconcile.ResultSuccess, + wantRevision: "0.2.0/", + wantArtifactOutdated: true, }, { name: "SemVer prerelease", reference: &sourcev1.GitRepositoryRef{ SemVer: ">=1.0.0-0 <1.1.0-0", }, - wantRevision: "v1.0.0-alpha/", - want: sreconcile.ResultSuccess, + wantRevision: "v1.0.0-alpha/", + want: sreconcile.ResultSuccess, + wantArtifactOutdated: true, }, { - name: "Optimized clone, Ready=True", + name: "Optimized clone", reference: &sourcev1.GitRepositoryRef{ Branch: "staging", }, beforeFunc: func(obj *sourcev1.GitRepository, latestRev string) { + // Add existing artifact on the object and storage. obj.Status = sourcev1.GitRepositoryStatus{ Artifact: &sourcev1.Artifact{ Revision: "staging/" + latestRev, + Path: randStringRunes(10), }, + // Checksum with all the relevant fields unset. + ContentConfigChecksum: emptyContentConfigChecksum, } - conditions.MarkTrue(obj, meta.ReadyCondition, meta.SucceededReason, "ready") + conditions.MarkTrue(obj, sourcev1.ArtifactInStorageCondition, meta.SucceededReason, "foo") }, - want: sreconcile.ResultEmpty, - wantErr: true, - wantRevision: "staging/", + want: sreconcile.ResultEmpty, + wantErr: true, + wantRevision: "staging/", + wantArtifactOutdated: false, }, { - name: "Optimized clone, Ready=False", + name: "Optimized clone different ignore", reference: &sourcev1.GitRepositoryRef{ Branch: "staging", }, beforeFunc: func(obj *sourcev1.GitRepository, latestRev string) { - conditions.MarkFalse(obj, meta.ReadyCondition, meta.FailedReason, "not ready") + // Set new ignore value. + obj.Spec.Ignore = pointer.StringPtr("foo") + // Add existing artifact on the object and storage. + obj.Status = sourcev1.GitRepositoryStatus{ + Artifact: &sourcev1.Artifact{ + Revision: "staging/" + latestRev, + Path: randStringRunes(10), + }, + // Checksum with all the relevant fields unset. + ContentConfigChecksum: emptyContentConfigChecksum, + } + conditions.MarkTrue(obj, sourcev1.ArtifactInStorageCondition, meta.SucceededReason, "foo") }, - want: sreconcile.ResultSuccess, - wantRevision: "staging/", + want: sreconcile.ResultSuccess, + wantRevision: "staging/", + wantArtifactOutdated: false, }, } @@ -721,7 +749,7 @@ func TestGitRepositoryReconciler_reconcileSource_checkoutStrategy(t *testing.T) if tt.wantRevision != "" && !tt.wantErr { revision := strings.ReplaceAll(tt.wantRevision, "", headRef.Hash().String()) g.Expect(commit.String()).To(Equal(revision)) - g.Expect(conditions.IsTrue(obj, sourcev1.ArtifactOutdatedCondition)).To(BeTrue()) + g.Expect(conditions.IsTrue(obj, sourcev1.ArtifactOutdatedCondition)).To(Equal(tt.wantArtifactOutdated)) } }) } @@ -780,7 +808,8 @@ func TestGitRepositoryReconciler_reconcileArtifact(t *testing.T) { beforeFunc: func(obj *sourcev1.GitRepository) { obj.Spec.Interval = metav1.Duration{Duration: interval} obj.Status.Artifact = &sourcev1.Artifact{Revision: "main/revision"} - obj.Status.IncludedArtifacts = []*sourcev1.Artifact{{Revision: "main/revision"}} + obj.Status.IncludedArtifacts = []*sourcev1.Artifact{{Revision: "main/revision", Checksum: "some-checksum"}} + obj.Status.ContentConfigChecksum = "sha256:f825d11a1c5987e033d2cb36449a3b0435a6abc9b2bfdbcdcc7c49bf40e9285d" }, afterFunc: func(t *WithT, obj *sourcev1.GitRepository) { t.Expect(obj.Status.URL).To(BeEmpty()) @@ -985,39 +1014,6 @@ func TestGitRepositoryReconciler_reconcileInclude(t *testing.T) { {name: "b", toPath: "b/", shouldExist: true}, }, want: sreconcile.ResultSuccess, - assertConditions: []metav1.Condition{ - *conditions.TrueCondition(sourcev1.ArtifactOutdatedCondition, "IncludeChange", "included artifacts differ from last observed includes"), - *conditions.TrueCondition(meta.ReconcilingCondition, "IncludeChange", "included artifacts differ from last observed includes"), - }, - }, - { - name: "Include get failure makes IncludeUnavailable=True and returns error", - includes: []include{ - {name: "a", toPath: "a/"}, - }, - wantErr: true, - assertConditions: []metav1.Condition{ - *conditions.TrueCondition(sourcev1.IncludeUnavailableCondition, "NotFound", "could not get resource for include 'a': gitrepositories.source.toolkit.fluxcd.io \"a\" not found"), - }, - }, - { - name: "Include without an artifact makes IncludeUnavailable=True", - dependencies: []dependency{ - { - name: "a", - withArtifact: false, - conditions: []metav1.Condition{ - *conditions.TrueCondition(sourcev1.IncludeUnavailableCondition, "Foo", "foo unavailable"), - }, - }, - }, - includes: []include{ - {name: "a", toPath: "a/"}, - }, - wantErr: true, - assertConditions: []metav1.Condition{ - *conditions.TrueCondition(sourcev1.IncludeUnavailableCondition, "NoArtifact", "no artifact available for include 'a'"), - }, }, { name: "Invalid FromPath makes IncludeUnavailable=True and returns error", @@ -1032,16 +1028,8 @@ func TestGitRepositoryReconciler_reconcileInclude(t *testing.T) { }, wantErr: true, assertConditions: []metav1.Condition{ - *conditions.TrueCondition(sourcev1.IncludeUnavailableCondition, "CopyFailure", "unpack/path: no such file or directory"), - }, - }, - { - name: "Outdated IncludeUnavailable is removed", - beforeFunc: func(obj *sourcev1.GitRepository) { - conditions.MarkTrue(obj, sourcev1.IncludeUnavailableCondition, "NoArtifact", "") + *conditions.TrueCondition(sourcev1.StorageOperationFailedCondition, "CopyFailure", "unpack/path: no such file or directory"), }, - want: sreconcile.ResultSuccess, - assertConditions: []metav1.Condition{}, }, } for _, tt := range tests { @@ -1111,6 +1099,11 @@ func TestGitRepositoryReconciler_reconcileInclude(t *testing.T) { var commit git.Commit var includes artifactSet + // Build includes artifactSet. + artifactSet, err := r.fetchIncludes(ctx, obj) + g.Expect(err).ToNot(HaveOccurred()) + includes = *artifactSet + got, err := r.reconcileInclude(ctx, obj, &commit, &includes, tmpDir) g.Expect(obj.GetConditions()).To(conditions.MatchConditions(tt.assertConditions)) g.Expect(err != nil).To(Equal(tt.wantErr)) @@ -1815,12 +1808,25 @@ func TestGitRepositoryReconciler_statusConditions(t *testing.T) { } func TestGitRepositoryReconciler_notify(t *testing.T) { + concreteCommit := git.Commit{ + Hash: git.Hash("some-hash"), + Message: "test commit", + Encoded: []byte("content"), + } + partialCommit := git.Commit{ + Hash: git.Hash("some-hash"), + } + + noopErr := serror.NewGeneric(fmt.Errorf("some no-op error"), "NoOpReason") + noopErr.Ignore = true + tests := []struct { name string res sreconcile.Result resErr error oldObjBeforeFunc func(obj *sourcev1.GitRepository) newObjBeforeFunc func(obj *sourcev1.GitRepository) + commit git.Commit wantEvent string }{ { @@ -1835,7 +1841,8 @@ func TestGitRepositoryReconciler_notify(t *testing.T) { newObjBeforeFunc: func(obj *sourcev1.GitRepository) { obj.Status.Artifact = &sourcev1.Artifact{Revision: "xxx", Checksum: "yyy"} }, - wantEvent: "Normal NewArtifact stored artifact for commit", + commit: concreteCommit, + wantEvent: "Normal NewArtifact stored artifact for commit 'test commit'", }, { name: "recovery from failure", @@ -1850,7 +1857,8 @@ func TestGitRepositoryReconciler_notify(t *testing.T) { obj.Status.Artifact = &sourcev1.Artifact{Revision: "xxx", Checksum: "yyy"} conditions.MarkTrue(obj, meta.ReadyCondition, meta.SucceededReason, "ready") }, - wantEvent: "Normal Succeeded stored artifact for commit", + commit: concreteCommit, + wantEvent: "Normal Succeeded stored artifact for commit 'test commit'", }, { name: "recovery and new artifact", @@ -1865,7 +1873,8 @@ func TestGitRepositoryReconciler_notify(t *testing.T) { obj.Status.Artifact = &sourcev1.Artifact{Revision: "aaa", Checksum: "bbb"} conditions.MarkTrue(obj, meta.ReadyCondition, meta.SucceededReason, "ready") }, - wantEvent: "Normal NewArtifact stored artifact for commit", + commit: concreteCommit, + wantEvent: "Normal NewArtifact stored artifact for commit 'test commit'", }, { name: "no updates", @@ -1880,6 +1889,22 @@ func TestGitRepositoryReconciler_notify(t *testing.T) { conditions.MarkTrue(obj, meta.ReadyCondition, meta.SucceededReason, "ready") }, }, + { + name: "no-op error result", + res: sreconcile.ResultEmpty, + resErr: noopErr, + oldObjBeforeFunc: func(obj *sourcev1.GitRepository) { + obj.Status.Artifact = &sourcev1.Artifact{Revision: "xxx", Checksum: "yyy"} + conditions.MarkTrue(obj, sourcev1.FetchFailedCondition, sourcev1.GitOperationFailedReason, "fail") + conditions.MarkFalse(obj, meta.ReadyCondition, meta.FailedReason, "foo") + }, + newObjBeforeFunc: func(obj *sourcev1.GitRepository) { + obj.Status.Artifact = &sourcev1.Artifact{Revision: "xxx", Checksum: "yyy"} + conditions.MarkTrue(obj, meta.ReadyCondition, meta.SucceededReason, "ready") + }, + commit: partialCommit, // no-op will always result in partial commit. + wantEvent: "Normal Succeeded stored artifact for commit 'HEAD/some-hash'", + }, } for _, tt := range tests { @@ -1901,10 +1926,7 @@ func TestGitRepositoryReconciler_notify(t *testing.T) { EventRecorder: recorder, features: features.FeatureGates(), } - commit := &git.Commit{ - Message: "test commit", - } - reconciler.notify(oldObj, newObj, *commit, tt.res, tt.resErr) + reconciler.notify(oldObj, newObj, tt.commit, tt.res, tt.resErr) select { case x, ok := <-recorder.Events: @@ -1920,3 +1942,203 @@ func TestGitRepositoryReconciler_notify(t *testing.T) { }) } } + +func TestGitRepositoryReconciler_fetchIncludes(t *testing.T) { + type dependency struct { + name string + withArtifact bool + conditions []metav1.Condition + } + + type include struct { + name string + fromPath string + toPath string + shouldExist bool + } + + tests := []struct { + name string + dependencies []dependency + includes []include + beforeFunc func(obj *sourcev1.GitRepository) + wantErr bool + wantArtifactSet artifactSet + assertConditions []metav1.Condition + }{ + { + name: "Existing includes", + dependencies: []dependency{ + { + name: "a", + withArtifact: true, + conditions: []metav1.Condition{ + *conditions.TrueCondition(meta.ReadyCondition, "Foo", "foo ready"), + }, + }, + { + name: "b", + withArtifact: true, + conditions: []metav1.Condition{ + *conditions.TrueCondition(meta.ReadyCondition, "Bar", "bar ready"), + }, + }, + }, + includes: []include{ + {name: "a", toPath: "a/", shouldExist: true}, + {name: "b", toPath: "b/", shouldExist: true}, + }, + wantErr: false, + wantArtifactSet: []*sourcev1.Artifact{ + {Revision: "a"}, + {Revision: "b"}, + }, + }, + { + name: "Include get failure", + includes: []include{ + {name: "a", toPath: "a/"}, + }, + wantErr: true, + assertConditions: []metav1.Condition{ + *conditions.TrueCondition(sourcev1.IncludeUnavailableCondition, "NotFound", "could not get resource for include 'a': gitrepositories.source.toolkit.fluxcd.io \"a\" not found"), + }, + }, + { + name: "Include without an artifact makes IncludeUnavailable=True", + dependencies: []dependency{ + { + name: "a", + withArtifact: false, + conditions: []metav1.Condition{ + *conditions.TrueCondition(sourcev1.IncludeUnavailableCondition, "Foo", "foo unavailable"), + }, + }, + }, + includes: []include{ + {name: "a", toPath: "a/"}, + }, + wantErr: true, + assertConditions: []metav1.Condition{ + *conditions.TrueCondition(sourcev1.IncludeUnavailableCondition, "NoArtifact", "no artifact available for include 'a'"), + }, + }, + { + name: "Outdated IncludeUnavailable is removed", + beforeFunc: func(obj *sourcev1.GitRepository) { + conditions.MarkTrue(obj, sourcev1.IncludeUnavailableCondition, "NoArtifact", "") + }, + assertConditions: []metav1.Condition{}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + var depObjs []client.Object + for _, d := range tt.dependencies { + obj := &sourcev1.GitRepository{ + ObjectMeta: metav1.ObjectMeta{ + Name: d.name, + }, + Status: sourcev1.GitRepositoryStatus{ + Conditions: d.conditions, + }, + } + if d.withArtifact { + obj.Status.Artifact = &sourcev1.Artifact{ + Path: d.name + ".tar.gz", + Revision: d.name, + LastUpdateTime: metav1.Now(), + } + } + depObjs = append(depObjs, obj) + } + + builder := fakeclient.NewClientBuilder().WithScheme(testEnv.GetScheme()) + if len(tt.dependencies) > 0 { + builder.WithObjects(depObjs...) + } + + r := &GitRepositoryReconciler{ + Client: builder.Build(), + EventRecorder: record.NewFakeRecorder(32), + } + + obj := &sourcev1.GitRepository{ + ObjectMeta: metav1.ObjectMeta{ + Name: "reconcile-include", + }, + Spec: sourcev1.GitRepositorySpec{ + Interval: metav1.Duration{Duration: interval}, + }, + } + + for i, incl := range tt.includes { + incl := sourcev1.GitRepositoryInclude{ + GitRepositoryRef: meta.LocalObjectReference{Name: incl.name}, + FromPath: incl.fromPath, + ToPath: incl.toPath, + } + tt.includes[i].fromPath = incl.GetFromPath() + tt.includes[i].toPath = incl.GetToPath() + obj.Spec.Include = append(obj.Spec.Include, incl) + } + + gotArtifactSet, err := r.fetchIncludes(ctx, obj) + g.Expect(err != nil).To(Equal(tt.wantErr)) + g.Expect(obj.GetConditions()).To(conditions.MatchConditions(tt.assertConditions)) + if !tt.wantErr && gotArtifactSet != nil { + g.Expect(gotArtifactSet.Diff(tt.wantArtifactSet)).To(BeFalse()) + } + }) + } +} + +func TestGitRepositoryReconciler_calculateContentConfigChecksum(t *testing.T) { + g := NewWithT(t) + obj := &sourcev1.GitRepository{} + r := &GitRepositoryReconciler{} + + emptyChecksum := r.calculateContentConfigChecksum(obj, nil) + g.Expect(emptyChecksum).To(Equal(emptyContentConfigChecksum)) + + // Ignore modified. + obj.Spec.Ignore = pointer.String("some-rule") + ignoreModChecksum := r.calculateContentConfigChecksum(obj, nil) + g.Expect(emptyChecksum).ToNot(Equal(ignoreModChecksum)) + + // Recurse submodules modified. + obj.Spec.RecurseSubmodules = true + submodModChecksum := r.calculateContentConfigChecksum(obj, nil) + g.Expect(ignoreModChecksum).ToNot(Equal(submodModChecksum)) + + // Include modified. + obj.Spec.Include = []sourcev1.GitRepositoryInclude{ + { + GitRepositoryRef: meta.LocalObjectReference{Name: "foo"}, + FromPath: "aaa", + ToPath: "bbb", + }, + } + artifacts := &artifactSet{ + &sourcev1.Artifact{Revision: "some-revision-1", Checksum: "some-checksum-1"}, + } + includeModChecksum := r.calculateContentConfigChecksum(obj, artifacts) + g.Expect(submodModChecksum).ToNot(Equal(includeModChecksum)) + + // Artifact modified revision. + artifacts = &artifactSet{ + &sourcev1.Artifact{Revision: "some-revision-2", Checksum: "some-checksum-1"}, + } + artifactModChecksum := r.calculateContentConfigChecksum(obj, artifacts) + g.Expect(includeModChecksum).ToNot(Equal(artifactModChecksum)) + + // Artifact modified checksum. + artifacts = &artifactSet{ + &sourcev1.Artifact{Revision: "some-revision-2", Checksum: "some-checksum-2"}, + } + artifactCsumModChecksum := r.calculateContentConfigChecksum(obj, artifacts) + g.Expect(artifactModChecksum).ToNot(Equal(artifactCsumModChecksum)) +} diff --git a/docs/api/source.md b/docs/api/source.md index 52c3013f2..1b4240ad7 100644 --- a/docs/api/source.md +++ b/docs/api/source.md @@ -1643,6 +1643,25 @@ Artifacts as instructed by GitRepositorySpec.Include.

+contentConfigChecksum
+ +string + + + +(Optional) +

ContentConfigChecksum is a checksum of all the configurations related to +the content of the source artifact: +- .spec.ignore +- .spec.recurseSubmodules +- .spec.included and the checksum of the included artifacts +observed in .status.observedGeneration version of the object. This can +be used to determine if the content of the +It has the format of <algo>:<checksum>, for example: sha256:<checksum>.

+ + + + ReconcileRequestStatus
diff --git a/docs/spec/v1beta2/gitrepositories.md b/docs/spec/v1beta2/gitrepositories.md index 2d95db474..46e016dd4 100644 --- a/docs/spec/v1beta2/gitrepositories.md +++ b/docs/spec/v1beta2/gitrepositories.md @@ -405,9 +405,12 @@ Optimized Git clones decreases resource utilization for GitRepository reconciliations. It supports both `go-git` and `libgit2` implementations when cloning repositories using branches or tags. -When enabled, avoids full clone operations by first checking whether -the last revision is still the same at the target repository, -and if that is so, skips the reconciliation. +When enabled, it avoids full Git clone operations by first checking whether +the revision of the last stored artifact is still the head of the remote +repository and none of the other factors that contribute to a change in the +artifact, like ignore rules and included repositories, have changed. If that is +so, the reconciliation is skipped. Else, a full reconciliation is performed as +usual. This feature is enabled by default. It can be disabled by starting the controller with the argument `--feature-gates=OptimizedGitClones=false`. diff --git a/pkg/git/git.go b/pkg/git/git.go index da0e7d225..5ce6fb09a 100644 --- a/pkg/git/git.go +++ b/pkg/git/git.go @@ -107,18 +107,6 @@ type CheckoutStrategy interface { Checkout(ctx context.Context, path, url string, config *AuthOptions) (*Commit, error) } -// NoChangesError represents the case in which a Git clone operation -// is attempted, but cancelled as the revision is still the same as -// the one observed on the last successful reconciliation. -type NoChangesError struct { - Message string - ObservedRevision string -} - -func (e NoChangesError) Error() string { - return fmt.Sprintf("%s: observed revision '%s'", e.Message, e.ObservedRevision) -} - // IsConcreteCommit returns if a given commit is a concrete commit. Concrete // commits have most of commit metadata and commit content. In contrast, a // partial commit may only have some metadata and no commit content.