From 52e6ecfb5031dfa6605b2c9c0a6c0987b622025e Mon Sep 17 00:00:00 2001 From: Justin Firsching Date: Sat, 13 Nov 2021 23:18:59 -0500 Subject: [PATCH] Git Repository Overhaul (#155) Update the pull function so that tag-provided mirrors do not fetch all tags, instead later fetching (externally) only the tag they need. Implement tag fetching function to retrieve only the desired tag when creating a tag-provided repo mirror. Check out the tag into a detached HEAD state at the end of the create stage of tag-provided repository mirrors. Implement ref removal and addition functions used during package creation and mirror push to ensure that refs that aren't wanted on the mirror won't be pushed. Update the refspecs used in the push to push detached HEAD, branches, online remote, and tags to the offline mirror. Note that if we later check out a branch from the remote and do not clean up the remote ref it will lead to a duplicate ref name and the push will fail on one of the refs (likely the online one since it is later in the refspec slice). 
Fixes #154 feat: Allow for repos to be provided without a tag to mirror all branches/tags feat: Make tag-provided repository mirrors use the tag as master fix: Prevent tag-provided repo mirrors from storing extra refs fix: tag-provided clones use trunk branch name docs: Update gitops-data Example README Signed-off-by: Jeff McCoy --- cli/internal/git/checkout.go | 85 +++++++++++++++-- cli/internal/git/fetch.go | 65 +++++++++++++ cli/internal/git/pull.go | 45 ++++++++- cli/internal/git/push.go | 28 +++++- cli/internal/git/utils.go | 160 ++++++++++++++++++++++++++++++++ cli/internal/packager/create.go | 8 +- examples/gitops-data/README.md | 87 +++++++++++++++-- examples/gitops-data/zarf.yaml | 7 +- 8 files changed, 458 insertions(+), 27 deletions(-) create mode 100644 cli/internal/git/fetch.go diff --git a/cli/internal/git/checkout.go b/cli/internal/git/checkout.go index a3b25aed2e..974a59421e 100644 --- a/cli/internal/git/checkout.go +++ b/cli/internal/git/checkout.go @@ -3,14 +3,88 @@ package git import ( "github.com/go-git/go-git/v5" "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/object" "github.com/sirupsen/logrus" ) +// CheckoutTag performs a `git checkout` of the provided tag to a detached HEAD func CheckoutTag(path string, tag string) { + options := &git.CheckoutOptions{ + Branch: plumbing.ReferenceName("refs/tags/" + tag), + } + checkout(path, options) +} + +// CheckoutTagAsBranch performs a `git checkout` of the provided tag but rather +// than checking out to a detatched head, checks out to the provided branch ref +// It will delete the branch provided if it exists +func CheckoutTagAsBranch(path string, tag string, branch plumbing.ReferenceName) { + logContext := logrus.WithFields(logrus.Fields{ + "Path": path, + "Tag": tag, + "Branch": branch.String(), + }) + + repo, err := git.PlainOpen(path) + if err != nil { + logContext.Debug(err) + logContext.Fatal("Not a valid git repo or unable to open") + } + tagRef, err := 
repo.Tag(tag) + if err != nil { + logContext.Debug(err) + logContext.Fatal("Failed to locate tag in repository.") + } + checkoutHashAsBranch(path, tagRef.Hash(), branch) +} + +// checkoutHashAsBranch performs a `git checkout` of the commit hash associated +// with the provided hash +// It will delete the branch provided if it exists +func checkoutHashAsBranch(path string, hash plumbing.Hash, branch plumbing.ReferenceName) { + logContext := logrus.WithFields(logrus.Fields{ + "Path": path, + "Hash": hash.String(), + "Branch": branch.String(), + }) + + DeleteBranchIfExists(path, branch) + + repo, err := git.PlainOpen(path) + if err != nil { + logContext.Debug(err) + logContext.Fatal("Not a valid git repo or unable to open") + } + + objRef, err := repo.Object(plumbing.AnyObject, hash) + + var commitHash plumbing.Hash + switch objRef := objRef.(type) { + case *object.Tag: + commitHash = objRef.Target + case *object.Commit: + commitHash = objRef.Hash + default: + // This shouldn't ever hit, but we should at least log it if someday it + // does get hit + logContext.Debug("Unsupported tag hash type: " + objRef.Type().String()) + logContext.Fatal("Checkout failed. 
Hash type not supported.") + } + + options := &git.CheckoutOptions{ + Hash: commitHash, + Branch: branch, + Create: true, + } + checkout(path, options) +} + +// checkout performs a `git checkout` on the path provided using the options provided +// It assumes the caller knows what to do and does not perform any safety checks +func checkout(path string, checkoutOptions *git.CheckoutOptions) { logContext := logrus.WithFields(logrus.Fields{ "Path": path, - "Tag": tag, }) // Open the given repo @@ -18,7 +92,6 @@ func CheckoutTag(path string, tag string) { if err != nil { logContext.Debug(err) logContext.Fatal("Not a valid git repo or unable to open") - return } // Get the working tree so we can change refs @@ -28,12 +101,10 @@ func CheckoutTag(path string, tag string) { logContext.Fatal("Unable to load the git repo") } - // Checkout our tag - err = tree.Checkout(&git.CheckoutOptions{ - Branch: plumbing.ReferenceName("refs/tags/" + tag), - }) + // Perform the checkout + err = tree.Checkout(checkoutOptions) if err != nil { logContext.Debug(err) - logContext.Fatal("Unable to checkout the given tag") + logContext.Fatal("Unable to perform checkout") } } diff --git a/cli/internal/git/fetch.go b/cli/internal/git/fetch.go new file mode 100644 index 0000000000..322c8207de --- /dev/null +++ b/cli/internal/git/fetch.go @@ -0,0 +1,65 @@ +package git + +import ( + "path" + + "github.com/go-git/go-git/v5" + goConfig "github.com/go-git/go-git/v5/config" + "github.com/sirupsen/logrus" +) + +// FetchTag performs a `git fetch` of _only_ the provided tag +func FetchTag(gitDirectory string, tag string) { + logContext := logrus.WithFields(logrus.Fields{ + // Base should be similar to the repo name + "Repo": path.Base(gitDirectory), + }) + + repo, err := git.PlainOpen(gitDirectory) + if err != nil { + logContext.Fatal(err) + } + + remotes, err := repo.Remotes() + // There should never be no remotes, but it's easier to account for than + // let be a bug later + if err != nil || len(remotes) 
== 0 { + if err != nil { + logContext.Debug(err) + } + logContext.Fatal("Failed to identify remotes.") + } + + gitUrl := remotes[0].Config().URLs[0] + // Now that we have an exact match, we may as well update the logger, + // especially since nothing has been logged to this point that hasn't been + // fatal. + logContext = logrus.WithFields(logrus.Fields{ + "Remote": gitUrl, + }) + + gitCred := FindAuthForHost(gitUrl) + + logContext.Debug("Attempting to find tag: " + tag) + fetchOptions := &git.FetchOptions{ + RemoteName: onlineRemoteName, + RefSpecs: []goConfig.RefSpec{ + goConfig.RefSpec("refs/tags/" + tag + ":refs/tags/" + tag), + }, + } + + if gitCred.Auth.Username != "" { + fetchOptions.Auth = &gitCred.Auth + } + + err = repo.Fetch(fetchOptions) + + if err == git.ErrTagExists { + logContext.Info("Tag already fetched") + } else if err != nil { + logContext.Debug(err) + logContext.Fatal("Not a valid tag or unable to fetch") + } + + logContext.Info("Git tag fetched") +} diff --git a/cli/internal/git/pull.go b/cli/internal/git/pull.go index 515be9c03a..15bd743abd 100644 --- a/cli/internal/git/pull.go +++ b/cli/internal/git/pull.go @@ -5,20 +5,27 @@ import ( "github.com/defenseunicorns/zarf/cli/internal/utils" "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing" "github.com/sirupsen/logrus" + + "strings" ) const onlineRemoteName = "online-upstream" func DownloadRepoToTemp(gitUrl string) string { path := utils.MakeTempDir() + // If downloading to temp, grab all tags since the repo isn't being + // packaged anyways and it saves us from having to fetch the tags + // later if we need them pull(gitUrl, path) return path } -func Pull(gitUrl string, targetFolder string) { +func Pull(gitUrl string, targetFolder string) string { path := targetFolder + "/" + transformURLtoRepoName(gitUrl) pull(gitUrl, path) + return path } func pull(gitUrl string, targetFolder string) { @@ -29,19 +36,25 @@ func pull(gitUrl string, targetFolder string) { gitCred := 
FindAuthForHost(gitUrl) + matches := strings.Split(gitUrl, "@") + fetchAllTags := len(matches) == 1 cloneOptions := &git.CloneOptions{ - URL: gitUrl, + URL: matches[0], Progress: os.Stdout, RemoteName: onlineRemoteName, } + if !fetchAllTags { + cloneOptions.Tags = git.NoTags + } + // Gracefully handle no git creds on the system (like our CI/CD) if gitCred.Auth.Username != "" { cloneOptions.Auth = &gitCred.Auth } // Clone the given repo - _, err := git.PlainClone(targetFolder, false, cloneOptions) + repo, err := git.PlainClone(targetFolder, false, cloneOptions) if err == git.ErrRepositoryAlreadyExists { logContext.Info("Repo already cloned") @@ -50,5 +63,31 @@ func pull(gitUrl string, targetFolder string) { logContext.Fatal("Not a valid git repo or unable to clone") } + if !fetchAllTags { + tag := matches[1] + + // Identify the remote trunk branch name + trunkBranchName := plumbing.NewBranchReferenceName("master") + head, err := repo.Head() + + if err != nil { + // No repo head available + logContext.Debug(err) + logContext.Warn("Failed to identify repo head. Tag will be pushed to 'master'.") + } else if head.Name().IsBranch() { + // Valid repo head and it is a branch + trunkBranchName = head.Name() + } else { + // Valid repo head but not a branch + logContext.Warn("No branch found for this repo head. 
Tag will be pushed to 'master'.") + } + + RemoveLocalBranchRefs(targetFolder) + RemoveOnlineRemoteRefs(targetFolder) + + FetchTag(targetFolder, tag) + CheckoutTagAsBranch(targetFolder, tag, trunkBranchName) + } + logContext.Info("Git repo synced") } diff --git a/cli/internal/git/push.go b/cli/internal/git/push.go index 233f4a5ef5..ed0f46ea01 100644 --- a/cli/internal/git/push.go +++ b/cli/internal/git/push.go @@ -1,6 +1,8 @@ package git import ( + "os" + "github.com/defenseunicorns/zarf/cli/config" "github.com/defenseunicorns/zarf/cli/internal/utils" "github.com/go-git/go-git/v5" @@ -9,6 +11,7 @@ import ( ) const offlineRemoteName = "offline-downstream" +const onlineRemoteRefPrefix = "refs/remotes/" + onlineRemoteName + "/" func PushAllDirectories(localPath string) { paths := utils.ListDirectories(localPath) @@ -25,8 +28,7 @@ func push(localPath string) { // Open the given repo repo, err := git.PlainOpen(localPath) if err != nil { - logContext.Warn("Not a valid git repo or unable to open") - return + logContext.Fatal("Not a valid git repo or unable to open") } // Get the upstream URL @@ -38,29 +40,47 @@ func push(localPath string) { remoteUrl := remote.Config().URLs[0] targetUrl := transformURL("https://"+config.ZarfLocalIP, remoteUrl) - _, _ = repo.CreateRemote(&goConfig.RemoteConfig{ + _, err = repo.CreateRemote(&goConfig.RemoteConfig{ Name: offlineRemoteName, URLs: []string{targetUrl}, }) + if err != nil { + logContext.Debug(err) + logContext.Fatal("Failed to create offline remote") + } + gitCred := FindAuthForHost(config.ZarfLocalIP) + pushContext := logContext.WithField("target", targetUrl) + + // Since we are pushing HEAD:refs/heads/master on deployment, leaving + // duplicates of the HEAD ref (ex. 
refs/heads/master, + // refs/remotes/online-upstream/master, will cause the push to fail) + removedRefs := RemoveHeadCopies(localPath) + err = repo.Push(&git.PushOptions{ RemoteName: offlineRemoteName, Auth: &gitCred.Auth, + Progress: os.Stdout, + // If a provided refspec doesn't push anything, it is just ignored RefSpecs: []goConfig.RefSpec{ "refs/heads/*:refs/heads/*", + onlineRemoteRefPrefix + "*:refs/heads/*", "refs/tags/*:refs/tags/*", }, }) - pushContext := logContext.WithField("target", targetUrl) if err == git.NoErrAlreadyUpToDate { pushContext.Info("Repo already up-to-date") } else if err != nil { + pushContext.Debug(err) pushContext.Warn("Unable to push repo to the gitops service") } else { pushContext.Info("Repo updated") } + // Add back the refs we removed just incase this push isn't the last thing + // being run and a later task needs to reference them. + AddRefs(localPath, removedRefs) } diff --git a/cli/internal/git/utils.go b/cli/internal/git/utils.go index 3c33feeac6..2762f67184 100644 --- a/cli/internal/git/utils.go +++ b/cli/internal/git/utils.go @@ -4,11 +4,14 @@ import ( "bufio" "net/url" "os" + "path" "regexp" "strings" "github.com/defenseunicorns/zarf/cli/config" "github.com/defenseunicorns/zarf/cli/internal/utils" + "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing" "github.com/go-git/go-git/v5/plumbing/transport/http" "github.com/sirupsen/logrus" ) @@ -45,6 +48,11 @@ func transformURL(baseUrl string, url string) string { return output } +func transformRepoDirToURL(baseUrl string, repoDir string) string { + baseDir := path.Base(repoDir) + return baseUrl + "/zarf-git-user/" + baseDir +} + func credentialFilePath() string { homePath, _ := os.UserHomeDir() return homePath + "/.git-credentials" @@ -151,3 +159,155 @@ func CredentialsGenerator() string { return gitSecret } + +// GetTaggedUrl builds a URL of the repo@tag format +// It returns a string of format repo@tag +func GetTaggedUrl(gitUrl string, gitTag string) string { + 
return gitUrl + "@" + gitTag +} + +// RemoveLocalBranchRefs removes all refs that are local branches +// It returns a slice of references deleted +func RemoveLocalBranchRefs(gitDirectory string) []*plumbing.Reference { + return removeReferences( + gitDirectory, + func(ref *plumbing.Reference) bool { + return ref.Name().IsBranch() + }, + ) +} + +// RemoveOnlineRemoteRefs removes all refs pointing to the online-upstream +// It returns a slice of references deleted +func RemoveOnlineRemoteRefs(gitDirectory string) []*plumbing.Reference { + return removeReferences( + gitDirectory, + func(ref *plumbing.Reference) bool { + return strings.HasPrefix(ref.Name().String(), onlineRemoteRefPrefix) + }, + ) +} + +// RemoveHeadCopies removes any refs that aren't HEAD but have the same hash +// It returns a slice of references deleted +func RemoveHeadCopies(gitDirectory string) []*plumbing.Reference { + logContext := logrus.WithField("Repo", gitDirectory) + repo, err := git.PlainOpen(gitDirectory) + if err != nil { + logContext.Debug(err) + logContext.Fatal("Not a valid git repo or unable to open") + } + + head, err := repo.Head() + if err != nil { + logContext.Debug(err) + logContext.Fatal("Failed to identify references") + } + + headHash := head.Hash().String() + return removeReferences( + gitDirectory, + func(ref *plumbing.Reference) bool { + // Don't ever remove tags + return !ref.Name().IsTag() && ref.Hash().String() == headHash + }, + ) +} + +// removeReferences removes references based on a provided callback +// removeReferences does not allow you to delete HEAD +// It returns a slice of references deleted +func removeReferences( + gitDirectory string, + shouldRemove func(*plumbing.Reference) bool, +) []*plumbing.Reference { + logContext := logrus.WithField("Repo", gitDirectory) + repo, err := git.PlainOpen(gitDirectory) + if err != nil { + logContext.Debug(err) + logContext.Fatal("Not a valid git repo or unable to open") + } + + references, err := repo.References() + if 
err != nil { + logContext.Debug(err) + logContext.Fatal("Failed to identify references") + } + + head, err := repo.Head() + if err != nil { + logContext.Debug(err) + logContext.Fatal("Failed to identify head") + } + + removedRefs := []*plumbing.Reference{} + err = references.ForEach(func(ref *plumbing.Reference) error { + refIsNotHeadOrHeadTarget := ref.Name() != plumbing.HEAD && ref.Name() != head.Name() + // Run shouldRemove inline here to take advantage of short circuit + // evaluation as to not waste a cycle on HEAD + if refIsNotHeadOrHeadTarget && shouldRemove(ref) { + err = repo.Storer.RemoveReference(ref.Name()) + if err != nil { + return err + } + removedRefs = append(removedRefs, ref) + } + return nil + }) + + if err != nil { + logContext.Debug(err) + logContext.Fatal("Failed to remove references") + } + + return removedRefs +} + +// AddRefs adds a provided arbitrary list of references to a repo +// It is intended to be used with references returned by a Remove function +func AddRefs(gitDirectory string, refs []*plumbing.Reference) { + logContext := logrus.WithField("Repo", gitDirectory) + repo, err := git.PlainOpen(gitDirectory) + if err != nil { + logContext.Debug(err) + logContext.Fatal("Not a valid git repo or unable to open") + } + + for _, ref := range refs { + err = repo.Storer.SetReference(ref) + if err != nil { + logContext.Debug(err) + logContext.Fatal("Failed to add references") + } + } +} + +// DeleteBranchIfExists ensures the provided branch name does not exist +func DeleteBranchIfExists(gitDirectory string, branchName plumbing.ReferenceName) { + logContext := logrus.WithFields(logrus.Fields{ + "Repo": gitDirectory, + "Branch": branchName.String, + }) + + repo, err := git.PlainOpen(gitDirectory) + if err != nil { + logContext.Debug(err) + logContext.Fatal("Not a valid git repo or unable to open") + } + + // Deletes the branch by name + err = repo.DeleteBranch(branchName.Short()) + if err != nil && err != git.ErrBranchNotFound { + 
logContext.Debug(err) + logContext.Fatal("Failed to delete branch") + } + + // Delete reference too + err = repo.Storer.RemoveReference(branchName) + if err != nil && err != git.ErrInvalidReference { + logContext.Debug(err) + logContext.Fatal("Failed to delete branch reference") + } + + logContext.Info("Branch deleted") +} diff --git a/cli/internal/packager/create.go b/cli/internal/packager/create.go index f7985761d1..7f14d34136 100644 --- a/cli/internal/packager/create.go +++ b/cli/internal/packager/create.go @@ -5,7 +5,6 @@ import ( "path/filepath" "regexp" "strconv" - "strings" "github.com/defenseunicorns/zarf/cli/config" "github.com/defenseunicorns/zarf/cli/internal/git" @@ -118,11 +117,8 @@ func addLocalAssets(tempPath componentPaths, assets config.ZarfComponent) { logrus.Info("loading git repos for gitops service transfer") // Load all specified git repos for _, url := range assets.Repos { - matches := strings.Split(url, "@") - if len(matches) < 2 { - logrus.WithField("remote", url).Fatal("Unable to parse git url. Ensure you use the format url.git@tag") - } - git.Pull(matches[0], tempPath.repos) + // Pull all of the references if there is no `@` in the string + git.Pull(url, tempPath.repos) } } } diff --git a/examples/gitops-data/README.md b/examples/gitops-data/README.md index 4eeea24e15..652e4c0b1c 100644 --- a/examples/gitops-data/README.md +++ b/examples/gitops-data/README.md @@ -1,8 +1,83 @@ -## Zarf Simple gitops service Update +# Zarf Simple GitOps Service Update -This examples shows how to package images and repos to be loaded into the gitops service. This package does not deploy anything itself, but pushes assets to the gitops service to be consumed by the gitops engine of your choice. +This examples shows how to package images and repos to be loaded into the +GitOps service. This package does not deploy anything itself, but pushes +assets to the GitOps service to be consumed by the GitOps engine of your +choice. -### Steps to use: -1. 
Create a Zarf cluster as outlined in the main [README](../../README.md#2-create-the-zarf-cluster), note the git username / password output at the end -2. Follow [step 3](../../README.md#3-add-resources-to-the-zarf-cluster) using this config in this folder -3. Run `kubectl apply -k https://zarf-git-user:$(./zarf tools get-admin-password)@localhost/zarf-git-user/mirror__github.com__stefanprodan__podinfo//kustomize` to deploy podinfo into cluster from the gitops service +## Demonstrated Features + +### Docker Image Deployment + +This example demonstrates using component `images` to deploy container images +to a docker container image registry. Images provided to the `images` tag are +uploaded to a Zarf hosted docker registry, which can be later used by +Kubernetes manifests, or manually used as shown in this guide. + +### Tag-Provided Git Repository Clone + +Tag-provided git repository cloning is the recommended way of cloning a git +repository for air-gapped deployment. Tag-provided clones are defined using +the `url.git@tag` format as seen in the example with the `defenseunicorns/zarf` +repository (`https://github.com/defenseunicorns/zarf.git@v0.12.0`). + +A tag-provided clone only mirrors the tag defined in the Zarf definition. The +tag will appear on the Gitea mirror as the default branch name of the +repository being mirrored, and the tag itself. + +### Git Repository Full Clone + +Full clones are used in this example by the `stefanprodan/podinfo` repository, +following the `url.git` format (`https://github.com/stefanprodan/podinfo.git`). +Full clones will contain **all** branches and tags in the mirrored repository +rather than any one specific tag. + +## Prerequisites + +This example assumes you have already created a Zarf cluster. If that is not +the case, refer to the below locations in the game example README. Be sure when +creating the Zarf cluster to deploy the GitOps component! + +1. [Prepare the Zarf Environment](../game/README.md#get-ready) +1. 
[Create a Zarf Cluster](../game/README.md#create-a-cluster) + +## Create the Zarf Package + +To create this Zarf package, run the below command: + +```sh +cd /examples/gitops-data # directory with zarf.yaml +zarf package create # make the package +``` + +Successful execution will create a package named +`zarf-package-gitops-service-data.tar.zst`, the Zarf example package. + +## Deploying the Zarf Package + +To deploy the Zarf package, copy it to a machine that has a Zarf cluster +deployed with the GitOps component enabled and the `zarf` executable accessible +in your `PATH`. + +With the Zarf package in the current working directory, execute the below +command to deploy the package, uploading the Git repositories to Gitea and the +container images to the Docker registry. + +```sh +zarf package deploy zarf-package-gitops-service-data.tar.zst +``` + +> _**Important**_ +> +> It's possible to try a package deploy _before the Zarf cluster is ready to receive it_. If you see an error like `"https://<cluster-ip>/v2/": dial tcp <cluster-ip>:443: connect: connection refused;` then it's very likely that you've beaten the Zarf startup routines. +> +> The fix is simple: just wait for the cluster to finish starting & try again. + +## Applying the Kustomization + +Once the package has been deployed, the Kustomization can be applied from the +Gitea repository using the below command. 
+ +```sh +kubectl apply -k https://zarf-git-user:$(./zarf tools get-admin-password)@localhost/zarf-git-user/mirror__github.com__stefanprodan__podinfo//kustomize +``` diff --git a/examples/gitops-data/zarf.yaml b/examples/gitops-data/zarf.yaml index f9146b3dd9..4753f3fe3a 100644 --- a/examples/gitops-data/zarf.yaml +++ b/examples/gitops-data/zarf.yaml @@ -9,4 +9,9 @@ components: images: - ghcr.io/stefanprodan/podinfo:6.0.0 repos: - - https://github.com/stefanprodan/podinfo.git@6.0.0 + # Do a tag-provided Git Repo mirror + - https://github.com/defenseunicorns/zarf.git@v0.12.0 + # Do a tag-provided Git Repo mirror with the default branch of main + - https://repo1.dso.mil/platform-one/big-bang/apps/security-tools/twistlock.git@0.0.9-bb.0 + # Do a full Git Repo Mirror + - https://github.com/stefanprodan/podinfo.git