Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: allow specifying a remote config dependency from Google Cloud Storage #9057

Merged
merged 9 commits into from
Sep 5, 2023
Prev Previous commit
Next Next commit
feat: Adjust remote config dependency for Google Cloud Storage to be …
…more in line with git.

Instead of copying the skaffold config from GCS the user specifies a source that signifies the bucket and directory structure to copy recursively. For example, specifying the source "gs://my-bucket/dir1/*" will copy all the files and subdirectories stored in "gs://my-bucket/dir1/". This requires the user to also specify the path to the skaffold config relative to the provided source. In the example mentioned, if the skaffold config was stored in "gs://my-bucket/dir1/configs/skaffold.yaml" then the path required would be "configs/skaffold.yaml".
mattsanta committed Aug 30, 2023
commit 42f7e025f0fc11bfc88ae5368cae2c02457a35e1
5 changes: 3 additions & 2 deletions docs-v2/content/en/docs/design/config.md
Original file line number Diff line number Diff line change
@@ -147,10 +147,11 @@ requires:
ref: main
- configs: ["cfg3"]
googleCloudStorage:
path: gs://storage-bucket/dir/skaffold.yaml
source: gs://my-bucket/dir1/*
path: config/skaffold.yaml
```

The environment variable `SKAFFOLD_REMOTE_CACHE_DIR` or flag `--remote-cache-dir` specifies the download location for all remote dependency contents. If undefined then it defaults to `~/.skaffold/remote-cache`. The remote cache directory consists of subdirectories with the contents retrieved from the remote dependency. For git dependencies the subdirectory name is a hash of the repo `uri` and the `branch/ref`. For Google Cloud Storage dependencies the subdirectory name is a hash of the `path`.
The environment variable `SKAFFOLD_REMOTE_CACHE_DIR` or flag `--remote-cache-dir` specifies the download location for all remote dependency contents. If undefined then it defaults to `~/.skaffold/remote-cache`. The remote cache directory consists of subdirectories with the contents retrieved from the remote dependency. For git dependencies the subdirectory name is a hash of the repo `uri` and the `branch/ref`. For Google Cloud Storage dependencies the subdirectory name is a hash of the `source`.

The remote config gets treated like a local config after substituting the path with the actual path in the cache directory.

51 changes: 23 additions & 28 deletions pkg/skaffold/gcs/gsutil.go
Original file line number Diff line number Diff line change
@@ -25,7 +25,6 @@ import (
"os"
"os/exec"
"path/filepath"
"strings"

"github.com/GoogleContainerTools/skaffold/v2/pkg/skaffold/config"
"github.com/GoogleContainerTools/skaffold/v2/pkg/skaffold/output/log"
@@ -56,33 +55,30 @@ func (g *Gsutil) Copy(ctx context.Context, src, dst string, recursive bool) erro
return nil
}

// SyncObject syncs the target Google Cloud Storage object with skaffold's local cache and returns the local path to the object.
func SyncObject(ctx context.Context, g latest.GoogleCloudStorageInfo, opts config.SkaffoldOptions) (string, error) {
// SyncObjects syncs the target Google Cloud Storage objects with skaffold's local cache and returns the local path to the objects.
func SyncObjects(ctx context.Context, g latest.GoogleCloudStorageInfo, opts config.SkaffoldOptions) (string, error) {
remoteCacheDir, err := config.GetRemoteCacheDir(opts)
if err != nil {
return "", fmt.Errorf("failed determining Google Cloud Storage object cache directory: %w", err)
return "", fmt.Errorf("failed determining remote cache directory: %w", err)
}
if err := os.MkdirAll(remoteCacheDir, 0700); err != nil {
return "", fmt.Errorf("failed creating Google Cloud Storage object cache directory: %w", err)
return "", fmt.Errorf("failed creating remote cache directory: %w", err)
}

subDir, err := getPerObjectCacheDir(g)
sourceDir, err := getPerSourceDir(g)
if err != nil {
return "", fmt.Errorf("failed determining Google Cloud Storage object cache directory for %s: %w", g.Path, err)
return "", fmt.Errorf("failed determining Google Cloud Storage remote cache directory for %q: %w", g.Source, err)
}
// Determine the name of the file to preserve it in the cache.
parts := strings.Split(g.Path, "/")
if len(parts) == 0 {
return "", fmt.Errorf("failed parsing Google Cloud Storage object %s", g.Path)
}
fileName := parts[len(parts)-1]
if len(fileName) == 0 {
return "", fmt.Errorf("failed parsing Google Cloud Storage object %s", g.Path)
}
cacheDir := filepath.Join(remoteCacheDir, subDir, fileName)
// If cache doesn't exist and cloning is disabled then we can't move forward.
if _, err := os.Stat(cacheDir); os.IsNotExist(err) && opts.SyncRemoteCache.CloneDisabled() {
return "", syncDisabledErr(g, cacheDir)
cacheDir := filepath.Join(remoteCacheDir, sourceDir)
if _, err := os.Stat(cacheDir); os.IsNotExist(err) {
// If cache doesn't exist and cloning is disabled then we can't move forward.
if opts.SyncRemoteCache.CloneDisabled() {
return "", syncDisabledErr(g, cacheDir)
}
// The subdirectory needs to exist to work with gsutil.
if err := os.MkdirAll(cacheDir, 0700); err != nil {
return "", fmt.Errorf("failed creating Google Cloud Storage cache directory for %q: %w", g.Source, err)
}
}
// If sync property is false then skip fetching latest object from remote storage.
if g.Sync != nil && !*g.Sync {
@@ -94,16 +90,15 @@ func SyncObject(ctx context.Context, g latest.GoogleCloudStorageInfo, opts confi
}

gcs := Gsutil{}
// Non-recursive since this should only download a single file - the remote Skaffold Config.
if err := gcs.Copy(ctx, g.Path, cacheDir, false); err != nil {
return "", fmt.Errorf("failed to cache Google Cloud Storage object %s: %w", g.Path, err)
if err := gcs.Copy(ctx, g.Source, cacheDir, true); err != nil {
return "", fmt.Errorf("failed to cache Google Cloud Storage objects from %q: %w", g.Source, err)
}
return cacheDir, nil
}

// getPerObjectCacheDir returns the directory used per Google Cloud Storage Object. Directory is a hash of the path provided.
func getPerObjectCacheDir(g latest.GoogleCloudStorageInfo) (string, error) {
inputs := []string{g.Path}
// getPerSourceDir returns the directory used per Google Cloud Storage source. Directory is a hash of the source provided.
func getPerSourceDir(g latest.GoogleCloudStorageInfo) (string, error) {
inputs := []string{g.Source}
hasher := sha256.New()
enc := json.NewEncoder(hasher)
if err := enc.Encode(inputs); err != nil {
@@ -115,15 +110,15 @@ func getPerObjectCacheDir(g latest.GoogleCloudStorageInfo) (string, error) {

// syncDisabledErr returns error to use when remote sync is turned off by the user and the Google Cloud Storage object doesn't exist inside the cache directory.
func syncDisabledErr(g latest.GoogleCloudStorageInfo, cacheDir string) error {
msg := fmt.Sprintf("cache directory %q for Google Cloud Storage object %q does not exist and remote cache sync is explicitly disabled via flag `--sync-remote-cache`", cacheDir, g.Path)
msg := fmt.Sprintf("cache directory %q for Google Cloud Storage source %q does not exist and remote cache sync is explicitly disabled via flag `--sync-remote-cache`", cacheDir, g.Source)
return sErrors.NewError(fmt.Errorf(msg),
&proto.ActionableErr{
Message: msg,
ErrCode: proto.StatusCode(proto.StatusCode_CONFIG_REMOTE_REPO_CACHE_NOT_FOUND_ERR),
Suggestions: []*proto.Suggestion{
{
SuggestionCode: proto.SuggestionCode_CONFIG_ENABLE_REMOTE_REPO_SYNC,
Action: fmt.Sprintf("Either download the Google Cloud Storage object manually to %q or set flag `--sync-remote-cache` to `always` or `missing`", cacheDir),
Action: fmt.Sprintf("Either download the Google Cloud Storage objects manually to %q or set flag `--sync-remote-cache` to `always` or `missing`", cacheDir),
},
},
})
34 changes: 17 additions & 17 deletions pkg/skaffold/gcs/gsutil_test.go
Original file line number Diff line number Diff line change
@@ -78,9 +78,9 @@ func TestCopy(t *testing.T) {
}

func TestSyncObject(t *testing.T) {
gcsPath := "gs://bucket/skaffold.yaml"
hash := "uJqp7MPtzRQ0g7nR-pH-O7RBEfCEt6aY"
localObjectCache := fmt.Sprintf("%s/skaffold.yaml", hash)
source := "gs://my-bucket/dir1/*"
path := "configs/skaffold.yaml"
sourceHash := "B7fSU6BUuHFJuLenty9ErOwyxPVqO5cB"

tests := []struct {
description string
@@ -93,51 +93,51 @@ func TestSyncObject(t *testing.T) {
}{
{
description: "first time copy succeeds",
g: latest.GoogleCloudStorageInfo{Path: gcsPath},
g: latest.GoogleCloudStorageInfo{Source: source, Path: path},
syncFlag: "always",
expected: localObjectCache,
expected: sourceHash,
},
{
description: "first time copy fails",
g: latest.GoogleCloudStorageInfo{Path: gcsPath},
g: latest.GoogleCloudStorageInfo{Source: source, Path: path},
gsutilErr: fmt.Errorf("not found"),
syncFlag: "always",
shouldErr: true,
},
{
description: "first time copy with sync off via flag fails",
g: latest.GoogleCloudStorageInfo{Path: gcsPath},
g: latest.GoogleCloudStorageInfo{Source: source, Path: path},
syncFlag: "never",
shouldErr: true,
},
{
description: "existing copy update succeeds",
g: latest.GoogleCloudStorageInfo{Path: gcsPath},
g: latest.GoogleCloudStorageInfo{Source: source, Path: path},
syncFlag: "always",
existing: true,
expected: localObjectCache,
expected: sourceHash,
},
{
description: "existing copy with sync off via flag succeeds",
g: latest.GoogleCloudStorageInfo{Path: gcsPath},
g: latest.GoogleCloudStorageInfo{Source: source, Path: path},
syncFlag: "never",
existing: true,
expected: localObjectCache,
expected: sourceHash,
},
{
description: "existing copy with sync off succeeds",
g: latest.GoogleCloudStorageInfo{Path: gcsPath, Sync: util.Ptr(false)},
g: latest.GoogleCloudStorageInfo{Source: source, Path: path, Sync: util.Ptr(false)},
syncFlag: "always",
existing: true,
expected: localObjectCache,
expected: sourceHash,
},
}

for _, test := range tests {
testutil.Run(t, test.description, func(t *testutil.T) {
td := t.NewTempDir()
if test.existing {
td.Touch(localObjectCache)
td.Touch(sourceHash)
}

syncRemote := &config.SyncRemoteCacheOption{}
@@ -146,13 +146,13 @@ func TestSyncObject(t *testing.T) {

var cmd *testutil.FakeCmd
if test.gsutilErr == nil {
cmd = testutil.CmdRunOut(fmt.Sprintf("gsutil cp %s %s", gcsPath, td.Path(localObjectCache)), "logs")
cmd = testutil.CmdRunOut(fmt.Sprintf("gsutil cp -r %s %s", source, td.Path(sourceHash)), "logs")
} else {
cmd = testutil.CmdRunOutErr(fmt.Sprintf("gsutil cp %s %s", gcsPath, td.Path(localObjectCache)), "logs", test.gsutilErr)
cmd = testutil.CmdRunOutErr(fmt.Sprintf("gsutil cp -r %s %s", source, td.Path(sourceHash)), "logs", test.gsutilErr)
}
t.Override(&util.DefaultExecCommand, cmd)

path, err := SyncObject(context.Background(), test.g, opts)
path, err := SyncObjects(context.Background(), test.g, opts)
var expected string
if !test.shouldErr {
expected = filepath.Join(td.Root(), test.expected)
10 changes: 5 additions & 5 deletions pkg/skaffold/parser/config.go
Original file line number Diff line number Diff line change
@@ -353,25 +353,25 @@ func cacheRepo(ctx context.Context, g latest.GitInfo, opts config.SkaffoldOption

// cacheGCSObject downloads the referenced Google Cloud Storage object to skaffold's cache if required and returns the path to the target configuration file.
func cacheGCSObject(ctx context.Context, g latest.GoogleCloudStorageInfo, opts config.SkaffoldOptions, r *record) (string, error) {
key := g.Path
key := g.Source
if p, found := r.cachedObjects[key]; found {
switch v := p.(type) {
case string:
return v, nil
return filepath.Join(v, g.Path), nil
case error:
return "", v
default:
log.Entry(context.TODO()).Fatalf("unable to check download status of Google Cloud Storage object at %s", g.Path)
log.Entry(context.TODO()).Fatalf("unable to check download status of Google Cloud Storage objects at %s", g.Source)
return "", nil
}
}
p, err := gcs.SyncObject(ctx, g, opts)
p, err := gcs.SyncObjects(ctx, g, opts)
if err != nil {
r.cachedObjects[key] = err
return "", err
}
r.cachedObjects[key] = p
return p, nil
return filepath.Join(p, g.Path), nil
}

// checkRevisit ensures that each config is activated with the same set of active profiles
7 changes: 5 additions & 2 deletions pkg/skaffold/schema/latest/config.go
Original file line number Diff line number Diff line change
@@ -109,9 +109,12 @@ type GitInfo struct {
Sync *bool `yaml:"sync,omitempty"`
}

// GoogleCloudStorageInfo contains information on the origin of skaffold configurations stored in Google Cloud Storage.
// GoogleCloudStorageInfo contains information on the origin of skaffold configurations copied from Google Cloud Storage.
type GoogleCloudStorageInfo struct {
// Google Cloud Storage path to the skaffold configuration file. e.g. `gs://bucket/dir/skaffold.yaml`
// Source is the Google Cloud Storage objects to copy. e.g. `gs://my-bucket/dir1/dir2/*`.
Source string `yaml:"source,omitempty"`

// Path is the relative path from the source to the skaffold configuration file. e.g. `configs/skaffold.yaml`.
Path string `yaml:"path,omitempty"`

// Sync when set to `true` will reset the cached object to the latest remote version on every run.