Merge pull request #4246 from ottoyiu/s3_vfs
Improve S3 url parsing for vfsPath to support more naming conventions
k8s-ci-robot authored Jan 29, 2018
2 parents 6cce242 + e4427e9 commit cc67497
Showing 5 changed files with 209 additions and 20 deletions.
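For a quick sense of what the new parsing accepts, the sketch below calls the vfs.VFSPath helper introduced in this change on a few of the URL shapes covered by the new tests. The URLs come from util/pkg/vfs/s3context_test.go; the snippet itself is illustrative only and assumes the usual k8s.io/kops import path.

package main

import (
	"fmt"

	"k8s.io/kops/util/pkg/vfs"
)

func main() {
	// URL shapes taken from the test table in util/pkg/vfs/s3context_test.go;
	// each should normalize to an s3://<bucket>/<path> vfs path.
	urls := []string{
		"https://s3.amazonaws.com/bucket",
		"s3-us-west-2.amazonaws.com/bucket/path",
		"s3.cn-north-1.amazonaws.com.cn/bucket/path",
	}
	for _, u := range urls {
		p, err := vfs.VFSPath(u)
		if err != nil {
			fmt.Printf("%s: not an S3 URL: %v\n", u, err)
			continue
		}
		fmt.Printf("%s -> %s\n", u, p)
	}
}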
6 changes: 3 additions & 3 deletions pkg/acls/s3/storage.go
@@ -56,9 +56,9 @@ func (s *s3PublicAclStrategy) GetACL(p vfs.Path, cluster *kops.Cluster) (vfs.ACL
return "", fmt.Errorf("unable to parse: %q", fileRepository)
}

// We are checking that the file is in s3.amazonaws.com meaning that it is in s3
// This will miss edge cases when the region url is used.
if u.Host != "s3.amazonaws.com" {
// We are checking that the file repository url is in S3
_, err = vfs.VFSPath(fileRepository)
if err != nil {
glog.V(8).Infof("path %q is not inside of a s3 bucket", u.String())
return nil, nil
}
44 changes: 27 additions & 17 deletions upup/pkg/fi/assettasks/copyfile.go
@@ -197,26 +197,36 @@ func buildVFSPath(target string) (string, error) {
return target, nil
}

u, err := url.Parse(target)
if err != nil {
return "", fmt.Errorf("unable to parse url: %q", target)
}

var vfsPath string

// These matches only cover a subset of the URLs that you can use, but I am uncertain how to cover more of the possible
// options.
// This code parses the HOST and determines to use s3 or gs
// URLs. For instance you can have the bucket name in the s3 url hostname.
// We are translating known https urls such as https://s3.amazonaws.com/example-kops to vfs path like
// s3://example-kops
if u.Host == "s3.amazonaws.com" {
vfsPath = "s3:/" + u.Path
} else if u.Host == "storage.googleapis.com" {
vfsPath = "gs:/" + u.Path
// Matches all S3 regional naming conventions:
// https://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region
// and converts to a s3://<bucket>/<path> vfsPath
s3VfsPath, err := vfs.VFSPath(target)
if err == nil {
vfsPath = s3VfsPath
} else {
glog.Errorf("unable to determine vfs path s3, google storage, and file paths are supported")
glog.Errorf("URLs starting with https://s3.amazonaws.com and http://storage.googleapis.com are transformed into s3 and gs URLs")
// These matches only cover a subset of the URLs that you can use, but I am uncertain how to cover more of the possible
// options.
// This code parses the HOST and determines gs URLs.
// For instance you can have the bucket name in the gs url hostname.
u, err := url.Parse(target)
if err != nil {
return "", fmt.Errorf("Unable to parse Google Cloud Storage URL: %q", target)
}
if u.Host == "storage.googleapis.com" {
vfsPath = "gs:/" + u.Path
}
}

if vfsPath == "" {
glog.Errorf("Unable to determine VFS path from supplied URL: %s", target)
glog.Errorf("S3, Google Cloud Storage, and File Paths are supported.")
glog.Errorf("For S3, please make sure that the supplied file repository URL adhere to S3 naming conventions, https://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region.")
glog.Errorf("For GCS, please make sure that the supplied file repository URL adheres to https://storage.googleapis.com/")
if err != nil { // print the S3 error for more details
return "", fmt.Errorf("Error Details: %v", err)
}
return "", fmt.Errorf("unable to determine vfs type for %q", target)
}

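Because the +/- markers are lost in this view, the interleaved old and new lines above are hard to follow. Read together, the post-change control flow of buildVFSPath is roughly the sketch below: S3-style URLs are tried first via vfs.VFSPath, and only on failure does the code fall back to the original storage.googleapis.com host check. The helper name, the package name, and the abbreviated error messages are illustrative, not part of the commit.

package assetsketch

import (
	"fmt"
	"net/url"

	"k8s.io/kops/util/pkg/vfs"
)

// buildVFSPathSketch is a hypothetical, simplified rendering of buildVFSPath
// after this change; error reporting is abbreviated relative to the real code.
func buildVFSPathSketch(target string) (string, error) {
	var vfsPath string

	// First try every S3 naming convention via the new regexp-based helper.
	if s3VfsPath, err := vfs.VFSPath(target); err == nil {
		vfsPath = s3VfsPath
	} else {
		// Fall back to the pre-existing Google Cloud Storage host check.
		u, parseErr := url.Parse(target)
		if parseErr != nil {
			return "", fmt.Errorf("unable to parse url: %q", target)
		}
		if u.Host == "storage.googleapis.com" {
			vfsPath = "gs:/" + u.Path
		}
	}

	if vfsPath == "" {
		return "", fmt.Errorf("unable to determine vfs type for %q", target)
	}
	return vfsPath, nil
}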
20 changes: 20 additions & 0 deletions upup/pkg/fi/assettasks/copyfile_test.go
@@ -32,6 +32,26 @@ func Test_BuildVFSPath(t *testing.T) {
"s3://k8s-for-greeks-kops/kubernetes-release/release/v1.7.2/bin/linux/amd64/kubectl",
true,
},
{
"https://s3.cn-north-1.amazonaws.com/k8s-for-greeks-kops/kubernetes-release/release/v1.7.2/bin/linux/amd64/kubectl",
"s3://k8s-for-greeks-kops/kubernetes-release/release/v1.7.2/bin/linux/amd64/kubectl",
true,
},
{
"https://s3-cn-north-1.amazonaws.com/k8s-for-greeks-kops/kubernetes-release/release/v1.7.2/bin/linux/amd64/kubectl",
"s3://k8s-for-greeks-kops/kubernetes-release/release/v1.7.2/bin/linux/amd64/kubectl",
true,
},
{
"https://s3.k8s-for-greeks-kops.amazonaws.com/kubernetes-release/release/v1.7.2/bin/linux/amd64/kubectl",
"s3://k8s-for-greeks-kops/kubernetes-release/release/v1.7.2/bin/linux/amd64/kubectl",
true,
},
{
"https://s3-k8s-for-greeks-kops.amazonaws.com/kubernetes-release/release/v1.7.2/bin/linux/amd64/kubectl",
"s3://k8s-for-greeks-kops/kubernetes-release/release/v1.7.2/bin/linux/amd64/kubectl",
true,
},
{
"https://foo/k8s-for-greeks-kops/kubernetes-release/release/v1.7.2/bin/linux/amd64/kubectl",
"",
31 changes: 31 additions & 0 deletions util/pkg/vfs/s3context.go
@@ -19,6 +19,7 @@ package vfs
import (
"fmt"
"os"
"regexp"
"sync"
"time"

@@ -31,6 +32,14 @@ import (
"github.com/golang/glog"
)

var (
// matches all regional naming conventions of S3:
// https://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region
// TODO: perhaps make region regex more specific, e.g. (us|eu|ap|cn|ca|sa), to prevent catching bucket names that match region format?
// but that will mean updating this list when AWS introduces new regions
s3UrlRegexp = regexp.MustCompile(`s3([-.](?P<region>\w{2}-\w+-\d{1})|[-.](?P<bucket>[\w.\-\_]+)|)?.amazonaws.com(.cn)?(?P<path>.*)?`)
)

type S3Context struct {
mutex sync.Mutex
clients map[string]*s3.S3
@@ -230,3 +239,25 @@ func validateRegion(region string) error {
}
return fmt.Errorf("%s is not a valid region\nPlease check that your region is formatted correctly (i.e. us-east-1)", region)
}

func VFSPath(url string) (string, error) {
if !s3UrlRegexp.MatchString(url) {
return "", fmt.Errorf("%s is not a valid S3 URL", url)
}
groupNames := s3UrlRegexp.SubexpNames()
result := s3UrlRegexp.FindAllStringSubmatch(url, -1)[0]

captured := map[string]string{}
for i, value := range result {
captured[groupNames[i]] = value
}
bucket := captured["bucket"]
path := captured["path"]
if bucket == "" {
if path == "" {
return "", fmt.Errorf("%s is not a valid S3 URL. No bucket defined.", url)
}
return fmt.Sprintf("s3:/%s", path), nil
}
return fmt.Sprintf("s3://%s%s", bucket, path), nil
}
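To make the named capture groups easier to follow, here is a small standalone sketch that reproduces the same pattern and prints what region, bucket, and path capture for two of the host shapes exercised by the tests below. It is illustrative and not part of the commit.

package main

import (
	"fmt"
	"regexp"
)

// Same pattern as s3UrlRegexp above, duplicated here only for illustration.
var s3URL = regexp.MustCompile(`s3([-.](?P<region>\w{2}-\w+-\d{1})|[-.](?P<bucket>[\w.\-\_]+)|)?.amazonaws.com(.cn)?(?P<path>.*)?`)

func main() {
	for _, u := range []string{
		"s3-us-west-2.amazonaws.com/bucket/path", // region in the host, bucket at the start of the path
		"s3-bucket.amazonaws.com/path",           // bucket in the host
	} {
		match := s3URL.FindStringSubmatch(u)
		if match == nil {
			fmt.Printf("%s: no match\n", u)
			continue
		}
		for i, name := range s3URL.SubexpNames() {
			if name != "" {
				fmt.Printf("%s  %s=%q\n", u, name, match[i])
			}
		}
	}
}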
128 changes: 128 additions & 0 deletions util/pkg/vfs/s3context_test.go
@@ -0,0 +1,128 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package vfs

import "testing"

func Test_VFSPath(t *testing.T) {
grid := []struct {
Input string
ExpectedResult string
ExpectError bool
}{
{
Input: "s3.amazonaws.com/bucket",
ExpectedResult: "s3://bucket",
ExpectError: false,
},
{
Input: "s3-bucket.amazonaws.com",
ExpectedResult: "s3://bucket",
ExpectError: false,
},
{
Input: "s3-bucket.amazonaws.com/path",
ExpectedResult: "s3://bucket/path",
ExpectError: false,
},
{
Input: "s3.bucket.amazonaws.com",
ExpectedResult: "s3://bucket",
ExpectError: false,
},
{
Input: "s3.bucket_foo-bar.abc.amazonaws.com/path",
ExpectedResult: "s3://bucket_foo-bar.abc/path",
ExpectError: false,
},
{
Input: "s3-us-west-2.amazonaws.com/bucket/path",
ExpectedResult: "s3://bucket/path",
ExpectError: false,
},
{
Input: "s3-us-west-2.amazonaws.com/bucket/path",
ExpectedResult: "s3://bucket/path",
ExpectError: false,
},
{
Input: "s3.cn-north-1.amazonaws.com.cn/bucket",
ExpectedResult: "s3://bucket",
ExpectError: false,
},
{
Input: "s3.cn-north-1.amazonaws.com.cn/bucket/path",
ExpectedResult: "s3://bucket/path",
ExpectError: false,
},
{
Input: "https://s3.amazonaws.com/bucket",
ExpectedResult: "s3://bucket",
ExpectError: false,
},
{
Input: "http://s3.amazonaws.com/bucket",
ExpectedResult: "s3://bucket",
ExpectError: false,
},
{
Input: "example.com/bucket",
ExpectedResult: "",
ExpectError: true,
},
{
Input: "https://example.com/bucket",
ExpectedResult: "",
ExpectError: true,
},
{
Input: "storage.googleapis.com",
ExpectedResult: "",
ExpectError: true,
},
{
Input: "storage.googleapis.com/foo/bar",
ExpectedResult: "",
ExpectError: true,
},
{
Input: "https://storage.googleapis.com",
ExpectedResult: "",
ExpectError: true,
},
{
Input: "https://storage.googleapis.com/foo/bar",
ExpectedResult: "",
ExpectError: true,
},
}
for _, g := range grid {
vfsPath, err := VFSPath(g.Input)
if !g.ExpectError {
if err != nil {
t.Fatalf("unexpected error parsing vfs path: %v", err)
}
if vfsPath != g.ExpectedResult {
t.Fatalf("s3 url does not match expected result (%v): %v", g.ExpectedResult, g.Input)
}
} else {
if err == nil {
t.Fatalf("unexpected error parsing %q", g.Input)
}
}
}
}
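Assuming a standard kops checkout, the new table-driven test can presumably be run on its own with something like:

go test ./util/pkg/vfs -run Test_VFSPath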
