Skip to content

Commit

Permalink
storage/url: fixes the bug where some part of destination path is rem…
Browse files Browse the repository at this point in the history
…oved by cp (#456)

The cause of problem is a little bit weird.

(url).SetRelative method was given the source (abs.) path as it was provided in command line arguments including the wildcard characters(* and ?). Hence it determined relative path incorrectly, where "expected" relative path became "../expected" which in turn caused the some part of destination to be lost (since path/filepath.Join converts e.g. /a/b/../c to /a/c)

We also need to identify whether the * and ? characters present in base strings are part of key or they are used for wildcard operation. So we need to know if the "raw" flag is given. To. solve this I changed (URL).SetRelative method to take argument of type URL rather than string.

Fixes #360

Co-authored-by: Selman Kayrancioglu <[email protected]>
Co-authored-by: Aykut Farsak <[email protected]>
Co-authored-by: Muhammed Can Küçükaslan <[email protected]>
  • Loading branch information
4 people authored Jul 22, 2022
1 parent 6b87fb4 commit 6727b11
Show file tree
Hide file tree
Showing 6 changed files with 314 additions and 7 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#### Bugfixes
- Fixed a bug where (`--stat`) prints unnecessarily when used with help and version commands ([#452](https://github.com/peak/s5cmd/issues/452))
- Changed cp error message to be more precise. "given object not found" error message now will also include absolute path of the file. ([#463](https://github.com/peak/s5cmd/pull/463))
- Fixed a bug where some part of the destination path is removed by `cp` and `sync` subcommands ([#360](https://github.com/peak/s5cmd/issues/360))
- Fixed a bug where proxy is not being used when `--no-verify-ssl` flag is used. ([#445](https://github.com/peak/s5cmd/issues/445))
- Fixed `unknown url format` error when object key also includes `s3://` e.g. `s5cmd ls s3://foo/bar/s3://baz` ([#449](https://github.com/peak/s5cmd/issues/449))

Expand Down
154 changes: 152 additions & 2 deletions e2e/cp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1053,7 +1053,6 @@ func TestFlattenCopyDirToS3(t *testing.T) {

// cp dir/* s3://bucket/
func TestCopyMultipleFilesToS3Bucket(t *testing.T) {

t.Parallel()

bucket := s3BucketFromTestName(t)
Expand Down Expand Up @@ -1110,9 +1109,160 @@ func TestCopyMultipleFilesToS3Bucket(t *testing.T) {
}
}

// cp parent/*/name.txt s3://bucket/newfolder
func TestCopyMultipleFilesWithWildcardedDirectoryToS3Bucket(t *testing.T) {
t.Parallel()

bucket := s3BucketFromTestName(t)

s3client, s5cmd, cleanup := setup(t)
defer cleanup()

createBucket(t, s3client, bucket)

folderLayout := []fs.PathOp{
fs.WithDir("parent", fs.WithDir(
"child1",
fs.WithFile("name.txt", "A file in parent/child1/"),
),
fs.WithDir(
"child2",
fs.WithFile("name.txt", "A file in parent/child2/"),
),
),
}

workdir := fs.NewDir(t, "somedir", folderLayout...)
dstpath := fmt.Sprintf("s3://%v/newfolder/", bucket)
srcpath := workdir.Path()
srcpath = filepath.ToSlash(srcpath)
defer workdir.Remove()

cmd := s5cmd("cp", srcpath+"/parent/*/name.txt", dstpath)
result := icmd.RunCmd(cmd)

result.Assert(t, icmd.Success)
rs := result.Stdout()
assertLines(t, rs, map[int]compareFunc{
0: equals(`cp %v/parent/child1/name.txt %vchild1/name.txt`, srcpath, dstpath),
1: equals(`cp %v/parent/child2/name.txt %vchild2/name.txt`, srcpath, dstpath),
}, sortInput(true))

// assert local filesystem
expected := fs.Expected(t, folderLayout...)
assert.Assert(t, fs.Equal(workdir.Path(), expected))
expectedS3Content := map[string]string{
"newfolder/child1/name.txt": "A file in parent/child1/",
"newfolder/child2/name.txt": "A file in parent/child2/",
}

// assert s3
for filename, content := range expectedS3Content {
assert.Assert(t, ensureS3Object(s3client, bucket, filename, content))
}
}

// cp parent/c*/name.txt s3://bucket/newfolder
func TestCopyMultipleFilesEndWildcardedToS3Bucket(t *testing.T) {
t.Parallel()

bucket := s3BucketFromTestName(t)

s3client, s5cmd, cleanup := setup(t)
defer cleanup()

createBucket(t, s3client, bucket)

folderLayout := []fs.PathOp{
fs.WithDir("parent", fs.WithDir(
"child1",
fs.WithFile("name.txt", "A file in parent/child1/"),
),
fs.WithDir(
"child2",
fs.WithFile("name.txt", "A file in parent/child2/"),
),
),
}

workdir := fs.NewDir(t, "somedir", folderLayout...)
dstpath := fmt.Sprintf("s3://%v/newfolder/", bucket)
srcpath := workdir.Path()
srcpath = filepath.ToSlash(srcpath)
defer workdir.Remove()

cmd := s5cmd("cp", srcpath+"/parent/c*/name.txt", dstpath)
result := icmd.RunCmd(cmd)

result.Assert(t, icmd.Success)
rs := result.Stdout()
assertLines(t, rs, map[int]compareFunc{
0: equals(`cp %v/parent/child1/name.txt %vchild1/name.txt`, srcpath, dstpath),
1: equals(`cp %v/parent/child2/name.txt %vchild2/name.txt`, srcpath, dstpath),
}, sortInput(true))

// assert local filesystem
expected := fs.Expected(t, folderLayout...)
assert.Assert(t, fs.Equal(workdir.Path(), expected))
expectedS3Content := map[string]string{
"newfolder/child1/name.txt": "A file in parent/child1/",
"newfolder/child2/name.txt": "A file in parent/child2/",
}

// assert s3
for filename, content := range expectedS3Content {
assert.Assert(t, ensureS3Object(s3client, bucket, filename, content))
}
}

// cp parent/c*1/name.txt s3://bucket/newfolder
func TestCopyMultipleFilesMiddleWildcardedDirectoryToS3Bucket(t *testing.T) {
t.Parallel()

bucket := s3BucketFromTestName(t)

s3client, s5cmd, cleanup := setup(t)
defer cleanup()

createBucket(t, s3client, bucket)

folderLayout := []fs.PathOp{
fs.WithDir("parent", fs.WithDir(
"child1",
fs.WithFile("name.txt", "A file in parent/child1/"),
)),
}

workdir := fs.NewDir(t, "somedir", folderLayout...)
dstpath := fmt.Sprintf("s3://%v/newfolder/", bucket)
srcpath := workdir.Path()
srcpath = filepath.ToSlash(srcpath)
defer workdir.Remove()

cmd := s5cmd("cp", srcpath+"/parent/c*1/name.txt", dstpath)
result := icmd.RunCmd(cmd)

result.Assert(t, icmd.Success)
rs := result.Stdout()
assertLines(t, rs, map[int]compareFunc{
0: equals(`cp %v/parent/child1/name.txt %vchild1/name.txt`, srcpath, dstpath),
}, sortInput(true))

// assert local filesystem
expected := fs.Expected(t, folderLayout...)
assert.Assert(t, fs.Equal(workdir.Path(), expected))
expectedS3Content := map[string]string{
"newfolder/child1/name.txt": "A file in parent/child1/",
}

// assert s3
for filename, content := range expectedS3Content {
assert.Assert(t, ensureS3Object(s3client, bucket, filename, content))
}
}

// cp --flatten dir/* s3://bucket/
func TestFlattenCopyMultipleFilesToS3Bucket(t *testing.T) {

t.Parallel()

bucket := s3BucketFromTestName(t)
Expand Down
53 changes: 53 additions & 0 deletions e2e/sync_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,59 @@ func TestSyncLocalFolderToS3EmptyBucket(t *testing.T) {
}
}

// cp parent/*/name.txt s3://bucket/newfolder
func TestSyncMultipleFilesWithWildcardedDirectoryToS3Bucket(t *testing.T) {
t.Parallel()

bucket := s3BucketFromTestName(t)

s3client, s5cmd, cleanup := setup(t)
defer cleanup()

createBucket(t, s3client, bucket)

folderLayout := []fs.PathOp{
fs.WithDir("parent", fs.WithDir(
"child1",
fs.WithFile("name.txt", "A file in parent/child1/"),
),
fs.WithDir(
"child2",
fs.WithFile("name.txt", "A file in parent/child2/"),
),
),
}

workdir := fs.NewDir(t, "somedir", folderLayout...)
dstpath := fmt.Sprintf("s3://%v/newfolder/", bucket)
srcpath := workdir.Path()
srcpath = filepath.ToSlash(srcpath)
defer workdir.Remove()

cmd := s5cmd("sync", srcpath+"/parent/*/name.txt", dstpath)
result := icmd.RunCmd(cmd)

result.Assert(t, icmd.Success)
rs := result.Stdout()
assertLines(t, rs, map[int]compareFunc{
0: equals(`cp %v/parent/child1/name.txt %vchild1/name.txt`, srcpath, dstpath),
1: equals(`cp %v/parent/child2/name.txt %vchild2/name.txt`, srcpath, dstpath),
}, sortInput(true))

// assert local filesystem
expected := fs.Expected(t, folderLayout...)
assert.Assert(t, fs.Equal(workdir.Path(), expected))
expectedS3Content := map[string]string{
"newfolder/child1/name.txt": "A file in parent/child1/",
"newfolder/child2/name.txt": "A file in parent/child2/",
}

// assert s3
for filename, content := range expectedS3Content {
assert.Assert(t, ensureS3Object(s3client, bucket, filename, content))
}
}

// sync s3://bucket/* folder/
func TestSyncS3BucketToEmptyFolder(t *testing.T) {
t.Parallel()
Expand Down
4 changes: 2 additions & 2 deletions storage/fs.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ func (f *Filesystem) expandGlob(ctx context.Context, src *url.URL, followSymlink
filename := filename

fileurl, _ := url.New(filename)
fileurl.SetRelative(src.Absolute())
fileurl.SetRelative(src)

obj, _ := f.Stat(ctx, fileurl)

Expand Down Expand Up @@ -120,7 +120,7 @@ func walkDir(ctx context.Context, fs *Filesystem, src *url.URL, followSymlinks b
return err
}

fileurl.SetRelative(src.Absolute())
fileurl.SetRelative(src)

//skip if symlink is pointing to a file and --no-follow-symlink
if !ShouldProcessUrl(fileurl, followSymlinks) {
Expand Down
34 changes: 31 additions & 3 deletions storage/url/url.go
Original file line number Diff line number Diff line change
Expand Up @@ -290,9 +290,37 @@ func (u *URL) Clone() *URL {
}

// SetRelative explicitly sets the relative path of u against given base value.
func (u *URL) SetRelative(base string) {
dir := filepath.Dir(base)
u.relativePath, _ = filepath.Rel(dir, u.Absolute())
// If the base path contains `globCharacters` then, the relative path is
// determined with respect to the parent directory of the so called wildcarded
// object.
func (u *URL) SetRelative(base *URL) {
basePath := base.Absolute()
if base.IsWildcard() {
// When the basePath includes a wildcard character (globCharacters)
// replace basePath with its substring up to the
// index of the first instance of a wildcard character.
//
// If we don't handle this, the filepath.Dir()
// will assume those wildcards as a part of the name.
// Consequently the filepath.Rel() will determine
// the relative path incorrectly since the wildcarded
// path string won't match with the actual name of the
// object.
// e.g. base.Absolute(): "/a/*/n"
// u.Absolute() : "/a/b/n"
//
// if we don't trim substring from globCharacters on
// filepath.Dir() will give: "/a/*"
// consequently the
// filepath.Rel() will give: "../b/c" rather than "b/c"
// since "b" and "*" are not the same.
loc := strings.IndexAny(basePath, globCharacters)
if loc >= 0 {
basePath = basePath[:loc]
}
}
baseDir := filepath.Dir(basePath)
u.relativePath, _ = filepath.Rel(baseDir, u.Absolute())
}

// Match reports whether if given key matches with the object.
Expand Down
75 changes: 75 additions & 0 deletions storage/url/url_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package url

import (
"path/filepath"
"reflect"
"regexp"
"testing"
Expand Down Expand Up @@ -525,3 +526,77 @@ func TestURLWithMode(t *testing.T) {
}
}
}

func TestURLSetRelative(t *testing.T) {
type testcase struct {
name string
base string
target string
expect string
}

sep := string(filepath.Separator)
tests := []testcase{
{
name: "local_sibling_child_object",
base: sep + "parent" + sep + "child" + sep + "object",
target: sep + "parent" + sep + "child2" + sep + "object",
expect: ".." + sep + "child2" + sep + "object",
},
{
name: "local_same_directory_object",
base: sep + "parent" + sep + "child" + sep + "object",
target: sep + "parent" + sep + "child" + sep + "object2",
expect: "object2",
},
{
name: "s3_sibling_child_object",
base: "s3://bucket/parent" + sep + "child" + sep + "object",
target: "s3://bucket/parent" + sep + "child2" + sep + "",
expect: ".." + sep + "child2",
},
{
name: "local_child_directory_fully_wildcarded",
base: sep + "parent" + sep + "*" + sep + "object",
target: sep + "parent" + sep + "child" + sep + "object",
expect: "child" + sep + "object",
},
{
name: "local_child_directory_partially_wildcarded",
base: sep + "parent" + sep + "c*d" + sep + "object",
target: sep + "parent" + sep + "child" + sep + "object",
expect: "child" + sep + "object",
},
{
name: "local_child_directory_fully_wildcarded_with_question_mark",
base: sep + "parent" + sep + "?" + sep + "object",
target: sep + "parent" + sep + "c" + sep + "object",
expect: "c" + sep + "object",
},
{
name: "s3_child_directory_wildcarded",
base: "s3://bucket/parent" + sep + "*" + sep + "object",
target: "s3://bucket/parent" + sep + "child2" + sep + "",
expect: "child2",
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
baseURL, err := New(tt.base)
if err != nil {
t.Errorf("URL cannot be instantiated: \nPath: %v, Error: %v", tt.base, err)
}
targUrl, err := New(tt.target)
if err != nil {
t.Errorf("URL cannot be instantiated:\nPath: %v, Error: %v", tt.base, err)
}

targUrl.SetRelative(baseURL)

if diff := cmp.Diff(tt.expect, targUrl.Relative()); diff != "" {
t.Errorf("SetRelative() with %s did not produce expected path (-want +got):\n%s", tt.name, diff)
}
})
}
}

0 comments on commit 6727b11

Please sign in to comment.