From 6727b1191a9855c0c4bd5b39693e544dca934092 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Muhammed=20Can=20K=C3=BC=C3=A7=C3=BCkaslan?= <32893303+Kucukaslan@users.noreply.github.com> Date: Fri, 22 Jul 2022 18:34:12 +0300 Subject: [PATCH] storage/url: fixes the bug where some part of destination path is removed by cp (#456) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cause of problem is a little bit weird. (url).SetRelative method was given the source (abs.) path as it was provided in command line arguments including the wildcard characters(* and ?). Hence it determined relative path incorrectly, where "expected" relative path became "../expected" which in turn caused the some part of destination to be lost (since path/filepath.Join converts e.g. /a/b/../c to /a/c) We also need to identify whether the * and ? characters present in base strings are part of key or they are used for wildcard operation. So we need to know if the "raw" flag is given. To. solve this I changed (URL).SetRelative method to take argument of type URL rather than string. Fixes #360 Co-authored-by: Selman Kayrancioglu Co-authored-by: Aykut Farsak Co-authored-by: Muhammed Can Küçükaslan <32893303+MuhammedCanKucukaslan@users.noreply.github.com> --- CHANGELOG.md | 1 + e2e/cp_test.go | 154 +++++++++++++++++++++++++++++++++++++++- e2e/sync_test.go | 53 ++++++++++++++ storage/fs.go | 4 +- storage/url/url.go | 34 ++++++++- storage/url/url_test.go | 75 +++++++++++++++++++ 6 files changed, 314 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4ae646c5c..9c8e64431 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ #### Bugfixes - Fixed a bug where (`--stat`) prints unnecessarily when used with help and version commands ([#452](https://github.com/peak/s5cmd/issues/452)) - Changed cp error message to be more precise. "given object not found" error message now will also include absolute path of the file. ([#463](https://github.com/peak/s5cmd/pull/463)) +- Fixed a bug where some part of the destination path is removed by `cp` and `sync` subcommands ([#360](https://github.com/peak/s5cmd/issues/360)) - Fixed a bug where proxy is not being used when `--no-verify-ssl` flag is used. ([#445](https://github.com/peak/s5cmd/issues/445)) - Fixed `unknown url format` error when object key also includes `s3://` e.g. `s5cmd ls s3://foo/bar/s3://baz` ([#449](https://github.com/peak/s5cmd/issues/449)) diff --git a/e2e/cp_test.go b/e2e/cp_test.go index 484c4275b..cf8d2ddf5 100644 --- a/e2e/cp_test.go +++ b/e2e/cp_test.go @@ -1053,7 +1053,6 @@ func TestFlattenCopyDirToS3(t *testing.T) { // cp dir/* s3://bucket/ func TestCopyMultipleFilesToS3Bucket(t *testing.T) { - t.Parallel() bucket := s3BucketFromTestName(t) @@ -1110,9 +1109,160 @@ func TestCopyMultipleFilesToS3Bucket(t *testing.T) { } } +// cp parent/*/name.txt s3://bucket/newfolder +func TestCopyMultipleFilesWithWildcardedDirectoryToS3Bucket(t *testing.T) { + t.Parallel() + + bucket := s3BucketFromTestName(t) + + s3client, s5cmd, cleanup := setup(t) + defer cleanup() + + createBucket(t, s3client, bucket) + + folderLayout := []fs.PathOp{ + fs.WithDir("parent", fs.WithDir( + "child1", + fs.WithFile("name.txt", "A file in parent/child1/"), + ), + fs.WithDir( + "child2", + fs.WithFile("name.txt", "A file in parent/child2/"), + ), + ), + } + + workdir := fs.NewDir(t, "somedir", folderLayout...) + dstpath := fmt.Sprintf("s3://%v/newfolder/", bucket) + srcpath := workdir.Path() + srcpath = filepath.ToSlash(srcpath) + defer workdir.Remove() + + cmd := s5cmd("cp", srcpath+"/parent/*/name.txt", dstpath) + result := icmd.RunCmd(cmd) + + result.Assert(t, icmd.Success) + rs := result.Stdout() + assertLines(t, rs, map[int]compareFunc{ + 0: equals(`cp %v/parent/child1/name.txt %vchild1/name.txt`, srcpath, dstpath), + 1: equals(`cp %v/parent/child2/name.txt %vchild2/name.txt`, srcpath, dstpath), + }, sortInput(true)) + + // assert local filesystem + expected := fs.Expected(t, folderLayout...) + assert.Assert(t, fs.Equal(workdir.Path(), expected)) + expectedS3Content := map[string]string{ + "newfolder/child1/name.txt": "A file in parent/child1/", + "newfolder/child2/name.txt": "A file in parent/child2/", + } + + // assert s3 + for filename, content := range expectedS3Content { + assert.Assert(t, ensureS3Object(s3client, bucket, filename, content)) + } +} + +// cp parent/c*/name.txt s3://bucket/newfolder +func TestCopyMultipleFilesEndWildcardedToS3Bucket(t *testing.T) { + t.Parallel() + + bucket := s3BucketFromTestName(t) + + s3client, s5cmd, cleanup := setup(t) + defer cleanup() + + createBucket(t, s3client, bucket) + + folderLayout := []fs.PathOp{ + fs.WithDir("parent", fs.WithDir( + "child1", + fs.WithFile("name.txt", "A file in parent/child1/"), + ), + fs.WithDir( + "child2", + fs.WithFile("name.txt", "A file in parent/child2/"), + ), + ), + } + + workdir := fs.NewDir(t, "somedir", folderLayout...) + dstpath := fmt.Sprintf("s3://%v/newfolder/", bucket) + srcpath := workdir.Path() + srcpath = filepath.ToSlash(srcpath) + defer workdir.Remove() + + cmd := s5cmd("cp", srcpath+"/parent/c*/name.txt", dstpath) + result := icmd.RunCmd(cmd) + + result.Assert(t, icmd.Success) + rs := result.Stdout() + assertLines(t, rs, map[int]compareFunc{ + 0: equals(`cp %v/parent/child1/name.txt %vchild1/name.txt`, srcpath, dstpath), + 1: equals(`cp %v/parent/child2/name.txt %vchild2/name.txt`, srcpath, dstpath), + }, sortInput(true)) + + // assert local filesystem + expected := fs.Expected(t, folderLayout...) + assert.Assert(t, fs.Equal(workdir.Path(), expected)) + expectedS3Content := map[string]string{ + "newfolder/child1/name.txt": "A file in parent/child1/", + "newfolder/child2/name.txt": "A file in parent/child2/", + } + + // assert s3 + for filename, content := range expectedS3Content { + assert.Assert(t, ensureS3Object(s3client, bucket, filename, content)) + } +} + +// cp parent/c*1/name.txt s3://bucket/newfolder +func TestCopyMultipleFilesMiddleWildcardedDirectoryToS3Bucket(t *testing.T) { + t.Parallel() + + bucket := s3BucketFromTestName(t) + + s3client, s5cmd, cleanup := setup(t) + defer cleanup() + + createBucket(t, s3client, bucket) + + folderLayout := []fs.PathOp{ + fs.WithDir("parent", fs.WithDir( + "child1", + fs.WithFile("name.txt", "A file in parent/child1/"), + )), + } + + workdir := fs.NewDir(t, "somedir", folderLayout...) + dstpath := fmt.Sprintf("s3://%v/newfolder/", bucket) + srcpath := workdir.Path() + srcpath = filepath.ToSlash(srcpath) + defer workdir.Remove() + + cmd := s5cmd("cp", srcpath+"/parent/c*1/name.txt", dstpath) + result := icmd.RunCmd(cmd) + + result.Assert(t, icmd.Success) + rs := result.Stdout() + assertLines(t, rs, map[int]compareFunc{ + 0: equals(`cp %v/parent/child1/name.txt %vchild1/name.txt`, srcpath, dstpath), + }, sortInput(true)) + + // assert local filesystem + expected := fs.Expected(t, folderLayout...) + assert.Assert(t, fs.Equal(workdir.Path(), expected)) + expectedS3Content := map[string]string{ + "newfolder/child1/name.txt": "A file in parent/child1/", + } + + // assert s3 + for filename, content := range expectedS3Content { + assert.Assert(t, ensureS3Object(s3client, bucket, filename, content)) + } +} + // cp --flatten dir/* s3://bucket/ func TestFlattenCopyMultipleFilesToS3Bucket(t *testing.T) { - t.Parallel() bucket := s3BucketFromTestName(t) diff --git a/e2e/sync_test.go b/e2e/sync_test.go index 8626a530b..50a6d657f 100644 --- a/e2e/sync_test.go +++ b/e2e/sync_test.go @@ -246,6 +246,59 @@ func TestSyncLocalFolderToS3EmptyBucket(t *testing.T) { } } +// cp parent/*/name.txt s3://bucket/newfolder +func TestSyncMultipleFilesWithWildcardedDirectoryToS3Bucket(t *testing.T) { + t.Parallel() + + bucket := s3BucketFromTestName(t) + + s3client, s5cmd, cleanup := setup(t) + defer cleanup() + + createBucket(t, s3client, bucket) + + folderLayout := []fs.PathOp{ + fs.WithDir("parent", fs.WithDir( + "child1", + fs.WithFile("name.txt", "A file in parent/child1/"), + ), + fs.WithDir( + "child2", + fs.WithFile("name.txt", "A file in parent/child2/"), + ), + ), + } + + workdir := fs.NewDir(t, "somedir", folderLayout...) + dstpath := fmt.Sprintf("s3://%v/newfolder/", bucket) + srcpath := workdir.Path() + srcpath = filepath.ToSlash(srcpath) + defer workdir.Remove() + + cmd := s5cmd("sync", srcpath+"/parent/*/name.txt", dstpath) + result := icmd.RunCmd(cmd) + + result.Assert(t, icmd.Success) + rs := result.Stdout() + assertLines(t, rs, map[int]compareFunc{ + 0: equals(`cp %v/parent/child1/name.txt %vchild1/name.txt`, srcpath, dstpath), + 1: equals(`cp %v/parent/child2/name.txt %vchild2/name.txt`, srcpath, dstpath), + }, sortInput(true)) + + // assert local filesystem + expected := fs.Expected(t, folderLayout...) + assert.Assert(t, fs.Equal(workdir.Path(), expected)) + expectedS3Content := map[string]string{ + "newfolder/child1/name.txt": "A file in parent/child1/", + "newfolder/child2/name.txt": "A file in parent/child2/", + } + + // assert s3 + for filename, content := range expectedS3Content { + assert.Assert(t, ensureS3Object(s3client, bucket, filename, content)) + } +} + // sync s3://bucket/* folder/ func TestSyncS3BucketToEmptyFolder(t *testing.T) { t.Parallel() diff --git a/storage/fs.go b/storage/fs.go index 628244a10..3626414e6 100644 --- a/storage/fs.go +++ b/storage/fs.go @@ -86,7 +86,7 @@ func (f *Filesystem) expandGlob(ctx context.Context, src *url.URL, followSymlink filename := filename fileurl, _ := url.New(filename) - fileurl.SetRelative(src.Absolute()) + fileurl.SetRelative(src) obj, _ := f.Stat(ctx, fileurl) @@ -120,7 +120,7 @@ func walkDir(ctx context.Context, fs *Filesystem, src *url.URL, followSymlinks b return err } - fileurl.SetRelative(src.Absolute()) + fileurl.SetRelative(src) //skip if symlink is pointing to a file and --no-follow-symlink if !ShouldProcessUrl(fileurl, followSymlinks) { diff --git a/storage/url/url.go b/storage/url/url.go index daa48cf9a..b19f8ed96 100644 --- a/storage/url/url.go +++ b/storage/url/url.go @@ -290,9 +290,37 @@ func (u *URL) Clone() *URL { } // SetRelative explicitly sets the relative path of u against given base value. -func (u *URL) SetRelative(base string) { - dir := filepath.Dir(base) - u.relativePath, _ = filepath.Rel(dir, u.Absolute()) +// If the base path contains `globCharacters` then, the relative path is +// determined with respect to the parent directory of the so called wildcarded +// object. +func (u *URL) SetRelative(base *URL) { + basePath := base.Absolute() + if base.IsWildcard() { + // When the basePath includes a wildcard character (globCharacters) + // replace basePath with its substring up to the + // index of the first instance of a wildcard character. + // + // If we don't handle this, the filepath.Dir() + // will assume those wildcards as a part of the name. + // Consequently the filepath.Rel() will determine + // the relative path incorrectly since the wildcarded + // path string won't match with the actual name of the + // object. + // e.g. base.Absolute(): "/a/*/n" + // u.Absolute() : "/a/b/n" + // + // if we don't trim substring from globCharacters on + // filepath.Dir() will give: "/a/*" + // consequently the + // filepath.Rel() will give: "../b/c" rather than "b/c" + // since "b" and "*" are not the same. + loc := strings.IndexAny(basePath, globCharacters) + if loc >= 0 { + basePath = basePath[:loc] + } + } + baseDir := filepath.Dir(basePath) + u.relativePath, _ = filepath.Rel(baseDir, u.Absolute()) } // Match reports whether if given key matches with the object. diff --git a/storage/url/url_test.go b/storage/url/url_test.go index a1e04aa14..4aef7eaa3 100644 --- a/storage/url/url_test.go +++ b/storage/url/url_test.go @@ -1,6 +1,7 @@ package url import ( + "path/filepath" "reflect" "regexp" "testing" @@ -525,3 +526,77 @@ func TestURLWithMode(t *testing.T) { } } } + +func TestURLSetRelative(t *testing.T) { + type testcase struct { + name string + base string + target string + expect string + } + + sep := string(filepath.Separator) + tests := []testcase{ + { + name: "local_sibling_child_object", + base: sep + "parent" + sep + "child" + sep + "object", + target: sep + "parent" + sep + "child2" + sep + "object", + expect: ".." + sep + "child2" + sep + "object", + }, + { + name: "local_same_directory_object", + base: sep + "parent" + sep + "child" + sep + "object", + target: sep + "parent" + sep + "child" + sep + "object2", + expect: "object2", + }, + { + name: "s3_sibling_child_object", + base: "s3://bucket/parent" + sep + "child" + sep + "object", + target: "s3://bucket/parent" + sep + "child2" + sep + "", + expect: ".." + sep + "child2", + }, + { + name: "local_child_directory_fully_wildcarded", + base: sep + "parent" + sep + "*" + sep + "object", + target: sep + "parent" + sep + "child" + sep + "object", + expect: "child" + sep + "object", + }, + { + name: "local_child_directory_partially_wildcarded", + base: sep + "parent" + sep + "c*d" + sep + "object", + target: sep + "parent" + sep + "child" + sep + "object", + expect: "child" + sep + "object", + }, + { + name: "local_child_directory_fully_wildcarded_with_question_mark", + base: sep + "parent" + sep + "?" + sep + "object", + target: sep + "parent" + sep + "c" + sep + "object", + expect: "c" + sep + "object", + }, + { + name: "s3_child_directory_wildcarded", + base: "s3://bucket/parent" + sep + "*" + sep + "object", + target: "s3://bucket/parent" + sep + "child2" + sep + "", + expect: "child2", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + baseURL, err := New(tt.base) + if err != nil { + t.Errorf("URL cannot be instantiated: \nPath: %v, Error: %v", tt.base, err) + } + targUrl, err := New(tt.target) + if err != nil { + t.Errorf("URL cannot be instantiated:\nPath: %v, Error: %v", tt.base, err) + } + + targUrl.SetRelative(baseURL) + + if diff := cmp.Diff(tt.expect, targUrl.Relative()); diff != "" { + t.Errorf("SetRelative() with %s did not produce expected path (-want +got):\n%s", tt.name, diff) + } + }) + } +}