Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add wildcard and prefix support to cat #736

Merged
merged 12 commits into from
Jul 16, 2024
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
#### Breaking changes
- Changed the exit code from 1 to 0 for `ls` when used with an empty bucket. Exits with 1 if the bucket is non-existent. ([#722](https://github.com/peak/s5cmd/issues/722))

#### Features
- Added prefix and wildcard support to `cat` command. ([#716](https://github.com/peak/s5cmd/issues/716))

## v2.2.2 - 13 Sep 2023

#### Bugfixes
Expand Down
47 changes: 38 additions & 9 deletions command/cat.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ Examples:

2. Print specific version of a remote object's content to stdout
> s5cmd {{.HelpName}} --version-id VERSION_ID s3://bucket/prefix/object

3. Concatenate multiple objects matching a prefix or wildcard and print to stdout
> s5cmd {{.HelpName}} "s3://bucket/prefix/*"
`

func NewCatCommand() *cli.Command {
Expand Down Expand Up @@ -111,17 +114,45 @@ func (c Cat) Run(ctx context.Context) error {
printError(c.fullCommand, c.op, err)
return err
}

if c.src.IsWildcard() || c.src.IsPrefix() || c.src.IsBucket() {
objectChan := client.List(ctx, c.src, false)
return c.processObjects(ctx, client, objectChan)
}

_, err = client.Stat(ctx, c.src)
if err != nil {
printError(c.fullCommand, c.op, err)
return err
}
return c.processSingleObject(ctx, client, &storage.Object{URL: c.src})
}

// processObjects drains objectChan and hands every received object to
// processSingleObject, in the order the channel delivers them.
//
// It stops at the first object whose processing fails and returns that error.
// It returns nil once the channel has been closed and every object has been
// processed successfully.
func (c Cat) processObjects(ctx context.Context, client *storage.S3, objectChan <-chan *storage.Object) error {
	for {
		object, open := <-objectChan
		if !open {
			// Channel closed: nothing left to process.
			return nil
		}

		err := c.processSingleObject(ctx, client, object)
		if err != nil {
			return err
		}
	}
}

// processSingleObject writes the content of a single object to stdout.
//
// If obj carries an error (obj.Err), the error is printed and returned without
// attempting a download. Directory entries are skipped and produce no output.
// Any error returned by the download is printed and returned to the caller.
func (c Cat) processSingleObject(ctx context.Context, client *storage.S3, obj *storage.Object) error {
	if obj.Err != nil {
		printError(c.fullCommand, c.op, obj.Err)
		return obj.Err
	}
	if obj.Type.IsDir() {
		return nil
	}

	buf := orderedwriter.New(os.Stdout)

	// Download the object that was handed in (obj.URL), not c.src: for
	// wildcard and prefix sources c.src is the pattern that was listed, not a
	// concrete object.
	if _, err := client.Get(ctx, obj.URL, buf, c.concurrency, c.partSize); err != nil {
		printError(c.fullCommand, c.op, err)
		return err
	}

	return nil
}

Expand All @@ -140,17 +171,15 @@ func validateCatCommand(c *cli.Context) error {
return fmt.Errorf("source must be a remote object")
}

if src.IsBucket() || src.IsPrefix() {
return fmt.Errorf("remote source must be an object")
}

if src.IsWildcard() {
return fmt.Errorf("remote source %q can not contain glob characters", src)
}

if err := checkVersioningWithGoogleEndpoint(c); err != nil {
return err
}

if src.IsWildcard() || src.IsPrefix() || src.IsBucket() {
if c.String("version-id") != "" {
return fmt.Errorf("wildcard/prefix operations are disabled with --version-id flag")
}
}

return nil
}
276 changes: 252 additions & 24 deletions e2e/cat_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,30 +133,6 @@ func TestCatS3ObjectFail(t *testing.T) {
jsonCheck(true),
},
},
{
src: "s3://%v/prefix/file.txt/*",
name: "cat remote object with glob",
cmd: []string{
"--json",
"cat",
},
expected: map[int]compareFunc{
0: match(`{"operation":"cat","command":"cat s3:\/\/(.+)?\/prefix\/file\.txt\/\*","error":"remote source \\"s3:\/\/(.*)\/prefix\/file\.txt\/\*\\" can not contain glob characters"}`),
},
assertOps: []assertOp{
jsonCheck(true),
},
},
{
src: "s3://%v/prefix/",
name: "cat bucket",
cmd: []string{
"cat",
},
expected: map[int]compareFunc{
0: match(`ERROR "cat s3://(.+)?": remote source must be an object`),
},
},
}

for _, tc := range testcases {
Expand Down Expand Up @@ -229,6 +205,47 @@ func TestCatLocalFileFail(t *testing.T) {
}
}

// TestCatInEmptyBucket runs `cat` against a bucket that contains no objects.
//
// A bare bucket source and a bucket-root prefix source are expected to exit
// with code 0 and have stdout checked against a nil expectation map. A
// wildcard source is expected to exit with code 1 and report "no object found"
// on stderr.
func TestCatInEmptyBucket(t *testing.T) {
	t.Parallel()

	s3client, s5cmd := setup(t)

	bucket := s3BucketFromTestName(t)
	createBucket(t, s3client, bucket)

	// Sources for which an empty listing is not an error.
	succeeding := []struct {
		name string
		src  string
	}{
		{name: "without prefix", src: "s3://" + bucket},
		{name: "with prefix", src: "s3://" + bucket + "/"},
	}

	for _, tc := range succeeding {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			res := icmd.RunCmd(s5cmd("cat", tc.src))

			res.Assert(t, icmd.Expected{ExitCode: 0})
			assertLines(t, res.Stdout(), nil)
		})
	}

	t.Run("with wildcard", func(t *testing.T) {
		t.Parallel()

		res := icmd.RunCmd(s5cmd("cat", "s3://"+bucket+"/*"))

		res.Assert(t, icmd.Expected{ExitCode: 1})

		wantErr := fmt.Sprintf(`ERROR "cat s3://%v/*": no object found`, bucket)
		assertLines(t, res.Stderr(), map[int]compareFunc{
			0: contains(wantErr),
		})
	})
}

// getSequentialFileContent creates a string with size bytes in size.
func getSequentialFileContent(size int64) (string, map[int]compareFunc) {
sb := strings.Builder{}
Expand Down Expand Up @@ -305,4 +322,215 @@ func TestCatByVersionID(t *testing.T) {
t.Errorf("(-want +got):\n%v", diff)
}
}

version := "1"

// wildcard and prefix fail cases
cmd = s5cmd("cat", "--version-id", version, "s3://"+bucket+"/")
result = icmd.RunCmd(cmd)

result.Assert(t, icmd.Expected{ExitCode: 1})
assertLines(t, result.Stderr(), map[int]compareFunc{
0: equals(`ERROR "cat --version-id=%v s3://%v/": wildcard/prefix operations are disabled with --version-id flag`, version, bucket),
}, strictLineCheck(false))

cmd = s5cmd("cat", "--version-id", version, "s3://"+bucket+"/folder/")
result = icmd.RunCmd(cmd)

result.Assert(t, icmd.Expected{ExitCode: 1})
assertLines(t, result.Stderr(), map[int]compareFunc{
0: equals(`ERROR "cat --version-id=%v s3://%v/folder/": wildcard/prefix operations are disabled with --version-id flag`, version, bucket),
}, strictLineCheck(false))

cmd = s5cmd("cat", "--version-id", version, "s3://"+bucket+"/*")
result = icmd.RunCmd(cmd)

result.Assert(t, icmd.Expected{ExitCode: 1})
assertLines(t, result.Stderr(), map[int]compareFunc{
0: equals(`ERROR "cat --version-id=%v s3://%v/*": wildcard/prefix operations are disabled with --version-id flag`, version, bucket),
}, strictLineCheck(false))
}

func TestCatPrefix(t *testing.T) {
t.Parallel()

testCases := []struct {
files []string
contents []string
seruman marked this conversation as resolved.
Show resolved Hide resolved
prefix string
expected string
}{
{
files: []string{"file1.txt", "file2.txt"},
contents: []string{"content0", "content1"},
expected: "content0content1"},
{
files: []string{"file1.txt", "file2.txt", "dir/file3.txt", "dir/file4.txt"},
contents: []string{"content0", "content1", "content2", "content3"},
expected: "content0content1",
},
{
files: []string{"file1.txt", "file2.txt", "dir/file3.txt", "dir/file4.txt"},
contents: []string{"content0", "content1", "content2", "content3"},
prefix: "dir/",
expected: "content2content3",
},
{
files: []string{"file1.txt", "file2.txt", "dir/file3.txt", "dir/file4.txt", "dir/nesteddir/file5.txt"},
contents: []string{"content0", "content1", "content2", "content3", "content4"},
prefix: "dir/",
expected: "content2content3",
},
{
files: []string{"file1.txt", "file2.txt", "dir/file3.txt", "dir/file4.txt", "dir/nesteddir/file5.txt"},
contents: []string{"content0", "content1", "content2", "content3", "content4"},
prefix: "dir/nesteddir/",
expected: "content4",
},
}

for _, tc := range testCases {
tc := tc
t.Run(tc.expected, func(t *testing.T) {
seruman marked this conversation as resolved.
Show resolved Hide resolved
t.Parallel()

s3client, s5cmd := setup(t)

bucket := s3BucketFromTestName(t)
createBucket(t, s3client, bucket)

for idx, file := range tc.files {
content := tc.contents[idx]
putFile(t, s3client, bucket, file, content)
}

cmd := s5cmd("cat", fmt.Sprintf("s3://%v/%v", bucket, tc.prefix))
result := icmd.RunCmd(cmd)

result.Assert(t, icmd.Success)
assertLines(t, result.Stdout(), map[int]compareFunc{
0: equals(tc.expected),
}, alignment(true))
})
}
}

func TestCatWildcard(t *testing.T) {
t.Parallel()

s3client, s5cmd := setup(t)
bucket := s3BucketFromTestName(t)
createBucket(t, s3client, bucket)
seruman marked this conversation as resolved.
Show resolved Hide resolved

files := []struct {
key string
content string
seruman marked this conversation as resolved.
Show resolved Hide resolved
}{
{"foo1.txt", "content0"},
{"foo2.txt", "content1"},
{"bar1.txt", "content2"},
{"foolder/foo3.txt", "content3"},
{"log-file-2024-01.txt", "content4"},
{"log-file-2024-02.txt", "content5"},
{"log-file-2023-01.txt", "content6"},
{"log-file-2022-01.txt", "content7"},
}

for _, file := range files {
putFile(t, s3client, bucket, file.key, file.content)
}

testCases := []struct {
name string
expression string
expected string
}{
{
name: "wildcard matching with both file and folder",
expression: "foo*",
expected: "content0content1content3",
},
{
name: "log files 2024",
expression: "log-file-2024-*",
expected: "content4content5",
},
{
name: "all log files",
expression: "log-file-*",
expected: "content7content6content4content5",
},
}

for _, tc := range testCases {
tc := tc
t.Run("", func(t *testing.T) {
t.Parallel()

cmd := s5cmd("cat", fmt.Sprintf("s3://%v/%v", bucket, tc.expression))
result := icmd.RunCmd(cmd)

result.Assert(t, icmd.Success)
assertLines(t, result.Stdout(), map[int]compareFunc{
0: equals(tc.expected),
}, alignment(true))
})
}
}

// TestPrefixWildcardFail runs `cat` with a wildcard source and with a prefix
// source against a freshly created bucket into which nothing is uploaded.
//
// Both the plain and the --json output modes are expected to exit with code 1
// and to report "no object found" as the first stderr line.
func TestPrefixWildcardFail(t *testing.T) {
	t.Parallel()

	sources := []struct {
		name       string
		expression string
	}{
		{name: "wildcard", expression: "foo*"},
		{name: "prefix", expression: "foolder/"},
	}

	// The output modes differ only in the global flags placed before `cat`
	// and in the shape of the error line expected on stderr.
	modes := []struct {
		name    string
		flags   []string
		wantErr string
	}{
		{
			name:    "default",
			wantErr: `ERROR "cat s3://%v/%v": no object found`,
		},
		{
			name:    "json",
			flags:   []string{"--json"},
			wantErr: `{"operation":"cat","command":"cat s3://%v/%v","error":"no object found"}`,
		},
	}

	for _, source := range sources {
		source := source
		t.Run(source.name, func(t *testing.T) {
			t.Parallel()

			for _, mode := range modes {
				mode := mode
				t.Run(mode.name, func(t *testing.T) {
					t.Parallel()

					// Every leaf subtest sets up its own client and bucket.
					s3client, s5cmd := setup(t)

					bucket := s3BucketFromTestName(t)
					createBucket(t, s3client, bucket)

					args := append([]string{}, mode.flags...)
					args = append(args, "cat", fmt.Sprintf("s3://%v/%v", bucket, source.expression))

					result := icmd.RunCmd(s5cmd(args...))

					result.Assert(t, icmd.Expected{ExitCode: 1})
					assertLines(t, result.Stderr(), map[int]compareFunc{
						0: equals(mode.wantErr, bucket, source.expression),
					}, strictLineCheck(false))
				})
			}
		})
	}
}
Loading