From d49263b2ab614cad801e26b4a169eafe08d4a2a0 Mon Sep 17 00:00:00 2001 From: jshore1296 Date: Thu, 19 Dec 2024 14:05:25 -0500 Subject: [PATCH] feat(storage): return file metadata on read (#11212) * storage: Return file Metadata on file read * review comments * review comments * gofmt * Access metadata through function, rather than field Adding a map field to Attrs would have made the object uncomparable * Store metadata as a pointer to preserve API compatibility - keeps the Reader struct comparable --------- Co-authored-by: Chris Cotter --- storage/client_test.go | 61 +++++++++++++++++++++++++++++++++ storage/grpc_client.go | 2 ++ storage/http_client.go | 17 +++++++--- storage/integration_test.go | 44 ++++++++++++++++++++++-- storage/reader.go | 17 +++++++++- storage/reader_test.go | 68 +++++++++++++++++++++++++++++++++++++ 6 files changed, 202 insertions(+), 7 deletions(-) diff --git a/storage/client_test.go b/storage/client_test.go index 35a825289baa..403a04b84bc1 100644 --- a/storage/client_test.go +++ b/storage/client_test.go @@ -795,6 +795,67 @@ func TestOpenReaderEmulated(t *testing.T) { }) } +func TestOpenReaderEmulated_Metadata(t *testing.T) { + transportClientTest(context.Background(), t, func(t *testing.T, ctx context.Context, project, bucket string, client storageClient) { + // Populate test data. + _, err := client.CreateBucket(ctx, project, bucket, &BucketAttrs{ + Name: bucket, + }, nil) + if err != nil { + t.Fatalf("client.CreateBucket: %v", err) + } + prefix := time.Now().Nanosecond() + want := &ObjectAttrs{ + Bucket: bucket, + Name: fmt.Sprintf("%d-object-%d", prefix, time.Now().Nanosecond()), + } + w := veneerClient.Bucket(bucket).Object(want.Name).NewWriter(ctx) + if _, err := w.Write(randomBytesToWrite); err != nil { + t.Fatalf("failed to populate test data: %v", err) + } + if err := w.Close(); err != nil { + t.Fatalf("closing object: %v", err) + } + if _, err := veneerClient.Bucket(bucket).Object(want.Name).Update(ctx, ObjectAttrsToUpdate{ + Metadata: map[string]string{ + "Custom-Key": "custom-value", + "Some-Other-Key": "some-other-value", + }, + }); err != nil { + t.Fatalf("failed to update test object: %v", err) + } + + params := &newRangeReaderParams{ + bucket: bucket, + object: want.Name, + gen: defaultGen, + offset: 0, + length: -1, + } + r, err := client.NewRangeReader(ctx, params) + if err != nil { + t.Fatalf("opening reading: %v", err) + } + wantLen := len(randomBytesToWrite) + got := make([]byte, wantLen) + n, err := r.Read(got) + if n != wantLen { + t.Fatalf("expected to read %d bytes, but got %d", wantLen, n) + } + if diff := cmp.Diff(got, randomBytesToWrite); diff != "" { + t.Fatalf("Read: got(-),want(+):\n%s", diff) + } + expectedMetadata := map[string]string{ + "Custom-Key": "custom-value", + "Some-Other-Key": "some-other-value", + } + if diff := cmp.Diff(r.Metadata(), expectedMetadata); diff != "" { + t.Fatalf("Object Metadata: got(-),want(+):\n%s", diff) + } + + }) +} + func TestOpenWriterEmulated(t *testing.T) { transportClientTest(context.Background(), t, func(t *testing.T, ctx context.Context, project, bucket string, client storageClient) { // Populate test data. diff --git a/storage/grpc_client.go b/storage/grpc_client.go index bb746b8f031a..ccaeb849ca6d 100644 --- a/storage/grpc_client.go +++ b/storage/grpc_client.go @@ -1125,6 +1125,7 @@ func (c *grpcStorageClient) NewRangeReader(ctx context.Context, params *newRange wantCRC = checksums.GetCrc32C() } + metadata := obj.GetMetadata() r = &Reader{ Attrs: ReaderObjectAttrs{ Size: size, @@ -1136,6 +1137,7 @@ func (c *grpcStorageClient) NewRangeReader(ctx context.Context, params *newRange Generation: obj.GetGeneration(), CRC32C: wantCRC, }, + objectMetadata: &metadata, reader: &gRPCReader{ stream: res.stream, reopen: reopen, diff --git a/storage/http_client.go b/storage/http_client.go index 4c62ce930573..0ca5cd95edb0 100644 --- a/storage/http_client.go +++ b/storage/http_client.go @@ -1523,6 +1523,14 @@ func parseReadResponse(res *http.Response, params *newRangeReaderParams, reopen } } + metadata := map[string]string{} + for key, values := range res.Header { + if len(values) > 0 && strings.HasPrefix(key, "X-Goog-Meta-") { + key := key[len("X-Goog-Meta-"):] + metadata[key] = values[0] + } + } + attrs := ReaderObjectAttrs{ Size: size, ContentType: res.Header.Get("Content-Type"), @@ -1536,10 +1544,11 @@ func parseReadResponse(res *http.Response, params *newRangeReaderParams, reopen Decompressed: res.Uncompressed || uncompressedByServer(res), } return &Reader{ - Attrs: attrs, - size: size, - remain: remain, - checkCRC: checkCRC, + Attrs: attrs, + objectMetadata: &metadata, + size: size, + remain: remain, + checkCRC: checkCRC, reader: &httpReader{ reopen: reopen, body: body, diff --git a/storage/integration_test.go b/storage/integration_test.go index e28afae60850..03a1a6f822f6 100644 --- a/storage/integration_test.go +++ b/storage/integration_test.go @@ -5044,8 +5044,48 @@ func TestIntegration_ReaderAttrs(t *testing.T) { Metageneration: attrs.Metageneration, CRC32C: crc32c(c), } - if got != want { - t.Fatalf("got\t%v,\nwanted\t%v", got, want) + if diff := cmp.Diff(got, want); diff != "" { + t.Fatalf("diff got vs want: %v", diff) + } + }) +} + +func TestIntegration_ReaderAttrs_Metadata(t *testing.T) { + multiTransportTest(skipJSONReads(context.Background(), "metadata on read not supported on JSON api"), t, func(t *testing.T, ctx context.Context, bucket, _ string, client *Client) { + bkt := client.Bucket(bucket) + + const defaultType = "text/plain" + o := bkt.Object("reader-attrs-metadata-obj") + c := randomContents() + if err := writeObject(ctx, o, defaultType, c); err != nil { + t.Errorf("Write for %v failed with %v", o.ObjectName(), err) + } + t.Cleanup(func() { + if err := o.Delete(ctx); err != nil { + log.Printf("failed to delete test object: %v", err) + } + }) + + oa, err := o.Update(ctx, ObjectAttrsToUpdate{Metadata: map[string]string{"Custom-Key": "custom-value", "Other-Key": "other-value"}}) + if err != nil { + t.Fatal(err) + } + _ = oa + + o = o.Generation(oa.Generation) + rc, err := o.NewReader(ctx) + if err != nil { + t.Fatal(err) + } + + got := rc.Metadata() + want := map[string]string{ + "Custom-Key": "custom-value", + "Other-Key": "other-value", + } + + if diff := cmp.Diff(got, want); diff != "" { + t.Fatalf("diff got vs want: %v", diff) } }) } diff --git a/storage/reader.go b/storage/reader.go index e1d96659282b..7f5abee1f25e 100644 --- a/storage/reader.go +++ b/storage/reader.go @@ -222,7 +222,9 @@ var emptyBody = ioutil.NopCloser(strings.NewReader("")) // the stored CRC, returning an error from Read if there is a mismatch. This integrity check // is skipped if transcoding occurs. See https://cloud.google.com/storage/docs/transcoding. type Reader struct { - Attrs ReaderObjectAttrs + Attrs ReaderObjectAttrs + objectMetadata *map[string]string + seen, remain, size int64 checkCRC bool // Did we check the CRC? This is now only used by tests. @@ -298,3 +300,16 @@ func (r *Reader) CacheControl() string { func (r *Reader) LastModified() (time.Time, error) { return r.Attrs.LastModified, nil } + +// Metadata returns user-provided metadata, in key/value pairs. +// +// It can be nil if no metadata is present, or if the client uses the JSON +// API for downloads. Only the XML and gRPC APIs support getting +// custom metadata via the Reader; for JSON make a separate call to +// ObjectHandle.Attrs. +func (r *Reader) Metadata() map[string]string { + if r.objectMetadata != nil { + return *r.objectMetadata + } + return nil +} diff --git a/storage/reader_test.go b/storage/reader_test.go index d0142af14eed..59150d8daa13 100644 --- a/storage/reader_test.go +++ b/storage/reader_test.go @@ -29,6 +29,7 @@ import ( "strings" "testing" + "github.com/google/go-cmp/cmp" "google.golang.org/api/option" ) @@ -380,6 +381,7 @@ func TestContentEncodingGzipWithReader(t *testing.T) { w.Header().Set("Etag", `"c50e3e41c9bc9df34e84c94ce073f928"`) w.Header().Set("X-Goog-Generation", "1587012235914578") w.Header().Set("X-Goog-MetaGeneration", "2") + w.Header().Set("X-Goog-Meta-custom-metadata-key", "custom-metadata-value") w.Header().Set("X-Goog-Stored-Content-Encoding", "gzip") w.Header().Set("vary", "Accept-Encoding") w.Header().Set("x-goog-stored-content-length", "43") @@ -470,6 +472,72 @@ func TestContentEncodingGzipWithReader(t *testing.T) { }, option.WithEndpoint(mockGCS.URL), option.WithoutAuthentication(), option.WithHTTPClient(whc)) } +func TestMetadataParsingWithReader(t *testing.T) { + bucketName := "my-bucket" + objectName := "test" + downloadObjectXMLurl := fmt.Sprintf("/%s/%s", bucketName, objectName) + downloadObjectJSONurl := fmt.Sprintf("/b/%s/o/%s?alt=media&prettyPrint=false&projection=full", bucketName, objectName) + + original := bytes.Repeat([]byte("a"), 4) + mockGCS := httptest.NewUnstartedServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.String() { + case downloadObjectXMLurl, downloadObjectJSONurl: + // Serve back the file. + w.Header().Set("Content-Type", "text/plain") + w.Header().Set("Etag", `"c50e3e41c9bc9df34e84c94ce073f928"`) + w.Header().Set("X-Goog-Generation", "1587012235914578") + w.Header().Set("X-Goog-MetaGeneration", "2") + w.Header().Set("X-Goog-Meta-custom-metadata-key", "custom-metadata-value") + w.Header().Set("vary", "Accept-Encoding") + w.Header().Set("x-goog-stored-content-length", "4") + w.Header().Set("x-goog-hash", "crc32c=pYIWwQ==") + w.Header().Set("x-goog-hash", "md5=xQ4+Qcm8nfNOhMlM4HP5KA==") + w.Header().Set("x-goog-storage-class", "STANDARD") + w.Write(original) + default: + fmt.Fprintf(w, "unrecognized URL %s", r.URL) + } + })) + mockGCS.EnableHTTP2 = true + mockGCS.StartTLS() + defer mockGCS.Close() + + ctx := context.Background() + hc := mockGCS.Client() + ux, _ := url.Parse(mockGCS.URL) + hc.Transport.(*http.Transport).TLSClientConfig.InsecureSkipVerify = true + wrt := &alwaysToTargetURLRoundTripper{ + destURL: ux, + hc: hc, + } + + whc := &http.Client{Transport: wrt} + + multiReaderTest(ctx, t, func(t *testing.T, c *Client) { + obj := c.Bucket(bucketName).Object(objectName) + rd, err := obj.NewReader(ctx) + if err != nil { + t.Fatal(err) + } + defer rd.Close() + + expectedMetadata := map[string]string{ + "Custom-Metadata-Key": "custom-metadata-value", + } + if diff := cmp.Diff(rd.Metadata(), expectedMetadata); diff != "" { + t.Fatalf("metadata mismatch diff got vs want: %v", diff) + } + + got, err := ioutil.ReadAll(rd) + if err != nil { + t.Fatal(err) + } + if g, w := got, original; !bytes.Equal(g, w) { + t.Fatalf("Response mismatch\nGot:\n%q\n\nWant:\n%q", g, w) + } + }, option.WithEndpoint(mockGCS.URL), option.WithoutAuthentication(), option.WithHTTPClient(whc)) +} + // alwaysToTargetURLRoundTripper ensures that every single request // is routed to a target destination. Some requests within the storage // client by-pass using the provided HTTP client, hence this enforcemenet.