Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(storage): return file metadata on read #11212

Merged
merged 8 commits into from
Dec 19, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions storage/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -795,6 +795,62 @@ func TestOpenReaderEmulated(t *testing.T) {
})
}

func TestOpenReaderEmulated_Metadata(t *testing.T) {
transportClientTest(skipHTTP("metadata on read not supported by JSON api"), t, func(t *testing.T, ctx context.Context, project, bucket string, client storageClient) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe the HTTP client should use XML by default, does the test not pass with a default HTTP client?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

right you are.

// Populate test data.
_, err := client.CreateBucket(ctx, project, bucket, &BucketAttrs{
Name: bucket,
}, nil)
if err != nil {
t.Fatalf("client.CreateBucket: %v", err)
}
prefix := time.Now().Nanosecond()
want := &ObjectAttrs{
Bucket: bucket,
Name: fmt.Sprintf("%d-object-%d", prefix, time.Now().Nanosecond()),
}
w := veneerClient.Bucket(bucket).Object(want.Name).NewWriter(ctx)
if _, err := w.Write(randomBytesToWrite); err != nil {
t.Fatalf("failed to populate test data: %v", err)
}
if err := w.Close(); err != nil {
t.Fatalf("closing object: %v", err)
}
if _, err := veneerClient.Bucket(bucket).Object(want.Name).Update(ctx, ObjectAttrsToUpdate{
Metadata: map[string]string{
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's do at least 2 keys in here just to make sure the decoding logic works correctly.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

"Custom-Key": "custom-value",
},
}); err != nil {
t.Fatalf("failed to update test object: %v", err)
}

params := &newRangeReaderParams{
bucket: bucket,
object: want.Name,
gen: defaultGen,
offset: 0,
length: -1,
}
r, err := client.NewRangeReader(ctx, params)
if err != nil {
t.Fatalf("opening reading: %v", err)
}
wantLen := len(randomBytesToWrite)
got := make([]byte, wantLen)
n, err := r.Read(got)
if n != wantLen {
t.Fatalf("expected to read %d bytes, but got %d", wantLen, n)
}
if diff := cmp.Diff(got, randomBytesToWrite); diff != "" {
t.Fatalf("Read: got(-),want(+):\n%s", diff)
}
if diff := cmp.Diff(r.Attrs.Metadata["Custom-Key"], "custom-value"); diff != "" {
t.Fatalf("Object Metadata: got(-),want(+):\n%s", diff)
}

})
}

func TestOpenWriterEmulated(t *testing.T) {
transportClientTest(context.Background(), t, func(t *testing.T, ctx context.Context, project, bucket string, client storageClient) {
// Populate test data.
Expand Down
1 change: 1 addition & 0 deletions storage/grpc_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -1133,6 +1133,7 @@ func (c *grpcStorageClient) NewRangeReader(ctx context.Context, params *newRange
CacheControl: obj.GetCacheControl(),
LastModified: obj.GetUpdateTime().AsTime(),
Metageneration: obj.GetMetageneration(),
Metadata: obj.GetMetadata(),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was concerned this wouldn't be sufficient because we didn't have it in the unmarshaler for gRPC, but it looks like we did that part already so this should be fine.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I was surprised that was already in there!

Generation: obj.GetGeneration(),
CRC32C: wantCRC,
},
Expand Down
9 changes: 9 additions & 0 deletions storage/http_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -1523,6 +1523,14 @@ func parseReadResponse(res *http.Response, params *newRangeReaderParams, reopen
}
}

metadata := map[string]string{}
for key, values := range res.Header {
if len(values) > 0 && strings.HasPrefix(key, "X-Goog-Meta-") {
key := key[len("X-Goog-Meta-"):]
metadata[key] = values[0]
}
}

attrs := ReaderObjectAttrs{
Size: size,
ContentType: res.Header.Get("Content-Type"),
Expand All @@ -1531,6 +1539,7 @@ func parseReadResponse(res *http.Response, params *newRangeReaderParams, reopen
LastModified: lm,
StartOffset: startOffset,
Generation: params.gen,
Metadata: metadata,
Metageneration: metaGen,
CRC32C: crc,
Decompressed: res.Uncompressed || uncompressedByServer(res),
Expand Down
58 changes: 55 additions & 3 deletions storage/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import (
"net/http"
"net/http/httputil"
"os"
"reflect"
"sort"
"strconv"
"strings"
Expand Down Expand Up @@ -204,10 +205,10 @@ func initIntegrationTest() func() error {
if err != nil {
log.Fatalf("NewStorageControlClient: %v", err)
}
if err := client.Bucket(bucketName).Create(ctx, testutil.ProjID(), nil); err != nil {
if err := client.Bucket(bucketName).Create(ctx, testutil.ProjID(), &BucketAttrs{SoftDeletePolicy: &SoftDeletePolicy{RetentionDuration: 0}}); err != nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should maybe be a separate change in our test harness? Was it impossible to delete the bucket if this wasn't included?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oops, this is leftover. My org has a policy set that made this necessary for me to be able to run the tests. I'll remove this before merge.

log.Fatalf("creating bucket %q: %v", bucketName, err)
}
if err := client.Bucket(grpcBucketName).Create(ctx, testutil.ProjID(), nil); err != nil {
if err := client.Bucket(grpcBucketName).Create(ctx, testutil.ProjID(), &BucketAttrs{SoftDeletePolicy: &SoftDeletePolicy{RetentionDuration: 0}}); err != nil {
log.Fatalf("creating bucket %q: %v", grpcBucketName, err)
}
return cleanup
Expand Down Expand Up @@ -5041,7 +5042,58 @@ func TestIntegration_ReaderAttrs(t *testing.T) {
Metageneration: attrs.Metageneration,
CRC32C: crc32c(c),
}
if got != want {
if !reflect.DeepEqual(got, want) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we can use https://pkg.go.dev/github.com/google/go-cmp/cmp#Diff as elsewhere in these tests?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done.

t.Fatalf("got\t%v,\nwanted\t%v", got, want)
}
})
}

func TestIntegration_ReaderAttrs_Metadata(t *testing.T) {
multiTransportTest(skipJSONReads(context.Background(), "metadata on read not supported on JSON api"), t, func(t *testing.T, ctx context.Context, bucket, _ string, client *Client) {
bkt := client.Bucket(bucket)

const defaultType = "text/plain"
o := bkt.Object("reader-attrs-obj")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's use a different object name; repeatedly overwriting the same object can be problematic.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

c := randomContents()
if err := writeObject(ctx, o, defaultType, c); err != nil {
t.Errorf("Write for %v failed with %v", o.ObjectName(), err)
}
defer func() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use t.Cleanup to ensure this is run.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

if err := o.Delete(ctx); err != nil {
log.Printf("failed to delete test object: %v", err)
}
}()

oa, err := o.Update(ctx, ObjectAttrsToUpdate{Metadata: map[string]string{"Custom-Key": "custom-value"}})
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again, let's use at least 2 keys here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

if err != nil {
t.Fatal(err)
}
_ = oa

o = o.Generation(oa.Generation)
rc, err := o.NewReader(ctx)
if err != nil {
t.Fatal(err)
}

attrs, err := o.Attrs(ctx)
if err != nil {
t.Fatal(err)
}

got := rc.Attrs
want := ReaderObjectAttrs{
Size: attrs.Size,
ContentType: attrs.ContentType,
ContentEncoding: attrs.ContentEncoding,
CacheControl: got.CacheControl, // ignored, tested separately
LastModified: got.LastModified, // ignored, tested separately
Generation: attrs.Generation,
Metadata: map[string]string{"Custom-Key": "custom-value"},
Metageneration: attrs.Metageneration,
CRC32C: crc32c(c),
}
if !reflect.DeepEqual(got, want) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use cmp.Diff and let's only check the Metadata field in this test.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

t.Fatalf("got\t%v,\nwanted\t%v", got, want)
}
})
Expand Down
4 changes: 4 additions & 0 deletions storage/reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ type ReaderObjectAttrs struct {
// Generation is the generation number of the object's content.
Generation int64

// Metadata represents user-provided metadata, in key/value pairs.
// Not supported by the JSON api. Use ObjectHandle.Attrs instead.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would rephrase this as follows:

// Metadata represents user-provided metadata, in key/value pairs.
//
// It can be nil if no metadata is present, or if the client uses the JSON
// API for downloads. Only the XML and gRPC APIs support getting
// custom metadata via the Reader; for JSON make a separate call to 
// ObjectHandle.Attrs.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

Metadata map[string]string

// Metageneration is the version of the metadata for this object at
// this generation. This field is used for preconditions and for
// detecting changes in metadata. A metageneration number is only
Expand Down
68 changes: 68 additions & 0 deletions storage/reader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"net/http"
"net/http/httptest"
"net/url"
"reflect"
"strconv"
"strings"
"testing"
Expand Down Expand Up @@ -380,6 +381,7 @@ func TestContentEncodingGzipWithReader(t *testing.T) {
w.Header().Set("Etag", `"c50e3e41c9bc9df34e84c94ce073f928"`)
w.Header().Set("X-Goog-Generation", "1587012235914578")
w.Header().Set("X-Goog-MetaGeneration", "2")
w.Header().Set("X-Goog-Meta-custom-metadata-key", "custom-metadata-value")
w.Header().Set("X-Goog-Stored-Content-Encoding", "gzip")
w.Header().Set("vary", "Accept-Encoding")
w.Header().Set("x-goog-stored-content-length", "43")
Expand Down Expand Up @@ -470,6 +472,72 @@ func TestContentEncodingGzipWithReader(t *testing.T) {
}, option.WithEndpoint(mockGCS.URL), option.WithoutAuthentication(), option.WithHTTPClient(whc))
}

func TestMetadataParsingWithReader(t *testing.T) {
bucketName := "my-bucket"
objectName := "test"
downloadObjectXMLurl := fmt.Sprintf("/%s/%s", bucketName, objectName)
downloadObjectJSONurl := fmt.Sprintf("/b/%s/o/%s?alt=media&prettyPrint=false&projection=full", bucketName, objectName)

original := bytes.Repeat([]byte("a"), 4)
mockGCS := httptest.NewUnstartedServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch r.URL.String() {
case downloadObjectXMLurl, downloadObjectJSONurl:
// Serve back the file.
w.Header().Set("Content-Type", "text/plain")
w.Header().Set("Etag", `"c50e3e41c9bc9df34e84c94ce073f928"`)
w.Header().Set("X-Goog-Generation", "1587012235914578")
w.Header().Set("X-Goog-MetaGeneration", "2")
w.Header().Set("X-Goog-Meta-custom-metadata-key", "custom-metadata-value")
w.Header().Set("vary", "Accept-Encoding")
w.Header().Set("x-goog-stored-content-length", "4")
w.Header().Set("x-goog-hash", "crc32c=pYIWwQ==")
w.Header().Set("x-goog-hash", "md5=xQ4+Qcm8nfNOhMlM4HP5KA==")
w.Header().Set("x-goog-storage-class", "STANDARD")
w.Write(original)
default:
fmt.Fprintf(w, "unrecognized URL %s", r.URL)
}
}))
mockGCS.EnableHTTP2 = true
mockGCS.StartTLS()
defer mockGCS.Close()

ctx := context.Background()
hc := mockGCS.Client()
ux, _ := url.Parse(mockGCS.URL)
hc.Transport.(*http.Transport).TLSClientConfig.InsecureSkipVerify = true
wrt := &alwaysToTargetURLRoundTripper{
destURL: ux,
hc: hc,
}

whc := &http.Client{Transport: wrt}

multiReaderTest(ctx, t, func(t *testing.T, c *Client) {
obj := c.Bucket(bucketName).Object(objectName)
rd, err := obj.NewReader(ctx)
if err != nil {
t.Fatal(err)
}
defer rd.Close()

expectedMetadata := map[string]string{
"Custom-Metadata-Key": "custom-metadata-value",
}
if !reflect.DeepEqual(rd.Attrs.Metadata, expectedMetadata) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One more place to switch to cmp.Diff

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Old habits die hard, haha.

t.Fatalf("metadata mismatch\nGot:\n%v\n\nWant:\n%v", rd.Attrs.Metadata, expectedMetadata)
}

got, err := ioutil.ReadAll(rd)
if err != nil {
t.Fatal(err)
}
if g, w := got, original; !bytes.Equal(g, w) {
t.Fatalf("Response mismatch\nGot:\n%q\n\nWant:\n%q", g, w)
}
}, option.WithEndpoint(mockGCS.URL), option.WithoutAuthentication(), option.WithHTTPClient(whc))
}

// alwaysToTargetURLRoundTripper ensures that every single request
// is routed to a target destination. Some requests within the storage
// client by-pass using the provided HTTP client, hence this enforcemenet.
Expand Down
4 changes: 4 additions & 0 deletions storage/retry_conformance_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"net/http"
"net/url"
"os"
"runtime"
"slices"
"strings"
"testing"
Expand Down Expand Up @@ -586,6 +587,9 @@ func TestRetryConformance(t *testing.T) {
if host == "" {
t.Skip("This test must use the testbench emulator; set STORAGE_EMULATOR_HOST to run.")
}
if runtime.GOOS == "darwin" {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's remove this from the PR; seems non-relevant to this particular issue.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 there was handling for this in the test script so I moved it in here, good to remove though.

t.Skip("We do not expect the RetryConformanceTest suite to pass on darwin due to\n differences in the network errors emitted by the system.")
}
endpoint, err := url.Parse(host)
if err != nil {
t.Fatalf("error parsing emulator host (make sure it includes the scheme such as http://host): %v", err)
Expand Down