Skip to content

Commit

Permalink
Update VFS to call ListObjectsV2 from the AWS SDK. (#4216)
Browse files Browse the repository at this point in the history
This updates the `VFS::ls` implementation for S3 to call ListObjectsV2 from the AWS SDK, which improves performance when listing a prefix that contains a large number of objects.

This came up while working on SC-31529 after testing recursive ls performance with ~350k results. ListObjectsV1 completes in ~10 minutes while ListObjectsV2 completes in 1 minute.

---
TYPE: IMPROVEMENT
DESC: Update VFS to call ListObjectsV2 from the AWS SDK.
  • Loading branch information
shaunrd0 authored Aug 3, 2023
1 parent 154485d commit fe7c13d
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 21 deletions.
40 changes: 20 additions & 20 deletions tiledb/sm/filesystem/s3.cc
Original file line number Diff line number Diff line change
Expand Up @@ -745,18 +745,20 @@ Status S3::is_empty_bucket(const URI& bucket, bool* is_empty) const {

bool exists;
RETURN_NOT_OK(is_bucket(bucket, &exists));
if (!exists)
if (!exists) {
return LOG_STATUS(Status_S3Error(
"Cannot check if bucket is empty; Bucket does not exist"));
}

Aws::Http::URI aws_uri = bucket.c_str();
Aws::S3::Model::ListObjectsRequest list_objects_request;
Aws::S3::Model::ListObjectsV2Request list_objects_request;
list_objects_request.SetBucket(aws_uri.GetAuthority());
list_objects_request.SetPrefix("");
list_objects_request.SetDelimiter("/");
if (request_payer_ != Aws::S3::Model::RequestPayer::NOT_SET)
if (request_payer_ != Aws::S3::Model::RequestPayer::NOT_SET) {
list_objects_request.SetRequestPayer(request_payer_);
auto list_objects_outcome = client_->ListObjects(list_objects_request);
}
auto list_objects_outcome = client_->ListObjectsV2(list_objects_request);

if (!list_objects_outcome.IsSuccess()) {
return LOG_STATUS(Status_S3Error(
Expand Down Expand Up @@ -887,22 +889,24 @@ tuple<Status, optional<std::vector<directory_entry>>> S3::ls_with_sizes(
Aws::Http::URI aws_uri = prefix_str.c_str();
auto aws_prefix = remove_front_slash(aws_uri.GetPath().c_str());
std::string aws_auth = aws_uri.GetAuthority().c_str();
Aws::S3::Model::ListObjectsRequest list_objects_request;
Aws::S3::Model::ListObjectsV2Request list_objects_request;
list_objects_request.SetBucket(aws_uri.GetAuthority());
list_objects_request.SetPrefix(aws_prefix.c_str());
list_objects_request.SetDelimiter(delimiter.c_str());
if (request_payer_ != Aws::S3::Model::RequestPayer::NOT_SET)
if (request_payer_ != Aws::S3::Model::RequestPayer::NOT_SET) {
list_objects_request.SetRequestPayer(request_payer_);
}

std::vector<directory_entry> entries;

bool is_done = false;
while (!is_done) {
// Not requesting more items than needed
if (max_paths != -1)
if (max_paths != -1) {
list_objects_request.SetMaxKeys(
max_paths - static_cast<int>(entries.size()));
auto list_objects_outcome = client_->ListObjects(list_objects_request);
}
auto list_objects_outcome = client_->ListObjectsV2(list_objects_request);

if (!list_objects_outcome.IsSuccess()) {
auto st = LOG_STATUS(Status_S3Error(
Expand Down Expand Up @@ -934,19 +938,15 @@ tuple<Status, optional<std::vector<directory_entry>>> S3::ls_with_sizes(
!list_objects_outcome.GetResult().GetIsTruncated() ||
(max_paths != -1 && entries.size() >= static_cast<size_t>(max_paths));
if (!is_done) {
// The documentation states that "GetNextMarker" will be non-empty only
// when the delimiter in the request is non-empty. When the delimiter is
// empty, we must use the last returned key as the next marker.
assert(
!delimiter.empty() ||
!list_objects_outcome.GetResult().GetContents().empty());
Aws::String next_marker =
!delimiter.empty() ?
list_objects_outcome.GetResult().GetNextMarker() :
list_objects_outcome.GetResult().GetContents().back().GetKey();
assert(!next_marker.empty());

list_objects_request.SetMarker(std::move(next_marker));
list_objects_outcome.GetResult().GetNextContinuationToken();
if (next_marker.empty()) {
auto st =
LOG_STATUS(Status_S3Error("Failed to retrieve next continuation "
"token for ListObjectsV2 request."));
return {st, nullopt};
}
list_objects_request.SetContinuationToken(std::move(next_marker));
}
}

Expand Down
2 changes: 1 addition & 1 deletion tiledb/sm/filesystem/s3.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@
#include <aws/s3/model/GetObjectRequest.h>
#include <aws/s3/model/HeadBucketRequest.h>
#include <aws/s3/model/HeadObjectRequest.h>
#include <aws/s3/model/ListObjectsRequest.h>
#include <aws/s3/model/ListObjectsV2Request.h>
#include <aws/s3/model/PutObjectRequest.h>
#include <aws/s3/model/UploadPartRequest.h>
#include <sys/types.h>
Expand Down

0 comments on commit fe7c13d

Please sign in to comment.