From 5f8ae4c126816fcf9d08a1bcacb64b8e9ea6d0bf Mon Sep 17 00:00:00 2001
From: Isaiah Norton
Date: Fri, 3 Jan 2025 09:43:24 -0500
Subject: [PATCH 1/6] Add -fexperimental-library flag on Clang 16 to fix compile errors (#5416)

Fix compilation errors on newer upstream Clang versions, which do not yet
fully support `std::jthread` and `std::stop_token` without the
`-fexperimental-library` flag. The flag is already used for the AppleClang
variant.

x-ref
- https://github.com/TileDB-Inc/TileDB/pull/5364
- closes https://github.com/TileDB-Inc/TileDB/issues/5411

---
TYPE: NO_HISTORY
DESC: Add -fexperimental-library flag on Clang 16 to fix compile errors
---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2eb6942eb2a..a40b0b33f14 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -201,7 +201,7 @@ else()
     endif()
   endif()
 
-  if (CMAKE_CXX_COMPILER_ID MATCHES "AppleClang" AND ${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER_EQUAL "16")
+  if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND ${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER_EQUAL "16")
     add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-fexperimental-library>)
   endif()
 endif()

From 97e1a1137b92c0cf0fe56959cc8fcd5a60e13134 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Du=C5=A1an=20Baran?=
Date: Fri, 3 Jan 2025 16:03:56 +0100
Subject: [PATCH 2/6] Prepare files for change of main branch name (#5407)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR changes all mentions of `dev` as the default branch to the new
default branch name, `main`.

Follow-up PRs:
https://github.com/TileDB-Inc/conda-forge-nightly-controller/pull/161
https://github.com/TileDB-Inc/centralized-tiledb-nightlies/pull/32

---
TYPE: NO_HISTORY
DESC: Change all mentions of default branch
---------
Co-authored-by: Dušan Baran
---
 .github/DIRECTORY.md                 |  2 +-
 .github/workflows/build-docs.yml     |  2 +-
 .github/workflows/full-ci.yml        |  2 +-
 .github/workflows/quarto-render.yml  |  2 +-
 .github/workflows/release.yml        |  2 +-
 .github/workflows/test-cloud-e2e.yml |  2 +-
 .gitlab-ci.yml                       |  2 +-
 CONTRIBUTING.md                      | 15 +++++++--------
 README.md                            | 16 ++++++++--------
 bootstrap                            |  2 +-
 10 files changed, 23 insertions(+), 24 deletions(-)

diff --git a/.github/DIRECTORY.md b/.github/DIRECTORY.md
index 63e003ea3a2..d9b8435bdb8 100644
--- a/.github/DIRECTORY.md
+++ b/.github/DIRECTORY.md
@@ -30,4 +30,4 @@ the version-specific tests. This fan-out setup reduces the runtime of each job f
 
 ## Documentation
 
-Documentation is built by `workflows/build-docs.yml` using `/scripts/ci/build_docs.sh`.
\ No newline at end of file
+Documentation is built by `workflows/build-docs.yml` using `/scripts/ci/build_docs.sh`.
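For context on patch 1 above: `std::jthread` and `std::stop_token` are the C++20 facilities that libc++ 16 ships only behind `-fexperimental-library`, which is why the patch extends the flag from AppleClang to all Clang builds. A minimal sketch, not taken from the TileDB sources, that exercises both; with Clang 16 and libc++ it builds only when that flag is passed:

```cpp
// Build sketch (assuming Clang 16 + libc++):
//   clang++ -std=c++20 -stdlib=libc++ -fexperimental-library demo.cc
#include <chrono>
#include <iostream>
#include <thread>

int main() {
  // std::jthread hands its callable a std::stop_token; its destructor
  // calls request_stop() and then joins automatically.
  std::jthread worker([](std::stop_token st) {
    while (!st.stop_requested()) {
      std::this_thread::sleep_for(std::chrono::milliseconds(10));
    }
    std::cout << "worker observed the stop request\n";
  });
  std::this_thread::sleep_for(std::chrono::milliseconds(50));
}  // ~jthread(): request_stop() + join()
```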
diff --git a/.github/workflows/build-docs.yml b/.github/workflows/build-docs.yml
index 31c15addeda..e03d566a337 100644
--- a/.github/workflows/build-docs.yml
+++ b/.github/workflows/build-docs.yml
@@ -2,7 +2,7 @@ name: build-docs
 on:
   push:
     branches:
-      - dev
+      - main
       - 'release-*'
     paths-ignore:
       - '.github/workflows/quarto-render.yml'
diff --git a/.github/workflows/full-ci.yml b/.github/workflows/full-ci.yml
index 316b5b56f16..3b00a6ab6c3 100644
--- a/.github/workflows/full-ci.yml
+++ b/.github/workflows/full-ci.yml
@@ -3,7 +3,7 @@ on:
   workflow_dispatch:
   push:
     branches:
-      - dev
+      - main
       - release-*
       - refs/tags/*
 
diff --git a/.github/workflows/quarto-render.yml b/.github/workflows/quarto-render.yml
index 7a6a8eec066..f81f2e44b83 100644
--- a/.github/workflows/quarto-render.yml
+++ b/.github/workflows/quarto-render.yml
@@ -4,7 +4,7 @@ name: Render and deploy Quarto files
 on:
   push:
     # publish on merge only
-    branches: dev
+    branches: main
     # when changes match any path below
     paths:
       - '.github/workflows/quarto-render.yml'
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 18eb1f515ee..6aad2377c2e 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -4,7 +4,7 @@ on:
   workflow_dispatch:
   push:
     branches:
-      - dev
+      - main
       - 'release-*'
     tags:
       - '*'
diff --git a/.github/workflows/test-cloud-e2e.yml b/.github/workflows/test-cloud-e2e.yml
index dae04836de7..102c8dacd1c 100644
--- a/.github/workflows/test-cloud-e2e.yml
+++ b/.github/workflows/test-cloud-e2e.yml
@@ -9,7 +9,7 @@ on:
         type: boolean
   push:
     branches:
-      - dev
+      - main
       - release-*
 
 env:
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 0cb52e92984..294109c1b26 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -4,7 +4,7 @@ stages:
 trigger_pipeline:
   stage: test
   rules:
-    - if: $CI_COMMIT_BRANCH =~ /^dev|^release-.*/ || $CI_COMMIT_TAG != "" # only/except rules are no longer actively developed. Please use `rules` instead.
+    - if: $CI_COMMIT_BRANCH =~ /^main|^release-.*/ || $CI_COMMIT_TAG != "" # only/except rules are no longer actively developed. Please use `rules` instead.
     - if: $CI_PIPELINE_SOURCE == "external_pull_request_event"
       changes:
         - "!.github/workflows/quarto-render.yml"
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 627ab9af9e4..205725e0468 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -5,9 +5,9 @@ help you file issues, bug reports, or contribute code to the open source TileDB
 
 ## Contribution Checklist:
 
-* Reporting a bug? Please read [how to file a bug report](https://github.com/TileDB-Inc/TileDB/blob/dev/CONTRIBUTING.md#reporting-a-bug) section to make sure sufficient information is included.
+* Reporting a bug? Please read the [how to file a bug report](https://github.com/TileDB-Inc/TileDB/blob/main/CONTRIBUTING.md#reporting-a-bug) section to make sure sufficient information is included.
 
-* Contributing code? You rock! Be sure to [review the contributor section](https://github.com/TileDB-Inc/TileDB/blob/dev/CONTRIBUTING.md#contributing-code) for helpful tips on the tools we use to build TileDB, format code, and issue pull requests (PR)'s.
+* Contributing code? You rock! Be sure to [review the contributor section](https://github.com/TileDB-Inc/TileDB/blob/main/CONTRIBUTING.md#contributing-code) for helpful tips on the tools we use to build TileDB, format code, and issue pull requests (PRs).
 
 ## Reporting a Bug
 
@@ -16,7 +16,7 @@ A useful bug report filed as a GitHub issue provides information about how to re
 
 1. Before opening a new [GitHub issue](https://github.com/TileDB-Inc/TileDB/issues) try searching the existing issues to see if someone else has already noticed the same problem.
 2. When filing a bug report, provide where possible:
-  - The version TileDB (`tiledb_version()`) or if a `dev` version, specific commit that triggers the error.
+  - The version of TileDB (`tiledb_version()`) or, if a `main` version, the specific commit that triggers the error.
   - The full error message, including the backtrace (if possible). Verbose error reporting is enabled by building TileDB with `../bootstrap --enable-verbose`.
   - A minimal working example, i.e. the smallest chunk of code that triggers the error. Ideally, this should be code that can be reduced to a small C / C++ source file. If the code to reproduce is somewhat long, consider putting it in a [gist](https://gist.github.com).
 
@@ -25,7 +25,7 @@ A useful bug report filed as a GitHub issue provides information about how to re
 
 ## Contributing Code
 
-*By contributing code to TileDB, you are agreeing to release it under the [MIT License](https://github.com/TileDB-Inc/TileDB/tree/dev/LICENSE).*
+*By contributing code to TileDB, you are agreeing to release it under the [MIT License](https://github.com/TileDB-Inc/TileDB/tree/main/LICENSE).*
 
 ### Quickstart Workflow:
 
@@ -49,8 +49,7 @@ for dependencies and detailed build instructions.
 [Issue a PR from your updated TileDB fork](https://help.github.com/articles/creating-a-pull-request-from-a-fork/)
 
 Branch conventions:
-- `dev` is the development branch of TileDB, all PR's are merged into `dev`.
-- `master` tracks the latest stable / released version of TileDB.
+- `main` is the development branch of TileDB, all PRs are merged into `main`.
 - `release-x.y.z` are major / bugfix release branches of TileDB.
 
 Formatting conventions:
@@ -63,13 +62,13 @@ Formatting conventions:
 
 ### Pull Requests:
 
-- `dev` is the development branch, all PR’s should be rebased on top of the latest `dev` commit.
+- `main` is the development branch, all PRs should be rebased on top of the latest `main` commit.
 
 - Commit changes to a local branch. The convention is to use your initials to identify branches: (ex. “Fred Jones”, `fj/my_bugfix_branch`). Branch names should be identifiable and reflect the feature or bug that they want to address / fix. This helps in deleting old branches later.
 
 - Make sure the test suite passes by running `make check`.
 
-- When ready to submit a PR, `git rebase` the branch on top of the latest `dev` commit. Be sure to squash / cleanup the commit history so that the PR preferably one, or a couple commits at most. Each atomic commit in a PR should be able to pass the test suite.
+- When ready to submit a PR, `git rebase` the branch on top of the latest `main` commit. Be sure to squash / clean up the commit history so that the PR preferably contains one, or at most a couple of, commits. Each atomic commit in a PR should be able to pass the test suite.
 
 - Run the linting / code format tooling (`make format`) before submitting a final PR. Make sure that your contribution generally follows the format and naming conventions used by surrounding code.
 
diff --git a/README.md b/README.md index 02b74aad5c5..a380abc65e5 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -TileDB logo +TileDB logo -[![Full CI](https://github.com/TileDB-Inc/TileDB/actions/workflows/full-ci.yml/badge.svg?branch=dev)](https://github.com/TileDB-Inc/TileDB/actions/workflows/full-ci.yml) [![Azure Pipelines](https://dev.azure.com/TileDB-Inc/CI/_apis/build/status/TileDB-Inc.TileDB?branchName=dev)](https://dev.azure.com/TileDB-Inc/CI/_build/latest?definitionId=2&branchName=dev) [![](doc/anaconda.svg)![Anaconda download count badge](https://anaconda.org/conda-forge/TileDB/badges/downloads.svg)](https://anaconda.org/conda-forge/TileDB) +[![Full CI](https://github.com/TileDB-Inc/TileDB/actions/workflows/full-ci.yml/badge.svg?branch=main)](https://github.com/TileDB-Inc/TileDB/actions/workflows/full-ci.yml) [![Azure Pipelines](https://dev.azure.com/TileDB-Inc/CI/_apis/build/status/TileDB-Inc.TileDB?branchName=main)](https://dev.azure.com/TileDB-Inc/CI/_build/latest?definitionId=2&branchName=main) [![](doc/anaconda.svg)![Anaconda download count badge](https://anaconda.org/conda-forge/TileDB/badges/downloads.svg)](https://anaconda.org/conda-forge/TileDB) # The Universal Storage Engine @@ -41,10 +41,10 @@ $ docker pull tiledb/tiledb $ docker run -it tiledb/tiledb ``` -We include several [examples](https://github.com/TileDB-Inc/TileDB/tree/dev/examples). You can start with the following: +We include several [examples](https://github.com/TileDB-Inc/TileDB/tree/main/examples). You can start with the following: -* [Dense array example](https://github.com/TileDB-Inc/TileDB/blob/dev/examples/cpp_api/quickstart_dense.cc) -* [Sparse array example](https://github.com/TileDB-Inc/TileDB/blob/dev/examples/cpp_api/quickstart_sparse.cc) +* [Dense array example](https://github.com/TileDB-Inc/TileDB/blob/main/examples/cpp_api/quickstart_dense.cc) +* [Sparse array example](https://github.com/TileDB-Inc/TileDB/blob/main/examples/cpp_api/quickstart_sparse.cc) ## Documentation @@ -71,8 +71,8 @@ The TileDB data format is open-source and can be found [here](format_spec/FORMAT The TileDB team maintains a variety of APIs built on top of the C++ library: -* C ([examples](https://github.com/TileDB-Inc/TileDB/tree/dev/examples/c_api), [API docs](https://tiledb-inc-tiledb.readthedocs-hosted.com/en/stable/c-api.html)) -* C++ ([examples](https://github.com/TileDB-Inc/TileDB/tree/dev/examples/cpp_api), [API docs](https://tiledb-inc-tiledb.readthedocs-hosted.com/en/stable/c++-api.html)) +* C ([examples](https://github.com/TileDB-Inc/TileDB/tree/main/examples/c_api), [API docs](https://tiledb-inc-tiledb.readthedocs-hosted.com/en/stable/c-api.html)) +* C++ ([examples](https://github.com/TileDB-Inc/TileDB/tree/main/examples/cpp_api), [API docs](https://tiledb-inc-tiledb.readthedocs-hosted.com/en/stable/c++-api.html)) * [Python](https://github.com/TileDB-Inc/TileDB-Py) * [R](https://github.com/TileDB-Inc/TileDB-R) * [Java](https://github.com/TileDB-Inc/TileDB-Java) @@ -91,6 +91,6 @@ TileDB is also integrated with several popular databases and data science tools: ## Get involved -TileDB Embedded is an open-source project and welcomes all forms of contributions. Contributors to the project should read over the [contribution docs](https://github.com/TileDB-Inc/TileDB/blob/dev/CONTRIBUTING.md) for more information. +TileDB Embedded is an open-source project and welcomes all forms of contributions. 
Contributors to the project should read over the [contribution docs](https://github.com/TileDB-Inc/TileDB/blob/main/CONTRIBUTING.md) for more information. We'd love to hear from you. Drop us a line at [hello@tiledb.com](mailto:hello@tiledb.com), visit our [forum](https://forum.tiledb.com/) or [contact form](https://tiledb.com/contact), or [follow us on Twitter](https://twitter.com/tiledb) to stay informed of updates and news. diff --git a/bootstrap b/bootstrap index 5ae7321b8c7..a6a305de0f8 100755 --- a/bootstrap +++ b/bootstrap @@ -119,7 +119,7 @@ while test $# != 0; do --dependency=*) dir=`arg "$1"` dependency_dir="$dir";; --linkage=*) linkage=`arg "$1"`;; - --force-build-all-deps) echo "Argument '--force-build-all-deps' has no effect and will be removed in a future version. Vcpkg builds all dependencies by default, please consult the guide in doc/dev/BUILD.md or vcpkg's documentation to see how to provide your own dependencies.";; + --force-build-all-deps) echo "Argument '--force-build-all-deps' has no effect and will be removed in a future version. Vcpkg builds all dependencies by default, please consult the guide in doc/main/BUILD.md or vcpkg's documentation to see how to provide your own dependencies.";; --remove-deprecations) tiledb_remove_deprecations="ON";; --disable-werror) tiledb_werror="OFF";; --disable-tests) tiledb_tests="OFF";; From 9013c66ce5609363233d563e2ea8f35594b8e2e2 Mon Sep 17 00:00:00 2001 From: Beka Davis <31743465+bekadavis9@users.noreply.github.com> Date: Fri, 3 Jan 2025 13:38:42 -0500 Subject: [PATCH 3/6] C.41 compliance: Azure factory constructor. (#5392) Remove `Azure::init` in favor of a C.41-compliant factory constructor. The `Azure::azure_params_` member variable is no longer optional and is fully-initialized at construction time. [sc-60071] --- TYPE: IMPROVEMENT DESC: C.41 constructor: `Azure`. --- test/src/unit-vfs.cc | 3 +- tiledb/common/exception/status.h | 4 - tiledb/sm/filesystem/azure.cc | 599 +++++++++++-------------------- tiledb/sm/filesystem/azure.h | 187 ++++------ tiledb/sm/filesystem/vfs.cc | 56 +-- tiledb/sm/filesystem/vfs.h | 41 ++- 6 files changed, 336 insertions(+), 554 deletions(-) diff --git a/test/src/unit-vfs.cc b/test/src/unit-vfs.cc index d04837d9f6d..70087a28032 100644 --- a/test/src/unit-vfs.cc +++ b/test/src/unit-vfs.cc @@ -786,9 +786,8 @@ TEST_CASE("VFS: Construct Azure Blob Storage endpoint URIs", "[azure][uri]") { // perform any requests) to prevent Entra ID from being chosen. 
     require_tiledb_ok(config.set("vfs.azure.storage_account_key", "foobar"));
   }
 
-  tiledb::sm::Azure azure;
   ThreadPool thread_pool(1);
-  require_tiledb_ok(azure.init(config, &thread_pool));
+  tiledb::sm::Azure azure(&thread_pool, config);
   REQUIRE(azure.client().GetUrl() == expected_endpoint);
 }
 #endif
diff --git a/tiledb/common/exception/status.h b/tiledb/common/exception/status.h
index 27a7d322976..239e06450fe 100644
--- a/tiledb/common/exception/status.h
+++ b/tiledb/common/exception/status.h
@@ -310,10 +310,6 @@ inline Status Status_UtilsError(const std::string& msg) {
 inline Status Status_S3Error(const std::string& msg) {
   return {"[TileDB::S3] Error", msg};
 }
-/** Return a FS_AZURE error class Status with a given message **/
-inline Status Status_AzureError(const std::string& msg) {
-  return {"[TileDB::Azure] Error", msg};
-}
 /** Return a FS_GCS error class Status with a given message **/
 inline Status Status_GCSError(const std::string& msg) {
   return {"[TileDB::GCS] Error", msg};
diff --git a/tiledb/sm/filesystem/azure.cc b/tiledb/sm/filesystem/azure.cc
index c1a8d2a27df..edce11c5ffb 100644
--- a/tiledb/sm/filesystem/azure.cc
+++ b/tiledb/sm/filesystem/azure.cc
@@ -81,7 +81,36 @@ ::Azure::Nullable<T> to_azure_nullable(const std::optional<T>& value) {
 /* ********************************* */
 /*   CONSTRUCTORS & DESTRUCTORS      */
 /* ********************************* */
 
-Azure::Azure() {
+AzureParameters::AzureParameters(const Config& config)
+    : max_parallel_ops_(
+          config.get<uint64_t>("vfs.azure.max_parallel_ops", Config::must_find))
+    , block_list_block_size_(config.get<uint64_t>(
+          "vfs.azure.block_list_block_size", Config::must_find))
+    , write_cache_max_size_(max_parallel_ops_ * block_list_block_size_)
+    , max_retries_(
+          config.get<uint64_t>("vfs.azure.max_retries", Config::must_find))
+    , retry_delay_(std::chrono::milliseconds(
+          config.get<uint64_t>("vfs.azure.retry_delay_ms", Config::must_find)))
+    , max_retry_delay_(std::chrono::milliseconds(config.get<uint64_t>(
+          "vfs.azure.max_retry_delay_ms", Config::must_find)))
+    , use_block_list_upload_(config.get<bool>(
+          "vfs.azure.use_block_list_upload", Config::must_find))
+    , account_name_(get_config_with_env_fallback(
+          config, "vfs.azure.storage_account_name", "AZURE_STORAGE_ACCOUNT"))
+    , account_key_(get_config_with_env_fallback(
+          config, "vfs.azure.storage_account_key", "AZURE_STORAGE_KEY"))
+    , blob_endpoint_(get_blob_endpoint(config, account_name_))
+    , ssl_cfg_(config)
+    , has_sas_token_(
+          !get_config_with_env_fallback(
+               config, "vfs.azure.storage_sas_token", "AZURE_STORAGE_SAS_TOKEN")
+               .empty()) {
+}
+
+Azure::Azure(ThreadPool* const thread_pool, const Config& config)
+    : azure_params_(AzureParameters(config))
+    , thread_pool_(thread_pool) {
+  assert(thread_pool);
 }
 
 Azure::~Azure() {
@@ -99,7 +128,7 @@ std::string get_config_with_env_fallback(
   return result;
 }
 
-std::optional<std::string> get_blob_endpoint(
+std::string get_blob_endpoint(
     const Config& config, const std::string& account_name) {
   std::string sas_token = get_config_with_env_fallback(
       config, "vfs.azure.storage_sas_token", "AZURE_STORAGE_SAS_TOKEN");
@@ -110,7 +139,7 @@
     if (!account_name.empty()) {
       result = "https://" + account_name + ".blob.core.windows.net";
     } else {
-      return std::nullopt;
+      return "";
     }
   } else if (!(utils::parse::starts_with(result, "http://") ||
                utils::parse::starts_with(result, "https://"))) {
@@ -131,62 +160,6 @@
   return result;
 }
 
-/**
- * Check if config has a SAS token set
- * @param config Configuration parameters.
- * @return whether there is a SAS token in the config - */ -static bool has_sas_token(const Config& config) { - std::string sas_token = get_config_with_env_fallback( - config, "vfs.azure.storage_sas_token", "AZURE_STORAGE_SAS_TOKEN"); - return !sas_token.empty(); -} - -std::optional AzureParameters::create(const Config& config) { - auto account_name = get_config_with_env_fallback( - config, "vfs.azure.storage_account_name", "AZURE_STORAGE_ACCOUNT"); - auto blob_endpoint = get_blob_endpoint(config, account_name); - if (!blob_endpoint) { - return std::nullopt; - } - return AzureParameters{config, account_name, *blob_endpoint}; -} - -AzureParameters::AzureParameters( - const Config& config, - const std::string& account_name, - const std::string& blob_endpoint) - : max_parallel_ops_( - config.get("vfs.azure.max_parallel_ops", Config::must_find)) - , block_list_block_size_(config.get( - "vfs.azure.block_list_block_size", Config::must_find)) - , write_cache_max_size_(max_parallel_ops_ * block_list_block_size_) - , max_retries_( - config.get("vfs.azure.max_retries", Config::must_find)) - , retry_delay_(std::chrono::milliseconds( - config.get("vfs.azure.retry_delay_ms", Config::must_find))) - , max_retry_delay_(std::chrono::milliseconds(config.get( - "vfs.azure.max_retry_delay_ms", Config::must_find))) - , use_block_list_upload_(config.get( - "vfs.azure.use_block_list_upload", Config::must_find)) - , account_name_(account_name) - , account_key_(get_config_with_env_fallback( - config, "vfs.azure.storage_account_key", "AZURE_STORAGE_KEY")) - , blob_endpoint_(blob_endpoint) - , ssl_cfg_(config) - , has_sas_token_(has_sas_token(config)) { -} - -Status Azure::init(const Config& config, ThreadPool* const thread_pool) { - if (thread_pool == nullptr) { - return LOG_STATUS( - Status_AzureError("Can't initialize with null thread pool.")); - } - thread_pool_ = thread_pool; - azure_params_ = AzureParameters::create(config); - return Status::Ok(); -} - /* ********************************* */ /* API */ /* ********************************* */ @@ -299,16 +272,13 @@ Azure::AzureClientSingleton::get(const AzureParameters& params) { return *client_; } -Status Azure::create_container(const URI& uri) const { +void Azure::create_container(const URI& uri) const { const auto& c = client(); if (!uri.is_azure()) { - return LOG_STATUS(Status_AzureError( - std::string("URI is not an Azure URI: " + uri.to_string()))); + throw AzureException("URI is not an Azure URI: " + uri.to_string()); } - std::string container_name; - RETURN_NOT_OK(parse_azure_uri(uri, &container_name, nullptr)); - + auto [container_name, blob_path] = parse_azure_uri(uri); bool created; std::string error_message = ""; try { @@ -319,50 +289,43 @@ Status Azure::create_container(const URI& uri) const { } if (!created) { - return LOG_STATUS(Status_AzureError(std::string( - "Create container failed on: " + uri.to_string() + error_message))); + throw AzureException( + "Create container failed on: " + uri.to_string() + error_message); } - - return Status::Ok(); } -Status Azure::empty_container(const URI& container) const { - return remove_dir(container); +void Azure::empty_container(const URI& container) const { + remove_dir(container); } -Status Azure::flush_blob(const URI& uri) { - assert(azure_params_); +void Azure::flush_blob(const URI& uri) { const auto& c = client(); - - if (!azure_params_->use_block_list_upload_) { - return flush_blob_direct(uri); + if (!azure_params_.use_block_list_upload_) { + flush_blob_direct(uri); } - if (!uri.is_azure()) { - return 
LOG_STATUS(Status_AzureError( - std::string("URI is not an Azure URI: " + uri.to_string()))); + throw AzureException("URI is not an Azure URI: " + uri.to_string()); } Buffer* const write_cache_buffer = get_write_cache_buffer(uri.to_string()); - - const Status flush_write_cache_st = - flush_write_cache(uri, write_cache_buffer, true); + Status flush_write_cache_st; + try { + flush_write_cache(uri, write_cache_buffer, true); + } catch (std::exception& e) { + flush_write_cache_st = Status("[TileDB::Azure]", e.what()); + } BlockListUploadState* state; { std::unique_lock states_lock(block_list_upload_states_lock_); - if (block_list_upload_states_.count(uri.to_string()) == 0) { - return flush_write_cache_st; + throw_if_not_ok(flush_write_cache_st); + return; } - state = &block_list_upload_states_.at(uri.to_string()); } - std::string container_name; - std::string blob_path; - RETURN_NOT_OK(parse_azure_uri(uri, &container_name, &blob_path)); - + auto [container_name, blob_path] = parse_azure_uri(uri); if (!state->st().ok()) { // Save the return status because 'state' will be freed before we return. const Status st = state->st(); @@ -376,15 +339,16 @@ Status Azure::flush_blob(const URI& uri) { // // Alternatively, we could do nothing and let Azure release the // uncommited blocks ~7 days later. We chose to delete the blob - // as a best-effort operation. We intentionally are ignoring the - // returned Status from 'remove_blob'. - throw_if_not_ok(remove_blob(uri)); + // as a best-effort operation. We are intentionally ignoring any + // errors thrown by `remove_blob`. + try { + remove_blob(uri); + } catch (...) { + } // Release all instance state associated with this block list // transactions. finish_block_list_upload(uri); - - return st; } // Build the block list to commit. @@ -414,12 +378,10 @@ Status Azure::flush_blob(const URI& uri) { " This error might be resolved by increasing the value of the " "'vfs.azure.block_list_block_size' config option"; } - return LOG_STATUS(Status_AzureError( + throw AzureException( "Flush blob failed on: " + uri.to_string() + "; " + e.Message + - msg_extra)); + msg_extra); } - - return Status::Ok(); } void Azure::finish_block_list_upload(const URI& uri) { @@ -436,23 +398,19 @@ void Azure::finish_block_list_upload(const URI& uri) { } } -Status Azure::flush_blob_direct(const URI& uri) { +void Azure::flush_blob_direct(const URI& uri) { auto& c = client(); if (!uri.is_azure()) { - return LOG_STATUS(Status_AzureError( - std::string("URI is not an Azure URI: " + uri.to_string()))); + throw AzureException("URI is not an Azure URI: " + uri.to_string()); } Buffer* const write_cache_buffer = get_write_cache_buffer(uri.to_string()); if (write_cache_buffer->size() == 0) { - return Status::Ok(); + return; } - std::string container_name; - std::string blob_path; - RETURN_NOT_OK(parse_azure_uri(uri, &container_name, &blob_path)); - + auto [container_name, blob_path] = parse_azure_uri(uri); try { c.GetBlobContainerClient(container_name) .GetBlockBlobClient(blob_path) @@ -460,8 +418,8 @@ Status Azure::flush_blob_direct(const URI& uri) { static_cast(write_cache_buffer->data()), static_cast(write_cache_buffer->size())); } catch (const ::Azure::Storage::StorageException& e) { - return LOG_STATUS(Status_AzureError( - "Flush blob failed on: " + uri.to_string() + "; " + e.Message)); + throw AzureException( + "Flush blob failed on: " + uri.to_string() + "; " + e.Message); } // Protect 'write_cache_map_' from multiple writers. 
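The flush path above finalizes a block-list upload: previously staged blocks are committed into a single blob, and on error the blob is removed best-effort rather than leaving uncommitted blocks behind. Here is a standalone sketch of that stage-and-commit protocol against the Azure C++ SDK; the container name, blob name, and connection-string environment variable are illustrative assumptions, not TileDB code:

```cpp
#include <azure/core/base64.hpp>
#include <azure/storage/blobs.hpp>
#include <cstdlib>
#include <string>
#include <vector>

int main() {
  const char* conn = std::getenv("AZURE_STORAGE_CONNECTION_STRING");
  if (!conn) return 1;  // assumed env var carrying the connection string

  auto blob =
      Azure::Storage::Blobs::BlockBlobClient::CreateFromConnectionString(
          conn, "demo-container", "demo-blob.bin");

  // Stage one uncommitted block. Block ids are base64 strings and must all
  // have the same encoded length within a given blob.
  std::string data = "hello, block list";
  const std::string block_id = Azure::Core::Convert::Base64Encode(
      std::vector<uint8_t>{'0', '0', '0', '1'});
  Azure::Core::IO::MemoryBodyStream stream(
      reinterpret_cast<const uint8_t*>(data.data()), data.size());
  blob.StageBlock(block_id, stream);

  // Nothing is readable until the block list is committed; the service
  // discards uncommitted blocks on its own after roughly a week, which is
  // the window the best-effort cleanup above short-circuits.
  blob.CommitBlockList({block_id});
}
```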
@@ -469,112 +427,66 @@ Status Azure::flush_blob_direct(const URI& uri) { std::unique_lock cache_lock(write_cache_map_lock_); write_cache_map_.erase(uri.to_string()); } - - return Status::Ok(); } -Status Azure::is_empty_container(const URI& uri, bool* is_empty) const { +bool Azure::is_empty_container(const URI& uri) const { const auto& c = client(); - assert(is_empty); - if (!uri.is_azure()) { - return LOG_STATUS(Status_AzureError( - std::string("URI is not an Azure URI: " + uri.to_string()))); + throw AzureException("URI is not an Azure URI: " + uri.to_string()); } - std::string container_name; - RETURN_NOT_OK(parse_azure_uri(uri, &container_name, nullptr)); - + auto [container_name, blob_path] = parse_azure_uri(uri); ::Azure::Storage::Blobs::ListBlobsOptions options; options.PageSizeHint = 1; try { - *is_empty = c.GetBlobContainerClient(container_name) - .ListBlobs(options) - .Blobs.empty(); + return c.GetBlobContainerClient(container_name) + .ListBlobs(options) + .Blobs.empty(); } catch (const ::Azure::Storage::StorageException& e) { - return LOG_STATUS(Status_AzureError( - "List blobs failed on: " + uri.to_string() + "; " + e.Message)); + throw AzureException( + "List blobs failed on: " + uri.to_string() + "; " + e.Message); } - - return Status::Ok(); } -Status Azure::is_container(const URI& uri, bool* const is_container) const { - assert(is_container); - +bool Azure::is_container(const URI& uri) const { if (!uri.is_azure()) { - return LOG_STATUS(Status_AzureError( - std::string("URI is not an Azure URI: " + uri.to_string()))); + throw AzureException("URI is not an Azure URI: " + uri.to_string()); } - - std::string container_name; - RETURN_NOT_OK(parse_azure_uri(uri, &container_name, nullptr)); - - return this->is_container(container_name, is_container); -} - -Status Azure::is_container( - const std::string& container_name, bool* const is_container) const { + auto [container_name, blob_path] = parse_azure_uri(uri); const auto& c = client(); - assert(is_container); - try { c.GetBlobContainerClient(container_name).GetProperties(); } catch (const ::Azure::Storage::StorageException& e) { if (e.StatusCode == ::Azure::Core::Http::HttpStatusCode::NotFound) { - *is_container = false; - return Status_Ok(); + return false; } - return LOG_STATUS(Status_AzureError( + throw AzureException( "Get container properties failed on: " + container_name + "; " + - e.Message)); + e.Message); } - - *is_container = true; - return Status_Ok(); + return true; } -Status Azure::is_dir(const URI& uri, bool* const exists) const { - assert(exists); - - std::vector paths; - RETURN_NOT_OK(ls(uri, &paths, "/", 1)); - *exists = (bool)paths.size(); - return Status_Ok(); +bool Azure::is_dir(const URI& uri) const { + std::vector paths = ls(uri, "/", 1); + return (bool)paths.size(); } -Status Azure::is_blob(const URI& uri, bool* const is_blob) const { - assert(is_blob); - - std::string container_name; - std::string blob_path; - RETURN_NOT_OK(parse_azure_uri(uri, &container_name, &blob_path)); - - return this->is_blob(container_name, blob_path, is_blob); -} - -Status Azure::is_blob( - const std::string& container_name, - const std::string& blob_path, - bool* const is_blob) const { +bool Azure::is_blob(const URI& uri) const { + auto [container_name, blob_path] = parse_azure_uri(uri); const auto& c = client(); - assert(is_blob); - try { c.GetBlobContainerClient(container_name) .GetBlobClient(blob_path) .GetProperties(); } catch (const ::Azure::Storage::StorageException& e) { if (e.StatusCode == 
::Azure::Core::Http::HttpStatusCode::NotFound) { - *is_blob = false; - return Status_Ok(); + return false; } - return LOG_STATUS(Status_AzureError( - "Get blob properties failed on: " + blob_path + "; " + e.Message)); + throw AzureException( + "Get blob properties failed on: " + blob_path + "; " + e.Message); } - - *is_blob = true; - return Status_Ok(); + return true; } std::string Azure::remove_front_slash(const std::string& path) { @@ -601,18 +513,13 @@ std::string Azure::remove_trailing_slash(const std::string& path) { return path; } -Status Azure::ls( - const URI& uri, - std::vector* paths, - const std::string& delimiter, - const int max_paths) const { - assert(paths); - +std::vector Azure::ls( + const URI& uri, const std::string& delimiter, const int max_paths) const { + std::vector paths; for (auto& fs : ls_with_sizes(uri, delimiter, max_paths)) { - paths->emplace_back(fs.path().native()); + paths.emplace_back(fs.path().native()); } - - return Status::Ok(); + return paths; } std::vector Azure::ls_with_sizes( @@ -625,10 +532,7 @@ std::vector Azure::ls_with_sizes( throw AzureException("URI is not an Azure URI: " + uri_dir.to_string()); } - std::string container_name; - std::string blob_path; - throw_if_not_ok(parse_azure_uri(uri_dir, &container_name, &blob_path)); - + auto [container_name, blob_path] = parse_azure_uri(uri_dir); auto container_client = c.GetBlobContainerClient(container_name); std::vector entries; @@ -667,98 +571,63 @@ std::vector Azure::ls_with_sizes( return entries; } -Status Azure::move_object(const URI& old_uri, const URI& new_uri) { - RETURN_NOT_OK(copy_blob(old_uri, new_uri)); - RETURN_NOT_OK(remove_blob(old_uri)); - return Status::Ok(); +void Azure::move_object(const URI& old_uri, const URI& new_uri) { + copy_blob(old_uri, new_uri); + remove_blob(old_uri); } -Status Azure::copy_blob(const URI& old_uri, const URI& new_uri) { - assert(azure_params_); +void Azure::copy_blob(const URI& old_uri, const URI& new_uri) { auto& c = client(); - if (!old_uri.is_azure()) { - return LOG_STATUS(Status_AzureError( - std::string("URI is not an Azure URI: " + old_uri.to_string()))); + throw AzureException("URI is not an Azure URI: " + old_uri.to_string()); } - if (!new_uri.is_azure()) { - return LOG_STATUS(Status_AzureError( - std::string("URI is not an Azure URI: " + new_uri.to_string()))); + throw AzureException("URI is not an Azure URI: " + new_uri.to_string()); } - std::string old_container_name; - std::string old_blob_path; - RETURN_NOT_OK(parse_azure_uri(old_uri, &old_container_name, &old_blob_path)); + auto [old_container_name, old_blob_path] = parse_azure_uri(old_uri); std::string source_uri = c.GetBlobContainerClient(old_container_name) .GetBlobClient(old_blob_path) .GetUrl(); - std::string new_container_name; - std::string new_blob_path; - RETURN_NOT_OK(parse_azure_uri(new_uri, &new_container_name, &new_blob_path)); - + auto [new_container_name, new_blob_path] = parse_azure_uri(new_uri); try { c.GetBlobContainerClient(new_container_name) .GetBlobClient(new_blob_path) .StartCopyFromUri(source_uri) - .PollUntilDone(azure_params_->retry_delay_); + .PollUntilDone(azure_params_.retry_delay_); } catch (const ::Azure::Storage::StorageException& e) { - return LOG_STATUS(Status_AzureError( - "Copy blob failed on: " + old_uri.to_string() + "; " + e.Message)); + throw AzureException( + "Copy blob failed on: " + old_uri.to_string() + "; " + e.Message); } - - return Status::Ok(); } -Status Azure::move_dir(const URI& old_uri, const URI& new_uri) { - std::vector paths; - 
RETURN_NOT_OK(ls(old_uri, &paths, "")); +void Azure::move_dir(const URI& old_uri, const URI& new_uri) { + std::vector paths = ls(old_uri, ""); for (const auto& path : paths) { const std::string suffix = path.substr(old_uri.to_string().size()); const URI new_path = new_uri.join_path(suffix); - RETURN_NOT_OK(move_object(URI(path), new_path)); + move_object(URI(path), new_path); } - return Status::Ok(); } -Status Azure::blob_size(const URI& uri, uint64_t* const nbytes) const { +uint64_t Azure::blob_size(const URI& uri) const { auto& c = client(); - assert(nbytes); - if (!uri.is_azure()) { - return LOG_STATUS(Status_AzureError( - std::string("URI is not an Azure URI: " + uri.to_string()))); + throw AzureException("URI is not an Azure URI: " + uri.to_string()); } - std::string container_name; - std::string blob_path; - RETURN_NOT_OK(parse_azure_uri(uri, &container_name, &blob_path)); - - std::optional error_message = nullopt; - - try { - ::Azure::Storage::Blobs::ListBlobsOptions options; - options.Prefix = blob_path; - options.PageSizeHint = 1; - - auto response = c.GetBlobContainerClient(container_name).ListBlobs(options); - - if (response.Blobs.empty()) { - error_message = "Blob does not exist."; - } else { - *nbytes = static_cast(response.Blobs[0].BlobSize); - } - } catch (const ::Azure::Storage::StorageException& e) { - error_message = e.Message; - } - - if (error_message.has_value()) { - return LOG_STATUS(Status_AzureError( - "Get blob size failed on: " + uri.to_string() + error_message.value())); + auto [container_name, blob_path] = parse_azure_uri(uri); + ::Azure::Storage::Blobs::ListBlobsOptions options; + options.Prefix = blob_path; + options.PageSizeHint = 1; + auto response = c.GetBlobContainerClient(container_name).ListBlobs(options); + if (response.Blobs.empty()) { + throw AzureException( + "Get blob size failed on: " + uri.to_string() + "Blob does not exist."); + } else { + return static_cast(response.Blobs[0].BlobSize); } - - return Status::Ok(); } Status Azure::read( @@ -771,14 +640,10 @@ Status Azure::read( const auto& c = client(); if (!uri.is_azure()) { - return LOG_STATUS(Status_AzureError( - std::string("URI is not an Azure URI: " + uri.to_string()))); + throw AzureException("URI is not an Azure URI: " + uri.to_string()); } - std::string container_name; - std::string blob_path; - RETURN_NOT_OK(parse_azure_uri(uri, &container_name, &blob_path)); - + auto [container_name, blob_path] = parse_azure_uri(uri); size_t total_length = length + read_ahead_length; ::Azure::Storage::Blobs::DownloadBlobOptions options; @@ -793,30 +658,26 @@ Status Azure::read( .Download(options) .Value; } catch (const ::Azure::Storage::StorageException& e) { - return LOG_STATUS(Status_AzureError( - "Read blob failed on: " + uri.to_string() + "; " + e.Message)); + throw AzureException( + "Read blob failed on: " + uri.to_string() + "; " + e.Message); } *length_returned = result.BodyStream->ReadToCount( static_cast(buffer), total_length); if (*length_returned < length) { - return LOG_STATUS(Status_AzureError( - std::string("Read operation read unexpected number of bytes."))); + throw AzureException("Read operation read unexpected number of bytes."); } return Status::Ok(); } -Status Azure::remove_container(const URI& uri) const { +void Azure::remove_container(const URI& uri) const { auto& c = client(); // Empty container - RETURN_NOT_OK(empty_container(uri)); - - std::string container_name; - RETURN_NOT_OK(parse_azure_uri(uri, &container_name, nullptr)); - + empty_container(uri); + auto [container_name, 
blob_path] = parse_azure_uri(uri); bool deleted; std::string error_message = ""; try { @@ -827,20 +688,14 @@ Status Azure::remove_container(const URI& uri) const { } if (!deleted) { - return LOG_STATUS(Status_AzureError(std::string( - "Remove container failed on: " + uri.to_string() + error_message))); + throw AzureException( + "Remove container failed on: " + uri.to_string() + error_message); } - - return Status::Ok(); } -Status Azure::remove_blob(const URI& uri) const { +void Azure::remove_blob(const URI& uri) const { auto& c = client(); - - std::string container_name; - std::string blob_path; - RETURN_NOT_OK(parse_azure_uri(uri, &container_name, &blob_path)); - + auto [container_name, blob_path] = parse_azure_uri(uri); bool deleted; std::string error_message = ""; try { @@ -853,115 +708,93 @@ Status Azure::remove_blob(const URI& uri) const { } if (!deleted) { - return LOG_STATUS(Status_AzureError(std::string( - "Remove blob failed on: " + uri.to_string() + error_message))); + throw AzureException( + "Remove blob failed on: " + uri.to_string() + error_message); } - - return Status::Ok(); } -Status Azure::remove_dir(const URI& uri) const { - std::vector paths; - RETURN_NOT_OK(ls(uri, &paths, "")); - auto status = parallel_for(thread_pool_, 0, paths.size(), [&](size_t i) { - throw_if_not_ok(remove_blob(URI(paths[i]))); +void Azure::remove_dir(const URI& uri) const { + std::vector paths = ls(uri, ""); + throw_if_not_ok(parallel_for(thread_pool_, 0, paths.size(), [&](size_t i) { + remove_blob(URI(paths[i])); return Status::Ok(); - }); - RETURN_NOT_OK(status); - - return Status::Ok(); + })); } -Status Azure::touch(const URI& uri) const { +void Azure::touch(const URI& uri) const { auto& c = client(); - if (!uri.is_azure()) { - return LOG_STATUS(Status_AzureError( - std::string("URI is not an Azure URI: " + uri.to_string()))); + throw AzureException("URI is not an Azure URI: " + uri.to_string()); } - if (uri.to_string().back() == '/') { - return LOG_STATUS(Status_AzureError(std::string( - "Cannot create file; URI is a directory: " + uri.to_string()))); + throw AzureException( + "Cannot create file; URI is a directory: " + uri.to_string()); } - - bool is_blob; - RETURN_NOT_OK(this->is_blob(uri, &is_blob)); - if (is_blob) { - return Status::Ok(); + if (is_blob(uri)) { + return; } - std::string container_name; - std::string blob_path; - RETURN_NOT_OK(parse_azure_uri(uri, &container_name, &blob_path)); - + auto [container_name, blob_path] = parse_azure_uri(uri); try { c.GetBlobContainerClient(container_name) .GetBlockBlobClient(blob_path) .UploadFrom(nullptr, 0); } catch (const ::Azure::Storage::StorageException& e) { - return LOG_STATUS(Status_AzureError( - "Touch blob failed on: " + uri.to_string() + "; " + e.Message)); + throw AzureException( + "Touch blob failed on: " + uri.to_string() + "; " + e.Message); } - - return Status::Ok(); } -Status Azure::write( +void Azure::write( const URI& uri, const void* const buffer, const uint64_t length) { - assert(azure_params_); - auto write_cache_max_size = azure_params_->write_cache_max_size_; + auto write_cache_max_size = azure_params_.write_cache_max_size_; if (!uri.is_azure()) { - return LOG_STATUS(Status_AzureError( - std::string("URI is not an Azure URI: " + uri.to_string()))); + throw AzureException("URI is not an Azure URI: " + uri.to_string()); } Buffer* const write_cache_buffer = get_write_cache_buffer(uri.to_string()); uint64_t nbytes_filled; - RETURN_NOT_OK( - fill_write_cache(write_cache_buffer, buffer, length, &nbytes_filled)); + 
fill_write_cache(write_cache_buffer, buffer, length, &nbytes_filled); - if (!azure_params_->use_block_list_upload_) { + if (!azure_params_.use_block_list_upload_) { if (nbytes_filled != length) { std::stringstream errmsg; errmsg << "Direct write failed! " << nbytes_filled << " bytes written to buffer, " << length << " bytes requested."; - return LOG_STATUS(Status_AzureError(errmsg.str())); + throw AzureException(errmsg.str()); } else { - return Status::Ok(); + return; } } if (write_cache_buffer->size() == write_cache_max_size) { - RETURN_NOT_OK(flush_write_cache(uri, write_cache_buffer, false)); + flush_write_cache(uri, write_cache_buffer, false); } uint64_t new_length = length - nbytes_filled; uint64_t offset = nbytes_filled; while (new_length > 0) { if (new_length >= write_cache_max_size) { - RETURN_NOT_OK(write_blocks( + write_blocks( uri, static_cast(buffer) + offset, write_cache_max_size, - false)); + false); offset += write_cache_max_size; new_length -= write_cache_max_size; } else { - RETURN_NOT_OK(fill_write_cache( + fill_write_cache( write_cache_buffer, static_cast(buffer) + offset, new_length, - &nbytes_filled)); + &nbytes_filled); offset += nbytes_filled; new_length -= nbytes_filled; } } assert(offset == length); - - return Status::Ok(); } Buffer* Azure::get_write_cache_buffer(const std::string& uri) { @@ -973,54 +806,49 @@ Buffer* Azure::get_write_cache_buffer(const std::string& uri) { } } -Status Azure::fill_write_cache( +void Azure::fill_write_cache( Buffer* const write_cache_buffer, const void* const buffer, const uint64_t length, uint64_t* const nbytes_filled) { - assert(azure_params_); assert(write_cache_buffer); assert(buffer); assert(nbytes_filled); *nbytes_filled = std::min( - azure_params_->write_cache_max_size_ - write_cache_buffer->size(), - length); + azure_params_.write_cache_max_size_ - write_cache_buffer->size(), length); if (*nbytes_filled > 0) { - RETURN_NOT_OK(write_cache_buffer->write(buffer, *nbytes_filled)); + throw_if_not_ok(write_cache_buffer->write(buffer, *nbytes_filled)); } - - return Status::Ok(); } -Status Azure::flush_write_cache( +void Azure::flush_write_cache( const URI& uri, Buffer* const write_cache_buffer, const bool last_block) { assert(write_cache_buffer); - if (write_cache_buffer->size() > 0) { - const Status st = write_blocks( - uri, - write_cache_buffer->data(), - write_cache_buffer->size(), - last_block); - write_cache_buffer->reset_size(); - RETURN_NOT_OK(st); + Status st; + try { + write_blocks( + uri, + write_cache_buffer->data(), + write_cache_buffer->size(), + last_block); + write_cache_buffer->reset_size(); + } catch (...) { + throw; + } } - - return Status::Ok(); } -Status Azure::write_blocks( +void Azure::write_blocks( const URI& uri, const void* const buffer, const uint64_t length, const bool last_block) { - assert(azure_params_); - auto block_list_block_size = azure_params_->block_list_block_size_; + auto block_list_block_size = azure_params_.block_list_block_size_; if (!uri.is_azure()) { - return LOG_STATUS(Status_AzureError( - std::string("URI is not an Azure URI: " + uri.to_string()))); + throw AzureException("URI is not an Azure URI: " + uri.to_string()); } // Ensure that each thread is responsible for exactly block_list_block_size_ @@ -1031,12 +859,11 @@ Status Azure::write_blocks( uint64_t num_ops = last_block ? 
utils::math::ceil(length, block_list_block_size) : (length / block_list_block_size); - num_ops = std::min( - std::max(num_ops, uint64_t(1)), azure_params_->max_parallel_ops_); + num_ops = + std::min(std::max(num_ops, uint64_t(1)), azure_params_.max_parallel_ops_); if (!last_block && length % block_list_block_size != 0) { - return LOG_STATUS( - Status_AzureError("Length not evenly divisible by block size")); + throw AzureException("Length not evenly divisible by block size"); } BlockListUploadState* state; @@ -1047,10 +874,8 @@ Status Azure::write_blocks( auto state_iter = block_list_upload_states_.find(uri.to_string()); if (state_iter == block_list_upload_states_.end()) { // Delete file if it exists (overwrite). - bool exists; - RETURN_NOT_OK(is_blob(uri, &exists)); - if (exists) { - RETURN_NOT_OK(remove_blob(uri)); + if (is_blob(uri)) { + remove_blob(uri); } // Instantiate the new state. @@ -1072,17 +897,14 @@ Status Azure::write_blocks( // 'block_list_upload_states_'. } - std::string container_name; - std::string blob_path; - RETURN_NOT_OK(parse_azure_uri(uri, &container_name, &blob_path)); - + auto [container_name, blob_path] = parse_azure_uri(uri); if (num_ops == 1) { const std::string block_id = state->next_block_id(); - const Status st = upload_block(container_name, blob_path, buffer, length, block_id); state->update_st(st); - return st; + throw_if_not_ok(st); + return; } else { std::vector tasks; tasks.reserve(num_ops); @@ -1109,10 +931,8 @@ Status Azure::write_blocks( const Status st = thread_pool_->wait_all(tasks); state->update_st(st); - return st; + throw_if_not_ok(st); } - - return Status::Ok(); } LsObjects Azure::list_blobs_impl( @@ -1192,29 +1012,20 @@ Status Azure::upload_block( .GetBlockBlobClient(blob_path) .StageBlock(block_id, stream); } catch (const ::Azure::Storage::StorageException& e) { - return LOG_STATUS(Status_AzureError( - "Upload block failed on: " + blob_path + "; " + e.Message)); + throw AzureException( + "Upload block failed on: " + blob_path + "; " + e.Message); } return Status::Ok(); } -Status Azure::parse_azure_uri( - const URI& uri, - std::string* const container_name, - std::string* const blob_path) { +std::tuple Azure::parse_azure_uri(const URI& uri) { assert(uri.is_azure()); const std::string uri_str = uri.to_string(); - const static std::string azure_prefix = "azure://"; assert(uri_str.rfind(azure_prefix, 0) == 0); - if (uri_str.size() == azure_prefix.size()) { - if (container_name) - *container_name = ""; - if (blob_path) - *blob_path = ""; - return Status::Ok(); + return std::make_tuple("", ""); } // Find the '/' after the container name. 
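The hunks above replace the out-parameter form `parse_azure_uri(uri, &container_name, &blob_path)` with a tuple return, consumed throughout the refactor via structured bindings. A simplified standalone sketch of the same shape (a stand-in for illustration, not the actual TileDB implementation):

```cpp
#include <iostream>
#include <string>
#include <tuple>

// Split "azure://container/blob/path" into (container, blob path).
std::tuple<std::string, std::string> parse_azure_uri(const std::string& uri) {
  const std::string prefix = "azure://";
  std::string rest = uri.substr(prefix.size());  // assumes prefix was checked
  const auto sep = rest.find('/');
  if (sep == std::string::npos) {
    return {rest, ""};  // container only, no blob path
  }
  return {rest.substr(0, sep), rest.substr(sep + 1)};
}

int main() {
  auto [container, blob] = parse_azure_uri("azure://my-container/dir1/file1");
  std::cout << container << "\n";  // my-container
  std::cout << blob << "\n";       // dir1/file1
}
```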
@@ -1224,11 +1035,8 @@ Status Azure::parse_azure_uri( if (separator == std::string::npos) { const size_t c_pos_start = azure_prefix.size(); const size_t c_pos_end = uri_str.size(); - if (container_name) - *container_name = uri_str.substr(c_pos_start, c_pos_end - c_pos_start); - if (blob_path) - *blob_path = ""; - return Status::Ok(); + return std::make_tuple( + uri_str.substr(c_pos_start, c_pos_end - c_pos_start), ""); } // There is only a container name if there aren't any characters past the @@ -1236,24 +1044,17 @@ Status Azure::parse_azure_uri( if (uri_str.size() == separator) { const size_t c_pos_start = azure_prefix.size(); const size_t c_pos_end = separator; - if (container_name) - *container_name = uri_str.substr(c_pos_start, c_pos_end - c_pos_start); - if (blob_path) - *blob_path = ""; - return Status::Ok(); + return std::make_tuple( + uri_str.substr(c_pos_start, c_pos_end - c_pos_start), ""); } const size_t c_pos_start = azure_prefix.size(); const size_t c_pos_end = separator; const size_t b_pos_start = separator + 1; const size_t b_pos_end = uri_str.size(); - - if (container_name) - *container_name = uri_str.substr(c_pos_start, c_pos_end - c_pos_start); - if (blob_path) - *blob_path = uri_str.substr(b_pos_start, b_pos_end - b_pos_start); - - return Status::Ok(); + return std::make_tuple( + uri_str.substr(c_pos_start, c_pos_end - c_pos_start), + uri_str.substr(b_pos_start, b_pos_end - b_pos_start)); } /* Generates the next base64-encoded block id. */ diff --git a/tiledb/sm/filesystem/azure.h b/tiledb/sm/filesystem/azure.h index f19e3779753..c67b05949fe 100644 --- a/tiledb/sm/filesystem/azure.h +++ b/tiledb/sm/filesystem/azure.h @@ -72,19 +72,24 @@ class AzureException : public StatusException { } }; +/** Helper function to retrieve the given parameter from the config or env. */ +std::string get_config_with_env_fallback( + const Config& config, const std::string& key, const char* env_name); + +/** Helper function to retrieve the blob endpoint from the config or env. */ +std::string get_blob_endpoint( + const Config& config, const std::string& account_name); + /** * The Azure-specific configuration parameters. + * + * @note The member variables' default declarations have not yet been moved + * from the Config declaration into this struct. */ struct AzureParameters { AzureParameters() = delete; - /** - * Creates an AzureParameters object. - * - * @return AzureParameters or nullopt if config does not have - * a storage account or blob endpoint set. - */ - static std::optional create(const Config& config); + AzureParameters(const Config& config); /** The maximum number of parallel requests. */ uint64_t max_parallel_ops_; @@ -121,12 +126,6 @@ struct AzureParameters { /** Whether the config specifies a SAS token. */ bool has_sas_token_; - - private: - AzureParameters( - const Config& config, - const std::string& account_name, - const std::string& blob_endpoint); }; class Azure; @@ -267,61 +266,59 @@ class Azure { /* CONSTRUCTORS & DESTRUCTORS */ /* ********************************* */ - /** Constructor. */ - Azure(); + /** + * Constructor. + * + * @param thread_pool The parent VFS thread pool. + * @param config Configuration parameters. + */ + Azure(ThreadPool* thread_pool, const Config& config); - /** Destructor. */ + /** + * Destructor. + * + * @note The default destructor may cause undefined behavior with + * `Azure::Storage::Blobs::BlobServiceClient`, so this destructor must be + * explicitly defined. 
+ */ ~Azure(); /* ********************************* */ /* API */ /* ********************************* */ - /** - * Initializes and connects an Azure client. - * - * @param config Configuration parameters. - * @param thread_pool The parent VFS thread pool. - * @return Status - */ - Status init(const Config& config, ThreadPool* thread_pool); - /** * Creates a container. * * @param container The name of the container to be created. - * @return Status */ - Status create_container(const URI& container) const; + void create_container(const URI& container) const; /** Removes the contents of an Azure container. */ - Status empty_container(const URI& container) const; + void empty_container(const URI& container) const; /** * Flushes an blob to Azure, finalizing the upload. * * @param uri The URI of the blob to be flushed. - * @return Status */ - Status flush_blob(const URI& uri); + void flush_blob(const URI& uri); /** * Check if a container is empty. * * @param container The name of the container. - * @param is_empty Mutates to `true` if the container is empty. - * @return Status + * @return `true` if the container is empty, `false` otherwise. */ - Status is_empty_container(const URI& uri, bool* is_empty) const; + bool is_empty_container(const URI& uri) const; /** * Check if a container exists. * * @param container The name of the container. - * @param is_container Mutates to `true` if `uri` is a container. - * @return Status + * @return `true` if `uri` is a container, `false` otherwise. */ - Status is_container(const URI& uri, bool* is_container) const; + bool is_container(const URI& uri) const; /** * Checks if there is an object with prefix `uri/`. For instance, suppose @@ -338,19 +335,17 @@ class Azure { * prefix `azure://some_container/foo2/` (in this case there is not). * * @param uri The URI to check. - * @param exists Sets it to `true` if the above mentioned condition holds. - * @return Status + * @return `true` if the above mentioned condition holds, `false` otherwise. */ - Status is_dir(const URI& uri, bool* exists) const; + bool is_dir(const URI& uri) const; /** * Checks if the given URI is an existing Azure blob. * * @param uri The URI of the object to be checked. - * @param is_blob Mutates to `true` if `uri` is an existing blob, and `false` - * otherwise. + * @return `true` if `uri` is an existing blob, `false` otherwise. */ - Status is_blob(const URI& uri, bool* is_blob) const; + bool is_blob(const URI& uri) const; /** * Lists the objects that start with `uri`. Full URI paths are @@ -369,15 +364,13 @@ class Azure { * - `foo/bar` * * @param uri The prefix URI. - * @param paths Pointer of a vector of URIs to store the retrieved paths. * @param delimiter The delimiter that will * @param max_paths The maximum number of paths to be retrieved. The default * `-1` indicates that no upper bound is specified. - * @return Status + * @return The retrieved paths. */ - Status ls( + std::vector ls( const URI& uri, - std::vector* paths, const std::string& delimiter = "/", int max_paths = -1) const; @@ -449,9 +442,8 @@ class Azure { * * @param old_uri The URI of the old path. * @param new_uri The URI of the new path. - * @return Status */ - Status move_object(const URI& old_uri, const URI& new_uri); + void move_object(const URI& old_uri, const URI& new_uri); /** * Renames a directory. Note that this is an expensive operation. @@ -461,18 +453,16 @@ class Azure { * * @param old_uri The URI of the old path. * @param new_uri The URI of the new path. 
- * @return Status */ - Status move_dir(const URI& old_uri, const URI& new_uri); + void move_dir(const URI& old_uri, const URI& new_uri); /** * Returns the size of the input blob with a given URI in bytes. * * @param uri The URI of the blob. - * @param nbytes Pointer to `uint64_t` bytes to return. - * @return Status + * @return The size of the input blob, in bytes */ - Status blob_size(const URI& uri, uint64_t* nbytes) const; + uint64_t blob_size(const URI& uri) const; /** * Reads data from an object into a buffer. @@ -497,17 +487,15 @@ class Azure { * Deletes a container. * * @param uri The URI of the container to be deleted. - * @return Status */ - Status remove_container(const URI& uri) const; + void remove_container(const URI& uri) const; /** * Deletes an blob with a given URI. * * @param uri The URI of the blob to be deleted. - * @return Status */ - Status remove_blob(const URI& uri) const; + void remove_blob(const URI& uri) const; /** * Deletes all objects with prefix `uri/` (if the ending `/` does not @@ -532,17 +520,15 @@ class Azure { * this example. * * @param uri The prefix uri of the objects to be deleted. - * @return Status */ - Status remove_dir(const URI& uri) const; + void remove_dir(const URI& uri) const; /** * Creates an empty blob. * * @param uri The URI of the blob to be created. - * @return Status */ - Status touch(const URI& uri) const; + void touch(const URI& uri) const; /** * Writes the input buffer to an Azure object. Note that this is essentially @@ -551,9 +537,8 @@ class Azure { * @param uri The URI of the object to be written to. * @param buffer The input buffer. * @param length The size of the input buffer. - * @return Status */ - Status write(const URI& uri, const void* buffer, uint64_t length); + void write(const URI& uri, const void* buffer, uint64_t length); /** * Initializes the Azure blob service client and returns a reference to it. @@ -562,22 +547,13 @@ class Azure { * use of the BlobServiceClient. */ const ::Azure::Storage::Blobs::BlobServiceClient& client() const { - // This branch can be entered in two circumstances: - // 1. The init method has not been called yet. - // 2. The init method has been called, but the config (or environment - // variables) do not contain enough information to get the Azure - // endpoint. - // We don't distinguish between the two, because only the latter can - // happen under normal circumstances, and the former is a C.41 issue - // that will go away once the class is C.41 compliant. - if (!azure_params_) { - throw StatusException(Status_AzureError( + if (azure_params_.blob_endpoint_.empty()) { + throw AzureException( "Azure VFS is not configured. Please set the " "'vfs.azure.storage_account_name' and/or " - "'vfs.azure.blob_endpoint' configuration options.")); + "'vfs.azure.blob_endpoint' configuration options."); } - - return client_singleton_.get(*azure_params_); + return client_singleton_.get(azure_params_); } private: @@ -655,6 +631,9 @@ class Azure { /* PRIVATE ATTRIBUTES */ /* ********************************* */ + /** The Azure configuration parameters. */ + AzureParameters azure_params_; + /** The VFS thread pool. */ ThreadPool* thread_pool_; @@ -667,12 +646,6 @@ class Azure { /** Protects 'write_cache_map_'. */ std::mutex write_cache_map_lock_; - /** - * Contains options to configure connection to Azure. - * After the class becomes C.41 compliant, remove the std::optional. - */ - std::optional azure_params_; - /** Maps a blob URI to its block list upload state. 
*/ std::unordered_map block_list_upload_states_; @@ -702,9 +675,8 @@ class Azure { * @param buffer The source binary buffer to fill the data from. * @param length The length of `buffer`. * @param nbytes_filled The number of bytes filled into `write_cache_buffer`. - * @return Status */ - Status fill_write_cache( + void fill_write_cache( Buffer* write_cache_buffer, const void* buffer, const uint64_t length, @@ -719,9 +691,8 @@ class Azure { * @param write_cache_buffer The input buffer to flush. * @param last_block Should be true only when the flush corresponds to the * last block(s) of a block list upload. - * @return Status */ - Status flush_write_cache( + void flush_write_cache( const URI& uri, Buffer* write_cache_buffer, bool last_block); /** @@ -732,9 +703,8 @@ class Azure { * @param buffer The input buffer. * @param length The size of the input buffer. * @param last_part Should be true only when this is the last block of a blob. - * @return Status */ - Status write_blocks( + void write_blocks( const URI& uri, const void* buffer, uint64_t length, bool last_block); /** @@ -763,7 +733,7 @@ class Azure { * Uploads the write cache buffer associated with 'uri' as an entire * blob. */ - Status flush_blob_direct(const URI& uri); + void flush_blob_direct(const URI& uri); /** * Performs an Azure ListBlobs operation. @@ -799,44 +769,17 @@ class Azure { * `*container_name == "my-container"` and `*blob_path == "dir1/file1"`. * * @param uri The URI to parse. - * @param container_name Mutates to the container name. - * @param blob_path Mutates to the blob path. - * @return Status + * @return A tuple of the container name and blob path. */ - static Status parse_azure_uri( - const URI& uri, std::string* container_name, std::string* blob_path); + static std::tuple parse_azure_uri(const URI& uri); /** * Copies the blob at 'old_uri' to `new_uri`. * * @param old_uri The blob's current URI. * @param new_uri The blob's URI to move to. - * @return Status - */ - Status copy_blob(const URI& old_uri, const URI& new_uri); - - /** - * Check if 'container_name' is a container on Azure. - * - * @param container_name The container's name. - * @param is_container Mutates to the output. - * @return Status */ - Status is_container( - const std::string& container_name, bool* const is_container) const; - - /** - * Check if 'is_blob' is a blob on Azure. - * - * @param container_name The blob's container name. - * @param blob_path The blob's path. - * @param is_blob Mutates to the output. - * @return Status - */ - Status is_blob( - const std::string& container_name, - const std::string& blob_path, - bool* const is_blob) const; + void copy_blob(const URI& old_uri, const URI& new_uri); /** * Removes a leading slash from 'path' if it exists. 
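The header changes above are the core of the C.41 refactor: `Azure::init` and the `std::optional<AzureParameters>` member are gone, so a constructed `Azure` is always fully initialized. The same idea in miniature, with hypothetical types for illustration:

```cpp
#include <optional>
#include <stdexcept>

struct Config { bool valid = true; };
struct Params {
  explicit Params(const Config& cfg) {
    if (!cfg.valid) throw std::runtime_error("bad config");
  }
};

// Before: two-phase initialization; every method must ask "did init() run?".
class TwoPhase {
  std::optional<Params> params_;
 public:
  bool init(const Config& cfg) try {
    params_.emplace(cfg);
    return true;
  } catch (...) { return false; }
  void use() { if (!params_) throw std::logic_error("not initialized"); }
};

// After (C.41): the constructor either yields a usable object or throws.
class C41 {
  Params params_;
 public:
  explicit C41(const Config& cfg) : params_(cfg) {}
  void use() {}  // no initialization checks needed anywhere
};
```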
@@ -876,8 +819,10 @@ AzureScanner::AzureScanner( throw AzureException("URI is not an Azure URI: " + prefix.to_string()); } - throw_if_not_ok(Azure::parse_azure_uri( - prefix.add_trailing_slash(), &container_name_, &blob_path_)); + auto [container_name, blob_path] = + Azure::parse_azure_uri(prefix.add_trailing_slash()); + container_name_ = container_name; + blob_path_ = blob_path; fetch_results(); next(begin_); } diff --git a/tiledb/sm/filesystem/vfs.cc b/tiledb/sm/filesystem/vfs.cc index e32e036f23a..d122d83ee3e 100644 --- a/tiledb/sm/filesystem/vfs.cc +++ b/tiledb/sm/filesystem/vfs.cc @@ -64,6 +64,7 @@ VFS::VFS( ThreadPool* const io_tp, const Config& config) : VFSBase(parent_stats) + , Azure_within_VFS(io_tp, config) , S3_within_VFS(stats_, io_tp, config) , config_(config) , logger_(logger) @@ -93,10 +94,6 @@ VFS::VFS( #ifdef HAVE_AZURE supported_fs_.insert(Filesystem::AZURE); - st = azure_.init(config_, io_tp_); - if (!st.ok()) { - throw VFSException("Failed to initialize Azure backend."); - } #endif #ifdef HAVE_GCS @@ -264,7 +261,8 @@ Status VFS::touch(const URI& uri) const { } if (uri.is_azure()) { #ifdef HAVE_AZURE - return azure_.touch(uri); + azure().touch(uri); + return Status::Ok(); #else throw BuiltWithout("Azure"); #endif @@ -299,7 +297,8 @@ Status VFS::create_bucket(const URI& uri) const { } if (uri.is_azure()) { #ifdef HAVE_AZURE - return azure_.create_container(uri); + azure().create_container(uri); + return Status::Ok(); #else throw BuiltWithout("Azure"); #endif @@ -325,7 +324,8 @@ Status VFS::remove_bucket(const URI& uri) const { } if (uri.is_azure()) { #ifdef HAVE_AZURE - return azure_.remove_container(uri); + azure().remove_container(uri); + return Status::Ok(); #else throw BuiltWithout("Azure"); #endif @@ -352,7 +352,8 @@ Status VFS::empty_bucket(const URI& uri) const { } if (uri.is_azure()) { #ifdef HAVE_AZURE - return azure_.empty_container(uri); + azure().empty_container(uri); + return Status::Ok(); #else throw BuiltWithout("Azure"); #endif @@ -380,7 +381,8 @@ Status VFS::is_empty_bucket( } if (uri.is_azure()) { #ifdef HAVE_AZURE - return azure_.is_empty_container(uri, is_empty); + *is_empty = azure().is_empty_container(uri); + return Status::Ok(); #else throw BuiltWithout("Azure"); #endif @@ -417,7 +419,7 @@ Status VFS::remove_dir(const URI& uri) const { #endif } else if (uri.is_azure()) { #ifdef HAVE_AZURE - return azure_.remove_dir(uri); + azure().remove_dir(uri); #else throw BuiltWithout("Azure"); #endif @@ -484,7 +486,8 @@ Status VFS::remove_file(const URI& uri) const { } if (uri.is_azure()) { #ifdef HAVE_AZURE - return azure_.remove_blob(uri); + azure().remove_blob(uri); + return Status::Ok(); #else throw BuiltWithout("Azure"); #endif @@ -571,7 +574,12 @@ Status VFS::file_size(const URI& uri, uint64_t* size) const { } if (uri.is_azure()) { #ifdef HAVE_AZURE - return azure_.blob_size(uri, size); + try { + *size = azure().blob_size(uri); + } catch (std::exception& e) { + return Status_Error(e.what()); + } + return Status::Ok(); #else throw BuiltWithout("Azure"); #endif @@ -617,7 +625,8 @@ Status VFS::is_dir(const URI& uri, bool* is_dir) const { } if (uri.is_azure()) { #ifdef HAVE_AZURE - return azure_.is_dir(uri, is_dir); + *is_dir = azure().is_dir(uri); + return Status::Ok(); #else *is_dir = false; throw BuiltWithout("Azure"); @@ -668,7 +677,8 @@ Status VFS::is_file(const URI& uri, bool* is_file) const { } if (uri.is_azure()) { #ifdef HAVE_AZURE - return azure_.is_blob(uri, is_file); + *is_file = azure().is_blob(uri); + return Status::Ok(); #else *is_file = 
false;
     throw BuiltWithout("Azure");
 #endif
   }
@@ -702,7 +712,7 @@ Status VFS::is_bucket(const URI& uri, bool* is_bucket) const {
   }
   if (uri.is_azure()) {
 #ifdef HAVE_AZURE
-    RETURN_NOT_OK(azure_.is_container(uri, is_bucket));
+    *is_bucket = azure().is_container(uri);
     return Status::Ok();
 #else
     *is_bucket = false;
@@ -761,7 +771,7 @@ std::vector<directory_entry> VFS::ls_with_sizes(const URI& parent) const {
 #endif
   } else if (parent.is_azure()) {
 #ifdef HAVE_AZURE
-    entries = azure_.ls_with_sizes(parent);
+    entries = azure().ls_with_sizes(parent);
 #else
     throw BuiltWithout("Azure");
 #endif
@@ -842,7 +852,10 @@ Status VFS::move_file(const URI& old_uri, const URI& new_uri) {
   if (old_uri.is_azure()) {
     if (new_uri.is_azure())
 #ifdef HAVE_AZURE
-      return azure_.move_object(old_uri, new_uri);
+    {
+      azure().move_object(old_uri, new_uri);
+      return Status::Ok();
+    }
 #else
       throw BuiltWithout("Azure");
 #endif
@@ -915,7 +926,10 @@ Status VFS::move_dir(const URI& old_uri, const URI& new_uri) {
   if (old_uri.is_azure()) {
     if (new_uri.is_azure())
 #ifdef HAVE_AZURE
-      return azure_.move_dir(old_uri, new_uri);
+    {
+      azure().move_dir(old_uri, new_uri);
+      return Status::Ok();
+    }
 #else
       throw BuiltWithout("Azure");
 #endif
@@ -1199,7 +1211,7 @@ Status VFS::read_impl(
 #ifdef HAVE_AZURE
     const auto read_fn = std::bind(
         &Azure::read,
-        &azure_,
+        &azure(),
         std::placeholders::_1,
         std::placeholders::_2,
         std::placeholders::_3,
@@ -1434,7 +1446,8 @@ Status VFS::close_file(const URI& uri) {
   }
   if (uri.is_azure()) {
 #ifdef HAVE_AZURE
-    return azure_.flush_blob(uri);
+    azure().flush_blob(uri);
+    return Status::Ok();
 #else
     throw BuiltWithout("Azure");
 #endif
@@ -1498,7 +1511,8 @@ Status VFS::write(
   }
   if (uri.is_azure()) {
 #ifdef HAVE_AZURE
-    return azure_.write(uri, buffer, buffer_size);
+    azure().write(uri, buffer, buffer_size);
+    return Status::Ok();
 #else
     throw BuiltWithout("Azure");
 #endif
diff --git a/tiledb/sm/filesystem/vfs.h b/tiledb/sm/filesystem/vfs.h
index 3fc7d5590b5..68b6e059d47 100644
--- a/tiledb/sm/filesystem/vfs.h
+++ b/tiledb/sm/filesystem/vfs.h
@@ -5,7 +5,7 @@
  *
  * The MIT License
  *
- * @copyright Copyright (c) 2017-2023 TileDB, Inc.
+ * @copyright Copyright (c) 2017-2024 TileDB, Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -235,6 +235,37 @@ struct VFSBase {
   stats::Stats* stats_;
 };
 
+/** The Azure filesystem. */
+#ifdef HAVE_AZURE
+class Azure_within_VFS {
+  /** The Azure filesystem instance. */
+  Azure azure_;
+
+ protected:
+  template <typename... Args>
+  Azure_within_VFS(Args&&... args)
+      : azure_(std::forward<Args>(args)...) {
+  }
+
+  /** Protected accessor for the Azure object. */
+  inline Azure& azure() {
+    return azure_;
+  }
+
+  /** Protected accessor for the const Azure object. */
+  inline const Azure& azure() const {
+    return azure_;
+  }
+};
+#else
+class Azure_within_VFS {
+ protected:
+  template <typename... Args>
+  Azure_within_VFS(Args&&...) {
+  }  // empty constructor
+};
+#endif
+
 /** The S3 filesystem. */
 #ifdef HAVE_S3
 class S3_within_VFS {
@@ -270,7 +301,7 @@ class S3_within_VFS {
  * This class implements a virtual filesystem that directs filesystem-related
  * function execution to the appropriate backend based on the input URI.
 */
-class VFS : private VFSBase, protected S3_within_VFS {
+class VFS : private VFSBase, protected Azure_within_VFS, S3_within_VFS {
  public:
   /* ********************************* */
   /* TYPE DEFINITIONS */
   /* ********************************* */
@@ -572,7 +603,7 @@ class VFS : private VFSBase, protected S3_within_VFS {
 #endif
     } else if (parent.is_azure()) {
 #ifdef HAVE_AZURE
-      results = azure_.ls_filtered(parent, f, d, recursive);
+      results = azure().ls_filtered(parent, f, d, recursive);
 #else
       throw filesystem::VFSException(
           "TileDB was built without Azure support");
@@ -939,10 +970,6 @@ class VFS : private VFSBase, protected S3_within_VFS {
   /* PRIVATE ATTRIBUTES */
   /* ********************************* */
 
-#ifdef HAVE_AZURE
-  Azure azure_;
-#endif
-
 #ifdef HAVE_GCS
   GCS gcs_;
 #endif

From 468046ca4e743a6fde7a3231e2518126a18889b1 Mon Sep 17 00:00:00 2001
From: Theodore Tsirpanis
Date: Tue, 7 Jan 2025 14:51:36 +0200
Subject: [PATCH 4/6] Simplify move constructors. (#5409)

Partially reverts #5412.

---
TYPE: NO_HISTORY
---
 tiledb/sm/filter/filter_buffer.cc   |  6 +-----
 tiledb/sm/filter/filter_buffer.h    |  2 +-
 tiledb/sm/filter/filter_pipeline.cc | 11 ++++-------
 tiledb/sm/filter/filter_pipeline.h  |  2 +-
 4 files changed, 7 insertions(+), 14 deletions(-)

diff --git a/tiledb/sm/filter/filter_buffer.cc b/tiledb/sm/filter/filter_buffer.cc
index 68373b5322e..0dfeeeb2c15 100644
--- a/tiledb/sm/filter/filter_buffer.cc
+++ b/tiledb/sm/filter/filter_buffer.cc
@@ -55,11 +55,7 @@ FilterBuffer::BufferOrView::BufferOrView(
       tdb_new(Buffer, (char*)buffer->data() + offset, nbytes));
 }
 
-FilterBuffer::BufferOrView::BufferOrView(BufferOrView&& other) {
-  underlying_buffer_.swap(other.underlying_buffer_);
-  view_.swap(other.view_);
-  std::swap(is_view_, other.is_view_);
-}
+FilterBuffer::BufferOrView::BufferOrView(BufferOrView&& other) = default;
 
 Buffer* FilterBuffer::BufferOrView::buffer() const {
   return is_view_ ? view_.get() : underlying_buffer_.get();
diff --git a/tiledb/sm/filter/filter_buffer.h b/tiledb/sm/filter/filter_buffer.h
index d62d1e3bce7..e44c82f5ee6 100644
--- a/tiledb/sm/filter/filter_buffer.h
+++ b/tiledb/sm/filter/filter_buffer.h
@@ -339,7 +339,7 @@ class FilterBuffer {
     shared_ptr<Buffer> underlying_buffer_;
 
     /** True if this instance is a view on the underlying buffer. */
-    bool is_view_{false};
+    bool is_view_;
 
     /**
      * If this instance is a view, the view Buffer (which does not own its
diff --git a/tiledb/sm/filter/filter_pipeline.cc b/tiledb/sm/filter/filter_pipeline.cc
index 158d9a6fa72..089c14feb37 100644
--- a/tiledb/sm/filter/filter_pipeline.cc
+++ b/tiledb/sm/filter/filter_pipeline.cc
@@ -68,6 +68,11 @@ FilterPipeline::FilterPipeline(
     , max_chunk_size_(max_chunk_size) {
 }
 
+// Unlike the move constructor, the copy constructor must not be defaulted,
+// because each filter has to be copied by calling clone().
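+// A defaulted copy would only copy the shared_ptr handles, so both
+// pipelines would alias the same Filter objects; cloning gives the new
+// pipeline independent copies of every filter.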
 FilterPipeline::FilterPipeline(const FilterPipeline& other) {
   for (auto& filter : other.filters_) {
     add_filter(*filter);
   }
@@ -85,9 +87,7 @@ FilterPipeline::FilterPipeline(
   max_chunk_size_ = other.max_chunk_size_;
 }
 
-FilterPipeline::FilterPipeline(FilterPipeline&& other) {
-  swap(other);
-}
+FilterPipeline::FilterPipeline(FilterPipeline&& other) = default;
 
 FilterPipeline& FilterPipeline::operator=(const FilterPipeline& other) {
   // Call copy constructor
@@ -97,10 +97,7 @@ FilterPipeline& FilterPipeline::operator=(const FilterPipeline& other) {
   return *this;
 }
 
-FilterPipeline& FilterPipeline::operator=(FilterPipeline&& other) {
-  swap(other);
-  return *this;
-}
+FilterPipeline& FilterPipeline::operator=(FilterPipeline&& other) = default;
 
 void FilterPipeline::add_filter(const Filter& filter) {
   shared_ptr<Filter> copy(filter.clone());
diff --git a/tiledb/sm/filter/filter_pipeline.h b/tiledb/sm/filter/filter_pipeline.h
index f26dff882d3..1db5bcec53b 100644
--- a/tiledb/sm/filter/filter_pipeline.h
+++ b/tiledb/sm/filter/filter_pipeline.h
@@ -323,7 +323,7 @@ class FilterPipeline {
   std::vector<shared_ptr<Filter>> filters_;
 
   /** The max chunk size allowed within tiles. */
-  uint32_t max_chunk_size_{0};
+  uint32_t max_chunk_size_;
 
   /**
    * Get the chunk offsets for a var sized tile so that integral cells are

From e7021c4394cd7735a84cca19453eadd78e1737b7 Mon Sep 17 00:00:00 2001
From: Agisilaos Kounelis <36283973+kounelisagis@users.noreply.github.com>
Date: Wed, 8 Jan 2025 20:52:47 +0700
Subject: [PATCH 5/6] Bug fix: Ensure correct data retrieval for global cell
 order in dense reader, avoiding fill values (#5413)

`slab_dim` was being calculated incorrectly in
`DenseReader::cell_slab_overlaps_range`, causing global cell order reads to
return some fill values. After this fix, `slab_dim` remains unchanged for
row-major and column-major requests, while for global order requests the
array's cell order is used to determine `slab_dim`.

The initial issue was also reproduced with TileDB-Py, and after applying this
fix the results are as expected.

[sc-60301]

---
TYPE: NO_HISTORY | BUG
DESC: Fix incorrect `slab_dim` calculation in
 `DenseReader::cell_slab_overlaps_range` to prevent fill values in global
 cell order reads.

---------

Co-authored-by: Ypatia Tsavliri
---
 test/CMakeLists.txt                        |   1 +
 test/src/unit-dense-global-order-reader.cc | 107 +++++++++++++++++++++
 tiledb/sm/query/readers/dense_reader.cc    |   7 +-
 3 files changed, 114 insertions(+), 1 deletion(-)
 create mode 100644 test/src/unit-dense-global-order-reader.cc

diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index a56574c273d..5ca5f4d0d31 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -112,6 +112,7 @@ set(TILEDB_UNIT_TEST_SOURCES
   src/unit-ctx.cc
   src/unit-current-domain-rest.cc
   src/unit-dense-reader.cc
+  src/unit-dense-global-order-reader.cc
   src/unit-DenseTiler.cc
   src/unit-dimension.cc
   src/unit-duplicates.cc
diff --git a/test/src/unit-dense-global-order-reader.cc b/test/src/unit-dense-global-order-reader.cc
new file mode 100644
index 00000000000..a49ee02d538
--- /dev/null
+++ b/test/src/unit-dense-global-order-reader.cc
@@ -0,0 +1,107 @@
+/**
+ * @file unit-dense-global-order-reader.cc
+ *
+ * @section LICENSE
+ *
+ * The MIT License
+ *
+ * @copyright Copyright (c) 2025 TileDB, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <test/support/tdb_catch.h>
+#include "test/support/src/vfs_helpers.h"
+#include "tiledb/sm/cpp_api/tiledb"
+
+using namespace tiledb;
+
+static void create_array(Context& ctx, const std::string& array_uri);
+static void write_array(
+    Context& ctx, const std::string& array_uri, tiledb_layout_t layout);
+
+TEST_CASE(
+    "SC-60301: Read data with global cell order returns fill values",
+    "[dense-reader][bug][global-cell-order][fixed][sc60301]") {
+  test::VFSTestSetup vfs_test_setup;
+  const std::string array_uri =
+      vfs_test_setup.array_uri("dense_global_cell_order");
+  Context ctx{vfs_test_setup.ctx()};
+
+  // Test setup
+  create_array(ctx, array_uri);
+  auto write_layout =
+      GENERATE(TILEDB_ROW_MAJOR, TILEDB_COL_MAJOR, TILEDB_GLOBAL_ORDER);
+  write_array(ctx, array_uri, write_layout);
+
+  Array array(ctx, array_uri, TILEDB_READ);
+  Subarray subarray(ctx, array);
+  subarray.set_subarray({1, 2, 1, 2});
+  std::vector<int> a_read;
+  a_read.resize(4);
+  Query query(ctx, array);
+  query.set_subarray(subarray)
+      .set_layout(TILEDB_GLOBAL_ORDER)
+      .set_data_buffer("a", a_read);
+
+  REQUIRE(query.submit() == Query::Status::COMPLETE);
+
+  if (write_layout == TILEDB_ROW_MAJOR) {
+    REQUIRE(a_read[0] == 1);
+    REQUIRE(a_read[1] == 3);
+    REQUIRE(a_read[2] == 2);
+    REQUIRE(a_read[3] == 4);
+  } else {
+    REQUIRE(a_read[0] == 1);
+    REQUIRE(a_read[1] == 2);
+    REQUIRE(a_read[2] == 3);
+    REQUIRE(a_read[3] == 4);
+  }
+
+  array.close();
+}
+
+void create_array(Context& ctx, const std::string& array_uri) {
+  auto obj = Object::object(ctx, array_uri);
+  if (obj.type() != Object::Type::Invalid) {
+    Object::remove(ctx, array_uri);
+  }
+
+  Domain domain(ctx);
+  domain.add_dimension(Dimension::create<int>(ctx, "d1", {{1, 2}}, 2))
+      .add_dimension(Dimension::create<int>(ctx, "d2", {{1, 2}}, 2));
+
+  // Create the array schema with col-major cell order and tile order
+  ArraySchema schema(ctx, TILEDB_DENSE);
+  schema.set_domain(domain)
+      .set_order({{TILEDB_COL_MAJOR, TILEDB_COL_MAJOR}})
+      .add_attribute(Attribute::create<int>(ctx, "a"));
+  Array::create(ctx, array_uri, schema);
+}
+
+void write_array(
+    Context& ctx, const std::string& array_uri, tiledb_layout_t layout) {
+  std::vector<int> data = {1, 2, 3, 4};
+  Array array(ctx, array_uri, TILEDB_WRITE);
+  Query query(ctx, array);
+  query.set_layout(layout).set_data_buffer("a", data);
+  REQUIRE(query.submit() == Query::Status::COMPLETE);
+  query.finalize();
+  array.close();
+}
diff --git a/tiledb/sm/query/readers/dense_reader.cc
b/tiledb/sm/query/readers/dense_reader.cc
index f59c6dbe4d9..42c068b0e8d 100644
--- a/tiledb/sm/query/readers/dense_reader.cc
+++ b/tiledb/sm/query/readers/dense_reader.cc
@@ -1539,7 +1539,14 @@ tuple<bool, uint64_t> DenseReader::cell_slab_overlaps_range(
     const NDRange& ndrange,
     const std::vector<DimType>& coords,
     const uint64_t length) {
-  const unsigned slab_dim = (layout_ == Layout::COL_MAJOR) ? 0 : dim_num - 1;
+  // The slab extends along the last dimension under row-major order, else the
+  // first; for global order requests, the array's cell order decides.
+  const auto cell_order = array_schema_.cell_order();
+  const unsigned slab_dim =
+      (layout_ == Layout::ROW_MAJOR ||
+       (layout_ == Layout::GLOBAL_ORDER && cell_order == Layout::ROW_MAJOR)) ?
+          dim_num - 1 :
+          0;
 
   const DimType slab_start = coords[slab_dim];
   const DimType slab_end = slab_start + length - 1;

From 3eda3c1c12be6a74c1def952330df776d2620371 Mon Sep 17 00:00:00 2001
From: Ryan Roelke
Date: Wed, 8 Jan 2025 09:38:14 -0500
Subject: [PATCH 6/6] Remove windows 2019 CI / update to windows 2022 (#5419)

The conda feedstock already builds with VS2022, so there is no need to keep
the 2019 CI. This also spares developers from having to support older
compilers.

---
TYPE: BREAKING_BEHAVIOR
DESC: The release binaries for Windows now require MSVC 2022 redistributables.
---
 .github/workflows/build-windows.yml | 2 +-
 .github/workflows/nightly-test.yml  | 3 +--
 .github/workflows/release.yml       | 2 +-
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/build-windows.yml b/.github/workflows/build-windows.yml
index 70b3a43fb7f..414ebe3261e 100644
--- a/.github/workflows/build-windows.yml
+++ b/.github/workflows/build-windows.yml
@@ -18,7 +18,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [windows-2019, windows-2022]
+        os: [windows-2022]
        #considering https://stackoverflow.com/questions/65035256/how-to-access-matrix-variables-in-github-actions
        environ: [azure, s3, gcs, serialization]
        include:
diff --git a/.github/workflows/nightly-test.yml b/.github/workflows/nightly-test.yml
index 0394bbc545b..1de21014c65 100644
--- a/.github/workflows/nightly-test.yml
+++ b/.github/workflows/nightly-test.yml
@@ -20,7 +20,6 @@ jobs:
           - os: macos-latest
             experimental: ON
           - os: windows-latest
-          - os: windows-2019
           - os: windows-latest
             config: "Debug" # Insufficient space on default D:/ for debug build
@@ -83,7 +82,7 @@ jobs:
       include:
         - os: ubuntu-latest
           triplet: x64-linux
-        - os: windows-2019
+        - os: windows-2022
           triplet: x64-windows
         - os: macos-13
           triplet: x64-osx
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 6aad2377c2e..43b73722451 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -45,7 +45,7 @@ jobs:
       matrix:
         include:
           - platform: windows-x86_64
-            os: windows-2019
+            os: windows-2022
             triplet: x64-windows-release
           - platform: linux-x86_64
             os: ubuntu-20.04
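
For reference, the slab-dimension selection introduced in [PATCH 5/6] can be
exercised in isolation. The sketch below is illustrative only and not part of
the patch series: `Layout` is reduced to a plain enum and `slab_dimension` is
a hypothetical free function standing in for the logic inside
`DenseReader::cell_slab_overlaps_range`.

#include <cassert>

enum class Layout { ROW_MAJOR, COL_MAJOR, GLOBAL_ORDER };

// Picks the dimension along which a cell slab extends: the last dimension
// for row-major slabs, the first for col-major ones. Under GLOBAL_ORDER the
// array's cell order decides; the pre-fix code effectively returned
// dim_num - 1 for GLOBAL_ORDER regardless of cell order.
unsigned slab_dimension(Layout layout, Layout cell_order, unsigned dim_num) {
  return (layout == Layout::ROW_MAJOR ||
          (layout == Layout::GLOBAL_ORDER && cell_order == Layout::ROW_MAJOR)) ?
             dim_num - 1 :
             0;
}

int main() {
  // The regression case: a 2-D array with col-major cell order, read in
  // global order, now slabs along dimension 0 instead of dimension 1.
  assert(slab_dimension(Layout::GLOBAL_ORDER, Layout::COL_MAJOR, 2) == 0);
  // Row-major and col-major requests keep their previous behavior.
  assert(slab_dimension(Layout::ROW_MAJOR, Layout::COL_MAJOR, 2) == 1);
  assert(slab_dimension(Layout::COL_MAJOR, Layout::COL_MAJOR, 2) == 0);
  return 0;
}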