From d3dfb6c6da0cc1d0bced2b35aaac78b776ea23bb Mon Sep 17 00:00:00 2001 From: Shaun M Reed Date: Wed, 20 Nov 2024 04:44:15 -0500 Subject: [PATCH] Add configuration parameter to load enumerations on all schemas. (#5379) This adds a new config parameter `rest.load_enumerations_on_array_open_all_schemas` to control loading enumerations on all array schemas. The previously existing `rest.load_enumerations_on_array_open` config parameter is used to control loading enumerations on the latest schema only. --- TYPE: FEATURE DESC: Add `rest.load_enumerations_on_array_open_all_schemas` config parameter. --------- Co-authored-by: Isaiah Norton --- test/src/unit-capi-config.cc | 3 + test/src/unit-cppapi-enumerations.cc | 357 +++++++++++++++++++++++- test/src/unit-enumerations.cc | 2 +- test/src/unit-rest-enumerations.cc | 21 ++ tiledb/sm/array/array.cc | 56 ++-- tiledb/sm/array/array.h | 8 +- tiledb/sm/c_api/tiledb.cc | 2 +- tiledb/sm/config/config.cc | 5 + tiledb/sm/config/config.h | 11 +- tiledb/sm/cpp_api/config.h | 8 +- tiledb/sm/serialization/array.cc | 3 +- tiledb/sm/serialization/array_schema.cc | 2 +- tiledb/sm/serialization/array_schema.h | 15 +- 13 files changed, 430 insertions(+), 63 deletions(-) diff --git a/test/src/unit-capi-config.cc b/test/src/unit-capi-config.cc index 36e52835416..37a731d0c70 100644 --- a/test/src/unit-capi-config.cc +++ b/test/src/unit-capi-config.cc @@ -233,6 +233,7 @@ void check_save_to_file() { ss << "rest.curl.verbose false\n"; ss << "rest.http_compressor any\n"; ss << "rest.load_enumerations_on_array_open false\n"; + ss << "rest.load_enumerations_on_array_open_all_schemas false\n"; ss << "rest.load_metadata_on_array_open true\n"; ss << "rest.load_non_empty_domain_on_array_open true\n"; ss << "rest.retry_count 25\n"; @@ -617,6 +618,8 @@ TEST_CASE("C API: Test config iter", "[capi][config]") { all_param_values["rest.load_metadata_on_array_open"] = "false"; all_param_values["rest.load_non_empty_domain_on_array_open"] = "false"; all_param_values["rest.load_enumerations_on_array_open"] = "false"; + all_param_values["rest.load_enumerations_on_array_open_all_schemas"] = + "false"; all_param_values["rest.use_refactored_array_open"] = "false"; all_param_values["rest.use_refactored_array_open_and_query_submit"] = "false"; all_param_values["rest.payer_namespace"] = ""; diff --git a/test/src/unit-cppapi-enumerations.cc b/test/src/unit-cppapi-enumerations.cc index 03222cc2378..5952bdbd5e1 100644 --- a/test/src/unit-cppapi-enumerations.cc +++ b/test/src/unit-cppapi-enumerations.cc @@ -431,10 +431,11 @@ TEST_CASE_METHOD( "CPP API: Load All Enumerations - All Schemas", "[enumeration][array][load-all-enumerations][all-schemas][rest]") { create_array(); - // Test with `rest.load_enumerations_on_array_open` enabled and disabled. + // Test with `rest.load_enumerations_on_array_open_all_schemas` enabled and + // disabled. bool load_enmrs = GENERATE(true, false); auto config = ctx_.config(); - config["rest.load_enumerations_on_array_open"] = + config["rest.load_enumerations_on_array_open_all_schemas"] = load_enmrs ? "true" : "false"; vfs_test_setup_.update_config(config.ptr().get()); ctx_ = vfs_test_setup_.ctx(); @@ -453,7 +454,7 @@ TEST_CASE_METHOD( if (array_open_v2) { // If we are not loading enmrs on array open we must load them explicitly // with a separate request. - if (!load_enmrs) { + if (!load_enmrs || !vfs_test_setup_.is_rest()) { // Load enumerations for all schemas if using array open v2. CHECK_NOTHROW( ArrayExperimental::load_enumerations_all_schemas(ctx_, array)); @@ -636,10 +637,11 @@ TEST_CASE_METHOD( "[enumeration][array][schema-evolution][load-all-enumerations][all-schemas]" "[rest]") { create_array(); - // Test with `rest.load_enumerations_on_array_open` enabled and disabled. + // Test with `rest.load_enumerations_on_array_open_all_schemas` enabled and + // disabled. bool load_enmrs = GENERATE(true, false); auto config = ctx_.config(); - config["rest.load_enumerations_on_array_open"] = + config["rest.load_enumerations_on_array_open_all_schemas"] = load_enmrs ? "true" : "false"; vfs_test_setup_.update_config(config.ptr().get()); ctx_ = vfs_test_setup_.ctx(); @@ -655,16 +657,16 @@ TEST_CASE_METHOD( if (load_enmrs) { // Array open v1 does not support loading enumerations on array open so we // must load them explicitly in this case. - if (!array_open_v2) { + if (!array_open_v2 || !vfs_test_setup_.is_rest()) { ArrayExperimental::load_all_enumerations(ctx_, array); } REQUIRE( array.schema().ptr()->array_schema()->is_enumeration_loaded( "an_enumeration") == true); } - // If `rest.load_enumerations_on_array_open=false` do not load enmrs - // explicitly. Leave enumerations unloaded initially, evolve the array without - // reopening, and then attempt to load all enumerations. + // If `rest.load_enumerations_on_array_open_all_schemas=false` do not load + // enmrs explicitly. Leave enumerations unloaded initially, evolve the array + // without reopening, and then attempt to load all enumerations. // Evolve once to add an enumeration. ArraySchemaEvolution ase(ctx_); @@ -734,7 +736,7 @@ TEST_CASE_METHOD( // Reopen and load the evolved enumerations. array.reopen(); std::string schema_name_2 = array.schema().ptr()->array_schema()->name(); - if (!load_enmrs) { + if (!load_enmrs || !vfs_test_setup_.is_rest()) { CHECK_NOTHROW( ArrayExperimental::load_enumerations_all_schemas(ctx_, array)); } @@ -774,7 +776,7 @@ TEST_CASE_METHOD( // Reopen to apply the schema evolution and reload enumerations. array.reopen(); - if (!load_enmrs) { + if (!load_enmrs || !vfs_test_setup_.is_rest()) { CHECK_NOTHROW(ArrayExperimental::load_all_enumerations(ctx_, array)); } } @@ -834,6 +836,339 @@ TEST_CASE_METHOD( "an_enumeration") == true); } +TEST_CASE_METHOD( + CPPEnumerationFx, + "CPP API: Load Enumerations - Latest Schema", + "[enumeration][array][load-enumerations][schema][rest]") { + create_array(); + // Test with `rest.load_enumerations_on_array_open` enabled and disabled. + bool load_enmrs = GENERATE(true, false); + auto config = ctx_.config(); + config["rest.load_enumerations_on_array_open"] = + load_enmrs ? "true" : "false"; + vfs_test_setup_.update_config(config.ptr().get()); + ctx_ = vfs_test_setup_.ctx(); + + auto array = tiledb::Array(ctx_, uri_, TILEDB_READ); + bool array_open_v2 = array.ptr()->array()->use_refactored_array_open(); + + // Helper function to reopen and conditionally call load_all_enumerations. + auto reopen_and_load_enmrs = [&]() { + // We always reopen because we are evolving the schema which requires + // reopening the array after applying the schema evolution. + CHECK_NOTHROW(array.reopen()); + + // If we are not loading enmrs on array open we must load them explicitly + // with a separate request. + if (!load_enmrs || !vfs_test_setup_.is_rest()) { + // Load enumerations for all schemas if using array open v2. + CHECK_NOTHROW(ArrayExperimental::load_all_enumerations(ctx_, array)); + } + }; + reopen_and_load_enmrs(); + + REQUIRE( + array.schema().ptr()->array_schema()->has_enumeration("an_enumeration") == + true); + REQUIRE( + array.schema().ptr()->array_schema()->is_enumeration_loaded( + "an_enumeration") == true); + std::string schema_name_1 = array.schema().ptr()->array_schema()->name(); + + // Evolve once to add an enumeration. + ArraySchemaEvolution ase(ctx_); + std::vector var_values{"one", "two", "three"}; + auto var_enmr = Enumeration::create(ctx_, "ase_var_enmr", var_values); + ase.add_enumeration(var_enmr); + auto attr4 = Attribute::create(ctx_, "attr4"); + AttributeExperimental::set_enumeration_name(ctx_, attr4, "ase_var_enmr"); + ase.add_attribute(attr4); + // Apply evolution to the array and reopen. + ase.array_evolve(uri_); + reopen_and_load_enmrs(); + + std::string schema_name_2 = array.schema().ptr()->array_schema()->name(); + if (array_open_v2) { + // Check all schemas if we are using array open v2. + auto all_schemas = array.ptr()->array()->array_schemas_all(); + // Previous schemas + CHECK( + all_schemas[schema_name_1]->has_enumeration("an_enumeration") == true); + CHECK( + all_schemas[schema_name_1]->is_enumeration_loaded("an_enumeration") == + false); + + // Latest schema + CHECK( + all_schemas[schema_name_2]->has_enumeration("an_enumeration") == true); + CHECK( + all_schemas[schema_name_2]->is_enumeration_loaded("an_enumeration") == + true); + CHECK(all_schemas[schema_name_2]->has_enumeration("ase_var_enmr") == true); + CHECK( + all_schemas[schema_name_2]->is_enumeration_loaded("ase_var_enmr") == + true); + } + // We can always validate the latest schema. + CHECK( + array.schema().ptr()->array_schema()->has_enumeration("an_enumeration") == + true); + CHECK( + array.schema().ptr()->array_schema()->is_enumeration_loaded( + "an_enumeration") == true); + CHECK( + array.schema().ptr()->array_schema()->has_enumeration("ase_var_enmr") == + true); + CHECK( + array.schema().ptr()->array_schema()->is_enumeration_loaded( + "ase_var_enmr") == true); + + // Evolve a second time to drop an enumeration. + ArraySchemaEvolution ase2(ctx_); + ase2.drop_enumeration("an_enumeration"); + ase2.drop_attribute("attr1"); + // Apply evolution to the array and reopen. + CHECK_NOTHROW(ase2.array_evolve(uri_)); + reopen_and_load_enmrs(); + + std::string schema_name_3 = array.schema().ptr()->array_schema()->name(); + if (array_open_v2) { + // Check all schemas if we are using array open v2. + auto all_schemas = array.ptr()->array()->array_schemas_all(); + // Previous schemas + CHECK( + all_schemas[schema_name_1]->has_enumeration("an_enumeration") == true); + CHECK( + all_schemas[schema_name_1]->is_enumeration_loaded("an_enumeration") == + false); + CHECK( + all_schemas[schema_name_2]->has_enumeration("an_enumeration") == true); + CHECK( + all_schemas[schema_name_2]->is_enumeration_loaded("an_enumeration") == + false); + CHECK(all_schemas[schema_name_2]->has_enumeration("ase_var_enmr") == true); + CHECK( + all_schemas[schema_name_2]->is_enumeration_loaded("ase_var_enmr") == + false); + + // Latest schema + CHECK( + all_schemas[schema_name_3]->has_enumeration("an_enumeration") == false); + CHECK(all_schemas[schema_name_3]->has_enumeration("ase_var_enmr") == true); + CHECK( + all_schemas[schema_name_3]->is_enumeration_loaded("ase_var_enmr") == + true); + } + // Always validate the latest schema. + CHECK( + array.schema().ptr()->array_schema()->has_enumeration("an_enumeration") == + false); + CHECK( + array.schema().ptr()->array_schema()->has_enumeration("ase_var_enmr") == + true); + CHECK( + array.schema().ptr()->array_schema()->is_enumeration_loaded( + "ase_var_enmr") == true); + + // Evolve a third time to add an enumeration with a name equal to a previously + // dropped enumeration. + ArraySchemaEvolution ase3(ctx_); + auto old_enmr = Enumeration::create(ctx_, "an_enumeration", var_values); + ase3.add_enumeration(old_enmr); + auto attr5 = Attribute::create(ctx_, "attr5"); + AttributeExperimental::set_enumeration_name(ctx_, attr5, "an_enumeration"); + ase.add_attribute(attr5); + // Apply evolution to the array and reopen. + CHECK_NOTHROW(ase3.array_evolve(uri_)); + reopen_and_load_enmrs(); + + // Check all schemas. + if (array_open_v2) { + auto all_schemas = array.ptr()->array()->array_schemas_all(); + std::string schema_name_4 = array.schema().ptr()->array_schema()->name(); + // Previous schemas. + CHECK( + all_schemas[schema_name_1]->has_enumeration("an_enumeration") == true); + CHECK( + all_schemas[schema_name_1]->is_enumeration_loaded("an_enumeration") == + false); + CHECK( + all_schemas[schema_name_2]->has_enumeration("an_enumeration") == true); + CHECK( + all_schemas[schema_name_2]->is_enumeration_loaded("an_enumeration") == + false); + CHECK(all_schemas[schema_name_2]->has_enumeration("ase_var_enmr") == true); + CHECK( + all_schemas[schema_name_2]->is_enumeration_loaded("ase_var_enmr") == + false); + CHECK( + all_schemas[schema_name_3]->has_enumeration("an_enumeration") == false); + CHECK(all_schemas[schema_name_3]->has_enumeration("ase_var_enmr") == true); + CHECK( + all_schemas[schema_name_3]->is_enumeration_loaded("ase_var_enmr") == + false); + + // Latest schema. + CHECK( + all_schemas[schema_name_4]->has_enumeration("an_enumeration") == true); + CHECK( + all_schemas[schema_name_4]->is_enumeration_loaded("an_enumeration") == + true); + CHECK(all_schemas[schema_name_4]->has_enumeration("ase_var_enmr") == true); + CHECK( + all_schemas[schema_name_4]->is_enumeration_loaded("ase_var_enmr") == + true); + } + // Always validate the latest schema. + CHECK( + array.schema().ptr()->array_schema()->has_enumeration("an_enumeration") == + true); + CHECK( + array.schema().ptr()->array_schema()->is_enumeration_loaded( + "an_enumeration") == true); + CHECK( + array.schema().ptr()->array_schema()->has_enumeration("ase_var_enmr") == + true); + CHECK( + array.schema().ptr()->array_schema()->is_enumeration_loaded( + "ase_var_enmr") == true); +} + +TEST_CASE_METHOD( + CPPEnumerationFx, + "CPP API: Load Enumerations - Latest schema post evolution", + "[enumeration][array][schema-evolution][load-enumerations][schema][rest]") { + create_array(); + // Test with `rest.load_enumerations_on_array_open` enabled and disabled. + bool load_enmrs = GENERATE(true, false); + auto config = ctx_.config(); + config["rest.load_enumerations_on_array_open"] = + load_enmrs ? "true" : "false"; + vfs_test_setup_.update_config(config.ptr().get()); + ctx_ = vfs_test_setup_.ctx(); + + // Open the array with no explicit timestamps set. + auto array = tiledb::Array(ctx_, uri_, TILEDB_READ); + + // REST CI will test with both array open v1 and v2. + bool array_open_v2 = array.ptr()->array()->use_refactored_array_open(); + REQUIRE( + array.schema().ptr()->array_schema()->has_enumeration("an_enumeration") == + true); + if (load_enmrs) { + // Array open v1 does not support loading enumerations on array open so we + // must load them explicitly in this case. + if (!array_open_v2) { + ArrayExperimental::load_all_enumerations(ctx_, array); + } else if (!vfs_test_setup_.is_rest()) { + ArrayExperimental::load_enumerations_all_schemas(ctx_, array); + } + REQUIRE( + array.schema().ptr()->array_schema()->is_enumeration_loaded( + "an_enumeration") == true); + } + // If `rest.load_enumerations_on_array_open=false` do not load enmrs + // explicitly. Leave enumerations unloaded initially, evolve the array without + // reopening, and then attempt to load all enumerations. + + // Evolve once to add an enumeration. + ArraySchemaEvolution ase(ctx_); + std::vector var_values{"one", "two", "three"}; + auto var_enmr = Enumeration::create(ctx_, "ase_var_enmr", var_values); + ase.add_enumeration(var_enmr); + auto attr4 = Attribute::create(ctx_, "attr4"); + AttributeExperimental::set_enumeration_name(ctx_, attr4, "ase_var_enmr"); + ase.add_attribute(attr4); + // Apply evolution to the array and intentionally skip reopening after + // evolution to test exceptions. + ase.array_evolve(uri_); + + // Store the original array schema name so we can validate all_schemas later. + std::string schema_name_1 = array.schema().ptr()->array_schema()->name(); + if (array_open_v2) { + if (load_enmrs) { + // If we load enmrs before reopen, we will not load the evolved schema + // that contains the enumeration added during evolution. Since we + // explicitly load only enumerations in the latest schema, we will not hit + // an exception when loading enumerations on an evolved schema prior to + // reopening. + CHECK_NOTHROW(ArrayExperimental::load_all_enumerations(ctx_, array)); + auto all_schemas = array.ptr()->array_schemas_all(); + CHECK(all_schemas.size() == 1); + CHECK( + all_schemas[schema_name_1]->has_enumeration("an_enumeration") == + true); + CHECK( + all_schemas[schema_name_1]->is_enumeration_loaded("an_enumeration") == + true); + CHECK( + all_schemas[schema_name_1]->has_enumeration("ase_var_enmr") == false); + } + + // Reopen and load the evolved enumerations. + array.reopen(); + std::string schema_name_2 = array.schema().ptr()->array_schema()->name(); + if (!load_enmrs || !vfs_test_setup_.is_rest()) { + CHECK_NOTHROW(ArrayExperimental::load_all_enumerations(ctx_, array)); + } + // Validate all array schemas now contain the expected enumerations. + auto all_schemas = array.ptr()->array_schemas_all(); + CHECK( + all_schemas[schema_name_1]->has_enumeration("an_enumeration") == true); + CHECK( + all_schemas[schema_name_1]->is_enumeration_loaded("an_enumeration") == + false); + + // Latest schema. + CHECK( + all_schemas[schema_name_2]->has_enumeration("an_enumeration") == true); + CHECK( + all_schemas[schema_name_2]->is_enumeration_loaded("an_enumeration") == + true); + CHECK(all_schemas[schema_name_2]->has_enumeration("ase_var_enmr") == true); + CHECK( + all_schemas[schema_name_2]->is_enumeration_loaded("ase_var_enmr") == + true); + } else { + // If we load enmrs before reopen, we will not load the evolved schema that + // contains the enumeration added during evolution. + CHECK_NOTHROW(ArrayExperimental::load_all_enumerations(ctx_, array)); + CHECK( + array.schema().ptr()->array_schema()->has_enumeration( + "an_enumeration") == true); + CHECK( + array.schema().ptr()->array_schema()->is_enumeration_loaded( + "an_enumeration") == true); + CHECK( + array.schema().ptr()->array_schema()->has_enumeration("ase_var_enmr") == + false); + CHECK_THROWS_WITH( + array.schema().ptr()->array_schema()->is_enumeration_loaded( + "ase_var_enmr"), + Catch::Matchers::ContainsSubstring("unknown enumeration")); + + // Reopen to apply the schema evolution and reload enumerations. + array.reopen(); + if (!load_enmrs || !vfs_test_setup_.is_rest()) { + CHECK_NOTHROW(ArrayExperimental::load_all_enumerations(ctx_, array)); + } + } + + // Check all original and evolved enumerations are in the latest schema. + CHECK( + array.schema().ptr()->array_schema()->has_enumeration("an_enumeration") == + true); + CHECK( + array.schema().ptr()->array_schema()->is_enumeration_loaded( + "an_enumeration") == true); + CHECK( + array.schema().ptr()->array_schema()->has_enumeration("ase_var_enmr") == + true); + CHECK( + array.schema().ptr()->array_schema()->is_enumeration_loaded( + "ase_var_enmr") == true); +} + TEST_CASE_METHOD( CPPEnumerationFx, "CPP: ArraySchemaEvolution - Add Enumeration", diff --git a/test/src/unit-enumerations.cc b/test/src/unit-enumerations.cc index 2a5669fa0a8..a846d07a987 100644 --- a/test/src/unit-enumerations.cc +++ b/test/src/unit-enumerations.cc @@ -2488,7 +2488,7 @@ TEST_CASE_METHOD( REQUIRE(a1->serialize_enumerations() == (do_load == "true")); REQUIRE( a1->array_schema_latest_ptr()->get_loaded_enumeration_names().size() == - (do_load == "true" ? 2 : 0)); + 0); auto a2 = make_shared(HERE(), ctx.resources(), uri_); diff --git a/test/src/unit-rest-enumerations.cc b/test/src/unit-rest-enumerations.cc index 85efda75fcb..1a299043fcc 100644 --- a/test/src/unit-rest-enumerations.cc +++ b/test/src/unit-rest-enumerations.cc @@ -164,6 +164,15 @@ TEST_CASE_METHOD( create_array(uri_); Array opened_array(ctx_, uri_, TILEDB_READ); + if (!vfs_test_setup_.is_rest() && load_enmrs) { + if (opened_array.ptr()->array()->use_refactored_array_open()) { + CHECK_NOTHROW( + ArrayExperimental::load_enumerations_all_schemas(ctx_, opened_array)); + } else { + CHECK_NOTHROW( + ArrayExperimental::load_all_enumerations(ctx_, opened_array)); + } + } auto array = opened_array.ptr()->array(); auto schema = array->array_schema_latest_ptr(); REQUIRE(schema->is_enumeration_loaded("my_enum") == load_enmrs); @@ -218,6 +227,9 @@ TEST_CASE_METHOD( CHECK_NOTHROW(sm::Array::evolve_array_schema( ctx_.ptr()->resources(), uri, ase.get(), array->get_encryption_key())); CHECK(array->reopen().ok()); + if (load_enmrs && !vfs_test_setup_.is_rest()) { + array->load_all_enumerations(array->use_refactored_array_open()); + } schema = array->array_schema_latest_ptr(); std::string schema_name_2 = schema->name(); REQUIRE(schema->is_enumeration_loaded("my_enum") == load_enmrs); @@ -227,6 +239,9 @@ TEST_CASE_METHOD( expected_enmrs[schema_name_2] = {enmr1, enmr2, var_enmr.ptr()->enumeration()}; actual_enmrs = array->get_enumerations_all_schemas(); if (!load_enmrs) { + if (!vfs_test_setup_.is_rest()) { + array->load_all_enumerations(array->use_refactored_array_open()); + } REQUIRE(schema->is_enumeration_loaded("my_enum") == true); REQUIRE(schema->is_enumeration_loaded("fruit") == true); REQUIRE(schema->is_enumeration_loaded("ase_var_enmr") == true); @@ -242,6 +257,9 @@ TEST_CASE_METHOD( CHECK_NOTHROW(sm::Array::evolve_array_schema( ctx_.ptr()->resources(), uri, ase.get(), array->get_encryption_key())); CHECK(array->reopen().ok()); + if (load_enmrs && !vfs_test_setup_.is_rest()) { + array->load_all_enumerations(array->use_refactored_array_open()); + } schema = array->array_schema_latest_ptr(); std::string schema_name_3 = schema->name(); REQUIRE_THROWS_WITH( @@ -253,6 +271,9 @@ TEST_CASE_METHOD( expected_enmrs[schema_name_3] = {enmr2, var_enmr.ptr()->enumeration()}; actual_enmrs = array->get_enumerations_all_schemas(); if (!load_enmrs) { + if (!vfs_test_setup_.is_rest()) { + array->load_all_enumerations(array->use_refactored_array_open()); + } REQUIRE_THROWS_WITH( schema->is_enumeration_loaded("my_enum"), Catch::Matchers::ContainsSubstring("unknown enumeration")); diff --git a/tiledb/sm/array/array.cc b/tiledb/sm/array/array.cc index c4e1de27ca2..b277ff7a380 100644 --- a/tiledb/sm/array/array.cc +++ b/tiledb/sm/array/array.cc @@ -502,8 +502,7 @@ Status Array::open( rest_client->get_array_schema_from_rest(array_uri_); throw_if_not_ok(st); set_array_schema_latest(array_schema_latest.value()); - if (config_.get( - "rest.load_enumerations_on_array_open", Config::must_find)) { + if (serialize_enumerations()) { // The route for array open v1 does not currently support loading // enumerations. Once #5359 is merged and deployed to REST this will // not be the case. @@ -596,17 +595,6 @@ Status Array::open( return LOG_STATUS(Status_ArrayError(e.what())); } - // Handle any array open config options for local arrays. - if (!remote_) { - // For fetching remote enumerations REST calls - // tiledb_handle_load_enumerations_request which loads enumerations. For - // local arrays we don't call this method. - if (config_.get( - "rest.load_enumerations_on_array_open", Config::must_find)) { - load_all_enumerations(use_refactored_array_open()); - } - } - is_opening_or_closing_ = false; return Status::Ok(); } @@ -904,32 +892,24 @@ Array::get_enumerations_all_schemas() { } // Store the loaded enumerations into the schemas. - auto latest_schema = opened_array_->array_schema_latest_ptr(); - for (const auto& schema_enmrs : ret) { + for (const auto& [schema_name, enmrs] : ret) { // This case will only be hit for remote arrays if the client evolves the // schema and does not reopen the array before loading all enumerations. - if (!array_schemas_all().contains(schema_enmrs.first)) { + if (!array_schemas_all().contains(schema_name)) { throw ArrayException( "Array opened using timestamp range (" + std::to_string(array_dir_timestamp_start_) + ", " + std::to_string(array_dir_timestamp_end_) + - ") has no loaded schema named '" + schema_enmrs.first + + ") has no loaded schema named '" + schema_name + "'; If the array was recently evolved be sure to reopen it after " "applying the evolution."); } - auto schema = array_schemas_all().at(schema_enmrs.first); - for (const auto& enmr : schema_enmrs.second) { + auto schema = array_schemas_all().at(schema_name); + for (const auto& enmr : enmrs) { if (!schema->is_enumeration_loaded(enmr->name())) { schema->store_enumeration(enmr); } - // Also store enumerations into the latest schema when we encounter - // it. - if (schema_enmrs.first == latest_schema->name()) { - if (!latest_schema->is_enumeration_loaded(enmr->name())) { - latest_schema->store_enumeration(enmr); - } - } } } @@ -989,9 +969,12 @@ std::vector> Array::get_enumerations( paths_to_load, *encryption_key(), memory_tracker_); } - // Store the loaded enumerations in the schema + // Store the loaded enumerations in the latest schema + auto schema = opened_array_->array_schema_latest_ptr(); for (auto& enmr : loaded) { - opened_array_->array_schema_latest_ptr()->store_enumeration(enmr); + if (!schema->is_enumeration_loaded(enmr->name())) { + schema->store_enumeration(enmr); + } } } @@ -1147,11 +1130,6 @@ Status Array::reopen(uint64_t timestamp_start, uint64_t timestamp_end) { set_array_schemas_all(std::move(array_schemas_all)); set_fragment_metadata(std::move(fragment_metadata)); - if (config_.get( - "rest.load_enumerations_on_array_open", Config::must_find)) { - load_all_enumerations(use_refactored_array_open()); - } - return Status::Ok(); } @@ -1615,13 +1593,11 @@ bool Array::serialize_non_empty_domain() const { } bool Array::serialize_enumerations() const { - auto serialize = config_.get("rest.load_enumerations_on_array_open"); - if (!serialize.has_value()) { - throw std::runtime_error( - "Cannot get rest.load_enumerations_on_array_open configuration option " - "from config"); - } - return serialize.value(); + return config_.get( + "rest.load_enumerations_on_array_open", Config::must_find) || + config_.get( + "rest.load_enumerations_on_array_open_all_schemas", + Config::must_find); } bool Array::serialize_metadata() const { diff --git a/tiledb/sm/array/array.h b/tiledb/sm/array/array.h index 2861cbd5435..71e9361cb55 100644 --- a/tiledb/sm/array/array.h +++ b/tiledb/sm/array/array.h @@ -142,6 +142,8 @@ class OpenedArray { inline void set_array_schema_latest( const shared_ptr& array_schema) { array_schema_latest_ = array_schema; + // Update the latest schema in the map of all array schemas. + array_schemas_all_[array_schema->name()] = array_schema; } /** Returns all array schemas. */ @@ -985,7 +987,11 @@ class Array { bool serialize_non_empty_domain() const; /** - * Checks the config to se if enumerations should be serialized on array open. + * Checks the config to see if enumerations should be serialized on array + * open. + * + * @return True if either `rest.load_enumerations_on_array_open` or + * `rest.load_enumerations_on_array_open_all_schemas` is set, else False. */ bool serialize_enumerations() const; diff --git a/tiledb/sm/c_api/tiledb.cc b/tiledb/sm/c_api/tiledb.cc index ea967c4fd0a..fd929c9689b 100644 --- a/tiledb/sm/c_api/tiledb.cc +++ b/tiledb/sm/c_api/tiledb.cc @@ -2155,7 +2155,7 @@ capi_return_t tiledb_handle_load_array_schema_request( static_cast(serialization_type), request->buffer()); - if (load_schema_req.include_enumerations()) { + if (load_schema_req.include_enumerations_latest_schema()) { array->load_all_enumerations(); } diff --git a/tiledb/sm/config/config.cc b/tiledb/sm/config/config.cc index d879122e6c5..4e15c56702c 100644 --- a/tiledb/sm/config/config.cc +++ b/tiledb/sm/config/config.cc @@ -94,6 +94,8 @@ const std::string Config::REST_CURL_VERBOSE = "false"; const std::string Config::REST_CURL_TCP_KEEPALIVE = "true"; const std::string Config::REST_CURL_RETRY_ERRORS = "true"; const std::string Config::REST_LOAD_ENUMERATIONS_ON_ARRAY_OPEN = "false"; +const std::string Config::REST_LOAD_ENUMERATIONS_ON_ARRAY_OPEN_ALL_SCHEMAS = + "false"; const std::string Config::REST_LOAD_METADATA_ON_ARRAY_OPEN = "true"; const std::string Config::REST_LOAD_NON_EMPTY_DOMAIN_ON_ARRAY_OPEN = "true"; const std::string Config::REST_USE_REFACTORED_ARRAY_OPEN = "true"; @@ -264,6 +266,9 @@ const std::map default_config_values = { std::make_pair( "rest.load_enumerations_on_array_open", Config::REST_LOAD_ENUMERATIONS_ON_ARRAY_OPEN), + std::make_pair( + "rest.load_enumerations_on_array_open_all_schemas", + Config::REST_LOAD_ENUMERATIONS_ON_ARRAY_OPEN_ALL_SCHEMAS), std::make_pair( "rest.load_metadata_on_array_open", Config::REST_LOAD_METADATA_ON_ARRAY_OPEN), diff --git a/tiledb/sm/config/config.h b/tiledb/sm/config/config.h index 1f75e6f97a0..ef8dda564ca 100644 --- a/tiledb/sm/config/config.h +++ b/tiledb/sm/config/config.h @@ -116,9 +116,18 @@ class Config { /** If we should retry Curl errors in requests to REST. */ static const std::string REST_CURL_RETRY_ERRORS; - /** If the array enumerations should be loaded on array open */ + /** + * If the array enumerations should be loaded on array open for the latest + * array schema only. + */ static const std::string REST_LOAD_ENUMERATIONS_ON_ARRAY_OPEN; + /** + * If the array enumerations should be loaded on array open for all array + * schemas. + */ + static const std::string REST_LOAD_ENUMERATIONS_ON_ARRAY_OPEN_ALL_SCHEMAS; + /** If the array metadata should be loaded on array open */ static const std::string REST_LOAD_METADATA_ON_ARRAY_OPEN; diff --git a/tiledb/sm/cpp_api/config.h b/tiledb/sm/cpp_api/config.h index 0bccd534930..2ab91a6338a 100644 --- a/tiledb/sm/cpp_api/config.h +++ b/tiledb/sm/cpp_api/config.h @@ -928,8 +928,12 @@ class Config { * together with the open array
* **Default**: true * - `rest.load_enumerations_on_array_open`
- * If true, enumerations will be loaded and sent to server together with - * the open array. + * If true, enumerations will be loaded for the latest array schema on + * array open. + * **Default**: false + * - `rest.load_enumerations_on_array_open_all_schemas`
+ * If true, enumerations will be loaded for all array schemas on array + * open. * **Default**: false * - `rest.use_refactored_array_open`
* If true, the new REST routes and APIs for opening an array will be used diff --git a/tiledb/sm/serialization/array.cc b/tiledb/sm/serialization/array.cc index 1958b8406ce..4724cd7c36b 100644 --- a/tiledb/sm/serialization/array.cc +++ b/tiledb/sm/serialization/array.cc @@ -148,7 +148,8 @@ Status array_to_capnp( array_builder->setQueryType(query_type_str(array->get_query_type())); if (array->use_refactored_array_open() && array->serialize_enumerations()) { - array->load_all_enumerations(true); + array->load_all_enumerations(array->config().get( + "rest.load_enumerations_on_array_open_all_schemas", Config::must_find)); } const auto& array_schema_latest = array->array_schema_latest(); diff --git a/tiledb/sm/serialization/array_schema.cc b/tiledb/sm/serialization/array_schema.cc index 4496097ea8e..bc8baa186ba 100644 --- a/tiledb/sm/serialization/array_schema.cc +++ b/tiledb/sm/serialization/array_schema.cc @@ -1638,7 +1638,7 @@ void load_array_schema_request_to_capnp( throw_if_not_ok(config_to_capnp(config, &config_builder)); // This boolean is only serialized to support clients using TileDB < 2.26. // Future options should only be serialized within the Config object above. - builder.setIncludeEnumerations(req.include_enumerations()); + builder.setIncludeEnumerations(req.include_enumerations_latest_schema()); } void serialize_load_array_schema_request( diff --git a/tiledb/sm/serialization/array_schema.h b/tiledb/sm/serialization/array_schema.h index 54406042b70..aac214c57a9 100644 --- a/tiledb/sm/serialization/array_schema.h +++ b/tiledb/sm/serialization/array_schema.h @@ -60,16 +60,23 @@ namespace serialization { class LoadArraySchemaRequest { public: explicit LoadArraySchemaRequest(const Config& config) - : include_enumerations_(config.get( + : include_enumerations_latest_schema_(config.get( + "rest.load_enumerations_on_array_open", Config::must_find)) + , include_enumerations_all_schemas_(config.get( "rest.load_enumerations_on_array_open", Config::must_find)) { } - inline bool include_enumerations() const { - return include_enumerations_; + inline bool include_enumerations_latest_schema() const { + return include_enumerations_latest_schema_; + } + + inline bool include_enumerations_all_schemas() const { + return include_enumerations_all_schemas_; } private: - bool include_enumerations_; + bool include_enumerations_latest_schema_; + bool include_enumerations_all_schemas_; }; #ifdef TILEDB_SERIALIZATION