Skip to content

Commit

Permalink
Add configuration parameter to load enumerations on all schemas. (#5379)
Browse files Browse the repository at this point in the history
This adds a new config parameter
`rest.load_enumerations_on_array_open_all_schemas` to control loading
enumerations on all array schemas. The previously existing
`rest.load_enumerations_on_array_open` config parameter is used to
control loading enumerations on the latest schema only.

---
TYPE: FEATURE
DESC: Add `rest.load_enumerations_on_array_open_all_schemas` config
parameter.

---------

Co-authored-by: Isaiah Norton <[email protected]>
  • Loading branch information
shaunrd0 and ihnorton committed Nov 20, 2024
1 parent 545d2cb commit d3dfb6c
Show file tree
Hide file tree
Showing 13 changed files with 430 additions and 63 deletions.
3 changes: 3 additions & 0 deletions test/src/unit-capi-config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ void check_save_to_file() {
ss << "rest.curl.verbose false\n";
ss << "rest.http_compressor any\n";
ss << "rest.load_enumerations_on_array_open false\n";
ss << "rest.load_enumerations_on_array_open_all_schemas false\n";
ss << "rest.load_metadata_on_array_open true\n";
ss << "rest.load_non_empty_domain_on_array_open true\n";
ss << "rest.retry_count 25\n";
Expand Down Expand Up @@ -617,6 +618,8 @@ TEST_CASE("C API: Test config iter", "[capi][config]") {
all_param_values["rest.load_metadata_on_array_open"] = "false";
all_param_values["rest.load_non_empty_domain_on_array_open"] = "false";
all_param_values["rest.load_enumerations_on_array_open"] = "false";
all_param_values["rest.load_enumerations_on_array_open_all_schemas"] =
"false";
all_param_values["rest.use_refactored_array_open"] = "false";
all_param_values["rest.use_refactored_array_open_and_query_submit"] = "false";
all_param_values["rest.payer_namespace"] = "";
Expand Down
357 changes: 346 additions & 11 deletions test/src/unit-cppapi-enumerations.cc

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion test/src/unit-enumerations.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2488,7 +2488,7 @@ TEST_CASE_METHOD(
REQUIRE(a1->serialize_enumerations() == (do_load == "true"));
REQUIRE(
a1->array_schema_latest_ptr()->get_loaded_enumeration_names().size() ==
(do_load == "true" ? 2 : 0));
0);

auto a2 = make_shared<Array>(HERE(), ctx.resources(), uri_);

Expand Down
21 changes: 21 additions & 0 deletions test/src/unit-rest-enumerations.cc
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,15 @@ TEST_CASE_METHOD(

create_array(uri_);
Array opened_array(ctx_, uri_, TILEDB_READ);
if (!vfs_test_setup_.is_rest() && load_enmrs) {
if (opened_array.ptr()->array()->use_refactored_array_open()) {
CHECK_NOTHROW(
ArrayExperimental::load_enumerations_all_schemas(ctx_, opened_array));
} else {
CHECK_NOTHROW(
ArrayExperimental::load_all_enumerations(ctx_, opened_array));
}
}
auto array = opened_array.ptr()->array();
auto schema = array->array_schema_latest_ptr();
REQUIRE(schema->is_enumeration_loaded("my_enum") == load_enmrs);
Expand Down Expand Up @@ -218,6 +227,9 @@ TEST_CASE_METHOD(
CHECK_NOTHROW(sm::Array::evolve_array_schema(
ctx_.ptr()->resources(), uri, ase.get(), array->get_encryption_key()));
CHECK(array->reopen().ok());
if (load_enmrs && !vfs_test_setup_.is_rest()) {
array->load_all_enumerations(array->use_refactored_array_open());
}
schema = array->array_schema_latest_ptr();
std::string schema_name_2 = schema->name();
REQUIRE(schema->is_enumeration_loaded("my_enum") == load_enmrs);
Expand All @@ -227,6 +239,9 @@ TEST_CASE_METHOD(
expected_enmrs[schema_name_2] = {enmr1, enmr2, var_enmr.ptr()->enumeration()};
actual_enmrs = array->get_enumerations_all_schemas();
if (!load_enmrs) {
if (!vfs_test_setup_.is_rest()) {
array->load_all_enumerations(array->use_refactored_array_open());
}
REQUIRE(schema->is_enumeration_loaded("my_enum") == true);
REQUIRE(schema->is_enumeration_loaded("fruit") == true);
REQUIRE(schema->is_enumeration_loaded("ase_var_enmr") == true);
Expand All @@ -242,6 +257,9 @@ TEST_CASE_METHOD(
CHECK_NOTHROW(sm::Array::evolve_array_schema(
ctx_.ptr()->resources(), uri, ase.get(), array->get_encryption_key()));
CHECK(array->reopen().ok());
if (load_enmrs && !vfs_test_setup_.is_rest()) {
array->load_all_enumerations(array->use_refactored_array_open());
}
schema = array->array_schema_latest_ptr();
std::string schema_name_3 = schema->name();
REQUIRE_THROWS_WITH(
Expand All @@ -253,6 +271,9 @@ TEST_CASE_METHOD(
expected_enmrs[schema_name_3] = {enmr2, var_enmr.ptr()->enumeration()};
actual_enmrs = array->get_enumerations_all_schemas();
if (!load_enmrs) {
if (!vfs_test_setup_.is_rest()) {
array->load_all_enumerations(array->use_refactored_array_open());
}
REQUIRE_THROWS_WITH(
schema->is_enumeration_loaded("my_enum"),
Catch::Matchers::ContainsSubstring("unknown enumeration"));
Expand Down
56 changes: 16 additions & 40 deletions tiledb/sm/array/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -502,8 +502,7 @@ Status Array::open(
rest_client->get_array_schema_from_rest(array_uri_);
throw_if_not_ok(st);
set_array_schema_latest(array_schema_latest.value());
if (config_.get<bool>(
"rest.load_enumerations_on_array_open", Config::must_find)) {
if (serialize_enumerations()) {
// The route for array open v1 does not currently support loading
// enumerations. Once #5359 is merged and deployed to REST this will
// not be the case.
Expand Down Expand Up @@ -596,17 +595,6 @@ Status Array::open(
return LOG_STATUS(Status_ArrayError(e.what()));
}

// Handle any array open config options for local arrays.
if (!remote_) {
// For fetching remote enumerations REST calls
// tiledb_handle_load_enumerations_request which loads enumerations. For
// local arrays we don't call this method.
if (config_.get<bool>(
"rest.load_enumerations_on_array_open", Config::must_find)) {
load_all_enumerations(use_refactored_array_open());
}
}

is_opening_or_closing_ = false;
return Status::Ok();
}
Expand Down Expand Up @@ -904,32 +892,24 @@ Array::get_enumerations_all_schemas() {
}

// Store the loaded enumerations into the schemas.
auto latest_schema = opened_array_->array_schema_latest_ptr();
for (const auto& schema_enmrs : ret) {
for (const auto& [schema_name, enmrs] : ret) {
// This case will only be hit for remote arrays if the client evolves the
// schema and does not reopen the array before loading all enumerations.
if (!array_schemas_all().contains(schema_enmrs.first)) {
if (!array_schemas_all().contains(schema_name)) {
throw ArrayException(
"Array opened using timestamp range (" +
std::to_string(array_dir_timestamp_start_) + ", " +
std::to_string(array_dir_timestamp_end_) +
") has no loaded schema named '" + schema_enmrs.first +
") has no loaded schema named '" + schema_name +
"'; If the array was recently evolved be sure to reopen it after "
"applying the evolution.");
}

auto schema = array_schemas_all().at(schema_enmrs.first);
for (const auto& enmr : schema_enmrs.second) {
auto schema = array_schemas_all().at(schema_name);
for (const auto& enmr : enmrs) {
if (!schema->is_enumeration_loaded(enmr->name())) {
schema->store_enumeration(enmr);
}
// Also store enumerations into the latest schema when we encounter
// it.
if (schema_enmrs.first == latest_schema->name()) {
if (!latest_schema->is_enumeration_loaded(enmr->name())) {
latest_schema->store_enumeration(enmr);
}
}
}
}

Expand Down Expand Up @@ -989,9 +969,12 @@ std::vector<shared_ptr<const Enumeration>> Array::get_enumerations(
paths_to_load, *encryption_key(), memory_tracker_);
}

// Store the loaded enumerations in the schema
// Store the loaded enumerations in the latest schema
auto schema = opened_array_->array_schema_latest_ptr();
for (auto& enmr : loaded) {
opened_array_->array_schema_latest_ptr()->store_enumeration(enmr);
if (!schema->is_enumeration_loaded(enmr->name())) {
schema->store_enumeration(enmr);
}
}
}

Expand Down Expand Up @@ -1147,11 +1130,6 @@ Status Array::reopen(uint64_t timestamp_start, uint64_t timestamp_end) {
set_array_schemas_all(std::move(array_schemas_all));
set_fragment_metadata(std::move(fragment_metadata));

if (config_.get<bool>(
"rest.load_enumerations_on_array_open", Config::must_find)) {
load_all_enumerations(use_refactored_array_open());
}

return Status::Ok();
}

Expand Down Expand Up @@ -1615,13 +1593,11 @@ bool Array::serialize_non_empty_domain() const {
}

bool Array::serialize_enumerations() const {
auto serialize = config_.get<bool>("rest.load_enumerations_on_array_open");
if (!serialize.has_value()) {
throw std::runtime_error(
"Cannot get rest.load_enumerations_on_array_open configuration option "
"from config");
}
return serialize.value();
return config_.get<bool>(
"rest.load_enumerations_on_array_open", Config::must_find) ||
config_.get<bool>(
"rest.load_enumerations_on_array_open_all_schemas",
Config::must_find);
}

bool Array::serialize_metadata() const {
Expand Down
8 changes: 7 additions & 1 deletion tiledb/sm/array/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ class OpenedArray {
inline void set_array_schema_latest(
const shared_ptr<ArraySchema>& array_schema) {
array_schema_latest_ = array_schema;
// Update the latest schema in the map of all array schemas.
array_schemas_all_[array_schema->name()] = array_schema;
}

/** Returns all array schemas. */
Expand Down Expand Up @@ -985,7 +987,11 @@ class Array {
bool serialize_non_empty_domain() const;

/**
* Checks the config to se if enumerations should be serialized on array open.
* Checks the config to see if enumerations should be serialized on array
* open.
*
* @return True if either `rest.load_enumerations_on_array_open` or
* `rest.load_enumerations_on_array_open_all_schemas` is set, else False.
*/
bool serialize_enumerations() const;

Expand Down
2 changes: 1 addition & 1 deletion tiledb/sm/c_api/tiledb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2155,7 +2155,7 @@ capi_return_t tiledb_handle_load_array_schema_request(
static_cast<tiledb::sm::SerializationType>(serialization_type),
request->buffer());

if (load_schema_req.include_enumerations()) {
if (load_schema_req.include_enumerations_latest_schema()) {
array->load_all_enumerations();
}

Expand Down
5 changes: 5 additions & 0 deletions tiledb/sm/config/config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ const std::string Config::REST_CURL_VERBOSE = "false";
const std::string Config::REST_CURL_TCP_KEEPALIVE = "true";
const std::string Config::REST_CURL_RETRY_ERRORS = "true";
const std::string Config::REST_LOAD_ENUMERATIONS_ON_ARRAY_OPEN = "false";
const std::string Config::REST_LOAD_ENUMERATIONS_ON_ARRAY_OPEN_ALL_SCHEMAS =
"false";
const std::string Config::REST_LOAD_METADATA_ON_ARRAY_OPEN = "true";
const std::string Config::REST_LOAD_NON_EMPTY_DOMAIN_ON_ARRAY_OPEN = "true";
const std::string Config::REST_USE_REFACTORED_ARRAY_OPEN = "true";
Expand Down Expand Up @@ -264,6 +266,9 @@ const std::map<std::string, std::string> default_config_values = {
std::make_pair(
"rest.load_enumerations_on_array_open",
Config::REST_LOAD_ENUMERATIONS_ON_ARRAY_OPEN),
std::make_pair(
"rest.load_enumerations_on_array_open_all_schemas",
Config::REST_LOAD_ENUMERATIONS_ON_ARRAY_OPEN_ALL_SCHEMAS),
std::make_pair(
"rest.load_metadata_on_array_open",
Config::REST_LOAD_METADATA_ON_ARRAY_OPEN),
Expand Down
11 changes: 10 additions & 1 deletion tiledb/sm/config/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,18 @@ class Config {
/** If we should retry Curl errors in requests to REST. */
static const std::string REST_CURL_RETRY_ERRORS;

/** If the array enumerations should be loaded on array open */
/**
* If the array enumerations should be loaded on array open for the latest
* array schema only.
*/
static const std::string REST_LOAD_ENUMERATIONS_ON_ARRAY_OPEN;

/**
* If the array enumerations should be loaded on array open for all array
* schemas.
*/
static const std::string REST_LOAD_ENUMERATIONS_ON_ARRAY_OPEN_ALL_SCHEMAS;

/** If the array metadata should be loaded on array open */
static const std::string REST_LOAD_METADATA_ON_ARRAY_OPEN;

Expand Down
8 changes: 6 additions & 2 deletions tiledb/sm/cpp_api/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -928,8 +928,12 @@ class Config {
* together with the open array <br>
* **Default**: true
* - `rest.load_enumerations_on_array_open` <br>
* If true, enumerations will be loaded and sent to server together with
* the open array.
* If true, enumerations will be loaded for the latest array schema on
* array open.
* **Default**: false
* - `rest.load_enumerations_on_array_open_all_schemas` <br>
* If true, enumerations will be loaded for all array schemas on array
* open.
* **Default**: false
* - `rest.use_refactored_array_open` <br>
* If true, the new REST routes and APIs for opening an array will be used
Expand Down
3 changes: 2 additions & 1 deletion tiledb/sm/serialization/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,8 @@ Status array_to_capnp(
array_builder->setQueryType(query_type_str(array->get_query_type()));

if (array->use_refactored_array_open() && array->serialize_enumerations()) {
array->load_all_enumerations(true);
array->load_all_enumerations(array->config().get<bool>(
"rest.load_enumerations_on_array_open_all_schemas", Config::must_find));
}

const auto& array_schema_latest = array->array_schema_latest();
Expand Down
2 changes: 1 addition & 1 deletion tiledb/sm/serialization/array_schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1638,7 +1638,7 @@ void load_array_schema_request_to_capnp(
throw_if_not_ok(config_to_capnp(config, &config_builder));
// This boolean is only serialized to support clients using TileDB < 2.26.
// Future options should only be serialized within the Config object above.
builder.setIncludeEnumerations(req.include_enumerations());
builder.setIncludeEnumerations(req.include_enumerations_latest_schema());
}

void serialize_load_array_schema_request(
Expand Down
15 changes: 11 additions & 4 deletions tiledb/sm/serialization/array_schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,16 +60,23 @@ namespace serialization {
class LoadArraySchemaRequest {
public:
explicit LoadArraySchemaRequest(const Config& config)
: include_enumerations_(config.get<bool>(
: include_enumerations_latest_schema_(config.get<bool>(
"rest.load_enumerations_on_array_open", Config::must_find))
, include_enumerations_all_schemas_(config.get<bool>(
"rest.load_enumerations_on_array_open", Config::must_find)) {
}

inline bool include_enumerations() const {
return include_enumerations_;
inline bool include_enumerations_latest_schema() const {
return include_enumerations_latest_schema_;
}

inline bool include_enumerations_all_schemas() const {
return include_enumerations_all_schemas_;
}

private:
bool include_enumerations_;
bool include_enumerations_latest_schema_;
bool include_enumerations_all_schemas_;
};

#ifdef TILEDB_SERIALIZATION
Expand Down

0 comments on commit d3dfb6c

Please sign in to comment.