-
Notifications
You must be signed in to change notification settings - Fork 186
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Load enumerations for all array schemas in a single request. #5349
Changes from all commits
7b78909
5f83015
f7cadf3
3662216
336b3fa
483b3a1
a4fd5da
9b25263
3898359
65c7362
d99f8e9
8ec2827
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -147,10 +147,16 @@ struct tiledb_array_handle_t | |
return array_->get_enumeration(enumeration_name); | ||
} | ||
|
||
std::unordered_map< | ||
std::string, | ||
std::vector<shared_ptr<const tiledb::sm::Enumeration>>> | ||
get_enumerations_all_schemas() { | ||
return array_->get_enumerations_all_schemas(); | ||
} | ||
|
||
std::vector<shared_ptr<const tiledb::sm::Enumeration>> get_enumerations( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Reverted changes to this function from #5237. |
||
const std::vector<std::string>& enumeration_names, | ||
shared_ptr<tiledb::sm::ArraySchema> schema) { | ||
return array_->get_enumerations(enumeration_names, schema); | ||
const std::vector<std::string>& enumeration_names) { | ||
return array_->get_enumerations(enumeration_names); | ||
} | ||
|
||
void get_metadata( | ||
|
@@ -179,8 +185,8 @@ struct tiledb_array_handle_t | |
return array_->is_open(); | ||
} | ||
|
||
void load_all_enumerations() const { | ||
array_->load_all_enumerations(); | ||
void load_all_enumerations(bool all_schemas = false) const { | ||
array_->load_all_enumerations(all_schemas); | ||
} | ||
|
||
tiledb::sm::NDRange& loaded_non_empty_domain() { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -805,33 +805,85 @@ void Array::encryption_type( | |
|
||
shared_ptr<const Enumeration> Array::get_enumeration( | ||
const std::string& enumeration_name) { | ||
return get_enumerations({enumeration_name})[0]; | ||
} | ||
|
||
std::unordered_map<std::string, std::vector<shared_ptr<const Enumeration>>> | ||
Array::get_enumerations_all_schemas() { | ||
if (!is_open_) { | ||
throw ArrayException("Unable to load enumerations; Array is not open."); | ||
} | ||
|
||
auto schema = opened_array_->array_schema_latest_ptr(); | ||
if (!schema->has_enumeration(enumeration_name)) { | ||
throw ArrayException( | ||
"Unable to get enumeration; Enumeration '" + enumeration_name + | ||
"' does not exist."); | ||
} else if (schema->is_enumeration_loaded(enumeration_name)) { | ||
return schema->get_enumeration(enumeration_name); | ||
std::unordered_map<std::string, std::vector<shared_ptr<const Enumeration>>> | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. NIT: a general comment: those complex types are easier to read if modeled as structs so that we avoid … There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good call! I did not change it here but I'll keep it in mind for the future. I have been confused by … |
||
ret; | ||
if (remote_) { | ||
auto rest_client = resources_.rest_client(); | ||
if (rest_client == nullptr) { | ||
throw ArrayException( | ||
"Error loading enumerations; Remote array with no REST client."); | ||
} | ||
|
||
// Pass an empty list of enumeration names. REST will use timestamps to | ||
// load all enumerations on all schemas for the array within that range. | ||
ret = rest_client->post_enumerations_from_rest( | ||
array_uri_, | ||
array_dir_timestamp_start_, | ||
array_dir_timestamp_end_, | ||
this, | ||
{}, | ||
memory_tracker_); | ||
|
||
// Store the enumerations from the REST response. | ||
for (const auto& schema_enmrs : ret) { | ||
auto schema = array_schemas_all().at(schema_enmrs.first); | ||
for (const auto& enmr : schema_enmrs.second) { | ||
schema->store_enumeration(enmr); | ||
} | ||
} | ||
} else { | ||
for (const auto& schema : array_schemas_all()) { | ||
std::unordered_set<std::string> enmrs_to_load; | ||
auto enumeration_names = schema.second->get_enumeration_names(); | ||
// Dedupe requested names and filter out anything already loaded. | ||
for (auto& enmr_name : enumeration_names) { | ||
if (schema.second->is_enumeration_loaded(enmr_name)) { | ||
continue; | ||
} | ||
enmrs_to_load.insert(enmr_name); | ||
} | ||
|
||
// Create a vector of paths to be loaded. | ||
std::vector<std::string> paths_to_load; | ||
for (auto& enmr_name : enmrs_to_load) { | ||
auto path = schema.second->get_enumeration_path_name(enmr_name); | ||
paths_to_load.push_back(path); | ||
} | ||
|
||
// Load the enumerations from storage | ||
auto loaded = array_directory().load_enumerations_from_paths( | ||
paths_to_load, *encryption_key(), memory_tracker_); | ||
|
||
// Store the loaded enumerations in the schema. | ||
for (auto& enmr : loaded) { | ||
schema.second->store_enumeration(enmr); | ||
} | ||
ret[schema.first] = loaded; | ||
} | ||
} | ||
|
||
return get_enumerations({enumeration_name}, schema)[0]; | ||
return ret; | ||
} | ||
|
||
std::vector<shared_ptr<const Enumeration>> Array::get_enumerations( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Reverted changes to this function from #5237. |
||
const std::vector<std::string>& enumeration_names, | ||
shared_ptr<ArraySchema> schema) { | ||
const std::vector<std::string>& enumeration_names) { | ||
if (!is_open_) { | ||
throw ArrayException("Unable to load enumerations; Array is not open."); | ||
} | ||
|
||
// Dedupe requested names and filter out anything already loaded. | ||
std::unordered_set<std::string> enmrs_to_load; | ||
for (auto& enmr_name : enumeration_names) { | ||
if (schema->is_enumeration_loaded(enmr_name)) { | ||
if (array_schema_latest().is_enumeration_loaded(enmr_name)) { | ||
continue; | ||
} | ||
enmrs_to_load.insert(enmr_name); | ||
|
@@ -856,16 +908,16 @@ std::vector<shared_ptr<const Enumeration>> Array::get_enumerations( | |
|
||
loaded = rest_client->post_enumerations_from_rest( | ||
array_uri_, | ||
schema->timestamp_range().first, | ||
schema->timestamp_range().second, | ||
array_dir_timestamp_start_, | ||
array_dir_timestamp_end_, | ||
this, | ||
names_to_load, | ||
memory_tracker_); | ||
memory_tracker_)[array_schema_latest().name()]; | ||
} else { | ||
// Create a vector of paths to be loaded. | ||
std::vector<std::string> paths_to_load; | ||
for (auto& enmr_name : enmrs_to_load) { | ||
auto path = schema->get_enumeration_path_name(enmr_name); | ||
auto path = array_schema_latest().get_enumeration_path_name(enmr_name); | ||
paths_to_load.push_back(path); | ||
} | ||
|
||
|
@@ -876,25 +928,36 @@ std::vector<shared_ptr<const Enumeration>> Array::get_enumerations( | |
|
||
// Store the loaded enumerations in the schema | ||
for (auto& enmr : loaded) { | ||
schema->store_enumeration(enmr); | ||
opened_array_->array_schema_latest_ptr()->store_enumeration(enmr); | ||
} | ||
} | ||
|
||
// Return the requested list of enumerations | ||
std::vector<shared_ptr<const Enumeration>> ret(enumeration_names.size()); | ||
for (size_t i = 0; i < enumeration_names.size(); i++) { | ||
ret[i] = schema->get_enumeration(enumeration_names[i]); | ||
ret[i] = array_schema_latest().get_enumeration(enumeration_names[i]); | ||
} | ||
return ret; | ||
} | ||
|
||
void Array::load_all_enumerations() { | ||
void Array::load_all_enumerations(bool all_schemas) { | ||
if (!is_open_) { | ||
throw ArrayException("Unable to load all enumerations; Array is not open."); | ||
} | ||
// Load all enumerations, discarding the returned list of loaded enumerations. | ||
for (const auto& schema : array_schemas_all()) { | ||
get_enumerations(schema.second->get_enumeration_names(), schema.second); | ||
if (all_schemas) { | ||
// Unless we are using array open V3, Array::array_schemas_all_ will not be | ||
// initialized. We throw an exception since this is required to store the | ||
// loaded enumerations. | ||
if (!use_refactored_array_open()) { | ||
throw ArrayException( | ||
"Unable to load enumerations for all array schemas; The array must " | ||
"be opened using `rest.use_refactored_array_open=true`"); | ||
} | ||
|
||
get_enumerations_all_schemas(); | ||
} else { | ||
get_enumerations(array_schema_latest().get_enumeration_names()); | ||
} | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Adding this API instead of changing the parameters of `tiledb_array_load_all_enumerations` allows this PR to fix the regression without changes to REST or other repositories. We will have a new binding to wrap, but that's just for completeness; it will only be used by the client to submit the request to REST. Had we changed the parameters as I did initially, REST would fail to link against this branch until we updated the modified binding for the APIs.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I mean, we can't really change a C API after it has been released, can we? Or is there some exception for experimental APIs? What we can do if it's incorrect, though, is mark it for deprecation. But rather than being incorrect, is
`tiledb_array_load_all_enumerations`
just fetching the enumerations for the latest schema only? Then IMO it's a good choice to add another API anyway, since it serves a different purpose and has a different performance impact.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IIUC, we can change experimental APIs precisely because they are experimental, but it's a breaking change and I was trying to avoid doing that. Since
`tiledb_array_load_all_enumerations`
only loads enumerations for the latest schema, I thought a new API would not be out of the question. Yeah, I think the name
`tiledb_array_load_all_enumerations`
is a little misleading, as you probably noticed, but I updated those docs to clarify. If we want to rename or restructure those methods, I think we could do that separately and go through a deprecation cycle.