Skip to content

Commit

Permalink
Fix schema timestamps used when loading enumerations from REST. (#5291)
Browse files Browse the repository at this point in the history
This fixes a bug loading enumerations after a REST request was made
using [array directory timestamp
ranges](https://github.com/TileDB-Inc/TileDB/blob/dev/tiledb/sm/array/array.cc#L849-L855).
The response from the request gives the enumerations on the latest
schema in that range, which can result in attempting to store
enumerations on a schema where they no longer exist after being dropped
in a previous array schema evolution.

Thanks @johnkerl for reporting, in the end I found the exact error from
your issue was thrown because the enumeration happened to have the same
name as a previously dropped enumeration. I was able to reproduce your
error in the test case I added here by evolving the schema a third time
to add back an enumeration with an identical name of one that was
previously dropped.

---
TYPE: BUG
DESC: Fix schema timestamps used when loading enumerations from REST.

---------

Co-authored-by: Ypatia Tsavliri <[email protected]>
  • Loading branch information
shaunrd0 and ypatia authored Sep 9, 2024
1 parent ef7aba6 commit edb48b1
Show file tree
Hide file tree
Showing 5 changed files with 207 additions and 78 deletions.
175 changes: 144 additions & 31 deletions test/src/unit-cppapi-enumerations.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
#include <fstream>

#include <test/support/tdb_catch.h>
#include "test/support/src/vfs_helpers.h"
#include "tiledb/api/c_api/array_schema/array_schema_api_internal.h"
#include "tiledb/api/c_api/enumeration/enumeration_api_internal.h"
#include "tiledb/sm/array_schema/array_schema.h"
#include "tiledb/sm/c_api/tiledb_struct_def.h"
Expand All @@ -43,14 +45,14 @@ using namespace tiledb;

struct CPPEnumerationFx {
CPPEnumerationFx();
~CPPEnumerationFx();
~CPPEnumerationFx() = default;

template <typename T>
void check_dump(const T& val);

void create_array(bool with_empty_enumeration = false);
void rm_array();

tiledb::test::VFSTestSetup vfs_test_setup_;
std::string uri_;
Context ctx_;
VFS vfs_;
Expand Down Expand Up @@ -283,7 +285,7 @@ TEST_CASE_METHOD(
TEST_CASE_METHOD(
CPPEnumerationFx,
"CPP: Enumerations From Disk - Array::get_enumeration",
"[enumeration][array-get-enumeration]") {
"[enumeration][array-get-enumeration][rest]") {
create_array();
auto array = Array(ctx_, uri_, TILEDB_READ);
auto enmr = ArrayExperimental::get_enumeration(ctx_, array, "an_enumeration");
Expand All @@ -297,7 +299,7 @@ TEST_CASE_METHOD(
TEST_CASE_METHOD(
CPPEnumerationFx,
"CPP: Enumerations From Disk - Attribute::get_enumeration_name",
"[enumeration][attr-get-enumeration-name]") {
"[enumeration][attr-get-enumeration-name][rest]") {
create_array();
auto schema = Array::load_schema(ctx_, uri_);

Expand All @@ -313,7 +315,7 @@ TEST_CASE_METHOD(
TEST_CASE_METHOD(
CPPEnumerationFx,
"CPP: Array::load_all_enumerations",
"[enumeration][array-load-all-enumerations]") {
"[enumeration][array-load-all-enumerations][rest]") {
create_array();
auto array = Array(ctx_, uri_, TILEDB_READ);
REQUIRE_NOTHROW(ArrayExperimental::load_all_enumerations(ctx_, array));
Expand All @@ -322,11 +324,132 @@ TEST_CASE_METHOD(
TEST_CASE_METHOD(
CPPEnumerationFx,
"C API: Array load_all_enumerations - Check nullptr",
"[enumeration][array-load-all-enumerations]") {
"[enumeration][array-load-all-enumerations][rest]") {
auto rc = tiledb_array_load_all_enumerations(ctx_.ptr().get(), nullptr);
REQUIRE(rc != TILEDB_OK);
}

TEST_CASE_METHOD(
CPPEnumerationFx,
"CPP API: Load All Enumerations - All Schemas",
"[enumeration][array][load-all-enumerations][all-schemas][rest]") {
create_array();
auto array = tiledb::Array(ctx_, uri_, TILEDB_READ);
auto schema = array.load_schema(ctx_, uri_);
REQUIRE(
schema.ptr()->array_schema()->has_enumeration("an_enumeration") == true);
REQUIRE(
schema.ptr()->array_schema()->is_enumeration_loaded("an_enumeration") ==
false);
std::string schema_name_1 = schema.ptr()->array_schema()->name();

// Evolve once to add an enumeration.
ArraySchemaEvolution ase(ctx_);
std::vector<std::string> var_values{"one", "two", "three"};
auto var_enmr = Enumeration::create(ctx_, "ase_var_enmr", var_values);
ase.add_enumeration(var_enmr);
auto attr4 = Attribute::create<uint16_t>(ctx_, "attr4");
AttributeExperimental::set_enumeration_name(ctx_, attr4, "ase_var_enmr");
ase.add_attribute(attr4);
ase.array_evolve(uri_);
array.reopen();
ArrayExperimental::load_all_enumerations(ctx_, array);
auto all_schemas = array.ptr()->array_->array_schemas_all();
schema = array.load_schema(ctx_, uri_);
std::string schema_name_2 = schema.ptr()->array_schema()->name();

// Check all schemas.
CHECK(all_schemas[schema_name_1]->has_enumeration("an_enumeration") == true);
CHECK(
all_schemas[schema_name_1]->is_enumeration_loaded("an_enumeration") ==
true);
CHECK(all_schemas[schema_name_2]->has_enumeration("an_enumeration") == true);
CHECK(
all_schemas[schema_name_2]->is_enumeration_loaded("an_enumeration") ==
true);
CHECK(all_schemas[schema_name_2]->has_enumeration("ase_var_enmr") == true);
CHECK(
all_schemas[schema_name_2]->is_enumeration_loaded("ase_var_enmr") ==
true);

// Evolve a second time to drop an enumeration.
ArraySchemaEvolution ase2(ctx_);
ase2.drop_enumeration("an_enumeration");
ase2.drop_attribute("attr1");
CHECK_NOTHROW(ase2.array_evolve(uri_));
// Apply evolution to the array and reopen.
CHECK_NOTHROW(array.close());
CHECK_NOTHROW(array.open(TILEDB_READ));
ArrayExperimental::load_all_enumerations(ctx_, array);
all_schemas = array.ptr()->array_->array_schemas_all();
schema = array.load_schema(ctx_, uri_);
std::string schema_name_3 = schema.ptr()->array_schema()->name();

// Check all schemas.
CHECK(all_schemas[schema_name_1]->has_enumeration("an_enumeration") == true);
CHECK(
all_schemas[schema_name_1]->is_enumeration_loaded("an_enumeration") ==
true);
CHECK(all_schemas[schema_name_2]->has_enumeration("an_enumeration") == true);
CHECK(
all_schemas[schema_name_2]->is_enumeration_loaded("an_enumeration") ==
true);
CHECK(all_schemas[schema_name_2]->has_enumeration("ase_var_enmr") == true);
CHECK(
all_schemas[schema_name_2]->is_enumeration_loaded("ase_var_enmr") ==
true);
CHECK(all_schemas[schema_name_3]->has_enumeration("an_enumeration") == false);
CHECK(all_schemas[schema_name_3]->has_enumeration("ase_var_enmr") == true);
CHECK(
all_schemas[schema_name_3]->is_enumeration_loaded("ase_var_enmr") ==
true);

// Evolve a third time to add an enumeration with a name equal to a previously
// dropped enumeration.
ArraySchemaEvolution ase3(ctx_);
auto old_enmr = Enumeration::create(ctx_, "an_enumeration", var_values);
ase3.add_enumeration(old_enmr);
auto attr5 = Attribute::create<uint16_t>(ctx_, "attr5");
AttributeExperimental::set_enumeration_name(ctx_, attr5, "an_enumeration");
ase.add_attribute(attr5);
CHECK_NOTHROW(ase3.array_evolve(uri_));

// Apply evolution to the array and reopen.
CHECK_NOTHROW(array.close());
CHECK_NOTHROW(array.open(TILEDB_READ));
ArrayExperimental::load_all_enumerations(ctx_, array);
all_schemas = array.ptr()->array_->array_schemas_all();
schema = array.load_schema(ctx_, uri_);
std::string schema_name_4 = schema.ptr()->array_schema()->name();

// Check all schemas.
CHECK(all_schemas[schema_name_1]->has_enumeration("an_enumeration") == true);
CHECK(
all_schemas[schema_name_1]->is_enumeration_loaded("an_enumeration") ==
true);
CHECK(all_schemas[schema_name_2]->has_enumeration("an_enumeration") == true);
CHECK(
all_schemas[schema_name_2]->is_enumeration_loaded("an_enumeration") ==
true);
CHECK(all_schemas[schema_name_2]->has_enumeration("ase_var_enmr") == true);
CHECK(
all_schemas[schema_name_2]->is_enumeration_loaded("ase_var_enmr") ==
true);
CHECK(all_schemas[schema_name_3]->has_enumeration("an_enumeration") == false);
CHECK(all_schemas[schema_name_3]->has_enumeration("ase_var_enmr") == true);
CHECK(
all_schemas[schema_name_3]->is_enumeration_loaded("ase_var_enmr") ==
true);
CHECK(all_schemas[schema_name_4]->has_enumeration("an_enumeration") == true);
CHECK(
all_schemas[schema_name_4]->is_enumeration_loaded("an_enumeration") ==
true);
CHECK(all_schemas[schema_name_4]->has_enumeration("ase_var_enmr") == true);
CHECK(
all_schemas[schema_name_4]->is_enumeration_loaded("ase_var_enmr") ==
true);
}

TEST_CASE_METHOD(
CPPEnumerationFx,
"CPP: ArraySchemaEvolution - Add Enumeration",
Expand All @@ -340,7 +463,7 @@ TEST_CASE_METHOD(
TEST_CASE_METHOD(
CPPEnumerationFx,
"C API: ArraySchemaEvolution - Add Enumeration - Check nullptr",
"[enumeration][array-schema-evolution][error]") {
"[enumeration][array-schema-evolution][error][rest]") {
auto rc = tiledb_array_schema_evolution_add_enumeration(
ctx_.ptr().get(), nullptr, nullptr);
REQUIRE(rc != TILEDB_OK);
Expand All @@ -359,7 +482,7 @@ TEST_CASE_METHOD(
TEST_CASE_METHOD(
CPPEnumerationFx,
"C API: ArraySchemaEvolution - Extend Enumeration - Check nullptr",
"[enumeration][array-schema-evolution][drop-enumeration]") {
"[enumeration][array-schema-evolution][drop-enumeration][rest]") {
std::vector<std::string> values = {"fred", "wilma", "barney", "pebbles"};
auto enmr = Enumeration::create(ctx_, enmr_name, values);

Expand All @@ -384,7 +507,7 @@ TEST_CASE_METHOD(
TEST_CASE_METHOD(
CPPEnumerationFx,
"C API: ArraySchemaEvolution - Drop Enumeration - Check nullptr",
"[enumeration][array-schema-evolution][drop-enumeration]") {
"[enumeration][array-schema-evolution][drop-enumeration][rest]") {
auto rc = tiledb_array_schema_evolution_drop_enumeration(
ctx_.ptr().get(), nullptr, "foo");
REQUIRE(rc != TILEDB_OK);
Expand All @@ -398,7 +521,7 @@ TEST_CASE_METHOD(
TEST_CASE_METHOD(
CPPEnumerationFx,
"CPP: Enumeration Query - Basic",
"[enumeration][query][basic]") {
"[enumeration][query][basic][rest]") {
// Basic smoke test. Check that a simple query condition applied against
// an array returns sane results.
create_array();
Expand Down Expand Up @@ -434,7 +557,7 @@ TEST_CASE_METHOD(
TEST_CASE_METHOD(
CPPEnumerationFx,
"CPP: Enumeration Query - Negation",
"[enumeration][query][negation]") {
"[enumeration][query][negation][rest]") {
// Another basic query test, the only twist here is that we're checking
// that query condition negation works as expected.
create_array();
Expand Down Expand Up @@ -473,7 +596,7 @@ TEST_CASE_METHOD(
TEST_CASE_METHOD(
CPPEnumerationFx,
"CPP: Enumeration Query - Combination",
"[enumeration][query][combination]") {
"[enumeration][query][combination][rest]") {
// Same test as before except using multi-condition query condtions
create_array();

Expand Down Expand Up @@ -529,7 +652,7 @@ TEST_CASE_METHOD(
TEST_CASE_METHOD(
CPPEnumerationFx,
"CPP: Enumeration Query - Invalid Enumeration Value is Always False",
"[enumeration][query][basic]") {
"[enumeration][query][basic][rest]") {
create_array();

// Attempt to query with an enumeration value that isn't in the Enumeration
Expand Down Expand Up @@ -563,7 +686,7 @@ TEST_CASE_METHOD(
TEST_CASE_METHOD(
CPPEnumerationFx,
"CPP: Enumeration Query - Invalid Enumeration Value Accepted by EQ",
"[enumeration][query][basic]") {
"[enumeration][query][basic][rest]") {
create_array();

// Attempt to query with an enumeration value that isn't in the Enumeration
Expand All @@ -590,7 +713,7 @@ TEST_CASE_METHOD(
TEST_CASE_METHOD(
CPPEnumerationFx,
"CPP: Enumeration Query - Invalid Enumeration Value Accepted by IN",
"[enumeration][query][basic]") {
"[enumeration][query][basic][rest]") {
create_array();

// Attempt to query with an enumeration value that isn't in the Enumeration
Expand All @@ -617,7 +740,7 @@ TEST_CASE_METHOD(
TEST_CASE_METHOD(
CPPEnumerationFx,
"CPP: Enumeration Query - Set Use Enumeration",
"[enumeration][query][set-use-enumeration]") {
"[enumeration][query][set-use-enumeration][rest]") {
QueryCondition qc(ctx_);
qc.init("attr1", "fred", 4, TILEDB_EQ);
REQUIRE_NOTHROW(
Expand All @@ -629,7 +752,7 @@ TEST_CASE_METHOD(
TEST_CASE_METHOD(
CPPEnumerationFx,
"C API: Enumeration Query - Check nullptr",
"[enumeration][query][check-nullptr]") {
"[enumeration][query][check-nullptr][rest]") {
auto rc =
tiledb_query_condition_set_use_enumeration(ctx_.ptr().get(), nullptr, 0);
REQUIRE(rc != TILEDB_OK);
Expand All @@ -638,7 +761,7 @@ TEST_CASE_METHOD(
TEST_CASE_METHOD(
CPPEnumerationFx,
"CPP: Enumeration Query - Attempt to query on empty enumeration",
"[enumeration][query][empty-results]") {
"[enumeration][query][empty-results][rest]") {
create_array(true);

// Attempt to query with an enumeration value that isn't in the Enumeration
Expand All @@ -663,13 +786,9 @@ TEST_CASE_METHOD(
}

CPPEnumerationFx::CPPEnumerationFx()
: uri_("enumeration_test_array")
, vfs_(ctx_) {
rm_array();
}

CPPEnumerationFx::~CPPEnumerationFx() {
rm_array();
: uri_(vfs_test_setup_.array_uri("enumeration_test_array"))
, ctx_(vfs_test_setup_.ctx())
, vfs_(vfs_test_setup_.ctx()) {
}

template <typename T>
Expand Down Expand Up @@ -744,9 +863,3 @@ void CPPEnumerationFx::create_array(bool with_empty_enumeration) {
query.finalize();
array.close();
}

void CPPEnumerationFx::rm_array() {
if (vfs_.is_dir(uri_)) {
vfs_.remove_dir(uri_);
}
}
2 changes: 1 addition & 1 deletion test/src/unit-cppapi-schema-evolution.cc
Original file line number Diff line number Diff line change
Expand Up @@ -810,7 +810,7 @@ TEST_CASE(

TEST_CASE(
"SchemaEvolution Error Handling Tests",
"[cppapi][schema][evolution][errors]") {
"[cppapi][schema][evolution][errors][rest]") {
auto ase = make_shared<tiledb::sm::ArraySchemaEvolution>(
HERE(), tiledb::test::create_test_memory_tracker());
REQUIRE_THROWS(ase->evolve_schema(nullptr));
Expand Down
2 changes: 1 addition & 1 deletion tiledb/api/c_api/array_schema/array_schema_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ capi_return_t tiledb_array_schema_alloc(

// Create ArraySchema object
auto memory_tracker = ctx->resources().create_memory_tracker();
memory_tracker->set_type(MemoryTrackerType::ARRAY_CREATE);
memory_tracker->set_type(tiledb::sm::MemoryTrackerType::ARRAY_CREATE);
*array_schema = tiledb_array_schema_t::make_handle(
static_cast<tiledb::sm::ArrayType>(array_type), memory_tracker);

Expand Down
Loading

0 comments on commit edb48b1

Please sign in to comment.