Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Nullable attribute result size estimation APIs #1945

Merged
merged 1 commit into from
Dec 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

## New features

* Support for nullable attributes. [#1895](https://github.com/TileDB-Inc/TileDB/pull/1895) [#1938](https://github.com/TileDB-Inc/TileDB/pull/1938) [#1948](https://github.com/TileDB-Inc/TileDB/pull/1948)
* Support for nullable attributes. [#1895](https://github.com/TileDB-Inc/TileDB/pull/1895) [#1938](https://github.com/TileDB-Inc/TileDB/pull/1938) [#1948](https://github.com/TileDB-Inc/TileDB/pull/1948) [#1945](https://github.com/TileDB-Inc/TileDB/pull/1945)
* Support for Hilbert order sorting for sparse arrays. [#1880](https://github.com/TileDB-Inc/TileDB/pull/1880)
* Support for AWS S3 "AssumeRole" temporary credentials [#1882](https://github.com/TileDB-Inc/TileDB/pull/1882)
* Experimental support for an in-memory backend used with bootstrap option "--enable-memfs" [#1873](https://github.com/TileDB-Inc/TileDB/pull/1873)
Expand Down
77 changes: 77 additions & 0 deletions test/src/unit-cppapi-fill_values.cc
Original file line number Diff line number Diff line change
Expand Up @@ -679,3 +679,80 @@ TEST_CASE(

CHECK_NOTHROW(vfs.remove_dir(array_name));
}

// Checks the nullable result-size estimation APIs on a partially written
// 1D dense array, for default fill values, custom fill values, and a
// multi-range query.
TEST_CASE(
    "C++ API: Test result estimation, partial dense arrays, nullable",
    "[cppapi][fill-values][partial][est-result][nullable]") {
  Context ctx;
  VFS vfs(ctx);
  std::string array_name = "fill_values_est_result_partial_nullable";

  // Start from a clean slate: drop any array left over from a previous run.
  if (vfs.is_dir(array_name)) {
    CHECK_NOTHROW(vfs.remove_dir(array_name));
  }

  SECTION("- Default fill values") {
    // Number of cells the estimates below are expressed in terms of.
    constexpr uint64_t num_cells = 10;

    create_array_1d(array_name, true);
    write_array_1d_partial(array_name, true);

    Array array(ctx, array_name, TILEDB_READ);
    Query query(ctx, array, TILEDB_READ);
    const auto a1_sizes = query.est_result_size_nullable("a1");
    const auto a2_sizes = query.est_result_size_var_nullable("a2");
    const auto a3_sizes = query.est_result_size_nullable("a3");
    const auto d_size = query.est_result_size("d");

    // Dimension.
    CHECK(d_size == num_cells * sizeof(int32_t));

    // a1: {values, validity}.
    CHECK(a1_sizes[0] == num_cells * sizeof(int32_t));
    CHECK(a1_sizes[1] == num_cells * sizeof(uint8_t));

    // a2: {offsets, values, validity}.
    CHECK(a2_sizes[0] == 80);
    CHECK(a2_sizes[1] == num_cells * sizeof(char));
    CHECK(a2_sizes[2] == num_cells * sizeof(uint8_t));

    // a3: {values, validity}; two doubles per cell.
    CHECK(a3_sizes[0] == num_cells * 2 * sizeof(double));
    CHECK(a3_sizes[1] == num_cells * sizeof(uint8_t));
  }

  SECTION("- Custom fill values") {
    constexpr uint64_t num_cells = 10;

    // The 3-character fill string is reflected in the a2 values estimate.
    std::string fill_str("abc");
    create_array_1d(array_name, true, 0, fill_str, {1.0, 2.0});
    write_array_1d_partial(array_name, true);

    Array array(ctx, array_name, TILEDB_READ);
    Query query(ctx, array, TILEDB_READ);
    const auto a1_sizes = query.est_result_size_nullable("a1");
    const auto a2_sizes = query.est_result_size_var_nullable("a2");
    const auto a3_sizes = query.est_result_size_nullable("a3");
    const auto d_size = query.est_result_size("d");

    CHECK(d_size == num_cells * sizeof(int32_t));

    CHECK(a1_sizes[0] == num_cells * sizeof(int32_t));
    CHECK(a1_sizes[1] == num_cells * sizeof(uint8_t));

    CHECK(a2_sizes[0] == 80);
    CHECK(a2_sizes[1] == num_cells * 3 * sizeof(char));
    CHECK(a2_sizes[2] == num_cells * sizeof(uint8_t));

    CHECK(a3_sizes[0] == num_cells * 2 * sizeof(double));
    CHECK(a3_sizes[1] == num_cells * sizeof(uint8_t));
  }

  SECTION("- Default fill values, multi-range") {
    // Two ranges of two cells each ([2, 3] and [9, 10]) on dimension 0.
    constexpr uint64_t num_cells = 4;

    create_array_1d(array_name, true);
    write_array_1d_partial(array_name, true);

    Array array(ctx, array_name, TILEDB_READ);
    Query query(ctx, array, TILEDB_READ);
    query.add_range<int32_t>(0, 2, 3);
    query.add_range<int32_t>(0, 9, 10);
    const auto a1_sizes = query.est_result_size_nullable("a1");
    const auto a2_sizes = query.est_result_size_var_nullable("a2");
    const auto a3_sizes = query.est_result_size_nullable("a3");
    const auto d_size = query.est_result_size("d");

    CHECK(d_size == num_cells * sizeof(int32_t));

    CHECK(a1_sizes[0] == num_cells * sizeof(int32_t));
    CHECK(a1_sizes[1] == num_cells * sizeof(uint8_t));

    CHECK(a2_sizes[0] == 32);
    CHECK(a2_sizes[1] == num_cells * sizeof(char));
    CHECK(a2_sizes[2] == num_cells * sizeof(uint8_t));

    CHECK(a3_sizes[0] == num_cells * 2 * sizeof(double));
    CHECK(a3_sizes[1] == num_cells * sizeof(uint8_t));
  }

  // Remove the array created by whichever section just ran.
  CHECK_NOTHROW(vfs.remove_dir(array_name));
}
37 changes: 37 additions & 0 deletions tiledb/sm/c_api/tiledb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3213,6 +3213,43 @@ int32_t tiledb_query_get_est_result_size_var(
return TILEDB_OK;
}

// C API entry point: estimated result size of a fixed-sized, nullable
// attribute. Validates the handles, then forwards to the internal query
// object; any failure is reported as TILEDB_ERR.
int32_t tiledb_query_get_est_result_size_nullable(
    tiledb_ctx_t* ctx,
    const tiledb_query_t* query,
    const char* name,
    uint64_t* size_val,
    uint64_t* size_validity) {
  // The context is checked first; the query is only checked when the
  // context itself is sane.
  if (sanity_check(ctx) == TILEDB_ERR) {
    return TILEDB_ERR;
  }
  if (sanity_check(ctx, query) == TILEDB_ERR) {
    return TILEDB_ERR;
  }

  // Delegate to the query; a true result from the macro means failure.
  if (SAVE_ERROR_CATCH(
          ctx,
          query->query_->get_est_result_size_nullable(
              name, size_val, size_validity))) {
    return TILEDB_ERR;
  }

  return TILEDB_OK;
}

// C API entry point: estimated result size of a var-sized, nullable
// attribute. Validates the handles, then forwards to the internal query
// object; any failure is reported as TILEDB_ERR.
int32_t tiledb_query_get_est_result_size_var_nullable(
    tiledb_ctx_t* ctx,
    const tiledb_query_t* query,
    const char* name,
    uint64_t* size_off,
    uint64_t* size_val,
    uint64_t* size_validity) {
  // The context is checked first; the query is only checked when the
  // context itself is sane.
  if (sanity_check(ctx) == TILEDB_ERR) {
    return TILEDB_ERR;
  }
  if (sanity_check(ctx, query) == TILEDB_ERR) {
    return TILEDB_ERR;
  }

  // Delegate to the four-argument (offsets + values + validity) overload;
  // a true result from the macro means failure.
  if (SAVE_ERROR_CATCH(
          ctx,
          query->query_->get_est_result_size_nullable(
              name, size_off, size_val, size_validity))) {
    return TILEDB_ERR;
  }

  return TILEDB_OK;
}

int32_t tiledb_query_get_fragment_num(
tiledb_ctx_t* ctx, const tiledb_query_t* query, uint32_t* num) {
if (sanity_check(ctx) == TILEDB_ERR || sanity_check(ctx, query) == TILEDB_ERR)
Expand Down
55 changes: 55 additions & 0 deletions tiledb/sm/c_api/tiledb.h
Original file line number Diff line number Diff line change
Expand Up @@ -4313,6 +4313,61 @@ TILEDB_EXPORT int32_t tiledb_query_get_est_result_size_var(
uint64_t* size_off,
uint64_t* size_val);

/**
 * Retrieves the estimated result size for a fixed-sized, nullable attribute.
 *
 * **Example:**
 *
 * @code{.c}
 * uint64_t size_val;
 * uint64_t size_validity;
 * tiledb_query_get_est_result_size_nullable(ctx, query, "a", &size_val,
 * &size_validity);
 * @endcode
 *
 * @param ctx The TileDB context.
 * @param query The query.
 * @param name The attribute name.
 * @param size_val Output. The estimated size of the values (in bytes) to be
 *     retrieved.
 * @param size_validity Output. The estimated size of the validity values
 *     (in bytes) to be retrieved.
 * @return `TILEDB_OK` for success and `TILEDB_ERR` for error.
 */
TILEDB_EXPORT int32_t tiledb_query_get_est_result_size_nullable(
tiledb_ctx_t* ctx,
const tiledb_query_t* query,
const char* name,
uint64_t* size_val,
uint64_t* size_validity);

/**
 * Retrieves the estimated result size for a var-sized, nullable attribute.
 *
 * **Example:**
 *
 * @code{.c}
 * uint64_t size_off, size_val, size_validity;
 * tiledb_query_get_est_result_size_var_nullable(
 * ctx, query, "a", &size_off, &size_val, &size_validity);
 * @endcode
 *
 * @param ctx The TileDB context.
 * @param query The query.
 * @param name The attribute name.
 * @param size_off Output. The estimated size of the offsets (in bytes) to be
 *     retrieved.
 * @param size_val Output. The estimated size of the values (in bytes) to be
 *     retrieved.
 * @param size_validity Output. The estimated size of the validity values
 *     (in bytes) to be retrieved.
 * @return `TILEDB_OK` for success and `TILEDB_ERR` for error.
 */
TILEDB_EXPORT int32_t tiledb_query_get_est_result_size_var_nullable(
tiledb_ctx_t* ctx,
const tiledb_query_t* query,
const char* name,
uint64_t* size_off,
uint64_t* size_val,
uint64_t* size_validity);

/**
* Retrieves the number of written fragments. Applicable only to WRITE
* queries.
Expand Down
64 changes: 63 additions & 1 deletion tiledb/sm/cpp_api/query.h
Original file line number Diff line number Diff line change
Expand Up @@ -858,7 +858,7 @@ class Query {
* @endcode
*
* @param attr_name The attribute name.
* @return A pair with first element containing the estimated size of
* @return An array with first element containing the estimated size of
* the result offsets in bytes, and second element containing the
* estimated size of the result values in bytes.
*/
Expand All @@ -875,6 +875,68 @@ class Query {
return {size_off, size_val};
}

/**
* Retrieves the estimated result size for a fixed-size, nullable attribute.
*
* **Example:**
*
* @code{.cpp}
* std::array<uint64_t, 2> est_size =
* query.est_result_size_nullable("attr1");
* @endcode
*
* @param attr_name The attribute name.
* @return An array with first element containing the estimated size of
* the result values in bytes, and second element containing the
* estimated size of the result validity values in bytes.
*/
std::array<uint64_t, 2> est_result_size_nullable(
const std::string& attr_name) const {
auto& ctx = ctx_.get();
uint64_t size_val = 0;
uint64_t size_validity = 0;
ctx.handle_error(tiledb_query_get_est_result_size_nullable(
ctx.ptr().get(),
query_.get(),
attr_name.c_str(),
&size_val,
&size_validity));
return {size_val, size_validity};
}

/**
* Retrieves the estimated result size for a variable-size, nullable
* attribute.
*
* **Example:**
*
* @code{.cpp}
* std::array<uint64_t, 3> est_size =
* query.est_result_size_var_nullable("attr1");
* @endcode
*
* @param attr_name The attribute name.
* @return An array with first element containing the estimated size of
* the offset values in bytes, second element containing the
* estimated size of the result values in bytes, and the third element
* containing the estimated size of the validity values in bytes.
*/
std::array<uint64_t, 3> est_result_size_var_nullable(
const std::string& attr_name) const {
auto& ctx = ctx_.get();
uint64_t size_off = 0;
uint64_t size_val = 0;
uint64_t size_validity = 0;
ctx.handle_error(tiledb_query_get_est_result_size_var_nullable(
ctx.ptr().get(),
query_.get(),
attr_name.c_str(),
&size_off,
&size_val,
&size_validity));
return {size_off, size_val, size_validity};
}

/**
* Returns the number of written fragments. Applicable only to WRITE queries.
*/
Expand Down
84 changes: 84 additions & 0 deletions tiledb/sm/query/query.cc
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,12 @@ Status Query::get_est_result_size(const char* name, uint64_t* size) {
"Cannot get estimated result size; Not applicable to zipped "
"coordinates in arrays with domains with variable-sized dimensions"));

if (array_->array_schema()->is_nullable(name))
return LOG_STATUS(Status::WriterError(
std::string(
"Cannot get estimated result size; Input attribute/dimension '") +
name + "' is nullable"));

if (array_->is_remote() && !reader_.est_result_size_computed()) {
auto rest_client = storage_manager_->rest_client();
if (rest_client == nullptr)
Expand All @@ -315,6 +321,12 @@ Status Query::get_est_result_size(
"Cannot get estimated result size; Operation currently "
"unsupported for write queries"));

if (array_->array_schema()->is_nullable(name))
return LOG_STATUS(Status::WriterError(
std::string(
"Cannot get estimated result size; Input attribute/dimension '") +
name + "' is nullable"));

if (array_->is_remote() && !reader_.est_result_size_computed()) {
auto rest_client = storage_manager_->rest_client();
if (rest_client == nullptr)
Expand All @@ -331,6 +343,78 @@ Status Query::get_est_result_size(
return reader_.get_est_result_size(name, size_off, size_val);
}

/**
 * Gets the estimated result size for a fixed-sized, nullable attribute.
 *
 * Fails with a query error when the query is a write query, `name` is null,
 * `name` is not an attribute, the attribute is not nullable, or the array is
 * remote and the estimate has not been computed yet (unimplemented for
 * nullable attributes in remote arrays).
 *
 * @param name The attribute name.
 * @param size_val Forwarded to the reader; receives the estimated size of
 *     the values.
 * @param size_validity Forwarded to the reader; receives the estimated size
 *     of the validity values.
 * @return Status
 */
Status Query::get_est_result_size_nullable(
    const char* name, uint64_t* size_val, uint64_t* size_validity) {
  if (type_ == QueryType::WRITE)
    return LOG_STATUS(Status::QueryError(
        "Cannot get estimated result size; Operation currently "
        "unsupported for write queries"));

  if (name == nullptr)
    return LOG_STATUS(Status::QueryError(
        "Cannot get estimated result size; Name cannot be null"));

  // Note the trailing space after "only": the two literals are concatenated.
  if (!array_->array_schema()->attribute(name))
    return LOG_STATUS(Status::QueryError(
        "Cannot get estimated result size; Nullable API is only "
        "applicable to attributes"));

  // Reported as a query error, consistent with every other failure here.
  if (!array_->array_schema()->is_nullable(name))
    return LOG_STATUS(Status::QueryError(
        std::string("Cannot get estimated result size; Input attribute '") +
        name + "' is not nullable"));

  if (array_->is_remote() && !reader_.est_result_size_computed()) {
    auto rest_client = storage_manager_->rest_client();
    if (rest_client == nullptr)
      return LOG_STATUS(
          Status::QueryError("Error in query estimate result size; remote "
                             "array with no rest client."));

    return LOG_STATUS(
        Status::QueryError("Error in query estimate result size; unimplemented "
                           "for nullable attributes in remote arrays."));
  }

  return reader_.get_est_result_size_nullable(name, size_val, size_validity);
}

/**
 * Gets the estimated result size for a var-sized, nullable attribute.
 *
 * Fails with a query error when the query is a write query, `name` is null,
 * `name` is not an attribute, the attribute is not nullable, or the array is
 * remote and the estimate has not been computed yet (unimplemented for
 * nullable attributes in remote arrays).
 *
 * @param name The attribute name.
 * @param size_off Forwarded to the reader; receives the estimated size of
 *     the offsets.
 * @param size_val Forwarded to the reader; receives the estimated size of
 *     the values.
 * @param size_validity Forwarded to the reader; receives the estimated size
 *     of the validity values.
 * @return Status
 */
Status Query::get_est_result_size_nullable(
    const char* name,
    uint64_t* size_off,
    uint64_t* size_val,
    uint64_t* size_validity) {
  if (type_ == QueryType::WRITE)
    return LOG_STATUS(Status::QueryError(
        "Cannot get estimated result size; Operation currently "
        "unsupported for write queries"));

  // Guard before `name` is used for schema lookups and string concatenation
  // below; mirrors the check in the fixed-sized overload.
  if (name == nullptr)
    return LOG_STATUS(Status::QueryError(
        "Cannot get estimated result size; Name cannot be null"));

  // Note the trailing space after "only": the two literals are concatenated.
  if (!array_->array_schema()->attribute(name))
    return LOG_STATUS(Status::QueryError(
        "Cannot get estimated result size; Nullable API is only "
        "applicable to attributes"));

  // Reported as a query error, consistent with every other failure here.
  if (!array_->array_schema()->is_nullable(name))
    return LOG_STATUS(Status::QueryError(
        std::string("Cannot get estimated result size; Input attribute '") +
        name + "' is not nullable"));

  if (array_->is_remote() && !reader_.est_result_size_computed()) {
    auto rest_client = storage_manager_->rest_client();
    if (rest_client == nullptr)
      return LOG_STATUS(
          Status::QueryError("Error in query estimate result size; remote "
                             "array with no rest client."));

    return LOG_STATUS(
        Status::QueryError("Error in query estimate result size; unimplemented "
                           "for nullable attributes in remote arrays."));
  }

  return reader_.get_est_result_size_nullable(
      name, size_off, size_val, size_validity);
}

std::unordered_map<std::string, Subarray::ResultSize>
Query::get_est_result_size_map() {
return reader_.get_est_result_size_map();
Expand Down
Loading