Skip to content

Commit

Permalink
Nullable attribute result size estimation APIs
Browse files Browse the repository at this point in the history
This adds the C/C++ APIs for getting estimated result sizes of nullable
attributes. Note that these APIs will error out at runtime for REST (remote)
arrays because we don't currently have a REST API for this.
  • Loading branch information
Joe Maley committed Dec 2, 2020
1 parent fa4b24c commit 1d36918
Show file tree
Hide file tree
Showing 9 changed files with 366 additions and 2 deletions.
2 changes: 1 addition & 1 deletion HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

## New features

* Support for nullable attributes. [#1895](https://github.com/TileDB-Inc/TileDB/pull/1895) [#1938](https://github.com/TileDB-Inc/TileDB/pull/1938)
* Support for nullable attributes. [#1895](https://github.com/TileDB-Inc/TileDB/pull/1895) [#1938](https://github.com/TileDB-Inc/TileDB/pull/1938) [#1945](https://github.com/TileDB-Inc/TileDB/pull/1945)
* Support for Hilbert order sorting for sparse arrays. [#1880](https://github.com/TileDB-Inc/TileDB/pull/1880)
* Support for AWS S3 "AssumeRole" temporary credentials [#1882](https://github.com/TileDB-Inc/TileDB/pull/1882)
* Experimental support for an in-memory backend used with bootstrap option "--enable-memfs" [#1873](https://github.com/TileDB-Inc/TileDB/pull/1873)
Expand Down
77 changes: 77 additions & 0 deletions test/src/unit-cppapi-fill_values.cc
Original file line number Diff line number Diff line change
Expand Up @@ -679,3 +679,80 @@ TEST_CASE(

CHECK_NOTHROW(vfs.remove_dir(array_name));
}

TEST_CASE(
    "C++ API: Test result estimation, partial dense arrays, nullable",
    "[cppapi][fill-values][partial][est-result][nullable]") {
  Context ctx;
  VFS vfs(ctx);
  std::string array_name = "fill_values_est_result_partial_nullable";

  // Start from a clean slate in case an earlier run left the array behind.
  if (vfs.is_dir(array_name))
    CHECK_NOTHROW(vfs.remove_dir(array_name));

  // Checks the estimated sizes of every field for a read that is expected to
  // cover `cell_num` cells, where each var-sized `a2` cell is expected to
  // contribute `a2_cell_bytes` bytes of values.
  auto check_estimates =
      [](Query& query, uint64_t cell_num, uint64_t a2_cell_bytes) {
        auto est_a1 = query.est_result_size_nullable("a1");
        auto est_a2 = query.est_result_size_var_nullable("a2");
        auto est_a3 = query.est_result_size_nullable("a3");
        auto est_d = query.est_result_size("d");

        // Dimension: one int32 per cell.
        CHECK(est_d == cell_num * sizeof(int32_t));

        // a1: one int32 value plus one validity byte per cell.
        CHECK(est_a1[0] == cell_num * sizeof(int32_t));
        CHECK(est_a1[1] == cell_num * sizeof(uint8_t));

        // a2: 8 bytes of offsets per cell (80 for 10 cells, 32 for 4 cells),
        // the char values, and one validity byte per cell.
        CHECK(est_a2[0] == cell_num * 8);
        CHECK(est_a2[1] == cell_num * a2_cell_bytes * sizeof(char));
        CHECK(est_a2[2] == cell_num * sizeof(uint8_t));

        // a3: two doubles plus one validity byte per cell.
        CHECK(est_a3[0] == cell_num * 2 * sizeof(double));
        CHECK(est_a3[1] == cell_num * sizeof(uint8_t));
      };

  SECTION("- Default fill values") {
    create_array_1d(array_name, true);
    write_array_1d_partial(array_name, true);

    Array array(ctx, array_name, TILEDB_READ);
    Query query(ctx, array, TILEDB_READ);

    // No ranges set: 10 cells, one char per a2 cell.
    check_estimates(query, 10, 1);
  }

  SECTION("- Custom fill values") {
    std::string s("abc");
    create_array_1d(array_name, true, 0, s, {1.0, 2.0});
    write_array_1d_partial(array_name, true);

    Array array(ctx, array_name, TILEDB_READ);
    Query query(ctx, array, TILEDB_READ);

    // The custom a2 fill value "abc" is three chars long.
    check_estimates(query, 10, 3);
  }

  SECTION("- Default fill values, multi-range") {
    create_array_1d(array_name, true);
    write_array_1d_partial(array_name, true);

    Array array(ctx, array_name, TILEDB_READ);
    Query query(ctx, array, TILEDB_READ);
    query.add_range<int32_t>(0, 2, 3);
    query.add_range<int32_t>(0, 9, 10);

    // Ranges [2, 3] and [9, 10] together cover four cells.
    check_estimates(query, 4, 1);
  }

  CHECK_NOTHROW(vfs.remove_dir(array_name));
}
37 changes: 37 additions & 0 deletions tiledb/sm/c_api/tiledb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3213,6 +3213,43 @@ int32_t tiledb_query_get_est_result_size_var(
return TILEDB_OK;
}

// C API entry point: estimated result size for a fixed-sized, nullable
// attribute. Validates the handles, then forwards to the query object and
// records any resulting error on the context.
int32_t tiledb_query_get_est_result_size_nullable(
    tiledb_ctx_t* ctx,
    const tiledb_query_t* query,
    const char* name,
    uint64_t* size_val,
    uint64_t* size_validity) {
  // The context is checked first; the query is only checked once the context
  // has passed.
  if (sanity_check(ctx) == TILEDB_ERR)
    return TILEDB_ERR;
  if (sanity_check(ctx, query) == TILEDB_ERR)
    return TILEDB_ERR;

  // A truthy result from the macro means an error occurred.
  return (SAVE_ERROR_CATCH(
             ctx,
             query->query_->get_est_result_size_nullable(
                 name, size_val, size_validity))) ?
             TILEDB_ERR :
             TILEDB_OK;
}

// C API entry point: estimated result size for a var-sized, nullable
// attribute. Validates the handles, then forwards to the query object's
// four-argument overload and records any resulting error on the context.
int32_t tiledb_query_get_est_result_size_var_nullable(
    tiledb_ctx_t* ctx,
    const tiledb_query_t* query,
    const char* name,
    uint64_t* size_off,
    uint64_t* size_val,
    uint64_t* size_validity) {
  // The context is checked first; the query is only checked once the context
  // has passed.
  if (sanity_check(ctx) == TILEDB_ERR)
    return TILEDB_ERR;
  if (sanity_check(ctx, query) == TILEDB_ERR)
    return TILEDB_ERR;

  // A truthy result from the macro means an error occurred.
  return (SAVE_ERROR_CATCH(
             ctx,
             query->query_->get_est_result_size_nullable(
                 name, size_off, size_val, size_validity))) ?
             TILEDB_ERR :
             TILEDB_OK;
}

int32_t tiledb_query_get_fragment_num(
tiledb_ctx_t* ctx, const tiledb_query_t* query, uint32_t* num) {
if (sanity_check(ctx) == TILEDB_ERR || sanity_check(ctx, query) == TILEDB_ERR)
Expand Down
55 changes: 55 additions & 0 deletions tiledb/sm/c_api/tiledb.h
Original file line number Diff line number Diff line change
Expand Up @@ -4309,6 +4309,61 @@ TILEDB_EXPORT int32_t tiledb_query_get_est_result_size_var(
uint64_t* size_off,
uint64_t* size_val);

/**
* Retrieves the estimated result size for a fixed-sized, nullable attribute.
*
* The call fails for write queries, when `name` is null or does not name an
* attribute, and when the attribute is not nullable. It is currently
* unimplemented for remote arrays and returns an error for them.
*
* **Example:**
*
* @code{.c}
* uint64_t size_val;
* uint64_t size_validity;
* tiledb_query_get_est_result_size_nullable(ctx, query, "a", &size_val,
* &size_validity);
* @endcode
*
* @param ctx The TileDB context.
* @param query The query.
* @param name The attribute name.
* @param size_val Set to the estimated size of the values (in bytes) to be
* retrieved.
* @param size_validity Set to the estimated size of the validity values (in
* bytes) to be retrieved.
* @return `TILEDB_OK` for success and `TILEDB_ERR` for error.
*/
TILEDB_EXPORT int32_t tiledb_query_get_est_result_size_nullable(
tiledb_ctx_t* ctx,
const tiledb_query_t* query,
const char* name,
uint64_t* size_val,
uint64_t* size_validity);

/**
* Retrieves the estimated result size for a var-sized, nullable attribute.
*
* The call fails for write queries, when `name` does not name an attribute,
* and when the attribute is not nullable. It is currently unimplemented for
* remote arrays and returns an error for them.
*
* **Example:**
*
* @code{.c}
* uint64_t size_off, size_val, size_validity;
* tiledb_query_get_est_result_size_var_nullable(
* ctx, query, "a", &size_off, &size_val, &size_validity);
* @endcode
*
* @param ctx The TileDB context.
* @param query The query.
* @param name The attribute name.
* @param size_off Set to the estimated size of the offsets (in bytes) to be
* retrieved.
* @param size_val Set to the estimated size of the values (in bytes) to be
* retrieved.
* @param size_validity Set to the estimated size of the validity values (in
* bytes) to be retrieved.
* @return `TILEDB_OK` for success and `TILEDB_ERR` for error.
*/
TILEDB_EXPORT int32_t tiledb_query_get_est_result_size_var_nullable(
tiledb_ctx_t* ctx,
const tiledb_query_t* query,
const char* name,
uint64_t* size_off,
uint64_t* size_val,
uint64_t* size_validity);

/**
* Retrieves the number of written fragments. Applicable only to WRITE
* queries.
Expand Down
64 changes: 63 additions & 1 deletion tiledb/sm/cpp_api/query.h
Original file line number Diff line number Diff line change
Expand Up @@ -858,7 +858,7 @@ class Query {
* @endcode
*
* @param attr_name The attribute name.
* @return A pair with first element containing the estimated size of
* @return An array with first element containing the estimated size of
* the result offsets in bytes, and second element containing the
* estimated size of the result values in bytes.
*/
Expand All @@ -875,6 +875,68 @@ class Query {
return {size_off, size_val};
}

/**
 * Estimates how many bytes a read of a fixed-size, nullable attribute will
 * produce.
 *
 * **Example:**
 *
 * @code{.cpp}
 * std::array<uint64_t, 2> est_size =
 *     query.est_result_size_nullable("attr1");
 * @endcode
 *
 * @param attr_name The attribute name.
 * @return An array whose element 0 is the estimated size of the result
 *     values in bytes and whose element 1 is the estimated size of the
 *     result validity values in bytes.
 */
std::array<uint64_t, 2> est_result_size_nullable(
    const std::string& attr_name) const {
  // Slot 0 receives the values size, slot 1 the validity size. Both start at
  // zero and are filled in place by the C API.
  std::array<uint64_t, 2> sizes{{0, 0}};

  auto& ctx = ctx_.get();
  ctx.handle_error(tiledb_query_get_est_result_size_nullable(
      ctx.ptr().get(),
      query_.get(),
      attr_name.c_str(),
      &sizes[0],
      &sizes[1]));

  return sizes;
}

/**
 * Estimates how many bytes a read of a variable-size, nullable attribute
 * will produce.
 *
 * **Example:**
 *
 * @code{.cpp}
 * std::array<uint64_t, 3> est_size =
 *     query.est_result_size_var_nullable("attr1");
 * @endcode
 *
 * @param attr_name The attribute name.
 * @return An array whose element 0 is the estimated size of the offset
 *     values in bytes, element 1 the estimated size of the result values in
 *     bytes, and element 2 the estimated size of the validity values in
 *     bytes.
 */
std::array<uint64_t, 3> est_result_size_var_nullable(
    const std::string& attr_name) const {
  // Slot 0 receives the offsets size, slot 1 the values size and slot 2 the
  // validity size. All start at zero and are filled in place by the C API.
  std::array<uint64_t, 3> sizes{{0, 0, 0}};

  auto& ctx = ctx_.get();
  ctx.handle_error(tiledb_query_get_est_result_size_var_nullable(
      ctx.ptr().get(),
      query_.get(),
      attr_name.c_str(),
      &sizes[0],
      &sizes[1],
      &sizes[2]));

  return sizes;
}

/**
* Returns the number of written fragments. Applicable only to WRITE queries.
*/
Expand Down
84 changes: 84 additions & 0 deletions tiledb/sm/query/query.cc
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,12 @@ Status Query::get_est_result_size(const char* name, uint64_t* size) {
"Cannot get estimated result size; Not applicable to zipped "
"coordinates in arrays with domains with variable-sized dimensions"));

if (array_->array_schema()->is_nullable(name))
return LOG_STATUS(Status::WriterError(
std::string(
"Cannot get estimated result size; Input attribute/dimension '") +
name + "' is nullable"));

if (array_->is_remote() && !reader_.est_result_size_computed()) {
auto rest_client = storage_manager_->rest_client();
if (rest_client == nullptr)
Expand All @@ -315,6 +321,12 @@ Status Query::get_est_result_size(
"Cannot get estimated result size; Operation currently "
"unsupported for write queries"));

if (array_->array_schema()->is_nullable(name))
return LOG_STATUS(Status::WriterError(
std::string(
"Cannot get estimated result size; Input attribute/dimension '") +
name + "' is nullable"));

if (array_->is_remote() && !reader_.est_result_size_computed()) {
auto rest_client = storage_manager_->rest_client();
if (rest_client == nullptr)
Expand All @@ -331,6 +343,78 @@ Status Query::get_est_result_size(
return reader_.get_est_result_size(name, size_off, size_val);
}

/**
 * Gets the estimated result size (in bytes) for a fixed-sized, nullable
 * attribute.
 *
 * Returns an error status for write queries, for a null `name`, when `name`
 * is not an attribute of the array schema, when the attribute is not
 * nullable, and for remote arrays whose estimate has not been computed (no
 * REST support exists for nullable attributes).
 *
 * @param name The attribute name.
 * @param size_val Passed to the reader to receive the values size.
 * @param size_validity Passed to the reader to receive the validity size.
 * @return Status
 */
Status Query::get_est_result_size_nullable(
    const char* name, uint64_t* size_val, uint64_t* size_validity) {
  if (type_ == QueryType::WRITE)
    return LOG_STATUS(Status::QueryError(
        "Cannot get estimated result size; Operation currently "
        "unsupported for write queries"));

  // Must precede the schema lookups below, which take `name` as given.
  if (name == nullptr)
    return LOG_STATUS(Status::QueryError(
        "Cannot get estimated result size; Name cannot be null"));

  // The adjacent literals are concatenated, so the trailing space after
  // "only" is required to keep the words apart.
  if (!array_->array_schema()->attribute(name))
    return LOG_STATUS(Status::QueryError(
        "Cannot get estimated result size; Nullable API is only "
        "applicable to attributes"));

  // Reported as a query error, like every other failure in this function.
  if (!array_->array_schema()->is_nullable(name))
    return LOG_STATUS(Status::QueryError(
        std::string("Cannot get estimated result size; Input attribute '") +
        name + "' is not nullable"));

  if (array_->is_remote() && !reader_.est_result_size_computed()) {
    auto rest_client = storage_manager_->rest_client();
    if (rest_client == nullptr)
      return LOG_STATUS(
          Status::QueryError("Error in query estimate result size; remote "
                             "array with no rest client."));

    // No REST endpoint exists for nullable estimates yet.
    return LOG_STATUS(
        Status::QueryError("Error in query estimate result size; unimplemented "
                           "for nullable attributes in remote arrays."));
  }

  return reader_.get_est_result_size_nullable(name, size_val, size_validity);
}

/**
 * Gets the estimated result size (in bytes) for a var-sized, nullable
 * attribute.
 *
 * Returns an error status for write queries, for a null `name`, when `name`
 * is not an attribute of the array schema, when the attribute is not
 * nullable, and for remote arrays whose estimate has not been computed (no
 * REST support exists for nullable attributes).
 *
 * @param name The attribute name.
 * @param size_off Passed to the reader to receive the offsets size.
 * @param size_val Passed to the reader to receive the values size.
 * @param size_validity Passed to the reader to receive the validity size.
 * @return Status
 */
Status Query::get_est_result_size_nullable(
    const char* name,
    uint64_t* size_off,
    uint64_t* size_val,
    uint64_t* size_validity) {
  if (type_ == QueryType::WRITE)
    return LOG_STATUS(Status::QueryError(
        "Cannot get estimated result size; Operation currently "
        "unsupported for write queries"));

  // Same guard as the fixed-sized overload: reject a null name before it
  // reaches the schema lookups and the string concatenation below.
  if (name == nullptr)
    return LOG_STATUS(Status::QueryError(
        "Cannot get estimated result size; Name cannot be null"));

  // The adjacent literals are concatenated, so the trailing space after
  // "only" is required to keep the words apart.
  if (!array_->array_schema()->attribute(name))
    return LOG_STATUS(Status::QueryError(
        "Cannot get estimated result size; Nullable API is only "
        "applicable to attributes"));

  // Reported as a query error, like every other failure in this function.
  if (!array_->array_schema()->is_nullable(name))
    return LOG_STATUS(Status::QueryError(
        std::string("Cannot get estimated result size; Input attribute '") +
        name + "' is not nullable"));

  if (array_->is_remote() && !reader_.est_result_size_computed()) {
    auto rest_client = storage_manager_->rest_client();
    if (rest_client == nullptr)
      return LOG_STATUS(
          Status::QueryError("Error in query estimate result size; remote "
                             "array with no rest client."));

    // No REST endpoint exists for nullable estimates yet.
    return LOG_STATUS(
        Status::QueryError("Error in query estimate result size; unimplemented "
                           "for nullable attributes in remote arrays."));
  }

  return reader_.get_est_result_size_nullable(
      name, size_off, size_val, size_validity);
}

std::unordered_map<std::string, Subarray::ResultSize>
Query::get_est_result_size_map() {
return reader_.get_est_result_size_map();
Expand Down
Loading

0 comments on commit 1d36918

Please sign in to comment.