Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Query Condition NOT support #3844

Merged
merged 8 commits into from
Mar 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 132 additions & 0 deletions test/src/cpp-integration-query-condition.cc
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,138 @@ TEST_CASE(
}
}

TEST_CASE(
"Testing read query with basic negated QC, with no range.",
"[query][query-condition][negation]") {
// Initial setup.
std::srand(static_cast<uint32_t>(time(0)));
Context ctx;
VFS vfs(ctx);

if (vfs.is_dir(array_name)) {
vfs.remove_dir(array_name);
}

// Define query condition (b < 4.0).
QueryCondition qc(ctx);
float val = 4.0f;
qc.init("b", &val, sizeof(float), TILEDB_LT);

QueryCondition neg_qc = qc.negate();

// Create buffers with size of the results of the two queries.
std::vector<int> a_data_read(num_rows * num_rows);
std::vector<float> b_data_read(num_rows * num_rows);

// These buffers store the results of the second query made with the query
// condition specified above.
std::vector<int> a_data_read_2(num_rows * num_rows);
std::vector<float> b_data_read_2(num_rows * num_rows);

// Generate test parameters.
TestParams params = GENERATE(
TestParams(TILEDB_SPARSE, TILEDB_GLOBAL_ORDER, false, true),
TestParams(TILEDB_SPARSE, TILEDB_UNORDERED, true, false),
TestParams(TILEDB_DENSE, TILEDB_ROW_MAJOR, false, false));

// Setup by creating buffers to store all elements of the original array.
create_array(
ctx,
params.array_type_,
params.set_dups_,
params.add_utf8_attr_,
a_data_read,
b_data_read);

// Create the query, which reads over the entire array with query condition
// (b < 4.0).
Config config;
if (params.legacy_) {
config.set("sm.query.sparse_global_order.reader", "legacy");
config.set("sm.query.sparse_unordered_with_dups.reader", "legacy");
}
Context ctx2 = Context(config);
Array array(ctx2, array_name, TILEDB_READ);
Query query(ctx2, array);

// Set a subarray for dense.
if (params.array_type_ == TILEDB_DENSE) {
int range[] = {1, num_rows};
query.add_range("rows", range[0], range[1])
.add_range("cols", range[0], range[1]);
}

// Perform query and validate.
perform_query(a_data_read_2, b_data_read_2, neg_qc, params.layout_, query);
if (params.array_type_ == TILEDB_SPARSE) {
// Check the query for accuracy. The query results should contain 200
// elements. Each of these elements should have the cell value 1 on
// attribute a and should match the original value in the array that reads
// all elements.
auto table = query.result_buffer_elements();
REQUIRE(table.size() == 2);
REQUIRE(table["a"].first == 0);
REQUIRE(table["a"].second == 200);
REQUIRE(table["b"].first == 0);
REQUIRE(table["b"].second == 200);

// The unordered query should return the results in global order. Therefore,
// we iterate over each tile to collect our results.
int i = 0;
for (int tile_r = 1; tile_r <= num_rows; tile_r += 4) {
for (int tile_c = 1; tile_c <= num_rows; tile_c += 4) {
// Iterating over each tile.
for (int r = tile_r; r < tile_r + 4; ++r) {
for (int c = tile_c + 1; c < tile_c + 4; c += 2) {
int original_arr_i = index_from_row_col(r, c);
REQUIRE(a_data_read_2[i] == 0);
REQUIRE(a_data_read_2[i] == a_data_read[original_arr_i]);
REQUIRE(
fabs(b_data_read_2[i] - b_data_read[original_arr_i]) <
std::numeric_limits<float>::epsilon());
i += 1;
}
}
}
}
} else {
// Check the query for accuracy. The query results should contain 400
// elements. Elements that meet the query condition should have the cell
// value 1 on attribute a and should match the original value in the array
// on attribute b. Elements that do not should have the fill value for both
// attributes.
size_t total_num_elements = static_cast<size_t>(num_rows * num_rows);
auto table = query.result_buffer_elements();
REQUIRE(table.size() == 2);
REQUIRE(table["a"].first == 0);
REQUIRE(table["a"].second == total_num_elements);
REQUIRE(table["b"].first == 0);
REQUIRE(table["b"].second == total_num_elements);

for (int i = 0; i < num_rows * num_rows; ++i) {
if (i % 2 == 1) {
REQUIRE(a_data_read_2[i] == 0);
REQUIRE(a_data_read_2[i] == a_data_read[i]);
REQUIRE(
fabs(b_data_read_2[i] - b_data_read[i]) <
std::numeric_limits<float>::epsilon());
} else {
REQUIRE(a_data_read_2[i] == a_fill_value);
REQUIRE(
fabs(b_data_read_2[i] - b_fill_value) <
std::numeric_limits<float>::epsilon());
}
}
}

query.finalize();
array.close();

if (vfs.is_dir(array_name)) {
vfs.remove_dir(array_name);
}
}

TEST_CASE(
"Testing read query with basic QC, with range within a tile.",
"[query][query-condition]") {
Expand Down
53 changes: 42 additions & 11 deletions tiledb/sm/c_api/tiledb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2629,7 +2629,9 @@ int32_t tiledb_query_condition_combine(
// Sanity check
if (sanity_check(ctx) == TILEDB_ERR ||
sanity_check(ctx, left_cond) == TILEDB_ERR ||
sanity_check(ctx, right_cond) == TILEDB_ERR)
(combination_op != TILEDB_NOT &&
sanity_check(ctx, right_cond) == TILEDB_ERR) ||
(combination_op == TILEDB_NOT && right_cond != nullptr))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure I like using one half of a binary operator to be a unary operator. OTOH, not sure if it is worth changing if all we are going to have is AND, OR, NOT.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had this same exact thought. My first reaction was to do away with the existing implementation and then provide a tiledb_query_condition_negate function. I polled the language binding crowd and the only responses I got were in favor of keeping things as is, reusing tiledb_query_condition_combine.

If I had it to do all over again, I would change the combine API from:

TILEDB_EXPORT int32_t tiledb_query_condition_combine(
    tiledb_ctx_t* ctx,
    const tiledb_query_condition_t* left_cond,
    const tiledb_query_condition_t* right_cond,
    tiledb_query_condition_combination_op_t combination_op,
    tiledb_query_condition_t** combined_cond) TILEDB_NOEXCEPT;

to:

TILEDB_EXPORT int32_t tiledb_query_condition_combine(
    tiledb_ctx_t* ctx,
    const tiledb_query_condition_t** conditions,
    size_t num_conditions,
    tiledb_query_condition_combination_op_t combination_op,
    tiledb_query_condition_t** combined_cond) TILEDB_NOEXCEPT;

The second definition removes the baked in 2-arity semantics of the combination_op so that NOT and >2-arity AND/OR operators would also be naturally expressed. I briefly considered combining this approach with #3814 but @eric-hughes-tiledb had some other concerns around the general approach of #3814 so I'm leaving it be for now.

return TILEDB_ERR;

// Create the combined query condition struct
Expand All @@ -2655,21 +2657,42 @@ int32_t tiledb_query_condition_combine(
return TILEDB_OOM;
}

if (SAVE_ERROR_CATCH(
ctx,
left_cond->query_condition_->combine(
*right_cond->query_condition_,
static_cast<tiledb::sm::QueryConditionCombinationOp>(
combination_op),
(*combined_cond)->query_condition_))) {
delete (*combined_cond)->query_condition_;
delete *combined_cond;
return TILEDB_ERR;
if (combination_op == TILEDB_NOT) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be useful here to have a comment that we are partially evaluating the tree and applying NOT via DeMorgan's theorem.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do.

if (SAVE_ERROR_CATCH(
ctx,
left_cond->query_condition_->negate(
static_cast<tiledb::sm::QueryConditionCombinationOp>(
combination_op),
(*combined_cond)->query_condition_))) {
delete (*combined_cond)->query_condition_;
delete *combined_cond;
return TILEDB_ERR;
}
} else {
if (SAVE_ERROR_CATCH(
ctx,
left_cond->query_condition_->combine(
*right_cond->query_condition_,
static_cast<tiledb::sm::QueryConditionCombinationOp>(
combination_op),
(*combined_cond)->query_condition_))) {
delete (*combined_cond)->query_condition_;
delete *combined_cond;
return TILEDB_ERR;
}
}

return TILEDB_OK;
}

int32_t tiledb_query_condition_negate(
tiledb_ctx_t* const ctx,
const tiledb_query_condition_t* const cond,
tiledb_query_condition_t** const negated_cond) {
return api::tiledb_query_condition_combine(
ctx, cond, nullptr, TILEDB_NOT, negated_cond);
}

/* ****************************** */
/* ARRAY */
/* ****************************** */
Expand Down Expand Up @@ -6848,6 +6871,14 @@ int32_t tiledb_query_condition_combine(
ctx, left_cond, right_cond, combination_op, combined_cond);
}

int32_t tiledb_query_condition_negate(
tiledb_ctx_t* const ctx,
const tiledb_query_condition_t* const cond,
tiledb_query_condition_t** const negated_cond) noexcept {
return api_entry<tiledb::api::tiledb_query_condition_negate>(
ctx, cond, negated_cond);
}

/* ****************************** */
/* UPDATE CONDITION */
/* ****************************** */
Expand Down
44 changes: 44 additions & 0 deletions tiledb/sm/c_api/tiledb.h
Original file line number Diff line number Diff line change
Expand Up @@ -3143,6 +3143,50 @@ TILEDB_EXPORT int32_t tiledb_query_condition_combine(
tiledb_query_condition_combination_op_t combination_op,
tiledb_query_condition_t** combined_cond) TILEDB_NOEXCEPT;

/**
* Create a query condition representing a negation of the input query
* condition. Currently this is performed by applying De Morgan's theorem
* recursively to the query condition's internal representation.
*
* **Example:**
*
* @code{.c}
* tiledb_query_condition_t* query_condition_1;
* tiledb_query_condition_alloc(ctx, &query_condition_1);
* uint32_t value_1 = 5;
* tiledb_query_condition_init(
* ctx,
* query_condition_1,
* "longitude",
* &value_1,
* sizeof(value_1),
* TILEDB_LT);
*
* tiledb_query_condition_t* query_condition_2;
* tiledb_query_condition_negate(
* ctx,
* query_condition_1,
* &query_condition_2);
*
* tiledb_query_condition_free(&query_condition_1);
*
* tiledb_query_set_condition(ctx, query, query_condition_2);
* tiledb_query_submit(ctx, query);
* tiledb_query_condition_free(&query_condition_2);
* @endcode
*
* @param[in] ctx The TileDB context.
* @param[in] left_cond The first input condition.
* @param[in] right_cond The second input condition.
* @param[in] combination_op The combination operation.
* @param[out] combined_cond The output condition holder.
* @return `TILEDB_OK` for success and `TILEDB_OOM` or `TILEDB_ERR` for error.
*/
TILEDB_EXPORT int32_t tiledb_query_condition_negate(
tiledb_ctx_t* ctx,
const tiledb_query_condition_t* cond,
tiledb_query_condition_t** negated_cond) TILEDB_NOEXCEPT;

/* ********************************* */
/* SUBARRAY */
/* ********************************* */
Expand Down
24 changes: 24 additions & 0 deletions tiledb/sm/cpp_api/query_condition.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,30 @@ class QueryCondition {
return QueryCondition(ctx_, combined_qc);
}

/**
* Return a query condition representing a negation of this query condition.
* Currently this is performed by applying De Morgan's theorem recursively
* to the query condition's internal representation.
*
* **Example:**
*
* @code{.cpp}
* int qc1_cmp_value = 10;
* tiledb::QueryCondition qc1;
* qc1.init("a1", &qc1_cmp_value, sizeof(int), TILEDB_LT);
* tiledb::QueryCondition qc2 = qc1.negate();
* query.set_condition(qc2);
* @endcode
*/
QueryCondition negate() const {
auto& ctx = ctx_.get();
tiledb_query_condition_t* negated_qc;
ctx.handle_error(tiledb_query_condition_negate(
ctx.ptr().get(), query_condition_.get(), &negated_qc));

return QueryCondition(ctx_, negated_qc);
}

/* ********************************* */
/* STATIC FUNCTIONS */
/* ********************************* */
Expand Down
16 changes: 15 additions & 1 deletion tiledb/sm/query/query_condition.cc
Original file line number Diff line number Diff line change
Expand Up @@ -140,14 +140,28 @@ Status QueryCondition::combine(
combination_op != QueryConditionCombinationOp::OR) {
return Status_QueryConditionError(
"Cannot combine query conditions; Only the 'AND' "
"and 'OR' combination ops are supported");
"and 'OR' combination ops are supported in this function.");
}

combined_cond->field_names_.clear();
combined_cond->tree_ = this->tree_->combine(rhs.tree_, combination_op);
return Status::Ok();
}

Status QueryCondition::negate(
QueryConditionCombinationOp combination_op,
QueryCondition* combined_cond) const {
if (combination_op != QueryConditionCombinationOp::NOT) {
return Status_QueryConditionError(
"Cannot negate query condition; Only the 'NOT' "
"combination op is supported in this function.");
}

combined_cond->field_names_.clear();
combined_cond->tree_ = this->tree_->get_negated_tree();
return Status::Ok();
}

bool QueryCondition::empty() const {
return tree_ == nullptr;
}
Expand Down
10 changes: 10 additions & 0 deletions tiledb/sm/query/query_condition.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,16 @@ class QueryCondition {
QueryConditionCombinationOp combination_op,
QueryCondition* combined_cond) const;

/**
* @brief API call to negate query condition.
*
* @param combined_cond Where the negated condition is stored.
* @return Status
*/
Status negate(
QueryConditionCombinationOp combination_op,
QueryCondition* combined_cond) const;

/**
* Returns true if this condition does not have any nodes in the AST
* representing the query condition.
Expand Down
Loading