Skip to content

Commit

Permalink
Coalesce Subarray Ranges (#1755)
Browse files Browse the repository at this point in the history
For ranges of discrete data types, this patch will attempt to coalesce ranges
as they are added to a subarray. Ranges will be coalesced with the last existing
range if they form a contiguous range.

Additionally, this introduces `bool Subarray::coalesce_ranges_` which controls
whether or not the instance will attempt to coalesce ranges as they are added.
This is on by default, but exists as a way for unit tests to disable it. The
motivation is to prevent the need for immediately refactoring all of the hard-coded
ranges in the subpartitioner unit tests. Subarrays split or cropped from an
existing subarray will use the same coalescing behavior as the parent.

Co-authored-by: Joe Maley <[email protected]>
  • Loading branch information
joe maley and Joe Maley authored Aug 10, 2020
1 parent 4878ae6 commit 4728b34
Show file tree
Hide file tree
Showing 10 changed files with 329 additions and 47 deletions.
1 change: 1 addition & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
* Support seeking for CURL to allow redirects for posting to REST [#1728](https://github.com/TileDB-Inc/TileDB/pull/1728)
* Changed default setting for `vfs.s3.proxy_scheme` from `https` to `http` to match common usage needs [#1759](https://github.com/TileDB-Inc/TileDB/pull/1759)
* Enabled parallelization with native system threads when TBB is disabled [#1760](https://github.com/TileDB-Inc/TileDB/pull/1760)
* Subarray ranges will be automatically coalesced as they are added [#1755](https://github.com/TileDB-Inc/TileDB/pull/1755)

## Deprecations

Expand Down
36 changes: 23 additions & 13 deletions test/src/helpers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -479,8 +479,9 @@ void create_subarray(
tiledb::sm::Array* array,
const SubarrayRanges<T>& ranges,
tiledb::sm::Layout layout,
tiledb::sm::Subarray* subarray) {
tiledb::sm::Subarray ret(array, layout);
tiledb::sm::Subarray* subarray,
bool coalesce_ranges) {
tiledb::sm::Subarray ret(array, layout, coalesce_ranges);

auto dim_num = (unsigned)ranges.size();
for (unsigned d = 0; d < dim_num; ++d) {
Expand Down Expand Up @@ -745,7 +746,6 @@ int32_t num_fragments(const std::string& array_name) {
return ret;
}

// Explicit template instantiations
template void check_subarray<int8_t>(
tiledb::sm::Subarray& subarray, const SubarrayRanges<int8_t>& ranges);
template void check_subarray<uint8_t>(
Expand All @@ -771,52 +771,62 @@ template void create_subarray<int8_t>(
tiledb::sm::Array* array,
const SubarrayRanges<int8_t>& ranges,
tiledb::sm::Layout layout,
tiledb::sm::Subarray* subarray);
tiledb::sm::Subarray* subarray,
bool coalesce_ranges);
template void create_subarray<uint8_t>(
tiledb::sm::Array* array,
const SubarrayRanges<uint8_t>& ranges,
tiledb::sm::Layout layout,
tiledb::sm::Subarray* subarray);
tiledb::sm::Subarray* subarray,
bool coalesce_ranges);
template void create_subarray<int16_t>(
tiledb::sm::Array* array,
const SubarrayRanges<int16_t>& ranges,
tiledb::sm::Layout layout,
tiledb::sm::Subarray* subarray);
tiledb::sm::Subarray* subarray,
bool coalesce_ranges);
template void create_subarray<uint16_t>(
tiledb::sm::Array* array,
const SubarrayRanges<uint16_t>& ranges,
tiledb::sm::Layout layout,
tiledb::sm::Subarray* subarray);
tiledb::sm::Subarray* subarray,
bool coalesce_ranges);
template void create_subarray<int32_t>(
tiledb::sm::Array* array,
const SubarrayRanges<int32_t>& ranges,
tiledb::sm::Layout layout,
tiledb::sm::Subarray* subarray);
tiledb::sm::Subarray* subarray,
bool coalesce_ranges);
template void create_subarray<uint32_t>(
tiledb::sm::Array* array,
const SubarrayRanges<uint32_t>& ranges,
tiledb::sm::Layout layout,
tiledb::sm::Subarray* subarray);
tiledb::sm::Subarray* subarray,
bool coalesce_ranges);
template void create_subarray<int64_t>(
tiledb::sm::Array* array,
const SubarrayRanges<int64_t>& ranges,
tiledb::sm::Layout layout,
tiledb::sm::Subarray* subarray);
tiledb::sm::Subarray* subarray,
bool coalesce_ranges);
template void create_subarray<uint64_t>(
tiledb::sm::Array* array,
const SubarrayRanges<uint64_t>& ranges,
tiledb::sm::Layout layout,
tiledb::sm::Subarray* subarray);
tiledb::sm::Subarray* subarray,
bool coalesce_ranges);
template void create_subarray<float>(
tiledb::sm::Array* array,
const SubarrayRanges<float>& ranges,
tiledb::sm::Layout layout,
tiledb::sm::Subarray* subarray);
tiledb::sm::Subarray* subarray,
bool coalesce_ranges);
template void create_subarray<double>(
tiledb::sm::Array* array,
const SubarrayRanges<double>& ranges,
tiledb::sm::Layout layout,
tiledb::sm::Subarray* subarray);
tiledb::sm::Subarray* subarray,
bool coalesce_ranges);

template void check_partitions<int8_t>(
tiledb::sm::SubarrayPartitioner& partitioner,
Expand Down
6 changes: 4 additions & 2 deletions test/src/helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ typedef std::map<std::string, QueryBuffer> QueryBuffers;
*
* @tparam T The datatype of the subarray of the partitioner.
* @param partitioner The partitioner.
* @param ranges The ranges to be checked.
* @param partitions The ranges to be checked.
* @param last_unsplittable Whether the last partition is unsplittable.
*/
template <class T>
Expand Down Expand Up @@ -273,13 +273,15 @@ void create_azure_container(
* @param ranges The ranges of the subarray to be created.
* @param layout The layout of the subarray.
* @param subarray The subarray to be set.
* @param coalesce_ranges Whether the subarray should coalesce ranges.
*/
template <class T>
void create_subarray(
tiledb::sm::Array* array,
const SubarrayRanges<T>& ranges,
tiledb::sm::Layout layout,
tiledb::sm::Subarray* subarray);
tiledb::sm::Subarray* subarray,
bool coalesce_ranges = false);

/**
* Helper method that creates a TileDB context and a VFS object.
Expand Down
14 changes: 7 additions & 7 deletions test/src/unit-capi-sparse_array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3583,7 +3583,7 @@ TEST_CASE_METHOD(
tiledb_query_set_buffer(ctx_, query, TILEDB_COORDS, coords, &coords_size);
REQUIRE(rc == TILEDB_OK);

// Set some subarray
// Set some subarray.
uint64_t s00[] = {1, 1};
uint64_t s01[] = {3, 4};
uint64_t s10[] = {2, 2};
Expand All @@ -3608,17 +3608,17 @@ TEST_CASE_METHOD(

CHECK(a1_size == 5 * sizeof(int));
CHECK(a1[0] == 1);
CHECK(a1[1] == 5);
CHECK(a1[2] == 2);
CHECK(a1[1] == 2);
CHECK(a1[2] == 5);
CHECK(a1[3] == 6);
CHECK(a1[4] == 7);
CHECK(coords_size == 10 * sizeof(uint64_t));
CHECK(coords[0] == 1);
CHECK(coords[1] == 2);
CHECK(coords[2] == 4);
CHECK(coords[3] == 2);
CHECK(coords[4] == 1);
CHECK(coords[5] == 4);
CHECK(coords[2] == 1);
CHECK(coords[3] == 4);
CHECK(coords[4] == 4);
CHECK(coords[5] == 2);
CHECK(coords[6] == 3);
CHECK(coords[7] == 3);
CHECK(coords[8] == 3);
Expand Down
3 changes: 2 additions & 1 deletion test/src/unit-cppapi-subarray.cc
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,8 @@ TEST_CASE(
auto range_num = query.range_num(0);
CHECK(range_num == 1);
range_num = query.range_num(1);
CHECK(range_num == 2);
// Ranges `col_range0` and `col_range1` are coalesced.
CHECK(range_num == 1);

// Allocate buffers large enough to hold 2 cells at a time.
std::vector<char> data(2, '\0');
Expand Down
18 changes: 18 additions & 0 deletions tiledb/sm/misc/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@
#include <cstring>
#include <vector>

#include "tiledb/sm/misc/logger.h"

namespace tiledb {
namespace sm {

Expand Down Expand Up @@ -129,6 +131,14 @@ class Range {
return &range_[0];
}

/**
 * Copies 'start' into this range's start bytes for fixed-size ranges.
 *
 * @param start Source buffer; `range_.size() / 2` bytes are read from it
 *     and written over the first half of the range's byte buffer.
 */
void set_start(const void* const start) {
  // A non-zero start size is reported as a var-sized range, which this
  // setter does not support.
  if (range_start_size_ != 0)
    LOG_FATAL("Unexpected var-sized range; cannot set start range.");
  // The buffer is split evenly: start bytes first, end bytes second.
  const size_t fixed_size = range_.size() / 2;
  std::memcpy(&range_[0], start, fixed_size);
}

/** Returns the start as a string. */
std::string start_str() const {
if (start_size() == 0)
Expand Down Expand Up @@ -168,6 +178,14 @@ class Range {
return range_.empty() ? nullptr : &range_[end_pos];
}

/**
 * Overwrites the end bytes of this fixed-size range with the bytes at 'end'.
 *
 * @param end Source buffer; `range_.size() / 2` bytes are read from it and
 *     written over the second half of the range's byte buffer.
 */
void set_end(const void* const end) {
  // A non-zero start size is reported as a var-sized range, which this
  // setter does not support.
  if (range_start_size_ != 0) {
    LOG_FATAL("Unexpected var-sized range; cannot set end range.");
  }
  // The end value occupies the second half of the buffer.
  const size_t half_size = range_.size() / 2;
  std::memcpy(&range_[half_size], end, half_size);
}

/** Returns true if the range is empty. */
bool empty() const {
return range_.empty();
Expand Down
10 changes: 5 additions & 5 deletions tiledb/sm/serialization/query.cc
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ Status subarray_partitioner_from_capnp(
RETURN_NOT_OK(layout_enum(subarray_reader.getLayout(), &layout));

// Subarray, which is used to initialize the partitioner.
Subarray subarray(array, layout);
Subarray subarray(array, layout, false);
RETURN_NOT_OK(subarray_from_capnp(reader.getSubarray(), &subarray));
*partitioner =
SubarrayPartitioner(subarray, memory_budget, memory_budget_var);
Expand Down Expand Up @@ -308,7 +308,7 @@ Status subarray_partitioner_from_capnp(
partition_info->end_ = partition_info_reader.getEnd();
partition_info->split_multi_range_ =
partition_info_reader.getSplitMultiRange();
partition_info->partition_ = Subarray(array, layout);
partition_info->partition_ = Subarray(array, layout, false);
RETURN_NOT_OK(subarray_from_capnp(
partition_info_reader.getSubarray(), &partition_info->partition_));
}
Expand All @@ -322,15 +322,15 @@ Status subarray_partitioner_from_capnp(
const unsigned num_sr = sr_reader.size();
for (unsigned i = 0; i < num_sr; i++) {
auto subarray_reader_ = sr_reader[i];
state->single_range_.emplace_back(array, layout);
state->single_range_.emplace_back(array, layout, false);
Subarray& subarray_ = state->single_range_.back();
RETURN_NOT_OK(subarray_from_capnp(subarray_reader_, &subarray_));
}
auto m_reader = state_reader.getMultiRange();
const unsigned num_m = m_reader.size();
for (unsigned i = 0; i < num_m; i++) {
auto subarray_reader_ = m_reader[i];
state->multi_range_.emplace_back(array, layout);
state->multi_range_.emplace_back(array, layout, false);
Subarray& subarray_ = state->multi_range_.back();
RETURN_NOT_OK(subarray_from_capnp(subarray_reader_, &subarray_));
}
Expand Down Expand Up @@ -411,7 +411,7 @@ Status reader_from_capnp(
RETURN_NOT_OK(reader->set_layout(layout));

// Subarray
Subarray subarray(array, layout);
Subarray subarray(array, layout, false);
auto subarray_reader = reader_reader.getSubarray();
RETURN_NOT_OK(subarray_from_capnp(subarray_reader, &subarray));
RETURN_NOT_OK(reader->set_subarray(subarray));
Expand Down
Loading

0 comments on commit 4728b34

Please sign in to comment.