Skip to content

Commit

Permalink
Optimize ResultTile::compute_results_sparse<char>
Browse files Browse the repository at this point in the history
This makes three optimizations to `ResultTile::compute_results_sparse<char>`:

1. When executing on a dim_idx > 0, values in `r_bitmap` may be zero. In this
scenario, we can skip checking the associated coordinate for an intersection.
This saves us one string comparison for each coordinate.

2. When executing on a dim_idx > 0, we may have many contiguous zeroes in
`r_bitmap`. In this scenario, we can do a `memcmp` in place of a loop that
checks each individual coordinate. This size of this `memcmp` is fixed to
256.

3. When executing on dim_idx == 0, we know that coordinates are sorted. For
any arbitrary subrange in the list of coordinate values, we know that all values
in the subrange are identical if the first and last values are equal. In this
patch, the list of coordinate values are partitioned into 6 ranges. If the
first and last values match in one of these ranges, we perform a single computation
for `r_bitmap` and apply it to all values in the range.

* This patch has two fixed values, 256 (see #2) and 6 (see #3). I intentionally
did not put these into our misc/constants.h because these are only applicable
in this path.

** For #3, I tried an approach where I start with a single range and split it
for each time there is not a match. This could be done either recursively or
iteratively. I did not try doing it recursively because recursive routines
can not be inlined and would be too expensive. I tried an iterative implementation
and it was significantly slower than 6 fixed ranges because of the state it
had to track.
  • Loading branch information
Joe Maley authored and github-actions[bot] committed Dec 18, 2020
1 parent 47bee7c commit 42d78b4
Show file tree
Hide file tree
Showing 3 changed files with 226 additions and 57 deletions.
9 changes: 7 additions & 2 deletions tiledb/sm/query/reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -622,6 +622,7 @@ Status Reader::compute_range_result_coords(
std::vector<ResultCoords>* result_coords) {
auto coords_num = tile->cell_num();
auto dim_num = array_schema_->dim_num();
auto cell_order = array_schema_->cell_order();
auto range_coords = subarray->get_range_coords(range_idx);

if (array_schema_->dense()) {
Expand Down Expand Up @@ -650,9 +651,13 @@ Status Reader::compute_range_result_coords(

// Compute result and overwritten bitmap per dimension
for (unsigned d = 0; d < dim_num; ++d) {
const auto& ranges = subarray->ranges_for_dim(d);
// For col-major cell ordering, iterate the dimensions
// in reverse.
const unsigned dim_idx =
cell_order == Layout::COL_MAJOR ? dim_num - d - 1 : d;
const auto& ranges = subarray->ranges_for_dim(dim_idx);
RETURN_NOT_OK(tile->compute_results_sparse(
d, ranges[range_coords[d]], &result_bitmap));
dim_idx, ranges[range_coords[dim_idx]], &result_bitmap, cell_order));
}

// Gather results
Expand Down
240 changes: 188 additions & 52 deletions tiledb/sm/query/result_tile.cc
Original file line number Diff line number Diff line change
Expand Up @@ -425,19 +425,71 @@ void ResultTile::compute_results_dense(
}
}

inline uint8_t ResultTile::str_coord_intersects(
const uint64_t c_offset,
const uint64_t c_size,
const char* const buff_str,
const std::string& range_start,
const std::string& range_end) {
// Test against start
bool geq_start = true;
bool all_chars_match = true;
uint64_t min_size = std::min<uint64_t>(c_size, range_start.size());
for (uint64_t i = 0; i < min_size; ++i) {
if (buff_str[c_offset + i] < range_start[i]) {
geq_start = false;
all_chars_match = false;
break;
} else if (buff_str[c_offset + i] > range_start[i]) {
all_chars_match = false;
break;
} // Else characters match
}
if (geq_start && all_chars_match && c_size < range_start.size()) {
geq_start = false;
}

// Test against end
bool seq_end = false;
if (geq_start) {
seq_end = true;
all_chars_match = true;
min_size = std::min<uint64_t>(c_size, range_end.size());
for (uint64_t i = 0; i < min_size; ++i) {
if (buff_str[c_offset + i] > range_end[i]) {
geq_start = false;
break;
} else if (buff_str[c_offset + i] < range_end[i]) {
all_chars_match = false;
break;
} // Else characters match
}
if (seq_end && all_chars_match && c_size > range_end.size()) {
seq_end = false;
}
}

return geq_start && seq_end;
}

template <>
void ResultTile::compute_results_sparse<char>(
const ResultTile* result_tile,
unsigned dim_idx,
const Range& range,
std::vector<uint8_t>* result_bitmap) {
std::vector<uint8_t>* result_bitmap,
const Layout& cell_order) {
auto coords_num = result_tile->cell_num();
auto dim_num = result_tile->domain()->dim_num();
auto range_start = range.start_str();
auto range_start_size = (uint64_t)range_start.size();
auto range_end = range.end_str();
auto range_end_size = (uint64_t)range_end.size();
auto& r_bitmap = (*result_bitmap);

// Sanity check.
assert(coords_num != 0);
if (coords_num == 0)
return;

// Get coordinate tile
const auto& coord_tile = result_tile->coord_tile(dim_idx);

Expand All @@ -458,53 +510,125 @@ void ResultTile::compute_results_sparse<char>(
coord_tile_str.chunked_buffer()->get_contiguous_unsafe());
auto buff_str_size = coord_tile_str.size();

// Compute results
uint64_t offset = 0, pos = 0, str_size = 0, min_size = 0;
bool geq_start = false, seq_end = false, all_chars_match = false;
for (; pos < coords_num; ++pos) {
offset = buff_off[pos];
str_size = (pos < coords_num - 1) ? buff_off[pos + 1] - offset :
buff_str_size - offset;

// Test against start
geq_start = true;
all_chars_match = true;
min_size = std::min(str_size, range_start_size);
for (uint64_t i = 0; i < min_size; ++i) {
if (buff_str[offset + i] < range_start[i]) {
geq_start = false;
all_chars_match = false;
break;
} else if (buff_str[offset + i] > range_start[i]) {
all_chars_match = false;
break;
} // Else characters match
}
if (geq_start && all_chars_match && str_size < range_start_size) {
geq_start = false;
}

// Test against end
if (geq_start) {
seq_end = true;
all_chars_match = true;
min_size = std::min(str_size, range_end_size);
for (uint64_t i = 0; i < min_size; ++i) {
if (buff_str[offset + i] > range_end[i]) {
geq_start = false;
break;
} else if (buff_str[offset + i] < range_end[i]) {
all_chars_match = false;
break;
} // Else characters match
}
if (seq_end && all_chars_match && str_size > range_end_size) {
seq_end = false;
// For row-major cell orders, the first dimension is sorted.
// For col-major cell orders, the last dimension is sorted.
// If the coordinate value at index 0 is equivalent to the
// coordinate value at index 100, we know that all coordinates
// between them are also equivalent. In this scenario where
// coordinate value i=0 matches the coordinate value at i=100,
// we can calculate `r_bitmap[0]` and apply assign its value to
// `r_bitmap[i]` where i in the range [1, 100].
//
// Calculating `r_bitmap[i]` is expensive for strings because
// it invokes a string comparison. We partition the coordinates
// into a fixed number of ranges. For each partition, we will
// compare the start and ending coordinate values to see if
// they are equivalent. If so, we save the expense of computing
// `r_bitmap` for each corresponding coordinate. If they do
// not match, we must compare each coordinate in the partition.
static const uint64_t c_partition_num = 6;
const bool is_sorted_dim =
((cell_order == Layout::ROW_MAJOR && dim_idx == 0) ||
(cell_order == Layout::COL_MAJOR && dim_idx == dim_num - 1));
if (is_sorted_dim && coords_num > c_partition_num) {
// We calculate the size of each partition by dividing the total
// number of coordinates by the number of partitions. If this
// does not evenly divide, we will append the remaining coordinates
// onto the last partition.
const uint64_t c_partition_size_div = coords_num / c_partition_num;
const uint64_t c_partition_size_rem = coords_num % c_partition_num;

// Loop over each coordinate partition.
for (uint64_t p = 0; p < c_partition_num; ++p) {
// Calculate the size of this partition.
const uint64_t c_partition_size =
c_partition_size_div +
(p == (c_partition_num - 1) ? c_partition_size_rem : 0);

// Calculate the position of the first and last coordinates
// in this partition.
const uint64_t first_c_pos = p * c_partition_size_div;
const uint64_t last_c_pos = first_c_pos + c_partition_size - 1;

// The coordinate values are determined by their offset and size
// within `buff_str`. Calculate the offset and size for the
// first and last coordinates in this partition.
const uint64_t first_c_offset = buff_off[first_c_pos];
const uint64_t last_c_offset = buff_off[last_c_pos];
const uint64_t first_c_size = buff_off[first_c_pos + 1] - first_c_offset;
const uint64_t last_c_size = (last_c_pos == coords_num - 1) ?
buff_str_size - last_c_offset :
buff_off[last_c_pos + 1] - last_c_offset;

// Fetch the coordinate values for the first and last coordinates
// in this partition.
const char* const first_c_coord = &buff_str[first_c_offset];
const char* const second_coord = &buff_str[last_c_offset];

// Check if the first and last coordinates have an identical
// string value.
if (first_c_size == last_c_size &&
strncmp(first_c_coord, second_coord, first_c_size) == 0) {
assert(r_bitmap[0] == 1);
const uint8_t intersects = str_coord_intersects(
first_c_offset, first_c_size, buff_str, range_start, range_end);
memset(&r_bitmap[first_c_pos], intersects, c_partition_size);
} else {
// Compute results
uint64_t c_offset = 0, c_size = 0;
for (uint64_t pos = first_c_pos; pos <= last_c_pos; ++pos) {
c_offset = buff_off[pos];
c_size = (pos < last_c_pos) ? buff_off[pos + 1] - c_offset :
buff_str_size - c_offset;
r_bitmap[pos] = str_coord_intersects(
c_offset, c_size, buff_str, range_start, range_end);
}
}
}

// Set the result
r_bitmap[pos] &= (geq_start && seq_end);
// We've calculated `r_bitmap` for all coordinate values in
// the sorted dimension.
return;
}

// Here, we're computing on all other dimensions. After the first
// dimension, it is possible that coordinates have already been determined
// to not intersect with the range in an earlier dimension. For example,
// if `dim_idx` is 2 for a row-major cell order, we have already checked
// this coordinate for an intersection in dimension-0 and dimension-1. If
// either did not intersect a coordindate, its associated value in
// `r_bitmap` will be zero. To optimize for the scenario where there are
// many contiguous zeroes, we can `memcmp` a large range of `r_bitmap`
// values to avoid a comparison on each individual element.
//
// Often, a memory comparison for one byte is as quick as comparing
// 4 or 8 bytes. We will only get a benefit if we successfully
// find a `memcmp` on a much larger range.
static const uint64_t zeroed_size = coords_num < 256 ? coords_num : 256;
for (uint64_t i = 0; i < coords_num; i += zeroed_size) {
const uint64_t partition_size =
(i < coords_num - zeroed_size) ? zeroed_size : coords_num - i;

// Check if all `r_bitmap` values are zero between `i` and
// `partition_size`.
if (r_bitmap[i] == 0 &&
memcmp(&r_bitmap[i], &r_bitmap[i + 1], partition_size - 1) == 0)
continue;

// Here, we know that there is at least one `1` value within
// the `r_bitmap` values within this partition. We must check
// each value for an intersection.
uint64_t c_offset = 0, c_size = 0;
for (uint64_t pos = i; pos < i + partition_size; ++pos) {
if (r_bitmap[pos] == 0)
continue;

c_offset = buff_off[pos];
c_size = (pos < coords_num - 1) ? buff_off[pos + 1] - c_offset :
buff_str_size - c_offset;
r_bitmap[pos] = str_coord_intersects(
c_offset, c_size, buff_str, range_start, range_end);
}
}
}

Expand All @@ -513,13 +637,22 @@ void ResultTile::compute_results_sparse(
const ResultTile* result_tile,
unsigned dim_idx,
const Range& range,
std::vector<uint8_t>* result_bitmap) {
std::vector<uint8_t>* result_bitmap,
const Layout& cell_order) {
// We do not use `cell_order` for this template type.
(void)cell_order;

// For easy reference.
auto coords_num = result_tile->cell_num();
auto r = (const T*)range.data();
auto stores_zipped_coords = result_tile->stores_zipped_coords();
auto dim_num = result_tile->domain()->dim_num();
auto& r_bitmap = (*result_bitmap);

// Variables for the loop over `coords_num`.
T c;
const T& r0 = r[0];
const T& r1 = r[1];

// Handle separate coordinate tiles
if (!stores_zipped_coords) {
Expand All @@ -532,8 +665,9 @@ void ResultTile::compute_results_sparse(
static_cast<const T*>(chunked_buffer->get_contiguous_unsafe());
for (uint64_t pos = 0; pos < coords_num; ++pos) {
c = coords[pos];
r_bitmap[pos] &= (uint8_t)(c >= r[0] && c <= r[1]);
r_bitmap[pos] &= (uint8_t)(c >= r0 && c <= r1);
}

return;
}

Expand All @@ -548,7 +682,7 @@ void ResultTile::compute_results_sparse(
static_cast<const T*>(chunked_buffer->get_contiguous_unsafe());
for (uint64_t pos = 0; pos < coords_num; ++pos) {
c = coords[pos * dim_num + dim_idx];
r_bitmap[pos] &= (uint8_t)(c >= r[0] && c <= r[1]);
r_bitmap[pos] &= (uint8_t)(c >= r0 && c <= r1);
}
}

Expand All @@ -574,9 +708,11 @@ Status ResultTile::compute_results_dense(
Status ResultTile::compute_results_sparse(
unsigned dim_idx,
const Range& range,
std::vector<uint8_t>* result_bitmap) const {
std::vector<uint8_t>* result_bitmap,
const Layout& cell_order) const {
assert(compute_results_sparse_func_[dim_idx] != nullptr);
compute_results_sparse_func_[dim_idx](this, dim_idx, range, result_bitmap);
compute_results_sparse_func_[dim_idx](
this, dim_idx, range, result_bitmap, cell_order);
return Status::Ok();
}

Expand Down
34 changes: 31 additions & 3 deletions tiledb/sm/query/result_tile.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include <unordered_map>
#include <vector>

#include "tiledb/sm/enums/layout.h"
#include "tiledb/sm/misc/constants.h"
#include "tiledb/sm/misc/types.h"
#include "tiledb/sm/tile/tile.h"
Expand Down Expand Up @@ -199,7 +200,8 @@ class ResultTile {
const ResultTile* result_tile,
unsigned dim_idx,
const Range& range,
std::vector<uint8_t>* result_bitmap);
std::vector<uint8_t>* result_bitmap,
const Layout& cell_order);

/**
* Applicable only to sparse tiles of dense arrays.
Expand Down Expand Up @@ -229,7 +231,8 @@ class ResultTile {
Status compute_results_sparse(
unsigned dim_idx,
const Range& range,
std::vector<uint8_t>* result_bitmap) const;
std::vector<uint8_t>* result_bitmap,
const Layout& cell_order) const;

private:
/* ********************************* */
Expand Down Expand Up @@ -283,7 +286,11 @@ class ResultTile {
* for each dimension, based on the dimension datatype.
*/
std::vector<std::function<void(
const ResultTile*, unsigned, const Range&, std::vector<uint8_t>*)>>
const ResultTile*,
unsigned,
const Range&,
std::vector<uint8_t>*,
const Layout&)>>
compute_results_sparse_func_;

/* ********************************* */
Expand All @@ -298,6 +305,27 @@ class ResultTile {

/** Implements coord() for unzipped coordinates. */
const void* unzipped_coord(uint64_t pos, unsigned dim_idx) const;

/**
* A helper routine used in `compute_results_sparse<char>` to
* determine if a given string-valued coordinate intersects
* the given start and end range.
*
* @param c_offset The offset of the coordinate value
* within `buff_str`.
* @param c_cize The size of the coordinate value.
* @param buff_str The buffer containing the coordinate value
* at `c_offset`.
* @param range_start The starting range value.
* @param range_end The ending range value.
* @return uint8_t 0 for no intersection, 1 for instersection.
*/
static uint8_t str_coord_intersects(
const uint64_t c_offset,
const uint64_t c_size,
const char* const buff_str,
const std::string& range_start,
const std::string& range_end);
};

} // namespace sm
Expand Down

0 comments on commit 42d78b4

Please sign in to comment.