Skip to content

Commit

Permalink
Optimize copy_cells. (#1694)
Browse files Browse the repository at this point in the history
Copying variable cells is particularly expensive because it has to construct
and destruct the following two variables:
```
std::vector<std::vector<uint64_t>> offset_offsets_per_cs;
std::vector<std::vector<uint64_t>> var_offsets_per_cs;
```

Most of the expensive is spent constructing and deconstructing the
`vector<uint64_t>` elements. This patch caches both of these variables for
re-use between calls to `copy_var_cells` within a single read. This works
out well because the top-level array is the same size between attributes (the
number of result cell slabs).

In the multi-fragment read scenario that I'm currently testing, this provides
a significant reduction in execution time:
```
// Without the patch
Time to copy var-sized attribute values: 2.97894 secs
```

```
// With the patch
Time to copy var-sized attribute values: 0.988014 secs
```

While this provides a `2s` speedup in this path, I only observe a `1s` speedup
in total read time. This is because the destruction of the above two variables
is still expensive: it's just been moved into the `copy_attribute_values`
path:
```
// Without the patch
Time to copy result attribute values: 4.8275 secs
```

```
// With the patch
Time to copy result attribute values: 3.80167 secs
```

There will be a follow-up patch to optimize for minimizing destruction time.

Co-authored-by: Joe Maley <[email protected]>
  • Loading branch information
joe maley and Joe Maley authored Jun 20, 2020
1 parent c69e3a9 commit fd46117
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 13 deletions.
34 changes: 23 additions & 11 deletions tiledb/sm/query/reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -883,14 +883,17 @@ Status Reader::compute_sparse_result_tiles(
Status Reader::copy_cells(
const std::string& attribute,
uint64_t stride,
const std::vector<ResultCellSlab>& result_cell_slabs) {
const std::vector<ResultCellSlab>& result_cell_slabs,
CopyCellsContextCache* const ctx_cache) {
assert(ctx_cache);

if (result_cell_slabs.empty()) {
zero_out_buffer_sizes();
return Status::Ok();
}

if (array_schema_->var_size(attribute))
return copy_var_cells(attribute, stride, result_cell_slabs);
return copy_var_cells(attribute, stride, result_cell_slabs, ctx_cache);
return copy_fixed_cells(attribute, stride, result_cell_slabs);
}

Expand Down Expand Up @@ -976,7 +979,10 @@ Status Reader::copy_fixed_cells(
Status Reader::copy_var_cells(
const std::string& name,
uint64_t stride,
const std::vector<ResultCellSlab>& result_cell_slabs) {
const std::vector<ResultCellSlab>& result_cell_slabs,
CopyCellsContextCache* const ctx_cache) {
assert(ctx_cache);

auto stat_type = (array_schema_->is_attr(name)) ?
stats::Stats::TimerType::READ_COPY_VAR_ATTR_VALUES :
stats::Stats::TimerType::READ_COPY_VAR_COORDS;
Expand All @@ -995,15 +1001,15 @@ Status Reader::copy_var_cells(
auto fill_value_size = (uint64_t)fill_value.size();

// Compute the destinations of offsets and var-len data in the buffers.
std::vector<std::vector<uint64_t>> offset_offsets_per_cs;
std::vector<std::vector<uint64_t>> var_offsets_per_cs;
auto offset_offsets_per_cs = &ctx_cache->offset_offsets_per_cs;
auto var_offsets_per_cs = &ctx_cache->var_offsets_per_cs;
uint64_t total_offset_size, total_var_size;
RETURN_NOT_OK(compute_var_cell_destinations(
name,
stride,
result_cell_slabs,
&offset_offsets_per_cs,
&var_offsets_per_cs,
offset_offsets_per_cs,
var_offsets_per_cs,
&total_offset_size,
&total_var_size));

Expand All @@ -1017,8 +1023,8 @@ Status Reader::copy_var_cells(
const auto num_cs = result_cell_slabs.size();
auto statuses = parallel_for(0, num_cs, [&](uint64_t cs_idx) {
const auto& cs = result_cell_slabs[cs_idx];
const auto& offset_offsets = offset_offsets_per_cs[cs_idx];
const auto& var_offsets = var_offsets_per_cs[cs_idx];
const auto& offset_offsets = (*offset_offsets_per_cs)[cs_idx];
const auto& var_offsets = (*var_offsets_per_cs)[cs_idx];

// Get tile information, if the range is nonempty.
uint64_t* tile_offsets = nullptr;
Expand Down Expand Up @@ -2199,6 +2205,8 @@ Status Reader::copy_coordinates(

uint64_t stride = UINT64_MAX;

CopyCellsContextCache ctx_cache;

// Copy coordinates
for (const auto& it : buffers_) {
const auto& name = it.first;
Expand All @@ -2207,7 +2215,8 @@ Status Reader::copy_coordinates(
if (!(name == constants::coords || array_schema_->is_dim(name)))
continue;

RETURN_CANCEL_OR_ERROR(copy_cells(name, stride, result_cell_slabs));
RETURN_CANCEL_OR_ERROR(
copy_cells(name, stride, result_cell_slabs, &ctx_cache));
clear_tiles(name, result_tiles);
}

Expand Down Expand Up @@ -2239,6 +2248,8 @@ Status Reader::copy_attribute_values(
++it;
}

CopyCellsContextCache ctx_cache;

// Copy result cells only for the attributes
for (const auto& it : buffers_) {
const auto& name = it.first;
Expand All @@ -2249,7 +2260,8 @@ Status Reader::copy_attribute_values(

RETURN_CANCEL_OR_ERROR(read_tiles(name, result_tiles));
RETURN_CANCEL_OR_ERROR(unfilter_tiles(name, result_tiles, &cs_ranges));
RETURN_CANCEL_OR_ERROR(copy_cells(name, stride, result_cell_slabs));
RETURN_CANCEL_OR_ERROR(
copy_cells(name, stride, result_cell_slabs, &ctx_cache));
clear_tiles(name, result_tiles);
}

Expand Down
27 changes: 25 additions & 2 deletions tiledb/sm/query/reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,23 @@ class Reader {
std::vector<ResultCellSlab>* result_cell_slabs) const;

private:
/* ********************************* */
/* PRIVATE DATATYPES */
/* ********************************* */

/**
* Contains data structures for re-use between invocations of
* `copy_cells`. The intent is to reduce CPU time spent allocating
* and deallocating expensive data structures.
*/
struct CopyCellsContextCache {
/** An input for `compute_var_cell_destinations`. */
std::vector<std::vector<uint64_t>> offset_offsets_per_cs;

/** An input for `compute_var_cell_destinations`. */
std::vector<std::vector<uint64_t>> var_offsets_per_cs;
};

/* ********************************* */
/* PRIVATE ATTRIBUTES */
/* ********************************* */
Expand Down Expand Up @@ -687,12 +704,15 @@ class Reader {
* cell slabs are all contiguous. Otherwise, each cell in the
* result cell slabs are `stride` cells apart from each other.
* @param result_cell_slabs The result cell slabs to copy cells for.
* @param ctx_cache An opaque context cache that may be shared between
* calls to improve performance.
* @return Status
*/
Status copy_cells(
const std::string& attribute,
uint64_t stride,
const std::vector<ResultCellSlab>& result_cell_slabs);
const std::vector<ResultCellSlab>& result_cell_slabs,
CopyCellsContextCache* ctx_cache);

/**
* Copies the cells for the input **fixed-sized** attribute/dimension and
Expand All @@ -719,12 +739,15 @@ class Reader {
* cell slabs are all contiguous. Otherwise, each cell in the
* result cell slabs are `stride` cells apart from each other.
* @param result_cell_slabs The result cell slabs to copy cells for.
* @param ctx_cache An opaque context cache that may be shared between
* calls to improve performance.
* @return Status
*/
Status copy_var_cells(
const std::string& name,
uint64_t stride,
const std::vector<ResultCellSlab>& result_cell_slabs);
const std::vector<ResultCellSlab>& result_cell_slabs,
CopyCellsContextCache* ctx_cache);

/**
* Computes offsets into destination buffers for the given
Expand Down

0 comments on commit fd46117

Please sign in to comment.