Skip to content

Commit

Permalink
Optimize multi-fragment unfiltering, part 2 (#1693)
Browse files Browse the repository at this point in the history
For selective filtering, we need to build a map to associate each tile with
its cell slab ranges. In the single-fragment scenario that I'm testing, the
map is built from 767 result cell slabs. In the multi-fragment scenario, it
is built from 3250033 result cell slabs. This map takes ~1ms to build for
the single-fragment scenario, but ~80ms for the multi-fragment scenario.

This map only needs to be built once per read, but it is erroneously built
once per attribute. There are 19 attributes in the scenario under test. The
overhead is ~1500ms (19 * 80ms). This patch moves the map construction so
that it is built once per read. This reduces the overhead in the multi-fragment
scenario from ~1500ms to ~80ms.

```
// Single-fragment scenario
Time to unfilter attribute tiles: 0.383881 secs
```

```
// Multi-fragment scenario
Time to unfilter attribute tiles: 0.410692 secs
```

This should conclude optimization in the unfiltering path for this test
scenario. I will continue to investigate the overhead in the copy-cells path.

Co-authored-by: Joe Maley <[email protected]>
  • Loading branch information
joe maley and Joe Maley committed Jul 22, 2020
1 parent d89f03e commit 928e15c
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 32 deletions.
53 changes: 24 additions & 29 deletions tiledb/sm/query/reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1673,7 +1673,9 @@ void Reader::fill_dense_coords_col_slab(
Status Reader::unfilter_tiles(
const std::string& name,
const std::vector<ResultTile*>& result_tiles,
const std::vector<ResultCellSlab>* const result_cell_slabs) const {
const std::unordered_map<
ResultTile*,
std::vector<std::pair<uint64_t, uint64_t>>>* const cs_ranges) const {
auto stat_type = (array_schema_->is_attr(name)) ?
stats::Stats::TimerType::READ_UNFILTER_ATTR_TILES :
stats::Stats::TimerType::READ_UNFILTER_COORD_TILES;
Expand All @@ -1683,29 +1685,6 @@ Status Reader::unfilter_tiles(
auto num_tiles = static_cast<uint64_t>(result_tiles.size());
auto encryption_key = array_->encryption_key();

// Build an association from the result tile to the cell slab ranges
// that it contains.
std::unordered_map<ResultTile*, std::vector<std::pair<uint64_t, uint64_t>>>
cs_ranges;

// If 'result_cell_slabs' is NULL, the caller intends to bypass
// selective unfiltering.
if (result_cell_slabs) {
std::vector<ResultCellSlab>::const_iterator it =
result_cell_slabs->cbegin();
while (it != result_cell_slabs->cend()) {
std::pair<uint64_t, uint64_t> range =
std::make_pair(it->start_, it->start_ + it->length_);
if (cs_ranges.find(it->tile_) == cs_ranges.end()) {
std::vector<std::pair<uint64_t, uint64_t>> ranges(1, std::move(range));
cs_ranges.insert(std::make_pair(it->tile_, std::move(ranges)));
} else {
cs_ranges[it->tile_].emplace_back(std::move(range));
}
++it;
}
}

auto statuses = parallel_for(0, num_tiles, [&, this](uint64_t i) {
auto& tile = result_tiles[i];
auto& fragment = fragment_metadata_[tile->frag_idx()];
Expand Down Expand Up @@ -1741,9 +1720,9 @@ Status Reader::unfilter_tiles(
const std::vector<std::pair<uint64_t, uint64_t>>*
result_cell_slab_ranges = nullptr;
static const std::vector<std::pair<uint64_t, uint64_t>> empty_ranges;
if (result_cell_slabs) {
result_cell_slab_ranges = cs_ranges.find(tile) != cs_ranges.end() ?
&cs_ranges[tile] :
if (cs_ranges) {
result_cell_slab_ranges = cs_ranges->find(tile) != cs_ranges->end() ?
&cs_ranges->at(tile) :
&empty_ranges;
}

Expand Down Expand Up @@ -2240,6 +2219,23 @@ Status Reader::copy_attribute_values(
const std::vector<ResultCellSlab>& result_cell_slabs) {
STATS_START_TIMER(stats::Stats::TimerType::READ_COPY_ATTR_VALUES);

// Build an association from the result tile to the cell slab ranges
// that it contains.
std::unordered_map<ResultTile*, std::vector<std::pair<uint64_t, uint64_t>>>
cs_ranges;
std::vector<ResultCellSlab>::const_iterator it = result_cell_slabs.cbegin();
while (it != result_cell_slabs.cend()) {
std::pair<uint64_t, uint64_t> range =
std::make_pair(it->start_, it->start_ + it->length_);
if (cs_ranges.find(it->tile_) == cs_ranges.end()) {
std::vector<std::pair<uint64_t, uint64_t>> ranges(1, std::move(range));
cs_ranges.insert(std::make_pair(it->tile_, std::move(ranges)));
} else {
cs_ranges[it->tile_].emplace_back(std::move(range));
}
++it;
}

// Copy result cells only for the attributes
for (const auto& it : buffers_) {
const auto& name = it.first;
Expand All @@ -2249,8 +2245,7 @@ Status Reader::copy_attribute_values(
continue;

RETURN_CANCEL_OR_ERROR(read_tiles(name, result_tiles));
RETURN_CANCEL_OR_ERROR(
unfilter_tiles(name, result_tiles, &result_cell_slabs));
RETURN_CANCEL_OR_ERROR(unfilter_tiles(name, result_tiles, &cs_ranges));
RETURN_CANCEL_OR_ERROR(copy_cells(name, stride, result_cell_slabs));
clear_tiles(name, result_tiles);
}
Expand Down
10 changes: 7 additions & 3 deletions tiledb/sm/query/reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -924,14 +924,18 @@ class Reader {
*
* @param name Attribute/dimension whose tiles will be unfiltered.
* @param result_tiles Vector containing the tiles to be unfiltered.
* @param result_cell_slabs The result cell slabs to use for selective
* unfiltering. This is optional and will be ignored if NULL.
* @param cs_ranges An optional association from the result tile to
* the cell slab ranges that it contains. If given, this will be
* used for selective unfiltering.
* @return Status
*/
Status unfilter_tiles(
const std::string& name,
const std::vector<ResultTile*>& result_tiles,
const std::vector<ResultCellSlab>* result_cell_slabs = nullptr) const;
const std::unordered_map<
ResultTile*,
std::vector<std::pair<uint64_t, uint64_t>>>* const cs_ranges =
nullptr) const;

/**
* Runs the input fixed-sized tile for the input attribute or dimension
Expand Down

0 comments on commit 928e15c

Please sign in to comment.