diff --git a/file/file_prefetch_buffer.h b/file/file_prefetch_buffer.h index 39cbe0ee36f8..b0aa1f1c6a7b 100644 --- a/file/file_prefetch_buffer.h +++ b/file/file_prefetch_buffer.h @@ -64,9 +64,10 @@ struct BufferInfo { // initial end offset of this buffer which will be the starting // offset of next prefetch. // - // Note: No need to update/use in case async_io disabled. Prefetching will - // happen only when there is a cache miss. So start offset for prefetching - // will always be the passed offset to FilePrefetchBuffer (offset to read). + // For example - if end offset of previous buffer was 100 and because of + // readahead_size optimization, end_offset was trimmed to 60. Then for next + // prefetch call, start_offset should be initialized to 100 i.e. start_offset = + // buf->initial_end_offset_. uint64_t initial_end_offset_ = 0; }; diff --git a/table/block_based/block_based_table_iterator.cc b/table/block_based/block_based_table_iterator.cc index 9f174c48c792..97adc6e5f079 100644 --- a/table/block_based/block_based_table_iterator.cc +++ b/table/block_based/block_based_table_iterator.cc @@ -16,13 +16,34 @@ void BlockBasedTableIterator::Seek(const Slice& target) { SeekImpl(&target, true); } +void BlockBasedTableIterator::SeekSecondPass(const Slice* target) { + AsyncInitDataBlock(/*is_first_pass=*/false); + + if (target) { + block_iter_.Seek(*target); + } else { + block_iter_.SeekToFirst(); + } + FindKeyForward(); + + CheckOutOfBound(); + + if (target) { + assert(!Valid() || icomp_.Compare(*target, key()) <= 0); + } +} + void BlockBasedTableIterator::SeekImpl(const Slice* target, bool async_prefetch) { bool is_first_pass = !async_read_in_progress_; - if (is_first_pass) { - ResetBlockCacheLookupVar(); + + if (!is_first_pass) { + SeekSecondPass(target); + return; } + ResetBlockCacheLookupVar(); + bool autotune_readaheadsize = is_first_pass && read_options_.auto_readahead_size && read_options_.iterate_upper_bound; @@ -33,134 +54,120 @@ void 
BlockBasedTableIterator::SeekImpl(const Slice* target, readahead_cache_lookup_ = true; } - if (is_first_pass) { - is_out_of_bound_ = false; - is_at_first_key_from_index_ = false; - seek_stat_state_ = kNone; - bool filter_checked = false; - if (target && !CheckPrefixMayMatch(*target, IterDirection::kForward, - &filter_checked)) { - ResetDataIter(); - RecordTick(table_->GetStatistics(), is_last_level_ - ? LAST_LEVEL_SEEK_FILTERED - : NON_LAST_LEVEL_SEEK_FILTERED); - return; - } - if (filter_checked) { - seek_stat_state_ = kFilterUsed; - RecordTick(table_->GetStatistics(), - is_last_level_ ? LAST_LEVEL_SEEK_FILTER_MATCH - : NON_LAST_LEVEL_SEEK_FILTER_MATCH); - } + is_out_of_bound_ = false; + is_at_first_key_from_index_ = false; + seek_stat_state_ = kNone; + bool filter_checked = false; + if (target && + !CheckPrefixMayMatch(*target, IterDirection::kForward, &filter_checked)) { + ResetDataIter(); + RecordTick(table_->GetStatistics(), is_last_level_ + ? LAST_LEVEL_SEEK_FILTERED + : NON_LAST_LEVEL_SEEK_FILTERED); + return; + } + if (filter_checked) { + seek_stat_state_ = kFilterUsed; + RecordTick(table_->GetStatistics(), is_last_level_ + ? LAST_LEVEL_SEEK_FILTER_MATCH + : NON_LAST_LEVEL_SEEK_FILTER_MATCH); + } - bool need_seek_index = true; - - // In case of readahead_cache_lookup_, index_iter_ could change to find the - // readahead size in BlockCacheLookupForReadAheadSize so it needs to - // reseek. - if (IsIndexAtCurr() && block_iter_points_to_real_block_ && - block_iter_.Valid()) { - // Reseek. - prev_block_offset_ = index_iter_->value().handle.offset(); - - if (target) { - // We can avoid an index seek if: - // 1. The new seek key is larger than the current key - // 2. The new seek key is within the upper bound of the block - // Since we don't necessarily know the internal key for either - // the current key or the upper bound, we check user keys and - // exclude the equality case. 
Considering internal keys can - // improve for the boundary cases, but it would complicate the - // code. - if (user_comparator_.Compare(ExtractUserKey(*target), - block_iter_.user_key()) > 0 && - user_comparator_.Compare(ExtractUserKey(*target), - index_iter_->user_key()) < 0) { - need_seek_index = false; - } + bool need_seek_index = true; + + // In case of readahead_cache_lookup_, index_iter_ could change to find the + // readahead size in BlockCacheLookupForReadAheadSize so it needs to + // reseek. + if (IsIndexAtCurr() && block_iter_points_to_real_block_ && + block_iter_.Valid()) { + // Reseek. + prev_block_offset_ = index_iter_->value().handle.offset(); + + if (target) { + // We can avoid an index seek if: + // 1. The new seek key is larger than the current key + // 2. The new seek key is within the upper bound of the block + // Since we don't necessarily know the internal key for either + // the current key or the upper bound, we check user keys and + // exclude the equality case. Considering internal keys can + // improve for the boundary cases, but it would complicate the + // code. 
+ if (user_comparator_.Compare(ExtractUserKey(*target), + block_iter_.user_key()) > 0 && + user_comparator_.Compare(ExtractUserKey(*target), + index_iter_->user_key()) < 0) { + need_seek_index = false; } } + } - if (need_seek_index) { - if (target) { - index_iter_->Seek(*target); - } else { - index_iter_->SeekToFirst(); - } - is_index_at_curr_block_ = true; - if (!index_iter_->Valid()) { - ResetDataIter(); - return; - } + if (need_seek_index) { + if (target) { + index_iter_->Seek(*target); + } else { + index_iter_->SeekToFirst(); + } + is_index_at_curr_block_ = true; + if (!index_iter_->Valid()) { + ResetDataIter(); + return; } + } - if (autotune_readaheadsize) { - FindReadAheadSizeUpperBound(); - if (target) { - index_iter_->Seek(*target); - } else { - index_iter_->SeekToFirst(); - } + if (autotune_readaheadsize) { + FindReadAheadSizeUpperBound(); + if (target) { + index_iter_->Seek(*target); + } else { + index_iter_->SeekToFirst(); + } - // Check for IO error. - if (!index_iter_->Valid()) { - ResetDataIter(); - return; - } + // Check for IO error. + if (!index_iter_->Valid()) { + ResetDataIter(); + return; } } - if (is_first_pass) { // After reseek, index_iter_ point to the right key i.e. target in // case of readahead_cache_lookup_. So index_iter_ can be used directly. - IndexValue v = index_iter_->value(); - const bool same_block = block_iter_points_to_real_block_ && - v.handle.offset() == prev_block_offset_; - - if (!v.first_internal_key.empty() && !same_block && - (!target || icomp_.Compare(*target, v.first_internal_key) <= 0) && - allow_unprepared_value_) { - // Index contains the first key of the block, and it's >= target. - // We can defer reading the block. - is_at_first_key_from_index_ = true; - // ResetDataIter() will invalidate block_iter_. Thus, there is no need to - // call CheckDataBlockWithinUpperBound() to check for iterate_upper_bound - // as that will be done later when the data block is actually read. 
- ResetDataIter(); - } else { - // Need to use the data block. - if (!same_block) { - if (read_options_.async_io && async_prefetch) { - AsyncInitDataBlock(is_first_pass); - if (async_read_in_progress_) { - // Status::TryAgain indicates asynchronous request for retrieval of - // data blocks has been submitted. So it should return at this point - // and Seek should be called again to retrieve the requested block - // and execute the remaining code. - return; - } - } else { - InitDataBlock(); + IndexValue v = index_iter_->value(); + const bool same_block = block_iter_points_to_real_block_ && + v.handle.offset() == prev_block_offset_; + + if (!v.first_internal_key.empty() && !same_block && + (!target || icomp_.Compare(*target, v.first_internal_key) <= 0) && + allow_unprepared_value_) { + // Index contains the first key of the block, and it's >= target. + // We can defer reading the block. + is_at_first_key_from_index_ = true; + // ResetDataIter() will invalidate block_iter_. Thus, there is no need to + // call CheckDataBlockWithinUpperBound() to check for iterate_upper_bound + // as that will be done later when the data block is actually read. + ResetDataIter(); + } else { + // Need to use the data block. + if (!same_block) { + if (read_options_.async_io && async_prefetch) { + AsyncInitDataBlock(/*is_first_pass=*/true); + if (async_read_in_progress_) { + // Status::TryAgain indicates asynchronous request for retrieval of + // data blocks has been submitted. So it should return at this point + // and Seek should be called again to retrieve the requested block + // and execute the remaining code. + return; } } else { - // When the user does a reseek, the iterate_upper_bound might have - // changed. CheckDataBlockWithinUpperBound() needs to be called - // explicitly if the reseek ends up in the same data block. - // If the reseek ends up in a different block, InitDataBlock() will do - // the iterator upper bound check. 
- CheckDataBlockWithinUpperBound(); + InitDataBlock(); } - - if (target) { - block_iter_.Seek(*target); - } else { - block_iter_.SeekToFirst(); - } - FindKeyForward(); + } else { + // When the user does a reseek, the iterate_upper_bound might have + // changed. CheckDataBlockWithinUpperBound() needs to be called + // explicitly if the reseek ends up in the same data block. + // If the reseek ends up in a different block, InitDataBlock() will do + // the iterator upper bound check. + CheckDataBlockWithinUpperBound(); } - } else { - // second pass - AsyncInitDataBlock(is_first_pass); if (target) { block_iter_.Seek(*target); diff --git a/table/block_based/block_based_table_iterator.h b/table/block_based/block_based_table_iterator.h index 5cc9a6a449d9..77fe6aa6de83 100644 --- a/table/block_based/block_based_table_iterator.h +++ b/table/block_based/block_based_table_iterator.h @@ -329,6 +329,8 @@ class BlockBasedTableIterator : public InternalIteratorBase { // is used to disable the lookup. IterDirection direction_ = IterDirection::kForward; + void SeekSecondPass(const Slice* target); + // If `target` is null, seek to first. void SeekImpl(const Slice* target, bool async_prefetch); diff --git a/unreleased_history/new_features/async_support_tune_readahead.md b/unreleased_history/new_features/async_support_tune_readahead.md new file mode 100644 index 000000000000..8794a493a568 --- /dev/null +++ b/unreleased_history/new_features/async_support_tune_readahead.md @@ -0,0 +1 @@ +Provide support for async_io to trim readahead_size by doing block cache lookup