Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ORC-262: [C++] Support async I/O prefetch #2048

Closed
wants to merge 48 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
09ad258
support async io prefetch for orc c++ lib
taiyang-li Sep 24, 2024
b0aeb9e
fix failed ci
taiyang-li Oct 9, 2024
bad04f8
fix styles
taiyang-li Oct 9, 2024
8692901
add uts
taiyang-li Oct 9, 2024
a6159e5
fix failed uts
taiyang-li Oct 10, 2024
1808c90
fix building
taiyang-li Oct 10, 2024
4a3fe36
fix format
taiyang-li Oct 10, 2024
2d8f77d
fix failed ci
taiyang-li Oct 10, 2024
90c3539
fix bugs when ranges has the same offsets
taiyang-li Oct 10, 2024
52e02a4
fix bugs when ranges has the same offsets
taiyang-li Oct 10, 2024
a6691bc
fix failed cit
taiyang-li Oct 10, 2024
a2d6723
Update Cache.hh
taiyang-li Oct 16, 2024
fc58f1f
change as request
taiyang-li Oct 25, 2024
07c803d
change as request
taiyang-li Oct 25, 2024
156f4eb
change as request
taiyang-li Oct 25, 2024
bb5b3ae
fix format
taiyang-li Oct 25, 2024
ba57df2
fix style
taiyang-li Nov 4, 2024
3303fe9
fix style
taiyang-li Nov 5, 2024
ac5a188
fix conflicts
taiyang-li Nov 11, 2024
2046a01
fix style
taiyang-li Nov 11, 2024
48ddd50
hide cacheoptions
taiyang-li Nov 15, 2024
f3d76f1
protext read range cache from parallel accessing
taiyang-li Nov 15, 2024
64861ce
hide some private structures
taiyang-li Nov 15, 2024
2bd47b0
fix building
taiyang-li Nov 15, 2024
75dbf47
fix code style
taiyang-li Nov 19, 2024
0e14b04
change as requested
taiyang-li Nov 21, 2024
e7ec8f9
change
taiyang-li Nov 21, 2024
60ca6e9
add metrics
taiyang-li Nov 21, 2024
5e7c4db
revert files
taiyang-li Nov 21, 2024
9c00e51
Update c++/include/orc/OrcFile.hh
taiyang-li Nov 21, 2024
5dc0266
Update c++/include/orc/Reader.hh
taiyang-li Nov 21, 2024
7e40819
Update c++/src/io/Cache.hh
taiyang-li Nov 21, 2024
bed31a5
Update c++/src/io/Cache.hh
taiyang-li Nov 21, 2024
8dd4ca4
Update c++/include/orc/Reader.hh
taiyang-li Nov 21, 2024
ce3d455
Update c++/include/orc/OrcFile.hh
taiyang-li Nov 21, 2024
e20fc4e
Update c++/src/Options.hh
taiyang-li Nov 21, 2024
6b960bc
change as reeust
taiyang-li Nov 27, 2024
5596d74
change as reeust
taiyang-li Nov 27, 2024
0964efd
fix format and building
taiyang-li Nov 27, 2024
d44ff5a
fix style
taiyang-li Nov 27, 2024
e4610e7
fix format
taiyang-li Nov 27, 2024
fbe7945
Update c++/src/io/Cache.hh
taiyang-li Nov 28, 2024
e85f23a
Update c++/include/orc/Reader.hh
taiyang-li Nov 28, 2024
2ef6a41
Update c++/include/orc/Reader.hh
taiyang-li Nov 28, 2024
c822928
Update c++/src/io/Cache.hh
taiyang-li Nov 28, 2024
e277a65
add params for uts
taiyang-li Nov 28, 2024
b8d2221
add some tests
taiyang-li Nov 28, 2024
0d56a7e
fix stuyle
taiyang-li Nov 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions c++/include/orc/OrcFile.hh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#ifndef ORC_FILE_HH
#define ORC_FILE_HH

#include <future>
#include <string>

#include "orc/Reader.hh"
Expand Down Expand Up @@ -58,6 +59,18 @@ namespace orc {
*/
virtual void read(void* buf, uint64_t length, uint64_t offset) = 0;

/**
* Read data asynchronously into the buffer. The buffer is allocated by the caller.
* @param buf the buffer to read into
* @param length the number of bytes to read.
* @param offset the position in the stream to read from.
* @return a future that will be set when the read is complete.
*/
virtual std::future<void> readAsync(void* buf, uint64_t length, uint64_t offset) {
return std::async(std::launch::async,
[this, buf, length, offset] { this->read(buf, length, offset); });
}

/**
* Get the name of the stream for error messages.
*/
Expand Down
40 changes: 40 additions & 0 deletions c++/include/orc/Reader.hh
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,17 @@ namespace orc {
struct ReaderOptionsPrivate;
struct RowReaderOptionsPrivate;

/**
 * Tuning knobs for combining consecutive read ranges.
 */
struct CacheOptions {
  // Largest gap, in bytes, allowed between two consecutive ranges for them
  // to be combined; ranges further apart than this are kept separate.
  uint64_t holeSizeLimit{8192};

  // Largest size, in bytes, a combined range may reach; two consecutive
  // ranges are not combined if the result would be larger than this.
  uint64_t rangeSizeLimit{32 * 1024 * 1024};
};

/**
* Expose the reader metrics including the latency and
* number of calls of the decompression/decoding/IO modules.
Expand All @@ -59,6 +70,8 @@ namespace orc {
std::atomic<uint64_t> IOBlockingLatencyUs{0};
std::atomic<uint64_t> SelectedRowGroupCount{0};
std::atomic<uint64_t> EvaluatedRowGroupCount{0};
std::atomic<uint64_t> ReadRangeCacheHits{0};
std::atomic<uint64_t> ReadRangeCacheMisses{0};
};
ReaderMetrics* getDefaultReaderMetrics();

Expand Down Expand Up @@ -116,6 +129,11 @@ namespace orc {
*/
ReaderOptions& setReaderMetrics(ReaderMetrics* metrics);

/**
 * Set the cache options.
 * @param cacheOptions the options to use; they are copied into this object
 * @return this ReaderOptions, so that calls can be chained
 */
ReaderOptions& setCacheOptions(const CacheOptions& cacheOptions);

/**
* Set the location of the tail as defined by the logical length of the
* file.
Expand Down Expand Up @@ -147,6 +165,11 @@ namespace orc {
* Get the reader metrics.
*/
ReaderMetrics* getReaderMetrics() const;

/**
 * Get the cache options.
 * @return a reference to the cache options held by this ReaderOptions
 */
const CacheOptions& getCacheOptions() const;
};

/**
Expand Down Expand Up @@ -624,6 +647,23 @@ namespace orc {
*/
virtual std::map<uint32_t, RowGroupIndex> getRowGroupIndex(
uint32_t stripeIndex, const std::set<uint32_t>& included = {}) const = 0;

/**
 * Trigger IO prefetch and cache the prefetched contents asynchronously.
 * It is thread safe. Users should make sure the requested stripes and columns
 * do not overlap; otherwise the overlapping part will be prefetched multiple
 * times, which doesn't affect correctness but wastes IO and memory resources.
 * @param stripes the stripes to prefetch
 * @param includeTypes the types to prefetch
 */
virtual void preBuffer(const std::vector<uint32_t>& stripes,
const std::list<uint64_t>& includeTypes) = 0;

/**
 * Release cached entries whose right boundary is less than or equal to the given boundary.
 * NOTE(review): the boundary is presumably a byte offset in the file, matching
 * the offsets of the ranges cached by preBuffer -- confirm against ReadRangeCache.
 * @param boundary the boundary value to release cache entries
 */
virtual void releaseBuffer(uint64_t boundary) = 0;
};

/**
Expand Down
1 change: 1 addition & 0 deletions c++/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ set(SOURCE_FILES
orc_proto.pb.h
io/InputStream.cc
io/OutputStream.cc
io/Cache.cc
sargs/ExpressionTree.cc
sargs/Literal.cc
sargs/PredicateLeaf.cc
Expand Down
12 changes: 12 additions & 0 deletions c++/src/Options.hh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
#include "orc/OrcFile.hh"
#include "orc/Reader.hh"

#include "io/Cache.hh"

#include <limits>

namespace orc {
Expand All @@ -43,6 +45,7 @@ namespace orc {
MemoryPool* memoryPool;
std::string serializedTail;
ReaderMetrics* metrics;
CacheOptions cacheOptions;

ReaderOptionsPrivate() {
tailLocation = std::numeric_limits<uint64_t>::max();
Expand Down Expand Up @@ -122,6 +125,15 @@ namespace orc {
return privateBits_->errorStream;
}

/**
 * Copy the given cache options into the private state of these options.
 * @param cacheOptions the options to store
 * @return this object, to allow chaining
 */
ReaderOptions& ReaderOptions::setCacheOptions(const CacheOptions& cacheOptions) {
  auto& stored = privateBits_->cacheOptions;
  stored = cacheOptions;
  return *this;
}

/**
 * Access the cache options stored in the private state of these options.
 * @return a reference to the stored cache options
 */
const CacheOptions& ReaderOptions::getCacheOptions() const {
  const CacheOptions& stored = privateBits_->cacheOptions;
  return stored;
}

/**
* RowReaderOptions Implementation
*/
Expand Down
88 changes: 88 additions & 0 deletions c++/src/Reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1523,6 +1523,94 @@ namespace orc {
return ret;
}

/**
 * Evict cached entries that lie before the given boundary.
 *
 * Holds readCacheMutex for the whole call. Does nothing when no read cache
 * has been created yet.
 *
 * @param boundary forwarded to ReadRangeCache::evictEntriesBefore
 */
void ReaderImpl::releaseBuffer(uint64_t boundary) {
  std::lock_guard<std::mutex> guard(contents_->readCacheMutex);

  // Nothing was ever pre-buffered, so there is nothing to evict.
  if (!contents_->readCache) {
    return;
  }
  contents_->readCache->evictEntriesBefore(boundary);
}

void ReaderImpl::preBuffer(const std::vector<uint32_t>& stripes,
const std::list<uint64_t>& includeTypes) {
std::vector<uint32_t> newStripes;
for (auto stripe : stripes) {
if (stripe < static_cast<uint32_t>(footer_->stripes_size())) newStripes.push_back(stripe);
}

std::list<uint64_t> newIncludeTypes;
for (auto type : includeTypes) {
if (type < static_cast<uint64_t>(footer_->types_size())) newIncludeTypes.push_back(type);
}

if (newStripes.empty() || newIncludeTypes.empty()) {
return;
}

orc::RowReaderOptions rowReaderOptions;
rowReaderOptions.includeTypes(newIncludeTypes);
ColumnSelector columnSelector(contents_.get());
std::vector<bool> selectedColumns;
columnSelector.updateSelected(selectedColumns, rowReaderOptions);

std::vector<ReadRange> ranges;
taiyang-li marked this conversation as resolved.
Show resolved Hide resolved
ranges.reserve(newIncludeTypes.size());
for (auto stripe : newStripes) {
// get stripe information
const auto& stripeInfo = footer_->stripes(stripe);
uint64_t stripeFooterStart =
stripeInfo.offset() + stripeInfo.index_length() + stripeInfo.data_length();
uint64_t stripeFooterLength = stripeInfo.footer_length();

// get stripe footer
std::unique_ptr<SeekableInputStream> pbStream = createDecompressor(
contents_->compression,
std::make_unique<SeekableFileInputStream>(contents_->stream.get(), stripeFooterStart,
stripeFooterLength, *contents_->pool),
contents_->blockSize, *contents_->pool, contents_->readerMetrics);
proto::StripeFooter stripeFooter;
if (!stripeFooter.ParseFromZeroCopyStream(pbStream.get())) {
throw ParseError(std::string("bad StripeFooter from ") + pbStream->getName());
}

// traverse all streams in stripe footer, choose selected streams to prebuffer
uint64_t offset = stripeInfo.offset();
for (int i = 0; i < stripeFooter.streams_size(); i++) {
const proto::Stream& stream = stripeFooter.streams(i);
if (offset + stream.length() > stripeFooterStart) {
std::stringstream msg;
msg << "Malformed stream meta at stream index " << i << " in stripe " << stripe
<< ": streamOffset=" << offset << ", streamLength=" << stream.length()
<< ", stripeOffset=" << stripeInfo.offset()
<< ", stripeIndexLength=" << stripeInfo.index_length()
<< ", stripeDataLength=" << stripeInfo.data_length();
throw ParseError(msg.str());
}

if (stream.has_kind() && selectedColumns[stream.column()]) {
const auto& kind = stream.kind();
if (kind == proto::Stream_Kind_DATA || kind == proto::Stream_Kind_DICTIONARY_DATA ||
taiyang-li marked this conversation as resolved.
Show resolved Hide resolved
kind == proto::Stream_Kind_PRESENT || kind == proto::Stream_Kind_LENGTH ||
kind == proto::Stream_Kind_SECONDARY) {
ranges.emplace_back(offset, stream.length());
}
}

offset += stream.length();
}

{
std::lock_guard<std::mutex> lock(contents_->readCacheMutex);

if (!contents_->readCache) {
contents_->readCache = std::make_shared<ReadRangeCache>(
getStream(), options_.getCacheOptions(), contents_->pool, contents_->readerMetrics);
}
contents_->readCache->cache(std::move(ranges));
}
}
}

// The destructor has nothing to do; it is defined here rather than in the header.
RowReader::~RowReader() = default;
Expand Down
16 changes: 16 additions & 0 deletions c++/src/Reader.hh
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@

#include "ColumnReader.hh"
#include "RLE.hh"
#include "io/Cache.hh"

#include "SchemaEvolution.hh"
#include "TypeImpl.hh"
#include "sargs/SargsApplier.hh"
Expand Down Expand Up @@ -70,6 +72,11 @@ namespace orc {
bool isDecimalAsLong;
std::unique_ptr<proto::Metadata> metadata;
ReaderMetrics* readerMetrics;

// Mutex protecting readCache from concurrent access; ReaderImpl::preBuffer
// and ReaderImpl::releaseBuffer lock it before touching readCache.
std::mutex readCacheMutex;
// Cached IO ranges. Stays null until preBuffer is invoked, which creates it.
std::shared_ptr<ReadRangeCache> readCache;
};

proto::StripeFooter getStripeFooter(const proto::StripeInformation& info,
Expand Down Expand Up @@ -245,6 +252,10 @@ namespace orc {
// Returns a non-owning pointer to this reader's schema evolution member.
const SchemaEvolution* getSchemaEvolution() const {
  const SchemaEvolution* evolution = &schemaEvolution_;
  return evolution;
}

/**
 * Get the read range cache shared through the file contents.
 *
 * ReaderImpl::preBuffer assigns contents_->readCache while holding
 * readCacheMutex, so the pointer is copied under the same mutex here;
 * reading it unlocked would race with that assignment.
 *
 * @return the cache, or a null pointer if preBuffer has not created one yet
 */
std::shared_ptr<ReadRangeCache> getReadCache() const {
  std::lock_guard<std::mutex> lock(contents_->readCacheMutex);
  return contents_->readCache;
}
};

class ReaderImpl : public Reader {
Expand All @@ -260,6 +271,7 @@ namespace orc {
// footer
proto::Footer* footer_;
uint64_t numberOfStripes_;

uint64_t getMemoryUse(int stripeIx, std::vector<bool>& selectedColumns);

// internal methods
Expand Down Expand Up @@ -375,6 +387,10 @@ namespace orc {
std::map<uint32_t, BloomFilterIndex> getBloomFilters(
uint32_t stripeIndex, const std::set<uint32_t>& included) const override;

// Prefetches the selected streams of the given stripes into the read range
// cache; stripe indexes and type ids not present in the footer are ignored.
void preBuffer(const std::vector<uint32_t>& stripes,
const std::list<uint64_t>& includeTypes) override;
// Evicts cached entries before the given boundary; does nothing if no cache exists.
void releaseBuffer(uint64_t boundary) override;

std::map<uint32_t, RowGroupIndex> getRowGroupIndex(
uint32_t stripeIndex, const std::set<uint32_t>& included) const override;
};
Expand Down
25 changes: 20 additions & 5 deletions c++/src/StripeStream.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "StripeStream.hh"
#include "RLE.hh"
#include "Reader.hh"
#include "io/Cache.hh"
taiyang-li marked this conversation as resolved.
Show resolved Hide resolved
#include "orc/Exceptions.hh"

#include "wrap/coded-stream-wrapper.h"
Expand All @@ -37,7 +38,8 @@ namespace orc {
stripeStart_(stripeStart),
input_(input),
writerTimezone_(writerTimezone),
readerTimezone_(readerTimezone) {
readerTimezone_(readerTimezone),
readCache_(reader.getReadCache()) {
taiyang-li marked this conversation as resolved.
Show resolved Hide resolved
// PASS
}

Expand Down Expand Up @@ -89,7 +91,6 @@ namespace orc {
if (stream.has_kind() && stream.kind() == kind &&
stream.column() == static_cast<uint64_t>(columnId)) {
uint64_t streamLength = stream.length();
uint64_t myBlock = shouldStream ? input_.getNaturalReadSize() : streamLength;
if (offset + streamLength > dataEnd) {
std::stringstream msg;
msg << "Malformed stream meta at stream index " << i << " in stripe " << stripeIndex_
Expand All @@ -99,9 +100,23 @@ namespace orc {
<< ", stripeDataLength=" << stripeInfo_.data_length();
throw ParseError(msg.str());
}
return createDecompressor(reader_.getCompression(),
std::make_unique<SeekableFileInputStream>(
&input_, offset, stream.length(), *pool, myBlock),

BufferSlice slice;
if (readCache_) {
ReadRange range{offset, streamLength};
slice = readCache_->read(range);
}

uint64_t myBlock = shouldStream ? input_.getNaturalReadSize() : streamLength;
std::unique_ptr<SeekableInputStream> seekableInput;
if (slice.buffer) {
seekableInput = std::make_unique<SeekableArrayInputStream>(
slice.buffer->data() + slice.offset, slice.length);
} else {
seekableInput = std::make_unique<SeekableFileInputStream>(&input_, offset, streamLength,
*pool, myBlock);
}
return createDecompressor(reader_.getCompression(), std::move(seekableInput),
reader_.getCompressionSize(), *pool,
reader_.getFileContents().readerMetrics);
}
Expand Down
2 changes: 2 additions & 0 deletions c++/src/StripeStream.hh
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
namespace orc {

class RowReaderImpl;
class ReadRangeCache;

/**
* StripeStream Implementation
Expand All @@ -45,6 +46,7 @@ namespace orc {
InputStream& input_;
const Timezone& writerTimezone_;
const Timezone& readerTimezone_;
std::shared_ptr<ReadRangeCache> readCache_;

public:
StripeStreamsImpl(const RowReaderImpl& reader, uint64_t index,
Expand Down
Loading
Loading