Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-43444: [C++] Add benchmark for binary view builder #43445

Merged
merged 5 commits into from
Jul 31, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions cpp/src/arrow/array/builder_binary.h
Original file line number Diff line number Diff line change
Expand Up @@ -500,9 +500,9 @@ class ARROW_EXPORT StringHeapBuilder {
ARROW_RETURN_NOT_OK(Reserve(length));
}

auto v =
util::ToBinaryView(value, static_cast<int32_t>(length),
static_cast<int32_t>(blocks_.size() - 1), current_offset_);
auto v = util::ToLargeBinaryView(value, static_cast<int32_t>(length),
static_cast<int32_t>(blocks_.size() - 1),
current_offset_);

memcpy(current_out_buffer_, value, static_cast<size_t>(length));
current_out_buffer_ += length;
Expand Down
39 changes: 39 additions & 0 deletions cpp/src/arrow/builder_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,43 @@ static void BuildBinaryArray(benchmark::State& state) { // NOLINT non-const ref
state.SetItemsProcessed(state.iterations() * kItemsProcessed);
}

static void BuildSmallBinaryViewArray(
benchmark::State& state) { // NOLINT non-const reference
std::string_view kBinaryStrings[] = {"1", "12345678", "12345", "123456789", "12", "", " "};

for (auto _ : state) {
BinaryViewBuilder builder(memory_tracker.memory_pool());

for (int64_t i = 0; i < kRounds * kNumberOfElements; i++) {
ABORT_NOT_OK(builder.Append(kBinaryStrings[i % 7]));
}

std::shared_ptr<Array> out;
ABORT_NOT_OK(builder.Finish(&out));
}

state.SetBytesProcessed(state.iterations() * kBytesProcessed);
state.SetItemsProcessed(state.iterations() * kItemsProcessed);
}

static void BuildLargeBinaryViewArray(
benchmark::State& state) { // NOLINT non-const reference
const char* kLargeBinaryString = "12345678901234567890123456789012345678901234567890";
for (auto _ : state) {
BinaryViewBuilder builder(memory_tracker.memory_pool());

for (int64_t i = 0; i < kRounds * kNumberOfElements; i++) {
ABORT_NOT_OK(builder.Append(kLargeBinaryString));
}

std::shared_ptr<Array> out;
ABORT_NOT_OK(builder.Finish(&out));
}

state.SetBytesProcessed(state.iterations() * kBytesProcessed);
state.SetItemsProcessed(state.iterations() * kItemsProcessed);
}

static void BuildChunkedBinaryArray(
benchmark::State& state) { // NOLINT non-const reference
// 1MB chunks
Expand Down Expand Up @@ -458,6 +495,8 @@ BENCHMARK(BuildBinaryArray);
BENCHMARK(BuildChunkedBinaryArray);
BENCHMARK(BuildFixedSizeBinaryArray);
BENCHMARK(BuildDecimalArray);
BENCHMARK(BuildSmallBinaryViewArray);
BENCHMARK(BuildLargeBinaryViewArray);

BENCHMARK(BuildInt64DictionaryArrayRandom);
BENCHMARK(BuildInt64DictionaryArraySequential);
Expand Down
18 changes: 12 additions & 6 deletions cpp/src/arrow/util/binary_view_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
namespace arrow::util {

inline BinaryViewType::c_type ToInlineBinaryView(const void* data, int32_t size) {
assert(size <= BinaryViewType::kInlineSize);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://godbolt.org/z/q8xjb8Ehe
From here, COMPILER ASSUMER might generate better code in C++

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Which I think we be assumed when this is inlined from the if below.

// Small string: inlined. Bytes beyond size are zeroed
BinaryViewType::c_type out;
out.inlined = {size, {}};
Expand All @@ -34,22 +35,27 @@ inline BinaryViewType::c_type ToInlineBinaryView(const void* data, int32_t size)
}

inline BinaryViewType::c_type ToInlineBinaryView(std::string_view v) {
assert(v.size() <= BinaryViewType::kInlineSize);
return ToInlineBinaryView(v.data(), static_cast<int32_t>(v.size()));
}

inline BinaryViewType::c_type ToBinaryView(const void* data, int32_t size,
int32_t buffer_index, int32_t offset) {
if (size <= BinaryViewType::kInlineSize) {
return ToInlineBinaryView(data, size);
}

inline BinaryViewType::c_type ToLargeBinaryView(const void* data, int32_t size,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: call this ToNonInlineBinaryView to not confuse people that might think a LARGE_BINARY_VIEW type exists. [I misread it like this at first :)]

int32_t buffer_index, int32_t offset) {
// Large string: store index/offset.
BinaryViewType::c_type out;
out.ref = {size, {}, buffer_index, offset};
memcpy(&out.ref.prefix, data, sizeof(out.ref.prefix));
return out;
}

inline BinaryViewType::c_type ToBinaryView(const void* data, int32_t size,
int32_t buffer_index, int32_t offset) {
if (size <= BinaryViewType::kInlineSize) {
return ToInlineBinaryView(data, size);
}
return ToLargeBinaryView(data, size, buffer_index, offset);
}

inline BinaryViewType::c_type ToBinaryView(std::string_view v, int32_t buffer_index,
int32_t offset) {
return ToBinaryView(v.data(), static_cast<int32_t>(v.size()), buffer_index, offset);
Expand Down
Loading