-
Notifications
You must be signed in to change notification settings - Fork 3.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GH-39332: [C++] Explicit error in ExecBatchBuilder when appending var length data exceeds offset limit (int32 max) #39383
Changes from all commits
b88558b
9797e24
eb91fce
a421d8e
f9b7ff8
c0db9bd
c1ae591
34d2d41
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -407,6 +407,70 @@ TEST(ExecBatchBuilder, AppendValuesBeyondLimit) { | |
ASSERT_EQ(0, pool->bytes_allocated()); | ||
} | ||
|
||
TEST(ExecBatchBuilder, AppendVarLengthBeyondLimit) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add a comment referring to the GH issue? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sure, will do. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
// GH-39332: check appending variable-length data past 2GB. | ||
if constexpr (sizeof(void*) == 4) { | ||
GTEST_SKIP() << "Test only works on 64-bit platforms"; | ||
} | ||
|
||
std::unique_ptr<MemoryPool> owned_pool = MemoryPool::CreateDefault(); | ||
MemoryPool* pool = owned_pool.get(); | ||
constexpr auto eight_mb = 8 * 1024 * 1024; | ||
constexpr auto eight_mb_minus_one = eight_mb - 1; | ||
// String of size 8mb to repetitively fill the heading multiple of 8mbs of an array | ||
// of int32_max bytes. | ||
std::string str_8mb(eight_mb, 'a'); | ||
// String of size (8mb - 1) to be the last element of an array of int32_max bytes. | ||
std::string str_8mb_minus_1(eight_mb_minus_one, 'b'); | ||
std::shared_ptr<Array> values_8mb = ConstantArrayGenerator::String(1, str_8mb); | ||
std::shared_ptr<Array> values_8mb_minus_1 = | ||
ConstantArrayGenerator::String(1, str_8mb_minus_1); | ||
|
||
ExecBatch batch_8mb({values_8mb}, 1); | ||
ExecBatch batch_8mb_minus_1({values_8mb_minus_1}, 1); | ||
|
||
auto num_rows = std::numeric_limits<int32_t>::max() / eight_mb; | ||
std::vector<uint16_t> body_row_ids(num_rows, 0); | ||
std::vector<uint16_t> tail_row_id(1, 0); | ||
|
||
{ | ||
// Building an array of (int32_max + 1) = (8mb * num_rows + 8mb) bytes should raise an | ||
// error of overflow. | ||
ExecBatchBuilder builder; | ||
ASSERT_OK(builder.AppendSelected(pool, batch_8mb, num_rows, body_row_ids.data(), | ||
/*num_cols=*/1)); | ||
std::stringstream ss; | ||
ss << "Invalid: Overflow detected in ExecBatchBuilder when appending " << num_rows + 1 | ||
<< "-th element of length " << eight_mb << " bytes to current length " | ||
<< eight_mb * num_rows << " bytes"; | ||
ASSERT_RAISES_WITH_MESSAGE( | ||
Invalid, ss.str(), | ||
builder.AppendSelected(pool, batch_8mb, 1, tail_row_id.data(), | ||
/*num_cols=*/1)); | ||
} | ||
|
||
{ | ||
// Building an array of int32_max = (8mb * num_rows + 8mb - 1) bytes should succeed. | ||
ExecBatchBuilder builder; | ||
ASSERT_OK(builder.AppendSelected(pool, batch_8mb, num_rows, body_row_ids.data(), | ||
/*num_cols=*/1)); | ||
ASSERT_OK(builder.AppendSelected(pool, batch_8mb_minus_1, 1, tail_row_id.data(), | ||
/*num_cols=*/1)); | ||
ExecBatch built = builder.Flush(); | ||
auto datum = built[0]; | ||
ASSERT_TRUE(datum.is_array()); | ||
auto array = datum.array_as<StringArray>(); | ||
ASSERT_EQ(array->length(), num_rows + 1); | ||
for (int i = 0; i < num_rows; ++i) { | ||
ASSERT_EQ(array->GetString(i), str_8mb); | ||
} | ||
ASSERT_EQ(array->GetString(num_rows), str_8mb_minus_1); | ||
ASSERT_NE(0, pool->bytes_allocated()); | ||
} | ||
|
||
ASSERT_EQ(0, pool->bytes_allocated()); | ||
} | ||
|
||
TEST(KeyColumnArray, FromExecBatch) { | ||
ExecBatch batch = | ||
JSONToExecBatch({int64(), boolean()}, "[[1, true], [2, false], [null, null]]"); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wanted to change the type of `metadata.fixed_length` to `int32_t`, but that would bring in a large number of related changes that would overwhelm this small PR. So I prefer to leave it as is and do a simple cast here. Changing `fixed_length` of `RowTableMetadata` and `KeyColumnMetaData` to a signed type could be a future enhancement.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Cool, thank you!