Skip to content

Commit

Permalink
Fix segfault due to mismatched validity num and data buffer sizes
Browse files Browse the repository at this point in the history
x-ref: ch 7386
  • Loading branch information
ihnorton committed May 12, 2021
1 parent 4b594eb commit e596914
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 3 deletions.
1 change: 1 addition & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
* Fixed incorrect error raised in .df[] indexer when pyarrow is not installed [#554](https://github.com/TileDB-Inc/TileDB-Py/pull/554)
* Fixed `from_pandas(attr_filters=None, dim_filters=None)` (previously used internal defaults) [#564](https://github.com/TileDB-Inc/TileDB-Py/pull/564)
* Fixed `from_pandas` write bug due to incorrect classification of str/bytes columns [#562](https://github.com/TileDB-Inc/TileDB-Py/pull/562)
* Fixed segfault due to mismatched validity num and data buffer sizes [#567](https://github.com/TileDB-Inc/TileDB-Py/pull/567)

# TileDB-Py 0.8.8 Release Notes

Expand Down
26 changes: 23 additions & 3 deletions tiledb/core.cc
Original file line number Diff line number Diff line change
Expand Up @@ -703,6 +703,7 @@ class PyQuery {
(buf_nbytes < init_buffer_bytes_ || exact_init_bytes_)) {
buf_nbytes = init_buffer_bytes_;
offsets_num = init_buffer_bytes_ / sizeof(uint64_t);
validity_num = init_buffer_bytes_ / cell_nbytes;
}

buffers_order_.push_back(name);
Expand Down Expand Up @@ -757,10 +758,25 @@ class PyQuery {
}

void update_read_elem_num() {
for (const auto &read_info : query_->result_buffer_elements()) {
#if TILEDB_VERSION_MAJOR >= 2 && TILEDB_VERSION_MINOR >= 3
// needs https://github.com/TileDB-Inc/TileDB/pull/2238
auto result_elements = query_->result_buffer_elements_nullable();
#else
auto result_elements = query_->result_buffer_elements_nullable();
auto result_offsets_tmp = query_->result_buffer_elements();
#endif

for (const auto &read_info : result_elements) {
auto name = read_info.first;
uint64_t offset_elem_num = 0, data_vals_num = 0, validity_vals_num = 0;
std::tie(offset_elem_num, data_vals_num) = read_info.second;
uint64_t offset_elem_num = 0, data_vals_num = 0, validity_elem_num = 0;
std::tie(offset_elem_num, data_vals_num, validity_elem_num) =
read_info.second;

#if TILEDB_VERSION_MAJOR >= 2 && TILEDB_VERSION_MINOR < 3
// we need to fix-up the offset count b/c incorrect before 2.3
// (https://github.com/TileDB-Inc/TileDB/pull/2238)
offset_elem_num = result_offsets_tmp[name].first;
#endif

BufferInfo &buf = buffers_.at(name);

Expand All @@ -787,6 +803,7 @@ class PyQuery {

buf.data_vals_read += data_vals_num;
buf.offsets_read += offset_elem_num;
buf.validity_vals_read += validity_elem_num;
}
}

Expand Down Expand Up @@ -868,12 +885,14 @@ class PyQuery {

auto final_data_nbytes = buf.data_vals_read * buf.elem_nbytes;
auto final_offsets_count = buf.offsets_read + arrow_offset_size;
auto final_validity_count = buf.validity_vals_read;

assert(final_data_nbytes <= buf.data.size());
assert(final_offsets_count <= buf.offsets.size() + arrow_offset_size);

buf.data.resize({final_data_nbytes});
buf.offsets.resize({final_offsets_count});
buf.validity.resize({final_validity_count});

if (use_arrow_) {
if (retries_ > 0) {
Expand All @@ -885,6 +904,7 @@ class PyQuery {
// reset bytes-read so that set_buffers uses the full buffer size
buf.data_vals_read = 0;
buf.offsets_read = 0;
buf.validity_vals_read = 0;
}
}
if (use_arrow_) {
Expand Down

0 comments on commit e596914

Please sign in to comment.