From e596914d67034aa11f69c085f3957c97d4291fa6 Mon Sep 17 00:00:00 2001 From: Isaiah Norton Date: Wed, 12 May 2021 12:30:39 -0400 Subject: [PATCH] Fix segfault due to mismatched validity num and data buffer sizes x-ref: ch 7386 --- HISTORY.md | 1 + tiledb/core.cc | 26 +++++++++++++++++++++++--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 6961f72c94..85eeaf6af8 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -15,6 +15,7 @@ * Fixed incorrected error raised in .df[] indexer when pyarrow not installed [#554](https://github.com/TileDB-Inc/TileDB-Py/pull/554) * Fixed `from_pandas(attr_filters=None, dim_filters=None)` (previously used internal defaults) [#564](https://github.com/TileDB-Inc/TileDB-Py/pull/554) * Fixed `from_pandas` write bug due to incorrect classification of str/bytes columns [#562](https://github.com/TileDB-Inc/TileDB-Py/pull/562) +* Fix segfault due to mismatched validity num and data buffer sizes [#567](https://github.com/TileDB-Inc/TileDB-Py/pull/567) # TileDB-Py 0.8.8 Release Notes diff --git a/tiledb/core.cc b/tiledb/core.cc index 32e275e680..883ac5af42 100644 --- a/tiledb/core.cc +++ b/tiledb/core.cc @@ -703,6 +703,7 @@ class PyQuery { (buf_nbytes < init_buffer_bytes_ || exact_init_bytes_)) { buf_nbytes = init_buffer_bytes_; offsets_num = init_buffer_bytes_ / sizeof(uint64_t); + validity_num = init_buffer_bytes_ / cell_nbytes; } buffers_order_.push_back(name); @@ -757,10 +758,25 @@ class PyQuery { } void update_read_elem_num() { - for (const auto &read_info : query_->result_buffer_elements()) { +#if TILEDB_VERSION_MAJOR >= 2 && TILEDB_VERSION_MINOR >= 3 + // needs https://github.com/TileDB-Inc/TileDB/pull/2238 + auto result_elements = query_->result_buffer_elements_nullable(); +#else + auto result_elements = query_->result_buffer_elements_nullable(); + auto result_offsets_tmp = query_->result_buffer_elements(); +#endif + + for (const auto &read_info : result_elements) { auto name = read_info.first; - uint64_t offset_elem_num = 0, data_vals_num = 0, validity_vals_num = 0; - std::tie(offset_elem_num, data_vals_num) = read_info.second; + uint64_t offset_elem_num = 0, data_vals_num = 0, validity_elem_num = 0; + std::tie(offset_elem_num, data_vals_num, validity_elem_num) = + read_info.second; + +#if TILEDB_VERSION_MAJOR >= 2 && TILEDB_VERSION_MINOR < 3 + // we need to fix-up the offset count b/c incorrect before 2.3 + // (https://github.com/TileDB-Inc/TileDB/pull/2238) + offset_elem_num = result_offsets_tmp[name].first; +#endif BufferInfo &buf = buffers_.at(name); @@ -787,6 +803,7 @@ class PyQuery { buf.data_vals_read += data_vals_num; buf.offsets_read += offset_elem_num; + buf.validity_vals_read += validity_elem_num; } } @@ -868,12 +885,14 @@ class PyQuery { auto final_data_nbytes = buf.data_vals_read * buf.elem_nbytes; auto final_offsets_count = buf.offsets_read + arrow_offset_size; + auto final_validity_count = buf.validity_vals_read; assert(final_data_nbytes <= buf.data.size()); assert(final_offsets_count <= buf.offsets.size() + arrow_offset_size); buf.data.resize({final_data_nbytes}); buf.offsets.resize({final_offsets_count}); + buf.validity.resize({final_validity_count}); if (use_arrow_) { if (retries_ > 0) { @@ -885,6 +904,7 @@ class PyQuery { // reset bytes-read so that set_buffers uses the full buffer size buf.data_vals_read = 0; buf.offsets_read = 0; + buf.validity_vals_read = 0; } } if (use_arrow_) {