From 6578d2fca75cde91f12cbf8e9297895900baa030 Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger <60744015+Lloyd-Pottiger@users.noreply.github.com> Date: Sun, 29 Sep 2024 20:20:43 +0800 Subject: [PATCH] storage: fix block rows not match when filter column is the first non-empty column in the block (#9484) ref pingcap/tiflash#9472 storage: fix block rows not match when filter column is the first non-empty column in the block Signed-off-by: Lloyd-Pottiger Co-authored-by: JaySon --- dbms/src/Core/Block.cpp | 4 +--- dbms/src/Core/Block.h | 13 ++++++++----- .../LateMaterializationBlockInputStream.cpp | 13 +++++++++++++ 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/dbms/src/Core/Block.cpp b/dbms/src/Core/Block.cpp index 70abd1be119..9638d400cc8 100644 --- a/dbms/src/Core/Block.cpp +++ b/dbms/src/Core/Block.cpp @@ -531,7 +531,7 @@ static ReturnType checkBlockStructure(const Block & lhs, const Block & rhs, cons } } - return ReturnType(true); + return static_cast<ReturnType>(true); } /// join blocks by columns @@ -541,10 +541,8 @@ Block hstackBlocks(Blocks && blocks, const Block & header) return {}; Block res = header.cloneEmpty(); - size_t num_rows = blocks.front().rows(); for (const auto & block : blocks) { - RUNTIME_CHECK_MSG(block.rows() == num_rows, "Cannot hstack blocks with different number of rows"); for (const auto & elem : block) { if (likely(res.has(elem.name))) diff --git a/dbms/src/Core/Block.h b/dbms/src/Core/Block.h index 890cebd072c..bb34fd2690e 100644 --- a/dbms/src/Core/Block.h +++ b/dbms/src/Core/Block.h @@ -175,11 +175,14 @@ using BucketBlocksListMap = std::map; /// Join blocks by columns /// The schema of the output block is the same as the header block. /// The columns not in the header block will be ignored. 
-/// For example: -/// header: (a UInt32, b UInt32, c UInt32, d UInt32) -/// block1: (a UInt32, b UInt32, c UInt32, e UInt32), rows: 3 -/// block2: (d UInt32), rows: 3 -/// result: (a UInt32, b UInt32, c UInt32, d UInt32), rows: 3 +/// NOTE: The input blocks can have columns with different sizes, +/// but the columns in the header block must have the same size; +/// otherwise, the returned block will contain columns with different sizes. +/// Example: +/// header: (a UInt32, b UInt32, c UInt32, d UInt32) +/// block1: (a UInt32, b UInt32, c UInt32, e UInt32), rows: 3 +/// block2: (d UInt32), rows: 3 +/// result: (a UInt32, b UInt32, c UInt32, d UInt32), rows: 3 Block hstackBlocks(Blocks && blocks, const Block & header); /// Join blocks by rows diff --git a/dbms/src/Storages/DeltaMerge/LateMaterializationBlockInputStream.cpp b/dbms/src/Storages/DeltaMerge/LateMaterializationBlockInputStream.cpp index 0688b5ce381..bb36c019546 100644 --- a/dbms/src/Storages/DeltaMerge/LateMaterializationBlockInputStream.cpp +++ b/dbms/src/Storages/DeltaMerge/LateMaterializationBlockInputStream.cpp @@ -110,12 +110,18 @@ Block LateMaterializationBlockInputStream::readImpl() // so only if the number of rows left after filtering out is large enough, // we can skip some packs of the next block, call readWithFilter to get the next block. 
rest_column_block = rest_column_stream->readWithFilter(*filter); + ColumnPtr filter_column; for (auto & col : filter_column_block) { if (col.name == filter_column_name) + { + filter_column = col.column; continue; + } col.column = col.column->filter(*filter, passed_count); } + if (header.has(filter_column_name)) + filter_column = filter_column->filter(*filter, passed_count); } else if (filter_out_count > 0) { @@ -126,12 +132,19 @@ Block LateMaterializationBlockInputStream::readImpl() { col.column = col.column->filter(*filter, passed_count); } + ColumnPtr filter_column; + for (auto & col : filter_column_block) { if (col.name == filter_column_name) + { + filter_column = col.column; continue; + } col.column = col.column->filter(*filter, passed_count); } + if (header.has(filter_column_name)) + filter_column = filter_column->filter(*filter, passed_count); } else {