From 6578d2fca75cde91f12cbf8e9297895900baa030 Mon Sep 17 00:00:00 2001
From: Lloyd-Pottiger <60744015+Lloyd-Pottiger@users.noreply.github.com>
Date: Sun, 29 Sep 2024 20:20:43 +0800
Subject: [PATCH] storage: fix block rows not match when filter column is the
 first non-empty column in the block (#9484)

ref pingcap/tiflash#9472

storage: fix block rows not match when filter column is the first non-empty column in the block

Signed-off-by: Lloyd-Pottiger <yan1579196623@gmail.com>

Co-authored-by: JaySon <tshent@qq.com>
---
 dbms/src/Core/Block.cpp                             |  4 +---
 dbms/src/Core/Block.h                               | 13 ++++++++-----
 .../LateMaterializationBlockInputStream.cpp         | 13 +++++++++++++
 3 files changed, 22 insertions(+), 8 deletions(-)
diff --git a/dbms/src/Core/Block.cpp b/dbms/src/Core/Block.cpp
index 70abd1be119..9638d400cc8 100644
--- a/dbms/src/Core/Block.cpp
+++ b/dbms/src/Core/Block.cpp
@@ -531,7 +531,7 @@ static ReturnType checkBlockStructure(const Block & lhs, const Block & rhs, cons
         }
     }
 
-    return ReturnType(true);
+    return static_cast<ReturnType>(true);
 }
 
 /// join blocks by columns
@@ -541,10 +541,8 @@ Block hstackBlocks(Blocks && blocks, const Block & header)
         return {};
 
     Block res = header.cloneEmpty();
-    size_t num_rows = blocks.front().rows();
     for (const auto & block : blocks)
     {
-        RUNTIME_CHECK_MSG(block.rows() == num_rows, "Cannot hstack blocks with different number of rows");
         for (const auto & elem : block)
         {
             if (likely(res.has(elem.name)))
diff --git a/dbms/src/Core/Block.h b/dbms/src/Core/Block.h
index 890cebd072c..bb34fd2690e 100644
--- a/dbms/src/Core/Block.h
+++ b/dbms/src/Core/Block.h
@@ -175,11 +175,14 @@ using BucketBlocksListMap = std::map<Int32, BlocksList>;
 /// Join blocks by columns
 /// The schema of the output block is the same as the header block.
 /// The columns not in the header block will be ignored.
-/// For example:
-/// header: (a UInt32, b UInt32, c UInt32, d UInt32)
-/// block1: (a UInt32, b UInt32, c UInt32, e UInt32), rows: 3
-/// block2: (d UInt32), rows: 3
-/// result: (a UInt32, b UInt32, c UInt32, d UInt32), rows: 3
+/// NOTE: The input blocks can have columns with different sizes,
+///       but the columns in the header block must have the same size.
+///       Otherwise, the returned block will contain columns with different sizes.
+/// Example:
+///       header: (a UInt32, b UInt32, c UInt32, d UInt32)
+///       block1: (a UInt32, b UInt32, c UInt32, e UInt32), rows: 3
+///       block2: (d UInt32), rows: 3
+///       result: (a UInt32, b UInt32, c UInt32, d UInt32), rows: 3
 Block hstackBlocks(Blocks && blocks, const Block & header);
 
 /// Join blocks by rows
diff --git a/dbms/src/Storages/DeltaMerge/LateMaterializationBlockInputStream.cpp b/dbms/src/Storages/DeltaMerge/LateMaterializationBlockInputStream.cpp
index 0688b5ce381..bb36c019546 100644
--- a/dbms/src/Storages/DeltaMerge/LateMaterializationBlockInputStream.cpp
+++ b/dbms/src/Storages/DeltaMerge/LateMaterializationBlockInputStream.cpp
@@ -110,12 +110,18 @@ Block LateMaterializationBlockInputStream::readImpl()
                 // so only if the number of rows left after filtering out is large enough,
                 // we can skip some packs of the next block, call readWithFilter to get the next block.
                 rest_column_block = rest_column_stream->readWithFilter(*filter);
+                ColumnPtr filter_column;
                 for (auto & col : filter_column_block)
                 {
                     if (col.name == filter_column_name)
+                    {
+                        filter_column = col.column;
                         continue;
+                    }
                     col.column = col.column->filter(*filter, passed_count);
                 }
+                if (header.has(filter_column_name))
+                    filter_column = filter_column->filter(*filter, passed_count);
             }
             else if (filter_out_count > 0)
             {
@@ -126,12 +132,19 @@ Block LateMaterializationBlockInputStream::readImpl()
                 {
                     col.column = col.column->filter(*filter, passed_count);
                 }
+                ColumnPtr filter_column;
+
                 for (auto & col : filter_column_block)
                 {
                     if (col.name == filter_column_name)
+                    {
+                        filter_column = col.column;
                         continue;
+                    }
                     col.column = col.column->filter(*filter, passed_count);
                 }
+                if (header.has(filter_column_name))
+                    filter_column = filter_column->filter(*filter, passed_count);
             }
             else
             {