From cff1e567c8c5587662c2491f64a56329bda55756 Mon Sep 17 00:00:00 2001 From: jinhelin Date: Tue, 18 Jun 2024 17:53:40 +0800 Subject: [PATCH 1/3] Storages: MinMaxIndex::checkIsNull returns RSResult::All if a pack only contains null marks and delete marks --- .../Storages/DeltaMerge/Index/MinMaxIndex.cpp | 6 +- .../tests/gtest_dm_minmax_index.cpp | 101 ++++++++++++++++++ 2 files changed, 106 insertions(+), 1 deletion(-) diff --git a/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp b/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp index 1523f07d9ec..25bc366b5ce 100644 --- a/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp +++ b/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp @@ -576,13 +576,17 @@ RSResults MinMaxIndex::checkNullableCmp( return results; } +// If a pack only contains null marks and delete marks, checkIsNull will return RSResult::All. +// This is safe because MVCC will read the tag column and the deleted rows will be filtered out. RSResults MinMaxIndex::checkIsNull(size_t start_pack, size_t pack_count) { RSResults results(pack_count, RSResult::None); for (size_t i = start_pack; i < start_pack + pack_count; ++i) { if (has_null_marks[i]) - results[i - start_pack] = RSResult::Some; + { + results[i - start_pack] = has_value_marks[i] ? RSResult::Some : RSResult::All; + } } return results; } diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp index 2b160c0f582..722d4c7c7e4 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp @@ -31,6 +31,7 @@ #include #include +#include namespace DB::DM::tests { @@ -2255,4 +2256,104 @@ try } CATCH +namespace +{ +void cmpResults(const RSResults & actual, const RSResults & expected, std::source_location location) +{ + ASSERT_EQ(actual.size(), expected.size()); + if (actual == expected) + { + return; + } + // Check which result is unexpected. + for (size_t i = 0; i < actual.size(); i++) + { + ASSERT_EQ(actual[i], expected[i]) << fmt::format( + "{}: i={}, actual={}, expected={}", + location.line(), + i, + magic_enum::enum_name(actual[i]), + magic_enum::enum_name(expected[i])); + } +} + +using ColumnData = std::vector>; +using DelMarkData = std::vector; +using PackData = std::vector>; + +ColumnPtr vecToColumn(const ColumnData & v, const IDataType & col_type) +{ + auto col = col_type.createColumn(); + for (const auto & i : v) + { + if (i) + { + col->insert(*i); + } + else + { + col->insertDefault(); + } + } + return col; +} + +ColumnPtr vecToColumn(const DelMarkData & v) +{ + auto col = TAG_COLUMN_TYPE->createColumn(); + for (const auto & i : v) + { + col->insert(static_cast(i)); + } + return col; +} + +MinMaxIndexPtr createMinMaxIndex(const IDataType & col_type, const PackData & packs) +{ + auto minmax_index = std::make_shared(col_type); + for (size_t i = 0; i < packs.size(); i++) + { + const auto & [col_data, del_mark_data] = packs[i]; + auto column = vecToColumn(col_data, col_type); + auto del_mark_col = vecToColumn(del_mark_data); + RUNTIME_CHECK(column->size() == del_mark_col->size(), i, column->size(), del_mark_col->size()); + minmax_index->addPack(*column, static_cast *>(del_mark_col.get())); + } + return minmax_index; +} + +} // namespace + +TEST_F(MinMaxIndexTest, CheckIsNull) +try +{ + auto col_type = makeNullable(std::make_shared()); + // {ColumnData, DelMarkData} + PackData packs = { + {{1, 2, 3, 4, std::nullopt}, {0, 0, 0, 0, 0}}, + {{6, 7, 8, 9, 10}, {0, 0, 0, 0, 0}}, + {{std::nullopt, std::nullopt}, {0, 0}}, + {{1, 2, 3, 4, std::nullopt}, {0, 0, 0, 0, 1}}, + {{6, 7, 8, 9, 10}, {0, 0, 0, 1, 0}}, + {{std::nullopt, std::nullopt}, {1, 0}}, + {{std::nullopt, std::nullopt}, {1, 1}}, + {{1, 2, 3, 4}, {1, 1, 1, 1}}, + + }; + auto minmax_index = createMinMaxIndex(*col_type, packs); + auto expected_results + = {RSResult::Some, + RSResult::None, + RSResult::All, + RSResult::None, + RSResult::None, + RSResult::All, + RSResult::None, + RSResult::None}; + ASSERT_EQ(packs.size(), expected_results.size()); + auto actual_results = minmax_index->checkIsNull(0, packs.size()); + cmpResults(actual_results, expected_results, std::source_location::current()); +} +CATCH + } // namespace DB::DM::tests From 909f23254cfe5fe5317b19aca3f6b78fc071127c Mon Sep 17 00:00:00 2001 From: jinhelin Date: Tue, 18 Jun 2024 19:37:56 +0800 Subject: [PATCH 2/3] source_location is not supported --- .../Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp index 722d4c7c7e4..24808f3d36e 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp @@ -31,7 +31,6 @@ #include #include -#include namespace DB::DM::tests { @@ -2258,7 +2257,7 @@ CATCH namespace { -void cmpResults(const RSResults & actual, const RSResults & expected, std::source_location location) +void cmpResults(const RSResults & actual, const RSResults & expected, int line_number) { ASSERT_EQ(actual.size(), expected.size()); if (actual == expected) @@ -2270,7 +2269,7 @@ void cmpResults(const RSResults & actual, const RSResults & expected, std::sourc { ASSERT_EQ(actual[i], expected[i]) << fmt::format( "{}: i={}, actual={}, expected={}", - location.line(), + line_number, i, magic_enum::enum_name(actual[i]), magic_enum::enum_name(expected[i])); @@ -2352,7 +2351,7 @@ try RSResult::None}; ASSERT_EQ(packs.size(), expected_results.size()); auto actual_results = minmax_index->checkIsNull(0, packs.size()); - cmpResults(actual_results, expected_results, std::source_location::current()); + cmpResults(actual_results, expected_results, __LINE__); } CATCH From 3f859801ee1fde8571b1b51ffa56a3b298fc522e Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Tue, 18 Jun 2024 20:24:57 +0800 Subject: [PATCH 3/3] Simplify test case --- .../tests/gtest_dm_minmax_index.cpp | 125 +++++------------- 1 file changed, 36 insertions(+), 89 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp index 722d4c7c7e4..6c39ebf6737 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp @@ -23,15 +23,18 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include -#include +#include +#include namespace DB::DM::tests { @@ -2256,103 +2259,47 @@ try } CATCH -namespace -{ -void cmpResults(const RSResults & actual, const RSResults & expected, std::source_location location) +TEST_F(MinMaxIndexTest, CheckIsNull) +try { - ASSERT_EQ(actual.size(), expected.size()); - if (actual == expected) - { - return; - } - // Check which result is unexpected. - for (size_t i = 0; i < actual.size(); i++) + struct IsNullTestCase { - ASSERT_EQ(actual[i], expected[i]) << fmt::format( - "{}: i={}, actual={}, expected={}", - location.line(), - i, - magic_enum::enum_name(actual[i]), - magic_enum::enum_name(expected[i])); - } -} - -using ColumnData = std::vector>; -using DelMarkData = std::vector; -using PackData = std::vector>; + std::vector> column_data; + std::vector del_mark; + RSResult result; + }; -ColumnPtr vecToColumn(const ColumnData & v, const IDataType & col_type) -{ - auto col = col_type.createColumn(); - for (const auto & i : v) - { - if (i) - { - col->insert(*i); - } - else - { - col->insertDefault(); - } - } - return col; -} + std::vector cases = { + {{1, 2, 3, 4, std::nullopt}, {0, 0, 0, 0, 0}, RSResult::Some}, + {{6, 7, 8, 9, 10}, {0, 0, 0, 0, 0}, RSResult::None}, + {{std::nullopt, std::nullopt}, {0, 0}, RSResult::All}, + {{1, 2, 3, 4, std::nullopt}, {0, 0, 0, 0, 1}, RSResult::None}, + {{6, 7, 8, 9, 10}, {0, 0, 0, 1, 0}, RSResult::None}, + {{std::nullopt, std::nullopt}, {1, 0}, RSResult::All}, + {{std::nullopt, std::nullopt}, {1, 1}, RSResult::None}, + {{1, 2, 3, 4}, {1, 1, 1, 1}, RSResult::None}, + }; -ColumnPtr vecToColumn(const DelMarkData & v) -{ - auto col = TAG_COLUMN_TYPE->createColumn(); - for (const auto & i : v) + auto col_type = makeNullable(std::make_shared()); + auto minmax_index = std::make_shared(*col_type); + for (const auto & c : cases) { - col->insert(static_cast(i)); + ASSERT_EQ(c.column_data.size(), c.del_mark.size()); + auto col_data = createColumn>(c.column_data).column; + auto del_mark_col = createColumn(c.del_mark).column; + minmax_index->addPack(*col_data, static_cast *>(del_mark_col.get())); } - return col; -} -MinMaxIndexPtr createMinMaxIndex(const IDataType & col_type, const PackData & packs) -{ - auto minmax_index = std::make_shared(col_type); - for (size_t i = 0; i < packs.size(); i++) + auto actual_results = minmax_index->checkIsNull(0, cases.size()); + for (size_t i = 0; i < cases.size(); ++i) { - const auto & [col_data, del_mark_data] = packs[i]; - auto column = vecToColumn(col_data, col_type); - auto del_mark_col = vecToColumn(del_mark_data); - RUNTIME_CHECK(column->size() == del_mark_col->size(), i, column->size(), del_mark_col->size()); - minmax_index->addPack(*column, static_cast *>(del_mark_col.get())); + const auto & c = cases[i]; + ASSERT_EQ(actual_results[i], c.result) << fmt::format( + "i={} actual={} expected={}", + i, + magic_enum::enum_name(actual_results[i]), + magic_enum::enum_name(c.result)); } - return minmax_index; -} - -} // namespace - -TEST_F(MinMaxIndexTest, CheckIsNull) -try -{ - auto col_type = makeNullable(std::make_shared()); - // {ColumnData, DelMarkData} - PackData packs = { - {{1, 2, 3, 4, std::nullopt}, {0, 0, 0, 0, 0}}, - {{6, 7, 8, 9, 10}, {0, 0, 0, 0, 0}}, - {{std::nullopt, std::nullopt}, {0, 0}}, - {{1, 2, 3, 4, std::nullopt}, {0, 0, 0, 0, 1}}, - {{6, 7, 8, 9, 10}, {0, 0, 0, 1, 0}}, - {{std::nullopt, std::nullopt}, {1, 0}}, - {{std::nullopt, std::nullopt}, {1, 1}}, - {{1, 2, 3, 4}, {1, 1, 1, 1}}, - - }; - auto minmax_index = createMinMaxIndex(*col_type, packs); - auto expected_results - = {RSResult::Some, - RSResult::None, - RSResult::All, - RSResult::None, - RSResult::None, - RSResult::All, - RSResult::None, - RSResult::None}; - ASSERT_EQ(packs.size(), expected_results.size()); - auto actual_results = minmax_index->checkIsNull(0, packs.size()); - cmpResults(actual_results, expected_results, std::source_location::current()); } CATCH