Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Storages: MinMaxIndex::checkIsNull returns RSResult::All if a pack only contains null marks and delete marks #9152

Merged
merged 5 commits into from
Jun 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -576,13 +576,17 @@ RSResults MinMaxIndex::checkNullableCmp(
return results;
}

// Rough check of `IS NULL` for the packs in [start_pack, start_pack + pack_count).
// The returned vector holds one entry per pack, indexed relative to start_pack:
//   - RSResult::None: the pack has no null mark.
//   - RSResult::Some: the pack has both null marks and value marks.
//   - RSResult::All:  the pack has null marks but no value mark.
//
// If a pack only contains null marks and delete marks, checkIsNull will return RSResult::All.
// This is safe because MVCC will read the tag column and the deleted rows will be filtered out.
RSResults MinMaxIndex::checkIsNull(size_t start_pack, size_t pack_count)
{
    RSResults results(pack_count, RSResult::None);
    for (size_t i = start_pack; i < start_pack + pack_count; ++i)
    {
        // Only packs with a null mark can match; every other pack keeps the default RSResult::None.
        if (has_null_marks[i])
        {
            results[i - start_pack] = has_value_marks[i] ? RSResult::Some : RSResult::All;
        }
    }
    return results;
}
Expand Down
48 changes: 48 additions & 0 deletions dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,18 @@
#include <Storages/DeltaMerge/DeltaMergeStore.h>
#include <Storages/DeltaMerge/Filter/RSOperator.h>
#include <Storages/DeltaMerge/FilterParser/FilterParser.h>
#include <Storages/DeltaMerge/Index/RSResult.h>
#include <Storages/DeltaMerge/Index/RoughCheck.h>
#include <Storages/DeltaMerge/Index/ValueComparison.h>
#include <Storages/DeltaMerge/Segment.h>
#include <Storages/DeltaMerge/tests/DMTestEnv.h>
#include <TestUtils/FunctionTestUtils.h>
#include <TestUtils/InputStreamTestUtils.h>
#include <TestUtils/TiFlashTestBasic.h>

#include <ext/scope_guard.h>
#include <magic_enum.hpp>
#include <memory>

namespace DB::DM::tests
{
Expand Down Expand Up @@ -2255,4 +2259,48 @@ try
}
CATCH

// Verifies MinMaxIndex::checkIsNull pack by pack: every test case below is added to the
// index as one pack (column values plus a delete mark per row), and the rough-check result
// for that pack is compared with the expected RSResult.
TEST_F(MinMaxIndexTest, CheckIsNull)
try
{
    // One pack: its values, the per-row delete marks (1 = deleted), and the expected result.
    struct PackExpectation
    {
        std::vector<std::optional<Int64>> column_data;
        std::vector<UInt64> del_mark;
        RSResult result;
    };

    const std::vector<PackExpectation> expectations = {
        // No deleted rows.
        {{1, 2, 3, 4, std::nullopt}, {0, 0, 0, 0, 0}, RSResult::Some},
        {{6, 7, 8, 9, 10}, {0, 0, 0, 0, 0}, RSResult::None},
        {{std::nullopt, std::nullopt}, {0, 0}, RSResult::All},
        // Some or all rows deleted.
        {{1, 2, 3, 4, std::nullopt}, {0, 0, 0, 0, 1}, RSResult::None},
        {{6, 7, 8, 9, 10}, {0, 0, 0, 1, 0}, RSResult::None},
        {{std::nullopt, std::nullopt}, {1, 0}, RSResult::All},
        {{std::nullopt, std::nullopt}, {1, 1}, RSResult::None},
        {{1, 2, 3, 4}, {1, 1, 1, 1}, RSResult::None},
    };

    auto nullable_int64_type = makeNullable(std::make_shared<DataTypeInt64>());
    auto index = std::make_shared<MinMaxIndex>(*nullable_int64_type);

    // Feed every case into the index as its own pack, in declaration order.
    for (const auto & expectation : expectations)
    {
        ASSERT_EQ(expectation.column_data.size(), expectation.del_mark.size());
        auto values = createColumn<Nullable<Int64>>(expectation.column_data).column;
        auto delete_marks = createColumn<UInt8>(expectation.del_mark).column;
        const auto * delete_marks_ptr = static_cast<const ColumnVector<UInt8> *>(delete_marks.get());
        index->addPack(*values, delete_marks_ptr);
    }

    // Pack `idx` in the index corresponds to expectations[idx].
    const auto pack_count = expectations.size();
    auto actual_results = index->checkIsNull(0, pack_count);
    for (size_t idx = 0; idx < pack_count; ++idx)
    {
        const auto & expected = expectations[idx].result;
        ASSERT_EQ(actual_results[idx], expected) << fmt::format(
            "i={} actual={} expected={}",
            idx,
            magic_enum::enum_name(actual_results[idx]),
            magic_enum::enum_name(expected));
    }
}
CATCH

} // namespace DB::DM::tests