diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index d57aa58333f..a7462671c77 100755 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -1,4 +1,4 @@ -// Copyright 2022 PingCAP, Ltd. +// Copyright 2023 PingCAP, Ltd. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -162,13 +162,13 @@ const std::unordered_map scalar_func_map({ {tipb::ScalarFuncSig::GreatestInt, "tidbGreatest"}, {tipb::ScalarFuncSig::GreatestReal, "tidbGreatest"}, - {tipb::ScalarFuncSig::GreatestString, "greatest"}, + {tipb::ScalarFuncSig::GreatestString, "tidbGreatestString"}, {tipb::ScalarFuncSig::GreatestDecimal, "greatest"}, {tipb::ScalarFuncSig::GreatestTime, "greatest"}, {tipb::ScalarFuncSig::LeastInt, "tidbLeast"}, {tipb::ScalarFuncSig::LeastReal, "tidbLeast"}, - {tipb::ScalarFuncSig::LeastString, "least"}, + {tipb::ScalarFuncSig::LeastString, "tidbLeastString"}, {tipb::ScalarFuncSig::LeastDecimal, "least"}, {tipb::ScalarFuncSig::LeastTime, "least"}, diff --git a/dbms/src/Functions/FunctionsComparison.h b/dbms/src/Functions/FunctionsComparison.h index fda8a113ada..416f3d5fffc 100644 --- a/dbms/src/Functions/FunctionsComparison.h +++ b/dbms/src/Functions/FunctionsComparison.h @@ -1,4 +1,4 @@ -// Copyright 2022 PingCAP, Ltd. +// Copyright 2023 PingCAP, Ltd. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/dbms/src/Functions/FunctionsConversion.h b/dbms/src/Functions/FunctionsConversion.h index e8333ceeeea..82df7472792 100644 --- a/dbms/src/Functions/FunctionsConversion.h +++ b/dbms/src/Functions/FunctionsConversion.h @@ -205,7 +205,7 @@ struct ConvertToDecimalImpl } else { - if (const ColumnVector * col_from + if (const auto * col_from = checkAndGetColumn>(block.getByPosition(arguments[0]).column.get())) { auto col_to = ColumnDecimal::create(0, scale); @@ -245,7 +245,7 @@ struct ConvertToDecimalImpl const IDataType & data_type_to = *block.getByPosition(result).type; - if (const ColumnString * col_from_string = checkAndGetColumn(&col_from)) + if (const auto * col_from_string = checkAndGetColumn(&col_from)) { auto res = data_type_to.createColumn(); @@ -758,8 +758,8 @@ struct ConvertThroughParsing } const IColumn * col_from = block.getByPosition(arguments[0]).column.get(); - const ColumnString * col_from_string = checkAndGetColumn(col_from); - const ColumnFixedString * col_from_fixed_string = checkAndGetColumn(col_from); + const auto * col_from_string = checkAndGetColumn(col_from); + const auto * col_from_fixed_string = checkAndGetColumn(col_from); if (std::is_same_v && !col_from_string) throw Exception("Illegal column " + col_from->getName() @@ -880,7 +880,7 @@ struct ConvertImplGenericFromString const IDataType & data_type_to = *block.getByPosition(result).type; - if (const ColumnString * col_from_string = checkAndGetColumn(&col_from)) + if (const auto * col_from_string = checkAndGetColumn(&col_from)) { auto res = data_type_to.createColumn(); @@ -947,7 +947,7 @@ struct ConvertImpl { static void execute(Block & block, const ColumnNumbers & arguments, size_t result) { - if (const ColumnFixedString * col_from = checkAndGetColumn(block.getByPosition(arguments[0]).column.get())) + if (const auto * col_from = checkAndGetColumn(block.getByPosition(arguments[0]).column.get())) { auto col_to = ColumnString::create(); @@ -1754,7 +1754,7 @@ class FunctionDateFormat : public IFunction class FunctionGetFormat : public IFunction { private: - static String get_format(const StringRef & time_type, const StringRef & location) + static String getFormat(const StringRef & time_type, const StringRef & location) { if (time_type == "DATE") { @@ -1850,7 +1850,7 @@ class FunctionGetFormat : public IFunction for (size_t i = 0; i < size; ++i) { const auto & location = location_col->getDataAt(i); - const auto & result = get_format(StringRef(time_type), location); + const auto & result = getFormat(StringRef(time_type), location); write_buffer.write(result.c_str(), result.size()); writeChar(0, write_buffer); offsets_to[i] = write_buffer.count(); @@ -2615,7 +2615,7 @@ class FunctionCast final : public IFunctionBase const size_t result) { const auto & array_arg = block.getByPosition(arguments.front()); - if (const ColumnArray * col_array = checkAndGetColumn(array_arg.column.get())) + if (const auto * col_array = checkAndGetColumn(array_arg.column.get())) { /// create block for converting nested column containing original and result columns Block nested_block{ diff --git a/dbms/src/Functions/LeastGreatest.h b/dbms/src/Functions/LeastGreatest.h index 55e76b66524..d0b12354767 100644 --- a/dbms/src/Functions/LeastGreatest.h +++ b/dbms/src/Functions/LeastGreatest.h @@ -1,4 +1,4 @@ -// Copyright 2022 PingCAP, Ltd. +// Copyright 2023 PingCAP, Ltd. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -29,12 +30,13 @@ #include #include #include +#include #include #include #include #include -#include +#include #include namespace DB @@ -120,5 +122,463 @@ class FunctionVectorizedLeastGreatest : public IFunction private: const Context & context; }; +template +struct LeastGreatestStringImpl +{ + static void mergeImpl( + const TiDB::TiDBCollatorPtr & collator, + size_t a_size, + size_t b_size, + const unsigned char * a_data, + const unsigned char * b_data, + ColumnString::Chars_t & c_data, + ColumnString::Offsets & c_offsets, + size_t i) + { + int res = 0; + auto pre_offset = StringUtil::offsetAt(c_offsets, i); + if constexpr (use_collator) + res = collator->compare(reinterpret_cast(&a_data[0]), a_size, reinterpret_cast(&b_data[0]), b_size); + else + res = mem_utils::CompareStrView({reinterpret_cast(&a_data[0]), a_size}, {reinterpret_cast(&b_data[0]), b_size}); + + if constexpr (least) + { + if (res < 0) + { + memcpy(&c_data[pre_offset], &a_data[0], a_size); + c_offsets[i] = pre_offset + a_size + 1; + } + else if (res == 0) + { + size_t size = std::min(a_size, b_size); + memcpy(&c_data[pre_offset], &b_data[0], size); + c_offsets[i] = pre_offset + size + 1; + } + else + { + memcpy(&c_data[pre_offset], &b_data[0], b_size); + c_offsets[i] = pre_offset + b_size + 1; + } + } + else + { + if (res < 0) + { + memcpy(&c_data[pre_offset], &b_data[0], b_size); + c_offsets[i] = pre_offset + b_size + 1; + } + else if (res == 0) + { + size_t size = std::max(a_size, b_size); + if (a_size > b_size) + { + memcpy(&c_data[pre_offset], &a_data[0], size); + c_offsets[i] = pre_offset + size + 1; + } + else + { + memcpy(&c_data[pre_offset], &b_data[0], size); + c_offsets[i] = pre_offset + size + 1; + } + } + else + { + memcpy(&c_data[pre_offset], &a_data[0], a_size); + c_offsets[i] = pre_offset + a_size + 1; + } + } + } + + static void processImpl( + const TiDB::TiDBCollatorPtr & collator, + size_t a_size, + size_t b_size, + const unsigned char * a_data, + const unsigned char * b_data, + std::vector & res_ref, + size_t i) + { + int res = 0; + if constexpr (use_collator) + res = collator->compare(reinterpret_cast(&a_data[0]), a_size, reinterpret_cast(&b_data[0]), b_size); + else + res = mem_utils::CompareStrView({reinterpret_cast(&a_data[0]), a_size}, {reinterpret_cast(&b_data[0]), b_size}); + + if constexpr (least) + { + if (res < 0) + { + res_ref[i] = StringRef(&a_data[0], a_size); + } + else if (res == 0) + { + size_t size = std::min(a_size, b_size); + res_ref[i] = StringRef(&b_data[0], size); + } + else + { + res_ref[i] = StringRef(&b_data[0], b_size); + } + } + else + { + if (res < 0) + { + res_ref[i] = StringRef(&b_data[0], b_size); + } + else if (res == 0) + { + if (a_size > b_size) + res_ref[i] = StringRef(&a_data[0], a_size); + else + res_ref[i] = StringRef(&b_data[0], b_size); + } + else + { + res_ref[i] = StringRef(&a_data[0], a_size); + } + } + } + + // StringRef_string + static void process( + const TiDB::TiDBCollatorPtr & collator, + std::vector & res_ref, + const ColumnString::Chars_t & b_data, + const ColumnString::Offsets & b_offsets, + size_t i) + { + size_t a_size = res_ref[i].size; + size_t b_size = StringUtil::sizeAt(b_offsets, i) - 1; + const auto * a_data = reinterpret_cast(res_ref[i].data); + processImpl(collator, a_size, b_size, a_data, &b_data[b_offsets[i - 1]], res_ref, i); + } + + // StringRef_constant + static void process( + const TiDB::TiDBCollatorPtr & collator, + std::vector & res_ref, + StringRef & b, + ColumnString::Chars_t & c_data, + ColumnString::Offsets & c_offsets, + size_t i) + { + size_t a_size = res_ref[i].size; + ColumnString::Offset b_size = b.size; + const auto * a_data = reinterpret_cast(res_ref[i].data); + const auto * b_data = reinterpret_cast(b.data); + mergeImpl(collator, a_size, b_size, &a_data[0], &b_data[0], c_data, c_offsets, i); + } + + // string_string + static void process( + const TiDB::TiDBCollatorPtr & collator, + const ColumnString::Chars_t & a_data, + const ColumnString::Offsets & a_offsets, + const ColumnString::Chars_t & b_data, + const ColumnString::Offsets & b_offsets, + std::vector & res_ref, + size_t i) + { + size_t a_size = StringUtil::sizeAt(a_offsets, i) - 1; + size_t b_size = StringUtil::sizeAt(b_offsets, i) - 1; + processImpl(collator, a_size, b_size, &a_data[a_offsets[i - 1]], &b_data[b_offsets[i - 1]], res_ref, i); + } + + // string_constant + static void process( + const TiDB::TiDBCollatorPtr & collator, + const ColumnString::Chars_t & a_data, + const ColumnString::Offsets & a_offsets, + const StringRef & b, + std::vector & res_ref, + size_t i) + { + const auto * b_data = reinterpret_cast(b.data); + ColumnString::Offset b_size = b.size; + size_t a_size = StringUtil::sizeAt(a_offsets, i) - 1; + processImpl(collator, a_size, b_size, &a_data[a_offsets[i - 1]], &b_data[0], res_ref, i); + } + + // string_constant + static void process( + const TiDB::TiDBCollatorPtr & collator, + const ColumnString::Chars_t & a_data, + const ColumnString::Offsets & a_offsets, + const StringRef & b, + ColumnString::Chars_t & c_data, + ColumnString::Offsets & c_offsets, + size_t i) + { + const auto * b_data = reinterpret_cast(b.data); + ColumnString::Offset b_size = b.size; + size_t a_size = StringUtil::sizeAt(a_offsets, i) - 1; + mergeImpl(collator, a_size, b_size, &a_data[a_offsets[i - 1]], &b_data[0], c_data, c_offsets, i); + } + + // constant_constant + static void process( + const TiDB::TiDBCollatorPtr & collator, + StringRef & a, + const StringRef & b) + { + int res = 0; + if constexpr (use_collator) + res = collator->compare(reinterpret_cast(a.data), a.size, reinterpret_cast(b.data), b.size); + else + res = a.compare(b); + + if constexpr (least) + { + if (res > 0 || (res == 0 && a.size > b.size)) + a = b; + } + else + { + if (res < 0 || (res == 0 && a.size < b.size)) + a = b; + } + } +}; + +template +struct StringOperationImpl +{ + static void NO_INLINE stringVectorStringVector( + const TiDB::TiDBCollatorPtr & collator, + const ColumnString::Chars_t & a_data, + const ColumnString::Offsets & a_offsets, + const ColumnString::Chars_t & b_data, + const ColumnString::Offsets & b_offsets, + std::vector & res_ref) + { + size_t size = a_offsets.size(); + for (size_t i = 0; i < size; ++i) + LeastGreatestStringImpl::process(collator, a_data, a_offsets, b_data, b_offsets, res_ref, i); + } + + static void NO_INLINE stringRefVectorStringVector( + const TiDB::TiDBCollatorPtr & collator, + std::vector & res_ref, + const ColumnString::Chars_t & b_data, + const ColumnString::Offsets & b_offsets) + { + size_t size = b_offsets.size(); + for (size_t i = 0; i < size; ++i) + LeastGreatestStringImpl::process(collator, res_ref, b_data, b_offsets, i); + } + + static void NO_INLINE stringRefVectorConstant( + const TiDB::TiDBCollatorPtr & collator, + std::vector & res_ref, + StringRef & b, + ColumnString::Chars_t & c_data, + ColumnString::Offsets & c_offsets) + { + size_t size = res_ref.size(); + size_t res_ref_size = 0; + for (auto & ref : res_ref) + res_ref_size += ref.size; + c_data.resize(std::max(res_ref_size, b.size * size)); + c_offsets.resize(size); + for (size_t i = 0; i < size; ++i) + LeastGreatestStringImpl::process(collator, res_ref, b, c_data, c_offsets, i); + } + + static void NO_INLINE stringVectorConstant( + const TiDB::TiDBCollatorPtr & collator, + const ColumnString::Chars_t & a_data, + const ColumnString::Offsets & a_offsets, + const StringRef & b, + ColumnString::Chars_t & c_data, + ColumnString::Offsets & c_offsets) + { + size_t size = a_offsets.size(); + c_data.resize(std::max(a_data.size(), b.size * size)); + c_offsets.resize(size); + for (size_t i = 0; i < size; ++i) + LeastGreatestStringImpl::process(collator, a_data, a_offsets, b, c_data, c_offsets, i); + } + + static void constantConstant( + const TiDB::TiDBCollatorPtr & collator, + StringRef & a, + StringRef & b) + { + LeastGreatestStringImpl::process(collator, a, b); + } +}; + +template +class FunctionLeastGreatestString : public IFunction +{ +public: + static constexpr auto name = least ? "tidbLeastString" : "tidbGreatestString"; + explicit FunctionLeastGreatestString() = default; + + static FunctionPtr create(const Context & context [[maybe_unused]]) + { + return std::make_shared(); + } + + String getName() const override { return name; } + bool isVariadic() const override { return true; } + + void setCollator(const TiDB::TiDBCollatorPtr & collator_) override { collator = collator_; } + + size_t getNumberOfArguments() const override { return 0; } + bool useDefaultImplementationForNulls() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (arguments.size() < 2) + throw Exception( + fmt::format("Number of arguments for function {} doesn't match: passed {}, should be at least 2.", getName(), arguments.size()), + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + for (const auto & argument : arguments) + { + if (!argument->isString()) + { + throw Exception( + fmt::format("argument type not string"), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + } + return std::make_shared(); + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override + { + size_t num_arguments = arguments.size(); + if (num_arguments < 2) + { + throw Exception( + fmt::format("Number of arguments for function {} doesn't match: passed {}, should be at least 2.", getName(), arguments.size()), + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + } + if (collator) + executeInternal(block, arguments, result); + else + executeInternal(block, arguments, result); + } + + template + void executeInternal(Block & block, const ColumnNumbers & arguments, size_t result) const + { + size_t num_arguments = arguments.size(); + + using impl = StringOperationImpl; + + std::vector const_columns; + std::vector string_columns; + for (size_t i = 0; i < num_arguments; ++i) + { + const auto * c = block.getByPosition(arguments[i]).column.get(); + const auto * c_string = checkAndGetColumn(c); + const ColumnConst * c_const = checkAndGetColumnConstStringOrFixedString(c); + if (c_const) + const_columns.emplace_back(c_const); + if (c_string) + string_columns.emplace_back(c_string); + } + + // 1. calculate result column for const columns + StringRef const_res; + if (!const_columns.empty()) + { + const_res = const_columns[0]->getDataAt(0); + for (size_t i = 1; i < const_columns.size(); ++i) + { + StringRef b = const_columns[i]->getDataAt(0); + impl::constantConstant(collator, const_res, b); + } + + if (string_columns.empty()) // fill the result column + { + block.getByPosition(result).column + = block + .getByPosition(result) + .type->createColumnConst(const_columns[0]->size(), Field(const_res.toString())); + return; + } + } + + // 2. calculate result column for string columns + auto string_columns_size = string_columns.size(); + if (string_columns_size == 1) + { + // 3A. merge result columns of const columns and result_col + ColumnString * result_col = nullptr; + result_col = const_cast(string_columns[0]); + auto col_str = ColumnString::create(); + impl::stringVectorConstant( + collator, + result_col->getChars(), + result_col->getOffsets(), + const_res, + col_str->getChars(), + col_str->getOffsets()); + block.getByPosition(result).column = std::move(col_str); + return; + } + else if (string_columns_size >= 2) + { + std::vector result_string_refs; + result_string_refs.resize(string_columns[0]->size()); + for (size_t i = 1; i < string_columns_size; ++i) + { + const DB::ColumnString * c0_string; + const auto * c1_string = string_columns[i]; + if (i == 1) + { + c0_string = checkAndGetColumn(string_columns[0]); + + impl::stringVectorStringVector( + collator, + c0_string->getChars(), + c0_string->getOffsets(), + c1_string->getChars(), + c1_string->getOffsets(), + result_string_refs); + } + else + { + impl::stringRefVectorStringVector( + collator, + result_string_refs, + c1_string->getChars(), + c1_string->getOffsets()); + } + } + if (const_columns.empty()) // no const columns, use string columns result + { + // materialize string columns result + auto res_column = ColumnString::create(); + for (auto & ref : result_string_refs) + res_column->insertData(ref.data, ref.size); + block.getByPosition(result).column = std::move(res_column); + return; + } + else + { + // 3B. merge result columns of const columns and vector columns + auto col_str = ColumnString::create(); + impl::stringRefVectorConstant( + collator, + result_string_refs, + const_res, + col_str->getChars(), + col_str->getOffsets()); + block.getByPosition(result).column = std::move(col_str); + return; + } + } + } + +private: + TiDB::TiDBCollatorPtr collator{}; +}; } // namespace DB diff --git a/dbms/src/Functions/greatest.cpp b/dbms/src/Functions/greatest.cpp index 7efabef8470..35538cf05ce 100644 --- a/dbms/src/Functions/greatest.cpp +++ b/dbms/src/Functions/greatest.cpp @@ -1,4 +1,4 @@ -// Copyright 2022 PingCAP, Ltd. +// Copyright 2023 PingCAP, Ltd. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -26,8 +26,8 @@ struct BinaryGreatestBaseImpl template static Result apply(A a, B b) { - const Result tmp_a = static_cast(a); // NOLINT(bugprone-signed-char-misuse) - const Result tmp_b = static_cast(b); // NOLINT(bugprone-signed-char-misuse) + const auto tmp_a = static_cast(a); // NOLINT(bugprone-signed-char-misuse) + const auto tmp_b = static_cast(b); // NOLINT(bugprone-signed-char-misuse) return accurate::greaterOp(tmp_a, tmp_b) ? tmp_a : tmp_b; } template @@ -46,8 +46,8 @@ struct BinaryGreatestBaseImpl template static Result apply(A a, B b) { - const Result tmp_a = static_cast(a); // NOLINT(bugprone-signed-char-misuse) - const Result tmp_b = static_cast(b); // NOLINT(bugprone-signed-char-misuse) + const auto tmp_a = static_cast(a); // NOLINT(bugprone-signed-char-misuse) + const auto tmp_b = static_cast(b); // NOLINT(bugprone-signed-char-misuse) return tmp_a > tmp_b ? tmp_a : tmp_b; } template @@ -71,6 +71,7 @@ using FunctionTiDBGreatest = FunctionVectorizedLeastGreatest(); + factory.registerFunction>(); } } // namespace DB diff --git a/dbms/src/Functions/least.cpp b/dbms/src/Functions/least.cpp index 7508ca81320..52785632256 100644 --- a/dbms/src/Functions/least.cpp +++ b/dbms/src/Functions/least.cpp @@ -1,4 +1,4 @@ -// Copyright 2022 PingCAP, Ltd. +// Copyright 2023 PingCAP, Ltd. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -30,8 +30,8 @@ struct BinaryLeastBaseImpl static Result apply(A a, B b) { /** gcc 4.9.2 successfully vectorizes a loop from this function. */ - const Result tmp_a = static_cast(a); // NOLINT(bugprone-signed-char-misuse) - const Result tmp_b = static_cast(b); // NOLINT(bugprone-signed-char-misuse) + const auto tmp_a = static_cast(a); // NOLINT(bugprone-signed-char-misuse) + const auto tmp_b = static_cast(b); // NOLINT(bugprone-signed-char-misuse) return accurate::lessOp(tmp_a, tmp_b) ? tmp_a : tmp_b; } template @@ -50,8 +50,8 @@ struct BinaryLeastBaseImpl template static Result apply(A a, B b) { - const Result tmp_a = static_cast(a); // NOLINT(bugprone-signed-char-misuse) - const Result tmp_b = static_cast(b); // NOLINT(bugprone-signed-char-misuse) + const auto tmp_a = static_cast(a); // NOLINT(bugprone-signed-char-misuse) + const auto tmp_b = static_cast(b); // NOLINT(bugprone-signed-char-misuse) return tmp_a < tmp_b ? tmp_a : tmp_b; } template @@ -75,6 +75,7 @@ using FunctionTiDBLeast = FunctionVectorizedLeastGreatest(); + factory.registerFunction>(); } } // namespace DB diff --git a/dbms/src/Functions/tests/gtest_least_greatest.cpp b/dbms/src/Functions/tests/gtest_least_greatest.cpp index cbf7552fdc1..339eb291cb8 100644 --- a/dbms/src/Functions/tests/gtest_least_greatest.cpp +++ b/dbms/src/Functions/tests/gtest_least_greatest.cpp @@ -1,4 +1,4 @@ -// Copyright 2022 PingCAP, Ltd. +// Copyright 2023 PingCAP, Ltd. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -193,6 +193,476 @@ try } CATCH +TEST_F(LeastGreatestTest, leastString) +try +{ + const String & func_name = "tidbLeastString"; + const auto * utf8mb4_general_ci_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI); + + /// without collator + // vector vector + ASSERT_COLUMN_EQ(createColumn>({"11"}), + executeFunction( + func_name, + {createColumn>({"1111"}), + createColumn>({"11"}), + createColumn>({"111"}), + createColumn>({"111111"})})); + + ASSERT_COLUMN_EQ(createColumn>({""}), + executeFunction( + func_name, + {createColumn>({"1111"}), + createColumn>({"11"}), + createColumn>({"111"}), + createColumn>({""})})); + + ASSERT_COLUMN_EQ(createColumn>({"11", "2"}), + executeFunction( + func_name, + {createColumn>({"1111", "2222"}), + createColumn>({"11", "2"}), + createColumn>({"111", "22"})})); + // vector constant + ASSERT_COLUMN_EQ( + createColumn>({"11", "22222", "22", "11111111"}), + executeFunction( + func_name, + {createColumn>({"11", "33", "22", "11111111"}), + createConstColumn>(4, "22222")})); + + ASSERT_COLUMN_EQ( + createColumn>({"11", "211", "211", "11111111"}), + executeFunction( + func_name, + {createColumn>({"11", "33", "22", "11111111"}), + createConstColumn>(4, "22222"), + createConstColumn>(4, "211")})); + + ASSERT_COLUMN_EQ( + createColumn>({"11", "03", "11", "11111111"}), + executeFunction( + func_name, + {createColumn>({"11", "33", "22", "11111111"}), + createColumn>({"33", "03", "11", "2333"}), + createConstColumn>(4, "22222")})); + + // constant vector + ASSERT_COLUMN_EQ( + createColumn>({"11", "22222", "22", "11111111"}), + executeFunction( + func_name, + {createConstColumn>(4, "22222"), + createColumn>({"11", "33", "22", "11111111"})})); + + // constant constant + ASSERT_COLUMN_EQ(createConstColumn(4, "11"), + executeFunction( + func_name, + {createConstColumn>(4, "1111"), + createConstColumn>(4, "11")})); + + ASSERT_COLUMN_EQ(createConstColumn(4, "11"), + executeFunction( + func_name, + {createConstColumn>(4, "11"), + createConstColumn>(4, "1111")})); + + + ASSERT_COLUMN_EQ(createConstColumn(4, "11"), + executeFunction( + func_name, + {createConstColumn>(4, "11"), + createConstColumn>(4, "111"), + createConstColumn>(4, "1111")})); + + /// with collator + // vector vector + ASSERT_COLUMN_EQ(createColumn>({"11"}), + executeFunction( + func_name, + {createColumn>({"1111"}), + createColumn>({"11"}), + createColumn>({"111"})}, + utf8mb4_general_ci_collator)); + + ASSERT_COLUMN_EQ(createColumn>({"111"}), + executeFunction( + func_name, + {createColumn>({"1111"}), + createColumn>({"2222"}), + createColumn>({"111"})}, + utf8mb4_general_ci_collator)); + + ASSERT_COLUMN_EQ(createColumn>({"a", "b"}), + executeFunction( + func_name, + {createColumn>({"a", "b"}), + createColumn>({"b", "c"}), + createColumn>({"c", "d"})}, + utf8mb4_general_ci_collator)); + + ASSERT_COLUMN_EQ(createColumn>({"a", "b", "asdhkas-\\"}), + executeFunction( + func_name, + {createColumn>({"a", "b", "asdhkas-\\"}), + createColumn>({"b", "c", "sadhhdask"}), + createColumn>({"c", "d", "sahdjkdsahk"})}, + utf8mb4_general_ci_collator)); + + ASSERT_COLUMN_EQ(createColumn>({"11", "22"}), + executeFunction( + func_name, + {createColumn>({"1111", "2222"}), + createColumn>({"11", "22"}), + createColumn>({"111", "222"})}, + utf8mb4_general_ci_collator)); + + ASSERT_COLUMN_EQ(createColumn>({"1", "2"}), + executeFunction( + func_name, + {createColumn>({"1111", "2222"}), + createColumn>({"11", "22"}), + createColumn>({"111", "222"}), + createColumn>({"1", "2"})}, + utf8mb4_general_ci_collator)); + + ASSERT_COLUMN_EQ(createColumn>({"11", {}}), + executeFunction( + func_name, + {createColumn>({"1111", {}}), + createColumn>({"11", "22"}), + createColumn>({"111", "222"})}, + utf8mb4_general_ci_collator)); + + ASSERT_COLUMN_EQ(createColumn({"11", "2"}), + executeFunction( + func_name, + {createColumn({"1111", "2"}), + createColumn({"11", "22"}), + createColumn({"111", "222"})}, + utf8mb4_general_ci_collator)); + + + ASSERT_COLUMN_EQ(createColumn>({{}, {}}), + executeFunction( + func_name, + {createColumn>({"1111", "2222"}), + createColumn>({"11", {}}), + createColumn>({{}, "222"})}, + utf8mb4_general_ci_collator)); + + ASSERT_COLUMN_EQ(createColumn>({{}, {}}), + executeFunction( + func_name, + {createColumn>({{}, {}}), + createColumn>({{}, {}}), + createColumn>({{}, {}})}, + utf8mb4_general_ci_collator)); + + // vector constant + ASSERT_COLUMN_EQ( + createColumn>({"11", "22222", "22", "11111111"}), + executeFunction( + func_name, + {createColumn>({"11", "33", "22", "11111111"}), + createConstColumn>(4, "22222")}, + utf8mb4_general_ci_collator)); + + ASSERT_COLUMN_EQ( + createColumn>({"11", "22222", "11111", "1111"}), + executeFunction( + func_name, + {createColumn>({"11", "44", "333", "22222"}), + createColumn>({"11", "367", "121", "234"}), + createColumn>({"1111", "33", "11111", "9999"}), + createColumn>({"111", "33", "7777", "1111"}), + createColumn>({"11", "66", "6767", "78878"}), + createConstColumn>(4, "22222"), + createConstColumn>(4, "33333"), + createConstColumn>(4, "44444")}, + utf8mb4_general_ci_collator)); + + // constant vector + ASSERT_COLUMN_EQ( + createColumn>({"11", "22222", "22", "11111111"}), + executeFunction( + func_name, + {createConstColumn>(4, "22222"), + createColumn>({"11", "33", "22", "11111111"})}, + utf8mb4_general_ci_collator)); + + // constant constant + ASSERT_COLUMN_EQ( + createConstColumn(4, "11111"), + executeFunction( + func_name, + {createConstColumn>(4, "11111"), + createConstColumn>(4, "22222")}, + utf8mb4_general_ci_collator)); + + ASSERT_COLUMN_EQ( + createConstColumn>(4, {}), + executeFunction( + func_name, + {createConstColumn>(4, {}), + createConstColumn>(4, "22222")}, + utf8mb4_general_ci_collator)); + + ASSERT_COLUMN_EQ( + createConstColumn(4, "111"), + executeFunction( + func_name, + { + createConstColumn>(4, "1111"), + createConstColumn>(4, "22222"), + createConstColumn>(4, "111"), + }, + utf8mb4_general_ci_collator)); + + const auto * bin_col = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY); + ASSERT_COLUMN_EQ(createColumn>({{}, {}}), + executeFunction( + func_name, + {createColumn>({"1111", "2222"}), + createColumn>({"11", {}}), + createColumn>({{}, "222"})}, + bin_col)); +} +CATCH + +TEST_F(LeastGreatestTest, greatestString) +try +{ + const String & func_name = "tidbGreatestString"; + + const auto * utf8mb4_general_ci_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI); + + /// without collator + // vector vector + ASSERT_COLUMN_EQ(createColumn>({"111111"}), + executeFunction( + func_name, + {createColumn>({"1111"}), + createColumn>({"11"}), + createColumn>({"111"}), + createColumn>({"111111"})})); + + ASSERT_COLUMN_EQ(createColumn>({"1111"}), + executeFunction( + func_name, + {createColumn>({"1111"}), + createColumn>({"11"}), + createColumn>({"111"}), + createColumn>({""})})); + + ASSERT_COLUMN_EQ(createColumn>({"1111", "2222"}), + executeFunction( + func_name, + {createColumn>({"1111", "2222"}), + createColumn>({"11", "2"}), + createColumn>({"111", "22"})})); + // vector constant + ASSERT_COLUMN_EQ( + createColumn>({"22222", "33", "22222", "22222"}), + executeFunction( + func_name, + {createColumn>({"11", "33", "22", "11111111"}), + createConstColumn>(4, "22222")})); + + ASSERT_COLUMN_EQ( + createColumn>({"31111", "33", "31111", "31111"}), + executeFunction( + func_name, + {createColumn>({"11", "33", "22", "11111111"}), + createConstColumn>(4, "22222"), + createConstColumn>(4, "31111")})); + + ASSERT_COLUMN_EQ( + createColumn>({"33", "33", "22222", "2333"}), + executeFunction( + func_name, + {createColumn>({"11", "33", "22", "11111111"}), + createColumn>({"33", "33", "11", "2333"}), + createConstColumn>(4, "22222")})); + + // constant vector + ASSERT_COLUMN_EQ( + createColumn>({"22222", "33", "22222", "22222"}), + executeFunction( + func_name, + {createConstColumn>(4, "22222"), + createColumn>({"11", "33", "22", "11111111"})})); + + // constant constant + ASSERT_COLUMN_EQ(createConstColumn(4, "1111"), + executeFunction( + func_name, + {createConstColumn>(4, "1111"), + createConstColumn>(4, "11")})); + + + ASSERT_COLUMN_EQ(createConstColumn(4, "1111"), + executeFunction( + func_name, + {createConstColumn>(4, "11"), + createConstColumn>(4, "1111")})); + + /// with collator + // vector vector + ASSERT_COLUMN_EQ(createColumn>({"1111"}), + executeFunction( + func_name, + {createColumn>({"1111"}), + createColumn>({"11"}), + createColumn>({"111"})}, + utf8mb4_general_ci_collator)); + + ASSERT_COLUMN_EQ(createColumn>({"2222"}), + executeFunction( + func_name, + {createColumn>({"1111"}), + createColumn>({"2222"}), + createColumn>({"111"})}, + utf8mb4_general_ci_collator)); + + ASSERT_COLUMN_EQ(createColumn>({"c", "d"}), + executeFunction( + func_name, + {createColumn>({"a", "b"}), + createColumn>({"b", "c"}), + createColumn>({"c", "d"})}, + utf8mb4_general_ci_collator)); + + ASSERT_COLUMN_EQ(createColumn>({"c", "d", "sahdjkdsahk"}), + executeFunction( + func_name, + {createColumn>({"a", "b", "asdhkas-\\"}), + createColumn>({"b", "c", "sadhhdask"}), + createColumn>({"c", "d", "sahdjkdsahk"})}, + utf8mb4_general_ci_collator)); + + ASSERT_COLUMN_EQ(createColumn>({"1111", "2222"}), + executeFunction( + func_name, + {createColumn>({"1111", "2222"}), + createColumn>({"11", "22"}), + createColumn>({"111", "222"})}, + utf8mb4_general_ci_collator)); + + ASSERT_COLUMN_EQ(createColumn>({"1111", "2222"}), + executeFunction( + func_name, + {createColumn>({"1111", "2222"}), + createColumn>({"11", "22"}), + createColumn>({"111", "222"}), + createColumn>({"1", "2"})}, + utf8mb4_general_ci_collator)); + + ASSERT_COLUMN_EQ(createColumn>({"1111", {}}), + executeFunction( + func_name, + {createColumn>({"1111", {}}), + createColumn>({"11", "22"}), + createColumn>({"111", "222"})}, + utf8mb4_general_ci_collator)); + + ASSERT_COLUMN_EQ(createColumn({"1111", "222"}), + executeFunction( + func_name, + {createColumn({"1111", "2"}), + createColumn({"11", "22"}), + createColumn({"111", "222"})}, + utf8mb4_general_ci_collator)); + + + ASSERT_COLUMN_EQ(createColumn>({{}, {}}), + executeFunction( + func_name, + {createColumn>({"1111", "2222"}), + createColumn>({"11", {}}), + createColumn>({{}, "222"})}, + utf8mb4_general_ci_collator)); + + ASSERT_COLUMN_EQ(createColumn>({{}, {}}), + executeFunction( + func_name, + {createColumn>({{}, {}}), + createColumn>({{}, {}}), + createColumn>({{}, {}})}, + utf8mb4_general_ci_collator)); + + // vector constant + ASSERT_COLUMN_EQ( + createColumn>({"22222", "33", "22222", "22222"}), + executeFunction( + func_name, + {createColumn>({"11", "33", "22", "11111111"}), + createConstColumn>(4, "22222")}, + utf8mb4_general_ci_collator)); + + ASSERT_COLUMN_EQ( + createColumn>({"44444", "66", "7777", "9999"}), + executeFunction( + func_name, + {createColumn>({"11", "44", "333", "22222"}), + createColumn>({"11", "367", "121", "234"}), + createColumn>({"1111", "33", "11111", "9999"}), + createColumn>({"111", "33", "7777", "1111"}), + createColumn>({"11", "66", "6767", "78878"}), + createConstColumn>(4, "22222"), + createConstColumn>(4, "33333"), + createConstColumn>(4, "44444")}, + utf8mb4_general_ci_collator)); + + // constant vector + ASSERT_COLUMN_EQ( + createColumn>({"22222", "33", "22222", "22222"}), + executeFunction( + func_name, + {createConstColumn>(4, "22222"), + createColumn>({"11", "33", "22", "11111111"})}, + utf8mb4_general_ci_collator)); + + // constant constant + ASSERT_COLUMN_EQ( + createConstColumn(4, "22222"), + executeFunction( + func_name, + {createConstColumn>(4, "11111"), + createConstColumn>(4, "22222")}, + utf8mb4_general_ci_collator)); + + ASSERT_COLUMN_EQ( + createConstColumn>(4, {}), + executeFunction( + func_name, + {createConstColumn>(4, {}), + createConstColumn>(4, "22222")}, + utf8mb4_general_ci_collator)); + + ASSERT_COLUMN_EQ( + createConstColumn(4, "33"), + executeFunction( + func_name, + { + createConstColumn>(4, "1111"), + createConstColumn>(4, "22222"), + createConstColumn>(4, "33"), + }, + utf8mb4_general_ci_collator)); + + const auto * bin_col = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY); + ASSERT_COLUMN_EQ(createColumn>({{}, {}}), + executeFunction( + func_name, + {createColumn>({"1111", "2222"}), + createColumn>({"11", {}}), + createColumn>({{}, "222"})}, + bin_col)); +} +CATCH + TEST_F(LeastGreatestTest, testGreatest) try { diff --git a/tests/fullstack-test/expr/least_greatest.test b/tests/fullstack-test/expr/least_greatest.test index 38b2ba6bc3d..bb3ecad4483 100644 --- a/tests/fullstack-test/expr/least_greatest.test +++ b/tests/fullstack-test/expr/least_greatest.test @@ -1,4 +1,4 @@ -# Copyright 2022 PingCAP, Ltd. +# Copyright 2023 PingCAP, Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -24,8 +24,12 @@ mysql> drop table if exists test.t3 mysql> create table test.t3 (id1 int, id2 int) mysql> alter table test.t3 set tiflash replica 1 mysql> insert into test.t3 values (NULL, NULL), (NULL, NULL), (NULL, NULL) +mysql> drop table if exists test.t4 +mysql> create table test.t4 (a varchar(20), b varchar(20)) +mysql> alter table test.t4 set tiflash replica 1 +mysql> insert into test.t4 values ("111", "422"), ("11", "333"), ("1", "343") -func> wait_table test t1 t2 t3 +func> wait_table test t1 t2 t3 t4 # parse error mysql> set @@tidb_isolation_read_engines='tiflash'; set tidb_enforce_mpp=1; select least() from test.t1 @@ -143,6 +147,27 @@ mysql> set @@tidb_isolation_read_engines='tiflash'; set tidb_enforce_mpp=1; sele | NULL | +-----------------+ +# least String +mysql> set @@tidb_isolation_read_engines='tiflash'; set tidb_enforce_mpp=1; select least(a, b) from test.t4; ++-------------+ +| least(a, b) | ++-------------+ +| 111 | +| 11 | +| 1 | ++-------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set tidb_enforce_mpp=1; select least(a, b, '11', '1') from test.t4 +least(a, b, '11', '1') +1 +1 +1 + +mysql> set @@tidb_isolation_read_engines='tiflash'; set tidb_enforce_mpp=1; select least(a, b, "11", "1", null) from test.t4 +least(a, b, 11, 1, null) +NULL +NULL +NULL + # greatest tests mysql> set @@tidb_isolation_read_engines='tiflash'; set tidb_enforce_mpp=1; select id, greatest(id, id1, id2, id3, id4, id5, id6, id7, id8) from test.t1 order by 1 +------+------------------------------------------------------+ @@ -284,6 +309,35 @@ mysql> set @@tidb_isolation_read_engines='tiflash'; set tidb_enforce_mpp=1; sele +--------------------+ # greatest(decimal, int) not support for now. + +# greatest String +mysql> set @@tidb_isolation_read_engines='tiflash'; set tidb_enforce_mpp=1; select greatest(a, b) from test.t4 ++----------------+ +| greatest(a, b) | ++----------------+ +| 422 | +| 333 | +| 343 | ++----------------+ + +mysql> set @@tidb_isolation_read_engines='tiflash'; set tidb_enforce_mpp=1; select greatest(a, b, "342") from test.t4 +greatest(a, b, 342) +422 +342 +343 + +mysql> set @@tidb_isolation_read_engines='tiflash'; set tidb_enforce_mpp=1; select greatest(a, b, "342", "344") from test.t4 +greatest(a, b, 342, 344) +422 +344 +344 + +mysql> set @@tidb_isolation_read_engines='tiflash'; set tidb_enforce_mpp=1; select greatest(a, b, "342", "344", null) from test.t4 +greatest(a, b, 342, 344, null) +NULL +NULL +NULL + # mysql> drop table if exists test.t1; # mysql> create table test.t1(c1 decimal(10, 1), c2 int); # mysql> insert into test.t1 values(1, 11);