From 954e1e1e82adf723f1e37017b04838b092a9d047 Mon Sep 17 00:00:00 2001 From: Hangjie Mo Date: Tue, 15 Feb 2022 13:17:38 +0800 Subject: [PATCH] expression: fix retType of builtin func with JSON (#32301) close pingcap/tidb#32281 --- cmd/explaintest/r/common_collation.result | 6 +++ cmd/explaintest/t/common_collation.test | 4 ++ expression/collation.go | 49 ++++++++++++----------- expression/collation_test.go | 26 ++++++++++++ 4 files changed, 62 insertions(+), 23 deletions(-) diff --git a/cmd/explaintest/r/common_collation.result b/cmd/explaintest/r/common_collation.result index 242359eb59e32..235ce7fce3d0d 100644 --- a/cmd/explaintest/r/common_collation.result +++ b/cmd/explaintest/r/common_collation.result @@ -22,6 +22,12 @@ coercibility(a) coercibility(b) coercibility(c) coercibility(d) coercibility(e) select collation(d), collation(upper(d)), collation(elt(1, d, 0x12)), collation(elt(1, elt(1, d, 0x12), 0x12)), collation(elt(1, d, b)) from t; collation(d) collation(upper(d)) collation(elt(1, d, 0x12)) collation(elt(1, elt(1, d, 0x12), 0x12)) collation(elt(1, d, b)) binary utf8mb4_bin utf8mb4_bin utf8mb4_bin binary +drop table t; +create table t(a binary, b json, c char charset gbk); +insert into t values ('a', '{"a":"b"}', 'a'); +select collation(concat(a, b)), collation(concat(b, a)), collation(concat(0x61, b)), collation(concat(b, 0x61)), collation(concat(c, b)), collation(concat(b, c)) from t; +collation(concat(a, b)) collation(concat(b, a)) collation(concat(0x61, b)) collation(concat(b, 0x61)) collation(concat(c, b)) collation(concat(b, c)) +binary binary utf8mb4_bin utf8mb4_bin utf8mb4_bin utf8mb4_bin DROP TABLE IF EXISTS t2; CREATE TABLE t2 ( id INT NOT NULL PRIMARY KEY auto_increment, diff --git a/cmd/explaintest/t/common_collation.test b/cmd/explaintest/t/common_collation.test index 43c15f24a7b5a..8f19d9d1a3e27 100644 --- a/cmd/explaintest/t/common_collation.test +++ b/cmd/explaintest/t/common_collation.test @@ -17,6 +17,10 @@ create table t (a char(20), b 
blob(100), c text, d json, e timestamp, f set('a insert into t values ('你好', '你好', '你好', '{\"测试\": \"你好\"}', '2018-10-13', 1, '你好', 'a一'); select coercibility(a), coercibility(b), coercibility(c), coercibility(d), coercibility(e), coercibility(f), coercibility(g), coercibility(h) from t; select collation(d), collation(upper(d)), collation(elt(1, d, 0x12)), collation(elt(1, elt(1, d, 0x12), 0x12)), collation(elt(1, d, b)) from t; +drop table t; +create table t(a binary, b json, c char charset gbk); +insert into t values ('a', '{"a":"b"}', 'a'); +select collation(concat(a, b)), collation(concat(b, a)), collation(concat(0x61, b)), collation(concat(b, 0x61)), collation(concat(c, b)), collation(concat(b, c)) from t; # test greatest and least function with collation. DROP TABLE IF EXISTS t2; diff --git a/expression/collation.go b/expression/collation.go index 94c5c01f92798..9febc5b423b16 100644 --- a/expression/collation.go +++ b/expression/collation.go @@ -370,16 +370,25 @@ func inferCollation(exprs ...Expression) *ExprCollation { repertoire := exprs[0].Repertoire() coercibility := exprs[0].Coercibility() - dstCharset, dstCollation, isJSON := exprs[0].GetType().Charset, exprs[0].GetType().Collate, exprs[0].GetType().EvalType() == types.ETJson + dstCharset, dstCollation := exprs[0].GetType().Charset, exprs[0].GetType().Collate + if exprs[0].GetType().EvalType() == types.ETJson { + dstCharset, dstCollation = charset.CharsetUTF8MB4, charset.CollationUTF8MB4 + } unknownCS := false // Aggregate arguments one by one, agg(a, b, c) := agg(agg(a, b), c). 
for _, arg := range exprs[1:] { + argCharset, argCollation := arg.GetType().Charset, arg.GetType().Collate + // The collation of JSON is always utf8mb4_bin in builtin-func which is same as MySQL + // see details https://github.com/pingcap/tidb/issues/31320#issuecomment-1010599311 + if arg.GetType().EvalType() == types.ETJson { + argCharset, argCollation = charset.CharsetUTF8MB4, charset.CollationUTF8MB4 + } // If one of the arguments is binary charset, we allow it can be used with other charsets. // If they have the same coercibility, let the binary charset one to be the winner because binary has more precedence. - if dstCollation == charset.CollationBin || arg.GetType().Collate == charset.CollationBin { - if coercibility > arg.Coercibility() || (coercibility == arg.Coercibility() && arg.GetType().Collate == charset.CollationBin) { - coercibility, dstCharset, dstCollation, isJSON = arg.Coercibility(), arg.GetType().Charset, arg.GetType().Collate, arg.GetType().EvalType() == types.ETJson + if dstCollation == charset.CollationBin || argCollation == charset.CollationBin { + if coercibility > arg.Coercibility() || (coercibility == arg.Coercibility() && argCollation == charset.CollationBin) { + coercibility, dstCharset, dstCollation = arg.Coercibility(), argCharset, argCollation } repertoire |= arg.Repertoire() continue @@ -392,7 +401,7 @@ func inferCollation(exprs ...Expression) *ExprCollation { // 4. constant value is allowed because we can eval and convert it directly. // If we can not aggregate these two collations, we will get CoercibilityNone and wait for an explicit COLLATE clause, if // there is no explicit COLLATE clause, we will get an error. 
- if dstCharset != arg.GetType().Charset { + if dstCharset != argCharset { switch { case coercibility < arg.Coercibility(): if arg.Repertoire() == ASCII || arg.Coercibility() >= CoercibilitySysconst || isUnicodeCollation(dstCharset) { @@ -400,15 +409,15 @@ func inferCollation(exprs ...Expression) *ExprCollation { continue } case coercibility == arg.Coercibility(): - if (isUnicodeCollation(dstCharset) && !isUnicodeCollation(arg.GetType().Charset)) || (dstCharset == charset.CharsetUTF8MB4 && arg.GetType().Charset == charset.CharsetUTF8) { + if (isUnicodeCollation(dstCharset) && !isUnicodeCollation(argCharset)) || (dstCharset == charset.CharsetUTF8MB4 && argCharset == charset.CharsetUTF8) { repertoire |= arg.Repertoire() continue - } else if (isUnicodeCollation(arg.GetType().Charset) && !isUnicodeCollation(dstCharset)) || (arg.GetType().Charset == charset.CharsetUTF8MB4 && dstCharset == charset.CharsetUTF8) { - coercibility, dstCharset, dstCollation, isJSON = arg.Coercibility(), arg.GetType().Charset, arg.GetType().Collate, arg.GetType().EvalType() == types.ETJson + } else if (isUnicodeCollation(argCharset) && !isUnicodeCollation(dstCharset)) || (argCharset == charset.CharsetUTF8MB4 && dstCharset == charset.CharsetUTF8) { + coercibility, dstCharset, dstCollation = arg.Coercibility(), argCharset, argCollation repertoire |= arg.Repertoire() continue } else if repertoire == ASCII && arg.Repertoire() != ASCII { - coercibility, dstCharset, dstCollation, isJSON = arg.Coercibility(), arg.GetType().Charset, arg.GetType().Collate, arg.GetType().EvalType() == types.ETJson + coercibility, dstCharset, dstCollation = arg.Coercibility(), argCharset, argCollation repertoire |= arg.Repertoire() continue } else if repertoire != ASCII && arg.Repertoire() == ASCII { @@ -416,8 +425,8 @@ func inferCollation(exprs ...Expression) *ExprCollation { continue } case coercibility > arg.Coercibility(): - if repertoire == ASCII || coercibility >= CoercibilitySysconst || 
isUnicodeCollation(arg.GetType().Charset) { - coercibility, dstCharset, dstCollation, isJSON = arg.Coercibility(), arg.GetType().Charset, arg.GetType().Collate, arg.GetType().EvalType() == types.ETJson + if repertoire == ASCII || coercibility >= CoercibilitySysconst || isUnicodeCollation(argCharset) { + coercibility, dstCharset, dstCollation = arg.Coercibility(), argCharset, argCollation repertoire |= arg.Repertoire() continue } @@ -425,24 +434,24 @@ func inferCollation(exprs ...Expression) *ExprCollation { // Cannot apply conversion. repertoire |= arg.Repertoire() - coercibility, dstCharset, dstCollation, isJSON = CoercibilityNone, charset.CharsetBin, charset.CollationBin, false + coercibility, dstCharset, dstCollation = CoercibilityNone, charset.CharsetBin, charset.CollationBin unknownCS = true } else { // If charset is the same, use lower coercibility, if coercibility is the same and none of them are _bin, // derive to CoercibilityNone and _bin collation. switch { case coercibility == arg.Coercibility(): - if dstCollation == arg.GetType().Collate { + if dstCollation == argCollation { } else if coercibility == CoercibilityExplicit { return nil } else if isBinCollation(dstCollation) { - } else if isBinCollation(arg.GetType().Collate) { - coercibility, dstCharset, dstCollation, isJSON = arg.Coercibility(), arg.GetType().Charset, arg.GetType().Collate, arg.GetType().EvalType() == types.ETJson + } else if isBinCollation(argCollation) { + coercibility, dstCharset, dstCollation = arg.Coercibility(), argCharset, argCollation } else { - coercibility, dstCollation, dstCharset, isJSON = CoercibilityNone, getBinCollation(arg.GetType().Charset), arg.GetType().Charset, arg.GetType().EvalType() == types.ETJson + coercibility, dstCharset, dstCollation = CoercibilityNone, argCharset, getBinCollation(argCharset) } case coercibility > arg.Coercibility(): - coercibility, dstCharset, dstCollation, isJSON = arg.Coercibility(), arg.GetType().Charset, arg.GetType().Collate, 
arg.GetType().EvalType() == types.ETJson + coercibility, dstCharset, dstCollation = arg.Coercibility(), argCharset, argCollation } repertoire |= arg.Repertoire() } @@ -452,12 +461,6 @@ func inferCollation(exprs ...Expression) *ExprCollation { return nil } - // The collation of JSON is always utf8mb4_bin in builtin-func which is same as MySQL - // see details https://github.com/pingcap/tidb/issues/31320#issuecomment-1010599311 - if isJSON { - dstCharset, dstCollation = charset.CharsetUTF8MB4, charset.CollationUTF8MB4 - } - return &ExprCollation{ Coer: coercibility, Repe: repertoire, diff --git a/expression/collation_test.go b/expression/collation_test.go index dbd7e9bd04aee..335c55368ec5c 100644 --- a/expression/collation_test.go +++ b/expression/collation_test.go @@ -524,6 +524,32 @@ func TestDeriveCollation(t *testing.T) { false, &ExprCollation{CoercibilityImplicit, UNICODE, charset.CharsetUTF8MB4, charset.CollationUTF8MB4}, }, + { + []string{ + ast.Concat, ast.ConcatWS, ast.Coalesce, ast.Greatest, ast.Least, + }, + []Expression{ + newColString(charset.CharsetGBK, charset.CollationGBKBin), + newColJSON(), + }, + []types.EvalType{types.ETString, types.ETJson}, + types.ETString, + false, + &ExprCollation{CoercibilityImplicit, UNICODE, charset.CharsetUTF8MB4, charset.CollationUTF8MB4}, + }, + { + []string{ + ast.Concat, ast.ConcatWS, ast.Coalesce, ast.Greatest, ast.Least, + }, + []Expression{ + newColJSON(), + newColString(charset.CharsetBinary, charset.CharsetBinary), + }, + []types.EvalType{types.ETJson, types.ETString}, + types.ETString, + false, + &ExprCollation{CoercibilityImplicit, UNICODE, charset.CharsetBinary, charset.CharsetBinary}, + }, { []string{ ast.Concat, ast.ConcatWS, ast.Coalesce, ast.In, ast.Greatest, ast.Least,