Skip to content

Commit

Permalink
expression: fix retType of builtin func with JSON (#32301)
Browse files Browse the repository at this point in the history
close #32281
  • Loading branch information
Defined2014 authored Feb 15, 2022
1 parent c9ec0ba commit 954e1e1
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 23 deletions.
6 changes: 6 additions & 0 deletions cmd/explaintest/r/common_collation.result
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ coercibility(a) coercibility(b) coercibility(c) coercibility(d) coercibility(e)
select collation(d), collation(upper(d)), collation(elt(1, d, 0x12)), collation(elt(1, elt(1, d, 0x12), 0x12)), collation(elt(1, d, b)) from t;
collation(d) collation(upper(d)) collation(elt(1, d, 0x12)) collation(elt(1, elt(1, d, 0x12), 0x12)) collation(elt(1, d, b))
binary utf8mb4_bin utf8mb4_bin utf8mb4_bin binary
drop table t;
create table t(a binary, b json, c char charset gbk);
insert into t values ('a', '{"a":"b"}', 'a');
select collation(concat(a, b)), collation(concat(b, a)), collation(concat(0x61, b)), collation(concat(b, 0x61)), collation(concat(c, b)), collation(concat(b, c)) from t;
collation(concat(a, b)) collation(concat(b, a)) collation(concat(0x61, b)) collation(concat(b, 0x61)) collation(concat(c, b)) collation(concat(b, c))
binary binary utf8mb4_bin utf8mb4_bin utf8mb4_bin utf8mb4_bin
DROP TABLE IF EXISTS t2;
CREATE TABLE t2 (
id INT NOT NULL PRIMARY KEY auto_increment,
Expand Down
4 changes: 4 additions & 0 deletions cmd/explaintest/t/common_collation.test
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ create table t (a char(20), b blob(100), c text, d json, e timestamp, f set('a
insert into t values ('你好', '你好', '你好', '{\"测试\": \"你好\"}', '2018-10-13', 1, '你好', 'a一');
select coercibility(a), coercibility(b), coercibility(c), coercibility(d), coercibility(e), coercibility(f), coercibility(g), coercibility(h) from t;
select collation(d), collation(upper(d)), collation(elt(1, d, 0x12)), collation(elt(1, elt(1, d, 0x12), 0x12)), collation(elt(1, d, b)) from t;
drop table t;
create table t(a binary, b json, c char charset gbk);
insert into t values ('a', '{"a":"b"}', 'a');
select collation(concat(a, b)), collation(concat(b, a)), collation(concat(0x61, b)), collation(concat(b, 0x61)), collation(concat(c, b)), collation(concat(b, c)) from t;

# test greatest and least function with collation.
DROP TABLE IF EXISTS t2;
Expand Down
49 changes: 26 additions & 23 deletions expression/collation.go
Original file line number Diff line number Diff line change
Expand Up @@ -370,16 +370,25 @@ func inferCollation(exprs ...Expression) *ExprCollation {

repertoire := exprs[0].Repertoire()
coercibility := exprs[0].Coercibility()
dstCharset, dstCollation, isJSON := exprs[0].GetType().Charset, exprs[0].GetType().Collate, exprs[0].GetType().EvalType() == types.ETJson
dstCharset, dstCollation := exprs[0].GetType().Charset, exprs[0].GetType().Collate
if exprs[0].GetType().EvalType() == types.ETJson {
dstCharset, dstCollation = charset.CharsetUTF8MB4, charset.CollationUTF8MB4
}
unknownCS := false

// Aggregate arguments one by one, agg(a, b, c) := agg(agg(a, b), c).
for _, arg := range exprs[1:] {
argCharset, argCollation := arg.GetType().Charset, arg.GetType().Collate
// The collation of JSON is always utf8mb4_bin in builtin functions, which is the same as in MySQL;
// see details https://github.com/pingcap/tidb/issues/31320#issuecomment-1010599311
if arg.GetType().EvalType() == types.ETJson {
argCharset, argCollation = charset.CharsetUTF8MB4, charset.CollationUTF8MB4
}
// If one of the arguments has the binary charset, we allow it to be used together with other charsets.
// If both have the same coercibility, the binary one wins because binary has higher precedence.
if dstCollation == charset.CollationBin || arg.GetType().Collate == charset.CollationBin {
if coercibility > arg.Coercibility() || (coercibility == arg.Coercibility() && arg.GetType().Collate == charset.CollationBin) {
coercibility, dstCharset, dstCollation, isJSON = arg.Coercibility(), arg.GetType().Charset, arg.GetType().Collate, arg.GetType().EvalType() == types.ETJson
if dstCollation == charset.CollationBin || argCollation == charset.CollationBin {
if coercibility > arg.Coercibility() || (coercibility == arg.Coercibility() && argCollation == charset.CollationBin) {
coercibility, dstCharset, dstCollation = arg.Coercibility(), argCharset, argCollation
}
repertoire |= arg.Repertoire()
continue
Expand All @@ -392,57 +401,57 @@ func inferCollation(exprs ...Expression) *ExprCollation {
// 4. constant value is allowed because we can eval and convert it directly.
// If we can not aggregate these two collations, we will get CoercibilityNone and wait for an explicit COLLATE clause, if
// there is no explicit COLLATE clause, we will get an error.
if dstCharset != arg.GetType().Charset {
if dstCharset != argCharset {
switch {
case coercibility < arg.Coercibility():
if arg.Repertoire() == ASCII || arg.Coercibility() >= CoercibilitySysconst || isUnicodeCollation(dstCharset) {
repertoire |= arg.Repertoire()
continue
}
case coercibility == arg.Coercibility():
if (isUnicodeCollation(dstCharset) && !isUnicodeCollation(arg.GetType().Charset)) || (dstCharset == charset.CharsetUTF8MB4 && arg.GetType().Charset == charset.CharsetUTF8) {
if (isUnicodeCollation(dstCharset) && !isUnicodeCollation(argCharset)) || (dstCharset == charset.CharsetUTF8MB4 && argCharset == charset.CharsetUTF8) {
repertoire |= arg.Repertoire()
continue
} else if (isUnicodeCollation(arg.GetType().Charset) && !isUnicodeCollation(dstCharset)) || (arg.GetType().Charset == charset.CharsetUTF8MB4 && dstCharset == charset.CharsetUTF8) {
coercibility, dstCharset, dstCollation, isJSON = arg.Coercibility(), arg.GetType().Charset, arg.GetType().Collate, arg.GetType().EvalType() == types.ETJson
} else if (isUnicodeCollation(argCharset) && !isUnicodeCollation(dstCharset)) || (argCharset == charset.CharsetUTF8MB4 && dstCharset == charset.CharsetUTF8) {
coercibility, dstCharset, dstCollation = arg.Coercibility(), argCharset, argCollation
repertoire |= arg.Repertoire()
continue
} else if repertoire == ASCII && arg.Repertoire() != ASCII {
coercibility, dstCharset, dstCollation, isJSON = arg.Coercibility(), arg.GetType().Charset, arg.GetType().Collate, arg.GetType().EvalType() == types.ETJson
coercibility, dstCharset, dstCollation = arg.Coercibility(), argCharset, argCollation
repertoire |= arg.Repertoire()
continue
} else if repertoire != ASCII && arg.Repertoire() == ASCII {
repertoire |= arg.Repertoire()
continue
}
case coercibility > arg.Coercibility():
if repertoire == ASCII || coercibility >= CoercibilitySysconst || isUnicodeCollation(arg.GetType().Charset) {
coercibility, dstCharset, dstCollation, isJSON = arg.Coercibility(), arg.GetType().Charset, arg.GetType().Collate, arg.GetType().EvalType() == types.ETJson
if repertoire == ASCII || coercibility >= CoercibilitySysconst || isUnicodeCollation(argCharset) {
coercibility, dstCharset, dstCollation = arg.Coercibility(), argCharset, argCollation
repertoire |= arg.Repertoire()
continue
}
}

// Cannot apply conversion.
repertoire |= arg.Repertoire()
coercibility, dstCharset, dstCollation, isJSON = CoercibilityNone, charset.CharsetBin, charset.CollationBin, false
coercibility, dstCharset, dstCollation = CoercibilityNone, charset.CharsetBin, charset.CollationBin
unknownCS = true
} else {
// If charset is the same, use lower coercibility, if coercibility is the same and none of them are _bin,
// derive to CoercibilityNone and _bin collation.
switch {
case coercibility == arg.Coercibility():
if dstCollation == arg.GetType().Collate {
if dstCollation == argCollation {
} else if coercibility == CoercibilityExplicit {
return nil
} else if isBinCollation(dstCollation) {
} else if isBinCollation(arg.GetType().Collate) {
coercibility, dstCharset, dstCollation, isJSON = arg.Coercibility(), arg.GetType().Charset, arg.GetType().Collate, arg.GetType().EvalType() == types.ETJson
} else if isBinCollation(argCollation) {
coercibility, dstCharset, dstCollation = arg.Coercibility(), argCharset, argCollation
} else {
coercibility, dstCollation, dstCharset, isJSON = CoercibilityNone, getBinCollation(arg.GetType().Charset), arg.GetType().Charset, arg.GetType().EvalType() == types.ETJson
coercibility, dstCharset, dstCollation = CoercibilityNone, argCharset, getBinCollation(argCharset)
}
case coercibility > arg.Coercibility():
coercibility, dstCharset, dstCollation, isJSON = arg.Coercibility(), arg.GetType().Charset, arg.GetType().Collate, arg.GetType().EvalType() == types.ETJson
coercibility, dstCharset, dstCollation = arg.Coercibility(), argCharset, argCollation
}
repertoire |= arg.Repertoire()
}
Expand All @@ -452,12 +461,6 @@ func inferCollation(exprs ...Expression) *ExprCollation {
return nil
}

// The collation of JSON is always utf8mb4_bin in builtin-func which is same as MySQL
// see details https://github.com/pingcap/tidb/issues/31320#issuecomment-1010599311
if isJSON {
dstCharset, dstCollation = charset.CharsetUTF8MB4, charset.CollationUTF8MB4
}

return &ExprCollation{
Coer: coercibility,
Repe: repertoire,
Expand Down
26 changes: 26 additions & 0 deletions expression/collation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,32 @@ func TestDeriveCollation(t *testing.T) {
false,
&ExprCollation{CoercibilityImplicit, UNICODE, charset.CharsetUTF8MB4, charset.CollationUTF8MB4},
},
{
[]string{
ast.Concat, ast.ConcatWS, ast.Coalesce, ast.Greatest, ast.Least,
},
[]Expression{
newColString(charset.CharsetGBK, charset.CollationGBKBin),
newColJSON(),
},
[]types.EvalType{types.ETString, types.ETJson},
types.ETString,
false,
&ExprCollation{CoercibilityImplicit, UNICODE, charset.CharsetUTF8MB4, charset.CollationUTF8MB4},
},
{
[]string{
ast.Concat, ast.ConcatWS, ast.Coalesce, ast.Greatest, ast.Least,
},
[]Expression{
newColJSON(),
newColString(charset.CharsetBinary, charset.CharsetBinary),
},
[]types.EvalType{types.ETJson, types.ETString},
types.ETString,
false,
&ExprCollation{CoercibilityImplicit, UNICODE, charset.CharsetBinary, charset.CharsetBinary},
},
{
[]string{
ast.Concat, ast.ConcatWS, ast.Coalesce, ast.In, ast.Greatest, ast.Least,
Expand Down

0 comments on commit 954e1e1

Please sign in to comment.