Skip to content

Commit

Permalink
parser, ddl: support decoding binary literal in set/enum (#35822)
Browse files Browse the repository at this point in the history
close #31338
  • Loading branch information
tangenta authored Jun 30, 2022
1 parent 6d80d00 commit c578d19
Show file tree
Hide file tree
Showing 10 changed files with 140 additions and 57 deletions.
32 changes: 32 additions & 0 deletions cmd/explaintest/r/new_character_set_invalid.result
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,35 @@ a b c
? ? ?
中文?中文 asdf?fdsa 字符集?字符集
@@ @@ @@
set @@sql_mode = default;
drop table t;
create table t(f set(0xD2BB, 0xC8FD), e enum(0xBAEC,0x6A59));
show create table t;
Table Create Table
t CREATE TABLE `t` (
`f` set('һ','??') DEFAULT NULL,
`e` enum('??','jY') DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin
drop table t;
create table t( e enum(0xBAEC,0x6A59));
show create table t;
Table Create Table
t CREATE TABLE `t` (
`e` enum('??','jY') DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin
drop table t;
create table t(f set(0xD2BB, 0xC8FD), e enum(0xBAEC,0x6A59)) collate gbk_bin;
show create table t;
Table Create Table
t CREATE TABLE `t` (
`f` set('一','三') COLLATE gbk_bin DEFAULT NULL,
`e` enum('红','jY') COLLATE gbk_bin DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=gbk COLLATE=gbk_bin
drop table t;
create table t( e enum(0xBAEC,0x6A59)) collate gbk_bin;
show create table t;
Table Create Table
t CREATE TABLE `t` (
`e` enum('红','jY') COLLATE gbk_bin DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=gbk COLLATE=gbk_bin
set @@sql_mode = '';
15 changes: 15 additions & 0 deletions cmd/explaintest/t/new_character_set_invalid.test
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,18 @@ insert into t values ('À', 'ø', '😂');
insert into t values ('中文À中文', 'asdføfdsa', '字符集😂字符集');
insert into t values (0x4040ffff, 0x4040ffff, 0x4040ffff);
select * from t;

set @@sql_mode = default;
drop table t;
create table t(f set(0xD2BB, 0xC8FD), e enum(0xBAEC,0x6A59));
show create table t;
drop table t;
create table t( e enum(0xBAEC,0x6A59));
show create table t;
drop table t;
create table t(f set(0xD2BB, 0xC8FD), e enum(0xBAEC,0x6A59)) collate gbk_bin;
show create table t;
drop table t;
create table t( e enum(0xBAEC,0x6A59)) collate gbk_bin;
show create table t;
set @@sql_mode = '';
21 changes: 21 additions & 0 deletions ddl/ddl_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ import (
"github.com/pingcap/tidb/util/collate"
"github.com/pingcap/tidb/util/dbterror"
"github.com/pingcap/tidb/util/domainutil"
"github.com/pingcap/tidb/util/hack"
"github.com/pingcap/tidb/util/logutil"
"github.com/pingcap/tidb/util/mathutil"
"github.com/pingcap/tidb/util/mock"
Expand Down Expand Up @@ -820,6 +821,24 @@ func setCharsetCollationFlenDecimal(tp *types.FieldType, colName, colCharset, co
return checkTooBigFieldLengthAndTryAutoConvert(tp, colName, sessVars)
}

func decodeEnumSetBinaryLiteralToUTF8(tp *types.FieldType, chs string) {
if tp.GetType() != mysql.TypeEnum && tp.GetType() != mysql.TypeSet {
return
}
enc := charset.FindEncoding(chs)
for i, elem := range tp.GetElems() {
if !tp.GetElemIsBinaryLit(i) {
continue
}
s, err := enc.Transform(nil, hack.Slice(elem), charset.OpDecodeReplace)
if err != nil {
logutil.BgLogger().Warn("decode enum binary literal to utf-8 failed", zap.Error(err))
}
tp.SetElem(i, string(hack.String(s)))
}
tp.CleanElemIsBinaryLit()
}

// buildColumnAndConstraint builds table.Column and ast.Constraint from the parameters.
// outPriKeyConstraint is the primary key constraint out of column definition. For example:
// `create table t1 (id int , age int, primary key(id));`
Expand Down Expand Up @@ -852,6 +871,7 @@ func buildColumnAndConstraint(
if err := setCharsetCollationFlenDecimal(colDef.Tp, colDef.Name.Name.O, chs, coll, ctx.GetSessionVars()); err != nil {
return nil, nil, errors.Trace(err)
}
decodeEnumSetBinaryLiteralToUTF8(colDef.Tp, chs)
col, cts, err := columnDefToCol(ctx, offset, colDef, outPriKeyConstraint)
if err != nil {
return nil, nil, errors.Trace(err)
Expand Down Expand Up @@ -4523,6 +4543,7 @@ func (d *ddl) getModifiableColumnJob(ctx context.Context, sctx sessionctx.Contex
if err = setCharsetCollationFlenDecimal(&newCol.FieldType, newCol.Name.O, chs, coll, sctx.GetSessionVars()); err != nil {
return nil, errors.Trace(err)
}
decodeEnumSetBinaryLiteralToUTF8(&newCol.FieldType, chs)

// Check the column with foreign key, waiting for the default flen and decimal.
if fkInfo := getColumnForeignKeyInfo(originalColName.L, t.Meta().ForeignKeys); fkInfo != nil {
Expand Down
14 changes: 7 additions & 7 deletions ddl/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,19 +68,19 @@ func TestDefaultValueInEnum(t *testing.T) {
tk.MustExec("use test;")
// The value 0x91 should not cause panic.
tk.MustExec("create table t(a enum('a', 0x91) charset gbk);")
tk.MustExec("insert into t values (1), (2);") // Use 1-base index to locate the value.
tk.MustQuery("select a from t;").Check(testkit.Rows("a", "")) // 0x91 is truncate.
tk.MustExec("insert into t values (1), (2);") // Use 1-base index to locate the value.
tk.MustQuery("select a from t;").Check(testkit.Rows("a", "?")) // 0x91 is replaced to '?'.
tk.MustExec("drop table t;")
tk.MustExec("create table t (a enum('a', 0x91)) charset gbk;") // Test for table charset.
tk.MustExec("insert into t values (1), (2);")
tk.MustQuery("select a from t;").Check(testkit.Rows("a", ""))
tk.MustQuery("select a from t;").Check(testkit.Rows("a", "?"))
tk.MustExec("drop table t;")
tk.MustGetErrMsg("create table t(a set('a', 0x91, '') charset gbk);",
"[types:1291]Column 'a' has duplicated value '' in SET")
// Test valid utf-8 string value in enum. Note that the binary literal only can be decoded to utf-8.
tk.MustGetErrMsg("create table t(a set('a', 0x91, '?') charset gbk);",
"[types:1291]Column 'a' has duplicated value '?' in SET")
// Test valid utf-8 string value in enum.
tk.MustExec("create table t (a enum('a', 0xE4BDA0E5A5BD) charset gbk);")
tk.MustExec("insert into t values (1), (2);")
tk.MustQuery("select a from t;").Check(testkit.Rows("a", "你好"))
tk.MustQuery("select a from t;").Check(testkit.Rows("a", "浣犲ソ"))
}

func TestDDLStatementsBackFill(t *testing.T) {
Expand Down
22 changes: 0 additions & 22 deletions parser/ast/misc.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ import (

"github.com/pingcap/errors"
"github.com/pingcap/tidb/parser/auth"
"github.com/pingcap/tidb/parser/charset"
"github.com/pingcap/tidb/parser/format"
"github.com/pingcap/tidb/parser/model"
"github.com/pingcap/tidb/parser/mysql"
Expand Down Expand Up @@ -3619,27 +3618,6 @@ type TextString struct {
IsBinaryLiteral bool
}

// TransformTextStrings converts a slice of TextString to strings.
// This is only used by enum/set strings.
func TransformTextStrings(ts []*TextString, _ string) []string {
// The UTF-8 encoding rather than other encoding is used
// because parser is not possible to determine the "real"
// charset that a binary literal string should be converted to.
enc := charset.EncodingUTF8Impl
ret := make([]string, 0, len(ts))
for _, t := range ts {
if !t.IsBinaryLiteral {
ret = append(ret, t.Value)
} else {
// Validate the binary literal string.
// See https://github.com/pingcap/tidb/issues/30740.
r, _ := enc.Transform(nil, charset.HackSlice(t.Value), charset.OpDecodeNoErr)
ret = append(ret, charset.HackString(r))
}
}
return ret
}

type BinaryLiteral interface {
ToString() string
}
Expand Down
2 changes: 2 additions & 0 deletions parser/ast/util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ func (checker *nodeTextCleaner) Enter(in Node) (out Node, skipChildren bool) {
}
case *Join:
node.ExplicitParens = false
case *ColumnDef:
node.Tp.CleanElemIsBinaryLit()
}
return in, false
}
Expand Down
22 changes: 12 additions & 10 deletions parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -19881,12 +19881,13 @@ yynewstate:
tp := types.NewFieldType(mysql.TypeEnum)
elems := yyS[yypt-2].item.([]*ast.TextString)
opt := yyS[yypt-0].item.(*ast.OptBinary)
tp.SetElems(ast.TransformTextStrings(elems, opt.Charset))
tp.SetElems(make([]string, len(elems)))
fieldLen := -1 // enum_flen = max(ele_flen)
for i := range tp.GetElems() {
tp.SetElem(i, strings.TrimRight(tp.GetElem(i), " "))
if len(tp.GetElem(i)) > fieldLen {
fieldLen = len(tp.GetElem(i))
for i, e := range elems {
trimmed := strings.TrimRight(e.Value, " ")
tp.SetElemWithIsBinaryLit(i, trimmed, e.IsBinaryLiteral)
if len(trimmed) > fieldLen {
fieldLen = len(trimmed)
}
}
tp.SetFlen(fieldLen)
Expand All @@ -19901,11 +19902,12 @@ yynewstate:
tp := types.NewFieldType(mysql.TypeSet)
elems := yyS[yypt-2].item.([]*ast.TextString)
opt := yyS[yypt-0].item.(*ast.OptBinary)
tp.SetElems(ast.TransformTextStrings(elems, opt.Charset))
fieldLen := len(tp.GetElems()) - 1 // set_flen = sum(ele_flen) + number_of_ele - 1
for i := range tp.GetElems() {
tp.SetElem(i, strings.TrimRight(tp.GetElem(i), " "))
fieldLen += len(tp.GetElem(i))
tp.SetElems(make([]string, len(elems)))
fieldLen := len(elems) - 1 // set_flen = sum(ele_flen) + number_of_ele - 1
for i, e := range elems {
trimmed := strings.TrimRight(e.Value, " ")
tp.SetElemWithIsBinaryLit(i, trimmed, e.IsBinaryLiteral)
fieldLen += len(trimmed)
}
tp.SetFlen(fieldLen)
tp.SetCharset(opt.Charset)
Expand Down
22 changes: 12 additions & 10 deletions parser/parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -11896,12 +11896,13 @@ StringType:
tp := types.NewFieldType(mysql.TypeEnum)
elems := $3.([]*ast.TextString)
opt := $5.(*ast.OptBinary)
tp.SetElems(ast.TransformTextStrings(elems, opt.Charset))
tp.SetElems(make([]string, len(elems)))
fieldLen := -1 // enum_flen = max(ele_flen)
for i := range tp.GetElems() {
tp.SetElem(i, strings.TrimRight(tp.GetElem(i), " "))
if len(tp.GetElem(i)) > fieldLen {
fieldLen = len(tp.GetElem(i))
for i, e := range elems {
trimmed := strings.TrimRight(e.Value, " ")
tp.SetElemWithIsBinaryLit(i, trimmed, e.IsBinaryLiteral)
if len(trimmed) > fieldLen {
fieldLen = len(trimmed)
}
}
tp.SetFlen(fieldLen)
Expand All @@ -11916,11 +11917,12 @@ StringType:
tp := types.NewFieldType(mysql.TypeSet)
elems := $3.([]*ast.TextString)
opt := $5.(*ast.OptBinary)
tp.SetElems(ast.TransformTextStrings(elems, opt.Charset))
fieldLen := len(tp.GetElems()) - 1 // set_flen = sum(ele_flen) + number_of_ele - 1
for i := range tp.GetElems() {
tp.SetElem(i, strings.TrimRight(tp.GetElem(i), " "))
fieldLen += len(tp.GetElem(i))
tp.SetElems(make([]string, len(elems)))
fieldLen := len(elems) - 1 // set_flen = sum(ele_flen) + number_of_ele - 1
for i, e := range elems {
trimmed := strings.TrimRight(e.Value, " ")
tp.SetElemWithIsBinaryLit(i, trimmed, e.IsBinaryLiteral)
fieldLen += len(trimmed)
}
tp.SetFlen(fieldLen)
tp.SetCharset(opt.Charset)
Expand Down
2 changes: 2 additions & 0 deletions parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6192,6 +6192,8 @@ func (checker *nodeTextCleaner) Enter(in ast.Node) (out ast.Node, skipChildren b
node.Specs = specs
case *ast.Join:
node.ExplicitParens = false
case *ast.ColumnDef:
node.Tp.CleanElemIsBinaryLit()
}
return in, false
}
Expand Down
45 changes: 37 additions & 8 deletions parser/types/field_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,9 @@ type FieldType struct {
// collate represent collate rules of the charset
collate string
// elems is the element list for enum and set type.
elems []string
elems []string
elemsIsBinaryLit []bool
// Please keep in mind that jsonFieldType should be updated if you add a new field here.
}

// NewFieldType returns a FieldType,
Expand Down Expand Up @@ -180,10 +182,34 @@ func (ft *FieldType) SetElem(idx int, element string) {
ft.elems[idx] = element
}

func (ft *FieldType) SetElemWithIsBinaryLit(idx int, element string, isBinaryLit bool) {
ft.elems[idx] = element
if isBinaryLit {
// Create the binary literal flags lazily.
if ft.elemsIsBinaryLit == nil {
ft.elemsIsBinaryLit = make([]bool, len(ft.elems))
}
ft.elemsIsBinaryLit[idx] = true
}
}

func (ft *FieldType) GetElem(idx int) string {
return ft.elems[idx]
}

func (ft *FieldType) GetElemIsBinaryLit(idx int) bool {
if len(ft.elemsIsBinaryLit) == 0 {
return false
}
return ft.elemsIsBinaryLit[idx]
}

func (ft *FieldType) CleanElemIsBinaryLit() {
if ft != nil && ft.elemsIsBinaryLit != nil {
ft.elemsIsBinaryLit = nil
}
}

// Clone returns a copy of itself.
func (ft *FieldType) Clone() *FieldType {
ret := *ft
Expand Down Expand Up @@ -506,13 +532,14 @@ func HasCharset(ft *FieldType) bool {

// for json
type jsonFieldType struct {
Tp byte
Flag uint
Flen int
Decimal int
Charset string
Collate string
Elems []string
Tp byte
Flag uint
Flen int
Decimal int
Charset string
Collate string
Elems []string
ElemsIsBinaryLit []bool
}

func (ft *FieldType) UnmarshalJSON(data []byte) error {
Expand All @@ -526,6 +553,7 @@ func (ft *FieldType) UnmarshalJSON(data []byte) error {
ft.charset = r.Charset
ft.collate = r.Collate
ft.elems = r.Elems
ft.elemsIsBinaryLit = r.ElemsIsBinaryLit
}
return err
}
Expand All @@ -539,5 +567,6 @@ func (ft *FieldType) MarshalJSON() ([]byte, error) {
r.Charset = ft.charset
r.Collate = ft.collate
r.Elems = ft.elems
r.ElemsIsBinaryLit = ft.elemsIsBinaryLit
return json.Marshal(r)
}

0 comments on commit c578d19

Please sign in to comment.