Skip to content

Commit

Permalink
cherry pick pingcap#31114 to release-5.3
Browse files Browse the repository at this point in the history
Signed-off-by: ti-srebot <[email protected]>
  • Loading branch information
WizardXiao authored and ti-srebot committed Jan 18, 2022
1 parent 83b273a commit da1a8ee
Show file tree
Hide file tree
Showing 7 changed files with 294 additions and 30 deletions.
66 changes: 36 additions & 30 deletions dumpling/export/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,42 +134,45 @@ type Config struct {
SessionParams map[string]interface{}
Labels prometheus.Labels `json:"-"`
Tables DatabaseTables

CollationCompatible string
}

// DefaultConfig returns the default export Config for dumpling.
//
// Each field is listed exactly once; repeating a key inside a composite
// literal is a compile error in Go. CollationCompatible defaults to
// LooseCollationCompatible.
func DefaultConfig() *Config {
	// The parse error is discarded on purpose: the pattern is a fixed literal.
	// NOTE(review): presumably "*.*" can never fail to parse — confirm against filter.Parse.
	allFilter, _ := filter.Parse([]string{"*.*"})
	return &Config{
		Databases:           nil,
		Host:                "127.0.0.1",
		User:                "root",
		Port:                3306,
		Password:            "",
		Threads:             4,
		Logger:              nil,
		StatusAddr:          ":8281",
		FileSize:            UnspecifiedSize,
		StatementSize:       DefaultStatementSize,
		OutputDirPath:       ".",
		ServerInfo:          ServerInfoUnknown,
		SortByPk:            true,
		Tables:              nil,
		Snapshot:            "",
		Consistency:         consistencyTypeAuto,
		NoViews:             true,
		Rows:                UnspecifiedSize,
		Where:               "",
		FileType:            "",
		NoHeader:            false,
		NoSchemas:           false,
		NoData:              false,
		CsvNullValue:        "\\N",
		SQL:                 "",
		TableFilter:         allFilter,
		DumpEmptyDatabase:   true,
		SessionParams:       make(map[string]interface{}),
		OutputFileTemplate:  DefaultOutputFileTemplate,
		PosAfterConnect:     false,
		CollationCompatible: LooseCollationCompatible,
	}
}

Expand Down Expand Up @@ -550,6 +553,9 @@ const (
defaultDumpGCSafePointTTL = 5 * 60
defaultEtcdDialTimeOut = 3 * time.Second

LooseCollationCompatible = "loose"
StrictCollationCompatible = "strict"

dumplingServiceSafePointPrefix = "dumpling"
)

Expand Down
162 changes: 162 additions & 0 deletions dumpling/export/dump.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,19 @@ func (d *Dumper) Dump() (dumpErr error) {
// for consistency none, the binlog pos in metadata might be earlier than dumped data. We need to enable safe-mode to assure data safety.
err = m.recordGlobalMetaData(metaConn, conf.ServerInfo.ServerType, false)
if err != nil {
<<<<<<< HEAD
tctx.L().Info("get global metadata failed", zap.Error(err))
=======
tctx.L().Info("get global metadata failed", log.ShortError(err))
}

if d.conf.CollationCompatible == StrictCollationCompatible {
//init charset and default collation map
d.charsetAndDefaultCollationMap, err = GetCharsetAndDefaultCollation(tctx.Context, metaConn)
if err != nil {
return err
}
>>>>>>> 1e7f0dcc6... dumpling: add collation_compatible config in dumpling (#31114)
}

// for other consistencies, we should get table list after consistency is set up and GlobalMetaData is cached
Expand Down Expand Up @@ -300,8 +312,19 @@ func (d *Dumper) dumpDatabases(tctx *tcontext.Context, metaConn *sql.Conn, taskC
if !conf.NoSchemas {
createDatabaseSQL, err := ShowCreateDatabase(metaConn, dbName)
if err != nil {
<<<<<<< HEAD
return err
=======
return errors.Trace(err)
}

// adjust db collation
createDatabaseSQL, err = adjustDatabaseCollation(tctx, d.conf.CollationCompatible, parser1, createDatabaseSQL, d.charsetAndDefaultCollationMap)
if err != nil {
return errors.Trace(err)
>>>>>>> 1e7f0dcc6... dumpling: add collation_compatible config in dumpling (#31114)
}

task := NewTaskDatabaseMeta(dbName, createDatabaseSQL)
ctxDone := d.sendTaskToChan(tctx, task, taskChan)
if ctxDone {
Expand All @@ -325,6 +348,17 @@ func (d *Dumper) dumpDatabases(tctx *tcontext.Context, metaConn *sql.Conn, taskC
return tctx.Err()
}
} else {
<<<<<<< HEAD
=======

// adjust table collation
newCreateSQL, err := adjustTableCollation(tctx, d.conf.CollationCompatible, parser1, meta.ShowCreateTable(), d.charsetAndDefaultCollationMap)
if err != nil {
return errors.Trace(err)
}
meta.(*tableMeta).showCreateTable = newCreateSQL

>>>>>>> 1e7f0dcc6... dumpling: add collation_compatible config in dumpling (#31114)
task := NewTaskTableMeta(dbName, table.Name, meta.ShowCreateTable())
ctxDone := d.sendTaskToChan(tctx, task, taskChan)
if ctxDone {
Expand All @@ -344,6 +378,134 @@ func (d *Dumper) dumpDatabases(tctx *tcontext.Context, metaConn *sql.Conn, taskC
return nil
}

<<<<<<< HEAD
=======
// adjustDatabaseCollation makes the collation of a CREATE DATABASE statement
// explicit and returns the resulting SQL text.
//
// originSQL is returned unchanged when:
//   - collationCompatible is not StrictCollationCompatible;
//   - the statement cannot be parsed (a warning is logged);
//   - the parsed statement is not a CREATE DATABASE statement;
//   - the statement already carries a COLLATE option;
//   - charsetAndDefaultCollationMap has no entry for the lower-cased charset
//     (a warning is logged).
//
// Otherwise a COLLATE option holding the mapped default collation is appended
// and the statement is restored to text. An error is returned only when that
// restore step fails.
func adjustDatabaseCollation(tctx *tcontext.Context, collationCompatible string, parser *parser.Parser, originSQL string, charsetAndDefaultCollationMap map[string]string) (string, error) {
	if collationCompatible != StrictCollationCompatible {
		return originSQL, nil
	}

	parsed, parseErr := parser.ParseOneStmt(originSQL, "", "")
	if parseErr != nil {
		tctx.L().Warn("parse create database error, maybe tidb parser doesn't support it", zap.String("originSQL", originSQL), log.ShortError(parseErr))
		return originSQL, nil
	}
	createStmt, isCreateDB := parsed.(*ast.CreateDatabaseStmt)
	if !isCreateDB {
		return originSQL, nil
	}

	// Remember the charset seen so far; bail out as soon as an explicit
	// collation shows up.
	dbCharset := ""
	for _, opt := range createStmt.Options {
		switch opt.Tp {
		case ast.DatabaseOptionCollate:
			return originSQL, nil
		case ast.DatabaseOptionCharset:
			dbCharset = opt.Value
		}
	}

	// Look up the default collation under the lower-cased charset name.
	lowerCharset := strings.ToLower(dbCharset)
	defaultCollation, found := charsetAndDefaultCollationMap[lowerCharset]
	if !found {
		tctx.L().Warn("not found database charset default collation.", zap.String("originSQL", originSQL), zap.String("charset", lowerCharset))
		return originSQL, nil
	}
	createStmt.Options = append(createStmt.Options, &ast.DatabaseOption{Tp: ast.DatabaseOptionCollate, Value: defaultCollation})

	// Serialize the modified statement back into SQL text.
	out := new(bytes.Buffer)
	restoreCtx := &format.RestoreCtx{
		Flags: format.DefaultRestoreFlags | format.RestoreTiDBSpecialComment,
		In:    out,
	}
	if err := createStmt.Restore(restoreCtx); err != nil {
		return "", errors.Trace(err)
	}
	return out.String(), nil
}

// adjustTableCollation makes the collations of a CREATE TABLE statement
// explicit and returns the resulting SQL text.
//
// originSQL is returned unchanged when collationCompatible is not
// StrictCollationCompatible, when the statement cannot be parsed (a warning is
// logged), when it is not a CREATE TABLE statement, or when the table names a
// charset without a collation and charsetAndDefaultCollationMap has no entry
// for that lower-cased charset (a warning is logged).
//
// In every other case the statement is restored from its AST, so the returned
// text is the restored form of the statement even if no option was added. An
// error is returned only when that restore step fails.
func adjustTableCollation(tctx *tcontext.Context, collationCompatible string, parser *parser.Parser, originSQL string, charsetAndDefaultCollationMap map[string]string) (string, error) {
	if collationCompatible != StrictCollationCompatible {
		return originSQL, nil
	}

	parsed, parseErr := parser.ParseOneStmt(originSQL, "", "")
	if parseErr != nil {
		tctx.L().Warn("parse create table error, maybe tidb parser doesn't support it", zap.String("originSQL", originSQL), log.ShortError(parseErr))
		return originSQL, nil
	}
	createStmt, isCreateTable := parsed.(*ast.CreateTableStmt)
	if !isCreateTable {
		return originSQL, nil
	}

	// Scan the table options; stop at the first explicit collation.
	var tableCharset, tableCollation string
scanOptions:
	for _, opt := range createStmt.Options {
		switch opt.Tp {
		case ast.TableOptionCollate:
			tableCollation = opt.StrValue
			break scanOptions
		case ast.TableOptionCharset:
			tableCharset = opt.StrValue
		}
	}

	// A charset without a collation: append the charset's default collation.
	if tableCollation == "" && tableCharset != "" {
		lowerCharset := strings.ToLower(tableCharset)
		defaultCollation, found := charsetAndDefaultCollationMap[lowerCharset]
		if !found {
			tctx.L().Warn("not found table charset default collation.", zap.String("originSQL", originSQL), zap.String("charset", lowerCharset))
			return originSQL, nil
		}
		createStmt.Options = append(createStmt.Options, &ast.TableOption{Tp: ast.TableOptionCollate, StrValue: defaultCollation})
	}

	// Apply the same treatment to the individual columns.
	adjustColumnsCollation(tctx, createStmt, charsetAndDefaultCollationMap)

	// Serialize the (possibly modified) statement back into SQL text.
	out := new(bytes.Buffer)
	restoreCtx := &format.RestoreCtx{
		Flags: format.DefaultRestoreFlags | format.RestoreTiDBSpecialComment,
		In:    out,
	}
	if err := createStmt.Restore(restoreCtx); err != nil {
		return "", errors.Trace(err)
	}
	return out.String(), nil
}

// adjustColumnsCollation fills in the collation of every column of createStmt
// that declares a charset but no collation.
//
// A column is left untouched when it has a COLLATE column option, when its
// field type already has a collation, or when its field type has no charset.
// For the remaining columns the default collation is looked up in
// charsetAndDefaultCollationMap under the lower-cased charset name; a missing
// entry is logged as a warning and the column is skipped.
func adjustColumnsCollation(tctx *tcontext.Context, createStmt *ast.CreateTableStmt, charsetAndDefaultCollationMap map[string]string) {
columnLoop:
	for _, col := range createStmt.Cols {
		for _, option := range col.Options {
			// The column already has an explicit COLLATE option. The labeled
			// continue is required: a bare continue would only advance this
			// inner loop and the column would still be processed below.
			if option.Tp == ast.ColumnOptionCollate {
				continue columnLoop
			}
		}

		fieldType := col.Tp
		if fieldType.Collate != "" || fieldType.Charset == "" {
			// Either the collation is already set or there is no charset to
			// derive one from.
			continue
		}

		// Only a charset is present: derive the collation from it.
		lowerCharset := strings.ToLower(fieldType.Charset)
		collation, ok := charsetAndDefaultCollationMap[lowerCharset]
		if !ok {
			tctx.L().Warn("not found charset default collation for column.", zap.String("table", createStmt.Table.Name.String()), zap.String("column", col.Name.String()), zap.String("charset", lowerCharset))
			continue
		}
		fieldType.Collate = collation
	}
}

>>>>>>> 1e7f0dcc6... dumpling: add collation_compatible config in dumpling (#31114)
func (d *Dumper) dumpTableData(tctx *tcontext.Context, conn *sql.Conn, meta TableMeta, taskChan chan<- Task) error {
conf := d.conf
if conf.NoData {
Expand Down
80 changes: 80 additions & 0 deletions dumpling/export/dump_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,83 @@ func TestGetListTableTypeByConf(t *testing.T) {
require.Equalf(t, x.expected, getListTableTypeByConf(conf), "server info: %s, consistency: %s", x.serverInfo, x.consistency)
}
}
<<<<<<< HEAD
=======

// TestAdjustDatabaseCollation verifies that adjustDatabaseCollation returns
// the input unchanged in loose mode and, in strict mode, appends the charset's
// default collation only when the statement has no COLLATE option.
func TestAdjustDatabaseCollation(t *testing.T) {
	tctx, cancel := tcontext.Background().WithLogger(appLogger).WithCancel()
	defer cancel()
	sqlParser := parser.New()

	defaultCollations := map[string]string{"utf8mb4": "utf8mb4_general_ci"}

	// origin is the input statement; strict is the expected strict-mode output.
	cases := []struct {
		origin string
		strict string
	}{
		{
			origin: "create database `test` CHARACTER SET=utf8mb4 COLLATE=utf8mb4_general_ci",
			strict: "create database `test` CHARACTER SET=utf8mb4 COLLATE=utf8mb4_general_ci",
		},
		{
			origin: "create database `test` CHARACTER SET=utf8mb4",
			strict: "CREATE DATABASE `test` CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci",
		},
	}

	// Loose mode: every statement comes back untouched.
	for _, c := range cases {
		got, err := adjustDatabaseCollation(tctx, LooseCollationCompatible, sqlParser, c.origin, defaultCollations)
		require.NoError(t, err)
		require.Equal(t, c.origin, got)
	}

	// Strict mode: compare against the expected rewritten statement.
	for _, c := range cases {
		got, err := adjustDatabaseCollation(tctx, StrictCollationCompatible, sqlParser, c.origin, defaultCollations)
		require.NoError(t, err)
		require.Equal(t, c.strict, got)
	}
}

// TestAdjustTableCollation verifies that adjustTableCollation returns the
// input unchanged in loose mode and, in strict mode, produces a restored
// CREATE TABLE statement with default collations filled in for the table and
// for columns that only declare a charset.
func TestAdjustTableCollation(t *testing.T) {
	tctx, cancel := tcontext.Background().WithLogger(appLogger).WithCancel()
	defer cancel()
	sqlParser := parser.New()

	defaultCollations := map[string]string{"utf8mb4": "utf8mb4_general_ci"}

	// origin is the input statement; strict is the expected strict-mode output.
	cases := []struct {
		origin string
		strict string
	}{
		{
			origin: "create table `test`.`t1` (id int) CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci",
			strict: "CREATE TABLE `test`.`t1` (`id` INT) DEFAULT CHARACTER SET = UTF8MB4 DEFAULT COLLATE = UTF8MB4_GENERAL_CI",
		},
		{
			origin: "create table `test`.`t1` (id int) CHARSET=utf8mb4",
			strict: "CREATE TABLE `test`.`t1` (`id` INT) DEFAULT CHARACTER SET = UTF8MB4 DEFAULT COLLATE = UTF8MB4_GENERAL_CI",
		},
		{
			origin: "create table `test`.`t1` (id int, name varchar(20) CHARACTER SET utf8mb4, work varchar(20)) CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ",
			strict: "CREATE TABLE `test`.`t1` (`id` INT,`name` VARCHAR(20) CHARACTER SET UTF8MB4 COLLATE utf8mb4_general_ci,`work` VARCHAR(20)) DEFAULT CHARACTER SET = UTF8MB4 DEFAULT COLLATE = UTF8MB4_GENERAL_CI",
		},
		{
			origin: "create table `test`.`t1` (id int, name varchar(20), work varchar(20)) CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci",
			strict: "CREATE TABLE `test`.`t1` (`id` INT,`name` VARCHAR(20),`work` VARCHAR(20)) DEFAULT CHARACTER SET = UTF8MB4 DEFAULT COLLATE = UTF8MB4_GENERAL_CI",
		},
		{
			origin: "create table `test`.`t1` (id int, name varchar(20) COLLATE utf8mb4_general_ci, work varchar(20)) CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci",
			strict: "CREATE TABLE `test`.`t1` (`id` INT,`name` VARCHAR(20) COLLATE utf8mb4_general_ci,`work` VARCHAR(20)) DEFAULT CHARACTER SET = UTF8MB4 DEFAULT COLLATE = UTF8MB4_GENERAL_CI",
		},
		{
			origin: "create table `test`.`t1` (id int, name varchar(20) COLLATE utf8mb4_general_ci, work varchar(20) CHARACTER SET utf8mb4) CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci",
			strict: "CREATE TABLE `test`.`t1` (`id` INT,`name` VARCHAR(20) COLLATE utf8mb4_general_ci,`work` VARCHAR(20) CHARACTER SET UTF8MB4 COLLATE utf8mb4_general_ci) DEFAULT CHARACTER SET = UTF8MB4 DEFAULT COLLATE = UTF8MB4_GENERAL_CI",
		},
		{
			origin: "create table `test`.`t1` (id int, name varchar(20) CHARACTER SET utf8mb4, work varchar(20)) CHARSET=utf8mb4 ",
			strict: "CREATE TABLE `test`.`t1` (`id` INT,`name` VARCHAR(20) CHARACTER SET UTF8MB4 COLLATE utf8mb4_general_ci,`work` VARCHAR(20)) DEFAULT CHARACTER SET = UTF8MB4 DEFAULT COLLATE = UTF8MB4_GENERAL_CI",
		},
		{
			origin: "create table `test`.`t1` (id int, name varchar(20), work varchar(20)) CHARSET=utf8mb4",
			strict: "CREATE TABLE `test`.`t1` (`id` INT,`name` VARCHAR(20),`work` VARCHAR(20)) DEFAULT CHARACTER SET = UTF8MB4 DEFAULT COLLATE = UTF8MB4_GENERAL_CI",
		},
		{
			origin: "create table `test`.`t1` (id int, name varchar(20) COLLATE utf8mb4_general_ci, work varchar(20)) CHARSET=utf8mb4",
			strict: "CREATE TABLE `test`.`t1` (`id` INT,`name` VARCHAR(20) COLLATE utf8mb4_general_ci,`work` VARCHAR(20)) DEFAULT CHARACTER SET = UTF8MB4 DEFAULT COLLATE = UTF8MB4_GENERAL_CI",
		},
		{
			origin: "create table `test`.`t1` (id int, name varchar(20) COLLATE utf8mb4_general_ci, work varchar(20) CHARACTER SET utf8mb4) CHARSET=utf8mb4",
			strict: "CREATE TABLE `test`.`t1` (`id` INT,`name` VARCHAR(20) COLLATE utf8mb4_general_ci,`work` VARCHAR(20) CHARACTER SET UTF8MB4 COLLATE utf8mb4_general_ci) DEFAULT CHARACTER SET = UTF8MB4 DEFAULT COLLATE = UTF8MB4_GENERAL_CI",
		},
	}

	// Loose mode: every statement comes back untouched.
	for _, c := range cases {
		got, err := adjustTableCollation(tctx, LooseCollationCompatible, sqlParser, c.origin, defaultCollations)
		require.NoError(t, err)
		require.Equal(t, c.origin, got)
	}

	// Strict mode: compare against the expected rewritten statement.
	for _, c := range cases {
		got, err := adjustTableCollation(tctx, StrictCollationCompatible, sqlParser, c.origin, defaultCollations)
		require.NoError(t, err)
		require.Equal(t, c.strict, got)
	}
}
>>>>>>> 1e7f0dcc6... dumpling: add collation_compatible config in dumpling (#31114)
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
/*!40101 SET NAMES binary*/;
<<<<<<< HEAD
=======
/*T![placement] SET PLACEMENT_CHECKS = 0*/;
>>>>>>> 1e7f0dcc6... dumpling: add collation_compatible config in dumpling (#31114)
CREATE DATABASE `quo``te/database` /*!40100 DEFAULT CHARACTER SET latin1 */;
4 changes: 4 additions & 0 deletions dumpling/tests/quote/data/quote-database-schema-create.sql
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
/*!40101 SET NAMES binary*/;
<<<<<<< HEAD
=======
/*T![placement] SET PLACEMENT_CHECKS = 0*/;
>>>>>>> 1e7f0dcc6... dumpling: add collation_compatible config in dumpling (#31114)
CREATE DATABASE `quo``te/database` /*!40100 DEFAULT CHARACTER SET latin1 */ /*!80016 DEFAULT ENCRYPTION='N' */;
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
/*!40101 SET NAMES binary*/;
<<<<<<< HEAD
=======
/*T![placement] SET PLACEMENT_CHECKS = 0*/;
>>>>>>> 1e7f0dcc6... dumpling: add collation_compatible config in dumpling (#31114)
CREATE TABLE `quo``te/table` (
`quo``te/col` int(11) NOT NULL,
`a` int(11) DEFAULT NULL,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
/*!40101 SET NAMES binary*/;
<<<<<<< HEAD
=======
/*T![placement] SET PLACEMENT_CHECKS = 0*/;
>>>>>>> 1e7f0dcc6... dumpling: add collation_compatible config in dumpling (#31114)
CREATE TABLE `quo``te/table` (
`quo``te/col` int NOT NULL,
`a` int DEFAULT NULL,
Expand Down

0 comments on commit da1a8ee

Please sign in to comment.