From e48e54915ef706c00cf4742c53b4cb76e74a9b09 Mon Sep 17 00:00:00 2001 From: kennytm Date: Mon, 29 Oct 2018 01:28:36 +0800 Subject: [PATCH 01/15] mydump: added a ragel-based data file lexer and chunk parser The new lexer is 8x faster than MDDataReader. Speed is now a concern because we are going to read the entire file to get the accurate rows count per chunk. --- .gitattributes | 1 + Makefile | 5 + lightning/mydump/parser.go | 198 ++++ lightning/mydump/parser.rl | 100 ++ lightning/mydump/parser_generated.go | 1308 ++++++++++++++++++++++++++ lightning/mydump/parser_test.go | 111 +++ 6 files changed, 1723 insertions(+) create mode 100644 .gitattributes create mode 100644 lightning/mydump/parser.go create mode 100644 lightning/mydump/parser.rl create mode 100644 lightning/mydump/parser_generated.go create mode 100644 lightning/mydump/parser_test.go diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..ba35fa100 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*_generated.go linguist-generated=true diff --git a/Makefile b/Makefile index ef46180d7..a0f1c35c4 100644 --- a/Makefile +++ b/Makefile @@ -54,6 +54,11 @@ checksuccess: echo "Lightning build successfully :-) !" ; \ fi +data_parsers: + ragel -Z -G2 -o tmp_parser.go lightning/mydump/parser.rl + @echo '// Code generated by ragel DO NOT EDIT.' | cat - tmp_parser.go > lightning/mydump/parser_generated.go + @rm tmp_parser.go + lightning: $(GOBUILD) $(RACE_FLAG) -ldflags '$(LDFLAGS)' -o $(LIGHTNING_BIN) cmd/main.go diff --git a/lightning/mydump/parser.go b/lightning/mydump/parser.go new file mode 100644 index 000000000..1e135a17a --- /dev/null +++ b/lightning/mydump/parser.go @@ -0,0 +1,198 @@ +package mydump + +import ( + "io" + + "github.com/pkg/errors" +) + +// ChunkParser is a parser of the data files (the file containing only INSERT +// statements). +type ChunkParser struct { + // states for the lexer + reader io.Reader + buf []byte + bufSize int + isLastChunk bool + + lastRow Row + // Current file offset. + pos int64 + // The (quoted) table name used in the last INSERT statement. Assumed to be + // constant throughout the entire file. + TableName []byte + // The list of columns in the form `(a, b, c)` in the last INSERT statement. + // Assumed to be constant throughout the entire file. + Columns []byte +} + +// Chunk represents a portion of the data file. +type Chunk struct { + Offset int64 + EndOffset int64 + PrevRowIDMax int64 + RowIDMax int64 +} + +// Row is the content of a row. +type Row struct { + RowID int64 + Row []byte +} + +// NewChunkParser creates a new parser which can read chunks out of a file. +func NewChunkParser(reader io.Reader) *ChunkParser { + return &ChunkParser{ + reader: reader, + bufSize: 8192, + } +} + +// Reader returns the underlying reader of this parser. +func (parser *ChunkParser) Reader() io.Reader { + return parser.reader +} + +// SetPos changes the reported position and row ID. +func (parser *ChunkParser) SetPos(pos int64, rowID int64) { + parser.pos = pos + parser.lastRow.RowID = rowID +} + +// Pos returns the current file offset. +func (parser *ChunkParser) Pos() int64 { + return parser.pos +} + +type token byte + +const ( + tokNil token = iota + tokValues + tokRow + tokName +) + +func tryAppendTo(out *[]byte, tail []byte) { + if out == nil || len(tail) == 0 { + return + } + if len(*out) == 0 { + *out = tail + } else { + *out = append(*out, tail...) 
+ } +} + +func (parser *ChunkParser) readBlock() error { + block := make([]byte, parser.bufSize) + + n, err := io.ReadFull(parser.reader, block) + switch err { + case io.ErrUnexpectedEOF: + parser.isLastChunk = true + fallthrough + case nil: + tryAppendTo(&parser.buf, block[:n]) + return nil + default: + return errors.Trace(err) + } +} + +// ReadRow reads a row from the datafile. +func (parser *ChunkParser) ReadRow() error { + // This parser will recognize contents like: + // + // `tableName` (...) VALUES (...) (...) (...) + // + // Keywords like INSERT, INTO and separators like ',' and ';' are treated + // like comments and ignored. Therefore, this parser will accept some + // nonsense input. The advantage is the parser becomes extremely simple, + // suitable for us where we just want to quickly and accurately split the + // file apart, not to validate the content. + + type state byte + + const ( + // the state after reading "VALUES" + stateRow state = iota + // the state after reading the table name, before "VALUES" + stateColumns + ) + + row := &parser.lastRow + st := stateRow + + for { + tok, content, err := parser.lex() + if err != nil { + return errors.Trace(err) + } + switch tok { + case tokRow: + switch st { + case stateRow: + row.RowID++ + row.Row = content + return nil + case stateColumns: + parser.Columns = content + continue + } + + case tokName: + st = stateColumns + parser.TableName = content + parser.Columns = nil + continue + + case tokValues: + st = stateRow + continue + + default: + return errors.Errorf("Syntax error at position %d", parser.pos) + } + } +} + +// LastRow is the copy of the row parsed by the last call to ReadRow(). +func (parser *ChunkParser) LastRow() Row { + return parser.lastRow +} + +// ReadChunks parses the entire file and splits it into continuous chunks of +// size >= minSize. +func (parser *ChunkParser) ReadChunks(minSize int64) ([]Chunk, error) { + var chunks []Chunk + + cur := Chunk{ + Offset: parser.pos, + EndOffset: parser.pos, + PrevRowIDMax: parser.lastRow.RowID, + RowIDMax: parser.lastRow.RowID, + } + + for { + switch err := parser.ReadRow(); errors.Cause(err) { + case nil: + cur.EndOffset = parser.pos + cur.RowIDMax = parser.lastRow.RowID + if cur.EndOffset-cur.Offset >= minSize { + chunks = append(chunks, cur) + cur.Offset = cur.EndOffset + cur.PrevRowIDMax = cur.RowIDMax + } + + case io.EOF: + if cur.Offset < cur.EndOffset { + chunks = append(chunks, cur) + } + return chunks, nil + + default: + return nil, errors.Trace(err) + } + } +} diff --git a/lightning/mydump/parser.rl b/lightning/mydump/parser.rl new file mode 100644 index 000000000..9c3ff75e2 --- /dev/null +++ b/lightning/mydump/parser.rl @@ -0,0 +1,100 @@ +// Please edit `parser.rl` if you want to modify this file. 
To generate +// `parser_generated.go`, please execute +// +// ```sh +// make data_parsers +// ``` + +package mydump + +import ( + "io" + + "github.com/pingcap/tidb-lightning/lightning/common" + "github.com/pkg/errors" +) + +%%{ +#` + +machine chunk_parser; + +block_comment = '/*' any* :>> '*/'; +line_comment = /--[^\n]*\n/; +comment = block_comment | line_comment | space | [,;] | 'insert'i | 'into'i; + +single_quoted = "'" (^"'" | "\\" any)** "'"; +double_quoted = '"' (^'"' | '\\' any)** '"'; +back_quoted = '`' ^'`'* '`'; +unquoted = ^([,;()'"`] | space)+; + +row = '(' (^[)'"`] | single_quoted | double_quoted | back_quoted)* ')'; +name = (back_quoted | double_quoted | unquoted)+; + +main := |* + comment; + + 'values'i => { + consumedToken = tokValues + fbreak; + }; + + row => { + consumedToken = tokRow + fbreak; + }; + + name => { + consumedToken = tokName + fbreak; + }; +*|; + +#` +}%% + +%% write data; + +func (parser *ChunkParser) lex() (token, []byte, error) { + var cs, ts, te, act, p int + %% write init; + + for { + data := parser.buf + consumedToken := tokNil + pe := len(data) + eof := -1 + if parser.isLastChunk { + eof = pe + } + + %% write exec; + + if cs == %%{ write error; }%% { + common.AppLogger.Errorf("Syntax error near byte %d, content is «%s»", parser.pos, string(data)) + return tokNil, nil, errors.New("Syntax error") + } + + if consumedToken != tokNil { + result := data[ts:te] + parser.buf = data[te:] + parser.pos += int64(te) + return consumedToken, result, nil + } + + if parser.isLastChunk { + return tokNil, nil, io.EOF + } + + parser.buf = parser.buf[ts:] + parser.pos += int64(ts) + p -= ts + te -= ts + ts = 0 + if err := parser.readBlock(); err != nil { + return tokNil, nil, errors.Trace(err) + } + } + + return tokNil, nil, nil +} diff --git a/lightning/mydump/parser_generated.go b/lightning/mydump/parser_generated.go new file mode 100644 index 000000000..0fcbbc226 --- /dev/null +++ b/lightning/mydump/parser_generated.go @@ -0,0 +1,1308 @@ +// Code generated by ragel DO NOT EDIT. + +//line lightning/mydump/parser.rl:1 +// Please edit `parser.rl` if you want to modify this file. 
To generate +// `parser_generated.go`, please execute +// +// ```sh +// make data_parsers +// ``` + +package mydump + +import ( + "io" + + "github.com/pingcap/tidb-lightning/lightning/common" + "github.com/pkg/errors" +) + + +//line lightning/mydump/parser.rl:54 + + + +//line tmp_parser.go:25 +const chunk_parser_start int = 21 +const chunk_parser_first_final int = 21 +const chunk_parser_error int = 0 + +const chunk_parser_en_main int = 21 + + +//line lightning/mydump/parser.rl:57 + +func (parser *ChunkParser) lex() (token, []byte, error) { + var cs, ts, te, act, p int + +//line tmp_parser.go:38 + { + cs = chunk_parser_start + ts = 0 + te = 0 + act = 0 + } + +//line lightning/mydump/parser.rl:61 + + for { + data := parser.buf + consumedToken := tokNil + pe := len(data) + eof := -1 + if parser.isLastChunk { + eof = pe + } + + +//line tmp_parser.go:58 + { + if p == pe { + goto _test_eof + } + switch cs { + case 21: + goto st_case_21 + case 22: + goto st_case_22 + case 1: + goto st_case_1 + case 2: + goto st_case_2 + case 3: + goto st_case_3 + case 0: + goto st_case_0 + case 4: + goto st_case_4 + case 5: + goto st_case_5 + case 6: + goto st_case_6 + case 7: + goto st_case_7 + case 8: + goto st_case_8 + case 9: + goto st_case_9 + case 23: + goto st_case_23 + case 24: + goto st_case_24 + case 10: + goto st_case_10 + case 11: + goto st_case_11 + case 25: + goto st_case_25 + case 12: + goto st_case_12 + case 13: + goto st_case_13 + case 26: + goto st_case_26 + case 27: + goto st_case_27 + case 28: + goto st_case_28 + case 14: + goto st_case_14 + case 15: + goto st_case_15 + case 16: + goto st_case_16 + case 17: + goto st_case_17 + case 18: + goto st_case_18 + case 29: + goto st_case_29 + case 19: + goto st_case_19 + case 20: + goto st_case_20 + case 30: + goto st_case_30 + case 31: + goto st_case_31 + case 32: + goto st_case_32 + case 33: + goto st_case_33 + case 34: + goto st_case_34 + case 35: + goto st_case_35 + case 36: + goto st_case_36 + case 37: + goto st_case_37 + case 38: + goto st_case_38 + case 39: + goto st_case_39 + case 40: + goto st_case_40 + } + goto st_out +tr0: +//line NONE:1 + switch act { + case 0: + {{goto st0 }} + case 2: + {p = (te) - 1 + + consumedToken = tokValues + {p++; cs = 21; goto _out } + } + case 4: + {p = (te) - 1 + + consumedToken = tokName + {p++; cs = 21; goto _out } + } + default: + {p = (te) - 1 +} + } + + goto st21 +tr8: +//line lightning/mydump/parser.rl:42 +te = p+1 +{ + consumedToken = tokRow + {p++; cs = 21; goto _out } + } + goto st21 +tr12: +//line lightning/mydump/parser.rl:47 +p = (te) - 1 +{ + consumedToken = tokName + {p++; cs = 21; goto _out } + } + goto st21 +tr14: +//line lightning/mydump/parser.rl:35 +te = p+1 + + goto st21 +tr34: +//line lightning/mydump/parser.rl:47 +te = p +p-- +{ + consumedToken = tokName + {p++; cs = 21; goto _out } + } + goto st21 +tr35: +//line lightning/mydump/parser.rl:35 +te = p +p-- + + goto st21 + st21: +//line NONE:1 +ts = 0 + +//line NONE:1 +act = 0 + + if p++; p == pe { + goto _test_eof21 + } + st_case_21: +//line NONE:1 +ts = p + +//line tmp_parser.go:221 + switch data[p] { + case 32: + goto tr14 + case 34: + goto st1 + case 40: + goto st4 + case 44: + goto tr14 + case 45: + goto tr30 + case 47: + goto tr31 + case 59: + goto tr14 + case 73: + goto tr32 + case 86: + goto tr33 + case 96: + goto st3 + case 105: + goto tr32 + case 118: + goto tr33 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto st0 + } + case data[p] >= 9: + goto tr14 + } + goto tr2 +tr2: +//line NONE:1 +te = p+1 + 
+//line lightning/mydump/parser.rl:47 +act = 4; + goto st22 +tr37: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:35 +act = 1; + goto st22 +tr47: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:37 +act = 2; + goto st22 + st22: + if p++; p == pe { + goto _test_eof22 + } + st_case_22: +//line tmp_parser.go:283 + switch data[p] { + case 32: + goto tr0 + case 34: + goto st1 + case 44: + goto tr0 + case 59: + goto tr0 + case 96: + goto st3 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto tr0 + } + case data[p] >= 9: + goto tr0 + } + goto tr2 + st1: + if p++; p == pe { + goto _test_eof1 + } + st_case_1: + switch data[p] { + case 34: + goto tr2 + case 92: + goto st2 + } + goto st1 + st2: + if p++; p == pe { + goto _test_eof2 + } + st_case_2: + goto st1 + st3: + if p++; p == pe { + goto _test_eof3 + } + st_case_3: + if data[p] == 96 { + goto tr2 + } + goto st3 +st_case_0: + st0: + cs = 0 + goto _out + st4: + if p++; p == pe { + goto _test_eof4 + } + st_case_4: + switch data[p] { + case 34: + goto st5 + case 39: + goto st7 + case 41: + goto tr8 + case 96: + goto st9 + } + goto st4 + st5: + if p++; p == pe { + goto _test_eof5 + } + st_case_5: + switch data[p] { + case 34: + goto st4 + case 92: + goto st6 + } + goto st5 + st6: + if p++; p == pe { + goto _test_eof6 + } + st_case_6: + goto st5 + st7: + if p++; p == pe { + goto _test_eof7 + } + st_case_7: + switch data[p] { + case 39: + goto st4 + case 92: + goto st8 + } + goto st7 + st8: + if p++; p == pe { + goto _test_eof8 + } + st_case_8: + goto st7 + st9: + if p++; p == pe { + goto _test_eof9 + } + st_case_9: + if data[p] == 96 { + goto st4 + } + goto st9 +tr30: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:47 +act = 4; + goto st23 + st23: + if p++; p == pe { + goto _test_eof23 + } + st_case_23: +//line tmp_parser.go:409 + switch data[p] { + case 32: + goto tr34 + case 34: + goto st1 + case 44: + goto tr34 + case 45: + goto tr17 + case 59: + goto tr34 + case 96: + goto st3 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto tr34 + } + case data[p] >= 9: + goto tr34 + } + goto tr2 +tr17: +//line NONE:1 +te = p+1 + + goto st24 + st24: + if p++; p == pe { + goto _test_eof24 + } + st_case_24: +//line tmp_parser.go:443 + switch data[p] { + case 10: + goto tr14 + case 32: + goto st10 + case 34: + goto st11 + case 44: + goto st10 + case 59: + goto st10 + case 96: + goto st13 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto st10 + } + case data[p] >= 9: + goto st10 + } + goto tr17 + st10: + if p++; p == pe { + goto _test_eof10 + } + st_case_10: + if data[p] == 10 { + goto tr14 + } + goto st10 + st11: + if p++; p == pe { + goto _test_eof11 + } + st_case_11: + switch data[p] { + case 10: + goto tr16 + case 34: + goto tr17 + case 92: + goto st12 + } + goto st11 +tr16: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:35 +act = 1; + goto st25 + st25: + if p++; p == pe { + goto _test_eof25 + } + st_case_25: +//line tmp_parser.go:502 + switch data[p] { + case 34: + goto tr2 + case 92: + goto st2 + } + goto st1 + st12: + if p++; p == pe { + goto _test_eof12 + } + st_case_12: + if data[p] == 10 { + goto tr16 + } + goto st11 + st13: + if p++; p == pe { + goto _test_eof13 + } + st_case_13: + switch data[p] { + case 10: + goto tr20 + case 96: + goto tr17 + } + goto st13 +tr20: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:35 +act = 1; + goto st26 + st26: + if p++; p == pe { + goto _test_eof26 + } + 
st_case_26: +//line tmp_parser.go:543 + if data[p] == 96 { + goto tr2 + } + goto st3 +tr31: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:47 +act = 4; + goto st27 + st27: + if p++; p == pe { + goto _test_eof27 + } + st_case_27: +//line tmp_parser.go:560 + switch data[p] { + case 32: + goto tr34 + case 34: + goto st1 + case 42: + goto tr24 + case 44: + goto tr34 + case 59: + goto tr34 + case 96: + goto st3 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto tr34 + } + case data[p] >= 9: + goto tr34 + } + goto tr2 +tr24: +//line NONE:1 +te = p+1 + + goto st28 + st28: + if p++; p == pe { + goto _test_eof28 + } + st_case_28: +//line tmp_parser.go:594 + switch data[p] { + case 32: + goto st14 + case 34: + goto st16 + case 42: + goto tr36 + case 44: + goto st14 + case 59: + goto st14 + case 96: + goto st19 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto st14 + } + case data[p] >= 9: + goto st14 + } + goto tr24 + st14: + if p++; p == pe { + goto _test_eof14 + } + st_case_14: + if data[p] == 42 { + goto st15 + } + goto st14 + st15: + if p++; p == pe { + goto _test_eof15 + } + st_case_15: + switch data[p] { + case 42: + goto st15 + case 47: + goto tr14 + } + goto st14 + st16: + if p++; p == pe { + goto _test_eof16 + } + st_case_16: + switch data[p] { + case 34: + goto tr24 + case 42: + goto st17 + case 92: + goto st18 + } + goto st16 + st17: + if p++; p == pe { + goto _test_eof17 + } + st_case_17: + switch data[p] { + case 34: + goto tr24 + case 42: + goto st17 + case 47: + goto tr16 + case 92: + goto st18 + } + goto st16 + st18: + if p++; p == pe { + goto _test_eof18 + } + st_case_18: + if data[p] == 42 { + goto st17 + } + goto st16 +tr36: +//line NONE:1 +te = p+1 + + goto st29 + st29: + if p++; p == pe { + goto _test_eof29 + } + st_case_29: +//line tmp_parser.go:688 + switch data[p] { + case 32: + goto st14 + case 34: + goto st16 + case 42: + goto tr36 + case 44: + goto st14 + case 47: + goto tr37 + case 59: + goto st14 + case 96: + goto st19 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto st14 + } + case data[p] >= 9: + goto st14 + } + goto tr24 + st19: + if p++; p == pe { + goto _test_eof19 + } + st_case_19: + switch data[p] { + case 42: + goto st20 + case 96: + goto tr24 + } + goto st19 + st20: + if p++; p == pe { + goto _test_eof20 + } + st_case_20: + switch data[p] { + case 42: + goto st20 + case 47: + goto tr20 + case 96: + goto tr24 + } + goto st19 +tr32: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:47 +act = 4; + goto st30 + st30: + if p++; p == pe { + goto _test_eof30 + } + st_case_30: +//line tmp_parser.go:752 + switch data[p] { + case 32: + goto tr34 + case 34: + goto st1 + case 44: + goto tr34 + case 59: + goto tr34 + case 78: + goto tr38 + case 96: + goto st3 + case 110: + goto tr38 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto tr34 + } + case data[p] >= 9: + goto tr34 + } + goto tr2 +tr38: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:47 +act = 4; + goto st31 + st31: + if p++; p == pe { + goto _test_eof31 + } + st_case_31: +//line tmp_parser.go:790 + switch data[p] { + case 32: + goto tr34 + case 34: + goto st1 + case 44: + goto tr34 + case 59: + goto tr34 + case 83: + goto tr39 + case 84: + goto tr40 + case 96: + goto st3 + case 115: + goto tr39 + case 116: + goto tr40 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto tr34 + } + case data[p] >= 9: + goto tr34 + } + goto tr2 
+tr39: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:47 +act = 4; + goto st32 + st32: + if p++; p == pe { + goto _test_eof32 + } + st_case_32: +//line tmp_parser.go:832 + switch data[p] { + case 32: + goto tr34 + case 34: + goto st1 + case 44: + goto tr34 + case 59: + goto tr34 + case 69: + goto tr41 + case 96: + goto st3 + case 101: + goto tr41 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto tr34 + } + case data[p] >= 9: + goto tr34 + } + goto tr2 +tr41: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:47 +act = 4; + goto st33 + st33: + if p++; p == pe { + goto _test_eof33 + } + st_case_33: +//line tmp_parser.go:870 + switch data[p] { + case 32: + goto tr34 + case 34: + goto st1 + case 44: + goto tr34 + case 59: + goto tr34 + case 82: + goto tr42 + case 96: + goto st3 + case 114: + goto tr42 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto tr34 + } + case data[p] >= 9: + goto tr34 + } + goto tr2 +tr42: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:47 +act = 4; + goto st34 + st34: + if p++; p == pe { + goto _test_eof34 + } + st_case_34: +//line tmp_parser.go:908 + switch data[p] { + case 32: + goto tr34 + case 34: + goto st1 + case 44: + goto tr34 + case 59: + goto tr34 + case 84: + goto tr37 + case 96: + goto st3 + case 116: + goto tr37 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto tr34 + } + case data[p] >= 9: + goto tr34 + } + goto tr2 +tr40: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:47 +act = 4; + goto st35 + st35: + if p++; p == pe { + goto _test_eof35 + } + st_case_35: +//line tmp_parser.go:946 + switch data[p] { + case 32: + goto tr34 + case 34: + goto st1 + case 44: + goto tr34 + case 59: + goto tr34 + case 79: + goto tr37 + case 96: + goto st3 + case 111: + goto tr37 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto tr34 + } + case data[p] >= 9: + goto tr34 + } + goto tr2 +tr33: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:47 +act = 4; + goto st36 + st36: + if p++; p == pe { + goto _test_eof36 + } + st_case_36: +//line tmp_parser.go:984 + switch data[p] { + case 32: + goto tr34 + case 34: + goto st1 + case 44: + goto tr34 + case 59: + goto tr34 + case 65: + goto tr43 + case 96: + goto st3 + case 97: + goto tr43 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto tr34 + } + case data[p] >= 9: + goto tr34 + } + goto tr2 +tr43: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:47 +act = 4; + goto st37 + st37: + if p++; p == pe { + goto _test_eof37 + } + st_case_37: +//line tmp_parser.go:1022 + switch data[p] { + case 32: + goto tr34 + case 34: + goto st1 + case 44: + goto tr34 + case 59: + goto tr34 + case 76: + goto tr44 + case 96: + goto st3 + case 108: + goto tr44 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto tr34 + } + case data[p] >= 9: + goto tr34 + } + goto tr2 +tr44: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:47 +act = 4; + goto st38 + st38: + if p++; p == pe { + goto _test_eof38 + } + st_case_38: +//line tmp_parser.go:1060 + switch data[p] { + case 32: + goto tr34 + case 34: + goto st1 + case 44: + goto tr34 + case 59: + goto tr34 + case 85: + goto tr45 + case 96: + goto st3 + case 117: + goto tr45 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto tr34 + } + case data[p] >= 9: + goto tr34 + } + goto tr2 +tr45: +//line NONE:1 +te = p+1 + 
+//line lightning/mydump/parser.rl:47 +act = 4; + goto st39 + st39: + if p++; p == pe { + goto _test_eof39 + } + st_case_39: +//line tmp_parser.go:1098 + switch data[p] { + case 32: + goto tr34 + case 34: + goto st1 + case 44: + goto tr34 + case 59: + goto tr34 + case 69: + goto tr46 + case 96: + goto st3 + case 101: + goto tr46 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto tr34 + } + case data[p] >= 9: + goto tr34 + } + goto tr2 +tr46: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:47 +act = 4; + goto st40 + st40: + if p++; p == pe { + goto _test_eof40 + } + st_case_40: +//line tmp_parser.go:1136 + switch data[p] { + case 32: + goto tr34 + case 34: + goto st1 + case 44: + goto tr34 + case 59: + goto tr34 + case 83: + goto tr47 + case 96: + goto st3 + case 115: + goto tr47 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto tr34 + } + case data[p] >= 9: + goto tr34 + } + goto tr2 + st_out: + _test_eof21: cs = 21; goto _test_eof + _test_eof22: cs = 22; goto _test_eof + _test_eof1: cs = 1; goto _test_eof + _test_eof2: cs = 2; goto _test_eof + _test_eof3: cs = 3; goto _test_eof + _test_eof4: cs = 4; goto _test_eof + _test_eof5: cs = 5; goto _test_eof + _test_eof6: cs = 6; goto _test_eof + _test_eof7: cs = 7; goto _test_eof + _test_eof8: cs = 8; goto _test_eof + _test_eof9: cs = 9; goto _test_eof + _test_eof23: cs = 23; goto _test_eof + _test_eof24: cs = 24; goto _test_eof + _test_eof10: cs = 10; goto _test_eof + _test_eof11: cs = 11; goto _test_eof + _test_eof25: cs = 25; goto _test_eof + _test_eof12: cs = 12; goto _test_eof + _test_eof13: cs = 13; goto _test_eof + _test_eof26: cs = 26; goto _test_eof + _test_eof27: cs = 27; goto _test_eof + _test_eof28: cs = 28; goto _test_eof + _test_eof14: cs = 14; goto _test_eof + _test_eof15: cs = 15; goto _test_eof + _test_eof16: cs = 16; goto _test_eof + _test_eof17: cs = 17; goto _test_eof + _test_eof18: cs = 18; goto _test_eof + _test_eof29: cs = 29; goto _test_eof + _test_eof19: cs = 19; goto _test_eof + _test_eof20: cs = 20; goto _test_eof + _test_eof30: cs = 30; goto _test_eof + _test_eof31: cs = 31; goto _test_eof + _test_eof32: cs = 32; goto _test_eof + _test_eof33: cs = 33; goto _test_eof + _test_eof34: cs = 34; goto _test_eof + _test_eof35: cs = 35; goto _test_eof + _test_eof36: cs = 36; goto _test_eof + _test_eof37: cs = 37; goto _test_eof + _test_eof38: cs = 38; goto _test_eof + _test_eof39: cs = 39; goto _test_eof + _test_eof40: cs = 40; goto _test_eof + + _test_eof: {} + if p == eof { + switch cs { + case 22: + goto tr0 + case 1: + goto tr0 + case 2: + goto tr0 + case 3: + goto tr0 + case 23: + goto tr34 + case 24: + goto tr34 + case 10: + goto tr12 + case 11: + goto tr12 + case 25: + goto tr35 + case 12: + goto tr12 + case 13: + goto tr12 + case 26: + goto tr35 + case 27: + goto tr34 + case 28: + goto tr34 + case 14: + goto tr12 + case 15: + goto tr12 + case 16: + goto tr12 + case 17: + goto tr12 + case 18: + goto tr12 + case 29: + goto tr34 + case 19: + goto tr12 + case 20: + goto tr12 + case 30: + goto tr34 + case 31: + goto tr34 + case 32: + goto tr34 + case 33: + goto tr34 + case 34: + goto tr34 + case 35: + goto tr34 + case 36: + goto tr34 + case 37: + goto tr34 + case 38: + goto tr34 + case 39: + goto tr34 + case 40: + goto tr34 + } + } + + _out: {} + } + +//line lightning/mydump/parser.rl:72 + + if cs == 0 { + common.AppLogger.Errorf("Syntax error near byte %d, content is «%s»", parser.pos, string(data)) + return tokNil, nil, errors.New("Syntax error") + 
} + + if consumedToken != tokNil { + result := data[ts:te] + parser.buf = data[te:] + parser.pos += int64(te) + return consumedToken, result, nil + } + + if parser.isLastChunk { + return tokNil, nil, io.EOF + } + + parser.buf = parser.buf[ts:] + parser.pos += int64(ts) + p -= ts + te -= ts + ts = 0 + if err := parser.readBlock(); err != nil { + return tokNil, nil, errors.Trace(err) + } + } + + return tokNil, nil, nil +} diff --git a/lightning/mydump/parser_test.go b/lightning/mydump/parser_test.go new file mode 100644 index 000000000..4f033b66e --- /dev/null +++ b/lightning/mydump/parser_test.go @@ -0,0 +1,111 @@ +package mydump_test + +import ( + "io" + "strings" + + . "github.com/pingcap/check" + "github.com/pingcap/tidb-lightning/lightning/mydump" + "github.com/pkg/errors" +) + +var _ = Suite(&testMydumpParserSuite{}) + +type testMydumpParserSuite struct{} + +func (s *testMydumpParserSuite) SetUpSuite(c *C) {} +func (s *testMydumpParserSuite) TearDownSuite(c *C) {} + +func (s *testMydumpParserSuite) TestReadRow(c *C) { + reader := strings.NewReader( + "/* whatever pragmas */;" + + "INSERT INTO `namespaced`.`table` (columns, more, columns) VALUES (1, 2, 3), (4, 5, 6);" + + "INSERT `namespaced`.`table` (x,y,z) VALUES (7,8,9);" + + "insert another_table values (10, 11, 12, '(13)', '(', 14, ')');", + ) + + parser := mydump.NewChunkParser(reader) + + c.Assert(parser.ReadRow(), IsNil) + c.Assert(parser.LastRow(), DeepEquals, mydump.Row{ + RowID: 1, + Row: []byte("(1, 2, 3)"), + }) + c.Assert(parser.TableName, DeepEquals, []byte("`namespaced`.`table`")) + c.Assert(parser.Columns, DeepEquals, []byte("(columns, more, columns)")) + c.Assert(parser.Pos(), Equals, int64(97)) + + c.Assert(parser.ReadRow(), IsNil) + c.Assert(parser.LastRow(), DeepEquals, mydump.Row{ + RowID: 2, + Row: []byte("(4, 5, 6)"), + }) + c.Assert(parser.TableName, DeepEquals, []byte("`namespaced`.`table`")) + c.Assert(parser.Columns, DeepEquals, []byte("(columns, more, columns)")) + c.Assert(parser.Pos(), Equals, int64(108)) + + c.Assert(parser.ReadRow(), IsNil) + c.Assert(parser.LastRow(), DeepEquals, mydump.Row{ + RowID: 3, + Row: []byte("(7,8,9)"), + }) + c.Assert(parser.TableName, DeepEquals, []byte("`namespaced`.`table`")) + c.Assert(parser.Columns, DeepEquals, []byte("(x,y,z)")) + c.Assert(parser.Pos(), Equals, int64(159)) + + c.Assert(parser.ReadRow(), IsNil) + c.Assert(parser.LastRow(), DeepEquals, mydump.Row{ + RowID: 4, + Row: []byte("(10, 11, 12, '(13)', '(', 14, ')')"), + }) + c.Assert(parser.TableName, DeepEquals, []byte("another_table")) + c.Assert(parser.Columns, IsNil) + c.Assert(parser.Pos(), Equals, int64(222)) + + c.Assert(errors.Cause(parser.ReadRow()), Equals, io.EOF) +} + +func (s *testMydumpParserSuite) TestReadChunks(c *C) { + reader := strings.NewReader(` + INSERT foo VALUES (1,2,3,4),(5,6,7,8),(9,10,11,12); + INSERT foo VALUES (13,14,15,16),(17,18,19,20),(21,22,23,24),(25,26,27,28); + INSERT foo VALUES (29,30,31,32),(33,34,35,36); + `) + + parser := mydump.NewChunkParser(reader) + + chunks, err := parser.ReadChunks(32) + c.Assert(err, IsNil) + c.Assert(chunks, DeepEquals, []mydump.Chunk{ + mydump.Chunk{ + Offset: 0, + EndOffset: 40, + PrevRowIDMax: 0, + RowIDMax: 2, + }, + mydump.Chunk{ + Offset: 40, + EndOffset: 88, + PrevRowIDMax: 2, + RowIDMax: 4, + }, + mydump.Chunk{ + Offset: 88, + EndOffset: 130, + PrevRowIDMax: 4, + RowIDMax: 7, + }, + mydump.Chunk{ + Offset: 130, + EndOffset: 165, + PrevRowIDMax: 7, + RowIDMax: 8, + }, + mydump.Chunk{ + Offset: 165, + EndOffset: 179, + PrevRowIDMax: 8, + 
RowIDMax: 9, + }, + }) +} From 761e8b9559a72d199a1dbfde95cca835e5df6b26 Mon Sep 17 00:00:00 2001 From: kennytm Date: Tue, 30 Oct 2018 00:49:39 +0800 Subject: [PATCH 02/15] mydump, restore: use the new parser for chunkRestore --- lightning/kv/sql2kv.go | 6 +- lightning/mydump/region.go | 97 +++++++++++++++--------------- lightning/mydump/region_test.go | 69 +++++++++++---------- lightning/restore/restore.go | 103 ++++++++++++++++++++------------ 4 files changed, 157 insertions(+), 118 deletions(-) diff --git a/lightning/kv/sql2kv.go b/lightning/kv/sql2kv.go index f15e45bdc..d77955fbb 100644 --- a/lightning/kv/sql2kv.go +++ b/lightning/kv/sql2kv.go @@ -109,10 +109,10 @@ func (kvcodec *TableKVEncoder) NextRowID() int64 { return kvcodec.idAllocator.Base() + 1 } -func (kvcodec *TableKVEncoder) SQL2KV(sql []byte) ([]kvec.KvPair, uint64, error) { +func (kvcodec *TableKVEncoder) SQL2KV(sql string) ([]kvec.KvPair, uint64, error) { if PrepareStmtMode { // via prepare statment - kvPairs, rowsAffected, err := kvcodec.encodeViaPstmt(sql) + kvPairs, rowsAffected, err := kvcodec.encodeViaPstmt([]byte(sql)) if err == nil { return kvPairs, rowsAffected, nil } @@ -120,7 +120,7 @@ func (kvcodec *TableKVEncoder) SQL2KV(sql []byte) ([]kvec.KvPair, uint64, error) } // via sql execution - kvPairs, rowsAffected, err := kvcodec.encoder.Encode(string(sql), kvcodec.tableID) + kvPairs, rowsAffected, err := kvcodec.encoder.Encode(sql, kvcodec.tableID) if err != nil { common.AppLogger.Errorf("[sql2kv] sql encode error = %v", err) return nil, 0, errors.Trace(err) diff --git a/lightning/mydump/region.go b/lightning/mydump/region.go index e9de22791..9a4c2b8e1 100644 --- a/lightning/mydump/region.go +++ b/lightning/mydump/region.go @@ -2,7 +2,7 @@ package mydump import ( "fmt" - "io" + "os" "runtime" "sort" "sync" @@ -18,13 +18,26 @@ type TableRegion struct { Table string File string - Offset int64 - Size int64 + Columns []byte + Chunk Chunk } func (reg *TableRegion) Name() string { return fmt.Sprintf("%s|%s|%d|%d", - reg.DB, reg.Table, reg.ID, reg.Offset) + reg.DB, reg.Table, reg.ID, reg.Chunk.Offset) +} + +func (reg *TableRegion) RowIDMin() int64 { + return reg.Chunk.PrevRowIDMax + 1 +} +func (reg *TableRegion) Rows() int64 { + return reg.Chunk.RowIDMax - reg.Chunk.PrevRowIDMax +} +func (reg *TableRegion) Offset() int64 { + return reg.Chunk.Offset +} +func (reg *TableRegion) Size() int64 { + return reg.Chunk.EndOffset - reg.Chunk.Offset } type regionSlice []*TableRegion @@ -37,7 +50,7 @@ func (rs regionSlice) Swap(i, j int) { } func (rs regionSlice) Less(i, j int) bool { if rs[i].File == rs[j].File { - return rs[i].Offset < rs[j].Offset + return rs[i].Chunk.Offset < rs[j].Chunk.Offset } return rs[i].File < rs[j].File } @@ -82,12 +95,14 @@ func (f *RegionFounder) MakeTableRegions(meta *MDTableMeta) []*TableRegion { go func(pid int, file string) { common.AppLogger.Debugf("[%s] loading file's region (%s) ...", table, file) - var regions []*TableRegion - regions = splitFuzzyRegion(db, table, file, minRegionSize) - - lock.Lock() - filesRegions = append(filesRegions, regions...) - lock.Unlock() + chunks, err := splitExactChunks(db, table, file, minRegionSize) + if err == nil { + lock.Lock() + filesRegions = append(filesRegions, chunks...) 
+ lock.Unlock() + } else { + common.AppLogger.Errorf("failed to extract chunks from file (%s): %s", file, err.Error()) + } processors <- pid wg.Done() @@ -97,56 +112,44 @@ func (f *RegionFounder) MakeTableRegions(meta *MDTableMeta) []*TableRegion { // Setup files' regions sort.Sort(filesRegions) // ps : sort region by - (fileName, fileOffset) + var totalRowCount int64 for i, region := range filesRegions { region.ID = i + + // Re-adjust the row IDs so they won't be overlapping. + chunkRowCount := region.Chunk.RowIDMax - region.Chunk.PrevRowIDMax + region.Chunk.PrevRowIDMax = totalRowCount + totalRowCount += chunkRowCount + region.Chunk.RowIDMax = totalRowCount } return filesRegions } -func splitFuzzyRegion(db string, table string, file string, minRegionSize int64) []*TableRegion { - reader, err := NewMDDataReader(file, 0) +func splitExactChunks(db string, table string, file string, minChunkSize int64) ([]*TableRegion, error) { + reader, err := os.Open(file) if err != nil { - if err == ErrInsertStatementNotFound { - common.AppLogger.Warnf("failed to generate file's regions (%s) : %s", file, err.Error()) - } else { - common.AppLogger.Errorf("failed to generate file's regions (%s) : %s", file, err.Error()) - } - return nil + return nil, errors.Trace(err) } defer reader.Close() - newRegion := func(off int64) *TableRegion { - return &TableRegion{ - ID: -1, - DB: db, - Table: table, - File: file, - Offset: off, - Size: 0, - } + parser := NewChunkParser(reader) + chunks, err := parser.ReadChunks(minChunkSize) + if err != nil { + return nil, errors.Trace(err) } - regions := make([]*TableRegion, 0) - - var extendSize = int64(4 << 10) // 4 K - var offset int64 - for { - reader.Seek(offset + minRegionSize) - _, err := reader.Read(extendSize) - pos := reader.Tell() - - region := newRegion(offset) - region.Size = pos - offset - if region.Size > 0 { - regions = append(regions, region) - } - - if errors.Cause(err) == io.EOF { - break + annotatedChunks := make([]*TableRegion, len(chunks)) + for i, chunk := range chunks { + annotatedChunks[i] = &TableRegion{ + ID: -1, + DB: db, + Table: table, + File: file, + Columns: parser.Columns, + Chunk: chunk, } - offset = pos } - return regions + return annotatedChunks, nil } diff --git a/lightning/mydump/region_test.go b/lightning/mydump/region_test.go index c68df5e02..0d4ff99a6 100644 --- a/lightning/mydump/region_test.go +++ b/lightning/mydump/region_test.go @@ -2,6 +2,8 @@ package mydump_test import ( "bytes" + "fmt" + "path/filepath" . "github.com/pingcap/check" "github.com/pingcap/tidb-lightning/lightning/common" @@ -20,8 +22,15 @@ type testMydumpRegionSuite struct{} func (s *testMydumpRegionSuite) SetUpSuite(c *C) {} func (s *testMydumpRegionSuite) TearDownSuite(c *C) {} +var expectedTuplesCount = map[string]int64{ + "i": 1, + "report_case_high_risk": 1, + "tbl_autoid": 10000, + "tbl_multi_index": 10000, +} + /* - TODO : test with specified 'fuzzyRegionSize' & 'regionBlockSize' ... + TODO : test with specified 'regionBlockSize' ... 
*/ func (s *testMydumpRegionSuite) TestTableRegion(c *C) { cfg := &config.Config{Mydumper: config.MydumperRuntime{SourceDir: "./examples"}} @@ -32,33 +41,38 @@ func (s *testMydumpRegionSuite) TestTableRegion(c *C) { for _, meta := range dbMeta.Tables { regions := founder.MakeTableRegions(meta) - // table := meta.Name - // fmt.Printf("[%s] region count ===============> %d\n", table, len(regions)) - // for _, region := range regions { - // fname := filepath.Base(region.File) - // fmt.Printf("[%s] rowID = %5d / rows = %5d / offset = %10d / size = %10d \n", - // fname, region.BeginRowID, region.Rows, region.Offset, region.Size) - // } + table := meta.Name + fmt.Printf("[%s] region count ===============> %d\n", table, len(regions)) + for _, region := range regions { + fname := filepath.Base(region.File) + fmt.Printf("[%s] rowID = %5d / rows = %5d / offset = %10d / size = %10d \n", + fname, + region.RowIDMin(), + region.Rows(), + region.Offset(), + region.Size()) + } // check - region-size vs file-size var tolFileSize int64 = 0 - var tolRegionSize int64 = 0 for _, file := range meta.DataFiles { fileSize, err := common.GetFileSize(file) c.Assert(err, IsNil) tolFileSize += fileSize } - for _, region := range regions { - tolRegionSize += region.Size - } - c.Assert(tolRegionSize, Equals, tolFileSize) - - // check - rows num - // var tolRows int64 = 0 + // var tolRegionSize int64 = 0 // for _, region := range regions { - // tolRows += region.Rows + // tolRegionSize += region.Size() // } - // c.Assert(tolRows, Equals, int64(10000)) + // c.Assert(tolRegionSize, Equals, tolFileSize) + // (The size will not be equal since the comments at the end are omitted) + + // check - rows num + var tolRows int64 = 0 + for _, region := range regions { + tolRows += region.Rows() + } + c.Assert(tolRows, Equals, expectedTuplesCount[table]) // check - range regionNum := len(regions) @@ -66,11 +80,11 @@ func (s *testMydumpRegionSuite) TestTableRegion(c *C) { for i := 1; i < regionNum; i++ { reg := regions[i] if preReg.File == reg.File { - c.Assert(reg.Offset, Equals, preReg.Offset+preReg.Size) - // c.Assert(reg.BeginRowID, Equals, preReg.BeginRowID+preReg.Rows) + c.Assert(reg.Offset(), Equals, preReg.Offset()+preReg.Size()) + c.Assert(reg.RowIDMin(), Equals, preReg.RowIDMin()+preReg.Rows()) } else { c.Assert(reg.Offset, Equals, 0) - // c.Assert(reg.BeginRowID, Equals, 1) + c.Assert(reg.RowIDMin(), Equals, 1) } preReg = reg } @@ -85,27 +99,20 @@ func (s *testMydumpRegionSuite) TestRegionReader(c *C) { dbMeta := loader.GetDatabases()["mocker_test"] founder := NewRegionFounder(defMinRegionSize) - expectedTuplesCount := map[string]int{ - "i": 1, - "report_case_high_risk": 1, - "tbl_autoid": 10000, - "tbl_multi_index": 10000, - } - for _, meta := range dbMeta.Tables { regions := founder.MakeTableRegions(meta) tolValTuples := 0 for _, reg := range regions { - regReader, _ := NewRegionReader(reg.File, reg.Offset, reg.Size) - stmts, _ := regReader.Read(reg.Size) + regReader, _ := NewRegionReader(reg.File, reg.Offset(), reg.Size()) + stmts, _ := regReader.Read(reg.Size()) for _, stmt := range stmts { parts := bytes.Split(stmt, []byte("),")) tolValTuples += len(parts) } } - c.Assert(tolValTuples, Equals, expectedTuplesCount[meta.Name]) + c.Assert(int64(tolValTuples), Equals, expectedTuplesCount[meta.Name]) } return diff --git a/lightning/restore/restore.go b/lightning/restore/restore.go index 7e0f92d83..f137b5c20 100644 --- a/lightning/restore/restore.go +++ b/lightning/restore/restore.go @@ -13,7 +13,7 @@ import ( "time" 
"github.com/coreos/go-semver/semver" - "github.com/pkg/errors" + "github.com/cznic/mathutil" sstpb "github.com/pingcap/kvproto/pkg/import_sstpb" "github.com/pingcap/tidb-lightning/lightning/common" "github.com/pingcap/tidb-lightning/lightning/config" @@ -23,6 +23,7 @@ import ( verify "github.com/pingcap/tidb-lightning/lightning/verification" tidbcfg "github.com/pingcap/tidb/config" "github.com/pingcap/tidb/util/kvencoder" + "github.com/pkg/errors" ) const ( @@ -795,31 +796,38 @@ func (pool *RestoreWorkerPool) Recycle(worker *RestoreWorker) { //////////////////////////////////////////////////////////////// type chunkRestore struct { - reader *mydump.RegionReader - path string - offset int64 - name string + parser *mydump.ChunkParser + path string + name string + columns []byte + chunk mydump.Chunk } func newChunkRestore(chunk *mydump.TableRegion, cp *TableCheckpoint) (*chunkRestore, error) { - reader, err := mydump.NewRegionReader(chunk.File, chunk.Offset, chunk.Size) + reader, err := os.Open(chunk.File) if err != nil { return nil, errors.Trace(err) } - if pos, ok := cp.ChunkPos(chunk.File, chunk.Offset); ok { - reader.Seek(pos) + parser := mydump.NewChunkParser(reader) + + pos, ok := cp.ChunkPos(chunk.File, chunk.Offset()) + if !ok { + pos = chunk.Offset() } + reader.Seek(pos, io.SeekStart) + parser.Pos = pos return &chunkRestore{ - reader: reader, - path: chunk.File, - offset: chunk.Offset, - name: chunk.Name(), + parser: parser, + path: chunk.File, + name: chunk.Name(), + columns: chunk.Columns, + chunk: chunk.Chunk, }, nil } func (cr *chunkRestore) close() { - cr.reader.Close() + cr.parser.Reader().(*os.File).Close() } type TableRestore struct { @@ -884,7 +892,7 @@ func (t *TableRestore) loadChunks(minChunkSize int64, cp *TableCheckpoint) []*my // Remove all regions which have been imported newChunks := chunks[:0] for _, chunk := range chunks { - if pos, ok := cp.ChunkPos(chunk.File, chunk.Offset); !ok || pos < chunk.Offset+chunk.Size { + if pos, ok := cp.ChunkPos(chunk.File, chunk.Chunk.Offset); !ok || pos < chunk.Chunk.EndOffset { newChunks = append(newChunks, chunk) } } @@ -1085,7 +1093,6 @@ func (cr *chunkRestore) restore( timer := time.Now() -outside: for { select { case <-ctx.Done(): @@ -1093,15 +1100,39 @@ outside: default: } + endOffset := mathutil.MinInt64(cr.chunk.EndOffset, cr.parser.Pos+rc.cfg.Mydumper.ReadBlockSize) + if cr.parser.Pos >= endOffset { + break + } + start := time.Now() - sqls, err := cr.reader.Read(rc.cfg.Mydumper.ReadBlockSize) - switch errors.Cause(err) { - case nil: - case io.EOF: - break outside - default: - return errors.Trace(err) + + var sqls strings.Builder + sqls.WriteString("INSERT INTO ") + sqls.WriteString(t.tableName) + sqls.Write(cr.columns) + sqls.WriteString(" VALUES") + var sep byte = ' ' + readLoop: + for cr.parser.Pos < endOffset { + err := cr.parser.ReadRow() + switch errors.Cause(err) { + case nil: + sqls.WriteByte(sep) + sep = ',' + lastRow := cr.parser.LastRow() + sqls.Write(lastRow.Row) + case io.EOF: + break readLoop + default: + return errors.Trace(err) + } + } + if sep != ',' { // quick and dirty way to check if `sqls` actually contained any values + continue } + sqls.WriteByte(';') + metrics.MarkTiming(readMark, start) var ( @@ -1110,21 +1141,19 @@ outside: localChecksum verify.KVChecksum ) // sql -> kv - for _, stmt := range sqls { - start = time.Now() - kvs, affectedRows, err := kvEncoder.SQL2KV(stmt) - metrics.MarkTiming(encodeMark, start) - common.AppLogger.Debugf("len(kvs) %d, len(sql) %d", len(kvs), len(stmt)) - if err != nil 
{ - common.AppLogger.Errorf("kv encode failed = %s\n", err.Error()) - return errors.Trace(err) - } - - totalKVs = append(totalKVs, kvs...) - localChecksum.Update(kvs) - totalAffectedRows += affectedRows + start = time.Now() + kvs, affectedRows, err := kvEncoder.SQL2KV(sqls.String()) + metrics.MarkTiming(encodeMark, start) + common.AppLogger.Debugf("len(kvs) %d, len(sql) %d", len(kvs), sqls.Len()) + if err != nil { + common.AppLogger.Errorf("kv encode failed = %s\n", err.Error()) + return errors.Trace(err) } + totalKVs = append(totalKVs, kvs...) + localChecksum.Update(kvs) + totalAffectedRows += affectedRows + // kv -> deliver ( -> tikv ) start = time.Now() stream, err := engine.NewWriteStream(ctx) @@ -1157,8 +1186,8 @@ outside: AllocBase: t.alloc.Base() + 1, Checksum: t.checksum, Path: cr.path, - Offset: cr.offset, - Pos: cr.reader.Tell(), + Offset: cr.chunk.Offset, + Pos: cr.parser.Pos, }, } t.checksumLock.Unlock() From efd17762806e43c3b2bb68a7efaad3a6287f8dd7 Mon Sep 17 00:00:00 2001 From: kennytm Date: Tue, 30 Oct 2018 01:47:27 +0800 Subject: [PATCH 03/15] restore, kv: replace the default ID allocator by a panicking allocator --- lightning/kv/allocator.go | 46 ++++++++++++++++++++++++++++++++++++ lightning/kv/sql2kv.go | 9 +++---- lightning/restore/restore.go | 6 ++--- 3 files changed, 52 insertions(+), 9 deletions(-) create mode 100644 lightning/kv/allocator.go diff --git a/lightning/kv/allocator.go b/lightning/kv/allocator.go new file mode 100644 index 000000000..b554fae6f --- /dev/null +++ b/lightning/kv/allocator.go @@ -0,0 +1,46 @@ +package kv + +import "sync/atomic" + +// PanickingAllocator is an ID allocator which panics on all operations except Rebase +type PanickingAllocator struct { + base int64 +} + +func NewPanickingAllocator(base int64) *PanickingAllocator { + return &PanickingAllocator{base: base} +} + +func (alloc *PanickingAllocator) Alloc(int64) (int64, error) { + panic("unexpected Alloc() call") +} + +func (alloc *PanickingAllocator) Reset(newBase int64) { + panic("unexpected Reset() call") +} + +func (alloc *PanickingAllocator) Rebase(tableID, newBase int64, allocIDs bool) error { + // CAS + for { + oldBase := atomic.LoadInt64(&alloc.base) + if newBase <= oldBase { + break + } + if atomic.CompareAndSwapInt64(&alloc.base, oldBase, newBase) { + break + } + } + return nil +} + +func (alloc *PanickingAllocator) Base() int64 { + return atomic.LoadInt64(&alloc.base) +} + +func (alloc *PanickingAllocator) End() int64 { + panic("unexpected End() call") +} + +func (alloc *PanickingAllocator) NextGlobalAutoID(tableID int64) (int64, error) { + panic("unexpected NextGlobalAutoID() call") +} diff --git a/lightning/kv/sql2kv.go b/lightning/kv/sql2kv.go index d77955fbb..6640f1ce8 100644 --- a/lightning/kv/sql2kv.go +++ b/lightning/kv/sql2kv.go @@ -6,6 +6,7 @@ import ( "github.com/pingcap/tidb-lightning/lightning/metric" sqltool "github.com/pingcap/tidb-lightning/lightning/sql" "github.com/pingcap/tidb/kv" + "github.com/pingcap/tidb/meta/autoid" kvec "github.com/pingcap/tidb/util/kvencoder" ) @@ -31,13 +32,13 @@ type TableKVEncoder struct { bufValues []interface{} encoder kvec.KvEncoder - idAllocator *kvec.Allocator + idAllocator autoid.Allocator } func NewTableKVEncoder( dbName string, table string, tableID int64, - columns int, sqlMode string, alloc *kvec.Allocator) (*TableKVEncoder, error) { + columns int, sqlMode string, alloc autoid.Allocator) (*TableKVEncoder, error) { encoder, err := kvec.New(dbName, alloc) if err != nil { @@ -96,10 +97,6 @@ func (kvcodec *TableKVEncoder) 
makeStatments(maxRows int) ([]uint32, error) { return stmtIds, nil } -func (kvcodec *TableKVEncoder) ResetRowID(rowID int64) { - kvcodec.idAllocator.Reset(rowID) -} - func (kvcodec *TableKVEncoder) Close() error { metric.KvEncoderCounter.WithLabelValues("closed").Inc() return errors.Trace(kvcodec.encoder.Close()) diff --git a/lightning/restore/restore.go b/lightning/restore/restore.go index f137b5c20..1def7186c 100644 --- a/lightning/restore/restore.go +++ b/lightning/restore/restore.go @@ -22,6 +22,7 @@ import ( "github.com/pingcap/tidb-lightning/lightning/mydump" verify "github.com/pingcap/tidb-lightning/lightning/verification" tidbcfg "github.com/pingcap/tidb/config" + "github.com/pingcap/tidb/meta/autoid" "github.com/pingcap/tidb/util/kvencoder" "github.com/pkg/errors" ) @@ -837,7 +838,7 @@ type TableRestore struct { tableInfo *TidbTableInfo tableMeta *mydump.MDTableMeta encoder kvenc.KvEncoder - alloc *kvenc.Allocator + alloc autoid.Allocator checksumLock sync.Mutex checksum verify.KVChecksum @@ -853,8 +854,7 @@ func NewTableRestore( tableInfo *TidbTableInfo, cp *TableCheckpoint, ) (*TableRestore, error) { - idAlloc := kvenc.NewAllocator() - idAlloc.Reset(cp.AllocBase) + idAlloc := kv.NewPanickingAllocator(cp.AllocBase) encoder, err := kvenc.New(dbInfo.Name, idAlloc) if err != nil { return nil, errors.Trace(err) From 1b1f1c5852951d5a8e4f102f680a7585aa9d7d0a Mon Sep 17 00:00:00 2001 From: kennytm Date: Tue, 30 Oct 2018 15:57:52 +0800 Subject: [PATCH 04/15] restore: include _tidb_rowid if this column is required --- lightning/restore/checkpoints.go | 205 ++++++++++++++++++++++--------- lightning/restore/restore.go | 168 +++++++++++++++---------- tests/checkpoint_chunks/run.sh | 2 +- 3 files changed, 248 insertions(+), 127 deletions(-) diff --git a/lightning/restore/checkpoints.go b/lightning/restore/checkpoints.go index 8f814d932..488478fb8 100644 --- a/lightning/restore/checkpoints.go +++ b/lightning/restore/checkpoints.go @@ -13,6 +13,7 @@ import ( "github.com/satori/go.uuid" "github.com/pingcap/tidb-lightning/lightning/common" + "github.com/pingcap/tidb-lightning/lightning/mydump" verify "github.com/pingcap/tidb-lightning/lightning/verification" ) @@ -51,46 +52,62 @@ func (status CheckpointStatus) MetricName() string { } } +type ChunkCheckpointKey struct { + Path string + Offset int64 +} + +func (key *ChunkCheckpointKey) String() string { + return fmt.Sprintf("%s:%d", key.Path, key.Offset) +} + +type ChunkCheckpoint struct { + Key ChunkCheckpointKey + Columns []byte + ShouldIncludeRowID bool + Chunk mydump.Chunk + Checksum verify.KVChecksum +} + type TableCheckpoint struct { Status CheckpointStatus Engine uuid.UUID AllocBase int64 - Checksum verify.KVChecksum - chunks map[chunkCheckpoint]int64 + Chunks []*ChunkCheckpoint // a sorted array } func (cp *TableCheckpoint) resetChunks() { - cp.chunks = make(map[chunkCheckpoint]int64) + cp.Chunks = nil } -func (cp *TableCheckpoint) ChunkPos(path string, offset int64) (int64, bool) { - pos, ok := cp.chunks[chunkCheckpoint{path: path, offset: offset}] - return pos, ok -} - -type chunkCheckpoint struct { - path string - offset int64 +type chunkCheckpointDiff struct { + path string + offset int64 + pos int64 + rowID int64 + checksum verify.KVChecksum } type TableCheckpointDiff struct { - hasStatus bool - hasChecksum bool - status CheckpointStatus - allocBase int64 - checksum verify.KVChecksum - chunks map[chunkCheckpoint]int64 + hasStatus bool + hasChunks bool + status CheckpointStatus + allocBase int64 + chunks 
map[ChunkCheckpointKey]chunkCheckpointDiff } func NewTableCheckpointDiff() *TableCheckpointDiff { return &TableCheckpointDiff{ status: CheckpointStatusMaxInvalid + 1, - chunks: make(map[chunkCheckpoint]int64), + chunks: make(map[ChunkCheckpointKey]chunkCheckpointDiff), } } func (cpd *TableCheckpointDiff) String() string { - return fmt.Sprintf("{hasStatus:%v, hasChecksum:%v, status:%d, allocBase:%d, checksum:%v, chunks:[%d]}", cpd.hasStatus, cpd.hasChecksum, cpd.status, cpd.allocBase, cpd.checksum, len(cpd.chunks)) + return fmt.Sprintf( + "{hasStatus:%v, hasChunks:%v, status:%d, allocBase:%d, chunks:[%d]}", + cpd.hasStatus, cpd.hasChunks, cpd.status, cpd.allocBase, len(cpd.chunks), + ) } type TableCheckpointMerger interface { @@ -116,25 +133,28 @@ func (merger *StatusCheckpointMerger) MergeInto(cpd *TableCheckpointDiff) { } type ChunkCheckpointMerger struct { + Key ChunkCheckpointKey AllocBase int64 Checksum verify.KVChecksum - Path string - Offset int64 Pos int64 + RowID int64 } func (merger *ChunkCheckpointMerger) MergeInto(cpd *TableCheckpointDiff) { - cpd.hasChecksum = true + cpd.hasChunks = true cpd.allocBase = merger.AllocBase - cpd.checksum = merger.Checksum - chcp := chunkCheckpoint{path: merger.Path, offset: merger.Offset} - cpd.chunks[chcp] = merger.Pos + cpd.chunks[merger.Key] = chunkCheckpointDiff{ + pos: merger.Pos, + rowID: merger.RowID, + checksum: merger.Checksum, + } } type CheckpointsDB interface { Initialize(ctx context.Context, dbInfo map[string]*TidbDBInfo) error Get(ctx context.Context, tableName string) (*TableCheckpoint, error) Close() error + InsertChunkCheckpoints(ctx context.Context, tableName string, checkpoints []*ChunkCheckpoint) error Update(checkpointDiffs map[string]*TableCheckpointDiff) RemoveCheckpoint(ctx context.Context, tableName string) error @@ -162,10 +182,13 @@ func (*NullCheckpointsDB) Get(_ context.Context, tableName string) (*TableCheckp return &TableCheckpoint{ Status: CheckpointStatusLoaded, Engine: uuid.NewV4(), - chunks: make(map[chunkCheckpoint]int64), }, nil } +func (*NullCheckpointsDB) InsertChunkCheckpoints(_ context.Context, _ string, _ []*ChunkCheckpoint) error { + return nil +} + func (*NullCheckpointsDB) Update(map[string]*TableCheckpointDiff) {} type MySQLCheckpointsDB struct { @@ -190,18 +213,25 @@ func NewMySQLCheckpointsDB(ctx context.Context, db *sql.DB, schemaName string) ( engine binary(16) NOT NULL, status tinyint unsigned DEFAULT 30, alloc_base bigint NOT NULL DEFAULT 0, - kvc_bytes bigint unsigned NOT NULL DEFAULT 0, - kvc_kvs bigint unsigned NOT NULL DEFAULT 0, - kvc_checksum bigint unsigned NOT NULL DEFAULT 0, create_time timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, update_time timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, INDEX(node_id, session) ); - CREATE TABLE IF NOT EXISTS %[1]s.chunk_v2 ( + CREATE TABLE IF NOT EXISTS %[1]s.chunk_v3 ( table_name varchar(261) NOT NULL, path varchar(2048) NOT NULL, offset bigint NOT NULL, + columns text NULL, + should_include_row_id BOOL NOT NULL, + end_offset bigint NOT NULL, pos bigint NOT NULL, + prev_rowid_max bigint NOT NULL, + rowid_max bigint NOT NULL, + kvc_bytes bigint unsigned NOT NULL DEFAULT 0, + kvc_kvs bigint unsigned NOT NULL DEFAULT 0, + kvc_checksum bigint unsigned NOT NULL DEFAULT 0, + create_time timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, + update_time timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, PRIMARY KEY(table_name, path, offset) ); `, schema)) @@ -271,7 +301,14 @@ func (cpdb *MySQLCheckpointsDB) Get(ctx 
context.Context, tableName string) (*Tab purpose := "(read checkpoint " + tableName + ")" err := common.TransactWithRetry(ctx, cpdb.db, purpose, func(c context.Context, tx *sql.Tx) error { - query := fmt.Sprintf(`SELECT path, offset, pos FROM %s.chunk_v2 WHERE table_name = ?`, cpdb.schema) + query := fmt.Sprintf(` + SELECT + path, offset, columns, should_include_row_id, + pos, end_offset, prev_rowid_max, rowid_max, + kvc_bytes, kvc_kvs, kvc_checksum + FROM %s.chunk_v3 WHERE table_name = ? + ORDER BY path, offset; + `, cpdb.schema) rows, err := tx.QueryContext(c, query, tableName) if err != nil { return errors.Trace(err) @@ -279,32 +316,35 @@ func (cpdb *MySQLCheckpointsDB) Get(ctx context.Context, tableName string) (*Tab defer rows.Close() for rows.Next() { var ( - ccp chunkCheckpoint - pos int64 + value = new(ChunkCheckpoint) + kvcBytes uint64 + kvcKVs uint64 + kvcChecksum uint64 ) - if err := rows.Scan(&ccp.path, &ccp.offset, &pos); err != nil { + if err := rows.Scan( + &value.Key.Path, &value.Key.Offset, &value.Columns, &value.ShouldIncludeRowID, + &value.Chunk.Offset, &value.Chunk.EndOffset, &value.Chunk.PrevRowIDMax, &value.Chunk.RowIDMax, + &kvcBytes, &kvcKVs, &kvcChecksum, + ); err != nil { return errors.Trace(err) } - cp.chunks[ccp] = pos + value.Checksum = verify.MakeKVChecksum(kvcBytes, kvcKVs, kvcChecksum) + cp.Chunks = append(cp.Chunks, value) } if err := rows.Err(); err != nil { return errors.Trace(err) } query = fmt.Sprintf(` - SELECT status, engine, alloc_base, kvc_bytes, kvc_kvs, kvc_checksum - FROM %s.table_v1 WHERE table_name = ? + SELECT status, engine, alloc_base FROM %s.table_v1 WHERE table_name = ? `, cpdb.schema) row := tx.QueryRowContext(c, query, tableName) var ( - status uint8 - engine []byte - kvcBytes uint64 - kvcKVs uint64 - kvcChecksum uint64 + status uint8 + engine []byte ) - if err := row.Scan(&status, &engine, &cp.AllocBase, &kvcBytes, &kvcKVs, &kvcChecksum); err != nil { + if err := row.Scan(&status, &engine, &cp.AllocBase); err != nil { cp.resetChunks() return errors.Trace(err) } @@ -314,7 +354,6 @@ func (cpdb *MySQLCheckpointsDB) Get(ctx context.Context, tableName string) (*Tab return errors.Trace(err) } cp.Status = CheckpointStatus(status) - cp.Checksum = verify.MakeKVChecksum(kvcBytes, kvcKVs, kvcChecksum) return nil }) if err != nil { @@ -328,12 +367,52 @@ func (cpdb *MySQLCheckpointsDB) Get(ctx context.Context, tableName string) (*Tab return cp, nil } +func (cpdb *MySQLCheckpointsDB) InsertChunkCheckpoints(ctx context.Context, tableName string, checkpoints []*ChunkCheckpoint) error { + err := common.TransactWithRetry(ctx, cpdb.db, "(update chunk checkpoints for "+tableName+")", func(c context.Context, tx *sql.Tx) error { + stmt, err := tx.PrepareContext(c, fmt.Sprintf(` + REPLACE INTO %s.chunk_v3 ( + table_name, path, offset, columns, should_include_row_id, + pos, end_offset, prev_rowid_max, rowid_max, + kvc_bytes, kvc_kvs, kvc_checksum + ) VALUES ( + ?, ?, ?, ?, ?, + ?, ?, ?, ?, + ?, ?, ? 
+ ); + `, cpdb.schema)) + if err != nil { + return errors.Trace(err) + } + defer stmt.Close() + + for _, value := range checkpoints { + _, err = stmt.ExecContext( + c, + tableName, value.Key.Path, value.Key.Offset, value.Columns, value.ShouldIncludeRowID, + value.Chunk.Offset, value.Chunk.EndOffset, value.Chunk.PrevRowIDMax, value.Chunk.RowIDMax, + value.Checksum.SumSize(), value.Checksum.SumKVS(), value.Checksum.Sum(), + ) + if err != nil { + return errors.Trace(err) + } + } + + return nil + }) + if err != nil { + return errors.Trace(err) + } + + return nil +} + func (cpdb *MySQLCheckpointsDB) Update(checkpointDiffs map[string]*TableCheckpointDiff) { chunkQuery := fmt.Sprintf(` - REPLACE INTO %s.chunk_v2 (table_name, path, offset, pos) VALUES (?, ?, ?, ?); + UPDATE %s.chunk_v3 SET pos = ?, prev_rowid_max = ?, kvc_bytes = ?, kvc_kvs = ?, kvc_checksum = ? + WHERE table_name = ? AND path = ? AND offset = ?; `, cpdb.schema) checksumQuery := fmt.Sprintf(` - UPDATE %s.table_v1 SET alloc_base = ?, kvc_bytes = ?, kvc_kvs = ?, kvc_checksum = ? WHERE table_name = ?; + UPDATE %s.table_v1 SET alloc_base = ? WHERE table_name = ?; `, cpdb.schema) statusQuery := fmt.Sprintf(` UPDATE %s.table_v1 SET status = ? WHERE table_name = ?; @@ -362,13 +441,17 @@ func (cpdb *MySQLCheckpointsDB) Update(checkpointDiffs map[string]*TableCheckpoi return errors.Trace(e) } } - if cpd.hasChecksum { - if _, e := checksumStmt.ExecContext(c, cpd.allocBase, cpd.checksum.SumSize(), cpd.checksum.SumKVS(), cpd.checksum.Sum(), tableName); e != nil { + if cpd.hasChunks { + if _, e := checksumStmt.ExecContext(c, cpd.allocBase, tableName); e != nil { return errors.Trace(e) } } - for chcp, pos := range cpd.chunks { - if _, e := chunkStmt.ExecContext(c, tableName, chcp.path, chcp.offset, pos); e != nil { + for key, diff := range cpd.chunks { + if _, e := chunkStmt.ExecContext( + c, + diff.pos, diff.rowID, diff.checksum.SumSize(), diff.checksum.SumKVS(), diff.checksum.Sum(), + tableName, key.Path, key.Offset, + ); e != nil { return errors.Trace(e) } } @@ -409,11 +492,11 @@ func (cpdb *MySQLCheckpointsDB) RemoveCheckpoint(ctx context.Context, tableName ) if tableName == "all" { - deleteChunkFmt = "DELETE FROM %[1]s.chunk_v2 WHERE table_name IN (SELECT table_name FROM %[1]s.table_v1 WHERE node_id = ?)" + deleteChunkFmt = "DELETE FROM %[1]s.chunk_v3 WHERE table_name IN (SELECT table_name FROM %[1]s.table_v1 WHERE node_id = ?)" deleteTableFmt = "DELETE FROM %s.table_v1 WHERE node_id = ?" arg = nodeID } else { - deleteChunkFmt = "DELETE FROM %s.chunk_v2 WHERE table_name = ?" + deleteChunkFmt = "DELETE FROM %s.chunk_v3 WHERE table_name = ?" deleteTableFmt = "DELETE FROM %s.table_v1 WHERE table_name = ?" arg = tableName } @@ -485,7 +568,7 @@ func (cpdb *MySQLCheckpointsDB) destroyErrorCheckpoints(ctx context.Context, tab SELECT table_name FROM %s.table_v1 WHERE %s = ? AND status <= %d; `, cpdb.schema, conditionColumn, CheckpointStatusMaxInvalid) deleteChunkQuery := fmt.Sprintf(` - DELETE FROM %[1]s.chunk_v2 WHERE table_name IN (SELECT table_name FROM %[1]s.table_v1 WHERE %[2]s = ? AND status <= %[3]d) + DELETE FROM %[1]s.chunk_v3 WHERE table_name IN (SELECT table_name FROM %[1]s.table_v1 WHERE %[2]s = ? AND status <= %[3]d) `, cpdb.schema, conditionColumn, CheckpointStatusMaxInvalid) deleteTableQuery := fmt.Sprintf(` DELETE FROM %s.table_v1 WHERE %s = ? 
AND status <= %d @@ -538,9 +621,6 @@ func (cpdb *MySQLCheckpointsDB) DumpTables(ctx context.Context, writer io.Writer hex(engine) AS engine, status, alloc_base, - kvc_bytes, - kvc_kvs, - kvc_checksum, create_time, update_time FROM %s.table_v1; @@ -559,8 +639,17 @@ func (cpdb *MySQLCheckpointsDB) DumpChunks(ctx context.Context, writer io.Writer table_name, path, offset, - pos - FROM %s.chunk_v2; + columns, + pos, + end_offset, + prev_rowid_max, + rowid_max, + kvc_bytes, + kvc_kvs, + kvc_checksum, + create_time, + update_time + FROM %s.chunk_v3; `, cpdb.schema)) if err != nil { return errors.Trace(err) diff --git a/lightning/restore/restore.go b/lightning/restore/restore.go index 1def7186c..8cd48b28f 100644 --- a/lightning/restore/restore.go +++ b/lightning/restore/restore.go @@ -1,12 +1,14 @@ package restore import ( + "bytes" "context" "database/sql" "fmt" "io" "net/http" "os" + "regexp" "strings" "sync" "sync/atomic" @@ -23,6 +25,7 @@ import ( verify "github.com/pingcap/tidb-lightning/lightning/verification" tidbcfg "github.com/pingcap/tidb/config" "github.com/pingcap/tidb/meta/autoid" + "github.com/pingcap/tidb/model" "github.com/pingcap/tidb/util/kvencoder" "github.com/pkg/errors" ) @@ -394,9 +397,14 @@ func (t *TableRestore) restore(ctx context.Context, rc *RestoreController, cp *T return nil, errors.Trace(err) } - var chunks []*mydump.TableRegion - if cp.Status < CheckpointStatusAllWritten { - chunks = t.loadChunks(rc.cfg.Mydumper.MinRegionSize, cp) + // no need to do anything if the chunks are already populated + if len(cp.Chunks) > 0 { + common.AppLogger.Infof("[%s] reusing %d chunks from checkpoint", t.tableName, len(cp.Chunks)) + } else if cp.Status < CheckpointStatusAllWritten { + t.populateChunks(rc.cfg.Mydumper.MinRegionSize, cp, t.tableInfo) + if err := rc.checkpointsDB.InsertChunkCheckpoints(ctx, t.tableName, cp.Chunks); err != nil { + return nil, errors.Trace(err) + } } var wg sync.WaitGroup @@ -409,7 +417,11 @@ func (t *TableRestore) restore(ctx context.Context, rc *RestoreController, cp *T handledChunksCount := new(int32) // Restore table data - for _, chunk := range chunks { + for chunkIndex, chunk := range cp.Chunks { + if chunk.Chunk.Offset >= chunk.Chunk.EndOffset { + continue + } + select { case <-ctx.Done(): return nil, ctx.Err() @@ -429,7 +441,7 @@ func (t *TableRestore) restore(ctx context.Context, rc *RestoreController, cp *T // 3. load kvs data (into kv deliver server) // 4. 
flush kvs data (into tikv node) - cr, err := newChunkRestore(chunk, cp) + cr, err := newChunkRestore(chunkIndex, chunk) if err != nil { return nil, errors.Trace(err) } @@ -449,7 +461,7 @@ func (t *TableRestore) restore(ctx context.Context, rc *RestoreController, cp *T if err != nil { metric.ChunkCounter.WithLabelValues(metric.ChunkStateFailed).Inc() if !common.IsContextCanceledError(err) { - common.AppLogger.Errorf("[%s] chunk %s run task error %s", t.tableName, cr.name, errors.ErrorStack(err)) + common.AppLogger.Errorf("[%s] chunk #%d (%s) run task error %s", t.tableName, cr.index, &cr.chunk.Key, errors.ErrorStack(err)) } chunkErrMutex.Lock() if chunkErr == nil { @@ -461,7 +473,7 @@ func (t *TableRestore) restore(ctx context.Context, rc *RestoreController, cp *T metric.ChunkCounter.WithLabelValues(metric.ChunkStateFinished).Inc() handled := int(atomic.AddInt32(handledChunksCount, 1)) - common.AppLogger.Infof("[%s] handled region count = %d (%s)", t.tableName, handled, common.Percent(handled, len(chunks))) + common.AppLogger.Infof("[%s] handled region count = %d (%s)", t.tableName, handled, common.Percent(handled, len(cp.Chunks))) }(worker, cr) } @@ -520,7 +532,7 @@ func (t *TableRestore) postProcess(ctx context.Context, closedEngine *kv.ClosedE // 4. do table checksum if cp.Status < CheckpointStatusCompleted { - err := t.compareChecksum(ctx, rc.cfg) + err := t.compareChecksum(ctx, rc.cfg, cp) rc.saveStatusCheckpoint(t.tableName, err, CheckpointStatusCompleted) if err != nil { common.AppLogger.Errorf("[%s] checksum failed: %v", t.tableName, err.Error()) @@ -797,33 +809,25 @@ func (pool *RestoreWorkerPool) Recycle(worker *RestoreWorker) { //////////////////////////////////////////////////////////////// type chunkRestore struct { - parser *mydump.ChunkParser - path string - name string - columns []byte - chunk mydump.Chunk + parser *mydump.ChunkParser + index int + chunk *ChunkCheckpoint } -func newChunkRestore(chunk *mydump.TableRegion, cp *TableCheckpoint) (*chunkRestore, error) { - reader, err := os.Open(chunk.File) +func newChunkRestore(index int, chunk *ChunkCheckpoint) (*chunkRestore, error) { + reader, err := os.Open(chunk.Key.Path) if err != nil { return nil, errors.Trace(err) } parser := mydump.NewChunkParser(reader) - pos, ok := cp.ChunkPos(chunk.File, chunk.Offset()) - if !ok { - pos = chunk.Offset() - } - reader.Seek(pos, io.SeekStart) - parser.Pos = pos + reader.Seek(chunk.Chunk.Offset, io.SeekStart) + parser.SetPos(chunk.Chunk.Offset, chunk.Chunk.PrevRowIDMax) return &chunkRestore{ - parser: parser, - path: chunk.File, - name: chunk.Name(), - columns: chunk.Columns, - chunk: chunk.Chunk, + parser: parser, + index: index, + chunk: chunk, }, nil } @@ -840,9 +844,6 @@ type TableRestore struct { encoder kvenc.KvEncoder alloc autoid.Allocator - checksumLock sync.Mutex - checksum verify.KVChecksum - rows uint64 checkpointStatus CheckpointStatus engine *kv.OpenedEngine } @@ -872,7 +873,6 @@ func NewTableRestore( tableMeta: tableMeta, encoder: encoder, alloc: idAlloc, - checksum: cp.Checksum, }, nil } @@ -881,27 +881,52 @@ func (tr *TableRestore) Close() { common.AppLogger.Infof("[%s] restore done", tr.tableName) } -func (t *TableRestore) loadChunks(minChunkSize int64, cp *TableCheckpoint) []*mydump.TableRegion { +var tidbRowIDColumnRegex = regexp.MustCompile(fmt.Sprintf("`%[1]s`|(?i:\\b%[1]s\\b)", model.ExtraHandleName)) + +func (t *TableRestore) populateChunks(minChunkSize int64, cp *TableCheckpoint, tableInfo *TidbTableInfo) { common.AppLogger.Infof("[%s] load chunks", t.tableName) 
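// A standalone sketch of what tidbRowIDColumnRegex above is meant to detect:
// an explicit _tidb_rowid column in a data file's column list, either
// back-quoted or as a bare case-insensitive word. It assumes
// model.ExtraHandleName renders as "_tidb_rowid"; the sample column lists are
// hypothetical and only illustrate the match behaviour.
package main

import (
	"fmt"
	"regexp"
)

func main() {
	re := regexp.MustCompile(fmt.Sprintf("`%[1]s`|(?i:\\b%[1]s\\b)", "_tidb_rowid"))
	for _, columns := range []string{
		"(`pk`,`_tidb_rowid`)", // back-quoted form: matched
		"(pk, _TiDB_RowID)",    // bare, case-insensitive form: matched
		"(`pk`,`id`)",          // no explicit row ID column: not matched
	} {
		fmt.Printf("%-24s => %v\n", columns, re.MatchString(columns))
	}
}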
timer := time.Now() founder := mydump.NewRegionFounder(minChunkSize) chunks := founder.MakeTableRegions(t.tableMeta) - // Ref: https://github.com/golang/go/wiki/SliceTricks#filtering-without-allocating - // Remove all regions which have been imported - newChunks := chunks[:0] + cp.Chunks = make([]*ChunkCheckpoint, 0, len(chunks)) + for _, chunk := range chunks { - if pos, ok := cp.ChunkPos(chunk.File, chunk.Chunk.Offset); !ok || pos < chunk.Chunk.EndOffset { - newChunks = append(newChunks, chunk) + columns := chunk.Columns + + shouldIncludeRowID := !tableInfo.core.PKIsHandle && !tidbRowIDColumnRegex.Match(columns) + if shouldIncludeRowID { + // we need to inject the _tidb_rowid column + if len(columns) != 0 { + // column listing already exists, just append the new column. + columns = append(columns[:len(columns)-1], (",`" + model.ExtraHandleName.String() + "`)")...) + } else { + // we need to recreate the columns + var buf bytes.Buffer + buf.WriteString("(`") + for _, columnInfo := range tableInfo.core.Columns { + buf.WriteString(columnInfo.Name.String()) + buf.WriteString("`,`") + } + buf.WriteString(model.ExtraHandleName.String()) + buf.WriteString("`)") + columns = buf.Bytes() + } } + + cp.Chunks = append(cp.Chunks, &ChunkCheckpoint{ + Key: ChunkCheckpointKey{ + Path: chunk.File, + Offset: chunk.Chunk.Offset, + }, + Columns: columns, + ShouldIncludeRowID: shouldIncludeRowID, + Chunk: chunk.Chunk, + }) } - common.AppLogger.Infof( - "[%s] load %d chunks (%d are new) takes %v", - t.tableName, len(chunks), len(newChunks), time.Since(timer), - ) - return newChunks + common.AppLogger.Infof("[%s] load %d chunks takes %v", t.tableName, len(chunks), time.Since(timer)) } func (tr *TableRestore) restoreTableMeta(ctx context.Context, cfg *config.Config) error { @@ -938,29 +963,34 @@ func (tr *TableRestore) importKV(ctx context.Context, closedEngine *kv.ClosedEng return errors.Trace(err) } closedEngine.Cleanup(ctx) - common.AppLogger.Infof("[%s] local checksum %v, has imported %d rows", tr.tableName, tr.checksum, tr.rows) return nil } // do checksum for each table. 
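// A self-contained sketch of the column-list rewriting in the populateChunks
// hunk above: when the table's primary key is not the row handle and the data
// file does not already name _tidb_rowid, the column list gains an extra
// `_tidb_rowid` column so every row can carry an explicit row ID. The column
// names used here are invented examples.
package main

import (
	"bytes"
	"fmt"
)

// appendRowIDColumn mirrors the "column listing already exists" branch:
// drop the closing ')' and append ",`_tidb_rowid`)".
func appendRowIDColumn(columns []byte) []byte {
	return append(columns[:len(columns)-1], ",`_tidb_rowid`)"...)
}

// buildColumnList mirrors the "recreate the columns" branch: rebuild the list
// from the table schema and finish with `_tidb_rowid`.
func buildColumnList(columnNames []string) []byte {
	var buf bytes.Buffer
	buf.WriteString("(`")
	for _, name := range columnNames {
		buf.WriteString(name)
		buf.WriteString("`,`")
	}
	buf.WriteString("_tidb_rowid`)")
	return buf.Bytes()
}

func main() {
	fmt.Println(string(appendRowIDColumn([]byte("(`pk`,`val`)")))) // (`pk`,`val`,`_tidb_rowid`)
	fmt.Println(string(buildColumnList([]string{"pk", "val"})))    // (`pk`,`val`,`_tidb_rowid`)
}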
-func (tr *TableRestore) compareChecksum(ctx context.Context, cfg *config.Config) error { +func (tr *TableRestore) compareChecksum(ctx context.Context, cfg *config.Config, cp *TableCheckpoint) error { if !cfg.PostRestore.Checksum { common.AppLogger.Infof("[%s] Skip checksum.", tr.tableName) return nil } + var localChecksum verify.KVChecksum + for _, chunk := range cp.Chunks { + localChecksum.Add(&chunk.Checksum) + } + common.AppLogger.Infof("[%s] local checksum %+v", tr.tableName, localChecksum) + remoteChecksum, err := DoChecksum(ctx, cfg.TiDB, tr.tableName) if err != nil { return errors.Trace(err) } - if remoteChecksum.Checksum != tr.checksum.Sum() || - remoteChecksum.TotalKVs != tr.checksum.SumKVS() || - remoteChecksum.TotalBytes != tr.checksum.SumSize() { + if remoteChecksum.Checksum != localChecksum.Sum() || + remoteChecksum.TotalKVs != localChecksum.SumKVS() || + remoteChecksum.TotalBytes != localChecksum.SumSize() { return errors.Errorf("checksum mismatched remote vs local => (checksum: %d vs %d) (total_kvs: %d vs %d) (total_bytes:%d vs %d)", - remoteChecksum.Checksum, tr.checksum.Sum(), - remoteChecksum.TotalKVs, tr.checksum.SumKVS(), - remoteChecksum.TotalBytes, tr.checksum.SumSize(), + remoteChecksum.Checksum, localChecksum.Sum(), + remoteChecksum.TotalKVs, localChecksum.SumKVS(), + remoteChecksum.TotalBytes, localChecksum.SumSize(), ) } @@ -1100,8 +1130,8 @@ func (cr *chunkRestore) restore( default: } - endOffset := mathutil.MinInt64(cr.chunk.EndOffset, cr.parser.Pos+rc.cfg.Mydumper.ReadBlockSize) - if cr.parser.Pos >= endOffset { + endOffset := mathutil.MinInt64(cr.chunk.Chunk.EndOffset, cr.parser.Pos()+rc.cfg.Mydumper.ReadBlockSize) + if cr.parser.Pos() >= endOffset { break } @@ -1110,18 +1140,23 @@ func (cr *chunkRestore) restore( var sqls strings.Builder sqls.WriteString("INSERT INTO ") sqls.WriteString(t.tableName) - sqls.Write(cr.columns) + sqls.Write(cr.chunk.Columns) sqls.WriteString(" VALUES") var sep byte = ' ' readLoop: - for cr.parser.Pos < endOffset { + for cr.parser.Pos() < endOffset { err := cr.parser.ReadRow() switch errors.Cause(err) { case nil: sqls.WriteByte(sep) sep = ',' lastRow := cr.parser.LastRow() - sqls.Write(lastRow.Row) + if cr.chunk.ShouldIncludeRowID { + sqls.Write(lastRow.Row[:len(lastRow.Row)-1]) + fmt.Fprintf(&sqls, ",%d)", lastRow.RowID) + } else { + sqls.Write(lastRow.Row) + } case io.EOF: break readLoop default: @@ -1136,13 +1171,12 @@ func (cr *chunkRestore) restore( metrics.MarkTiming(readMark, start) var ( - totalKVs []kvenc.KvPair - totalAffectedRows uint64 - localChecksum verify.KVChecksum + totalKVs []kvenc.KvPair + localChecksum verify.KVChecksum ) // sql -> kv start = time.Now() - kvs, affectedRows, err := kvEncoder.SQL2KV(sqls.String()) + kvs, _, err := kvEncoder.SQL2KV(sqls.String()) metrics.MarkTiming(encodeMark, start) common.AppLogger.Debugf("len(kvs) %d, len(sql) %d", len(kvs), sqls.Len()) if err != nil { @@ -1152,7 +1186,6 @@ func (cr *chunkRestore) restore( totalKVs = append(totalKVs, kvs...) localChecksum.Update(kvs) - totalAffectedRows += affectedRows // kv -> deliver ( -> tikv ) start = time.Now() @@ -1177,23 +1210,22 @@ func (cr *chunkRestore) restore( // Update the table, and save a checkpoint. 
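// A standalone sketch of how the read loop above splices the generated row ID
// into a parsed row when ShouldIncludeRowID is set: the tuple's closing ')'
// is dropped and ",<rowID>)" is appended, matching the extra `_tidb_rowid`
// column injected into the column list. The table name, tuple and row ID
// below are made up for illustration.
package main

import (
	"fmt"
	"strings"
)

func writeRowWithRowID(sqls *strings.Builder, row []byte, rowID int64) {
	sqls.Write(row[:len(row)-1])     // "(1, 'a', 2.5" without the closing ')'
	fmt.Fprintf(sqls, ",%d)", rowID) // "(1, 'a', 2.5,42)"
}

func main() {
	var sqls strings.Builder
	sqls.WriteString("INSERT INTO `db`.`tbl` (`a`,`b`,`c`,`_tidb_rowid`) VALUES ")
	writeRowWithRowID(&sqls, []byte("(1, 'a', 2.5)"), 42)
	fmt.Println(sqls.String())
	// INSERT INTO `db`.`tbl` (`a`,`b`,`c`,`_tidb_rowid`) VALUES (1, 'a', 2.5,42)
}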
// (the write to the importer is effective immediately, thus update these here) - t.checksumLock.Lock() - t.checksum.Add(&localChecksum) - t.rows += totalAffectedRows + cr.chunk.Checksum.Add(&localChecksum) + cr.chunk.Chunk.Offset = cr.parser.Pos() + cr.chunk.Chunk.PrevRowIDMax = cr.parser.LastRow().RowID rc.saveCpCh <- saveCp{ tableName: t.tableName, merger: &ChunkCheckpointMerger{ + Key: cr.chunk.Key, AllocBase: t.alloc.Base() + 1, - Checksum: t.checksum, - Path: cr.path, - Offset: cr.chunk.Offset, - Pos: cr.parser.Pos, + Checksum: cr.chunk.Checksum, + Pos: cr.chunk.Chunk.Offset, + RowID: cr.chunk.Chunk.PrevRowIDMax, }, } - t.checksumLock.Unlock() } - common.AppLogger.Infof("[%s] restore chunk [%s] takes %v", t.tableName, cr.name, time.Since(timer)) + common.AppLogger.Infof("[%s] restore chunk #%d (%s) takes %v", t.tableName, cr.index, &cr.chunk.Key, time.Since(timer)) return nil } diff --git a/tests/checkpoint_chunks/run.sh b/tests/checkpoint_chunks/run.sh index ba7225daa..964d9f5b9 100755 --- a/tests/checkpoint_chunks/run.sh +++ b/tests/checkpoint_chunks/run.sh @@ -54,5 +54,5 @@ check_contains "count(i): $(($ROW_COUNT*$CHUNK_COUNT))" check_contains "sum(i): $(( $ROW_COUNT*$CHUNK_COUNT*(($CHUNK_COUNT+2)*$ROW_COUNT + 1)/2 ))" run_sql "SELECT count(*) FROM tidb_lightning_checkpoint_test_cpch.table_v1 WHERE status = 180" check_contains "count(*): 1" -run_sql "SELECT count(*) FROM tidb_lightning_checkpoint_test_cpch.chunk_v2 WHERE pos > offset" +run_sql "SELECT count(*) FROM tidb_lightning_checkpoint_test_cpch.chunk_v3 WHERE pos = end_offset" check_contains "count(*): $CHUNK_COUNT" From e4ba06afc70b67d37cce1d0ce61cdc292ed3bf9a Mon Sep 17 00:00:00 2001 From: kennytm Date: Tue, 6 Nov 2018 11:17:26 +0800 Subject: [PATCH 05/15] lightning: applied some missing `go fmt` change --- cmd/main.go | 2 +- lightning/common/util.go | 5 +++-- lightning/lightning.go | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/cmd/main.go b/cmd/main.go index d011a1e67..79cffb6ee 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -7,11 +7,11 @@ import ( "os/signal" "syscall" - "github.com/pkg/errors" "github.com/pingcap/tidb-lightning/lightning" "github.com/pingcap/tidb-lightning/lightning/common" "github.com/pingcap/tidb-lightning/lightning/config" plan "github.com/pingcap/tidb/planner/core" + "github.com/pkg/errors" ) func setGlobalVars() { diff --git a/lightning/common/util.go b/lightning/common/util.go index 727210d04..795ead7e4 100644 --- a/lightning/common/util.go +++ b/lightning/common/util.go @@ -16,8 +16,8 @@ import ( "path/filepath" "github.com/go-sql-driver/mysql" - "github.com/pkg/errors" tmysql "github.com/pingcap/tidb/mysql" + "github.com/pkg/errors" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" ) @@ -229,7 +229,8 @@ func IsRetryableError(err error) bool { } // IsContextCanceledError returns whether the error is caused by context -// cancellation. +// cancellation. This function returns `false` (not a context-canceled error) if +// `err == nil`. 
func IsContextCanceledError(err error) bool { err = errors.Cause(err) return err == context.Canceled || status.Code(err) == codes.Canceled diff --git a/lightning/lightning.go b/lightning/lightning.go index 365afb2d0..13da7213e 100644 --- a/lightning/lightning.go +++ b/lightning/lightning.go @@ -7,8 +7,8 @@ import ( "runtime" "sync" - "github.com/pkg/errors" sstpb "github.com/pingcap/kvproto/pkg/import_sstpb" + "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/pingcap/tidb-lightning/lightning/common" From 47d5df6e09ab3d0430f475cc0fbd551b12a04461 Mon Sep 17 00:00:00 2001 From: kennytm Date: Tue, 6 Nov 2018 12:04:39 +0800 Subject: [PATCH 06/15] *: minor test fixes --- lightning/restore/restore.go | 4 ++-- tests/error_summary/run.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lightning/restore/restore.go b/lightning/restore/restore.go index 8cd48b28f..c2257f107 100644 --- a/lightning/restore/restore.go +++ b/lightning/restore/restore.go @@ -1028,8 +1028,8 @@ func DoChecksum(ctx context.Context, dsn config.DBStore, table string) (*RemoteC // set it back finally defer func() { err = UpdateGCLifeTime(ctx, db, ori) - if err != nil { - common.AppLogger.Errorf("[%s] update tikv_gc_life_time error %s", table, errors.ErrorStack(err)) + if err != nil && !common.IsContextCanceledError(err) { + common.AppLogger.Errorf("[%s] update tikv_gc_life_time error %v", table, errors.ErrorStack(err)) } }() diff --git a/tests/error_summary/run.sh b/tests/error_summary/run.sh index c98c79ca7..3f92e6d20 100755 --- a/tests/error_summary/run.sh +++ b/tests/error_summary/run.sh @@ -26,7 +26,7 @@ check_contains 'sum(id): 28' check_contains 'sum(k): 32' # Verify the log contains the expected messages at the last few lines -tail -10 "$TEST_DIR/lightning-error-summary.log" > "$TEST_DIR/lightning-error-summary.tail" +tail -20 "$TEST_DIR/lightning-error-summary.log" > "$TEST_DIR/lightning-error-summary.tail" grep -Fq '[error] Totally **2** tables failed to be imported.' "$TEST_DIR/lightning-error-summary.tail" grep -Fq '[`error_summary`.`a`] [checksum] checksum mismatched' "$TEST_DIR/lightning-error-summary.tail" grep -Fq '[`error_summary`.`c`] [checksum] checksum mismatched' "$TEST_DIR/lightning-error-summary.tail" From 38f87a5af0a8a88af861c467724ece4044604ca0 Mon Sep 17 00:00:00 2001 From: kennytm Date: Wed, 7 Nov 2018 18:42:16 +0800 Subject: [PATCH 07/15] checkpoints: minor fix, ensure recorded alloc_base is increasing --- lightning/restore/checkpoints.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightning/restore/checkpoints.go b/lightning/restore/checkpoints.go index 488478fb8..04691ce09 100644 --- a/lightning/restore/checkpoints.go +++ b/lightning/restore/checkpoints.go @@ -412,7 +412,7 @@ func (cpdb *MySQLCheckpointsDB) Update(checkpointDiffs map[string]*TableCheckpoi WHERE table_name = ? AND path = ? AND offset = ?; `, cpdb.schema) checksumQuery := fmt.Sprintf(` - UPDATE %s.table_v1 SET alloc_base = ? WHERE table_name = ?; + UPDATE %s.table_v1 SET alloc_base = GREATEST(?, alloc_base) WHERE table_name = ?; `, cpdb.schema) statusQuery := fmt.Sprintf(` UPDATE %s.table_v1 SET status = ? 
WHERE table_name = ?; From 48cb1f7ac91605171428289f3973087420899e5e Mon Sep 17 00:00:00 2001 From: kennytm Date: Wed, 7 Nov 2018 21:42:55 +0800 Subject: [PATCH 08/15] *: addressed comments --- lightning/kv/sql2kv.go | 6 +----- lightning/mydump/region.go | 17 ++++++++++++----- lightning/mydump/region_test.go | 6 ++++-- lightning/restore/checkpoints.go | 3 ++- lightning/restore/restore.go | 12 +++++++++--- 5 files changed, 28 insertions(+), 16 deletions(-) diff --git a/lightning/kv/sql2kv.go b/lightning/kv/sql2kv.go index 6640f1ce8..8eb4545d7 100644 --- a/lightning/kv/sql2kv.go +++ b/lightning/kv/sql2kv.go @@ -1,13 +1,13 @@ package kv import ( - "github.com/pkg/errors" "github.com/pingcap/tidb-lightning/lightning/common" "github.com/pingcap/tidb-lightning/lightning/metric" sqltool "github.com/pingcap/tidb-lightning/lightning/sql" "github.com/pingcap/tidb/kv" "github.com/pingcap/tidb/meta/autoid" kvec "github.com/pingcap/tidb/util/kvencoder" + "github.com/pkg/errors" ) const ( @@ -102,10 +102,6 @@ func (kvcodec *TableKVEncoder) Close() error { return errors.Trace(kvcodec.encoder.Close()) } -func (kvcodec *TableKVEncoder) NextRowID() int64 { - return kvcodec.idAllocator.Base() + 1 -} - func (kvcodec *TableKVEncoder) SQL2KV(sql string) ([]kvec.KvPair, uint64, error) { if PrepareStmtMode { // via prepare statment diff --git a/lightning/mydump/region.go b/lightning/mydump/region.go index 9a4c2b8e1..f85b0040b 100644 --- a/lightning/mydump/region.go +++ b/lightning/mydump/region.go @@ -79,7 +79,7 @@ func NewRegionFounder(minRegionSize int64) *RegionFounder { } } -func (f *RegionFounder) MakeTableRegions(meta *MDTableMeta) []*TableRegion { +func (f *RegionFounder) MakeTableRegions(meta *MDTableMeta) ([]*TableRegion, error) { var lock sync.Mutex var wg sync.WaitGroup @@ -88,6 +88,8 @@ func (f *RegionFounder) MakeTableRegions(meta *MDTableMeta) []*TableRegion { processors := f.processors minRegionSize := f.minRegionSize + var chunkErr error + // Split files into regions filesRegions := make(regionSlice, 0, len(meta.DataFiles)) for _, dataFile := range meta.DataFiles { @@ -96,13 +98,13 @@ func (f *RegionFounder) MakeTableRegions(meta *MDTableMeta) []*TableRegion { common.AppLogger.Debugf("[%s] loading file's region (%s) ...", table, file) chunks, err := splitExactChunks(db, table, file, minRegionSize) + lock.Lock() if err == nil { - lock.Lock() filesRegions = append(filesRegions, chunks...) 
- lock.Unlock() } else { - common.AppLogger.Errorf("failed to extract chunks from file (%s): %s", file, err.Error()) + chunkErr = errors.Annotatef(err, "%s", file) } + lock.Unlock() processors <- pid wg.Done() @@ -110,6 +112,11 @@ func (f *RegionFounder) MakeTableRegions(meta *MDTableMeta) []*TableRegion { } wg.Wait() + if chunkErr != nil { + common.AppLogger.Errorf("failed to extract chunks from file: %v", chunkErr) + return nil, chunkErr + } + // Setup files' regions sort.Sort(filesRegions) // ps : sort region by - (fileName, fileOffset) var totalRowCount int64 @@ -123,7 +130,7 @@ func (f *RegionFounder) MakeTableRegions(meta *MDTableMeta) []*TableRegion { region.Chunk.RowIDMax = totalRowCount } - return filesRegions + return filesRegions, nil } func splitExactChunks(db string, table string, file string, minChunkSize int64) ([]*TableRegion, error) { diff --git a/lightning/mydump/region_test.go b/lightning/mydump/region_test.go index 0d4ff99a6..9453571cc 100644 --- a/lightning/mydump/region_test.go +++ b/lightning/mydump/region_test.go @@ -39,7 +39,8 @@ func (s *testMydumpRegionSuite) TestTableRegion(c *C) { founder := NewRegionFounder(defMinRegionSize) for _, meta := range dbMeta.Tables { - regions := founder.MakeTableRegions(meta) + regions, err := founder.MakeTableRegions(meta) + c.Assert(err, IsNil) table := meta.Name fmt.Printf("[%s] region count ===============> %d\n", table, len(regions)) @@ -100,7 +101,8 @@ func (s *testMydumpRegionSuite) TestRegionReader(c *C) { founder := NewRegionFounder(defMinRegionSize) for _, meta := range dbMeta.Tables { - regions := founder.MakeTableRegions(meta) + regions, err := founder.MakeTableRegions(meta) + c.Assert(err, IsNil) tolValTuples := 0 for _, reg := range regions { diff --git a/lightning/restore/checkpoints.go b/lightning/restore/checkpoints.go index 04691ce09..09f2acd57 100644 --- a/lightning/restore/checkpoints.go +++ b/lightning/restore/checkpoints.go @@ -8,6 +8,7 @@ import ( "strings" "time" + "github.com/cznic/mathutil" "github.com/joho/sqltocsv" "github.com/pkg/errors" "github.com/satori/go.uuid" @@ -142,7 +143,7 @@ type ChunkCheckpointMerger struct { func (merger *ChunkCheckpointMerger) MergeInto(cpd *TableCheckpointDiff) { cpd.hasChunks = true - cpd.allocBase = merger.AllocBase + cpd.allocBase = mathutil.MaxInt64(cpd.allocBase, merger.AllocBase) cpd.chunks[merger.Key] = chunkCheckpointDiff{ pos: merger.Pos, rowID: merger.RowID, diff --git a/lightning/restore/restore.go b/lightning/restore/restore.go index c2257f107..c78dcf755 100644 --- a/lightning/restore/restore.go +++ b/lightning/restore/restore.go @@ -401,7 +401,9 @@ func (t *TableRestore) restore(ctx context.Context, rc *RestoreController, cp *T if len(cp.Chunks) > 0 { common.AppLogger.Infof("[%s] reusing %d chunks from checkpoint", t.tableName, len(cp.Chunks)) } else if cp.Status < CheckpointStatusAllWritten { - t.populateChunks(rc.cfg.Mydumper.MinRegionSize, cp, t.tableInfo) + if err := t.populateChunks(rc.cfg.Mydumper.MinRegionSize, cp, t.tableInfo); err != nil { + return nil, errors.Trace(err) + } if err := rc.checkpointsDB.InsertChunkCheckpoints(ctx, t.tableName, cp.Chunks); err != nil { return nil, errors.Trace(err) } @@ -883,12 +885,15 @@ func (tr *TableRestore) Close() { var tidbRowIDColumnRegex = regexp.MustCompile(fmt.Sprintf("`%[1]s`|(?i:\\b%[1]s\\b)", model.ExtraHandleName)) -func (t *TableRestore) populateChunks(minChunkSize int64, cp *TableCheckpoint, tableInfo *TidbTableInfo) { +func (t *TableRestore) populateChunks(minChunkSize int64, cp *TableCheckpoint, 
tableInfo *TidbTableInfo) error { common.AppLogger.Infof("[%s] load chunks", t.tableName) timer := time.Now() founder := mydump.NewRegionFounder(minChunkSize) - chunks := founder.MakeTableRegions(t.tableMeta) + chunks, err := founder.MakeTableRegions(t.tableMeta) + if err != nil { + return errors.Trace(err) + } cp.Chunks = make([]*ChunkCheckpoint, 0, len(chunks)) @@ -927,6 +932,7 @@ func (t *TableRestore) populateChunks(minChunkSize int64, cp *TableCheckpoint, t } common.AppLogger.Infof("[%s] load %d chunks takes %v", t.tableName, len(chunks), time.Since(timer)) + return nil } func (tr *TableRestore) restoreTableMeta(ctx context.Context, cfg *config.Config) error { From 5e7e4555cedbc8a9d3a4d3fea438094c7afbead5 Mon Sep 17 00:00:00 2001 From: kennytm Date: Wed, 7 Nov 2018 23:44:05 +0800 Subject: [PATCH 09/15] tests: added a test case to ensure non-PK AUTO_INC works --- .../data/vt.non_pk_auto_inc-schema.sql | 9 +++++++ .../various_types/data/vt.non_pk_auto_inc.sql | 26 +++++++++++++++++++ tests/various_types/run.sh | 7 +++++ 3 files changed, 42 insertions(+) create mode 100644 tests/various_types/data/vt.non_pk_auto_inc-schema.sql create mode 100644 tests/various_types/data/vt.non_pk_auto_inc.sql diff --git a/tests/various_types/data/vt.non_pk_auto_inc-schema.sql b/tests/various_types/data/vt.non_pk_auto_inc-schema.sql new file mode 100644 index 000000000..fef3be74b --- /dev/null +++ b/tests/various_types/data/vt.non_pk_auto_inc-schema.sql @@ -0,0 +1,9 @@ +/*!40101 SET NAMES binary*/; +/*!40014 SET FOREIGN_KEY_CHECKS=0*/; + +CREATE TABLE `non_pk_auto_inc` ( + `pk` char(36) NOT NULL, + `id` int(11) NOT NULL AUTO_INCREMENT, + PRIMARY KEY (`pk`), + UNIQUE KEY (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin AUTO_INCREMENT=30001; diff --git a/tests/various_types/data/vt.non_pk_auto_inc.sql b/tests/various_types/data/vt.non_pk_auto_inc.sql new file mode 100644 index 000000000..9009cea4f --- /dev/null +++ b/tests/various_types/data/vt.non_pk_auto_inc.sql @@ -0,0 +1,26 @@ +/*!40101 SET NAMES binary*/; +/*!40014 SET FOREIGN_KEY_CHECKS=0*/; +/*!40103 SET TIME_ZONE='+00:00' */; +INSERT INTO `non_pk_auto_inc` VALUES +("c5862b7d-e2a1-11e8-81d3-d5360eceeab8",1), +("d7c9dce1-e2a1-11e8-beea-a3f4b99b3e1e",3), +("d7c9de1f-e2a1-11e8-8630-b1256aff79d4",4), +("d7c9de81-e2a1-11e8-be4f-f17e7808e755",5), +("d7c9ded4-e2a1-11e8-ad15-658b46ee1390",6), +("d7c9df20-e2a1-11e8-91a9-e3a3822c60a7",7), +("d7c9dfb9-e2a1-11e8-a8d7-31054a5bf6a8",8), +("d7c9e002-e2a1-11e8-9ff1-9fc4350e1311",9), +("da71fb0d-e2a1-11e8-891e-835bd645efad",17), +("da71fbd6-e2a1-11e8-9e02-ff5f31a7c894",18), +("da71fc00-e2a1-11e8-9a81-230df4ae8e5e",19), +("da71fc29-e2a1-11e8-9823-37aa4b9b6fd1",20), +("da71fc5e-e2a1-11e8-9a4c-534927b63a63",21), +("da71fc87-e2a1-11e8-ae93-fb9ff0878e13",22), +("da71fcaf-e2a1-11e8-aac5-153d3fc52861",23), +("db87f492-e2a1-11e8-a30e-b3a363c99db5",31), +("db87f6c0-e2a1-11e8-82ea-4f787bed9c70",32), +("db87f716-e2a1-11e8-9caa-3fb2ed9f5bcf",33), +("db87f75f-e2a1-11e8-8778-05a4da66a78d",34), +("db87f7a8-e2a1-11e8-9562-31f8c96addec",35), +("db87f7f1-e2a1-11e8-922b-bbba2c355880",36), +("db87f837-e2a1-11e8-ba19-f9baeeda0855",37); diff --git a/tests/various_types/run.sh b/tests/various_types/run.sh index 23386b78e..22be52f23 100755 --- a/tests/various_types/run.sh +++ b/tests/various_types/run.sh @@ -68,3 +68,10 @@ run_sql 'SELECT count(*) FROM vt.`enum-set` WHERE find_in_set("x50", `set`) > 0' check_contains 'count(*): 10' run_sql 'SELECT `set` FROM vt.`enum-set` WHERE `enum` = "gcc"' check_contains 'set: 
x00,x06,x07,x09,x17,x20,x23,x24,x27,x37,x44,x46,x49,x54,x55,x58,x61,x62' + +run_sql 'SELECT count(*), max(id) FROM vt.`non_pk_auto_inc`' +check_contains 'count(*): 22' +check_contains 'max(id): 37' +run_sql 'INSERT INTO vt.`non_pk_auto_inc` (`pk`) VALUES ("?")' +run_sql 'SELECT id > 37 FROM vt.`non_pk_auto_inc` WHERE `pk` = "?"' +check_contains 'id > 37: 1' From 56e724a5b55e6b12c901b5aba66ef3bab1e451d3 Mon Sep 17 00:00:00 2001 From: kennytm Date: Sat, 10 Nov 2018 00:53:03 +0800 Subject: [PATCH 10/15] *: addressed comments --- lightning/mydump/parser.rl | 22 ++++++++++++++++++++++ lightning/mydump/region.go | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/lightning/mydump/parser.rl b/lightning/mydump/parser.rl index 9c3ff75e2..fa5881fe9 100644 --- a/lightning/mydump/parser.rl +++ b/lightning/mydump/parser.rl @@ -17,20 +17,42 @@ import ( %%{ #` +# This is a ragel parser to quickly scan through a data source file consisting +# of INSERT statements only. You may find detailed syntax explanation on its +# website . + machine chunk_parser; +# We treat all unimportant patterns as "comments". This include: +# - Real SQL comments `/* ... */` and `-- ...` +# - Whitespace +# - Separators `,` and `;` +# - The keywords `INSERT` and `INTO` (suffix `i` means case-insensitive). block_comment = '/*' any* :>> '*/'; line_comment = /--[^\n]*\n/; comment = block_comment | line_comment | space | [,;] | 'insert'i | 'into'i; +# The patterns parse quoted strings. +# They do NOT handle the escape-by-doubling syntax like `'ten o''clock'`, this +# will be handled as two tokens: `'ten o'` and `'clock'`. See the `name` rule +# below for why this doesn't matter. single_quoted = "'" (^"'" | "\\" any)** "'"; double_quoted = '"' (^'"' | '\\' any)** '"'; back_quoted = '`' ^'`'* '`'; unquoted = ^([,;()'"`] | space)+; +# Matches a "row" of the form `( ... )`, where the content doesn't matter. row = '(' (^[)'"`] | single_quoted | double_quoted | back_quoted)* ')'; + +# Matches a table name, which consists of one or more identifiers. This allows +# us to match a qualified name like `foo.bar`, and also double-backquote like +# ``` `foo``bar` ```. name = (back_quoted | double_quoted | unquoted)+; +# The actual parser only produces 3 kinds of tokens: +# - The keyword VALUES, as a separator between column names and data rows +# - A row (which can be a list of columns or values depending on context) +# - A table name main := |* comment; diff --git a/lightning/mydump/region.go b/lightning/mydump/region.go index f85b0040b..1d73c7aa2 100644 --- a/lightning/mydump/region.go +++ b/lightning/mydump/region.go @@ -103,6 +103,7 @@ func (f *RegionFounder) MakeTableRegions(meta *MDTableMeta) ([]*TableRegion, err filesRegions = append(filesRegions, chunks...) } else { chunkErr = errors.Annotatef(err, "%s", file) + common.AppLogger.Errorf("failed to extract chunks from file: %v", chunkErr) } lock.Unlock() @@ -113,7 +114,6 @@ func (f *RegionFounder) MakeTableRegions(meta *MDTableMeta) ([]*TableRegion, err wg.Wait() if chunkErr != nil { - common.AppLogger.Errorf("failed to extract chunks from file: %v", chunkErr) return nil, chunkErr } From 8678f9867582b704e45c7ca32238d434697e5a17 Mon Sep 17 00:00:00 2001 From: kennytm Date: Sat, 10 Nov 2018 01:31:31 +0800 Subject: [PATCH 11/15] restore: properly rebase the allocators On first read, we will reset the allocator base it is the maximum of 1. the AUTO_INCREMENT option of the CREATE TABLE statement, or 2. 
the total number of rows This ensures future writes after importing will not clobber existing rows due to overlapping _tidb_rowid. --- lightning/restore/checkpoints.go | 28 ++++++---- lightning/restore/restore.go | 28 ++++++++-- lightning/restore/tidb.go | 7 ++- tests/tidb_rowid/config.toml | 24 ++++++++ tests/tidb_rowid/data/rowid-schema-create.sql | 1 + ...owid.exotic`table``name-schema.sql.ignored | 1 + .../data/rowid.exotic`table``name.sql.ignored | 7 +++ .../data/rowid.explicit_tidb_rowid-schema.sql | 1 + .../data/rowid.explicit_tidb_rowid.sql | 11 ++++ tests/tidb_rowid/data/rowid.non_pk-schema.sql | 1 + tests/tidb_rowid/data/rowid.non_pk.sql | 11 ++++ .../data/rowid.non_pk_auto_inc-schema.sql} | 2 +- .../data/rowid.non_pk_auto_inc.sql} | 0 .../data/rowid.pre_rebase-schema.sql | 1 + tests/tidb_rowid/data/rowid.pre_rebase.sql | 1 + tests/tidb_rowid/run.sh | 55 +++++++++++++++++++ tests/various_types/run.sh | 7 --- 17 files changed, 159 insertions(+), 27 deletions(-) create mode 100644 tests/tidb_rowid/config.toml create mode 100644 tests/tidb_rowid/data/rowid-schema-create.sql create mode 100644 tests/tidb_rowid/data/rowid.exotic`table``name-schema.sql.ignored create mode 100644 tests/tidb_rowid/data/rowid.exotic`table``name.sql.ignored create mode 100644 tests/tidb_rowid/data/rowid.explicit_tidb_rowid-schema.sql create mode 100644 tests/tidb_rowid/data/rowid.explicit_tidb_rowid.sql create mode 100644 tests/tidb_rowid/data/rowid.non_pk-schema.sql create mode 100644 tests/tidb_rowid/data/rowid.non_pk.sql rename tests/{various_types/data/vt.non_pk_auto_inc-schema.sql => tidb_rowid/data/rowid.non_pk_auto_inc-schema.sql} (73%) rename tests/{various_types/data/vt.non_pk_auto_inc.sql => tidb_rowid/data/rowid.non_pk_auto_inc.sql} (100%) create mode 100644 tests/tidb_rowid/data/rowid.pre_rebase-schema.sql create mode 100644 tests/tidb_rowid/data/rowid.pre_rebase.sql create mode 100755 tests/tidb_rowid/run.sh diff --git a/lightning/restore/checkpoints.go b/lightning/restore/checkpoints.go index 09f2acd57..ff0f1a2e6 100644 --- a/lightning/restore/checkpoints.go +++ b/lightning/restore/checkpoints.go @@ -91,7 +91,7 @@ type chunkCheckpointDiff struct { type TableCheckpointDiff struct { hasStatus bool - hasChunks bool + hasRebase bool status CheckpointStatus allocBase int64 chunks map[ChunkCheckpointKey]chunkCheckpointDiff @@ -106,8 +106,8 @@ func NewTableCheckpointDiff() *TableCheckpointDiff { func (cpd *TableCheckpointDiff) String() string { return fmt.Sprintf( - "{hasStatus:%v, hasChunks:%v, status:%d, allocBase:%d, chunks:[%d]}", - cpd.hasStatus, cpd.hasChunks, cpd.status, cpd.allocBase, len(cpd.chunks), + "{hasStatus:%v, hasRebase:%v, status:%d, allocBase:%d, chunks:[%d]}", + cpd.hasStatus, cpd.hasRebase, cpd.status, cpd.allocBase, len(cpd.chunks), ) } @@ -134,16 +134,13 @@ func (merger *StatusCheckpointMerger) MergeInto(cpd *TableCheckpointDiff) { } type ChunkCheckpointMerger struct { - Key ChunkCheckpointKey - AllocBase int64 - Checksum verify.KVChecksum - Pos int64 - RowID int64 + Key ChunkCheckpointKey + Checksum verify.KVChecksum + Pos int64 + RowID int64 } func (merger *ChunkCheckpointMerger) MergeInto(cpd *TableCheckpointDiff) { - cpd.hasChunks = true - cpd.allocBase = mathutil.MaxInt64(cpd.allocBase, merger.AllocBase) cpd.chunks[merger.Key] = chunkCheckpointDiff{ pos: merger.Pos, rowID: merger.RowID, @@ -151,6 +148,15 @@ func (merger *ChunkCheckpointMerger) MergeInto(cpd *TableCheckpointDiff) { } } +type RebaseCheckpointMerger struct { + AllocBase int64 +} + +func (merger 
*RebaseCheckpointMerger) MergeInto(cpd *TableCheckpointDiff) { + cpd.hasRebase = true + cpd.allocBase = mathutil.MaxInt64(cpd.allocBase, merger.AllocBase) +} + type CheckpointsDB interface { Initialize(ctx context.Context, dbInfo map[string]*TidbDBInfo) error Get(ctx context.Context, tableName string) (*TableCheckpoint, error) @@ -442,7 +448,7 @@ func (cpdb *MySQLCheckpointsDB) Update(checkpointDiffs map[string]*TableCheckpoi return errors.Trace(e) } } - if cpd.hasChunks { + if cpd.hasRebase { if _, e := checksumStmt.ExecContext(c, cpd.allocBase, tableName); e != nil { return errors.Trace(e) } diff --git a/lightning/restore/restore.go b/lightning/restore/restore.go index c78dcf755..d5a80ec03 100644 --- a/lightning/restore/restore.go +++ b/lightning/restore/restore.go @@ -407,6 +407,19 @@ func (t *TableRestore) restore(ctx context.Context, rc *RestoreController, cp *T if err := rc.checkpointsDB.InsertChunkCheckpoints(ctx, t.tableName, cp.Chunks); err != nil { return nil, errors.Trace(err) } + + // rebase the allocator so it exceeds the number of rows. + cp.AllocBase = mathutil.MaxInt64(cp.AllocBase, t.tableInfo.core.AutoIncID) + for _, chunk := range cp.Chunks { + cp.AllocBase = mathutil.MaxInt64(cp.AllocBase, chunk.Chunk.RowIDMax) + } + t.alloc.Rebase(t.tableInfo.ID, cp.AllocBase, false) + rc.saveCpCh <- saveCp{ + tableName: t.tableName, + merger: &RebaseCheckpointMerger{ + AllocBase: cp.AllocBase, + }, + } } var wg sync.WaitGroup @@ -1221,12 +1234,17 @@ func (cr *chunkRestore) restore( cr.chunk.Chunk.PrevRowIDMax = cr.parser.LastRow().RowID rc.saveCpCh <- saveCp{ tableName: t.tableName, - merger: &ChunkCheckpointMerger{ - Key: cr.chunk.Key, + merger: &RebaseCheckpointMerger{ AllocBase: t.alloc.Base() + 1, - Checksum: cr.chunk.Checksum, - Pos: cr.chunk.Chunk.Offset, - RowID: cr.chunk.Chunk.PrevRowIDMax, + }, + } + rc.saveCpCh <- saveCp{ + tableName: t.tableName, + merger: &ChunkCheckpointMerger{ + Key: cr.chunk.Key, + Checksum: cr.chunk.Checksum, + Pos: cr.chunk.Chunk.Offset, + RowID: cr.chunk.Chunk.PrevRowIDMax, }, } } diff --git a/lightning/restore/tidb.go b/lightning/restore/tidb.go index f03a20e7e..9356a00c3 100644 --- a/lightning/restore/tidb.go +++ b/lightning/restore/tidb.go @@ -9,12 +9,12 @@ import ( "regexp" "time" - "github.com/pkg/errors" "github.com/pingcap/tidb-lightning/lightning/common" "github.com/pingcap/tidb-lightning/lightning/config" "github.com/pingcap/tidb-lightning/lightning/metric" "github.com/pingcap/tidb-lightning/lightning/mydump" "github.com/pingcap/tidb/model" + "github.com/pkg/errors" ) type TiDBManager struct { @@ -183,7 +183,7 @@ func (timgr *TiDBManager) LoadSchemaInfo(ctx context.Context, schemas map[string } func (timgr *TiDBManager) getCreateTableStmt(ctx context.Context, schema, table string) (string, error) { - query := fmt.Sprintf("SHOW CREATE TABLE `%s`.`%s`", schema, table) + query := fmt.Sprintf("SHOW CREATE TABLE %s", common.UniqueTable(schema, table)) var tbl, createTable string err := common.QueryRowWithRetry(ctx, timgr.db, query, &tbl, &createTable) return createTable, errors.Annotatef(err, "%s", query) @@ -202,7 +202,8 @@ func UpdateGCLifeTime(ctx context.Context, db *sql.DB, gcLifeTime string) error } func AlterAutoIncrement(ctx context.Context, db *sql.DB, schema string, table string, incr int64) error { - query := fmt.Sprintf("ALTER TABLE `%s`.`%s` AUTO_INCREMENT=%d", schema, table, incr) + tableName := common.UniqueTable(schema, table) + query := fmt.Sprintf("ALTER TABLE %s AUTO_INCREMENT=%d", tableName, incr) 
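// A small sketch of how the rebase target above is derived: the allocator
// base must be at least the AUTO_INCREMENT option declared in the schema and
// at least the largest row ID any chunk can produce, so inserts performed
// after the import cannot collide with imported _tidb_rowid values. The
// figures below are invented.
package main

import "fmt"

func rebaseTarget(storedAllocBase, schemaAutoIncID int64, chunkRowIDMax []int64) int64 {
	base := storedAllocBase
	if schemaAutoIncID > base {
		base = schemaAutoIncID
	}
	for _, rowIDMax := range chunkRowIDMax {
		if rowIDMax > base {
			base = rowIDMax
		}
	}
	return base
}

func main() {
	// schema declares AUTO_INCREMENT=30001, chunks cover 22 rows in total:
	fmt.Println(rebaseTarget(0, 30001, []int64{10, 22})) // 30001
	// no AUTO_INCREMENT option, 100000 rows spread across two chunks:
	fmt.Println(rebaseTarget(0, 0, []int64{60000, 100000})) // 100000
}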
common.AppLogger.Infof("[%s.%s] %s", schema, table, query) err := common.ExecWithRetry(ctx, db, query, query) if err != nil { diff --git a/tests/tidb_rowid/config.toml b/tests/tidb_rowid/config.toml new file mode 100644 index 000000000..cb356e007 --- /dev/null +++ b/tests/tidb_rowid/config.toml @@ -0,0 +1,24 @@ +[lightning] +check-requirements = false +file = "/dev/stderr" +level = "warning" + +[tikv-importer] +addr = "127.0.0.1:8808" + +[mydumper] +data-source-dir = "tests/tidb_rowid/data" +region-min-size = 48 + +[tidb] +host = "127.0.0.1" +port = 4000 +user = "root" +status-port = 10080 +pd-addr = "127.0.0.1:2379" +log-level = "error" + +[post-restore] +checksum = true +compact = true +analyze = true diff --git a/tests/tidb_rowid/data/rowid-schema-create.sql b/tests/tidb_rowid/data/rowid-schema-create.sql new file mode 100644 index 000000000..d06ab2f96 --- /dev/null +++ b/tests/tidb_rowid/data/rowid-schema-create.sql @@ -0,0 +1 @@ +CREATE DATABASE rowid; diff --git a/tests/tidb_rowid/data/rowid.exotic`table``name-schema.sql.ignored b/tests/tidb_rowid/data/rowid.exotic`table``name-schema.sql.ignored new file mode 100644 index 000000000..e2d94bbdf --- /dev/null +++ b/tests/tidb_rowid/data/rowid.exotic`table``name-schema.sql.ignored @@ -0,0 +1 @@ +create table `exotic``table````name` (a varchar(6) primary key, b int unique auto_increment) auto_increment=80000; \ No newline at end of file diff --git a/tests/tidb_rowid/data/rowid.exotic`table``name.sql.ignored b/tests/tidb_rowid/data/rowid.exotic`table``name.sql.ignored new file mode 100644 index 000000000..e0341a7ac --- /dev/null +++ b/tests/tidb_rowid/data/rowid.exotic`table``name.sql.ignored @@ -0,0 +1,7 @@ +insert `exotic``table````name` (a, b, _tidb_rowid) values +('aaaaaa', 11, 79995), +('bbbbbb', 22, 79996); +insert `exotic``table````name` (a, b, _tidb_rowid) values +('cccccc', 33, 79997), +('dddddd', 44, 79998), +('eeeeee', 55, 79999); diff --git a/tests/tidb_rowid/data/rowid.explicit_tidb_rowid-schema.sql b/tests/tidb_rowid/data/rowid.explicit_tidb_rowid-schema.sql new file mode 100644 index 000000000..4f1d63448 --- /dev/null +++ b/tests/tidb_rowid/data/rowid.explicit_tidb_rowid-schema.sql @@ -0,0 +1 @@ +create table explicit_tidb_rowid (pk varchar(6) primary key); \ No newline at end of file diff --git a/tests/tidb_rowid/data/rowid.explicit_tidb_rowid.sql b/tests/tidb_rowid/data/rowid.explicit_tidb_rowid.sql new file mode 100644 index 000000000..f3769cc81 --- /dev/null +++ b/tests/tidb_rowid/data/rowid.explicit_tidb_rowid.sql @@ -0,0 +1,11 @@ +insert into non_pk (pk, _tidb_rowid) values +('eight', 8), +('five', 5), +('four', 4), +('nine', 9), +('one', 1), +('seven', 7), +('six', 6), +('ten', 10), +('three', 3), +('two', 2); diff --git a/tests/tidb_rowid/data/rowid.non_pk-schema.sql b/tests/tidb_rowid/data/rowid.non_pk-schema.sql new file mode 100644 index 000000000..5b5757644 --- /dev/null +++ b/tests/tidb_rowid/data/rowid.non_pk-schema.sql @@ -0,0 +1 @@ +create table non_pk (pk varchar(6) primary key); diff --git a/tests/tidb_rowid/data/rowid.non_pk.sql b/tests/tidb_rowid/data/rowid.non_pk.sql new file mode 100644 index 000000000..77e2c00be --- /dev/null +++ b/tests/tidb_rowid/data/rowid.non_pk.sql @@ -0,0 +1,11 @@ +insert into non_pk values +('one'), +('two'), +('three'), +('four'), +('five'), +('six'), +('seven'), +('eight'), +('nine'), +('ten'); diff --git a/tests/various_types/data/vt.non_pk_auto_inc-schema.sql b/tests/tidb_rowid/data/rowid.non_pk_auto_inc-schema.sql similarity index 73% rename from 
tests/various_types/data/vt.non_pk_auto_inc-schema.sql rename to tests/tidb_rowid/data/rowid.non_pk_auto_inc-schema.sql index fef3be74b..a71be02c9 100644 --- a/tests/various_types/data/vt.non_pk_auto_inc-schema.sql +++ b/tests/tidb_rowid/data/rowid.non_pk_auto_inc-schema.sql @@ -6,4 +6,4 @@ CREATE TABLE `non_pk_auto_inc` ( `id` int(11) NOT NULL AUTO_INCREMENT, PRIMARY KEY (`pk`), UNIQUE KEY (`id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin AUTO_INCREMENT=30001; +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin; diff --git a/tests/various_types/data/vt.non_pk_auto_inc.sql b/tests/tidb_rowid/data/rowid.non_pk_auto_inc.sql similarity index 100% rename from tests/various_types/data/vt.non_pk_auto_inc.sql rename to tests/tidb_rowid/data/rowid.non_pk_auto_inc.sql diff --git a/tests/tidb_rowid/data/rowid.pre_rebase-schema.sql b/tests/tidb_rowid/data/rowid.pre_rebase-schema.sql new file mode 100644 index 000000000..887540be5 --- /dev/null +++ b/tests/tidb_rowid/data/rowid.pre_rebase-schema.sql @@ -0,0 +1 @@ +create table pre_rebase (pk varchar(6) primary key) auto_increment=70000; diff --git a/tests/tidb_rowid/data/rowid.pre_rebase.sql b/tests/tidb_rowid/data/rowid.pre_rebase.sql new file mode 100644 index 000000000..4852114cf --- /dev/null +++ b/tests/tidb_rowid/data/rowid.pre_rebase.sql @@ -0,0 +1 @@ +insert into pre_rebase values ('foo'); \ No newline at end of file diff --git a/tests/tidb_rowid/run.sh b/tests/tidb_rowid/run.sh new file mode 100755 index 000000000..54f457168 --- /dev/null +++ b/tests/tidb_rowid/run.sh @@ -0,0 +1,55 @@ +#!/bin/sh + +# Verify that _tidb_rowid is correctly adjusted. + +set -eu + +run_sql 'DROP DATABASE IF EXISTS rowid;' +run_lightning +echo 'Import finished' + +run_sql 'SELECT count(*), max(id), min(_tidb_rowid), max(_tidb_rowid) FROM rowid.`non_pk_auto_inc`' +check_contains 'count(*): 22' +check_contains 'max(id): 37' +check_contains 'min(_tidb_rowid): 1' +check_contains 'max(_tidb_rowid): 22' +run_sql 'INSERT INTO rowid.`non_pk_auto_inc` (`pk`) VALUES ("?")' +run_sql 'SELECT id > 37, _tidb_rowid > 22 FROM rowid.`non_pk_auto_inc` WHERE `pk` = "?"' +check_contains 'id > 37: 1' +check_contains '_tidb_rowid > 22: 1' + +for table_name in non_pk explicit_tidb_rowid; do + run_sql "SELECT count(*), min(_tidb_rowid), max(_tidb_rowid) FROM rowid.${table_name}" + check_contains 'count(*): 10' + check_contains 'min(_tidb_rowid): 1' + check_contains 'max(_tidb_rowid): 10' + run_sql "SELECT _tidb_rowid FROM rowid.${table_name} WHERE pk = 'five'" + check_contains '_tidb_rowid: 5' + run_sql "INSERT INTO rowid.${table_name} VALUES ('eleven')" + run_sql "SELECT count(*) FROM rowid.${table_name}" + check_contains 'count(*): 11' + run_sql "SELECT count(*) FROM rowid.${table_name} WHERE pk > '!'" + check_contains 'count(*): 11' + run_sql "SELECT _tidb_rowid > 10 FROM rowid.${table_name} WHERE pk = 'eleven'" + check_contains '_tidb_rowid > 10: 1' +done + +run_sql 'SELECT count(*), min(_tidb_rowid), max(_tidb_rowid) FROM rowid.pre_rebase' +check_contains 'count(*): 1' +check_contains 'min(_tidb_rowid): 1' +check_contains 'max(_tidb_rowid): 1' +run_sql 'INSERT INTO rowid.pre_rebase VALUES ("?")' +run_sql 'SELECT _tidb_rowid > 70000 FROM rowid.pre_rebase WHERE pk = "?"' +check_contains '_tidb_rowid > 70000: 1' + +# FIXME Not testing these until pingcap/tidb#8259 is fixed. 
+# +# run_sql 'SELECT count(*) FROM rowid.`exotic``table````name`' +# check_contains 'count(*): 5' +# run_sql 'INSERT INTO rowid.`exotic``table````name` (a) VALUES ("ffffff"), ("gggggg")' +# run_sql 'SELECT _tidb_rowid > 80000, b > 800000 FROM rowid.pre_rebase WHERE a = "ffffff"' +# check_contains '_tidb_rowid > 80000: 1' +# check_contains 'b > 80000: 1' +# run_sql 'SELECT _tidb_rowid > 80000, b > 800000 FROM rowid.pre_rebase WHERE a = "gggggg"' +# check_contains '_tidb_rowid > 80000: 1' +# check_contains 'b > 80000: 1' diff --git a/tests/various_types/run.sh b/tests/various_types/run.sh index 22be52f23..23386b78e 100755 --- a/tests/various_types/run.sh +++ b/tests/various_types/run.sh @@ -68,10 +68,3 @@ run_sql 'SELECT count(*) FROM vt.`enum-set` WHERE find_in_set("x50", `set`) > 0' check_contains 'count(*): 10' run_sql 'SELECT `set` FROM vt.`enum-set` WHERE `enum` = "gcc"' check_contains 'set: x00,x06,x07,x09,x17,x20,x23,x24,x27,x37,x44,x46,x49,x54,x55,x58,x61,x62' - -run_sql 'SELECT count(*), max(id) FROM vt.`non_pk_auto_inc`' -check_contains 'count(*): 22' -check_contains 'max(id): 37' -run_sql 'INSERT INTO vt.`non_pk_auto_inc` (`pk`) VALUES ("?")' -run_sql 'SELECT id > 37 FROM vt.`non_pk_auto_inc` WHERE `pk` = "?"' -check_contains 'id > 37: 1' From 4b4670b8d0dd1eb9b3d18aa3061a8c5b108286d1 Mon Sep 17 00:00:00 2001 From: kennytm Date: Wed, 14 Nov 2018 18:27:43 +0800 Subject: [PATCH 12/15] tests: move the exotic filename tests into its own folder --- tests/exotic_filenames/config.toml | 24 +++++++++++++++++++ .../data/x`f\"n-schema-create.sql" | 1 + .../x`f\"n.exotic`table``name-schema.sql" | 0 .../data/x`f\"n.exotic`table``name.sql" | 0 tests/exotic_filenames/run.sh | 22 +++++++++++++++++ .../data/rowid.specific_auto_inc-schema.sql | 1 + .../data/rowid.specific_auto_inc.sql | 7 ++++++ tests/tidb_rowid/run.sh | 20 +++++++--------- 8 files changed, 64 insertions(+), 11 deletions(-) create mode 100644 tests/exotic_filenames/config.toml create mode 100644 "tests/exotic_filenames/data/x`f\"n-schema-create.sql" rename tests/tidb_rowid/data/rowid.exotic`table``name-schema.sql.ignored => "tests/exotic_filenames/data/x`f\"n.exotic`table``name-schema.sql" (100%) rename tests/tidb_rowid/data/rowid.exotic`table``name.sql.ignored => "tests/exotic_filenames/data/x`f\"n.exotic`table``name.sql" (100%) create mode 100755 tests/exotic_filenames/run.sh create mode 100644 tests/tidb_rowid/data/rowid.specific_auto_inc-schema.sql create mode 100644 tests/tidb_rowid/data/rowid.specific_auto_inc.sql diff --git a/tests/exotic_filenames/config.toml b/tests/exotic_filenames/config.toml new file mode 100644 index 000000000..a66dc894b --- /dev/null +++ b/tests/exotic_filenames/config.toml @@ -0,0 +1,24 @@ +[lightning] +check-requirements = false +file = "/dev/stderr" +level = "warning" + +[tikv-importer] +addr = "127.0.0.1:8808" + +[mydumper] +data-source-dir = "tests/exotic_filenames/data" +region-min-size = 48 + +[tidb] +host = "127.0.0.1" +port = 4000 +user = "root" +status-port = 10080 +pd-addr = "127.0.0.1:2379" +log-level = "error" + +[post-restore] +checksum = true +compact = true +analyze = true diff --git "a/tests/exotic_filenames/data/x`f\"n-schema-create.sql" "b/tests/exotic_filenames/data/x`f\"n-schema-create.sql" new file mode 100644 index 000000000..1fb10bd2e --- /dev/null +++ "b/tests/exotic_filenames/data/x`f\"n-schema-create.sql" @@ -0,0 +1 @@ +create database `x``f"n`; \ No newline at end of file diff --git a/tests/tidb_rowid/data/rowid.exotic`table``name-schema.sql.ignored 
"b/tests/exotic_filenames/data/x`f\"n.exotic`table``name-schema.sql" similarity index 100% rename from tests/tidb_rowid/data/rowid.exotic`table``name-schema.sql.ignored rename to "tests/exotic_filenames/data/x`f\"n.exotic`table``name-schema.sql" diff --git a/tests/tidb_rowid/data/rowid.exotic`table``name.sql.ignored "b/tests/exotic_filenames/data/x`f\"n.exotic`table``name.sql" similarity index 100% rename from tests/tidb_rowid/data/rowid.exotic`table``name.sql.ignored rename to "tests/exotic_filenames/data/x`f\"n.exotic`table``name.sql" diff --git a/tests/exotic_filenames/run.sh b/tests/exotic_filenames/run.sh new file mode 100755 index 000000000..22e4a1627 --- /dev/null +++ b/tests/exotic_filenames/run.sh @@ -0,0 +1,22 @@ +#!/bin/sh + +# Confirm the behavior for some exotic filenames +# Do not enable until https://github.com/pingcap/tidb/pull/8302 is merged. + +exit 0 + +set -eu + +run_sql 'DROP DATABASE IF EXISTS `x``f"n`;' +run_lightning +echo 'Import finished' + +run_sql 'SELECT count(*) FROM `x``f"n`.`exotic``table````name`' +check_contains 'count(*): 5' +run_sql 'INSERT INTO `x``f"n`.`exotic``table````name` (a) VALUES ("ffffff"), ("gggggg")' +run_sql 'SELECT _tidb_rowid > 80000, b > 80000 FROM `x``f"n`.`exotic``table````name` WHERE a = "ffffff"' +check_contains '_tidb_rowid > 80000: 1' +check_contains 'b > 80000: 1' +run_sql 'SELECT _tidb_rowid > 80000, b > 80000 FROM `x``f"n`.`exotic``table````name` WHERE a = "gggggg"' +check_contains '_tidb_rowid > 80000: 1' +check_contains 'b > 80000: 1' diff --git a/tests/tidb_rowid/data/rowid.specific_auto_inc-schema.sql b/tests/tidb_rowid/data/rowid.specific_auto_inc-schema.sql new file mode 100644 index 000000000..f6962e15a --- /dev/null +++ b/tests/tidb_rowid/data/rowid.specific_auto_inc-schema.sql @@ -0,0 +1 @@ +create table specific_auto_inc (a varchar(6) primary key, b int unique auto_increment) auto_increment=80000; \ No newline at end of file diff --git a/tests/tidb_rowid/data/rowid.specific_auto_inc.sql b/tests/tidb_rowid/data/rowid.specific_auto_inc.sql new file mode 100644 index 000000000..08cf771e4 --- /dev/null +++ b/tests/tidb_rowid/data/rowid.specific_auto_inc.sql @@ -0,0 +1,7 @@ +insert specific_auto_inc (a, b, _tidb_rowid) values +('aaaaaa', 11, 79995), +('bbbbbb', 22, 79996); +insert specific_auto_inc (a, b, _tidb_rowid) values +('cccccc', 33, 79997), +('dddddd', 44, 79998), +('eeeeee', 55, 79999); diff --git a/tests/tidb_rowid/run.sh b/tests/tidb_rowid/run.sh index 54f457168..b65a67e0d 100755 --- a/tests/tidb_rowid/run.sh +++ b/tests/tidb_rowid/run.sh @@ -42,14 +42,12 @@ run_sql 'INSERT INTO rowid.pre_rebase VALUES ("?")' run_sql 'SELECT _tidb_rowid > 70000 FROM rowid.pre_rebase WHERE pk = "?"' check_contains '_tidb_rowid > 70000: 1' -# FIXME Not testing these until pingcap/tidb#8259 is fixed. 
-# -# run_sql 'SELECT count(*) FROM rowid.`exotic``table````name`' -# check_contains 'count(*): 5' -# run_sql 'INSERT INTO rowid.`exotic``table````name` (a) VALUES ("ffffff"), ("gggggg")' -# run_sql 'SELECT _tidb_rowid > 80000, b > 800000 FROM rowid.pre_rebase WHERE a = "ffffff"' -# check_contains '_tidb_rowid > 80000: 1' -# check_contains 'b > 80000: 1' -# run_sql 'SELECT _tidb_rowid > 80000, b > 800000 FROM rowid.pre_rebase WHERE a = "gggggg"' -# check_contains '_tidb_rowid > 80000: 1' -# check_contains 'b > 80000: 1' +run_sql 'SELECT count(*) FROM rowid.specific_auto_inc' +check_contains 'count(*): 5' +run_sql 'INSERT INTO rowid.specific_auto_inc (a) VALUES ("ffffff"), ("gggggg")' +run_sql 'SELECT _tidb_rowid > 80000, b > 80000 FROM rowid.specific_auto_inc WHERE a = "ffffff"' +check_contains '_tidb_rowid > 80000: 1' +check_contains 'b > 80000: 1' +run_sql 'SELECT _tidb_rowid > 80000, b > 80000 FROM rowid.specific_auto_inc WHERE a = "gggggg"' +check_contains '_tidb_rowid > 80000: 1' +check_contains 'b > 80000: 1' From ed37dbefa3509af059c0790f3fcd7b77cbba5325 Mon Sep 17 00:00:00 2001 From: kennytm Date: Wed, 14 Nov 2018 20:14:43 +0800 Subject: [PATCH 13/15] tests: place Lightning log into a file to avoid Jenkins truncating output --- tests/_utils/run_lightning | 1 + tests/_utils/run_sql | 3 ++- tests/checkpoint/config.toml | 2 +- tests/checkpoint_chunks/config.toml | 2 +- tests/examples/1.toml | 2 +- tests/examples/131072.toml | 2 +- tests/examples/512.toml | 2 +- tests/exotic_filenames/config.toml | 2 +- tests/restore/config.toml | 2 +- tests/tidb_rowid/config.toml | 2 +- tests/tool_135/config.toml | 2 +- tests/tool_241/config.toml | 2 +- tests/various_types/config.toml | 2 +- 13 files changed, 14 insertions(+), 12 deletions(-) diff --git a/tests/_utils/run_lightning b/tests/_utils/run_lightning index b143d4611..da12ad6ee 100755 --- a/tests/_utils/run_lightning +++ b/tests/_utils/run_lightning @@ -3,4 +3,5 @@ set -eu TEST_DIR=/tmp/lightning_test_result +echo "[$(date)] <<<<<< RUNNING TEST FOR: tests/$TEST_NAME/${1-config}.toml >>>>>>" >> "$TEST_DIR/lightning.log" bin/tidb-lightning.test -test.coverprofile="$TEST_DIR/cov.$TEST_NAME.${1-config}.out" DEVEL -config "tests/$TEST_NAME/${1-config}.toml" diff --git a/tests/_utils/run_sql b/tests/_utils/run_sql index ec715195c..7b8526eeb 100755 --- a/tests/_utils/run_sql +++ b/tests/_utils/run_sql @@ -3,4 +3,5 @@ set -eu TEST_DIR=/tmp/lightning_test_result -mysql -uroot -h127.0.0.1 -P4000 --default-character-set utf8 -E -e "$1" > "$TEST_DIR/sql_res.$TEST_NAME.txt" +echo "[$(date)] Executing SQL: $1" > "$TEST_DIR/sql_res.$TEST_NAME.txt" +mysql -uroot -h127.0.0.1 -P4000 --default-character-set utf8 -E -e "$1" >> "$TEST_DIR/sql_res.$TEST_NAME.txt" diff --git a/tests/checkpoint/config.toml b/tests/checkpoint/config.toml index fa78c0271..9c03363ba 100644 --- a/tests/checkpoint/config.toml +++ b/tests/checkpoint/config.toml @@ -2,7 +2,7 @@ # pprof-port = 28423 table-concurrency = 1 check-requirements = false -file = "/dev/stderr" +file = "/tmp/lightning_test_result/lightning.log" level = "error" [checkpoint] diff --git a/tests/checkpoint_chunks/config.toml b/tests/checkpoint_chunks/config.toml index 57a5d27e7..90a1f0c6d 100644 --- a/tests/checkpoint_chunks/config.toml +++ b/tests/checkpoint_chunks/config.toml @@ -2,7 +2,7 @@ # pprof-port = 12683 region-concurrency = 1 check-requirements = false -file = "/dev/stderr" +file = "/tmp/lightning_test_result/lightning.log" level = "error" [checkpoint] diff --git a/tests/examples/1.toml b/tests/examples/1.toml 
index 46452d028..463eff080 100644 --- a/tests/examples/1.toml +++ b/tests/examples/1.toml @@ -1,7 +1,7 @@ [lightning] table-concurrency = 1 check-requirements = false -file = "/dev/stderr" +file = "/tmp/lightning_test_result/lightning.log" level = "warning" [tikv-importer] diff --git a/tests/examples/131072.toml b/tests/examples/131072.toml index e460d4c2a..ddb7d2b43 100644 --- a/tests/examples/131072.toml +++ b/tests/examples/131072.toml @@ -1,7 +1,7 @@ [lightning] table-concurrency = 1 check-requirements = false -file = "/dev/stderr" +file = "/tmp/lightning_test_result/lightning.log" level = "warning" [tikv-importer] diff --git a/tests/examples/512.toml b/tests/examples/512.toml index 5def88b8a..44c153701 100644 --- a/tests/examples/512.toml +++ b/tests/examples/512.toml @@ -1,7 +1,7 @@ [lightning] table-concurrency = 1 check-requirements = false -file = "/dev/stderr" +file = "/tmp/lightning_test_result/lightning.log" level = "warning" [tikv-importer] diff --git a/tests/exotic_filenames/config.toml b/tests/exotic_filenames/config.toml index a66dc894b..b20f9759c 100644 --- a/tests/exotic_filenames/config.toml +++ b/tests/exotic_filenames/config.toml @@ -1,6 +1,6 @@ [lightning] check-requirements = false -file = "/dev/stderr" +file = "/tmp/lightning_test_result/lightning.log" level = "warning" [tikv-importer] diff --git a/tests/restore/config.toml b/tests/restore/config.toml index 3dc3e5b81..1254f7b2b 100644 --- a/tests/restore/config.toml +++ b/tests/restore/config.toml @@ -1,7 +1,7 @@ [lightning] table-concurrency = 4 check-requirements = false -file = "/dev/stderr" +file = "/tmp/lightning_test_result/lightning.log" level = "warning" [tikv-importer] diff --git a/tests/tidb_rowid/config.toml b/tests/tidb_rowid/config.toml index cb356e007..7a7255378 100644 --- a/tests/tidb_rowid/config.toml +++ b/tests/tidb_rowid/config.toml @@ -1,6 +1,6 @@ [lightning] check-requirements = false -file = "/dev/stderr" +file = "/tmp/lightning_test_result/lightning.log" level = "warning" [tikv-importer] diff --git a/tests/tool_135/config.toml b/tests/tool_135/config.toml index b122c9840..336013c70 100644 --- a/tests/tool_135/config.toml +++ b/tests/tool_135/config.toml @@ -1,6 +1,6 @@ [lightning] check-requirements = false -file = "/dev/stderr" +file = "/tmp/lightning_test_result/lightning.log" level = "warn" [tikv-importer] diff --git a/tests/tool_241/config.toml b/tests/tool_241/config.toml index 390040efa..f7f058aaf 100644 --- a/tests/tool_241/config.toml +++ b/tests/tool_241/config.toml @@ -1,7 +1,7 @@ [lightning] table-concurrency = 3 check-requirements = false -file = "/dev/stderr" +file = "/tmp/lightning_test_result/lightning.log" level = "warning" [tikv-importer] diff --git a/tests/various_types/config.toml b/tests/various_types/config.toml index 0f2952035..41ff46f94 100644 --- a/tests/various_types/config.toml +++ b/tests/various_types/config.toml @@ -1,6 +1,6 @@ [lightning] check-requirements = false -file = "/dev/stderr" +file = "/tmp/lightning_test_result/lightning.log" level = "warning" [tikv-importer] From 61a933cc58660405076d0c7339e0af7aba8854f5 Mon Sep 17 00:00:00 2001 From: kennytm Date: Mon, 19 Nov 2018 22:38:36 +0800 Subject: [PATCH 14/15] mydump: addressed comments --- lightning/mydump/parser.go | 2 +- lightning/mydump/parser_test.go | 2 +- lightning/mydump/region.go | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/lightning/mydump/parser.go b/lightning/mydump/parser.go index 1e135a17a..7c22f462f 100644 --- a/lightning/mydump/parser.go +++ b/lightning/mydump/parser.go 
@@ -89,7 +89,7 @@ func (parser *ChunkParser) readBlock() error { n, err := io.ReadFull(parser.reader, block) switch err { - case io.ErrUnexpectedEOF: + case io.ErrUnexpectedEOF, io.EOF: parser.isLastChunk = true fallthrough case nil: diff --git a/lightning/mydump/parser_test.go b/lightning/mydump/parser_test.go index 4f033b66e..df713d939 100644 --- a/lightning/mydump/parser_test.go +++ b/lightning/mydump/parser_test.go @@ -19,7 +19,7 @@ func (s *testMydumpParserSuite) TearDownSuite(c *C) {} func (s *testMydumpParserSuite) TestReadRow(c *C) { reader := strings.NewReader( "/* whatever pragmas */;" + - "INSERT INTO `namespaced`.`table` (columns, more, columns) VALUES (1, 2, 3), (4, 5, 6);" + + "INSERT INTO `namespaced`.`table` (columns, more, columns) VALUES (1, 2, 3),\n(4, 5, 6);" + "INSERT `namespaced`.`table` (x,y,z) VALUES (7,8,9);" + "insert another_table values (10, 11, 12, '(13)', '(', 14, ')');", ) diff --git a/lightning/mydump/region.go b/lightning/mydump/region.go index 1d73c7aa2..06fcda847 100644 --- a/lightning/mydump/region.go +++ b/lightning/mydump/region.go @@ -123,7 +123,8 @@ func (f *RegionFounder) MakeTableRegions(meta *MDTableMeta) ([]*TableRegion, err for i, region := range filesRegions { region.ID = i - // Re-adjust the row IDs so they won't be overlapping. + // Every chunk's PrevRowIDMax was uninitialized (set to 0). We need to + // re-adjust the row IDs so they won't be overlapping. chunkRowCount := region.Chunk.RowIDMax - region.Chunk.PrevRowIDMax region.Chunk.PrevRowIDMax = totalRowCount totalRowCount += chunkRowCount From f7da20214fa7da5e2c5bfee4a04af67166ad4c0d Mon Sep 17 00:00:00 2001 From: kennytm Date: Tue, 20 Nov 2018 16:55:07 +0800 Subject: [PATCH 15/15] checkpoints: addressed comment, turned table name into constant --- lightning/restore/checkpoints.go | 77 +++++++++++++++++--------------- 1 file changed, 42 insertions(+), 35 deletions(-) diff --git a/lightning/restore/checkpoints.go b/lightning/restore/checkpoints.go index ff0f1a2e6..54e7f4699 100644 --- a/lightning/restore/checkpoints.go +++ b/lightning/restore/checkpoints.go @@ -34,6 +34,13 @@ const insertCheckpointRetry = 3 const nodeID = 0 +const ( + // the table names to store each kind of checkpoint in the checkpoint database + // remember to increase the version number in case of incompatible change. 
+ checkpointTableNameTable = "table_v1" + checkpointTableNameChunk = "chunk_v3" +) + func (status CheckpointStatus) MetricName() string { switch status { case CheckpointStatusLoaded: @@ -212,7 +219,7 @@ func NewMySQLCheckpointsDB(ctx context.Context, db *sql.DB, schemaName string) ( // Apparently we could execute multiple DDL statements in Exec() err := common.ExecWithRetry(ctx, db, "(create checkpoints database)", fmt.Sprintf(` CREATE DATABASE IF NOT EXISTS %[1]s; - CREATE TABLE IF NOT EXISTS %[1]s.table_v1 ( + CREATE TABLE IF NOT EXISTS %[1]s.%[2]s ( node_id int unsigned NOT NULL, session bigint unsigned NOT NULL, table_name varchar(261) NOT NULL PRIMARY KEY, @@ -224,7 +231,7 @@ func NewMySQLCheckpointsDB(ctx context.Context, db *sql.DB, schemaName string) ( update_time timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, INDEX(node_id, session) ); - CREATE TABLE IF NOT EXISTS %[1]s.chunk_v3 ( + CREATE TABLE IF NOT EXISTS %[1]s.%[3]s ( table_name varchar(261) NOT NULL, path varchar(2048) NOT NULL, offset bigint NOT NULL, @@ -241,7 +248,7 @@ func NewMySQLCheckpointsDB(ctx context.Context, db *sql.DB, schemaName string) ( update_time timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, PRIMARY KEY(table_name, path, offset) ); - `, schema)) + `, schema, checkpointTableNameTable, checkpointTableNameChunk)) if err != nil { return nil, errors.Trace(err) } @@ -268,12 +275,12 @@ func (cpdb *MySQLCheckpointsDB) Initialize(ctx context.Context, dbInfo map[strin // We do need to capture the error is display a user friendly message // (multiple nodes cannot import the same table) though. stmt, err := tx.PrepareContext(c, fmt.Sprintf(` - INSERT INTO %s.table_v1 (node_id, session, table_name, hash, engine) VALUES (?, ?, ?, ?, ?) + INSERT INTO %s.%s (node_id, session, table_name, hash, engine) VALUES (?, ?, ?, ?, ?) ON DUPLICATE KEY UPDATE session = CASE WHEN node_id = VALUES(node_id) AND hash = VALUES(hash) THEN VALUES(session) END; - `, cpdb.schema)) + `, cpdb.schema, checkpointTableNameTable)) if err != nil { return errors.Trace(err) } @@ -313,9 +320,9 @@ func (cpdb *MySQLCheckpointsDB) Get(ctx context.Context, tableName string) (*Tab path, offset, columns, should_include_row_id, pos, end_offset, prev_rowid_max, rowid_max, kvc_bytes, kvc_kvs, kvc_checksum - FROM %s.chunk_v3 WHERE table_name = ? + FROM %s.%s WHERE table_name = ? ORDER BY path, offset; - `, cpdb.schema) + `, cpdb.schema, checkpointTableNameChunk) rows, err := tx.QueryContext(c, query, tableName) if err != nil { return errors.Trace(err) @@ -343,8 +350,8 @@ func (cpdb *MySQLCheckpointsDB) Get(ctx context.Context, tableName string) (*Tab } query = fmt.Sprintf(` - SELECT status, engine, alloc_base FROM %s.table_v1 WHERE table_name = ? - `, cpdb.schema) + SELECT status, engine, alloc_base FROM %s.%s WHERE table_name = ? 
+ `, cpdb.schema, checkpointTableNameTable) row := tx.QueryRowContext(c, query, tableName) var ( @@ -377,7 +384,7 @@ func (cpdb *MySQLCheckpointsDB) Get(ctx context.Context, tableName string) (*Tab func (cpdb *MySQLCheckpointsDB) InsertChunkCheckpoints(ctx context.Context, tableName string, checkpoints []*ChunkCheckpoint) error { err := common.TransactWithRetry(ctx, cpdb.db, "(update chunk checkpoints for "+tableName+")", func(c context.Context, tx *sql.Tx) error { stmt, err := tx.PrepareContext(c, fmt.Sprintf(` - REPLACE INTO %s.chunk_v3 ( + REPLACE INTO %s.%s ( table_name, path, offset, columns, should_include_row_id, pos, end_offset, prev_rowid_max, rowid_max, kvc_bytes, kvc_kvs, kvc_checksum @@ -386,7 +393,7 @@ func (cpdb *MySQLCheckpointsDB) InsertChunkCheckpoints(ctx context.Context, tabl ?, ?, ?, ?, ?, ?, ? ); - `, cpdb.schema)) + `, cpdb.schema, checkpointTableNameChunk)) if err != nil { return errors.Trace(err) } @@ -415,15 +422,15 @@ func (cpdb *MySQLCheckpointsDB) InsertChunkCheckpoints(ctx context.Context, tabl func (cpdb *MySQLCheckpointsDB) Update(checkpointDiffs map[string]*TableCheckpointDiff) { chunkQuery := fmt.Sprintf(` - UPDATE %s.chunk_v3 SET pos = ?, prev_rowid_max = ?, kvc_bytes = ?, kvc_kvs = ?, kvc_checksum = ? + UPDATE %s.%s SET pos = ?, prev_rowid_max = ?, kvc_bytes = ?, kvc_kvs = ?, kvc_checksum = ? WHERE table_name = ? AND path = ? AND offset = ?; - `, cpdb.schema) + `, cpdb.schema, checkpointTableNameChunk) checksumQuery := fmt.Sprintf(` - UPDATE %s.table_v1 SET alloc_base = GREATEST(?, alloc_base) WHERE table_name = ?; - `, cpdb.schema) + UPDATE %s.%s SET alloc_base = GREATEST(?, alloc_base) WHERE table_name = ?; + `, cpdb.schema, checkpointTableNameTable) statusQuery := fmt.Sprintf(` - UPDATE %s.table_v1 SET status = ? WHERE table_name = ?; - `, cpdb.schema) + UPDATE %s.%s SET status = ? WHERE table_name = ?; + `, cpdb.schema, checkpointTableNameTable) err := common.TransactWithRetry(context.Background(), cpdb.db, "(update checkpoints)", func(c context.Context, tx *sql.Tx) error { chunkStmt, e := tx.PrepareContext(c, chunkQuery) @@ -499,17 +506,17 @@ func (cpdb *MySQLCheckpointsDB) RemoveCheckpoint(ctx context.Context, tableName ) if tableName == "all" { - deleteChunkFmt = "DELETE FROM %[1]s.chunk_v3 WHERE table_name IN (SELECT table_name FROM %[1]s.table_v1 WHERE node_id = ?)" - deleteTableFmt = "DELETE FROM %s.table_v1 WHERE node_id = ?" + deleteChunkFmt = "DELETE FROM %[1]s.%[2]s WHERE table_name IN (SELECT table_name FROM %[1]s.%[3]s WHERE node_id = ?)" + deleteTableFmt = "DELETE FROM %s.%s WHERE node_id = ?" arg = nodeID } else { - deleteChunkFmt = "DELETE FROM %s.chunk_v3 WHERE table_name = ?" - deleteTableFmt = "DELETE FROM %s.table_v1 WHERE table_name = ?" + deleteChunkFmt = "DELETE FROM %s.%s WHERE table_name = ?%.0s" // the %.0s is to consume the third parameter. + deleteTableFmt = "DELETE FROM %s.%s WHERE table_name = ?" 
arg = tableName } - deleteChunkQuery := fmt.Sprintf(deleteChunkFmt, cpdb.schema) - deleteTableQuery := fmt.Sprintf(deleteTableFmt, cpdb.schema) + deleteChunkQuery := fmt.Sprintf(deleteChunkFmt, cpdb.schema, checkpointTableNameChunk, checkpointTableNameTable) + deleteTableQuery := fmt.Sprintf(deleteTableFmt, cpdb.schema, checkpointTableNameTable) err := common.TransactWithRetry(ctx, cpdb.db, fmt.Sprintf("(remove checkpoints of %s)", tableName), func(c context.Context, tx *sql.Tx) error { if _, e := tx.ExecContext(c, deleteChunkQuery, arg); e != nil { return errors.Trace(e) @@ -533,8 +540,8 @@ func (cpdb *MySQLCheckpointsDB) IgnoreErrorCheckpoint(ctx context.Context, table colName, arg = "table_name", tableName } query := fmt.Sprintf(` - UPDATE %s.table_v1 SET status = %d WHERE %s = ? AND status <= %d; - `, cpdb.schema, CheckpointStatusLoaded, colName, CheckpointStatusMaxInvalid) + UPDATE %s.%s SET status = %d WHERE %s = ? AND status <= %d; + `, cpdb.schema, checkpointTableNameTable, CheckpointStatusLoaded, colName, CheckpointStatusMaxInvalid) err := common.ExecWithRetry(ctx, cpdb.db, fmt.Sprintf("(ignore error checkpoints for %s)", tableName), query, arg) return errors.Trace(err) @@ -572,14 +579,14 @@ func (cpdb *MySQLCheckpointsDB) destroyErrorCheckpoints(ctx context.Context, tab } selectQuery := fmt.Sprintf(` - SELECT table_name FROM %s.table_v1 WHERE %s = ? AND status <= %d; - `, cpdb.schema, conditionColumn, CheckpointStatusMaxInvalid) + SELECT table_name FROM %s.%s WHERE %s = ? AND status <= %d; + `, cpdb.schema, checkpointTableNameTable, conditionColumn, CheckpointStatusMaxInvalid) deleteChunkQuery := fmt.Sprintf(` - DELETE FROM %[1]s.chunk_v3 WHERE table_name IN (SELECT table_name FROM %[1]s.table_v1 WHERE %[2]s = ? AND status <= %[3]d) - `, cpdb.schema, conditionColumn, CheckpointStatusMaxInvalid) + DELETE FROM %[1]s.%[4]s WHERE table_name IN (SELECT table_name FROM %[1]s.%[5]s WHERE %[2]s = ? AND status <= %[3]d) + `, cpdb.schema, conditionColumn, CheckpointStatusMaxInvalid, checkpointTableNameChunk, checkpointTableNameTable) deleteTableQuery := fmt.Sprintf(` - DELETE FROM %s.table_v1 WHERE %s = ? AND status <= %d - `, cpdb.schema, conditionColumn, CheckpointStatusMaxInvalid) + DELETE FROM %s.%s WHERE %s = ? AND status <= %d + `, cpdb.schema, checkpointTableNameTable, conditionColumn, CheckpointStatusMaxInvalid) var targetTables []string @@ -630,8 +637,8 @@ func (cpdb *MySQLCheckpointsDB) DumpTables(ctx context.Context, writer io.Writer alloc_base, create_time, update_time - FROM %s.table_v1; - `, cpdb.schema)) + FROM %s.%s; + `, cpdb.schema, checkpointTableNameTable)) if err != nil { return errors.Trace(err) } @@ -656,8 +663,8 @@ func (cpdb *MySQLCheckpointsDB) DumpChunks(ctx context.Context, writer io.Writer kvc_checksum, create_time, update_time - FROM %s.chunk_v3; - `, cpdb.schema)) + FROM %s.%s; + `, cpdb.schema, checkpointTableNameChunk)) if err != nil { return errors.Trace(err) }
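
A note on the readBlock() change in PATCH 14/15: io.ReadFull returns io.EOF only when it could read nothing at all, and io.ErrUnexpectedEOF when it read a partial block, so both cases must mark the last chunk; otherwise a data file whose size is an exact multiple of bufSize would never be treated as finished. A minimal standalone sketch of the distinction (the reader contents below are made up for illustration):

    package main

    import (
        "fmt"
        "io"
        "strings"
    )

    func main() {
        // 5 bytes of data read through a 4-byte buffer: the first ReadFull
        // fills the buffer (nil error), the second reads 1 byte and returns
        // io.ErrUnexpectedEOF, the third reads nothing and returns io.EOF.
        r := strings.NewReader("hello")
        buf := make([]byte, 4)
        for i := 0; i < 3; i++ {
            n, err := io.ReadFull(r, buf)
            fmt.Println(n, err)
        }
    }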
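
The comment added to region.go in the same patch explains why MakeTableRegions rewrites the row-ID bounds: the chunk parser numbers rows per file starting from zero, so each file's [PrevRowIDMax, RowIDMax] range has to be rebased onto a single table-wide counter before the regions are handed out. A standalone sketch of that rebasing, using a hypothetical struct that keeps only the two relevant fields:

    package main

    import "fmt"

    // chunk keeps only the row-ID bounds; the real Chunk struct also
    // carries file offsets.
    type chunk struct {
        PrevRowIDMax, RowIDMax int64
    }

    func rebase(chunks []chunk) {
        total := int64(0)
        for i := range chunks {
            count := chunks[i].RowIDMax - chunks[i].PrevRowIDMax
            chunks[i].PrevRowIDMax = total
            total += count
            chunks[i].RowIDMax = total
        }
    }

    func main() {
        // Two files parsed independently, so both ranges start at zero.
        cs := []chunk{{PrevRowIDMax: 0, RowIDMax: 3}, {PrevRowIDMax: 0, RowIDMax: 5}}
        rebase(cs)
        fmt.Println(cs) // [{0 3} {3 8}]
    }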
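
PATCH 15/15 threads the new checkpointTableName* constants through fmt.Sprintf with explicit argument indexes (%[1]s, %[2]s, %[3]s), which lets one argument list feed a multi-statement DDL string and lets the schema name be reused in every statement. A reduced sketch of the pattern; the schema name and column lists here are placeholders, not the real checkpoint schema:

    package main

    import "fmt"

    func main() {
        ddl := "CREATE DATABASE IF NOT EXISTS %[1]s; " +
            "CREATE TABLE IF NOT EXISTS %[1]s.%[2]s (table_name varchar(261) PRIMARY KEY); " +
            "CREATE TABLE IF NOT EXISTS %[1]s.%[3]s (table_name varchar(261), path varchar(2048));"
        // An indexed verb refers to an argument position and may be reused,
        // so %[1]s substitutes the same schema name into every statement.
        fmt.Printf(ddl+"\n", "lightning_checkpoints", "table_v1", "chunk_v3")
    }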
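
The %.0s at the end of the single-table deleteChunkFmt deserves a word: both branches of RemoveCheckpoint now share one Sprintf call that passes the schema plus both table-name constants, but the single-table DELETE only needs the first two. The zero-precision string verb consumes the third argument without printing anything, which keeps Sprintf from appending a %!(EXTRA ...) diagnostic. A quick sketch with placeholder schema and table names:

    package main

    import "fmt"

    func main() {
        // %.0s truncates its string argument to zero characters, so the third
        // argument is consumed but contributes nothing to the output.
        q := fmt.Sprintf("DELETE FROM %s.%s WHERE table_name = ?%.0s",
            "cp_db", "chunk_v3", "table_v1")
        fmt.Println(q) // DELETE FROM cp_db.chunk_v3 WHERE table_name = ?
    }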