From e48e54915ef706c00cf4742c53b4cb76e74a9b09 Mon Sep 17 00:00:00 2001 From: kennytm Date: Mon, 29 Oct 2018 01:28:36 +0800 Subject: [PATCH 01/15] mydump: added a ragel-based data file lexer and chunk parser The new lexer is 8x faster than MDDataReader. Speed is now a concern because we are going to read the entire file to get the accurate rows count per chunk. --- .gitattributes | 1 + Makefile | 5 + lightning/mydump/parser.go | 198 ++++ lightning/mydump/parser.rl | 100 ++ lightning/mydump/parser_generated.go | 1308 ++++++++++++++++++++++++++ lightning/mydump/parser_test.go | 111 +++ 6 files changed, 1723 insertions(+) create mode 100644 .gitattributes create mode 100644 lightning/mydump/parser.go create mode 100644 lightning/mydump/parser.rl create mode 100644 lightning/mydump/parser_generated.go create mode 100644 lightning/mydump/parser_test.go diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..ba35fa100 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*_generated.go linguist-generated=true diff --git a/Makefile b/Makefile index ef46180d7..a0f1c35c4 100644 --- a/Makefile +++ b/Makefile @@ -54,6 +54,11 @@ checksuccess: echo "Lightning build successfully :-) !" ; \ fi +data_parsers: + ragel -Z -G2 -o tmp_parser.go lightning/mydump/parser.rl + @echo '// Code generated by ragel DO NOT EDIT.' | cat - tmp_parser.go > lightning/mydump/parser_generated.go + @rm tmp_parser.go + lightning: $(GOBUILD) $(RACE_FLAG) -ldflags '$(LDFLAGS)' -o $(LIGHTNING_BIN) cmd/main.go diff --git a/lightning/mydump/parser.go b/lightning/mydump/parser.go new file mode 100644 index 000000000..1e135a17a --- /dev/null +++ b/lightning/mydump/parser.go @@ -0,0 +1,198 @@ +package mydump + +import ( + "io" + + "github.com/pkg/errors" +) + +// ChunkParser is a parser of the data files (the file containing only INSERT +// statements). +type ChunkParser struct { + // states for the lexer + reader io.Reader + buf []byte + bufSize int + isLastChunk bool + + lastRow Row + // Current file offset. + pos int64 + // The (quoted) table name used in the last INSERT statement. Assumed to be + // constant throughout the entire file. + TableName []byte + // The list of columns in the form `(a, b, c)` in the last INSERT statement. + // Assumed to be constant throughout the entire file. + Columns []byte +} + +// Chunk represents a portion of the data file. +type Chunk struct { + Offset int64 + EndOffset int64 + PrevRowIDMax int64 + RowIDMax int64 +} + +// Row is the content of a row. +type Row struct { + RowID int64 + Row []byte +} + +// NewChunkParser creates a new parser which can read chunks out of a file. +func NewChunkParser(reader io.Reader) *ChunkParser { + return &ChunkParser{ + reader: reader, + bufSize: 8192, + } +} + +// Reader returns the underlying reader of this parser. +func (parser *ChunkParser) Reader() io.Reader { + return parser.reader +} + +// SetPos changes the reported position and row ID. +func (parser *ChunkParser) SetPos(pos int64, rowID int64) { + parser.pos = pos + parser.lastRow.RowID = rowID +} + +// Pos returns the current file offset. +func (parser *ChunkParser) Pos() int64 { + return parser.pos +} + +type token byte + +const ( + tokNil token = iota + tokValues + tokRow + tokName +) + +func tryAppendTo(out *[]byte, tail []byte) { + if out == nil || len(tail) == 0 { + return + } + if len(*out) == 0 { + *out = tail + } else { + *out = append(*out, tail...) 
+ } +} + +func (parser *ChunkParser) readBlock() error { + block := make([]byte, parser.bufSize) + + n, err := io.ReadFull(parser.reader, block) + switch err { + case io.ErrUnexpectedEOF: + parser.isLastChunk = true + fallthrough + case nil: + tryAppendTo(&parser.buf, block[:n]) + return nil + default: + return errors.Trace(err) + } +} + +// ReadRow reads a row from the datafile. +func (parser *ChunkParser) ReadRow() error { + // This parser will recognize contents like: + // + // `tableName` (...) VALUES (...) (...) (...) + // + // Keywords like INSERT, INTO and separators like ',' and ';' are treated + // like comments and ignored. Therefore, this parser will accept some + // nonsense input. The advantage is the parser becomes extremely simple, + // suitable for us where we just want to quickly and accurately split the + // file apart, not to validate the content. + + type state byte + + const ( + // the state after reading "VALUES" + stateRow state = iota + // the state after reading the table name, before "VALUES" + stateColumns + ) + + row := &parser.lastRow + st := stateRow + + for { + tok, content, err := parser.lex() + if err != nil { + return errors.Trace(err) + } + switch tok { + case tokRow: + switch st { + case stateRow: + row.RowID++ + row.Row = content + return nil + case stateColumns: + parser.Columns = content + continue + } + + case tokName: + st = stateColumns + parser.TableName = content + parser.Columns = nil + continue + + case tokValues: + st = stateRow + continue + + default: + return errors.Errorf("Syntax error at position %d", parser.pos) + } + } +} + +// LastRow is the copy of the row parsed by the last call to ReadRow(). +func (parser *ChunkParser) LastRow() Row { + return parser.lastRow +} + +// ReadChunks parses the entire file and splits it into continuous chunks of +// size >= minSize. +func (parser *ChunkParser) ReadChunks(minSize int64) ([]Chunk, error) { + var chunks []Chunk + + cur := Chunk{ + Offset: parser.pos, + EndOffset: parser.pos, + PrevRowIDMax: parser.lastRow.RowID, + RowIDMax: parser.lastRow.RowID, + } + + for { + switch err := parser.ReadRow(); errors.Cause(err) { + case nil: + cur.EndOffset = parser.pos + cur.RowIDMax = parser.lastRow.RowID + if cur.EndOffset-cur.Offset >= minSize { + chunks = append(chunks, cur) + cur.Offset = cur.EndOffset + cur.PrevRowIDMax = cur.RowIDMax + } + + case io.EOF: + if cur.Offset < cur.EndOffset { + chunks = append(chunks, cur) + } + return chunks, nil + + default: + return nil, errors.Trace(err) + } + } +} diff --git a/lightning/mydump/parser.rl b/lightning/mydump/parser.rl new file mode 100644 index 000000000..9c3ff75e2 --- /dev/null +++ b/lightning/mydump/parser.rl @@ -0,0 +1,100 @@ +// Please edit `parser.rl` if you want to modify this file. 
To generate +// `parser_generated.go`, please execute +// +// ```sh +// make data_parsers +// ``` + +package mydump + +import ( + "io" + + "github.com/pingcap/tidb-lightning/lightning/common" + "github.com/pkg/errors" +) + +%%{ +#` + +machine chunk_parser; + +block_comment = '/*' any* :>> '*/'; +line_comment = /--[^\n]*\n/; +comment = block_comment | line_comment | space | [,;] | 'insert'i | 'into'i; + +single_quoted = "'" (^"'" | "\\" any)** "'"; +double_quoted = '"' (^'"' | '\\' any)** '"'; +back_quoted = '`' ^'`'* '`'; +unquoted = ^([,;()'"`] | space)+; + +row = '(' (^[)'"`] | single_quoted | double_quoted | back_quoted)* ')'; +name = (back_quoted | double_quoted | unquoted)+; + +main := |* + comment; + + 'values'i => { + consumedToken = tokValues + fbreak; + }; + + row => { + consumedToken = tokRow + fbreak; + }; + + name => { + consumedToken = tokName + fbreak; + }; +*|; + +#` +}%% + +%% write data; + +func (parser *ChunkParser) lex() (token, []byte, error) { + var cs, ts, te, act, p int + %% write init; + + for { + data := parser.buf + consumedToken := tokNil + pe := len(data) + eof := -1 + if parser.isLastChunk { + eof = pe + } + + %% write exec; + + if cs == %%{ write error; }%% { + common.AppLogger.Errorf("Syntax error near byte %d, content is «%s»", parser.pos, string(data)) + return tokNil, nil, errors.New("Syntax error") + } + + if consumedToken != tokNil { + result := data[ts:te] + parser.buf = data[te:] + parser.pos += int64(te) + return consumedToken, result, nil + } + + if parser.isLastChunk { + return tokNil, nil, io.EOF + } + + parser.buf = parser.buf[ts:] + parser.pos += int64(ts) + p -= ts + te -= ts + ts = 0 + if err := parser.readBlock(); err != nil { + return tokNil, nil, errors.Trace(err) + } + } + + return tokNil, nil, nil +} diff --git a/lightning/mydump/parser_generated.go b/lightning/mydump/parser_generated.go new file mode 100644 index 000000000..0fcbbc226 --- /dev/null +++ b/lightning/mydump/parser_generated.go @@ -0,0 +1,1308 @@ +// Code generated by ragel DO NOT EDIT. + +//line lightning/mydump/parser.rl:1 +// Please edit `parser.rl` if you want to modify this file. 
To generate +// `parser_generated.go`, please execute +// +// ```sh +// make data_parsers +// ``` + +package mydump + +import ( + "io" + + "github.com/pingcap/tidb-lightning/lightning/common" + "github.com/pkg/errors" +) + + +//line lightning/mydump/parser.rl:54 + + + +//line tmp_parser.go:25 +const chunk_parser_start int = 21 +const chunk_parser_first_final int = 21 +const chunk_parser_error int = 0 + +const chunk_parser_en_main int = 21 + + +//line lightning/mydump/parser.rl:57 + +func (parser *ChunkParser) lex() (token, []byte, error) { + var cs, ts, te, act, p int + +//line tmp_parser.go:38 + { + cs = chunk_parser_start + ts = 0 + te = 0 + act = 0 + } + +//line lightning/mydump/parser.rl:61 + + for { + data := parser.buf + consumedToken := tokNil + pe := len(data) + eof := -1 + if parser.isLastChunk { + eof = pe + } + + +//line tmp_parser.go:58 + { + if p == pe { + goto _test_eof + } + switch cs { + case 21: + goto st_case_21 + case 22: + goto st_case_22 + case 1: + goto st_case_1 + case 2: + goto st_case_2 + case 3: + goto st_case_3 + case 0: + goto st_case_0 + case 4: + goto st_case_4 + case 5: + goto st_case_5 + case 6: + goto st_case_6 + case 7: + goto st_case_7 + case 8: + goto st_case_8 + case 9: + goto st_case_9 + case 23: + goto st_case_23 + case 24: + goto st_case_24 + case 10: + goto st_case_10 + case 11: + goto st_case_11 + case 25: + goto st_case_25 + case 12: + goto st_case_12 + case 13: + goto st_case_13 + case 26: + goto st_case_26 + case 27: + goto st_case_27 + case 28: + goto st_case_28 + case 14: + goto st_case_14 + case 15: + goto st_case_15 + case 16: + goto st_case_16 + case 17: + goto st_case_17 + case 18: + goto st_case_18 + case 29: + goto st_case_29 + case 19: + goto st_case_19 + case 20: + goto st_case_20 + case 30: + goto st_case_30 + case 31: + goto st_case_31 + case 32: + goto st_case_32 + case 33: + goto st_case_33 + case 34: + goto st_case_34 + case 35: + goto st_case_35 + case 36: + goto st_case_36 + case 37: + goto st_case_37 + case 38: + goto st_case_38 + case 39: + goto st_case_39 + case 40: + goto st_case_40 + } + goto st_out +tr0: +//line NONE:1 + switch act { + case 0: + {{goto st0 }} + case 2: + {p = (te) - 1 + + consumedToken = tokValues + {p++; cs = 21; goto _out } + } + case 4: + {p = (te) - 1 + + consumedToken = tokName + {p++; cs = 21; goto _out } + } + default: + {p = (te) - 1 +} + } + + goto st21 +tr8: +//line lightning/mydump/parser.rl:42 +te = p+1 +{ + consumedToken = tokRow + {p++; cs = 21; goto _out } + } + goto st21 +tr12: +//line lightning/mydump/parser.rl:47 +p = (te) - 1 +{ + consumedToken = tokName + {p++; cs = 21; goto _out } + } + goto st21 +tr14: +//line lightning/mydump/parser.rl:35 +te = p+1 + + goto st21 +tr34: +//line lightning/mydump/parser.rl:47 +te = p +p-- +{ + consumedToken = tokName + {p++; cs = 21; goto _out } + } + goto st21 +tr35: +//line lightning/mydump/parser.rl:35 +te = p +p-- + + goto st21 + st21: +//line NONE:1 +ts = 0 + +//line NONE:1 +act = 0 + + if p++; p == pe { + goto _test_eof21 + } + st_case_21: +//line NONE:1 +ts = p + +//line tmp_parser.go:221 + switch data[p] { + case 32: + goto tr14 + case 34: + goto st1 + case 40: + goto st4 + case 44: + goto tr14 + case 45: + goto tr30 + case 47: + goto tr31 + case 59: + goto tr14 + case 73: + goto tr32 + case 86: + goto tr33 + case 96: + goto st3 + case 105: + goto tr32 + case 118: + goto tr33 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto st0 + } + case data[p] >= 9: + goto tr14 + } + goto tr2 +tr2: +//line NONE:1 +te = p+1 + 
+//line lightning/mydump/parser.rl:47 +act = 4; + goto st22 +tr37: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:35 +act = 1; + goto st22 +tr47: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:37 +act = 2; + goto st22 + st22: + if p++; p == pe { + goto _test_eof22 + } + st_case_22: +//line tmp_parser.go:283 + switch data[p] { + case 32: + goto tr0 + case 34: + goto st1 + case 44: + goto tr0 + case 59: + goto tr0 + case 96: + goto st3 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto tr0 + } + case data[p] >= 9: + goto tr0 + } + goto tr2 + st1: + if p++; p == pe { + goto _test_eof1 + } + st_case_1: + switch data[p] { + case 34: + goto tr2 + case 92: + goto st2 + } + goto st1 + st2: + if p++; p == pe { + goto _test_eof2 + } + st_case_2: + goto st1 + st3: + if p++; p == pe { + goto _test_eof3 + } + st_case_3: + if data[p] == 96 { + goto tr2 + } + goto st3 +st_case_0: + st0: + cs = 0 + goto _out + st4: + if p++; p == pe { + goto _test_eof4 + } + st_case_4: + switch data[p] { + case 34: + goto st5 + case 39: + goto st7 + case 41: + goto tr8 + case 96: + goto st9 + } + goto st4 + st5: + if p++; p == pe { + goto _test_eof5 + } + st_case_5: + switch data[p] { + case 34: + goto st4 + case 92: + goto st6 + } + goto st5 + st6: + if p++; p == pe { + goto _test_eof6 + } + st_case_6: + goto st5 + st7: + if p++; p == pe { + goto _test_eof7 + } + st_case_7: + switch data[p] { + case 39: + goto st4 + case 92: + goto st8 + } + goto st7 + st8: + if p++; p == pe { + goto _test_eof8 + } + st_case_8: + goto st7 + st9: + if p++; p == pe { + goto _test_eof9 + } + st_case_9: + if data[p] == 96 { + goto st4 + } + goto st9 +tr30: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:47 +act = 4; + goto st23 + st23: + if p++; p == pe { + goto _test_eof23 + } + st_case_23: +//line tmp_parser.go:409 + switch data[p] { + case 32: + goto tr34 + case 34: + goto st1 + case 44: + goto tr34 + case 45: + goto tr17 + case 59: + goto tr34 + case 96: + goto st3 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto tr34 + } + case data[p] >= 9: + goto tr34 + } + goto tr2 +tr17: +//line NONE:1 +te = p+1 + + goto st24 + st24: + if p++; p == pe { + goto _test_eof24 + } + st_case_24: +//line tmp_parser.go:443 + switch data[p] { + case 10: + goto tr14 + case 32: + goto st10 + case 34: + goto st11 + case 44: + goto st10 + case 59: + goto st10 + case 96: + goto st13 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto st10 + } + case data[p] >= 9: + goto st10 + } + goto tr17 + st10: + if p++; p == pe { + goto _test_eof10 + } + st_case_10: + if data[p] == 10 { + goto tr14 + } + goto st10 + st11: + if p++; p == pe { + goto _test_eof11 + } + st_case_11: + switch data[p] { + case 10: + goto tr16 + case 34: + goto tr17 + case 92: + goto st12 + } + goto st11 +tr16: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:35 +act = 1; + goto st25 + st25: + if p++; p == pe { + goto _test_eof25 + } + st_case_25: +//line tmp_parser.go:502 + switch data[p] { + case 34: + goto tr2 + case 92: + goto st2 + } + goto st1 + st12: + if p++; p == pe { + goto _test_eof12 + } + st_case_12: + if data[p] == 10 { + goto tr16 + } + goto st11 + st13: + if p++; p == pe { + goto _test_eof13 + } + st_case_13: + switch data[p] { + case 10: + goto tr20 + case 96: + goto tr17 + } + goto st13 +tr20: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:35 +act = 1; + goto st26 + st26: + if p++; p == pe { + goto _test_eof26 + } + 
st_case_26: +//line tmp_parser.go:543 + if data[p] == 96 { + goto tr2 + } + goto st3 +tr31: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:47 +act = 4; + goto st27 + st27: + if p++; p == pe { + goto _test_eof27 + } + st_case_27: +//line tmp_parser.go:560 + switch data[p] { + case 32: + goto tr34 + case 34: + goto st1 + case 42: + goto tr24 + case 44: + goto tr34 + case 59: + goto tr34 + case 96: + goto st3 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto tr34 + } + case data[p] >= 9: + goto tr34 + } + goto tr2 +tr24: +//line NONE:1 +te = p+1 + + goto st28 + st28: + if p++; p == pe { + goto _test_eof28 + } + st_case_28: +//line tmp_parser.go:594 + switch data[p] { + case 32: + goto st14 + case 34: + goto st16 + case 42: + goto tr36 + case 44: + goto st14 + case 59: + goto st14 + case 96: + goto st19 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto st14 + } + case data[p] >= 9: + goto st14 + } + goto tr24 + st14: + if p++; p == pe { + goto _test_eof14 + } + st_case_14: + if data[p] == 42 { + goto st15 + } + goto st14 + st15: + if p++; p == pe { + goto _test_eof15 + } + st_case_15: + switch data[p] { + case 42: + goto st15 + case 47: + goto tr14 + } + goto st14 + st16: + if p++; p == pe { + goto _test_eof16 + } + st_case_16: + switch data[p] { + case 34: + goto tr24 + case 42: + goto st17 + case 92: + goto st18 + } + goto st16 + st17: + if p++; p == pe { + goto _test_eof17 + } + st_case_17: + switch data[p] { + case 34: + goto tr24 + case 42: + goto st17 + case 47: + goto tr16 + case 92: + goto st18 + } + goto st16 + st18: + if p++; p == pe { + goto _test_eof18 + } + st_case_18: + if data[p] == 42 { + goto st17 + } + goto st16 +tr36: +//line NONE:1 +te = p+1 + + goto st29 + st29: + if p++; p == pe { + goto _test_eof29 + } + st_case_29: +//line tmp_parser.go:688 + switch data[p] { + case 32: + goto st14 + case 34: + goto st16 + case 42: + goto tr36 + case 44: + goto st14 + case 47: + goto tr37 + case 59: + goto st14 + case 96: + goto st19 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto st14 + } + case data[p] >= 9: + goto st14 + } + goto tr24 + st19: + if p++; p == pe { + goto _test_eof19 + } + st_case_19: + switch data[p] { + case 42: + goto st20 + case 96: + goto tr24 + } + goto st19 + st20: + if p++; p == pe { + goto _test_eof20 + } + st_case_20: + switch data[p] { + case 42: + goto st20 + case 47: + goto tr20 + case 96: + goto tr24 + } + goto st19 +tr32: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:47 +act = 4; + goto st30 + st30: + if p++; p == pe { + goto _test_eof30 + } + st_case_30: +//line tmp_parser.go:752 + switch data[p] { + case 32: + goto tr34 + case 34: + goto st1 + case 44: + goto tr34 + case 59: + goto tr34 + case 78: + goto tr38 + case 96: + goto st3 + case 110: + goto tr38 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto tr34 + } + case data[p] >= 9: + goto tr34 + } + goto tr2 +tr38: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:47 +act = 4; + goto st31 + st31: + if p++; p == pe { + goto _test_eof31 + } + st_case_31: +//line tmp_parser.go:790 + switch data[p] { + case 32: + goto tr34 + case 34: + goto st1 + case 44: + goto tr34 + case 59: + goto tr34 + case 83: + goto tr39 + case 84: + goto tr40 + case 96: + goto st3 + case 115: + goto tr39 + case 116: + goto tr40 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto tr34 + } + case data[p] >= 9: + goto tr34 + } + goto tr2 
+tr39: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:47 +act = 4; + goto st32 + st32: + if p++; p == pe { + goto _test_eof32 + } + st_case_32: +//line tmp_parser.go:832 + switch data[p] { + case 32: + goto tr34 + case 34: + goto st1 + case 44: + goto tr34 + case 59: + goto tr34 + case 69: + goto tr41 + case 96: + goto st3 + case 101: + goto tr41 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto tr34 + } + case data[p] >= 9: + goto tr34 + } + goto tr2 +tr41: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:47 +act = 4; + goto st33 + st33: + if p++; p == pe { + goto _test_eof33 + } + st_case_33: +//line tmp_parser.go:870 + switch data[p] { + case 32: + goto tr34 + case 34: + goto st1 + case 44: + goto tr34 + case 59: + goto tr34 + case 82: + goto tr42 + case 96: + goto st3 + case 114: + goto tr42 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto tr34 + } + case data[p] >= 9: + goto tr34 + } + goto tr2 +tr42: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:47 +act = 4; + goto st34 + st34: + if p++; p == pe { + goto _test_eof34 + } + st_case_34: +//line tmp_parser.go:908 + switch data[p] { + case 32: + goto tr34 + case 34: + goto st1 + case 44: + goto tr34 + case 59: + goto tr34 + case 84: + goto tr37 + case 96: + goto st3 + case 116: + goto tr37 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto tr34 + } + case data[p] >= 9: + goto tr34 + } + goto tr2 +tr40: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:47 +act = 4; + goto st35 + st35: + if p++; p == pe { + goto _test_eof35 + } + st_case_35: +//line tmp_parser.go:946 + switch data[p] { + case 32: + goto tr34 + case 34: + goto st1 + case 44: + goto tr34 + case 59: + goto tr34 + case 79: + goto tr37 + case 96: + goto st3 + case 111: + goto tr37 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto tr34 + } + case data[p] >= 9: + goto tr34 + } + goto tr2 +tr33: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:47 +act = 4; + goto st36 + st36: + if p++; p == pe { + goto _test_eof36 + } + st_case_36: +//line tmp_parser.go:984 + switch data[p] { + case 32: + goto tr34 + case 34: + goto st1 + case 44: + goto tr34 + case 59: + goto tr34 + case 65: + goto tr43 + case 96: + goto st3 + case 97: + goto tr43 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto tr34 + } + case data[p] >= 9: + goto tr34 + } + goto tr2 +tr43: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:47 +act = 4; + goto st37 + st37: + if p++; p == pe { + goto _test_eof37 + } + st_case_37: +//line tmp_parser.go:1022 + switch data[p] { + case 32: + goto tr34 + case 34: + goto st1 + case 44: + goto tr34 + case 59: + goto tr34 + case 76: + goto tr44 + case 96: + goto st3 + case 108: + goto tr44 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto tr34 + } + case data[p] >= 9: + goto tr34 + } + goto tr2 +tr44: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:47 +act = 4; + goto st38 + st38: + if p++; p == pe { + goto _test_eof38 + } + st_case_38: +//line tmp_parser.go:1060 + switch data[p] { + case 32: + goto tr34 + case 34: + goto st1 + case 44: + goto tr34 + case 59: + goto tr34 + case 85: + goto tr45 + case 96: + goto st3 + case 117: + goto tr45 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto tr34 + } + case data[p] >= 9: + goto tr34 + } + goto tr2 +tr45: +//line NONE:1 +te = p+1 + 
+//line lightning/mydump/parser.rl:47 +act = 4; + goto st39 + st39: + if p++; p == pe { + goto _test_eof39 + } + st_case_39: +//line tmp_parser.go:1098 + switch data[p] { + case 32: + goto tr34 + case 34: + goto st1 + case 44: + goto tr34 + case 59: + goto tr34 + case 69: + goto tr46 + case 96: + goto st3 + case 101: + goto tr46 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto tr34 + } + case data[p] >= 9: + goto tr34 + } + goto tr2 +tr46: +//line NONE:1 +te = p+1 + +//line lightning/mydump/parser.rl:47 +act = 4; + goto st40 + st40: + if p++; p == pe { + goto _test_eof40 + } + st_case_40: +//line tmp_parser.go:1136 + switch data[p] { + case 32: + goto tr34 + case 34: + goto st1 + case 44: + goto tr34 + case 59: + goto tr34 + case 83: + goto tr47 + case 96: + goto st3 + case 115: + goto tr47 + } + switch { + case data[p] > 13: + if 39 <= data[p] && data[p] <= 41 { + goto tr34 + } + case data[p] >= 9: + goto tr34 + } + goto tr2 + st_out: + _test_eof21: cs = 21; goto _test_eof + _test_eof22: cs = 22; goto _test_eof + _test_eof1: cs = 1; goto _test_eof + _test_eof2: cs = 2; goto _test_eof + _test_eof3: cs = 3; goto _test_eof + _test_eof4: cs = 4; goto _test_eof + _test_eof5: cs = 5; goto _test_eof + _test_eof6: cs = 6; goto _test_eof + _test_eof7: cs = 7; goto _test_eof + _test_eof8: cs = 8; goto _test_eof + _test_eof9: cs = 9; goto _test_eof + _test_eof23: cs = 23; goto _test_eof + _test_eof24: cs = 24; goto _test_eof + _test_eof10: cs = 10; goto _test_eof + _test_eof11: cs = 11; goto _test_eof + _test_eof25: cs = 25; goto _test_eof + _test_eof12: cs = 12; goto _test_eof + _test_eof13: cs = 13; goto _test_eof + _test_eof26: cs = 26; goto _test_eof + _test_eof27: cs = 27; goto _test_eof + _test_eof28: cs = 28; goto _test_eof + _test_eof14: cs = 14; goto _test_eof + _test_eof15: cs = 15; goto _test_eof + _test_eof16: cs = 16; goto _test_eof + _test_eof17: cs = 17; goto _test_eof + _test_eof18: cs = 18; goto _test_eof + _test_eof29: cs = 29; goto _test_eof + _test_eof19: cs = 19; goto _test_eof + _test_eof20: cs = 20; goto _test_eof + _test_eof30: cs = 30; goto _test_eof + _test_eof31: cs = 31; goto _test_eof + _test_eof32: cs = 32; goto _test_eof + _test_eof33: cs = 33; goto _test_eof + _test_eof34: cs = 34; goto _test_eof + _test_eof35: cs = 35; goto _test_eof + _test_eof36: cs = 36; goto _test_eof + _test_eof37: cs = 37; goto _test_eof + _test_eof38: cs = 38; goto _test_eof + _test_eof39: cs = 39; goto _test_eof + _test_eof40: cs = 40; goto _test_eof + + _test_eof: {} + if p == eof { + switch cs { + case 22: + goto tr0 + case 1: + goto tr0 + case 2: + goto tr0 + case 3: + goto tr0 + case 23: + goto tr34 + case 24: + goto tr34 + case 10: + goto tr12 + case 11: + goto tr12 + case 25: + goto tr35 + case 12: + goto tr12 + case 13: + goto tr12 + case 26: + goto tr35 + case 27: + goto tr34 + case 28: + goto tr34 + case 14: + goto tr12 + case 15: + goto tr12 + case 16: + goto tr12 + case 17: + goto tr12 + case 18: + goto tr12 + case 29: + goto tr34 + case 19: + goto tr12 + case 20: + goto tr12 + case 30: + goto tr34 + case 31: + goto tr34 + case 32: + goto tr34 + case 33: + goto tr34 + case 34: + goto tr34 + case 35: + goto tr34 + case 36: + goto tr34 + case 37: + goto tr34 + case 38: + goto tr34 + case 39: + goto tr34 + case 40: + goto tr34 + } + } + + _out: {} + } + +//line lightning/mydump/parser.rl:72 + + if cs == 0 { + common.AppLogger.Errorf("Syntax error near byte %d, content is «%s»", parser.pos, string(data)) + return tokNil, nil, errors.New("Syntax error") + 
} + + if consumedToken != tokNil { + result := data[ts:te] + parser.buf = data[te:] + parser.pos += int64(te) + return consumedToken, result, nil + } + + if parser.isLastChunk { + return tokNil, nil, io.EOF + } + + parser.buf = parser.buf[ts:] + parser.pos += int64(ts) + p -= ts + te -= ts + ts = 0 + if err := parser.readBlock(); err != nil { + return tokNil, nil, errors.Trace(err) + } + } + + return tokNil, nil, nil +} diff --git a/lightning/mydump/parser_test.go b/lightning/mydump/parser_test.go new file mode 100644 index 000000000..4f033b66e --- /dev/null +++ b/lightning/mydump/parser_test.go @@ -0,0 +1,111 @@ +package mydump_test + +import ( + "io" + "strings" + + . "github.com/pingcap/check" + "github.com/pingcap/tidb-lightning/lightning/mydump" + "github.com/pkg/errors" +) + +var _ = Suite(&testMydumpParserSuite{}) + +type testMydumpParserSuite struct{} + +func (s *testMydumpParserSuite) SetUpSuite(c *C) {} +func (s *testMydumpParserSuite) TearDownSuite(c *C) {} + +func (s *testMydumpParserSuite) TestReadRow(c *C) { + reader := strings.NewReader( + "/* whatever pragmas */;" + + "INSERT INTO `namespaced`.`table` (columns, more, columns) VALUES (1, 2, 3), (4, 5, 6);" + + "INSERT `namespaced`.`table` (x,y,z) VALUES (7,8,9);" + + "insert another_table values (10, 11, 12, '(13)', '(', 14, ')');", + ) + + parser := mydump.NewChunkParser(reader) + + c.Assert(parser.ReadRow(), IsNil) + c.Assert(parser.LastRow(), DeepEquals, mydump.Row{ + RowID: 1, + Row: []byte("(1, 2, 3)"), + }) + c.Assert(parser.TableName, DeepEquals, []byte("`namespaced`.`table`")) + c.Assert(parser.Columns, DeepEquals, []byte("(columns, more, columns)")) + c.Assert(parser.Pos(), Equals, int64(97)) + + c.Assert(parser.ReadRow(), IsNil) + c.Assert(parser.LastRow(), DeepEquals, mydump.Row{ + RowID: 2, + Row: []byte("(4, 5, 6)"), + }) + c.Assert(parser.TableName, DeepEquals, []byte("`namespaced`.`table`")) + c.Assert(parser.Columns, DeepEquals, []byte("(columns, more, columns)")) + c.Assert(parser.Pos(), Equals, int64(108)) + + c.Assert(parser.ReadRow(), IsNil) + c.Assert(parser.LastRow(), DeepEquals, mydump.Row{ + RowID: 3, + Row: []byte("(7,8,9)"), + }) + c.Assert(parser.TableName, DeepEquals, []byte("`namespaced`.`table`")) + c.Assert(parser.Columns, DeepEquals, []byte("(x,y,z)")) + c.Assert(parser.Pos(), Equals, int64(159)) + + c.Assert(parser.ReadRow(), IsNil) + c.Assert(parser.LastRow(), DeepEquals, mydump.Row{ + RowID: 4, + Row: []byte("(10, 11, 12, '(13)', '(', 14, ')')"), + }) + c.Assert(parser.TableName, DeepEquals, []byte("another_table")) + c.Assert(parser.Columns, IsNil) + c.Assert(parser.Pos(), Equals, int64(222)) + + c.Assert(errors.Cause(parser.ReadRow()), Equals, io.EOF) +} + +func (s *testMydumpParserSuite) TestReadChunks(c *C) { + reader := strings.NewReader(` + INSERT foo VALUES (1,2,3,4),(5,6,7,8),(9,10,11,12); + INSERT foo VALUES (13,14,15,16),(17,18,19,20),(21,22,23,24),(25,26,27,28); + INSERT foo VALUES (29,30,31,32),(33,34,35,36); + `) + + parser := mydump.NewChunkParser(reader) + + chunks, err := parser.ReadChunks(32) + c.Assert(err, IsNil) + c.Assert(chunks, DeepEquals, []mydump.Chunk{ + mydump.Chunk{ + Offset: 0, + EndOffset: 40, + PrevRowIDMax: 0, + RowIDMax: 2, + }, + mydump.Chunk{ + Offset: 40, + EndOffset: 88, + PrevRowIDMax: 2, + RowIDMax: 4, + }, + mydump.Chunk{ + Offset: 88, + EndOffset: 130, + PrevRowIDMax: 4, + RowIDMax: 7, + }, + mydump.Chunk{ + Offset: 130, + EndOffset: 165, + PrevRowIDMax: 7, + RowIDMax: 8, + }, + mydump.Chunk{ + Offset: 165, + EndOffset: 179, + PrevRowIDMax: 8, + 
RowIDMax: 9, + }, + }) +} From 761e8b9559a72d199a1dbfde95cca835e5df6b26 Mon Sep 17 00:00:00 2001 From: kennytm Date: Tue, 30 Oct 2018 00:49:39 +0800 Subject: [PATCH 02/15] mydump, restore: use the new parser for chunkRestore --- lightning/kv/sql2kv.go | 6 +- lightning/mydump/region.go | 97 +++++++++++++++--------------- lightning/mydump/region_test.go | 69 +++++++++++---------- lightning/restore/restore.go | 103 ++++++++++++++++++++------------ 4 files changed, 157 insertions(+), 118 deletions(-) diff --git a/lightning/kv/sql2kv.go b/lightning/kv/sql2kv.go index f15e45bdc..d77955fbb 100644 --- a/lightning/kv/sql2kv.go +++ b/lightning/kv/sql2kv.go @@ -109,10 +109,10 @@ func (kvcodec *TableKVEncoder) NextRowID() int64 { return kvcodec.idAllocator.Base() + 1 } -func (kvcodec *TableKVEncoder) SQL2KV(sql []byte) ([]kvec.KvPair, uint64, error) { +func (kvcodec *TableKVEncoder) SQL2KV(sql string) ([]kvec.KvPair, uint64, error) { if PrepareStmtMode { // via prepare statment - kvPairs, rowsAffected, err := kvcodec.encodeViaPstmt(sql) + kvPairs, rowsAffected, err := kvcodec.encodeViaPstmt([]byte(sql)) if err == nil { return kvPairs, rowsAffected, nil } @@ -120,7 +120,7 @@ func (kvcodec *TableKVEncoder) SQL2KV(sql []byte) ([]kvec.KvPair, uint64, error) } // via sql execution - kvPairs, rowsAffected, err := kvcodec.encoder.Encode(string(sql), kvcodec.tableID) + kvPairs, rowsAffected, err := kvcodec.encoder.Encode(sql, kvcodec.tableID) if err != nil { common.AppLogger.Errorf("[sql2kv] sql encode error = %v", err) return nil, 0, errors.Trace(err) diff --git a/lightning/mydump/region.go b/lightning/mydump/region.go index e9de22791..9a4c2b8e1 100644 --- a/lightning/mydump/region.go +++ b/lightning/mydump/region.go @@ -2,7 +2,7 @@ package mydump import ( "fmt" - "io" + "os" "runtime" "sort" "sync" @@ -18,13 +18,26 @@ type TableRegion struct { Table string File string - Offset int64 - Size int64 + Columns []byte + Chunk Chunk } func (reg *TableRegion) Name() string { return fmt.Sprintf("%s|%s|%d|%d", - reg.DB, reg.Table, reg.ID, reg.Offset) + reg.DB, reg.Table, reg.ID, reg.Chunk.Offset) +} + +func (reg *TableRegion) RowIDMin() int64 { + return reg.Chunk.PrevRowIDMax + 1 +} +func (reg *TableRegion) Rows() int64 { + return reg.Chunk.RowIDMax - reg.Chunk.PrevRowIDMax +} +func (reg *TableRegion) Offset() int64 { + return reg.Chunk.Offset +} +func (reg *TableRegion) Size() int64 { + return reg.Chunk.EndOffset - reg.Chunk.Offset } type regionSlice []*TableRegion @@ -37,7 +50,7 @@ func (rs regionSlice) Swap(i, j int) { } func (rs regionSlice) Less(i, j int) bool { if rs[i].File == rs[j].File { - return rs[i].Offset < rs[j].Offset + return rs[i].Chunk.Offset < rs[j].Chunk.Offset } return rs[i].File < rs[j].File } @@ -82,12 +95,14 @@ func (f *RegionFounder) MakeTableRegions(meta *MDTableMeta) []*TableRegion { go func(pid int, file string) { common.AppLogger.Debugf("[%s] loading file's region (%s) ...", table, file) - var regions []*TableRegion - regions = splitFuzzyRegion(db, table, file, minRegionSize) - - lock.Lock() - filesRegions = append(filesRegions, regions...) - lock.Unlock() + chunks, err := splitExactChunks(db, table, file, minRegionSize) + if err == nil { + lock.Lock() + filesRegions = append(filesRegions, chunks...) 
+ lock.Unlock() + } else { + common.AppLogger.Errorf("failed to extract chunks from file (%s): %s", file, err.Error()) + } processors <- pid wg.Done() @@ -97,56 +112,44 @@ func (f *RegionFounder) MakeTableRegions(meta *MDTableMeta) []*TableRegion { // Setup files' regions sort.Sort(filesRegions) // ps : sort region by - (fileName, fileOffset) + var totalRowCount int64 for i, region := range filesRegions { region.ID = i + + // Re-adjust the row IDs so they won't be overlapping. + chunkRowCount := region.Chunk.RowIDMax - region.Chunk.PrevRowIDMax + region.Chunk.PrevRowIDMax = totalRowCount + totalRowCount += chunkRowCount + region.Chunk.RowIDMax = totalRowCount } return filesRegions } -func splitFuzzyRegion(db string, table string, file string, minRegionSize int64) []*TableRegion { - reader, err := NewMDDataReader(file, 0) +func splitExactChunks(db string, table string, file string, minChunkSize int64) ([]*TableRegion, error) { + reader, err := os.Open(file) if err != nil { - if err == ErrInsertStatementNotFound { - common.AppLogger.Warnf("failed to generate file's regions (%s) : %s", file, err.Error()) - } else { - common.AppLogger.Errorf("failed to generate file's regions (%s) : %s", file, err.Error()) - } - return nil + return nil, errors.Trace(err) } defer reader.Close() - newRegion := func(off int64) *TableRegion { - return &TableRegion{ - ID: -1, - DB: db, - Table: table, - File: file, - Offset: off, - Size: 0, - } + parser := NewChunkParser(reader) + chunks, err := parser.ReadChunks(minChunkSize) + if err != nil { + return nil, errors.Trace(err) } - regions := make([]*TableRegion, 0) - - var extendSize = int64(4 << 10) // 4 K - var offset int64 - for { - reader.Seek(offset + minRegionSize) - _, err := reader.Read(extendSize) - pos := reader.Tell() - - region := newRegion(offset) - region.Size = pos - offset - if region.Size > 0 { - regions = append(regions, region) - } - - if errors.Cause(err) == io.EOF { - break + annotatedChunks := make([]*TableRegion, len(chunks)) + for i, chunk := range chunks { + annotatedChunks[i] = &TableRegion{ + ID: -1, + DB: db, + Table: table, + File: file, + Columns: parser.Columns, + Chunk: chunk, } - offset = pos } - return regions + return annotatedChunks, nil } diff --git a/lightning/mydump/region_test.go b/lightning/mydump/region_test.go index c68df5e02..0d4ff99a6 100644 --- a/lightning/mydump/region_test.go +++ b/lightning/mydump/region_test.go @@ -2,6 +2,8 @@ package mydump_test import ( "bytes" + "fmt" + "path/filepath" . "github.com/pingcap/check" "github.com/pingcap/tidb-lightning/lightning/common" @@ -20,8 +22,15 @@ type testMydumpRegionSuite struct{} func (s *testMydumpRegionSuite) SetUpSuite(c *C) {} func (s *testMydumpRegionSuite) TearDownSuite(c *C) {} +var expectedTuplesCount = map[string]int64{ + "i": 1, + "report_case_high_risk": 1, + "tbl_autoid": 10000, + "tbl_multi_index": 10000, +} + /* - TODO : test with specified 'fuzzyRegionSize' & 'regionBlockSize' ... + TODO : test with specified 'regionBlockSize' ... 
*/ func (s *testMydumpRegionSuite) TestTableRegion(c *C) { cfg := &config.Config{Mydumper: config.MydumperRuntime{SourceDir: "./examples"}} @@ -32,33 +41,38 @@ func (s *testMydumpRegionSuite) TestTableRegion(c *C) { for _, meta := range dbMeta.Tables { regions := founder.MakeTableRegions(meta) - // table := meta.Name - // fmt.Printf("[%s] region count ===============> %d\n", table, len(regions)) - // for _, region := range regions { - // fname := filepath.Base(region.File) - // fmt.Printf("[%s] rowID = %5d / rows = %5d / offset = %10d / size = %10d \n", - // fname, region.BeginRowID, region.Rows, region.Offset, region.Size) - // } + table := meta.Name + fmt.Printf("[%s] region count ===============> %d\n", table, len(regions)) + for _, region := range regions { + fname := filepath.Base(region.File) + fmt.Printf("[%s] rowID = %5d / rows = %5d / offset = %10d / size = %10d \n", + fname, + region.RowIDMin(), + region.Rows(), + region.Offset(), + region.Size()) + } // check - region-size vs file-size var tolFileSize int64 = 0 - var tolRegionSize int64 = 0 for _, file := range meta.DataFiles { fileSize, err := common.GetFileSize(file) c.Assert(err, IsNil) tolFileSize += fileSize } - for _, region := range regions { - tolRegionSize += region.Size - } - c.Assert(tolRegionSize, Equals, tolFileSize) - - // check - rows num - // var tolRows int64 = 0 + // var tolRegionSize int64 = 0 // for _, region := range regions { - // tolRows += region.Rows + // tolRegionSize += region.Size() // } - // c.Assert(tolRows, Equals, int64(10000)) + // c.Assert(tolRegionSize, Equals, tolFileSize) + // (The size will not be equal since the comments at the end are omitted) + + // check - rows num + var tolRows int64 = 0 + for _, region := range regions { + tolRows += region.Rows() + } + c.Assert(tolRows, Equals, expectedTuplesCount[table]) // check - range regionNum := len(regions) @@ -66,11 +80,11 @@ func (s *testMydumpRegionSuite) TestTableRegion(c *C) { for i := 1; i < regionNum; i++ { reg := regions[i] if preReg.File == reg.File { - c.Assert(reg.Offset, Equals, preReg.Offset+preReg.Size) - // c.Assert(reg.BeginRowID, Equals, preReg.BeginRowID+preReg.Rows) + c.Assert(reg.Offset(), Equals, preReg.Offset()+preReg.Size()) + c.Assert(reg.RowIDMin(), Equals, preReg.RowIDMin()+preReg.Rows()) } else { c.Assert(reg.Offset, Equals, 0) - // c.Assert(reg.BeginRowID, Equals, 1) + c.Assert(reg.RowIDMin(), Equals, 1) } preReg = reg } @@ -85,27 +99,20 @@ func (s *testMydumpRegionSuite) TestRegionReader(c *C) { dbMeta := loader.GetDatabases()["mocker_test"] founder := NewRegionFounder(defMinRegionSize) - expectedTuplesCount := map[string]int{ - "i": 1, - "report_case_high_risk": 1, - "tbl_autoid": 10000, - "tbl_multi_index": 10000, - } - for _, meta := range dbMeta.Tables { regions := founder.MakeTableRegions(meta) tolValTuples := 0 for _, reg := range regions { - regReader, _ := NewRegionReader(reg.File, reg.Offset, reg.Size) - stmts, _ := regReader.Read(reg.Size) + regReader, _ := NewRegionReader(reg.File, reg.Offset(), reg.Size()) + stmts, _ := regReader.Read(reg.Size()) for _, stmt := range stmts { parts := bytes.Split(stmt, []byte("),")) tolValTuples += len(parts) } } - c.Assert(tolValTuples, Equals, expectedTuplesCount[meta.Name]) + c.Assert(int64(tolValTuples), Equals, expectedTuplesCount[meta.Name]) } return diff --git a/lightning/restore/restore.go b/lightning/restore/restore.go index 7e0f92d83..f137b5c20 100644 --- a/lightning/restore/restore.go +++ b/lightning/restore/restore.go @@ -13,7 +13,7 @@ import ( "time" 
"github.com/coreos/go-semver/semver" - "github.com/pkg/errors" + "github.com/cznic/mathutil" sstpb "github.com/pingcap/kvproto/pkg/import_sstpb" "github.com/pingcap/tidb-lightning/lightning/common" "github.com/pingcap/tidb-lightning/lightning/config" @@ -23,6 +23,7 @@ import ( verify "github.com/pingcap/tidb-lightning/lightning/verification" tidbcfg "github.com/pingcap/tidb/config" "github.com/pingcap/tidb/util/kvencoder" + "github.com/pkg/errors" ) const ( @@ -795,31 +796,38 @@ func (pool *RestoreWorkerPool) Recycle(worker *RestoreWorker) { //////////////////////////////////////////////////////////////// type chunkRestore struct { - reader *mydump.RegionReader - path string - offset int64 - name string + parser *mydump.ChunkParser + path string + name string + columns []byte + chunk mydump.Chunk } func newChunkRestore(chunk *mydump.TableRegion, cp *TableCheckpoint) (*chunkRestore, error) { - reader, err := mydump.NewRegionReader(chunk.File, chunk.Offset, chunk.Size) + reader, err := os.Open(chunk.File) if err != nil { return nil, errors.Trace(err) } - if pos, ok := cp.ChunkPos(chunk.File, chunk.Offset); ok { - reader.Seek(pos) + parser := mydump.NewChunkParser(reader) + + pos, ok := cp.ChunkPos(chunk.File, chunk.Offset()) + if !ok { + pos = chunk.Offset() } + reader.Seek(pos, io.SeekStart) + parser.Pos = pos return &chunkRestore{ - reader: reader, - path: chunk.File, - offset: chunk.Offset, - name: chunk.Name(), + parser: parser, + path: chunk.File, + name: chunk.Name(), + columns: chunk.Columns, + chunk: chunk.Chunk, }, nil } func (cr *chunkRestore) close() { - cr.reader.Close() + cr.parser.Reader().(*os.File).Close() } type TableRestore struct { @@ -884,7 +892,7 @@ func (t *TableRestore) loadChunks(minChunkSize int64, cp *TableCheckpoint) []*my // Remove all regions which have been imported newChunks := chunks[:0] for _, chunk := range chunks { - if pos, ok := cp.ChunkPos(chunk.File, chunk.Offset); !ok || pos < chunk.Offset+chunk.Size { + if pos, ok := cp.ChunkPos(chunk.File, chunk.Chunk.Offset); !ok || pos < chunk.Chunk.EndOffset { newChunks = append(newChunks, chunk) } } @@ -1085,7 +1093,6 @@ func (cr *chunkRestore) restore( timer := time.Now() -outside: for { select { case <-ctx.Done(): @@ -1093,15 +1100,39 @@ outside: default: } + endOffset := mathutil.MinInt64(cr.chunk.EndOffset, cr.parser.Pos+rc.cfg.Mydumper.ReadBlockSize) + if cr.parser.Pos >= endOffset { + break + } + start := time.Now() - sqls, err := cr.reader.Read(rc.cfg.Mydumper.ReadBlockSize) - switch errors.Cause(err) { - case nil: - case io.EOF: - break outside - default: - return errors.Trace(err) + + var sqls strings.Builder + sqls.WriteString("INSERT INTO ") + sqls.WriteString(t.tableName) + sqls.Write(cr.columns) + sqls.WriteString(" VALUES") + var sep byte = ' ' + readLoop: + for cr.parser.Pos < endOffset { + err := cr.parser.ReadRow() + switch errors.Cause(err) { + case nil: + sqls.WriteByte(sep) + sep = ',' + lastRow := cr.parser.LastRow() + sqls.Write(lastRow.Row) + case io.EOF: + break readLoop + default: + return errors.Trace(err) + } + } + if sep != ',' { // quick and dirty way to check if `sqls` actually contained any values + continue } + sqls.WriteByte(';') + metrics.MarkTiming(readMark, start) var ( @@ -1110,21 +1141,19 @@ outside: localChecksum verify.KVChecksum ) // sql -> kv - for _, stmt := range sqls { - start = time.Now() - kvs, affectedRows, err := kvEncoder.SQL2KV(stmt) - metrics.MarkTiming(encodeMark, start) - common.AppLogger.Debugf("len(kvs) %d, len(sql) %d", len(kvs), len(stmt)) - if err != nil 
{ - common.AppLogger.Errorf("kv encode failed = %s\n", err.Error()) - return errors.Trace(err) - } - - totalKVs = append(totalKVs, kvs...) - localChecksum.Update(kvs) - totalAffectedRows += affectedRows + start = time.Now() + kvs, affectedRows, err := kvEncoder.SQL2KV(sqls.String()) + metrics.MarkTiming(encodeMark, start) + common.AppLogger.Debugf("len(kvs) %d, len(sql) %d", len(kvs), sqls.Len()) + if err != nil { + common.AppLogger.Errorf("kv encode failed = %s\n", err.Error()) + return errors.Trace(err) } + totalKVs = append(totalKVs, kvs...) + localChecksum.Update(kvs) + totalAffectedRows += affectedRows + // kv -> deliver ( -> tikv ) start = time.Now() stream, err := engine.NewWriteStream(ctx) @@ -1157,8 +1186,8 @@ outside: AllocBase: t.alloc.Base() + 1, Checksum: t.checksum, Path: cr.path, - Offset: cr.offset, - Pos: cr.reader.Tell(), + Offset: cr.chunk.Offset, + Pos: cr.parser.Pos, }, } t.checksumLock.Unlock() From efd17762806e43c3b2bb68a7efaad3a6287f8dd7 Mon Sep 17 00:00:00 2001 From: kennytm Date: Tue, 30 Oct 2018 01:47:27 +0800 Subject: [PATCH 03/15] restore, kv: replace the default ID allocator by a panicking allocator --- lightning/kv/allocator.go | 46 ++++++++++++++++++++++++++++++++++++ lightning/kv/sql2kv.go | 9 +++---- lightning/restore/restore.go | 6 ++--- 3 files changed, 52 insertions(+), 9 deletions(-) create mode 100644 lightning/kv/allocator.go diff --git a/lightning/kv/allocator.go b/lightning/kv/allocator.go new file mode 100644 index 000000000..b554fae6f --- /dev/null +++ b/lightning/kv/allocator.go @@ -0,0 +1,46 @@ +package kv + +import "sync/atomic" + +// PanickingAllocator is an ID allocator which panics on all operations except Rebase +type PanickingAllocator struct { + base int64 +} + +func NewPanickingAllocator(base int64) *PanickingAllocator { + return &PanickingAllocator{base: base} +} + +func (alloc *PanickingAllocator) Alloc(int64) (int64, error) { + panic("unexpected Alloc() call") +} + +func (alloc *PanickingAllocator) Reset(newBase int64) { + panic("unexpected Reset() call") +} + +func (alloc *PanickingAllocator) Rebase(tableID, newBase int64, allocIDs bool) error { + // CAS + for { + oldBase := atomic.LoadInt64(&alloc.base) + if newBase <= oldBase { + break + } + if atomic.CompareAndSwapInt64(&alloc.base, oldBase, newBase) { + break + } + } + return nil +} + +func (alloc *PanickingAllocator) Base() int64 { + return atomic.LoadInt64(&alloc.base) +} + +func (alloc *PanickingAllocator) End() int64 { + panic("unexpected End() call") +} + +func (alloc *PanickingAllocator) NextGlobalAutoID(tableID int64) (int64, error) { + panic("unexpected NextGlobalAutoID() call") +} diff --git a/lightning/kv/sql2kv.go b/lightning/kv/sql2kv.go index d77955fbb..6640f1ce8 100644 --- a/lightning/kv/sql2kv.go +++ b/lightning/kv/sql2kv.go @@ -6,6 +6,7 @@ import ( "github.com/pingcap/tidb-lightning/lightning/metric" sqltool "github.com/pingcap/tidb-lightning/lightning/sql" "github.com/pingcap/tidb/kv" + "github.com/pingcap/tidb/meta/autoid" kvec "github.com/pingcap/tidb/util/kvencoder" ) @@ -31,13 +32,13 @@ type TableKVEncoder struct { bufValues []interface{} encoder kvec.KvEncoder - idAllocator *kvec.Allocator + idAllocator autoid.Allocator } func NewTableKVEncoder( dbName string, table string, tableID int64, - columns int, sqlMode string, alloc *kvec.Allocator) (*TableKVEncoder, error) { + columns int, sqlMode string, alloc autoid.Allocator) (*TableKVEncoder, error) { encoder, err := kvec.New(dbName, alloc) if err != nil { @@ -96,10 +97,6 @@ func (kvcodec *TableKVEncoder) 
makeStatments(maxRows int) ([]uint32, error) { return stmtIds, nil } -func (kvcodec *TableKVEncoder) ResetRowID(rowID int64) { - kvcodec.idAllocator.Reset(rowID) -} - func (kvcodec *TableKVEncoder) Close() error { metric.KvEncoderCounter.WithLabelValues("closed").Inc() return errors.Trace(kvcodec.encoder.Close()) diff --git a/lightning/restore/restore.go b/lightning/restore/restore.go index f137b5c20..1def7186c 100644 --- a/lightning/restore/restore.go +++ b/lightning/restore/restore.go @@ -22,6 +22,7 @@ import ( "github.com/pingcap/tidb-lightning/lightning/mydump" verify "github.com/pingcap/tidb-lightning/lightning/verification" tidbcfg "github.com/pingcap/tidb/config" + "github.com/pingcap/tidb/meta/autoid" "github.com/pingcap/tidb/util/kvencoder" "github.com/pkg/errors" ) @@ -837,7 +838,7 @@ type TableRestore struct { tableInfo *TidbTableInfo tableMeta *mydump.MDTableMeta encoder kvenc.KvEncoder - alloc *kvenc.Allocator + alloc autoid.Allocator checksumLock sync.Mutex checksum verify.KVChecksum @@ -853,8 +854,7 @@ func NewTableRestore( tableInfo *TidbTableInfo, cp *TableCheckpoint, ) (*TableRestore, error) { - idAlloc := kvenc.NewAllocator() - idAlloc.Reset(cp.AllocBase) + idAlloc := kv.NewPanickingAllocator(cp.AllocBase) encoder, err := kvenc.New(dbInfo.Name, idAlloc) if err != nil { return nil, errors.Trace(err) From 1b1f1c5852951d5a8e4f102f680a7585aa9d7d0a Mon Sep 17 00:00:00 2001 From: kennytm Date: Tue, 30 Oct 2018 15:57:52 +0800 Subject: [PATCH 04/15] restore: include _tidb_rowid if this column is required --- lightning/restore/checkpoints.go | 205 ++++++++++++++++++++++--------- lightning/restore/restore.go | 168 +++++++++++++++---------- tests/checkpoint_chunks/run.sh | 2 +- 3 files changed, 248 insertions(+), 127 deletions(-) diff --git a/lightning/restore/checkpoints.go b/lightning/restore/checkpoints.go index 8f814d932..488478fb8 100644 --- a/lightning/restore/checkpoints.go +++ b/lightning/restore/checkpoints.go @@ -13,6 +13,7 @@ import ( "github.com/satori/go.uuid" "github.com/pingcap/tidb-lightning/lightning/common" + "github.com/pingcap/tidb-lightning/lightning/mydump" verify "github.com/pingcap/tidb-lightning/lightning/verification" ) @@ -51,46 +52,62 @@ func (status CheckpointStatus) MetricName() string { } } +type ChunkCheckpointKey struct { + Path string + Offset int64 +} + +func (key *ChunkCheckpointKey) String() string { + return fmt.Sprintf("%s:%d", key.Path, key.Offset) +} + +type ChunkCheckpoint struct { + Key ChunkCheckpointKey + Columns []byte + ShouldIncludeRowID bool + Chunk mydump.Chunk + Checksum verify.KVChecksum +} + type TableCheckpoint struct { Status CheckpointStatus Engine uuid.UUID AllocBase int64 - Checksum verify.KVChecksum - chunks map[chunkCheckpoint]int64 + Chunks []*ChunkCheckpoint // a sorted array } func (cp *TableCheckpoint) resetChunks() { - cp.chunks = make(map[chunkCheckpoint]int64) + cp.Chunks = nil } -func (cp *TableCheckpoint) ChunkPos(path string, offset int64) (int64, bool) { - pos, ok := cp.chunks[chunkCheckpoint{path: path, offset: offset}] - return pos, ok -} - -type chunkCheckpoint struct { - path string - offset int64 +type chunkCheckpointDiff struct { + path string + offset int64 + pos int64 + rowID int64 + checksum verify.KVChecksum } type TableCheckpointDiff struct { - hasStatus bool - hasChecksum bool - status CheckpointStatus - allocBase int64 - checksum verify.KVChecksum - chunks map[chunkCheckpoint]int64 + hasStatus bool + hasChunks bool + status CheckpointStatus + allocBase int64 + chunks 
map[ChunkCheckpointKey]chunkCheckpointDiff } func NewTableCheckpointDiff() *TableCheckpointDiff { return &TableCheckpointDiff{ status: CheckpointStatusMaxInvalid + 1, - chunks: make(map[chunkCheckpoint]int64), + chunks: make(map[ChunkCheckpointKey]chunkCheckpointDiff), } } func (cpd *TableCheckpointDiff) String() string { - return fmt.Sprintf("{hasStatus:%v, hasChecksum:%v, status:%d, allocBase:%d, checksum:%v, chunks:[%d]}", cpd.hasStatus, cpd.hasChecksum, cpd.status, cpd.allocBase, cpd.checksum, len(cpd.chunks)) + return fmt.Sprintf( + "{hasStatus:%v, hasChunks:%v, status:%d, allocBase:%d, chunks:[%d]}", + cpd.hasStatus, cpd.hasChunks, cpd.status, cpd.allocBase, len(cpd.chunks), + ) } type TableCheckpointMerger interface { @@ -116,25 +133,28 @@ func (merger *StatusCheckpointMerger) MergeInto(cpd *TableCheckpointDiff) { } type ChunkCheckpointMerger struct { + Key ChunkCheckpointKey AllocBase int64 Checksum verify.KVChecksum - Path string - Offset int64 Pos int64 + RowID int64 } func (merger *ChunkCheckpointMerger) MergeInto(cpd *TableCheckpointDiff) { - cpd.hasChecksum = true + cpd.hasChunks = true cpd.allocBase = merger.AllocBase - cpd.checksum = merger.Checksum - chcp := chunkCheckpoint{path: merger.Path, offset: merger.Offset} - cpd.chunks[chcp] = merger.Pos + cpd.chunks[merger.Key] = chunkCheckpointDiff{ + pos: merger.Pos, + rowID: merger.RowID, + checksum: merger.Checksum, + } } type CheckpointsDB interface { Initialize(ctx context.Context, dbInfo map[string]*TidbDBInfo) error Get(ctx context.Context, tableName string) (*TableCheckpoint, error) Close() error + InsertChunkCheckpoints(ctx context.Context, tableName string, checkpoints []*ChunkCheckpoint) error Update(checkpointDiffs map[string]*TableCheckpointDiff) RemoveCheckpoint(ctx context.Context, tableName string) error @@ -162,10 +182,13 @@ func (*NullCheckpointsDB) Get(_ context.Context, tableName string) (*TableCheckp return &TableCheckpoint{ Status: CheckpointStatusLoaded, Engine: uuid.NewV4(), - chunks: make(map[chunkCheckpoint]int64), }, nil } +func (*NullCheckpointsDB) InsertChunkCheckpoints(_ context.Context, _ string, _ []*ChunkCheckpoint) error { + return nil +} + func (*NullCheckpointsDB) Update(map[string]*TableCheckpointDiff) {} type MySQLCheckpointsDB struct { @@ -190,18 +213,25 @@ func NewMySQLCheckpointsDB(ctx context.Context, db *sql.DB, schemaName string) ( engine binary(16) NOT NULL, status tinyint unsigned DEFAULT 30, alloc_base bigint NOT NULL DEFAULT 0, - kvc_bytes bigint unsigned NOT NULL DEFAULT 0, - kvc_kvs bigint unsigned NOT NULL DEFAULT 0, - kvc_checksum bigint unsigned NOT NULL DEFAULT 0, create_time timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, update_time timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, INDEX(node_id, session) ); - CREATE TABLE IF NOT EXISTS %[1]s.chunk_v2 ( + CREATE TABLE IF NOT EXISTS %[1]s.chunk_v3 ( table_name varchar(261) NOT NULL, path varchar(2048) NOT NULL, offset bigint NOT NULL, + columns text NULL, + should_include_row_id BOOL NOT NULL, + end_offset bigint NOT NULL, pos bigint NOT NULL, + prev_rowid_max bigint NOT NULL, + rowid_max bigint NOT NULL, + kvc_bytes bigint unsigned NOT NULL DEFAULT 0, + kvc_kvs bigint unsigned NOT NULL DEFAULT 0, + kvc_checksum bigint unsigned NOT NULL DEFAULT 0, + create_time timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, + update_time timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, PRIMARY KEY(table_name, path, offset) ); `, schema)) @@ -271,7 +301,14 @@ func (cpdb *MySQLCheckpointsDB) Get(ctx 
context.Context, tableName string) (*Tab purpose := "(read checkpoint " + tableName + ")" err := common.TransactWithRetry(ctx, cpdb.db, purpose, func(c context.Context, tx *sql.Tx) error { - query := fmt.Sprintf(`SELECT path, offset, pos FROM %s.chunk_v2 WHERE table_name = ?`, cpdb.schema) + query := fmt.Sprintf(` + SELECT + path, offset, columns, should_include_row_id, + pos, end_offset, prev_rowid_max, rowid_max, + kvc_bytes, kvc_kvs, kvc_checksum + FROM %s.chunk_v3 WHERE table_name = ? + ORDER BY path, offset; + `, cpdb.schema) rows, err := tx.QueryContext(c, query, tableName) if err != nil { return errors.Trace(err) @@ -279,32 +316,35 @@ func (cpdb *MySQLCheckpointsDB) Get(ctx context.Context, tableName string) (*Tab defer rows.Close() for rows.Next() { var ( - ccp chunkCheckpoint - pos int64 + value = new(ChunkCheckpoint) + kvcBytes uint64 + kvcKVs uint64 + kvcChecksum uint64 ) - if err := rows.Scan(&ccp.path, &ccp.offset, &pos); err != nil { + if err := rows.Scan( + &value.Key.Path, &value.Key.Offset, &value.Columns, &value.ShouldIncludeRowID, + &value.Chunk.Offset, &value.Chunk.EndOffset, &value.Chunk.PrevRowIDMax, &value.Chunk.RowIDMax, + &kvcBytes, &kvcKVs, &kvcChecksum, + ); err != nil { return errors.Trace(err) } - cp.chunks[ccp] = pos + value.Checksum = verify.MakeKVChecksum(kvcBytes, kvcKVs, kvcChecksum) + cp.Chunks = append(cp.Chunks, value) } if err := rows.Err(); err != nil { return errors.Trace(err) } query = fmt.Sprintf(` - SELECT status, engine, alloc_base, kvc_bytes, kvc_kvs, kvc_checksum - FROM %s.table_v1 WHERE table_name = ? + SELECT status, engine, alloc_base FROM %s.table_v1 WHERE table_name = ? `, cpdb.schema) row := tx.QueryRowContext(c, query, tableName) var ( - status uint8 - engine []byte - kvcBytes uint64 - kvcKVs uint64 - kvcChecksum uint64 + status uint8 + engine []byte ) - if err := row.Scan(&status, &engine, &cp.AllocBase, &kvcBytes, &kvcKVs, &kvcChecksum); err != nil { + if err := row.Scan(&status, &engine, &cp.AllocBase); err != nil { cp.resetChunks() return errors.Trace(err) } @@ -314,7 +354,6 @@ func (cpdb *MySQLCheckpointsDB) Get(ctx context.Context, tableName string) (*Tab return errors.Trace(err) } cp.Status = CheckpointStatus(status) - cp.Checksum = verify.MakeKVChecksum(kvcBytes, kvcKVs, kvcChecksum) return nil }) if err != nil { @@ -328,12 +367,52 @@ func (cpdb *MySQLCheckpointsDB) Get(ctx context.Context, tableName string) (*Tab return cp, nil } +func (cpdb *MySQLCheckpointsDB) InsertChunkCheckpoints(ctx context.Context, tableName string, checkpoints []*ChunkCheckpoint) error { + err := common.TransactWithRetry(ctx, cpdb.db, "(update chunk checkpoints for "+tableName+")", func(c context.Context, tx *sql.Tx) error { + stmt, err := tx.PrepareContext(c, fmt.Sprintf(` + REPLACE INTO %s.chunk_v3 ( + table_name, path, offset, columns, should_include_row_id, + pos, end_offset, prev_rowid_max, rowid_max, + kvc_bytes, kvc_kvs, kvc_checksum + ) VALUES ( + ?, ?, ?, ?, ?, + ?, ?, ?, ?, + ?, ?, ? 
+ ); + `, cpdb.schema)) + if err != nil { + return errors.Trace(err) + } + defer stmt.Close() + + for _, value := range checkpoints { + _, err = stmt.ExecContext( + c, + tableName, value.Key.Path, value.Key.Offset, value.Columns, value.ShouldIncludeRowID, + value.Chunk.Offset, value.Chunk.EndOffset, value.Chunk.PrevRowIDMax, value.Chunk.RowIDMax, + value.Checksum.SumSize(), value.Checksum.SumKVS(), value.Checksum.Sum(), + ) + if err != nil { + return errors.Trace(err) + } + } + + return nil + }) + if err != nil { + return errors.Trace(err) + } + + return nil +} + func (cpdb *MySQLCheckpointsDB) Update(checkpointDiffs map[string]*TableCheckpointDiff) { chunkQuery := fmt.Sprintf(` - REPLACE INTO %s.chunk_v2 (table_name, path, offset, pos) VALUES (?, ?, ?, ?); + UPDATE %s.chunk_v3 SET pos = ?, prev_rowid_max = ?, kvc_bytes = ?, kvc_kvs = ?, kvc_checksum = ? + WHERE table_name = ? AND path = ? AND offset = ?; `, cpdb.schema) checksumQuery := fmt.Sprintf(` - UPDATE %s.table_v1 SET alloc_base = ?, kvc_bytes = ?, kvc_kvs = ?, kvc_checksum = ? WHERE table_name = ?; + UPDATE %s.table_v1 SET alloc_base = ? WHERE table_name = ?; `, cpdb.schema) statusQuery := fmt.Sprintf(` UPDATE %s.table_v1 SET status = ? WHERE table_name = ?; @@ -362,13 +441,17 @@ func (cpdb *MySQLCheckpointsDB) Update(checkpointDiffs map[string]*TableCheckpoi return errors.Trace(e) } } - if cpd.hasChecksum { - if _, e := checksumStmt.ExecContext(c, cpd.allocBase, cpd.checksum.SumSize(), cpd.checksum.SumKVS(), cpd.checksum.Sum(), tableName); e != nil { + if cpd.hasChunks { + if _, e := checksumStmt.ExecContext(c, cpd.allocBase, tableName); e != nil { return errors.Trace(e) } } - for chcp, pos := range cpd.chunks { - if _, e := chunkStmt.ExecContext(c, tableName, chcp.path, chcp.offset, pos); e != nil { + for key, diff := range cpd.chunks { + if _, e := chunkStmt.ExecContext( + c, + diff.pos, diff.rowID, diff.checksum.SumSize(), diff.checksum.SumKVS(), diff.checksum.Sum(), + tableName, key.Path, key.Offset, + ); e != nil { return errors.Trace(e) } } @@ -409,11 +492,11 @@ func (cpdb *MySQLCheckpointsDB) RemoveCheckpoint(ctx context.Context, tableName ) if tableName == "all" { - deleteChunkFmt = "DELETE FROM %[1]s.chunk_v2 WHERE table_name IN (SELECT table_name FROM %[1]s.table_v1 WHERE node_id = ?)" + deleteChunkFmt = "DELETE FROM %[1]s.chunk_v3 WHERE table_name IN (SELECT table_name FROM %[1]s.table_v1 WHERE node_id = ?)" deleteTableFmt = "DELETE FROM %s.table_v1 WHERE node_id = ?" arg = nodeID } else { - deleteChunkFmt = "DELETE FROM %s.chunk_v2 WHERE table_name = ?" + deleteChunkFmt = "DELETE FROM %s.chunk_v3 WHERE table_name = ?" deleteTableFmt = "DELETE FROM %s.table_v1 WHERE table_name = ?" arg = tableName } @@ -485,7 +568,7 @@ func (cpdb *MySQLCheckpointsDB) destroyErrorCheckpoints(ctx context.Context, tab SELECT table_name FROM %s.table_v1 WHERE %s = ? AND status <= %d; `, cpdb.schema, conditionColumn, CheckpointStatusMaxInvalid) deleteChunkQuery := fmt.Sprintf(` - DELETE FROM %[1]s.chunk_v2 WHERE table_name IN (SELECT table_name FROM %[1]s.table_v1 WHERE %[2]s = ? AND status <= %[3]d) + DELETE FROM %[1]s.chunk_v3 WHERE table_name IN (SELECT table_name FROM %[1]s.table_v1 WHERE %[2]s = ? AND status <= %[3]d) `, cpdb.schema, conditionColumn, CheckpointStatusMaxInvalid) deleteTableQuery := fmt.Sprintf(` DELETE FROM %s.table_v1 WHERE %s = ? 
AND status <= %d @@ -538,9 +621,6 @@ func (cpdb *MySQLCheckpointsDB) DumpTables(ctx context.Context, writer io.Writer hex(engine) AS engine, status, alloc_base, - kvc_bytes, - kvc_kvs, - kvc_checksum, create_time, update_time FROM %s.table_v1; @@ -559,8 +639,17 @@ func (cpdb *MySQLCheckpointsDB) DumpChunks(ctx context.Context, writer io.Writer table_name, path, offset, - pos - FROM %s.chunk_v2; + columns, + pos, + end_offset, + prev_rowid_max, + rowid_max, + kvc_bytes, + kvc_kvs, + kvc_checksum, + create_time, + update_time + FROM %s.chunk_v3; `, cpdb.schema)) if err != nil { return errors.Trace(err) diff --git a/lightning/restore/restore.go b/lightning/restore/restore.go index 1def7186c..8cd48b28f 100644 --- a/lightning/restore/restore.go +++ b/lightning/restore/restore.go @@ -1,12 +1,14 @@ package restore import ( + "bytes" "context" "database/sql" "fmt" "io" "net/http" "os" + "regexp" "strings" "sync" "sync/atomic" @@ -23,6 +25,7 @@ import ( verify "github.com/pingcap/tidb-lightning/lightning/verification" tidbcfg "github.com/pingcap/tidb/config" "github.com/pingcap/tidb/meta/autoid" + "github.com/pingcap/tidb/model" "github.com/pingcap/tidb/util/kvencoder" "github.com/pkg/errors" ) @@ -394,9 +397,14 @@ func (t *TableRestore) restore(ctx context.Context, rc *RestoreController, cp *T return nil, errors.Trace(err) } - var chunks []*mydump.TableRegion - if cp.Status < CheckpointStatusAllWritten { - chunks = t.loadChunks(rc.cfg.Mydumper.MinRegionSize, cp) + // no need to do anything if the chunks are already populated + if len(cp.Chunks) > 0 { + common.AppLogger.Infof("[%s] reusing %d chunks from checkpoint", t.tableName, len(cp.Chunks)) + } else if cp.Status < CheckpointStatusAllWritten { + t.populateChunks(rc.cfg.Mydumper.MinRegionSize, cp, t.tableInfo) + if err := rc.checkpointsDB.InsertChunkCheckpoints(ctx, t.tableName, cp.Chunks); err != nil { + return nil, errors.Trace(err) + } } var wg sync.WaitGroup @@ -409,7 +417,11 @@ func (t *TableRestore) restore(ctx context.Context, rc *RestoreController, cp *T handledChunksCount := new(int32) // Restore table data - for _, chunk := range chunks { + for chunkIndex, chunk := range cp.Chunks { + if chunk.Chunk.Offset >= chunk.Chunk.EndOffset { + continue + } + select { case <-ctx.Done(): return nil, ctx.Err() @@ -429,7 +441,7 @@ func (t *TableRestore) restore(ctx context.Context, rc *RestoreController, cp *T // 3. load kvs data (into kv deliver server) // 4. 
flush kvs data (into tikv node) - cr, err := newChunkRestore(chunk, cp) + cr, err := newChunkRestore(chunkIndex, chunk) if err != nil { return nil, errors.Trace(err) } @@ -449,7 +461,7 @@ func (t *TableRestore) restore(ctx context.Context, rc *RestoreController, cp *T if err != nil { metric.ChunkCounter.WithLabelValues(metric.ChunkStateFailed).Inc() if !common.IsContextCanceledError(err) { - common.AppLogger.Errorf("[%s] chunk %s run task error %s", t.tableName, cr.name, errors.ErrorStack(err)) + common.AppLogger.Errorf("[%s] chunk #%d (%s) run task error %s", t.tableName, cr.index, &cr.chunk.Key, errors.ErrorStack(err)) } chunkErrMutex.Lock() if chunkErr == nil { @@ -461,7 +473,7 @@ func (t *TableRestore) restore(ctx context.Context, rc *RestoreController, cp *T metric.ChunkCounter.WithLabelValues(metric.ChunkStateFinished).Inc() handled := int(atomic.AddInt32(handledChunksCount, 1)) - common.AppLogger.Infof("[%s] handled region count = %d (%s)", t.tableName, handled, common.Percent(handled, len(chunks))) + common.AppLogger.Infof("[%s] handled region count = %d (%s)", t.tableName, handled, common.Percent(handled, len(cp.Chunks))) }(worker, cr) } @@ -520,7 +532,7 @@ func (t *TableRestore) postProcess(ctx context.Context, closedEngine *kv.ClosedE // 4. do table checksum if cp.Status < CheckpointStatusCompleted { - err := t.compareChecksum(ctx, rc.cfg) + err := t.compareChecksum(ctx, rc.cfg, cp) rc.saveStatusCheckpoint(t.tableName, err, CheckpointStatusCompleted) if err != nil { common.AppLogger.Errorf("[%s] checksum failed: %v", t.tableName, err.Error()) @@ -797,33 +809,25 @@ func (pool *RestoreWorkerPool) Recycle(worker *RestoreWorker) { //////////////////////////////////////////////////////////////// type chunkRestore struct { - parser *mydump.ChunkParser - path string - name string - columns []byte - chunk mydump.Chunk + parser *mydump.ChunkParser + index int + chunk *ChunkCheckpoint } -func newChunkRestore(chunk *mydump.TableRegion, cp *TableCheckpoint) (*chunkRestore, error) { - reader, err := os.Open(chunk.File) +func newChunkRestore(index int, chunk *ChunkCheckpoint) (*chunkRestore, error) { + reader, err := os.Open(chunk.Key.Path) if err != nil { return nil, errors.Trace(err) } parser := mydump.NewChunkParser(reader) - pos, ok := cp.ChunkPos(chunk.File, chunk.Offset()) - if !ok { - pos = chunk.Offset() - } - reader.Seek(pos, io.SeekStart) - parser.Pos = pos + reader.Seek(chunk.Chunk.Offset, io.SeekStart) + parser.SetPos(chunk.Chunk.Offset, chunk.Chunk.PrevRowIDMax) return &chunkRestore{ - parser: parser, - path: chunk.File, - name: chunk.Name(), - columns: chunk.Columns, - chunk: chunk.Chunk, + parser: parser, + index: index, + chunk: chunk, }, nil } @@ -840,9 +844,6 @@ type TableRestore struct { encoder kvenc.KvEncoder alloc autoid.Allocator - checksumLock sync.Mutex - checksum verify.KVChecksum - rows uint64 checkpointStatus CheckpointStatus engine *kv.OpenedEngine } @@ -872,7 +873,6 @@ func NewTableRestore( tableMeta: tableMeta, encoder: encoder, alloc: idAlloc, - checksum: cp.Checksum, }, nil } @@ -881,27 +881,52 @@ func (tr *TableRestore) Close() { common.AppLogger.Infof("[%s] restore done", tr.tableName) } -func (t *TableRestore) loadChunks(minChunkSize int64, cp *TableCheckpoint) []*mydump.TableRegion { +var tidbRowIDColumnRegex = regexp.MustCompile(fmt.Sprintf("`%[1]s`|(?i:\\b%[1]s\\b)", model.ExtraHandleName)) + +func (t *TableRestore) populateChunks(minChunkSize int64, cp *TableCheckpoint, tableInfo *TidbTableInfo) { common.AppLogger.Infof("[%s] load chunks", t.tableName) 
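// A standalone sketch of what tidbRowIDColumnRegex above is meant to detect:
// an explicit _tidb_rowid column in a data file's column list, either
// back-quoted or as a bare case-insensitive word. It assumes
// model.ExtraHandleName renders as "_tidb_rowid"; the sample column lists are
// hypothetical and only illustrate the match behaviour.
package main

import (
	"fmt"
	"regexp"
)

func main() {
	re := regexp.MustCompile(fmt.Sprintf("`%[1]s`|(?i:\\b%[1]s\\b)", "_tidb_rowid"))
	for _, columns := range []string{
		"(`pk`,`_tidb_rowid`)", // back-quoted form: matched
		"(pk, _TiDB_RowID)",    // bare, case-insensitive form: matched
		"(`pk`,`id`)",          // no explicit row ID column: not matched
	} {
		fmt.Printf("%-24s => %v\n", columns, re.MatchString(columns))
	}
}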
timer := time.Now() founder := mydump.NewRegionFounder(minChunkSize) chunks := founder.MakeTableRegions(t.tableMeta) - // Ref: https://github.com/golang/go/wiki/SliceTricks#filtering-without-allocating - // Remove all regions which have been imported - newChunks := chunks[:0] + cp.Chunks = make([]*ChunkCheckpoint, 0, len(chunks)) + for _, chunk := range chunks { - if pos, ok := cp.ChunkPos(chunk.File, chunk.Chunk.Offset); !ok || pos < chunk.Chunk.EndOffset { - newChunks = append(newChunks, chunk) + columns := chunk.Columns + + shouldIncludeRowID := !tableInfo.core.PKIsHandle && !tidbRowIDColumnRegex.Match(columns) + if shouldIncludeRowID { + // we need to inject the _tidb_rowid column + if len(columns) != 0 { + // column listing already exists, just append the new column. + columns = append(columns[:len(columns)-1], (",`" + model.ExtraHandleName.String() + "`)")...) + } else { + // we need to recreate the columns + var buf bytes.Buffer + buf.WriteString("(`") + for _, columnInfo := range tableInfo.core.Columns { + buf.WriteString(columnInfo.Name.String()) + buf.WriteString("`,`") + } + buf.WriteString(model.ExtraHandleName.String()) + buf.WriteString("`)") + columns = buf.Bytes() + } } + + cp.Chunks = append(cp.Chunks, &ChunkCheckpoint{ + Key: ChunkCheckpointKey{ + Path: chunk.File, + Offset: chunk.Chunk.Offset, + }, + Columns: columns, + ShouldIncludeRowID: shouldIncludeRowID, + Chunk: chunk.Chunk, + }) } - common.AppLogger.Infof( - "[%s] load %d chunks (%d are new) takes %v", - t.tableName, len(chunks), len(newChunks), time.Since(timer), - ) - return newChunks + common.AppLogger.Infof("[%s] load %d chunks takes %v", t.tableName, len(chunks), time.Since(timer)) } func (tr *TableRestore) restoreTableMeta(ctx context.Context, cfg *config.Config) error { @@ -938,29 +963,34 @@ func (tr *TableRestore) importKV(ctx context.Context, closedEngine *kv.ClosedEng return errors.Trace(err) } closedEngine.Cleanup(ctx) - common.AppLogger.Infof("[%s] local checksum %v, has imported %d rows", tr.tableName, tr.checksum, tr.rows) return nil } // do checksum for each table. 
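// A self-contained sketch of the column-list rewriting in the populateChunks
// hunk above: when the table's primary key is not the row handle and the data
// file does not already name _tidb_rowid, the column list gains an extra
// `_tidb_rowid` column so every row can carry an explicit row ID. The column
// names used here are invented examples.
package main

import (
	"bytes"
	"fmt"
)

// appendRowIDColumn mirrors the "column listing already exists" branch:
// drop the closing ')' and append ",`_tidb_rowid`)".
func appendRowIDColumn(columns []byte) []byte {
	return append(columns[:len(columns)-1], ",`_tidb_rowid`)"...)
}

// buildColumnList mirrors the "recreate the columns" branch: rebuild the list
// from the table schema and finish with `_tidb_rowid`.
func buildColumnList(columnNames []string) []byte {
	var buf bytes.Buffer
	buf.WriteString("(`")
	for _, name := range columnNames {
		buf.WriteString(name)
		buf.WriteString("`,`")
	}
	buf.WriteString("_tidb_rowid`)")
	return buf.Bytes()
}

func main() {
	fmt.Println(string(appendRowIDColumn([]byte("(`pk`,`val`)")))) // (`pk`,`val`,`_tidb_rowid`)
	fmt.Println(string(buildColumnList([]string{"pk", "val"})))    // (`pk`,`val`,`_tidb_rowid`)
}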
-func (tr *TableRestore) compareChecksum(ctx context.Context, cfg *config.Config) error { +func (tr *TableRestore) compareChecksum(ctx context.Context, cfg *config.Config, cp *TableCheckpoint) error { if !cfg.PostRestore.Checksum { common.AppLogger.Infof("[%s] Skip checksum.", tr.tableName) return nil } + var localChecksum verify.KVChecksum + for _, chunk := range cp.Chunks { + localChecksum.Add(&chunk.Checksum) + } + common.AppLogger.Infof("[%s] local checksum %+v", tr.tableName, localChecksum) + remoteChecksum, err := DoChecksum(ctx, cfg.TiDB, tr.tableName) if err != nil { return errors.Trace(err) } - if remoteChecksum.Checksum != tr.checksum.Sum() || - remoteChecksum.TotalKVs != tr.checksum.SumKVS() || - remoteChecksum.TotalBytes != tr.checksum.SumSize() { + if remoteChecksum.Checksum != localChecksum.Sum() || + remoteChecksum.TotalKVs != localChecksum.SumKVS() || + remoteChecksum.TotalBytes != localChecksum.SumSize() { return errors.Errorf("checksum mismatched remote vs local => (checksum: %d vs %d) (total_kvs: %d vs %d) (total_bytes:%d vs %d)", - remoteChecksum.Checksum, tr.checksum.Sum(), - remoteChecksum.TotalKVs, tr.checksum.SumKVS(), - remoteChecksum.TotalBytes, tr.checksum.SumSize(), + remoteChecksum.Checksum, localChecksum.Sum(), + remoteChecksum.TotalKVs, localChecksum.SumKVS(), + remoteChecksum.TotalBytes, localChecksum.SumSize(), ) } @@ -1100,8 +1130,8 @@ func (cr *chunkRestore) restore( default: } - endOffset := mathutil.MinInt64(cr.chunk.EndOffset, cr.parser.Pos+rc.cfg.Mydumper.ReadBlockSize) - if cr.parser.Pos >= endOffset { + endOffset := mathutil.MinInt64(cr.chunk.Chunk.EndOffset, cr.parser.Pos()+rc.cfg.Mydumper.ReadBlockSize) + if cr.parser.Pos() >= endOffset { break } @@ -1110,18 +1140,23 @@ func (cr *chunkRestore) restore( var sqls strings.Builder sqls.WriteString("INSERT INTO ") sqls.WriteString(t.tableName) - sqls.Write(cr.columns) + sqls.Write(cr.chunk.Columns) sqls.WriteString(" VALUES") var sep byte = ' ' readLoop: - for cr.parser.Pos < endOffset { + for cr.parser.Pos() < endOffset { err := cr.parser.ReadRow() switch errors.Cause(err) { case nil: sqls.WriteByte(sep) sep = ',' lastRow := cr.parser.LastRow() - sqls.Write(lastRow.Row) + if cr.chunk.ShouldIncludeRowID { + sqls.Write(lastRow.Row[:len(lastRow.Row)-1]) + fmt.Fprintf(&sqls, ",%d)", lastRow.RowID) + } else { + sqls.Write(lastRow.Row) + } case io.EOF: break readLoop default: @@ -1136,13 +1171,12 @@ func (cr *chunkRestore) restore( metrics.MarkTiming(readMark, start) var ( - totalKVs []kvenc.KvPair - totalAffectedRows uint64 - localChecksum verify.KVChecksum + totalKVs []kvenc.KvPair + localChecksum verify.KVChecksum ) // sql -> kv start = time.Now() - kvs, affectedRows, err := kvEncoder.SQL2KV(sqls.String()) + kvs, _, err := kvEncoder.SQL2KV(sqls.String()) metrics.MarkTiming(encodeMark, start) common.AppLogger.Debugf("len(kvs) %d, len(sql) %d", len(kvs), sqls.Len()) if err != nil { @@ -1152,7 +1186,6 @@ func (cr *chunkRestore) restore( totalKVs = append(totalKVs, kvs...) localChecksum.Update(kvs) - totalAffectedRows += affectedRows // kv -> deliver ( -> tikv ) start = time.Now() @@ -1177,23 +1210,22 @@ func (cr *chunkRestore) restore( // Update the table, and save a checkpoint. 
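// A standalone sketch of how the read loop above splices the generated row ID
// into a parsed row when ShouldIncludeRowID is set: the tuple's closing ')'
// is dropped and ",<rowID>)" is appended, matching the extra `_tidb_rowid`
// column injected into the column list. The table name, tuple and row ID
// below are made up for illustration.
package main

import (
	"fmt"
	"strings"
)

func writeRowWithRowID(sqls *strings.Builder, row []byte, rowID int64) {
	sqls.Write(row[:len(row)-1])     // "(1, 'a', 2.5" without the closing ')'
	fmt.Fprintf(sqls, ",%d)", rowID) // "(1, 'a', 2.5,42)"
}

func main() {
	var sqls strings.Builder
	sqls.WriteString("INSERT INTO `db`.`tbl` (`a`,`b`,`c`,`_tidb_rowid`) VALUES ")
	writeRowWithRowID(&sqls, []byte("(1, 'a', 2.5)"), 42)
	fmt.Println(sqls.String())
	// INSERT INTO `db`.`tbl` (`a`,`b`,`c`,`_tidb_rowid`) VALUES (1, 'a', 2.5,42)
}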
// (the write to the importer is effective immediately, thus update these here) - t.checksumLock.Lock() - t.checksum.Add(&localChecksum) - t.rows += totalAffectedRows + cr.chunk.Checksum.Add(&localChecksum) + cr.chunk.Chunk.Offset = cr.parser.Pos() + cr.chunk.Chunk.PrevRowIDMax = cr.parser.LastRow().RowID rc.saveCpCh <- saveCp{ tableName: t.tableName, merger: &ChunkCheckpointMerger{ + Key: cr.chunk.Key, AllocBase: t.alloc.Base() + 1, - Checksum: t.checksum, - Path: cr.path, - Offset: cr.chunk.Offset, - Pos: cr.parser.Pos, + Checksum: cr.chunk.Checksum, + Pos: cr.chunk.Chunk.Offset, + RowID: cr.chunk.Chunk.PrevRowIDMax, }, } - t.checksumLock.Unlock() } - common.AppLogger.Infof("[%s] restore chunk [%s] takes %v", t.tableName, cr.name, time.Since(timer)) + common.AppLogger.Infof("[%s] restore chunk #%d (%s) takes %v", t.tableName, cr.index, &cr.chunk.Key, time.Since(timer)) return nil } diff --git a/tests/checkpoint_chunks/run.sh b/tests/checkpoint_chunks/run.sh index ba7225daa..964d9f5b9 100755 --- a/tests/checkpoint_chunks/run.sh +++ b/tests/checkpoint_chunks/run.sh @@ -54,5 +54,5 @@ check_contains "count(i): $(($ROW_COUNT*$CHUNK_COUNT))" check_contains "sum(i): $(( $ROW_COUNT*$CHUNK_COUNT*(($CHUNK_COUNT+2)*$ROW_COUNT + 1)/2 ))" run_sql "SELECT count(*) FROM tidb_lightning_checkpoint_test_cpch.table_v1 WHERE status = 180" check_contains "count(*): 1" -run_sql "SELECT count(*) FROM tidb_lightning_checkpoint_test_cpch.chunk_v2 WHERE pos > offset" +run_sql "SELECT count(*) FROM tidb_lightning_checkpoint_test_cpch.chunk_v3 WHERE pos = end_offset" check_contains "count(*): $CHUNK_COUNT" From e4ba06afc70b67d37cce1d0ce61cdc292ed3bf9a Mon Sep 17 00:00:00 2001 From: kennytm Date: Tue, 6 Nov 2018 11:17:26 +0800 Subject: [PATCH 05/15] lightning: applied some missing `go fmt` change --- cmd/main.go | 2 +- lightning/common/util.go | 5 +++-- lightning/lightning.go | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/cmd/main.go b/cmd/main.go index d011a1e67..79cffb6ee 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -7,11 +7,11 @@ import ( "os/signal" "syscall" - "github.com/pkg/errors" "github.com/pingcap/tidb-lightning/lightning" "github.com/pingcap/tidb-lightning/lightning/common" "github.com/pingcap/tidb-lightning/lightning/config" plan "github.com/pingcap/tidb/planner/core" + "github.com/pkg/errors" ) func setGlobalVars() { diff --git a/lightning/common/util.go b/lightning/common/util.go index 727210d04..795ead7e4 100644 --- a/lightning/common/util.go +++ b/lightning/common/util.go @@ -16,8 +16,8 @@ import ( "path/filepath" "github.com/go-sql-driver/mysql" - "github.com/pkg/errors" tmysql "github.com/pingcap/tidb/mysql" + "github.com/pkg/errors" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" ) @@ -229,7 +229,8 @@ func IsRetryableError(err error) bool { } // IsContextCanceledError returns whether the error is caused by context -// cancellation. +// cancellation. This function returns `false` (not a context-canceled error) if +// `err == nil`. 
func IsContextCanceledError(err error) bool { err = errors.Cause(err) return err == context.Canceled || status.Code(err) == codes.Canceled diff --git a/lightning/lightning.go b/lightning/lightning.go index 365afb2d0..13da7213e 100644 --- a/lightning/lightning.go +++ b/lightning/lightning.go @@ -7,8 +7,8 @@ import ( "runtime" "sync" - "github.com/pkg/errors" sstpb "github.com/pingcap/kvproto/pkg/import_sstpb" + "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/pingcap/tidb-lightning/lightning/common" From 47d5df6e09ab3d0430f475cc0fbd551b12a04461 Mon Sep 17 00:00:00 2001 From: kennytm Date: Tue, 6 Nov 2018 12:04:39 +0800 Subject: [PATCH 06/15] *: minor test fixes --- lightning/restore/restore.go | 4 ++-- tests/error_summary/run.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lightning/restore/restore.go b/lightning/restore/restore.go index 8cd48b28f..c2257f107 100644 --- a/lightning/restore/restore.go +++ b/lightning/restore/restore.go @@ -1028,8 +1028,8 @@ func DoChecksum(ctx context.Context, dsn config.DBStore, table string) (*RemoteC // set it back finally defer func() { err = UpdateGCLifeTime(ctx, db, ori) - if err != nil { - common.AppLogger.Errorf("[%s] update tikv_gc_life_time error %s", table, errors.ErrorStack(err)) + if err != nil && !common.IsContextCanceledError(err) { + common.AppLogger.Errorf("[%s] update tikv_gc_life_time error %v", table, errors.ErrorStack(err)) } }() diff --git a/tests/error_summary/run.sh b/tests/error_summary/run.sh index c98c79ca7..3f92e6d20 100755 --- a/tests/error_summary/run.sh +++ b/tests/error_summary/run.sh @@ -26,7 +26,7 @@ check_contains 'sum(id): 28' check_contains 'sum(k): 32' # Verify the log contains the expected messages at the last few lines -tail -10 "$TEST_DIR/lightning-error-summary.log" > "$TEST_DIR/lightning-error-summary.tail" +tail -20 "$TEST_DIR/lightning-error-summary.log" > "$TEST_DIR/lightning-error-summary.tail" grep -Fq '[error] Totally **2** tables failed to be imported.' "$TEST_DIR/lightning-error-summary.tail" grep -Fq '[`error_summary`.`a`] [checksum] checksum mismatched' "$TEST_DIR/lightning-error-summary.tail" grep -Fq '[`error_summary`.`c`] [checksum] checksum mismatched' "$TEST_DIR/lightning-error-summary.tail" From 38f87a5af0a8a88af861c467724ece4044604ca0 Mon Sep 17 00:00:00 2001 From: kennytm Date: Wed, 7 Nov 2018 18:42:16 +0800 Subject: [PATCH 07/15] checkpoints: minor fix, ensure recorded alloc_base is increasing --- lightning/restore/checkpoints.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightning/restore/checkpoints.go b/lightning/restore/checkpoints.go index 488478fb8..04691ce09 100644 --- a/lightning/restore/checkpoints.go +++ b/lightning/restore/checkpoints.go @@ -412,7 +412,7 @@ func (cpdb *MySQLCheckpointsDB) Update(checkpointDiffs map[string]*TableCheckpoi WHERE table_name = ? AND path = ? AND offset = ?; `, cpdb.schema) checksumQuery := fmt.Sprintf(` - UPDATE %s.table_v1 SET alloc_base = ? WHERE table_name = ?; + UPDATE %s.table_v1 SET alloc_base = GREATEST(?, alloc_base) WHERE table_name = ?; `, cpdb.schema) statusQuery := fmt.Sprintf(` UPDATE %s.table_v1 SET status = ? 
WHERE table_name = ?; From 48cb1f7ac91605171428289f3973087420899e5e Mon Sep 17 00:00:00 2001 From: kennytm Date: Wed, 7 Nov 2018 21:42:55 +0800 Subject: [PATCH 08/15] *: addressed comments --- lightning/kv/sql2kv.go | 6 +----- lightning/mydump/region.go | 17 ++++++++++++----- lightning/mydump/region_test.go | 6 ++++-- lightning/restore/checkpoints.go | 3 ++- lightning/restore/restore.go | 12 +++++++++--- 5 files changed, 28 insertions(+), 16 deletions(-) diff --git a/lightning/kv/sql2kv.go b/lightning/kv/sql2kv.go index 6640f1ce8..8eb4545d7 100644 --- a/lightning/kv/sql2kv.go +++ b/lightning/kv/sql2kv.go @@ -1,13 +1,13 @@ package kv import ( - "github.com/pkg/errors" "github.com/pingcap/tidb-lightning/lightning/common" "github.com/pingcap/tidb-lightning/lightning/metric" sqltool "github.com/pingcap/tidb-lightning/lightning/sql" "github.com/pingcap/tidb/kv" "github.com/pingcap/tidb/meta/autoid" kvec "github.com/pingcap/tidb/util/kvencoder" + "github.com/pkg/errors" ) const ( @@ -102,10 +102,6 @@ func (kvcodec *TableKVEncoder) Close() error { return errors.Trace(kvcodec.encoder.Close()) } -func (kvcodec *TableKVEncoder) NextRowID() int64 { - return kvcodec.idAllocator.Base() + 1 -} - func (kvcodec *TableKVEncoder) SQL2KV(sql string) ([]kvec.KvPair, uint64, error) { if PrepareStmtMode { // via prepare statment diff --git a/lightning/mydump/region.go b/lightning/mydump/region.go index 9a4c2b8e1..f85b0040b 100644 --- a/lightning/mydump/region.go +++ b/lightning/mydump/region.go @@ -79,7 +79,7 @@ func NewRegionFounder(minRegionSize int64) *RegionFounder { } } -func (f *RegionFounder) MakeTableRegions(meta *MDTableMeta) []*TableRegion { +func (f *RegionFounder) MakeTableRegions(meta *MDTableMeta) ([]*TableRegion, error) { var lock sync.Mutex var wg sync.WaitGroup @@ -88,6 +88,8 @@ func (f *RegionFounder) MakeTableRegions(meta *MDTableMeta) []*TableRegion { processors := f.processors minRegionSize := f.minRegionSize + var chunkErr error + // Split files into regions filesRegions := make(regionSlice, 0, len(meta.DataFiles)) for _, dataFile := range meta.DataFiles { @@ -96,13 +98,13 @@ func (f *RegionFounder) MakeTableRegions(meta *MDTableMeta) []*TableRegion { common.AppLogger.Debugf("[%s] loading file's region (%s) ...", table, file) chunks, err := splitExactChunks(db, table, file, minRegionSize) + lock.Lock() if err == nil { - lock.Lock() filesRegions = append(filesRegions, chunks...) 
- lock.Unlock() } else { - common.AppLogger.Errorf("failed to extract chunks from file (%s): %s", file, err.Error()) + chunkErr = errors.Annotatef(err, "%s", file) } + lock.Unlock() processors <- pid wg.Done() @@ -110,6 +112,11 @@ func (f *RegionFounder) MakeTableRegions(meta *MDTableMeta) []*TableRegion { } wg.Wait() + if chunkErr != nil { + common.AppLogger.Errorf("failed to extract chunks from file: %v", chunkErr) + return nil, chunkErr + } + // Setup files' regions sort.Sort(filesRegions) // ps : sort region by - (fileName, fileOffset) var totalRowCount int64 @@ -123,7 +130,7 @@ func (f *RegionFounder) MakeTableRegions(meta *MDTableMeta) []*TableRegion { region.Chunk.RowIDMax = totalRowCount } - return filesRegions + return filesRegions, nil } func splitExactChunks(db string, table string, file string, minChunkSize int64) ([]*TableRegion, error) { diff --git a/lightning/mydump/region_test.go b/lightning/mydump/region_test.go index 0d4ff99a6..9453571cc 100644 --- a/lightning/mydump/region_test.go +++ b/lightning/mydump/region_test.go @@ -39,7 +39,8 @@ func (s *testMydumpRegionSuite) TestTableRegion(c *C) { founder := NewRegionFounder(defMinRegionSize) for _, meta := range dbMeta.Tables { - regions := founder.MakeTableRegions(meta) + regions, err := founder.MakeTableRegions(meta) + c.Assert(err, IsNil) table := meta.Name fmt.Printf("[%s] region count ===============> %d\n", table, len(regions)) @@ -100,7 +101,8 @@ func (s *testMydumpRegionSuite) TestRegionReader(c *C) { founder := NewRegionFounder(defMinRegionSize) for _, meta := range dbMeta.Tables { - regions := founder.MakeTableRegions(meta) + regions, err := founder.MakeTableRegions(meta) + c.Assert(err, IsNil) tolValTuples := 0 for _, reg := range regions { diff --git a/lightning/restore/checkpoints.go b/lightning/restore/checkpoints.go index 04691ce09..09f2acd57 100644 --- a/lightning/restore/checkpoints.go +++ b/lightning/restore/checkpoints.go @@ -8,6 +8,7 @@ import ( "strings" "time" + "github.com/cznic/mathutil" "github.com/joho/sqltocsv" "github.com/pkg/errors" "github.com/satori/go.uuid" @@ -142,7 +143,7 @@ type ChunkCheckpointMerger struct { func (merger *ChunkCheckpointMerger) MergeInto(cpd *TableCheckpointDiff) { cpd.hasChunks = true - cpd.allocBase = merger.AllocBase + cpd.allocBase = mathutil.MaxInt64(cpd.allocBase, merger.AllocBase) cpd.chunks[merger.Key] = chunkCheckpointDiff{ pos: merger.Pos, rowID: merger.RowID, diff --git a/lightning/restore/restore.go b/lightning/restore/restore.go index c2257f107..c78dcf755 100644 --- a/lightning/restore/restore.go +++ b/lightning/restore/restore.go @@ -401,7 +401,9 @@ func (t *TableRestore) restore(ctx context.Context, rc *RestoreController, cp *T if len(cp.Chunks) > 0 { common.AppLogger.Infof("[%s] reusing %d chunks from checkpoint", t.tableName, len(cp.Chunks)) } else if cp.Status < CheckpointStatusAllWritten { - t.populateChunks(rc.cfg.Mydumper.MinRegionSize, cp, t.tableInfo) + if err := t.populateChunks(rc.cfg.Mydumper.MinRegionSize, cp, t.tableInfo); err != nil { + return nil, errors.Trace(err) + } if err := rc.checkpointsDB.InsertChunkCheckpoints(ctx, t.tableName, cp.Chunks); err != nil { return nil, errors.Trace(err) } @@ -883,12 +885,15 @@ func (tr *TableRestore) Close() { var tidbRowIDColumnRegex = regexp.MustCompile(fmt.Sprintf("`%[1]s`|(?i:\\b%[1]s\\b)", model.ExtraHandleName)) -func (t *TableRestore) populateChunks(minChunkSize int64, cp *TableCheckpoint, tableInfo *TidbTableInfo) { +func (t *TableRestore) populateChunks(minChunkSize int64, cp *TableCheckpoint, 
tableInfo *TidbTableInfo) error { common.AppLogger.Infof("[%s] load chunks", t.tableName) timer := time.Now() founder := mydump.NewRegionFounder(minChunkSize) - chunks := founder.MakeTableRegions(t.tableMeta) + chunks, err := founder.MakeTableRegions(t.tableMeta) + if err != nil { + return errors.Trace(err) + } cp.Chunks = make([]*ChunkCheckpoint, 0, len(chunks)) @@ -927,6 +932,7 @@ func (t *TableRestore) populateChunks(minChunkSize int64, cp *TableCheckpoint, t } common.AppLogger.Infof("[%s] load %d chunks takes %v", t.tableName, len(chunks), time.Since(timer)) + return nil } func (tr *TableRestore) restoreTableMeta(ctx context.Context, cfg *config.Config) error { From 5e7e4555cedbc8a9d3a4d3fea438094c7afbead5 Mon Sep 17 00:00:00 2001 From: kennytm Date: Wed, 7 Nov 2018 23:44:05 +0800 Subject: [PATCH 09/15] tests: added a test case to ensure non-PK AUTO_INC works --- .../data/vt.non_pk_auto_inc-schema.sql | 9 +++++++ .../various_types/data/vt.non_pk_auto_inc.sql | 26 +++++++++++++++++++ tests/various_types/run.sh | 7 +++++ 3 files changed, 42 insertions(+) create mode 100644 tests/various_types/data/vt.non_pk_auto_inc-schema.sql create mode 100644 tests/various_types/data/vt.non_pk_auto_inc.sql diff --git a/tests/various_types/data/vt.non_pk_auto_inc-schema.sql b/tests/various_types/data/vt.non_pk_auto_inc-schema.sql new file mode 100644 index 000000000..fef3be74b --- /dev/null +++ b/tests/various_types/data/vt.non_pk_auto_inc-schema.sql @@ -0,0 +1,9 @@ +/*!40101 SET NAMES binary*/; +/*!40014 SET FOREIGN_KEY_CHECKS=0*/; + +CREATE TABLE `non_pk_auto_inc` ( + `pk` char(36) NOT NULL, + `id` int(11) NOT NULL AUTO_INCREMENT, + PRIMARY KEY (`pk`), + UNIQUE KEY (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin AUTO_INCREMENT=30001; diff --git a/tests/various_types/data/vt.non_pk_auto_inc.sql b/tests/various_types/data/vt.non_pk_auto_inc.sql new file mode 100644 index 000000000..9009cea4f --- /dev/null +++ b/tests/various_types/data/vt.non_pk_auto_inc.sql @@ -0,0 +1,26 @@ +/*!40101 SET NAMES binary*/; +/*!40014 SET FOREIGN_KEY_CHECKS=0*/; +/*!40103 SET TIME_ZONE='+00:00' */; +INSERT INTO `non_pk_auto_inc` VALUES +("c5862b7d-e2a1-11e8-81d3-d5360eceeab8",1), +("d7c9dce1-e2a1-11e8-beea-a3f4b99b3e1e",3), +("d7c9de1f-e2a1-11e8-8630-b1256aff79d4",4), +("d7c9de81-e2a1-11e8-be4f-f17e7808e755",5), +("d7c9ded4-e2a1-11e8-ad15-658b46ee1390",6), +("d7c9df20-e2a1-11e8-91a9-e3a3822c60a7",7), +("d7c9dfb9-e2a1-11e8-a8d7-31054a5bf6a8",8), +("d7c9e002-e2a1-11e8-9ff1-9fc4350e1311",9), +("da71fb0d-e2a1-11e8-891e-835bd645efad",17), +("da71fbd6-e2a1-11e8-9e02-ff5f31a7c894",18), +("da71fc00-e2a1-11e8-9a81-230df4ae8e5e",19), +("da71fc29-e2a1-11e8-9823-37aa4b9b6fd1",20), +("da71fc5e-e2a1-11e8-9a4c-534927b63a63",21), +("da71fc87-e2a1-11e8-ae93-fb9ff0878e13",22), +("da71fcaf-e2a1-11e8-aac5-153d3fc52861",23), +("db87f492-e2a1-11e8-a30e-b3a363c99db5",31), +("db87f6c0-e2a1-11e8-82ea-4f787bed9c70",32), +("db87f716-e2a1-11e8-9caa-3fb2ed9f5bcf",33), +("db87f75f-e2a1-11e8-8778-05a4da66a78d",34), +("db87f7a8-e2a1-11e8-9562-31f8c96addec",35), +("db87f7f1-e2a1-11e8-922b-bbba2c355880",36), +("db87f837-e2a1-11e8-ba19-f9baeeda0855",37); diff --git a/tests/various_types/run.sh b/tests/various_types/run.sh index 23386b78e..22be52f23 100755 --- a/tests/various_types/run.sh +++ b/tests/various_types/run.sh @@ -68,3 +68,10 @@ run_sql 'SELECT count(*) FROM vt.`enum-set` WHERE find_in_set("x50", `set`) > 0' check_contains 'count(*): 10' run_sql 'SELECT `set` FROM vt.`enum-set` WHERE `enum` = "gcc"' check_contains 'set: 
x00,x06,x07,x09,x17,x20,x23,x24,x27,x37,x44,x46,x49,x54,x55,x58,x61,x62' + +run_sql 'SELECT count(*), max(id) FROM vt.`non_pk_auto_inc`' +check_contains 'count(*): 22' +check_contains 'max(id): 37' +run_sql 'INSERT INTO vt.`non_pk_auto_inc` (`pk`) VALUES ("?")' +run_sql 'SELECT id > 37 FROM vt.`non_pk_auto_inc` WHERE `pk` = "?"' +check_contains 'id > 37: 1' From 56e724a5b55e6b12c901b5aba66ef3bab1e451d3 Mon Sep 17 00:00:00 2001 From: kennytm Date: Sat, 10 Nov 2018 00:53:03 +0800 Subject: [PATCH 10/15] *: addressed comments --- lightning/mydump/parser.rl | 22 ++++++++++++++++++++++ lightning/mydump/region.go | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/lightning/mydump/parser.rl b/lightning/mydump/parser.rl index 9c3ff75e2..fa5881fe9 100644 --- a/lightning/mydump/parser.rl +++ b/lightning/mydump/parser.rl @@ -17,20 +17,42 @@ import ( %%{ #` +# This is a ragel parser to quickly scan through a data source file consisting +# of INSERT statements only. You may find detailed syntax explanation on its +# website . + machine chunk_parser; +# We treat all unimportant patterns as "comments". This include: +# - Real SQL comments `/* ... */` and `-- ...` +# - Whitespace +# - Separators `,` and `;` +# - The keywords `INSERT` and `INTO` (suffix `i` means case-insensitive). block_comment = '/*' any* :>> '*/'; line_comment = /--[^\n]*\n/; comment = block_comment | line_comment | space | [,;] | 'insert'i | 'into'i; +# The patterns parse quoted strings. +# They do NOT handle the escape-by-doubling syntax like `'ten o''clock'`, this +# will be handled as two tokens: `'ten o'` and `'clock'`. See the `name` rule +# below for why this doesn't matter. single_quoted = "'" (^"'" | "\\" any)** "'"; double_quoted = '"' (^'"' | '\\' any)** '"'; back_quoted = '`' ^'`'* '`'; unquoted = ^([,;()'"`] | space)+; +# Matches a "row" of the form `( ... )`, where the content doesn't matter. row = '(' (^[)'"`] | single_quoted | double_quoted | back_quoted)* ')'; + +# Matches a table name, which consists of one or more identifiers. This allows +# us to match a qualified name like `foo.bar`, and also double-backquote like +# ``` `foo``bar` ```. name = (back_quoted | double_quoted | unquoted)+; +# The actual parser only produces 3 kinds of tokens: +# - The keyword VALUES, as a separator between column names and data rows +# - A row (which can be a list of columns or values depending on context) +# - A table name main := |* comment; diff --git a/lightning/mydump/region.go b/lightning/mydump/region.go index f85b0040b..1d73c7aa2 100644 --- a/lightning/mydump/region.go +++ b/lightning/mydump/region.go @@ -103,6 +103,7 @@ func (f *RegionFounder) MakeTableRegions(meta *MDTableMeta) ([]*TableRegion, err filesRegions = append(filesRegions, chunks...) } else { chunkErr = errors.Annotatef(err, "%s", file) + common.AppLogger.Errorf("failed to extract chunks from file: %v", chunkErr) } lock.Unlock() @@ -113,7 +114,6 @@ func (f *RegionFounder) MakeTableRegions(meta *MDTableMeta) ([]*TableRegion, err wg.Wait() if chunkErr != nil { - common.AppLogger.Errorf("failed to extract chunks from file: %v", chunkErr) return nil, chunkErr } From 8678f9867582b704e45c7ca32238d434697e5a17 Mon Sep 17 00:00:00 2001 From: kennytm Date: Sat, 10 Nov 2018 01:31:31 +0800 Subject: [PATCH 11/15] restore: properly rebase the allocators On first read, we will reset the allocator base it is the maximum of 1. the AUTO_INCREMENT option of the CREATE TABLE statement, or 2. 
the total number of rows This ensures future writes after importing will not clobber existing rows due to overlapping _tidb_rowid. --- lightning/restore/checkpoints.go | 28 ++++++---- lightning/restore/restore.go | 28 ++++++++-- lightning/restore/tidb.go | 7 ++- tests/tidb_rowid/config.toml | 24 ++++++++ tests/tidb_rowid/data/rowid-schema-create.sql | 1 + ...owid.exotic`table``name-schema.sql.ignored | 1 + .../data/rowid.exotic`table``name.sql.ignored | 7 +++ .../data/rowid.explicit_tidb_rowid-schema.sql | 1 + .../data/rowid.explicit_tidb_rowid.sql | 11 ++++ tests/tidb_rowid/data/rowid.non_pk-schema.sql | 1 + tests/tidb_rowid/data/rowid.non_pk.sql | 11 ++++ .../data/rowid.non_pk_auto_inc-schema.sql} | 2 +- .../data/rowid.non_pk_auto_inc.sql} | 0 .../data/rowid.pre_rebase-schema.sql | 1 + tests/tidb_rowid/data/rowid.pre_rebase.sql | 1 + tests/tidb_rowid/run.sh | 55 +++++++++++++++++++ tests/various_types/run.sh | 7 --- 17 files changed, 159 insertions(+), 27 deletions(-) create mode 100644 tests/tidb_rowid/config.toml create mode 100644 tests/tidb_rowid/data/rowid-schema-create.sql create mode 100644 tests/tidb_rowid/data/rowid.exotic`table``name-schema.sql.ignored create mode 100644 tests/tidb_rowid/data/rowid.exotic`table``name.sql.ignored create mode 100644 tests/tidb_rowid/data/rowid.explicit_tidb_rowid-schema.sql create mode 100644 tests/tidb_rowid/data/rowid.explicit_tidb_rowid.sql create mode 100644 tests/tidb_rowid/data/rowid.non_pk-schema.sql create mode 100644 tests/tidb_rowid/data/rowid.non_pk.sql rename tests/{various_types/data/vt.non_pk_auto_inc-schema.sql => tidb_rowid/data/rowid.non_pk_auto_inc-schema.sql} (73%) rename tests/{various_types/data/vt.non_pk_auto_inc.sql => tidb_rowid/data/rowid.non_pk_auto_inc.sql} (100%) create mode 100644 tests/tidb_rowid/data/rowid.pre_rebase-schema.sql create mode 100644 tests/tidb_rowid/data/rowid.pre_rebase.sql create mode 100755 tests/tidb_rowid/run.sh diff --git a/lightning/restore/checkpoints.go b/lightning/restore/checkpoints.go index 09f2acd57..ff0f1a2e6 100644 --- a/lightning/restore/checkpoints.go +++ b/lightning/restore/checkpoints.go @@ -91,7 +91,7 @@ type chunkCheckpointDiff struct { type TableCheckpointDiff struct { hasStatus bool - hasChunks bool + hasRebase bool status CheckpointStatus allocBase int64 chunks map[ChunkCheckpointKey]chunkCheckpointDiff @@ -106,8 +106,8 @@ func NewTableCheckpointDiff() *TableCheckpointDiff { func (cpd *TableCheckpointDiff) String() string { return fmt.Sprintf( - "{hasStatus:%v, hasChunks:%v, status:%d, allocBase:%d, chunks:[%d]}", - cpd.hasStatus, cpd.hasChunks, cpd.status, cpd.allocBase, len(cpd.chunks), + "{hasStatus:%v, hasRebase:%v, status:%d, allocBase:%d, chunks:[%d]}", + cpd.hasStatus, cpd.hasRebase, cpd.status, cpd.allocBase, len(cpd.chunks), ) } @@ -134,16 +134,13 @@ func (merger *StatusCheckpointMerger) MergeInto(cpd *TableCheckpointDiff) { } type ChunkCheckpointMerger struct { - Key ChunkCheckpointKey - AllocBase int64 - Checksum verify.KVChecksum - Pos int64 - RowID int64 + Key ChunkCheckpointKey + Checksum verify.KVChecksum + Pos int64 + RowID int64 } func (merger *ChunkCheckpointMerger) MergeInto(cpd *TableCheckpointDiff) { - cpd.hasChunks = true - cpd.allocBase = mathutil.MaxInt64(cpd.allocBase, merger.AllocBase) cpd.chunks[merger.Key] = chunkCheckpointDiff{ pos: merger.Pos, rowID: merger.RowID, @@ -151,6 +148,15 @@ func (merger *ChunkCheckpointMerger) MergeInto(cpd *TableCheckpointDiff) { } } +type RebaseCheckpointMerger struct { + AllocBase int64 +} + +func (merger 
*RebaseCheckpointMerger) MergeInto(cpd *TableCheckpointDiff) { + cpd.hasRebase = true + cpd.allocBase = mathutil.MaxInt64(cpd.allocBase, merger.AllocBase) +} + type CheckpointsDB interface { Initialize(ctx context.Context, dbInfo map[string]*TidbDBInfo) error Get(ctx context.Context, tableName string) (*TableCheckpoint, error) @@ -442,7 +448,7 @@ func (cpdb *MySQLCheckpointsDB) Update(checkpointDiffs map[string]*TableCheckpoi return errors.Trace(e) } } - if cpd.hasChunks { + if cpd.hasRebase { if _, e := checksumStmt.ExecContext(c, cpd.allocBase, tableName); e != nil { return errors.Trace(e) } diff --git a/lightning/restore/restore.go b/lightning/restore/restore.go index c78dcf755..d5a80ec03 100644 --- a/lightning/restore/restore.go +++ b/lightning/restore/restore.go @@ -407,6 +407,19 @@ func (t *TableRestore) restore(ctx context.Context, rc *RestoreController, cp *T if err := rc.checkpointsDB.InsertChunkCheckpoints(ctx, t.tableName, cp.Chunks); err != nil { return nil, errors.Trace(err) } + + // rebase the allocator so it exceeds the number of rows. + cp.AllocBase = mathutil.MaxInt64(cp.AllocBase, t.tableInfo.core.AutoIncID) + for _, chunk := range cp.Chunks { + cp.AllocBase = mathutil.MaxInt64(cp.AllocBase, chunk.Chunk.RowIDMax) + } + t.alloc.Rebase(t.tableInfo.ID, cp.AllocBase, false) + rc.saveCpCh <- saveCp{ + tableName: t.tableName, + merger: &RebaseCheckpointMerger{ + AllocBase: cp.AllocBase, + }, + } } var wg sync.WaitGroup @@ -1221,12 +1234,17 @@ func (cr *chunkRestore) restore( cr.chunk.Chunk.PrevRowIDMax = cr.parser.LastRow().RowID rc.saveCpCh <- saveCp{ tableName: t.tableName, - merger: &ChunkCheckpointMerger{ - Key: cr.chunk.Key, + merger: &RebaseCheckpointMerger{ AllocBase: t.alloc.Base() + 1, - Checksum: cr.chunk.Checksum, - Pos: cr.chunk.Chunk.Offset, - RowID: cr.chunk.Chunk.PrevRowIDMax, + }, + } + rc.saveCpCh <- saveCp{ + tableName: t.tableName, + merger: &ChunkCheckpointMerger{ + Key: cr.chunk.Key, + Checksum: cr.chunk.Checksum, + Pos: cr.chunk.Chunk.Offset, + RowID: cr.chunk.Chunk.PrevRowIDMax, }, } } diff --git a/lightning/restore/tidb.go b/lightning/restore/tidb.go index f03a20e7e..9356a00c3 100644 --- a/lightning/restore/tidb.go +++ b/lightning/restore/tidb.go @@ -9,12 +9,12 @@ import ( "regexp" "time" - "github.com/pkg/errors" "github.com/pingcap/tidb-lightning/lightning/common" "github.com/pingcap/tidb-lightning/lightning/config" "github.com/pingcap/tidb-lightning/lightning/metric" "github.com/pingcap/tidb-lightning/lightning/mydump" "github.com/pingcap/tidb/model" + "github.com/pkg/errors" ) type TiDBManager struct { @@ -183,7 +183,7 @@ func (timgr *TiDBManager) LoadSchemaInfo(ctx context.Context, schemas map[string } func (timgr *TiDBManager) getCreateTableStmt(ctx context.Context, schema, table string) (string, error) { - query := fmt.Sprintf("SHOW CREATE TABLE `%s`.`%s`", schema, table) + query := fmt.Sprintf("SHOW CREATE TABLE %s", common.UniqueTable(schema, table)) var tbl, createTable string err := common.QueryRowWithRetry(ctx, timgr.db, query, &tbl, &createTable) return createTable, errors.Annotatef(err, "%s", query) @@ -202,7 +202,8 @@ func UpdateGCLifeTime(ctx context.Context, db *sql.DB, gcLifeTime string) error } func AlterAutoIncrement(ctx context.Context, db *sql.DB, schema string, table string, incr int64) error { - query := fmt.Sprintf("ALTER TABLE `%s`.`%s` AUTO_INCREMENT=%d", schema, table, incr) + tableName := common.UniqueTable(schema, table) + query := fmt.Sprintf("ALTER TABLE %s AUTO_INCREMENT=%d", tableName, incr) 
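// A small sketch of how the rebase target above is derived: the allocator
// base must be at least the AUTO_INCREMENT option declared in the schema and
// at least the largest row ID any chunk can produce, so inserts performed
// after the import cannot collide with imported _tidb_rowid values. The
// figures below are invented.
package main

import "fmt"

func rebaseTarget(storedAllocBase, schemaAutoIncID int64, chunkRowIDMax []int64) int64 {
	base := storedAllocBase
	if schemaAutoIncID > base {
		base = schemaAutoIncID
	}
	for _, rowIDMax := range chunkRowIDMax {
		if rowIDMax > base {
			base = rowIDMax
		}
	}
	return base
}

func main() {
	// schema declares AUTO_INCREMENT=30001, chunks cover 22 rows in total:
	fmt.Println(rebaseTarget(0, 30001, []int64{10, 22})) // 30001
	// no AUTO_INCREMENT option, 100000 rows spread across two chunks:
	fmt.Println(rebaseTarget(0, 0, []int64{60000, 100000})) // 100000
}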
common.AppLogger.Infof("[%s.%s] %s", schema, table, query) err := common.ExecWithRetry(ctx, db, query, query) if err != nil { diff --git a/tests/tidb_rowid/config.toml b/tests/tidb_rowid/config.toml new file mode 100644 index 000000000..cb356e007 --- /dev/null +++ b/tests/tidb_rowid/config.toml @@ -0,0 +1,24 @@ +[lightning] +check-requirements = false +file = "/dev/stderr" +level = "warning" + +[tikv-importer] +addr = "127.0.0.1:8808" + +[mydumper] +data-source-dir = "tests/tidb_rowid/data" +region-min-size = 48 + +[tidb] +host = "127.0.0.1" +port = 4000 +user = "root" +status-port = 10080 +pd-addr = "127.0.0.1:2379" +log-level = "error" + +[post-restore] +checksum = true +compact = true +analyze = true diff --git a/tests/tidb_rowid/data/rowid-schema-create.sql b/tests/tidb_rowid/data/rowid-schema-create.sql new file mode 100644 index 000000000..d06ab2f96 --- /dev/null +++ b/tests/tidb_rowid/data/rowid-schema-create.sql @@ -0,0 +1 @@ +CREATE DATABASE rowid; diff --git a/tests/tidb_rowid/data/rowid.exotic`table``name-schema.sql.ignored b/tests/tidb_rowid/data/rowid.exotic`table``name-schema.sql.ignored new file mode 100644 index 000000000..e2d94bbdf --- /dev/null +++ b/tests/tidb_rowid/data/rowid.exotic`table``name-schema.sql.ignored @@ -0,0 +1 @@ +create table `exotic``table````name` (a varchar(6) primary key, b int unique auto_increment) auto_increment=80000; \ No newline at end of file diff --git a/tests/tidb_rowid/data/rowid.exotic`table``name.sql.ignored b/tests/tidb_rowid/data/rowid.exotic`table``name.sql.ignored new file mode 100644 index 000000000..e0341a7ac --- /dev/null +++ b/tests/tidb_rowid/data/rowid.exotic`table``name.sql.ignored @@ -0,0 +1,7 @@ +insert `exotic``table````name` (a, b, _tidb_rowid) values +('aaaaaa', 11, 79995), +('bbbbbb', 22, 79996); +insert `exotic``table````name` (a, b, _tidb_rowid) values +('cccccc', 33, 79997), +('dddddd', 44, 79998), +('eeeeee', 55, 79999); diff --git a/tests/tidb_rowid/data/rowid.explicit_tidb_rowid-schema.sql b/tests/tidb_rowid/data/rowid.explicit_tidb_rowid-schema.sql new file mode 100644 index 000000000..4f1d63448 --- /dev/null +++ b/tests/tidb_rowid/data/rowid.explicit_tidb_rowid-schema.sql @@ -0,0 +1 @@ +create table explicit_tidb_rowid (pk varchar(6) primary key); \ No newline at end of file diff --git a/tests/tidb_rowid/data/rowid.explicit_tidb_rowid.sql b/tests/tidb_rowid/data/rowid.explicit_tidb_rowid.sql new file mode 100644 index 000000000..f3769cc81 --- /dev/null +++ b/tests/tidb_rowid/data/rowid.explicit_tidb_rowid.sql @@ -0,0 +1,11 @@ +insert into non_pk (pk, _tidb_rowid) values +('eight', 8), +('five', 5), +('four', 4), +('nine', 9), +('one', 1), +('seven', 7), +('six', 6), +('ten', 10), +('three', 3), +('two', 2); diff --git a/tests/tidb_rowid/data/rowid.non_pk-schema.sql b/tests/tidb_rowid/data/rowid.non_pk-schema.sql new file mode 100644 index 000000000..5b5757644 --- /dev/null +++ b/tests/tidb_rowid/data/rowid.non_pk-schema.sql @@ -0,0 +1 @@ +create table non_pk (pk varchar(6) primary key); diff --git a/tests/tidb_rowid/data/rowid.non_pk.sql b/tests/tidb_rowid/data/rowid.non_pk.sql new file mode 100644 index 000000000..77e2c00be --- /dev/null +++ b/tests/tidb_rowid/data/rowid.non_pk.sql @@ -0,0 +1,11 @@ +insert into non_pk values +('one'), +('two'), +('three'), +('four'), +('five'), +('six'), +('seven'), +('eight'), +('nine'), +('ten'); diff --git a/tests/various_types/data/vt.non_pk_auto_inc-schema.sql b/tests/tidb_rowid/data/rowid.non_pk_auto_inc-schema.sql similarity index 73% rename from 
tests/various_types/data/vt.non_pk_auto_inc-schema.sql rename to tests/tidb_rowid/data/rowid.non_pk_auto_inc-schema.sql index fef3be74b..a71be02c9 100644 --- a/tests/various_types/data/vt.non_pk_auto_inc-schema.sql +++ b/tests/tidb_rowid/data/rowid.non_pk_auto_inc-schema.sql @@ -6,4 +6,4 @@ CREATE TABLE `non_pk_auto_inc` ( `id` int(11) NOT NULL AUTO_INCREMENT, PRIMARY KEY (`pk`), UNIQUE KEY (`id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin AUTO_INCREMENT=30001; +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin; diff --git a/tests/various_types/data/vt.non_pk_auto_inc.sql b/tests/tidb_rowid/data/rowid.non_pk_auto_inc.sql similarity index 100% rename from tests/various_types/data/vt.non_pk_auto_inc.sql rename to tests/tidb_rowid/data/rowid.non_pk_auto_inc.sql diff --git a/tests/tidb_rowid/data/rowid.pre_rebase-schema.sql b/tests/tidb_rowid/data/rowid.pre_rebase-schema.sql new file mode 100644 index 000000000..887540be5 --- /dev/null +++ b/tests/tidb_rowid/data/rowid.pre_rebase-schema.sql @@ -0,0 +1 @@ +create table pre_rebase (pk varchar(6) primary key) auto_increment=70000; diff --git a/tests/tidb_rowid/data/rowid.pre_rebase.sql b/tests/tidb_rowid/data/rowid.pre_rebase.sql new file mode 100644 index 000000000..4852114cf --- /dev/null +++ b/tests/tidb_rowid/data/rowid.pre_rebase.sql @@ -0,0 +1 @@ +insert into pre_rebase values ('foo'); \ No newline at end of file diff --git a/tests/tidb_rowid/run.sh b/tests/tidb_rowid/run.sh new file mode 100755 index 000000000..54f457168 --- /dev/null +++ b/tests/tidb_rowid/run.sh @@ -0,0 +1,55 @@ +#!/bin/sh + +# Verify that _tidb_rowid is correctly adjusted. + +set -eu + +run_sql 'DROP DATABASE IF EXISTS rowid;' +run_lightning +echo 'Import finished' + +run_sql 'SELECT count(*), max(id), min(_tidb_rowid), max(_tidb_rowid) FROM rowid.`non_pk_auto_inc`' +check_contains 'count(*): 22' +check_contains 'max(id): 37' +check_contains 'min(_tidb_rowid): 1' +check_contains 'max(_tidb_rowid): 22' +run_sql 'INSERT INTO rowid.`non_pk_auto_inc` (`pk`) VALUES ("?")' +run_sql 'SELECT id > 37, _tidb_rowid > 22 FROM rowid.`non_pk_auto_inc` WHERE `pk` = "?"' +check_contains 'id > 37: 1' +check_contains '_tidb_rowid > 22: 1' + +for table_name in non_pk explicit_tidb_rowid; do + run_sql "SELECT count(*), min(_tidb_rowid), max(_tidb_rowid) FROM rowid.${table_name}" + check_contains 'count(*): 10' + check_contains 'min(_tidb_rowid): 1' + check_contains 'max(_tidb_rowid): 10' + run_sql "SELECT _tidb_rowid FROM rowid.${table_name} WHERE pk = 'five'" + check_contains '_tidb_rowid: 5' + run_sql "INSERT INTO rowid.${table_name} VALUES ('eleven')" + run_sql "SELECT count(*) FROM rowid.${table_name}" + check_contains 'count(*): 11' + run_sql "SELECT count(*) FROM rowid.${table_name} WHERE pk > '!'" + check_contains 'count(*): 11' + run_sql "SELECT _tidb_rowid > 10 FROM rowid.${table_name} WHERE pk = 'eleven'" + check_contains '_tidb_rowid > 10: 1' +done + +run_sql 'SELECT count(*), min(_tidb_rowid), max(_tidb_rowid) FROM rowid.pre_rebase' +check_contains 'count(*): 1' +check_contains 'min(_tidb_rowid): 1' +check_contains 'max(_tidb_rowid): 1' +run_sql 'INSERT INTO rowid.pre_rebase VALUES ("?")' +run_sql 'SELECT _tidb_rowid > 70000 FROM rowid.pre_rebase WHERE pk = "?"' +check_contains '_tidb_rowid > 70000: 1' + +# FIXME Not testing these until pingcap/tidb#8259 is fixed. 
+# +# run_sql 'SELECT count(*) FROM rowid.`exotic``table````name`' +# check_contains 'count(*): 5' +# run_sql 'INSERT INTO rowid.`exotic``table````name` (a) VALUES ("ffffff"), ("gggggg")' +# run_sql 'SELECT _tidb_rowid > 80000, b > 800000 FROM rowid.pre_rebase WHERE a = "ffffff"' +# check_contains '_tidb_rowid > 80000: 1' +# check_contains 'b > 80000: 1' +# run_sql 'SELECT _tidb_rowid > 80000, b > 800000 FROM rowid.pre_rebase WHERE a = "gggggg"' +# check_contains '_tidb_rowid > 80000: 1' +# check_contains 'b > 80000: 1' diff --git a/tests/various_types/run.sh b/tests/various_types/run.sh index 22be52f23..23386b78e 100755 --- a/tests/various_types/run.sh +++ b/tests/various_types/run.sh @@ -68,10 +68,3 @@ run_sql 'SELECT count(*) FROM vt.`enum-set` WHERE find_in_set("x50", `set`) > 0' check_contains 'count(*): 10' run_sql 'SELECT `set` FROM vt.`enum-set` WHERE `enum` = "gcc"' check_contains 'set: x00,x06,x07,x09,x17,x20,x23,x24,x27,x37,x44,x46,x49,x54,x55,x58,x61,x62' - -run_sql 'SELECT count(*), max(id) FROM vt.`non_pk_auto_inc`' -check_contains 'count(*): 22' -check_contains 'max(id): 37' -run_sql 'INSERT INTO vt.`non_pk_auto_inc` (`pk`) VALUES ("?")' -run_sql 'SELECT id > 37 FROM vt.`non_pk_auto_inc` WHERE `pk` = "?"' -check_contains 'id > 37: 1' From 4b4670b8d0dd1eb9b3d18aa3061a8c5b108286d1 Mon Sep 17 00:00:00 2001 From: kennytm Date: Wed, 14 Nov 2018 18:27:43 +0800 Subject: [PATCH 12/15] tests: move the exotic filename tests into its own folder --- tests/exotic_filenames/config.toml | 24 +++++++++++++++++++ .../data/x`f\"n-schema-create.sql" | 1 + .../x`f\"n.exotic`table``name-schema.sql" | 0 .../data/x`f\"n.exotic`table``name.sql" | 0 tests/exotic_filenames/run.sh | 22 +++++++++++++++++ .../data/rowid.specific_auto_inc-schema.sql | 1 + .../data/rowid.specific_auto_inc.sql | 7 ++++++ tests/tidb_rowid/run.sh | 20 +++++++--------- 8 files changed, 64 insertions(+), 11 deletions(-) create mode 100644 tests/exotic_filenames/config.toml create mode 100644 "tests/exotic_filenames/data/x`f\"n-schema-create.sql" rename tests/tidb_rowid/data/rowid.exotic`table``name-schema.sql.ignored => "tests/exotic_filenames/data/x`f\"n.exotic`table``name-schema.sql" (100%) rename tests/tidb_rowid/data/rowid.exotic`table``name.sql.ignored => "tests/exotic_filenames/data/x`f\"n.exotic`table``name.sql" (100%) create mode 100755 tests/exotic_filenames/run.sh create mode 100644 tests/tidb_rowid/data/rowid.specific_auto_inc-schema.sql create mode 100644 tests/tidb_rowid/data/rowid.specific_auto_inc.sql diff --git a/tests/exotic_filenames/config.toml b/tests/exotic_filenames/config.toml new file mode 100644 index 000000000..a66dc894b --- /dev/null +++ b/tests/exotic_filenames/config.toml @@ -0,0 +1,24 @@ +[lightning] +check-requirements = false +file = "/dev/stderr" +level = "warning" + +[tikv-importer] +addr = "127.0.0.1:8808" + +[mydumper] +data-source-dir = "tests/exotic_filenames/data" +region-min-size = 48 + +[tidb] +host = "127.0.0.1" +port = 4000 +user = "root" +status-port = 10080 +pd-addr = "127.0.0.1:2379" +log-level = "error" + +[post-restore] +checksum = true +compact = true +analyze = true diff --git "a/tests/exotic_filenames/data/x`f\"n-schema-create.sql" "b/tests/exotic_filenames/data/x`f\"n-schema-create.sql" new file mode 100644 index 000000000..1fb10bd2e --- /dev/null +++ "b/tests/exotic_filenames/data/x`f\"n-schema-create.sql" @@ -0,0 +1 @@ +create database `x``f"n`; \ No newline at end of file diff --git a/tests/tidb_rowid/data/rowid.exotic`table``name-schema.sql.ignored 
"b/tests/exotic_filenames/data/x`f\"n.exotic`table``name-schema.sql" similarity index 100% rename from tests/tidb_rowid/data/rowid.exotic`table``name-schema.sql.ignored rename to "tests/exotic_filenames/data/x`f\"n.exotic`table``name-schema.sql" diff --git a/tests/tidb_rowid/data/rowid.exotic`table``name.sql.ignored "b/tests/exotic_filenames/data/x`f\"n.exotic`table``name.sql" similarity index 100% rename from tests/tidb_rowid/data/rowid.exotic`table``name.sql.ignored rename to "tests/exotic_filenames/data/x`f\"n.exotic`table``name.sql" diff --git a/tests/exotic_filenames/run.sh b/tests/exotic_filenames/run.sh new file mode 100755 index 000000000..22e4a1627 --- /dev/null +++ b/tests/exotic_filenames/run.sh @@ -0,0 +1,22 @@ +#!/bin/sh + +# Confirm the behavior for some exotic filenames +# Do not enable until https://github.com/pingcap/tidb/pull/8302 is merged. + +exit 0 + +set -eu + +run_sql 'DROP DATABASE IF EXISTS `x``f"n`;' +run_lightning +echo 'Import finished' + +run_sql 'SELECT count(*) FROM `x``f"n`.`exotic``table````name`' +check_contains 'count(*): 5' +run_sql 'INSERT INTO `x``f"n`.`exotic``table````name` (a) VALUES ("ffffff"), ("gggggg")' +run_sql 'SELECT _tidb_rowid > 80000, b > 80000 FROM `x``f"n`.`exotic``table````name` WHERE a = "ffffff"' +check_contains '_tidb_rowid > 80000: 1' +check_contains 'b > 80000: 1' +run_sql 'SELECT _tidb_rowid > 80000, b > 80000 FROM `x``f"n`.`exotic``table````name` WHERE a = "gggggg"' +check_contains '_tidb_rowid > 80000: 1' +check_contains 'b > 80000: 1' diff --git a/tests/tidb_rowid/data/rowid.specific_auto_inc-schema.sql b/tests/tidb_rowid/data/rowid.specific_auto_inc-schema.sql new file mode 100644 index 000000000..f6962e15a --- /dev/null +++ b/tests/tidb_rowid/data/rowid.specific_auto_inc-schema.sql @@ -0,0 +1 @@ +create table specific_auto_inc (a varchar(6) primary key, b int unique auto_increment) auto_increment=80000; \ No newline at end of file diff --git a/tests/tidb_rowid/data/rowid.specific_auto_inc.sql b/tests/tidb_rowid/data/rowid.specific_auto_inc.sql new file mode 100644 index 000000000..08cf771e4 --- /dev/null +++ b/tests/tidb_rowid/data/rowid.specific_auto_inc.sql @@ -0,0 +1,7 @@ +insert specific_auto_inc (a, b, _tidb_rowid) values +('aaaaaa', 11, 79995), +('bbbbbb', 22, 79996); +insert specific_auto_inc (a, b, _tidb_rowid) values +('cccccc', 33, 79997), +('dddddd', 44, 79998), +('eeeeee', 55, 79999); diff --git a/tests/tidb_rowid/run.sh b/tests/tidb_rowid/run.sh index 54f457168..b65a67e0d 100755 --- a/tests/tidb_rowid/run.sh +++ b/tests/tidb_rowid/run.sh @@ -42,14 +42,12 @@ run_sql 'INSERT INTO rowid.pre_rebase VALUES ("?")' run_sql 'SELECT _tidb_rowid > 70000 FROM rowid.pre_rebase WHERE pk = "?"' check_contains '_tidb_rowid > 70000: 1' -# FIXME Not testing these until pingcap/tidb#8259 is fixed. 
-# -# run_sql 'SELECT count(*) FROM rowid.`exotic``table````name`' -# check_contains 'count(*): 5' -# run_sql 'INSERT INTO rowid.`exotic``table````name` (a) VALUES ("ffffff"), ("gggggg")' -# run_sql 'SELECT _tidb_rowid > 80000, b > 800000 FROM rowid.pre_rebase WHERE a = "ffffff"' -# check_contains '_tidb_rowid > 80000: 1' -# check_contains 'b > 80000: 1' -# run_sql 'SELECT _tidb_rowid > 80000, b > 800000 FROM rowid.pre_rebase WHERE a = "gggggg"' -# check_contains '_tidb_rowid > 80000: 1' -# check_contains 'b > 80000: 1' +run_sql 'SELECT count(*) FROM rowid.specific_auto_inc' +check_contains 'count(*): 5' +run_sql 'INSERT INTO rowid.specific_auto_inc (a) VALUES ("ffffff"), ("gggggg")' +run_sql 'SELECT _tidb_rowid > 80000, b > 80000 FROM rowid.specific_auto_inc WHERE a = "ffffff"' +check_contains '_tidb_rowid > 80000: 1' +check_contains 'b > 80000: 1' +run_sql 'SELECT _tidb_rowid > 80000, b > 80000 FROM rowid.specific_auto_inc WHERE a = "gggggg"' +check_contains '_tidb_rowid > 80000: 1' +check_contains 'b > 80000: 1' From ed37dbefa3509af059c0790f3fcd7b77cbba5325 Mon Sep 17 00:00:00 2001 From: kennytm Date: Wed, 14 Nov 2018 20:14:43 +0800 Subject: [PATCH 13/15] tests: place Lightning log into a file to avoid Jenkins truncating output --- tests/_utils/run_lightning | 1 + tests/_utils/run_sql | 3 ++- tests/checkpoint/config.toml | 2 +- tests/checkpoint_chunks/config.toml | 2 +- tests/examples/1.toml | 2 +- tests/examples/131072.toml | 2 +- tests/examples/512.toml | 2 +- tests/exotic_filenames/config.toml | 2 +- tests/restore/config.toml | 2 +- tests/tidb_rowid/config.toml | 2 +- tests/tool_135/config.toml | 2 +- tests/tool_241/config.toml | 2 +- tests/various_types/config.toml | 2 +- 13 files changed, 14 insertions(+), 12 deletions(-) diff --git a/tests/_utils/run_lightning b/tests/_utils/run_lightning index b143d4611..da12ad6ee 100755 --- a/tests/_utils/run_lightning +++ b/tests/_utils/run_lightning @@ -3,4 +3,5 @@ set -eu TEST_DIR=/tmp/lightning_test_result +echo "[$(date)] <<<<<< RUNNING TEST FOR: tests/$TEST_NAME/${1-config}.toml >>>>>>" >> "$TEST_DIR/lightning.log" bin/tidb-lightning.test -test.coverprofile="$TEST_DIR/cov.$TEST_NAME.${1-config}.out" DEVEL -config "tests/$TEST_NAME/${1-config}.toml" diff --git a/tests/_utils/run_sql b/tests/_utils/run_sql index ec715195c..7b8526eeb 100755 --- a/tests/_utils/run_sql +++ b/tests/_utils/run_sql @@ -3,4 +3,5 @@ set -eu TEST_DIR=/tmp/lightning_test_result -mysql -uroot -h127.0.0.1 -P4000 --default-character-set utf8 -E -e "$1" > "$TEST_DIR/sql_res.$TEST_NAME.txt" +echo "[$(date)] Executing SQL: $1" > "$TEST_DIR/sql_res.$TEST_NAME.txt" +mysql -uroot -h127.0.0.1 -P4000 --default-character-set utf8 -E -e "$1" >> "$TEST_DIR/sql_res.$TEST_NAME.txt" diff --git a/tests/checkpoint/config.toml b/tests/checkpoint/config.toml index fa78c0271..9c03363ba 100644 --- a/tests/checkpoint/config.toml +++ b/tests/checkpoint/config.toml @@ -2,7 +2,7 @@ # pprof-port = 28423 table-concurrency = 1 check-requirements = false -file = "/dev/stderr" +file = "/tmp/lightning_test_result/lightning.log" level = "error" [checkpoint] diff --git a/tests/checkpoint_chunks/config.toml b/tests/checkpoint_chunks/config.toml index 57a5d27e7..90a1f0c6d 100644 --- a/tests/checkpoint_chunks/config.toml +++ b/tests/checkpoint_chunks/config.toml @@ -2,7 +2,7 @@ # pprof-port = 12683 region-concurrency = 1 check-requirements = false -file = "/dev/stderr" +file = "/tmp/lightning_test_result/lightning.log" level = "error" [checkpoint] diff --git a/tests/examples/1.toml b/tests/examples/1.toml 
index 46452d028..463eff080 100644 --- a/tests/examples/1.toml +++ b/tests/examples/1.toml @@ -1,7 +1,7 @@ [lightning] table-concurrency = 1 check-requirements = false -file = "/dev/stderr" +file = "/tmp/lightning_test_result/lightning.log" level = "warning" [tikv-importer] diff --git a/tests/examples/131072.toml b/tests/examples/131072.toml index e460d4c2a..ddb7d2b43 100644 --- a/tests/examples/131072.toml +++ b/tests/examples/131072.toml @@ -1,7 +1,7 @@ [lightning] table-concurrency = 1 check-requirements = false -file = "/dev/stderr" +file = "/tmp/lightning_test_result/lightning.log" level = "warning" [tikv-importer] diff --git a/tests/examples/512.toml b/tests/examples/512.toml index 5def88b8a..44c153701 100644 --- a/tests/examples/512.toml +++ b/tests/examples/512.toml @@ -1,7 +1,7 @@ [lightning] table-concurrency = 1 check-requirements = false -file = "/dev/stderr" +file = "/tmp/lightning_test_result/lightning.log" level = "warning" [tikv-importer] diff --git a/tests/exotic_filenames/config.toml b/tests/exotic_filenames/config.toml index a66dc894b..b20f9759c 100644 --- a/tests/exotic_filenames/config.toml +++ b/tests/exotic_filenames/config.toml @@ -1,6 +1,6 @@ [lightning] check-requirements = false -file = "/dev/stderr" +file = "/tmp/lightning_test_result/lightning.log" level = "warning" [tikv-importer] diff --git a/tests/restore/config.toml b/tests/restore/config.toml index 3dc3e5b81..1254f7b2b 100644 --- a/tests/restore/config.toml +++ b/tests/restore/config.toml @@ -1,7 +1,7 @@ [lightning] table-concurrency = 4 check-requirements = false -file = "/dev/stderr" +file = "/tmp/lightning_test_result/lightning.log" level = "warning" [tikv-importer] diff --git a/tests/tidb_rowid/config.toml b/tests/tidb_rowid/config.toml index cb356e007..7a7255378 100644 --- a/tests/tidb_rowid/config.toml +++ b/tests/tidb_rowid/config.toml @@ -1,6 +1,6 @@ [lightning] check-requirements = false -file = "/dev/stderr" +file = "/tmp/lightning_test_result/lightning.log" level = "warning" [tikv-importer] diff --git a/tests/tool_135/config.toml b/tests/tool_135/config.toml index b122c9840..336013c70 100644 --- a/tests/tool_135/config.toml +++ b/tests/tool_135/config.toml @@ -1,6 +1,6 @@ [lightning] check-requirements = false -file = "/dev/stderr" +file = "/tmp/lightning_test_result/lightning.log" level = "warn" [tikv-importer] diff --git a/tests/tool_241/config.toml b/tests/tool_241/config.toml index 390040efa..f7f058aaf 100644 --- a/tests/tool_241/config.toml +++ b/tests/tool_241/config.toml @@ -1,7 +1,7 @@ [lightning] table-concurrency = 3 check-requirements = false -file = "/dev/stderr" +file = "/tmp/lightning_test_result/lightning.log" level = "warning" [tikv-importer] diff --git a/tests/various_types/config.toml b/tests/various_types/config.toml index 0f2952035..41ff46f94 100644 --- a/tests/various_types/config.toml +++ b/tests/various_types/config.toml @@ -1,6 +1,6 @@ [lightning] check-requirements = false -file = "/dev/stderr" +file = "/tmp/lightning_test_result/lightning.log" level = "warning" [tikv-importer] From 61a933cc58660405076d0c7339e0af7aba8854f5 Mon Sep 17 00:00:00 2001 From: kennytm Date: Mon, 19 Nov 2018 22:38:36 +0800 Subject: [PATCH 14/15] mydump: addressed comments --- lightning/mydump/parser.go | 2 +- lightning/mydump/parser_test.go | 2 +- lightning/mydump/region.go | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/lightning/mydump/parser.go b/lightning/mydump/parser.go index 1e135a17a..7c22f462f 100644 --- a/lightning/mydump/parser.go +++ b/lightning/mydump/parser.go 
@@ -89,7 +89,7 @@ func (parser *ChunkParser) readBlock() error { n, err := io.ReadFull(parser.reader, block) switch err { - case io.ErrUnexpectedEOF: + case io.ErrUnexpectedEOF, io.EOF: parser.isLastChunk = true fallthrough case nil: diff --git a/lightning/mydump/parser_test.go b/lightning/mydump/parser_test.go index 4f033b66e..df713d939 100644 --- a/lightning/mydump/parser_test.go +++ b/lightning/mydump/parser_test.go @@ -19,7 +19,7 @@ func (s *testMydumpParserSuite) TearDownSuite(c *C) {} func (s *testMydumpParserSuite) TestReadRow(c *C) { reader := strings.NewReader( "/* whatever pragmas */;" + - "INSERT INTO `namespaced`.`table` (columns, more, columns) VALUES (1, 2, 3), (4, 5, 6);" + + "INSERT INTO `namespaced`.`table` (columns, more, columns) VALUES (1, 2, 3),\n(4, 5, 6);" + "INSERT `namespaced`.`table` (x,y,z) VALUES (7,8,9);" + "insert another_table values (10, 11, 12, '(13)', '(', 14, ')');", ) diff --git a/lightning/mydump/region.go b/lightning/mydump/region.go index 1d73c7aa2..06fcda847 100644 --- a/lightning/mydump/region.go +++ b/lightning/mydump/region.go @@ -123,7 +123,8 @@ func (f *RegionFounder) MakeTableRegions(meta *MDTableMeta) ([]*TableRegion, err for i, region := range filesRegions { region.ID = i - // Re-adjust the row IDs so they won't be overlapping. + // Every chunk's PrevRowIDMax was uninitialized (set to 0). We need to + // re-adjust the row IDs so they won't be overlapping. chunkRowCount := region.Chunk.RowIDMax - region.Chunk.PrevRowIDMax region.Chunk.PrevRowIDMax = totalRowCount totalRowCount += chunkRowCount From f7da20214fa7da5e2c5bfee4a04af67166ad4c0d Mon Sep 17 00:00:00 2001 From: kennytm Date: Tue, 20 Nov 2018 16:55:07 +0800 Subject: [PATCH 15/15] checkpoints: addressed comment, turned table name into constant --- lightning/restore/checkpoints.go | 77 +++++++++++++++++--------------- 1 file changed, 42 insertions(+), 35 deletions(-) diff --git a/lightning/restore/checkpoints.go b/lightning/restore/checkpoints.go index ff0f1a2e6..54e7f4699 100644 --- a/lightning/restore/checkpoints.go +++ b/lightning/restore/checkpoints.go @@ -34,6 +34,13 @@ const insertCheckpointRetry = 3 const nodeID = 0 +const ( + // the table names to store each kind of checkpoint in the checkpoint database + // remember to increase the version number in case of incompatible change. 
+ checkpointTableNameTable = "table_v1" + checkpointTableNameChunk = "chunk_v3" +) + func (status CheckpointStatus) MetricName() string { switch status { case CheckpointStatusLoaded: @@ -212,7 +219,7 @@ func NewMySQLCheckpointsDB(ctx context.Context, db *sql.DB, schemaName string) ( // Apparently we could execute multiple DDL statements in Exec() err := common.ExecWithRetry(ctx, db, "(create checkpoints database)", fmt.Sprintf(` CREATE DATABASE IF NOT EXISTS %[1]s; - CREATE TABLE IF NOT EXISTS %[1]s.table_v1 ( + CREATE TABLE IF NOT EXISTS %[1]s.%[2]s ( node_id int unsigned NOT NULL, session bigint unsigned NOT NULL, table_name varchar(261) NOT NULL PRIMARY KEY, @@ -224,7 +231,7 @@ func NewMySQLCheckpointsDB(ctx context.Context, db *sql.DB, schemaName string) ( update_time timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, INDEX(node_id, session) ); - CREATE TABLE IF NOT EXISTS %[1]s.chunk_v3 ( + CREATE TABLE IF NOT EXISTS %[1]s.%[3]s ( table_name varchar(261) NOT NULL, path varchar(2048) NOT NULL, offset bigint NOT NULL, @@ -241,7 +248,7 @@ func NewMySQLCheckpointsDB(ctx context.Context, db *sql.DB, schemaName string) ( update_time timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, PRIMARY KEY(table_name, path, offset) ); - `, schema)) + `, schema, checkpointTableNameTable, checkpointTableNameChunk)) if err != nil { return nil, errors.Trace(err) } @@ -268,12 +275,12 @@ func (cpdb *MySQLCheckpointsDB) Initialize(ctx context.Context, dbInfo map[strin // We do need to capture the error is display a user friendly message // (multiple nodes cannot import the same table) though. stmt, err := tx.PrepareContext(c, fmt.Sprintf(` - INSERT INTO %s.table_v1 (node_id, session, table_name, hash, engine) VALUES (?, ?, ?, ?, ?) + INSERT INTO %s.%s (node_id, session, table_name, hash, engine) VALUES (?, ?, ?, ?, ?) ON DUPLICATE KEY UPDATE session = CASE WHEN node_id = VALUES(node_id) AND hash = VALUES(hash) THEN VALUES(session) END; - `, cpdb.schema)) + `, cpdb.schema, checkpointTableNameTable)) if err != nil { return errors.Trace(err) } @@ -313,9 +320,9 @@ func (cpdb *MySQLCheckpointsDB) Get(ctx context.Context, tableName string) (*Tab path, offset, columns, should_include_row_id, pos, end_offset, prev_rowid_max, rowid_max, kvc_bytes, kvc_kvs, kvc_checksum - FROM %s.chunk_v3 WHERE table_name = ? + FROM %s.%s WHERE table_name = ? ORDER BY path, offset; - `, cpdb.schema) + `, cpdb.schema, checkpointTableNameChunk) rows, err := tx.QueryContext(c, query, tableName) if err != nil { return errors.Trace(err) @@ -343,8 +350,8 @@ func (cpdb *MySQLCheckpointsDB) Get(ctx context.Context, tableName string) (*Tab } query = fmt.Sprintf(` - SELECT status, engine, alloc_base FROM %s.table_v1 WHERE table_name = ? - `, cpdb.schema) + SELECT status, engine, alloc_base FROM %s.%s WHERE table_name = ? 
+ `, cpdb.schema, checkpointTableNameTable) row := tx.QueryRowContext(c, query, tableName) var ( @@ -377,7 +384,7 @@ func (cpdb *MySQLCheckpointsDB) Get(ctx context.Context, tableName string) (*Tab func (cpdb *MySQLCheckpointsDB) InsertChunkCheckpoints(ctx context.Context, tableName string, checkpoints []*ChunkCheckpoint) error { err := common.TransactWithRetry(ctx, cpdb.db, "(update chunk checkpoints for "+tableName+")", func(c context.Context, tx *sql.Tx) error { stmt, err := tx.PrepareContext(c, fmt.Sprintf(` - REPLACE INTO %s.chunk_v3 ( + REPLACE INTO %s.%s ( table_name, path, offset, columns, should_include_row_id, pos, end_offset, prev_rowid_max, rowid_max, kvc_bytes, kvc_kvs, kvc_checksum @@ -386,7 +393,7 @@ func (cpdb *MySQLCheckpointsDB) InsertChunkCheckpoints(ctx context.Context, tabl ?, ?, ?, ?, ?, ?, ? ); - `, cpdb.schema)) + `, cpdb.schema, checkpointTableNameChunk)) if err != nil { return errors.Trace(err) } @@ -415,15 +422,15 @@ func (cpdb *MySQLCheckpointsDB) InsertChunkCheckpoints(ctx context.Context, tabl func (cpdb *MySQLCheckpointsDB) Update(checkpointDiffs map[string]*TableCheckpointDiff) { chunkQuery := fmt.Sprintf(` - UPDATE %s.chunk_v3 SET pos = ?, prev_rowid_max = ?, kvc_bytes = ?, kvc_kvs = ?, kvc_checksum = ? + UPDATE %s.%s SET pos = ?, prev_rowid_max = ?, kvc_bytes = ?, kvc_kvs = ?, kvc_checksum = ? WHERE table_name = ? AND path = ? AND offset = ?; - `, cpdb.schema) + `, cpdb.schema, checkpointTableNameChunk) checksumQuery := fmt.Sprintf(` - UPDATE %s.table_v1 SET alloc_base = GREATEST(?, alloc_base) WHERE table_name = ?; - `, cpdb.schema) + UPDATE %s.%s SET alloc_base = GREATEST(?, alloc_base) WHERE table_name = ?; + `, cpdb.schema, checkpointTableNameTable) statusQuery := fmt.Sprintf(` - UPDATE %s.table_v1 SET status = ? WHERE table_name = ?; - `, cpdb.schema) + UPDATE %s.%s SET status = ? WHERE table_name = ?; + `, cpdb.schema, checkpointTableNameTable) err := common.TransactWithRetry(context.Background(), cpdb.db, "(update checkpoints)", func(c context.Context, tx *sql.Tx) error { chunkStmt, e := tx.PrepareContext(c, chunkQuery) @@ -499,17 +506,17 @@ func (cpdb *MySQLCheckpointsDB) RemoveCheckpoint(ctx context.Context, tableName ) if tableName == "all" { - deleteChunkFmt = "DELETE FROM %[1]s.chunk_v3 WHERE table_name IN (SELECT table_name FROM %[1]s.table_v1 WHERE node_id = ?)" - deleteTableFmt = "DELETE FROM %s.table_v1 WHERE node_id = ?" + deleteChunkFmt = "DELETE FROM %[1]s.%[2]s WHERE table_name IN (SELECT table_name FROM %[1]s.%[3]s WHERE node_id = ?)" + deleteTableFmt = "DELETE FROM %s.%s WHERE node_id = ?" arg = nodeID } else { - deleteChunkFmt = "DELETE FROM %s.chunk_v3 WHERE table_name = ?" - deleteTableFmt = "DELETE FROM %s.table_v1 WHERE table_name = ?" + deleteChunkFmt = "DELETE FROM %s.%s WHERE table_name = ?%.0s" // the %.0s is to consume the third parameter. + deleteTableFmt = "DELETE FROM %s.%s WHERE table_name = ?" 
arg = tableName } - deleteChunkQuery := fmt.Sprintf(deleteChunkFmt, cpdb.schema) - deleteTableQuery := fmt.Sprintf(deleteTableFmt, cpdb.schema) + deleteChunkQuery := fmt.Sprintf(deleteChunkFmt, cpdb.schema, checkpointTableNameChunk, checkpointTableNameTable) + deleteTableQuery := fmt.Sprintf(deleteTableFmt, cpdb.schema, checkpointTableNameTable) err := common.TransactWithRetry(ctx, cpdb.db, fmt.Sprintf("(remove checkpoints of %s)", tableName), func(c context.Context, tx *sql.Tx) error { if _, e := tx.ExecContext(c, deleteChunkQuery, arg); e != nil { return errors.Trace(e) @@ -533,8 +540,8 @@ func (cpdb *MySQLCheckpointsDB) IgnoreErrorCheckpoint(ctx context.Context, table colName, arg = "table_name", tableName } query := fmt.Sprintf(` - UPDATE %s.table_v1 SET status = %d WHERE %s = ? AND status <= %d; - `, cpdb.schema, CheckpointStatusLoaded, colName, CheckpointStatusMaxInvalid) + UPDATE %s.%s SET status = %d WHERE %s = ? AND status <= %d; + `, cpdb.schema, checkpointTableNameTable, CheckpointStatusLoaded, colName, CheckpointStatusMaxInvalid) err := common.ExecWithRetry(ctx, cpdb.db, fmt.Sprintf("(ignore error checkpoints for %s)", tableName), query, arg) return errors.Trace(err) @@ -572,14 +579,14 @@ func (cpdb *MySQLCheckpointsDB) destroyErrorCheckpoints(ctx context.Context, tab } selectQuery := fmt.Sprintf(` - SELECT table_name FROM %s.table_v1 WHERE %s = ? AND status <= %d; - `, cpdb.schema, conditionColumn, CheckpointStatusMaxInvalid) + SELECT table_name FROM %s.%s WHERE %s = ? AND status <= %d; + `, cpdb.schema, checkpointTableNameTable, conditionColumn, CheckpointStatusMaxInvalid) deleteChunkQuery := fmt.Sprintf(` - DELETE FROM %[1]s.chunk_v3 WHERE table_name IN (SELECT table_name FROM %[1]s.table_v1 WHERE %[2]s = ? AND status <= %[3]d) - `, cpdb.schema, conditionColumn, CheckpointStatusMaxInvalid) + DELETE FROM %[1]s.%[4]s WHERE table_name IN (SELECT table_name FROM %[1]s.%[5]s WHERE %[2]s = ? AND status <= %[3]d) + `, cpdb.schema, conditionColumn, CheckpointStatusMaxInvalid, checkpointTableNameChunk, checkpointTableNameTable) deleteTableQuery := fmt.Sprintf(` - DELETE FROM %s.table_v1 WHERE %s = ? AND status <= %d - `, cpdb.schema, conditionColumn, CheckpointStatusMaxInvalid) + DELETE FROM %s.%s WHERE %s = ? AND status <= %d + `, cpdb.schema, checkpointTableNameTable, conditionColumn, CheckpointStatusMaxInvalid) var targetTables []string @@ -630,8 +637,8 @@ func (cpdb *MySQLCheckpointsDB) DumpTables(ctx context.Context, writer io.Writer alloc_base, create_time, update_time - FROM %s.table_v1; - `, cpdb.schema)) + FROM %s.%s; + `, cpdb.schema, checkpointTableNameTable)) if err != nil { return errors.Trace(err) } @@ -656,8 +663,8 @@ func (cpdb *MySQLCheckpointsDB) DumpChunks(ctx context.Context, writer io.Writer kvc_checksum, create_time, update_time - FROM %s.chunk_v3; - `, cpdb.schema)) + FROM %s.%s; + `, cpdb.schema, checkpointTableNameChunk)) if err != nil { return errors.Trace(err) }
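
A note on the readBlock() change in PATCH 14/15: io.ReadFull returns io.EOF only when it could read nothing at all, and io.ErrUnexpectedEOF when it read a partial block, so both cases must mark the last chunk; otherwise a data file whose size is an exact multiple of bufSize would never be treated as finished. A minimal standalone sketch of the distinction (the reader contents below are made up for illustration):

    package main

    import (
        "fmt"
        "io"
        "strings"
    )

    func main() {
        // 5 bytes of data read through a 4-byte buffer: the first ReadFull
        // fills the buffer (nil error), the second reads 1 byte and returns
        // io.ErrUnexpectedEOF, the third reads nothing and returns io.EOF.
        r := strings.NewReader("hello")
        buf := make([]byte, 4)
        for i := 0; i < 3; i++ {
            n, err := io.ReadFull(r, buf)
            fmt.Println(n, err)
        }
    }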
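
The comment added to region.go in the same patch explains why MakeTableRegions rewrites the row-ID bounds: the chunk parser numbers rows per file starting from zero, so each file's [PrevRowIDMax, RowIDMax] range has to be rebased onto a single table-wide counter before the regions are handed out. A standalone sketch of that rebasing, using a hypothetical struct that keeps only the two relevant fields:

    package main

    import "fmt"

    // chunk keeps only the row-ID bounds; the real Chunk struct also
    // carries file offsets.
    type chunk struct {
        PrevRowIDMax, RowIDMax int64
    }

    func rebase(chunks []chunk) {
        total := int64(0)
        for i := range chunks {
            count := chunks[i].RowIDMax - chunks[i].PrevRowIDMax
            chunks[i].PrevRowIDMax = total
            total += count
            chunks[i].RowIDMax = total
        }
    }

    func main() {
        // Two files parsed independently, so both ranges start at zero.
        cs := []chunk{{PrevRowIDMax: 0, RowIDMax: 3}, {PrevRowIDMax: 0, RowIDMax: 5}}
        rebase(cs)
        fmt.Println(cs) // [{0 3} {3 8}]
    }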
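
PATCH 15/15 threads the new checkpointTableName* constants through fmt.Sprintf with explicit argument indexes (%[1]s, %[2]s, %[3]s), which lets one argument list feed a multi-statement DDL string and lets the schema name be reused in every statement. A reduced sketch of the pattern; the schema name and column lists here are placeholders, not the real checkpoint schema:

    package main

    import "fmt"

    func main() {
        ddl := "CREATE DATABASE IF NOT EXISTS %[1]s; " +
            "CREATE TABLE IF NOT EXISTS %[1]s.%[2]s (table_name varchar(261) PRIMARY KEY); " +
            "CREATE TABLE IF NOT EXISTS %[1]s.%[3]s (table_name varchar(261), path varchar(2048));"
        // An indexed verb refers to an argument position and may be reused,
        // so %[1]s substitutes the same schema name into every statement.
        fmt.Printf(ddl+"\n", "lightning_checkpoints", "table_v1", "chunk_v3")
    }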
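
The %.0s at the end of the single-table deleteChunkFmt deserves a word: both branches of RemoveCheckpoint now share one Sprintf call that passes the schema plus both table-name constants, but the single-table DELETE only needs the first two. The zero-precision string verb consumes the third argument without printing anything, which keeps Sprintf from appending a %!(EXTRA ...) diagnostic. A quick sketch with placeholder schema and table names:

    package main

    import "fmt"

    func main() {
        // %.0s truncates its string argument to zero characters, so the third
        // argument is consumed but contributes nothing to the output.
        q := fmt.Sprintf("DELETE FROM %s.%s WHERE table_name = ?%.0s",
            "cp_db", "chunk_v3", "table_v1")
        fmt.Println(q) // DELETE FROM cp_db.chunk_v3 WHERE table_name = ?
    }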