* pkg/loader: add a package to load data into MySQL (#436)
* tests/*: add swap unique index value test (#437)
* fix JSON type handling that could lose data after a restart (#463)
* translate.go: use string type instead of []byte for the JSON field
Showing 38 changed files with 4,172 additions and 260 deletions.
@@ -0,0 +1,31 @@
loader
======

A package to load data into MySQL in real time, intended to be used by *reparo*, *drainer*, etc. in a unified way.

### Getting started
- An example is available in [example_loader_test.go](./example_loader_test.go).

To use *Loader*, you need to write a translator, like *SlaveBinlogToTxn* in [translate.go](./translate.go), which translates the upstream data format (e.g. binlog) into `Txn` objects.
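As a rough sketch, a translator just needs to map upstream row changes onto the `Txn`/`DML` types used throughout this package. The `RowEvent` type below is hypothetical, standing in for whatever your source produces; the real translator is *SlaveBinlogToTxn*:

```go
// RowEvent is a hypothetical upstream change event, e.g. a decoded binlog row.
type RowEvent struct {
	Schema string
	Table  string
	Values map[string]interface{}
}

// toTxn sketches the translation step: wrap a batch of upstream row
// changes into a single Txn that can be sent to loader.Input().
func toTxn(events []RowEvent) *Txn {
	txn := new(Txn)
	for _, e := range events {
		txn.AppendDML(&DML{
			Database: e.Schema,
			Table:    e.Table,
			Tp:       InsertDMLType,
			Values:   e.Values,
		})
	}
	return txn
}
```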
## Overview
Loader splits the DML events of upstream transactions and loads them into MySQL concurrently, sharded by primary key or unique key. Causality between conflicting changes is preserved via [causality.go](./causality.go).
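Conceptually, the dispatch works like the sketch below: each DML is routed to a worker based on its key, so all changes to the same row are applied in order by the same worker. The single-column integer `id` key and plain worker channels are illustrative only, not the package's actual internals:

```go
// dispatch routes each DML to a worker channel chosen from its key, so all
// changes to the same row are executed in order by the same worker.
// Sketch only: assumes a non-negative integer primary key column named "id".
func dispatch(dmls []*DML, workers []chan *DML) {
	for _, dml := range dmls {
		id := dml.Values["id"].(int)
		workers[id%len(workers)] <- dml
	}
}
```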
## Optimization
#### Large Operation
Instead of executing DML statements one by one, we can combine many small operations into a single large one, e.g. using INSERT statements with multiple VALUES lists to insert several rows at a time. This is [faster](https://medium.com/@benmorel/high-speed-inserts-with-mysql-9d3dcd76f723) than inserting row by row.
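For example, several single-row inserts into the same table can be folded into one multi-row statement along these lines (a sketch using only the standard `strings` package; it assumes all rows share the same non-empty column list and is not the statement builder the package actually uses):

```go
import "strings"

// buildMultiInsert turns n single-row inserts into one statement with n
// value lists, e.g. INSERT INTO test1(id,a1) VALUES (?,?),(?,?),...
func buildMultiInsert(table string, cols []string, rows [][]interface{}) (string, []interface{}) {
	placeholder := "(" + strings.Repeat("?,", len(cols)-1) + "?)"

	var b strings.Builder
	b.WriteString("INSERT INTO " + table + "(" + strings.Join(cols, ",") + ") VALUES ")

	var args []interface{}
	for i, row := range rows {
		if i > 0 {
			b.WriteString(",")
		}
		b.WriteString(placeholder)
		args = append(args, row...)
	}
	return b.String(), args
}
```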
#### Merge by Primary Key
You may want to read about Kafka's [log compaction](https://kafka.apache.org/documentation/#compaction) for background.

We can treat a table with a primary key like a KV store. To rebuild the table from its change history, we only need the last value of every key.

While synchronizing data downstream in real time, we can take DML events from upstream in batches and merge them by key. After merging, there is at most one event per key, so downstream does not need to execute as many events as upstream produced. Merging also makes it possible to use batch insert operations.

We should also consider secondary unique keys here; see *execTableBatch* in [executor.go](./executor.go). Currently, we only merge by primary key and use batch operations if the table has a primary key and no unique key.
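A minimal version of merging by key could look like the sketch below (assuming a single-column primary key named "id" and ignoring how DELETEs and UPDATEs interact, which *execTableBatch* handles in full):

```go
// mergeByKey keeps only the last DML seen for each primary-key value, so a
// batch of events collapses to at most one operation per row, preserving
// the first-seen order of keys.
func mergeByKey(dmls []*DML) []*DML {
	last := make(map[interface{}]*DML)
	var order []interface{}

	for _, dml := range dmls {
		key := dml.Values["id"]
		if _, seen := last[key]; !seen {
			order = append(order, key)
		}
		last[key] = dml
	}

	merged := make([]*DML, 0, len(order))
	for _, key := range order {
		merged = append(merged, last[key])
	}
	return merged
}
```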
@@ -0,0 +1,239 @@
package loader

import (
	"database/sql"
	"sync"
	"testing"

	_ "github.com/go-sql-driver/mysql"
	"github.com/juju/errors"
	"github.com/ngaut/log"
)

// getTestDB opens a connection to the local MySQL instance used by the benchmarks.
func getTestDB() (db *sql.DB, err error) {
	dsn := "root:@tcp(127.0.0.1:3306)/?charset=utf8&interpolateParams=true&readTimeout=1m&multiStatements=true"
	db, err = sql.Open("mysql", dsn)
	return
}

func BenchmarkInsertMerge(b *testing.B) {
	benchmarkWrite(b, true)
}

func BenchmarkInsertNoMerge(b *testing.B) {
	benchmarkWrite(b, false)
}

func BenchmarkUpdateMerge(b *testing.B) {
	benchmarkUpdate(b, true)
}

func BenchmarkUpdateNoMerge(b *testing.B) {
	benchmarkUpdate(b, false)
}

func BenchmarkDeleteMerge(b *testing.B) {
	benchmarkDelete(b, true)
}

func BenchmarkDeleteNoMerge(b *testing.B) {
	benchmarkDelete(b, false)
}

// benchmarkUpdate loads b.N rows, then measures updating every row through the loader.
func benchmarkUpdate(b *testing.B, merge bool) {
	log.SetLevelByString("error")

	r, err := newRunner(merge)
	if err != nil {
		b.Fatal(err)
	}

	dropTable(r.db, r.loader)
	createTable(r.db, r.loader)

	loadTable(r.db, r.loader, b.N)

	b.ResetTimer()
	updateTable(r.db, r.loader, b.N)

	r.close()
}

// benchmarkDelete loads b.N rows, then measures deleting every row through the loader.
func benchmarkDelete(b *testing.B, merge bool) {
	log.SetLevelByString("error")

	r, err := newRunner(merge)
	if err != nil {
		b.Fatal(err)
	}

	dropTable(r.db, r.loader)
	createTable(r.db, r.loader)

	loadTable(r.db, r.loader, b.N)

	b.ResetTimer()
	deleteTable(r.db, r.loader, b.N)

	r.close()
}

// benchmarkWrite measures inserting b.N rows through the loader.
func benchmarkWrite(b *testing.B, merge bool) {
	log.SetLevelByString("error")

	r, err := newRunner(merge)
	if err != nil {
		b.Fatal(err)
	}

	dropTable(r.db, r.loader)
	createTable(r.db, r.loader)

	b.ResetTimer()
	loadTable(r.db, r.loader, b.N)

	r.close()
}

type runner struct {
	db     *sql.DB
	loader *Loader
	wg     sync.WaitGroup
}

func newRunner(merge bool) (r *runner, err error) {
	db, err := getTestDB()
	if err != nil {
		return nil, errors.Trace(err)
	}

	loader, err := NewLoader(db, WorkerCount(16), BatchSize(128))
	if err != nil {
		return nil, errors.Trace(err)
	}

	loader.merge = merge

	r = new(runner)
	r.db = db
	r.loader = loader

	r.wg.Add(1)
	go func() {
		err := loader.Run()
		if err != nil {
			log.Fatal(err)
		}
		r.wg.Done()
	}()

	// Drain the success notifications so the loader is not blocked.
	go func() {
		for range loader.Successes() {
		}
	}()

	return
}

func (r *runner) close() {
	r.loader.Close()
	r.wg.Wait()
}

func createTable(db *sql.DB, loader *Loader) error {
	var sql string

	sql = "create table test1(id int primary key, a1 int)"
	// sql = "create table test1(id int, a1 int, UNIQUE KEY `id` (`id`))"
	loader.Input() <- NewDDLTxn("test", "test1", sql)

	return nil
}

func dropTable(db *sql.DB, loader *Loader) error {
	sql := "drop table if exists test1"
	loader.Input() <- NewDDLTxn("test", "test1", sql)
	return nil
}

func loadTable(db *sql.DB, loader *Loader, n int) error {
	var txns []*Txn
	for i := 0; i < n; i++ {
		txn := new(Txn)
		dml := &DML{
			Database: "test",
			Table:    "test1",
			Tp:       InsertDMLType,
			Values: map[string]interface{}{
				"id": i,
				"a1": i,
			},
		}

		txn.AppendDML(dml)
		txns = append(txns, txn)
	}

	for _, txn := range txns {
		loader.Input() <- txn
	}

	return nil
}

func updateTable(db *sql.DB, loader *Loader, n int) error {
	var txns []*Txn
	for i := 0; i < n; i++ {
		txn := new(Txn)
		dml := &DML{
			Database: "test",
			Table:    "test1",
			Tp:       UpdateDMLType,
			Values: map[string]interface{}{
				"id": i,
				"a1": i * 10,
			},
			OldValues: map[string]interface{}{
				"id": i,
				"a1": i,
			},
		}

		txn.AppendDML(dml)
		txns = append(txns, txn)
	}

	for _, txn := range txns {
		loader.Input() <- txn
	}

	return nil
}

func deleteTable(db *sql.DB, loader *Loader, n int) error {
	var txns []*Txn
	for i := 0; i < n; i++ {
		txn := new(Txn)
		dml := &DML{
			Database: "test",
			Table:    "test1",
			Tp:       DeleteDMLType,
			Values: map[string]interface{}{
				"id": i,
				"a1": i,
			},
		}

		txn.AppendDML(dml)
		txns = append(txns, txn)
	}

	for _, txn := range txns {
		loader.Input() <- txn
	}

	return nil
}