diff --git a/migrate-large-mysql-shards-to-tidb.md b/migrate-large-mysql-shards-to-tidb.md index 7312b37a774f2..c536594f9eb05 100644 --- a/migrate-large-mysql-shards-to-tidb.md +++ b/migrate-large-mysql-shards-to-tidb.md @@ -75,6 +75,8 @@ If the migration involves merging data from different sharded tables, primary ke Assume that tables 1~4 have the same table structure as follows. +{{< copyable "sql" >}} + ```sql CREATE TABLE `table1` ( `id` bigint(20) NOT NULL AUTO_INCREMENT, @@ -88,6 +90,8 @@ CREATE TABLE `table1` ( For those four tables, the `id` column is the primary key. It is auto-incremental, which will cause different sharded tables to generate duplicated `id` ranges and cause the primary key conflict on the target table during the migration. On the other hand, the `sid` column is the sharding key, which ensures that the index is unique globally. So you can remove the unique constraint of the `id` column in the target `table5` to avoid the data merge conflicts. +{{< copyable "sql" >}} + ```sql CREATE TABLE `table5` ( `id` bigint(20) NOT NULL, @@ -160,15 +164,23 @@ If the TiDB Lightning task crashes due to unrecoverable errors (for example, dat * --checkpoint-error-ignore: If migration has failed, this option clears the error status as if no errors ever happened. * --checkpoint-remove: This option simply clears all checkpoints, regardless of errors. -For more information, see [TiDB Lightning Checkpoints](https://docs.pingcap.com/tidb/stable/tidb-lightning-checkpoints). +For more information, see [TiDB Lightning Checkpoints](/tidb-lightning/tidb-lightning-checkpoints.md). -### Create the target schema +### Create a target schema -After you make changes in the aforementioned [Check conflicts for sharded tables](/migrate-large-mysql-shards-to-tidb.md#check-conflicts-for-sharded-tables), you can now manually create the `my_db` schema and `table5` in downstream TiDB. After that, you need to configure `tidb-lightning.toml`. +Create `mydb.table5` at downstream. -```toml -[mydumper] -no-schema = true # If you have created the downstream schema and tables, setting `true` tells TiDB Lightning not to create the downstream schema. +{{< copyable "sql" >}} + +```sql +CREATE TABLE `table5` ( + `id` bigint(20) NOT NULL, + `sid` bigint(20) NOT NULL, + `pid` bigint(20) NOT NULL, + `comment` varchar(255) DEFAULT NULL, + INDEX (`id`), + UNIQUE KEY `sid` (`sid`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ``` ### Start the migration task @@ -183,6 +195,9 @@ Follow these steps to start `tidb-lightning`: level = "info" file = "tidb-lightning.log" + [mydumper] + data-source-dir = ${data-path} + [tikv-importer] # Choose a local backend. # "local": The default mode. It is used for large data volumes greater than 1 TiB. During migration, downstream TiDB cannot provide services. @@ -195,24 +210,17 @@ Follow these steps to start `tidb-lightning`: sorted-kv-dir = "${sorted-kv-dir}" # Set the renaming rules ('routes') from source to target tables, in order to support merging different table shards into a single target table. Here you migrate `table1` and `table2` in `my_db1`, and `table3` and `table4` in `my_db2`, to the target `table5` in downstream `my_db`. - [[routes]] - schema-pattern = "my_db1" - table-pattern = "table[1-2]" - target-schema = "my_db" - target-table = "table5" - - [[routes]] - schema-pattern = "my_db2" - table-pattern = "table[3-4]" - target-schema = "my_db" - target-table = "table5" - - [mydumper] - # The source data directory. Set this to the path of the Dumpling exported data. - # If there are several Dumpling-exported data directories, you need to place all these directories in the same parent directory, and use the parent directory here. - data-source-dir = "${data-path}" # The local or S3 path, for example, 's3://my-bucket/sql-backup?region=us-west-2' - # Because table1~table4 from source are merged into another table5 in the target, you should tell TiDB Lightning no need to create schemas, so that table1 ~ table4 won't be created automatically according to the exported schema information - no-schema = true + [[mydumper.files]] + pattern = '(^|/)my_db1\.table[1-2]\..*\.sql$' + schema = "my_db" + table = "table5" + type = "sql" + + [[mydumper.files]] + pattern = '(^|/)my_db2\.table[3-4]\..*\.sql$' + schema = "my_db" + table = "table5" + type = "sql" # Information of the target TiDB cluster. For example purposes only. Replace the IP address with your IP address. [tidb] diff --git a/tidb-lightning/tidb-lightning-configuration.md b/tidb-lightning/tidb-lightning-configuration.md index f0ce6372ec061..874eb0483ef6b 100644 --- a/tidb-lightning/tidb-lightning-configuration.md +++ b/tidb-lightning/tidb-lightning-configuration.md @@ -242,19 +242,6 @@ trim-last-separator = false # schema = '$1' # table = '$2' # type = '$3' -# -# Sets rules for merging sharded schemas and tables. Specifically, import `table1` and `table2` from `my_db1`, and `table3` and `table4` from `my_db2` to `table5` in `my_db`. -# [[routes]] -# schema-pattern = "my_db1" -# table-pattern = "table[1-2]" -# target-schema = "my_db" -# target-table = "table5" -# -# [[routes]] -# schema-pattern = "my_db2" -# table-pattern = "table[3-4]" -# target-schema = "my_db" -# target-table = "table5" [tidb] # Configuration of any TiDB server from the cluster.