From f65786958b02c1a2ed2a49926a676ea9395bd1f1 Mon Sep 17 00:00:00 2001 From: okJiang <819421878@qq.com> Date: Fri, 29 Oct 2021 17:20:50 +0800 Subject: [PATCH 01/28] split actions paths --- .github/workflows/dm_binlog_999999.yaml | 2 ++ .github/workflows/dm_chaos.yaml | 2 ++ .github/workflows/dm_upstream_switch.yaml | 2 ++ .github/workflows/integration.yaml | 4 ++++ .github/workflows/ticdc_chaos.yaml | 4 ++++ .github/workflows/upgrade_dm_via_tiup.yaml | 2 ++ 6 files changed, 16 insertions(+) diff --git a/.github/workflows/dm_binlog_999999.yaml b/.github/workflows/dm_binlog_999999.yaml index f9c1aa17c28..222503cb1c1 100644 --- a/.github/workflows/dm_binlog_999999.yaml +++ b/.github/workflows/dm_binlog_999999.yaml @@ -4,6 +4,8 @@ on: push: branches: - master + paths: + - "dm/**" schedule: - cron: "3 22 * * *" # run at minute 06:03 UTC+8 diff --git a/.github/workflows/dm_chaos.yaml b/.github/workflows/dm_chaos.yaml index a27d37c6a9b..cf4d15dfe4e 100644 --- a/.github/workflows/dm_chaos.yaml +++ b/.github/workflows/dm_chaos.yaml @@ -6,6 +6,8 @@ on: pull_request: branches: - master + paths: + - "dm/**" schedule: - cron: "0,30 17-22 * * *" # run at minute 0 and 30 every hour from 01:00 ~ 06:00 UTC+8 diff --git a/.github/workflows/dm_upstream_switch.yaml b/.github/workflows/dm_upstream_switch.yaml index 55f6d6083e8..42f74b20531 100644 --- a/.github/workflows/dm_upstream_switch.yaml +++ b/.github/workflows/dm_upstream_switch.yaml @@ -4,6 +4,8 @@ on: push: branches: - master + paths: + - "dm/**" schedule: - cron: "3 22 * * *" # run at minute 06:03 UTC+8 diff --git a/.github/workflows/integration.yaml b/.github/workflows/integration.yaml index fc2dae06563..91ddeedc3ad 100644 --- a/.github/workflows/integration.yaml +++ b/.github/workflows/integration.yaml @@ -5,8 +5,12 @@ name: Integration Tests on: push: branches: [master] + paths: + - "!dm/**" pull_request: branches: [master] + paths: + - "!dm/**" # See: 
https://docs.github.com/en/actions/reference/workflow-syntax-for-github-actions#concurrency. concurrency: diff --git a/.github/workflows/ticdc_chaos.yaml b/.github/workflows/ticdc_chaos.yaml index 73e12ad73ee..517fb24e807 100644 --- a/.github/workflows/ticdc_chaos.yaml +++ b/.github/workflows/ticdc_chaos.yaml @@ -1,6 +1,10 @@ name: TiCDC Chaos on: + push: + branches: [master] + paths: + - "!dm/**" schedule: - cron: '40 16-23 * * *' # run at minute 0 and 40 every hour from 00:00 ~ 07:00 UTC+8 diff --git a/.github/workflows/upgrade_dm_via_tiup.yaml b/.github/workflows/upgrade_dm_via_tiup.yaml index ae462c4e04c..ef945bf35fb 100644 --- a/.github/workflows/upgrade_dm_via_tiup.yaml +++ b/.github/workflows/upgrade_dm_via_tiup.yaml @@ -4,6 +4,8 @@ on: pull_request: branches: - master + paths: + - "dm/**" schedule: - cron: "3 22 * * *" # run at minute 06:03 UTC+8 workflow_dispatch: From ce513319eb12f71c1c8510f8fc9cd89231640700 Mon Sep 17 00:00:00 2001 From: okJiang <819421878@qq.com> Date: Tue, 30 Nov 2021 17:36:59 +0800 Subject: [PATCH 02/28] add rfc --- dm/docs/RFCS/20211130_enhanced_pre_checker.md | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 dm/docs/RFCS/20211130_enhanced_pre_checker.md diff --git a/dm/docs/RFCS/20211130_enhanced_pre_checker.md b/dm/docs/RFCS/20211130_enhanced_pre_checker.md new file mode 100644 index 00000000000..ee73ef5cfd1 --- /dev/null +++ b/dm/docs/RFCS/20211130_enhanced_pre_checker.md @@ -0,0 +1,65 @@ +# Enhanced pre-check Design + +## Background +Before the DM’s task starts, we should check some items to avoid start-task failures. You can see the details in our [document](https://docs.pingcap.com/zh/tidb-data-migration/stable/precheck#%E5%85%B3%E9%97%AD%E6%A3%80%E6%9F%A5%E9%A1%B9). In order to allow some users to use DM normally under certain circumstances, we can also set ignore-check-items in task.yaml to ignore some items that you don’t want to check. Now, we find some shortcomings in regard to check-items. 
+ +### Bad user habits +We allow users to ignore all check-items, in which case the user's authority is too large to perform unexpected operations. +### Too much time overhead +If we have a large number of tables in source, we will take too much time in checking table schema, sharding table consistency and sharding table auto increment key. +### Inadequate check +- If downstream creates tables manually and the new downstream’s auto increment ID is not the same as the upstream, we shouldn’t check **auto_increment_ID** for errors. Users should be responsible for what they set. +- Dump privilege only checks RELOAD and SELECT. However, Dumpling supports different [consistency configurations](https://docs.pingcap.com/zh/tidb/stable/dumpling-overview#%E8%B0%83%E6%95%B4-dumpling-%E7%9A%84%E6%95%B0%E6%8D%AE%E4%B8%80%E8%87%B4%E6%80%A7%E9%80%89%E9%A1%B9), which need more privilege. +- If online-ddl is set by true and a ddl is in online-ddl stage, DM will have a problem in all mode. Specifically, ghost table has been created, is executing the ddl, but is not renamed yet. In this case, DM will report an error when the ghost table is renamed after the dump phase. You can learn more about online-ddl [here](https://docs.pingcap.com/zh/tidb-data-migration/stable/feature-online-ddl). +- For schema_of_shard_tables, whatever pessimistic task and optimistic task, we all check it by comparing all sharding tables’ structures for consistency simply. For optimistic mode, we can do better. +## Proposal +### Restrict user usage +1. Remove the following settings from the [document](https://docs.pingcap.com/zh/tidb-data-migration/stable/precheck#%E5%85%B3%E9%97%AD%E6%A3%80%E6%9F%A5%E9%A1%B9). If the following items are detected to be set in the configuration, a warning will be reported. + - all + - dump_privilege + - replication_privilege + - server_id + - binlog_enable + - binlog_format + - binlog_row_image +2. 
If task is full/all mode, the following items will be forced to check (correspondingly, it will not be check in increment mode): + - dump_privilege +3. If task is increment/all mode, the following items will be forced to check (correspondingly, it will not be check in full mode): + - replication_privilege + - server_id + - binlog_enable + - binlog_format + - binlog_row_image +### Speed ​​up check +1. Support concurrent check + - table_schema + - schema_of_shard_tables + - auto_increment_ID +2. Use mydumper.threads as **source_connection_concurrency**, which should update in our document. +#### How to speed up? +Since every checker is concurrent, we can split tables to **source_connection_concurrency** part, and create a checker for every part. +### Optimize some check +1. If downstream creates tables manually and the new downstream’s auto increment ID is not the same as the upstream, we shouldn’t check **auto_increment_ID**. +2. Dump_privilege will check different privileges according to different [consistency](https://docs.pingcap.com/zh/tidb/stable/dumpling-overview#%E8%B0%83%E6%95%B4-dumpling-%E7%9A%84%E6%95%B0%E6%8D%AE%E4%B8%80%E8%87%B4%E6%80%A7%E9%80%89%E9%A1%B9) and downstream on source. + - For all consistency, we will check + - REPLICATION CLIENT (global) + - SELECT (only dump table) + - For flush consistency: + - RELOAD (global) + - For flush/lock consistency: + - LOCK TABLES (only dump table) + - For TiDB downstream: + - PROCESS (global) +3. Add OnlineddlChecker to check if a ddl exists in online-ddl stage when DM task is all mode and online-ddl is true. It will be forced to check in all mode and not check in increment mode. +4. Enhance schema_of_shard_tables. + - At first, if a machine exits the DM’s checkpoint, the DM’s subsequent task starts/resumes at the checkpoint. So we think the checkpoint guarantees consistency. 
+ - If not exit checkpoint: + - For all/full mode (pessimistic task): we keep the original check; + - For all/full mode (optimistic task): we check whether the shard tables schema meets the definition of [Optimistic Schema Compatibility](20191209_optimistic_ddl.md). If that meets, we can create tables by the compatible schema in the dump stage. + - For incremental mode: not check the sharding tables’ schema, because the table schema obtained from show create table is not the schema at the point of binlog. +5. Make the fail state more gentle, which is from `StateFailure` to `StateWarning`. + - checkAutoIncrementKey + - checkPK/UK + +### Remove checker from tidb-tools to DM +After this change, checker is deeply coupled to DM, both with dump Privilege and optimistic pessimistic coordination. And checker is only used by DM (TiCDC and TiDB all don't use it). So removing checker from tidb-tools to DM is more convenient for development work。 From 549da3f46ac98497d396c173df132983e62739d3 Mon Sep 17 00:00:00 2001 From: okJiang <819421878@qq.com> Date: Wed, 1 Dec 2021 12:03:53 +0800 Subject: [PATCH 03/28] Update dm/docs/RFCS/20211130_enhanced_pre_checker.md Co-authored-by: Ehco --- dm/docs/RFCS/20211130_enhanced_pre_checker.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dm/docs/RFCS/20211130_enhanced_pre_checker.md b/dm/docs/RFCS/20211130_enhanced_pre_checker.md index ee73ef5cfd1..051f8aabe5d 100644 --- a/dm/docs/RFCS/20211130_enhanced_pre_checker.md +++ b/dm/docs/RFCS/20211130_enhanced_pre_checker.md @@ -1,7 +1,7 @@ # Enhanced pre-check Design ## Background -Before the DM’s task starts, we should check some items to avoid start-task failures. You can see the details in our [document](https://docs.pingcap.com/zh/tidb-data-migration/stable/precheck#%E5%85%B3%E9%97%AD%E6%A3%80%E6%9F%A5%E9%A1%B9). 
In order to allow some users to use DM normally under certain circumstances, we can also set ignore-check-items in task.yaml to ignore some items that you don’t want to check. Now, we find some shortcomings in regard to check-items. +Before the DM’s task starts, we should check some items to avoid start-task failures. You can see the details in our [document](https://docs.pingcap.com/zh/tidb-data-migration/stable/precheck#%E5%85%B3%E9%97%AD%E6%A3%80%E6%9F%A5%E9%A1%B9). In order to allow some users to use DM normally under certain circumstances, we can also set ignore-check-items in task config to ignore some items that you don’t want to check. Now, we find some shortcomings in regard to check-items. ### Bad user habits We allow users to ignore all check-items, in which case the user's authority is too large to perform unexpected operations. From ad3c49f160dd29e1fc4a88112fef94b0c7f8f0e8 Mon Sep 17 00:00:00 2001 From: okJiang <819421878@qq.com> Date: Wed, 1 Dec 2021 12:04:00 +0800 Subject: [PATCH 04/28] Update dm/docs/RFCS/20211130_enhanced_pre_checker.md Co-authored-by: Ehco --- dm/docs/RFCS/20211130_enhanced_pre_checker.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dm/docs/RFCS/20211130_enhanced_pre_checker.md b/dm/docs/RFCS/20211130_enhanced_pre_checker.md index 051f8aabe5d..dd0e0273cca 100644 --- a/dm/docs/RFCS/20211130_enhanced_pre_checker.md +++ b/dm/docs/RFCS/20211130_enhanced_pre_checker.md @@ -10,7 +10,7 @@ If we have a large number of tables in source, we will take too much time in che ### Inadequate check - If downstream creates tables manually and the new downstream’s auto increment ID is not the same as the upstream, we shouldn’t check **auto_increment_ID** for errors. Users should be responsible for what they set. - Dump privilege only checks RELOAD and SELECT. 
However, Dumpling supports different [consistency configurations](https://docs.pingcap.com/zh/tidb/stable/dumpling-overview#%E8%B0%83%E6%95%B4-dumpling-%E7%9A%84%E6%95%B0%E6%8D%AE%E4%B8%80%E8%87%B4%E6%80%A7%E9%80%89%E9%A1%B9), which need more privilege. -- If online-ddl is set by true and a ddl is in online-ddl stage, DM will have a problem in all mode. Specifically, ghost table has been created, is executing the ddl, but is not renamed yet. In this case, DM will report an error when the ghost table is renamed after the dump phase. You can learn more about online-ddl [here](https://docs.pingcap.com/zh/tidb-data-migration/stable/feature-online-ddl). +- If online-ddl is set by true and a DDL is in online-ddl stage, DM will have a problem in all mode. Specifically, ghost table has been created, is executing the DDL, but is not renamed yet. In this case, DM will report an error when the ghost table is renamed after the dump phase. You can learn more about online-ddl [here](https://docs.pingcap.com/zh/tidb-data-migration/stable/feature-online-ddl). - For schema_of_shard_tables, whatever pessimistic task and optimistic task, we all check it by comparing all sharding tables’ structures for consistency simply. For optimistic mode, we can do better. ## Proposal ### Restrict user usage From d3b42e5b45d39a1844c3b4efabdca4bf7a054648 Mon Sep 17 00:00:00 2001 From: okJiang <819421878@qq.com> Date: Wed, 1 Dec 2021 12:15:43 +0800 Subject: [PATCH 05/28] format markdown --- dm/docs/RFCS/20211130_enhanced_pre_checker.md | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/dm/docs/RFCS/20211130_enhanced_pre_checker.md b/dm/docs/RFCS/20211130_enhanced_pre_checker.md index dd0e0273cca..47f6fb3cd24 100644 --- a/dm/docs/RFCS/20211130_enhanced_pre_checker.md +++ b/dm/docs/RFCS/20211130_enhanced_pre_checker.md @@ -1,18 +1,26 @@ # Enhanced pre-check Design ## Background + Before the DM’s task starts, we should check some items to avoid start-task failures. 
You can see the details in our [document](https://docs.pingcap.com/zh/tidb-data-migration/stable/precheck#%E5%85%B3%E9%97%AD%E6%A3%80%E6%9F%A5%E9%A1%B9). In order to allow some users to use DM normally under certain circumstances, we can also set ignore-check-items in task config to ignore some items that you don’t want to check. Now, we find some shortcomings in regard to check-items. ### Bad user habits + We allow users to ignore all check-items, in which case the user's authority is too large to perform unexpected operations. + ### Too much time overhead + If we have a large number of tables in source, we will take too much time in checking table schema, sharding table consistency and sharding table auto increment key. + ### Inadequate check -- If downstream creates tables manually and the new downstream’s auto increment ID is not the same as the upstream, we shouldn’t check **auto_increment_ID** for errors. Users should be responsible for what they set. -- Dump privilege only checks RELOAD and SELECT. However, Dumpling supports different [consistency configurations](https://docs.pingcap.com/zh/tidb/stable/dumpling-overview#%E8%B0%83%E6%95%B4-dumpling-%E7%9A%84%E6%95%B0%E6%8D%AE%E4%B8%80%E8%87%B4%E6%80%A7%E9%80%89%E9%A1%B9), which need more privilege. -- If online-ddl is set by true and a DDL is in online-ddl stage, DM will have a problem in all mode. Specifically, ghost table has been created, is executing the DDL, but is not renamed yet. In this case, DM will report an error when the ghost table is renamed after the dump phase. You can learn more about online-ddl [here](https://docs.pingcap.com/zh/tidb-data-migration/stable/feature-online-ddl). -- For schema_of_shard_tables, whatever pessimistic task and optimistic task, we all check it by comparing all sharding tables’ structures for consistency simply. For optimistic mode, we can do better. 
+ +* If downstream creates tables manually and the new downstream’s auto increment ID is not the same as the upstream, we shouldn’t check **auto_increment_ID** for errors. Users should be responsible for what they set. +* Dump privilege only checks RELOAD and SELECT. However, Dumpling supports different [consistency configurations](https://docs.pingcap.com/zh/tidb/stable/dumpling-overview#%E8%B0%83%E6%95%B4-dumpling-%E7%9A%84%E6%95%B0%E6%8D%AE%E4%B8%80%E8%87%B4%E6%80%A7%E9%80%89%E9%A1%B9), which need more privilege. +* If online-ddl is set by true and a DDL is in online-ddl stage, DM will have a problem in all mode. Specifically, ghost table has been created, is executing the DDL, but is not renamed yet. In this case, DM will report an error when the ghost table is renamed after the dump phase. You can learn more about online-ddl [here](https://docs.pingcap.com/zh/tidb-data-migration/stable/feature-online-ddl). +* For schema_of_shard_tables, whatever pessimistic task and optimistic task, we all check it by comparing all sharding tables’ structures for consistency simply. For optimistic mode, we can do better. + ## Proposal + ### Restrict user usage 1. Remove the following settings from the [document](https://docs.pingcap.com/zh/tidb-data-migration/stable/precheck#%E5%85%B3%E9%97%AD%E6%A3%80%E6%9F%A5%E9%A1%B9). If the following items are detected to be set in the configuration, a warning will be reported. - all @@ -30,15 +38,21 @@ If we have a large number of tables in source, we will take too much time in che - binlog_enable - binlog_format - binlog_row_image + ### Speed ​​up check + 1. Support concurrent check - table_schema - schema_of_shard_tables - auto_increment_ID 2. Use mydumper.threads as **source_connection_concurrency**, which should update in our document. + #### How to speed up? + Since every checker is concurrent, we can split tables to **source_connection_concurrency** part, and create a checker for every part. + ### Optimize some check + 1. 
If downstream creates tables manually and the new downstream’s auto increment ID is not the same as the upstream, we shouldn’t check **auto_increment_ID**. 2. Dump_privilege will check different privileges according to different [consistency](https://docs.pingcap.com/zh/tidb/stable/dumpling-overview#%E8%B0%83%E6%95%B4-dumpling-%E7%9A%84%E6%95%B0%E6%8D%AE%E4%B8%80%E8%87%B4%E6%80%A7%E9%80%89%E9%A1%B9) and downstream on source. - For all consistency, we will check @@ -62,4 +76,5 @@ Since every checker is concurrent, we can split tables to **source_connection_co - checkPK/UK ### Remove checker from tidb-tools to DM + After this change, checker is deeply coupled to DM, both with dump Privilege and optimistic pessimistic coordination. And checker is only used by DM (TiCDC and TiDB all don't use it). So removing checker from tidb-tools to DM is more convenient for development work。 From f5b3240dc141925e2d28940214be87cdbac6d0d1 Mon Sep 17 00:00:00 2001 From: okJiang <819421878@qq.com> Date: Thu, 2 Dec 2021 20:55:18 +0800 Subject: [PATCH 06/28] Update dm/docs/RFCS/20211130_enhanced_pre_checker.md Co-authored-by: lance6716 --- dm/docs/RFCS/20211130_enhanced_pre_checker.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dm/docs/RFCS/20211130_enhanced_pre_checker.md b/dm/docs/RFCS/20211130_enhanced_pre_checker.md index 47f6fb3cd24..09376901e72 100644 --- a/dm/docs/RFCS/20211130_enhanced_pre_checker.md +++ b/dm/docs/RFCS/20211130_enhanced_pre_checker.md @@ -77,4 +77,4 @@ Since every checker is concurrent, we can split tables to **source_connection_co ### Remove checker from tidb-tools to DM -After this change, checker is deeply coupled to DM, both with dump Privilege and optimistic pessimistic coordination. And checker is only used by DM (TiCDC and TiDB all don't use it). 
So removing checker from tidb-tools to DM is more convenient for development work。 +After this change, checker is deeply coupled to DM, both with dump privilege and optimistic pessimistic coordination. And checker is only used by DM (TiCDC and TiDB all don't use it). So removing checker from tidb-tools to DM is more convenient for development work。 From e6bce30c77f2f5f3ebf9ecdef2471a8edca17a65 Mon Sep 17 00:00:00 2001 From: okJiang <819421878@qq.com> Date: Thu, 2 Dec 2021 20:55:24 +0800 Subject: [PATCH 07/28] Update dm/docs/RFCS/20211130_enhanced_pre_checker.md Co-authored-by: lance6716 --- dm/docs/RFCS/20211130_enhanced_pre_checker.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dm/docs/RFCS/20211130_enhanced_pre_checker.md b/dm/docs/RFCS/20211130_enhanced_pre_checker.md index 09376901e72..a81dc564d0b 100644 --- a/dm/docs/RFCS/20211130_enhanced_pre_checker.md +++ b/dm/docs/RFCS/20211130_enhanced_pre_checker.md @@ -64,7 +64,7 @@ Since every checker is concurrent, we can split tables to **source_connection_co - LOCK TABLES (only dump table) - For TiDB downstream: - PROCESS (global) -3. Add OnlineddlChecker to check if a ddl exists in online-ddl stage when DM task is all mode and online-ddl is true. It will be forced to check in all mode and not check in increment mode. +3. Add OnlineDDLChecker to check if a ddl of tables in allow list exists in online-ddl stage when DM task is all mode and online-ddl is true. It will be forced to check in all mode and not checked in increment mode. 4. Enhance schema_of_shard_tables. - At first, if a machine exits the DM’s checkpoint, the DM’s subsequent task starts/resumes at the checkpoint. So we think the checkpoint guarantees consistency. 
- If not exit checkpoint: From a003e1f369d7bc791a477bd969a601631d281141 Mon Sep 17 00:00:00 2001 From: okJiang <819421878@qq.com> Date: Thu, 2 Dec 2021 21:52:51 +0800 Subject: [PATCH 08/28] address comment --- dm/docs/RFCS/20211130_enhanced_pre_checker.md | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/dm/docs/RFCS/20211130_enhanced_pre_checker.md b/dm/docs/RFCS/20211130_enhanced_pre_checker.md index a81dc564d0b..dc44870e72d 100644 --- a/dm/docs/RFCS/20211130_enhanced_pre_checker.md +++ b/dm/docs/RFCS/20211130_enhanced_pre_checker.md @@ -38,6 +38,10 @@ If we have a large number of tables in source, we will take too much time in che - binlog_enable - binlog_format - binlog_row_image +4. Other checkers are the same as before. If you want to ignore them, you should set them in ignore_check_items. + - table_schema + - auto_increment_ID + - schema_of_shard_tables ### Speed ​​up check @@ -53,7 +57,9 @@ Since every checker is concurrent, we can split tables to **source_connection_co ### Optimize some check -1. If downstream creates tables manually and the new downstream’s auto increment ID is not the same as the upstream, we shouldn’t check **auto_increment_ID**. +1. We needn’t check **auto_increment_ID** in following situation: + - If downstream creates tables manually and the new downstream’s auto increment ID is not the same as the upstream; + - If the column of auto increment ID in upstream does not has an unique constraint in downstream. 2. Dump_privilege will check different privileges according to different [consistency](https://docs.pingcap.com/zh/tidb/stable/dumpling-overview#%E8%B0%83%E6%95%B4-dumpling-%E7%9A%84%E6%95%B0%E6%8D%AE%E4%B8%80%E8%87%B4%E6%80%A7%E9%80%89%E9%A1%B9) and downstream on source. 
- For all consistency, we will check - REPLICATION CLIENT (global) @@ -64,9 +70,9 @@ Since every checker is concurrent, we can split tables to **source_connection_co - LOCK TABLES (only dump table) - For TiDB downstream: - PROCESS (global) -3. Add OnlineDDLChecker to check if a ddl of tables in allow list exists in online-ddl stage when DM task is all mode and online-ddl is true. It will be forced to check in all mode and not checked in increment mode. +3. Add OnlineDDLChecker to check if a DDL of tables in allow list exists in online-ddl stage when DM task is all mode and online-ddl is true. It will be forced to check in all mode and not checked in increment mode. 4. Enhance schema_of_shard_tables. - - At first, if a machine exits the DM’s checkpoint, the DM’s subsequent task starts/resumes at the checkpoint. So we think the checkpoint guarantees consistency. + - At first, if a machine exits the DM’s checkpoint and then DM start/resume task, we think the checkpoint guarantees consistency. So we don't check it. - If not exit checkpoint: - For all/full mode (pessimistic task): we keep the original check; - For all/full mode (optimistic task): we check whether the shard tables schema meets the definition of [Optimistic Schema Compatibility](20191209_optimistic_ddl.md). If that meets, we can create tables by the compatible schema in the dump stage. 
From ebbaee81be257b1fb615da53a0c10bf171f5cd67 Mon Sep 17 00:00:00 2001 From: okJiang <819421878@qq.com> Date: Fri, 3 Dec 2021 16:36:32 +0800 Subject: [PATCH 09/28] add some description --- dm/docs/RFCS/20211130_enhanced_pre_checker.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dm/docs/RFCS/20211130_enhanced_pre_checker.md b/dm/docs/RFCS/20211130_enhanced_pre_checker.md index dc44870e72d..cd2ebdba386 100644 --- a/dm/docs/RFCS/20211130_enhanced_pre_checker.md +++ b/dm/docs/RFCS/20211130_enhanced_pre_checker.md @@ -81,6 +81,10 @@ Since every checker is concurrent, we can split tables to **source_connection_co - checkAutoIncrementKey - checkPK/UK -### Remove checker from tidb-tools to DM +### Move checker from [tidb-tools](https://github.com/pingcap/tidb-tools/tree/master/pkg/check) to DM -After this change, checker is deeply coupled to DM, both with dump privilege and optimistic pessimistic coordination. And checker is only used by DM (TiCDC and TiDB all don't use it). So removing checker from tidb-tools to DM is more convenient for development work。 +After this change, checker is deeply coupled to DM, both with dump privilege and optimistic/pessimistic coordination. And checker is only used by DM (neither TiCDC nor TiDB uses it). So moving checkers from tidb-tools to DM is more convenient for development work. + +In detail, we do not take the initiative to submit a PR to the tidb-tools repository. Instead, we will replace the checker in tidb-tools step by step during the development of this feature. + +So in the end there will be two checker components, one in DM and one in tidb-tools. But DM will either completely remove its dependence on the tidb-tools checker or wrap its own checker layer on top of it.
From 4c32dd151cdcea540d431bfba2fb074dc32f1313 Mon Sep 17 00:00:00 2001 From: okJiang <819421878@qq.com> Date: Mon, 6 Dec 2021 11:27:57 +0800 Subject: [PATCH 10/28] Update dm/docs/RFCS/20211130_enhanced_pre_checker.md Co-authored-by: lance6716 --- dm/docs/RFCS/20211130_enhanced_pre_checker.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dm/docs/RFCS/20211130_enhanced_pre_checker.md b/dm/docs/RFCS/20211130_enhanced_pre_checker.md index cd2ebdba386..ae71b2ebd5f 100644 --- a/dm/docs/RFCS/20211130_enhanced_pre_checker.md +++ b/dm/docs/RFCS/20211130_enhanced_pre_checker.md @@ -72,7 +72,7 @@ Since every checker is concurrent, we can split tables to **source_connection_co - PROCESS (global) 3. Add OnlineDDLChecker to check if a DDL of tables in allow list exists in online-ddl stage when DM task is all mode and online-ddl is true. It will be forced to check in all mode and not checked in increment mode. 4. Enhance schema_of_shard_tables. - - At first, if a machine exits the DM’s checkpoint and then DM start/resume task, we think the checkpoint guarantees consistency. So we don't check it. + - If a task has passed the pre-checking when starting and exited, DM should keep the consistency during the task running. So we don't check it when restart the task. - If not exit checkpoint: - For all/full mode (pessimistic task): we keep the original check; - For all/full mode (optimistic task): we check whether the shard tables schema meets the definition of [Optimistic Schema Compatibility](20191209_optimistic_ddl.md). If that meets, we can create tables by the compatible schema in the dump stage. 
From ce60241a1ad41a6dc41a18661d1d18759cf4ead8 Mon Sep 17 00:00:00 2001 From: okJiang <819421878@qq.com> Date: Mon, 6 Dec 2021 11:56:39 +0800 Subject: [PATCH 11/28] update --- dm/docs/RFCS/20211130_enhanced_pre_checker.md | 46 ++++++++----------- 1 file changed, 19 insertions(+), 27 deletions(-) diff --git a/dm/docs/RFCS/20211130_enhanced_pre_checker.md b/dm/docs/RFCS/20211130_enhanced_pre_checker.md index cd2ebdba386..5542f8e0af7 100644 --- a/dm/docs/RFCS/20211130_enhanced_pre_checker.md +++ b/dm/docs/RFCS/20211130_enhanced_pre_checker.md @@ -21,28 +21,6 @@ If we have a large number of tables in source, we will take too much time in che ## Proposal -### Restrict user usage -1. Remove the following settings from the [document](https://docs.pingcap.com/zh/tidb-data-migration/stable/precheck#%E5%85%B3%E9%97%AD%E6%A3%80%E6%9F%A5%E9%A1%B9). If the following items are detected to be set in the configuration, a warning will be reported. - - all - - dump_privilege - - replication_privilege - - server_id - - binlog_enable - - binlog_format - - binlog_row_image -2. If task is full/all mode, the following items will be forced to check (correspondingly, it will not be check in increment mode): - - dump_privilege -3. If task is increment/all mode, the following items will be forced to check (correspondingly, it will not be check in full mode): - - replication_privilege - - server_id - - binlog_enable - - binlog_format - - binlog_row_image -4. Other checkers are the same as before. If you want to ignore them, you should set them in ignore_check_items. - - table_schema - - auto_increment_ID - - schema_of_shard_tables - ### Speed ​​up check 1. Support concurrent check @@ -57,9 +35,7 @@ Since every checker is concurrent, we can split tables to **source_connection_co ### Optimize some check -1. 
We needn’t check **auto_increment_ID** in following situation: - - If downstream creates tables manually and the new downstream’s auto increment ID is not the same as the upstream; - - If the column of auto increment ID in upstream does not has an unique constraint in downstream. +1. We needn’t check **auto_increment_ID**, if the column of auto increment ID in upstream does not has an unique constraint in downstream. 2. Dump_privilege will check different privileges according to different [consistency](https://docs.pingcap.com/zh/tidb/stable/dumpling-overview#%E8%B0%83%E6%95%B4-dumpling-%E7%9A%84%E6%95%B0%E6%8D%AE%E4%B8%80%E8%87%B4%E6%80%A7%E9%80%89%E9%A1%B9) and downstream on source. - For all consistency, we will check - REPLICATION CLIENT (global) @@ -70,15 +46,31 @@ Since every checker is concurrent, we can split tables to **source_connection_co - LOCK TABLES (only dump table) - For TiDB downstream: - PROCESS (global) -3. Add OnlineDDLChecker to check if a DDL of tables in allow list exists in online-ddl stage when DM task is all mode and online-ddl is true. It will be forced to check in all mode and not checked in increment mode. +3. Add OnlineDDLChecker to check if a DDL of tables in allow list exists in online-ddl stage when DM task is all mode and online-ddl is true. 4. Enhance schema_of_shard_tables. - At first, if a machine exits the DM’s checkpoint and then DM start/resume task, we think the checkpoint guarantees consistency. So we don't check it. - If not exit checkpoint: - For all/full mode (pessimistic task): we keep the original check; - For all/full mode (optimistic task): we check whether the shard tables schema meets the definition of [Optimistic Schema Compatibility](20191209_optimistic_ddl.md). If that meets, we can create tables by the compatible schema in the dump stage. - For incremental mode: not check the sharding tables’ schema, because the table schema obtained from show create table is not the schema at the point of binlog. 
+ +### Restrict user usage +1. Remove all `ignore_check_items` settings from the [document](https://docs.pingcap.com/zh/tidb-data-migration/stable/precheck#%E5%85%B3%E9%97%AD%E6%A3%80%E6%9F%A5%E9%A1%B9). If the following items are detected to be set in the configuration, a warning will be reported. +2. If task is full/all mode, the following items will be forced to check (correspondingly, it will not be check in increment mode): + - dump_privilege + - schema_of_shard_tables +3. If task is increment/all mode, the following items will be forced to check (correspondingly, it will not be check in full mode): + - replication_privilege + - server_id + - binlog_enable + - binlog_format + - binlog_row_image + - online_ddl(new added) +4. If task is full/increment/all mode, the following items will be forced to check: + - table_schema + - auto_increment_ID 5. Make the fail state more gentle, which is from `StateFailure` to `StateWarning`. - - checkAutoIncrementKey + - checkAutoIncrementKey(same as auto_increment_ID) - checkPK/UK ### Move checker from [tidb-tools](https://github.com/pingcap/tidb-tools/tree/master/pkg/check) to DM From ef05cfd7b400f62dc0daf28d6109cdd17622e1ce Mon Sep 17 00:00:00 2001 From: okJiang <819421878@qq.com> Date: Mon, 6 Dec 2021 16:19:51 +0800 Subject: [PATCH 12/28] Apply suggestions from code review Co-authored-by: Chunzhu Li --- dm/docs/RFCS/20211130_enhanced_pre_checker.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dm/docs/RFCS/20211130_enhanced_pre_checker.md b/dm/docs/RFCS/20211130_enhanced_pre_checker.md index 09f15498737..81f946cfe8d 100644 --- a/dm/docs/RFCS/20211130_enhanced_pre_checker.md +++ b/dm/docs/RFCS/20211130_enhanced_pre_checker.md @@ -42,9 +42,9 @@ Since every checker is concurrent, we can split tables to **source_connection_co - SELECT (only dump table) - For flush consistency: - RELOAD (global) - - For flush/lock consistency: - - LOCK TABLES (only dump table) - - For TiDB downstream: + - For flush/lock 
consistency: + - LOCK TABLES (only tables to dump) + - For TiDB source databases: - PROCESS (global) 3. Add OnlineDDLChecker to check if a DDL of tables in allow list exists in online-ddl stage when DM task is all mode and online-ddl is true. 4. Enhance schema_of_shard_tables. From bd824ef75315c89b623940bcf6bc6ec32a65e5e7 Mon Sep 17 00:00:00 2001 From: okJiang <819421878@qq.com> Date: Tue, 7 Dec 2021 11:52:17 +0800 Subject: [PATCH 13/28] Update dm/docs/RFCS/20211130_enhanced_pre_checker.md Co-authored-by: glorv --- dm/docs/RFCS/20211130_enhanced_pre_checker.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dm/docs/RFCS/20211130_enhanced_pre_checker.md b/dm/docs/RFCS/20211130_enhanced_pre_checker.md index 81f946cfe8d..36d8eda85cb 100644 --- a/dm/docs/RFCS/20211130_enhanced_pre_checker.md +++ b/dm/docs/RFCS/20211130_enhanced_pre_checker.md @@ -15,7 +15,7 @@ If we have a large number of tables in source, we will take too much time in che ### Inadequate check * If downstream creates tables manually and the new downstream’s auto increment ID is not the same as the upstream, we shouldn’t check **auto_increment_ID** for errors. Users should be responsible for what they set. -* Dump privilege only checks RELOAD and SELECT. However, Dumpling supports different [consistency configurations](https://docs.pingcap.com/zh/tidb/stable/dumpling-overview#%E8%B0%83%E6%95%B4-dumpling-%E7%9A%84%E6%95%B0%E6%8D%AE%E4%B8%80%E8%87%B4%E6%80%A7%E9%80%89%E9%A1%B9), which need more privilege. +* Dump privilege only checks RELOAD and SELECT. However, Dumpling supports different [consistency configurations](https://docs.pingcap.com/zh/tidb/stable/dumpling-overview#%E8%B0%83%E6%95%B4-dumpling-%E7%9A%84%E6%95%B0%E6%8D%AE%E4%B8%80%E8%87%B4%E6%80%A7%E9%80%89%E9%A1%B9), which need more privileges. * If online-ddl is set by true and a DDL is in online-ddl stage, DM will have a problem in all mode. 
Specifically, ghost table has been created, is executing the DDL, but is not renamed yet. In this case, DM will report an error when the ghost table is renamed after the dump phase. You can learn more about online-ddl [here](https://docs.pingcap.com/zh/tidb-data-migration/stable/feature-online-ddl). * For schema_of_shard_tables, whatever pessimistic task and optimistic task, we all check it by comparing all sharding tables’ structures for consistency simply. For optimistic mode, we can do better. From eabe327b15bbd945a4b80d659237202b22f39433 Mon Sep 17 00:00:00 2001 From: okJiang <819421878@qq.com> Date: Tue, 7 Dec 2021 13:32:58 +0800 Subject: [PATCH 14/28] address comment --- dm/docs/RFCS/20211130_enhanced_pre_checker.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dm/docs/RFCS/20211130_enhanced_pre_checker.md b/dm/docs/RFCS/20211130_enhanced_pre_checker.md index 09f15498737..89092e467e1 100644 --- a/dm/docs/RFCS/20211130_enhanced_pre_checker.md +++ b/dm/docs/RFCS/20211130_enhanced_pre_checker.md @@ -2,7 +2,7 @@ ## Background -Before the DM’s task starts, we should check some items to avoid start-task failures. You can see the details in our [document](https://docs.pingcap.com/zh/tidb-data-migration/stable/precheck#%E5%85%B3%E9%97%AD%E6%A3%80%E6%9F%A5%E9%A1%B9). In order to allow some users to use DM normally under certain circumstances, we can also set ignore-check-items in task config to ignore some items that you don’t want to check. Now, we find some shortcomings in regard to check-items. +Before the DM’s task starts, we should check some items to avoid start-task failures. You can see the details in our [document](https://docs.pingcap.com/tidb-data-migration/stable/precheck#disable-checking-items). In order to allow some users to use DM normally under certain circumstances, we can also set ignore-check-items in task config to ignore some items that you don’t want to check. 
Now, we find some shortcomings in regard to check-items. ### Bad user habits @@ -14,9 +14,9 @@ If we have a large number of tables in source, we will take too much time in che ### Inadequate check -* If downstream creates tables manually and the new downstream’s auto increment ID is not the same as the upstream, we shouldn’t check **auto_increment_ID** for errors. Users should be responsible for what they set. -* Dump privilege only checks RELOAD and SELECT. However, Dumpling supports different [consistency configurations](https://docs.pingcap.com/zh/tidb/stable/dumpling-overview#%E8%B0%83%E6%95%B4-dumpling-%E7%9A%84%E6%95%B0%E6%8D%AE%E4%B8%80%E8%87%B4%E6%80%A7%E9%80%89%E9%A1%B9), which need more privilege. -* If online-ddl is set by true and a DDL is in online-ddl stage, DM will have a problem in all mode. Specifically, ghost table has been created, is executing the DDL, but is not renamed yet. In this case, DM will report an error when the ghost table is renamed after the dump phase. You can learn more about online-ddl [here](https://docs.pingcap.com/zh/tidb-data-migration/stable/feature-online-ddl). +* If downstream creates tables manually and the new downstream’s auto increment ID is not the same as the upstream, we shouldn’t check **auto_increment_ID** for errors. Users should be responsible for what they set. PS: **auto_increment_ID** is only checked when **schema_of_shard_tables** is true. +* Dump privilege only checks RELOAD and SELECT. However, Dumpling supports different [consistency configurations](https://docs.pingcap.com/tidb/stable/dumpling-overview#adjust-dumplings-data-consistency-options), which need more privilege. +* If online-ddl is set by true and a DDL is in online-ddl stage, DM will have a problem in all mode. Specifically, ghost table has been created, is executing the DDL, but is not renamed yet. In this case, DM will report an error when the ghost table is renamed after the dump phase. 
You can learn more about online-ddl [here](https://docs.pingcap.com/tidb-data-migration/stable/feature-online-ddl). * For schema_of_shard_tables, whatever pessimistic task and optimistic task, we all check it by comparing all sharding tables’ structures for consistency simply. For optimistic mode, we can do better. ## Proposal @@ -36,7 +36,7 @@ Since every checker is concurrent, we can split tables to **source_connection_co ### Optimize some check 1. We needn’t check **auto_increment_ID**, if the column of auto increment ID in upstream does not has an unique constraint in downstream. -2. Dump_privilege will check different privileges according to different [consistency](https://docs.pingcap.com/zh/tidb/stable/dumpling-overview#%E8%B0%83%E6%95%B4-dumpling-%E7%9A%84%E6%95%B0%E6%8D%AE%E4%B8%80%E8%87%B4%E6%80%A7%E9%80%89%E9%A1%B9) and downstream on source. +2. Dump_privilege will check different privileges according to different [consistency](https://docs.pingcap.com/tidb/stable/dumpling-overview#adjust-dumplings-data-consistency-options) and downstream on source. - For all consistency, we will check - REPLICATION CLIENT (global) - SELECT (only dump table) @@ -55,7 +55,7 @@ Since every checker is concurrent, we can split tables to **source_connection_co - For incremental mode: not check the sharding tables’ schema, because the table schema obtained from show create table is not the schema at the point of binlog. ### Restrict user usage -1. Remove all `ignore_check_items` settings from the [document](https://docs.pingcap.com/zh/tidb-data-migration/stable/precheck#%E5%85%B3%E9%97%AD%E6%A3%80%E6%9F%A5%E9%A1%B9). If the following items are detected to be set in the configuration, a warning will be reported. +1. Remove all `ignore_check_items` settings from the [document](https://docs.pingcap.com/tidb-data-migration/stable/precheck#disable-checking-items). If the following items are detected to be set in the configuration, a warning will be reported. 2. 
If task is full/all mode, the following items will be forced to check (correspondingly, it will not be check in increment mode): - dump_privilege - schema_of_shard_tables From 66d1ebd59dc18ead627a8d9fd89df3a4f2ac7fde Mon Sep 17 00:00:00 2001 From: okJiang <819421878@qq.com> Date: Tue, 7 Dec 2021 16:44:49 +0800 Subject: [PATCH 15/28] update auto increment id --- dm/docs/RFCS/20211130_enhanced_pre_checker.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/dm/docs/RFCS/20211130_enhanced_pre_checker.md b/dm/docs/RFCS/20211130_enhanced_pre_checker.md index 254b0e9a76e..7bdd2e3907d 100644 --- a/dm/docs/RFCS/20211130_enhanced_pre_checker.md +++ b/dm/docs/RFCS/20211130_enhanced_pre_checker.md @@ -14,7 +14,7 @@ If we have a large number of tables in source, we will take too much time in che ### Inadequate check -* If downstream creates tables manually and the new downstream’s auto increment ID is not the same as the upstream, we shouldn’t check **auto_increment_ID** for errors. Users should be responsible for what they set. PS: **auto_increment_ID** is only checked when **schema_of_shard_tables** is true. +* Now we check it by `mapping` which is deprecated. If we don't set the `mapping` and don't ignore the **auto_increment_ID**, the pre-check will report an error. PS: **auto_increment_ID** is only checked when **schema_of_shard_tables** is true. * Dump privilege only checks RELOAD and SELECT. However, Dumpling supports different [consistency configurations](https://docs.pingcap.com/tidb/stable/dumpling-overview#adjust-dumplings-data-consistency-options), which need more privileges. * If online-ddl is set by true and a DDL is in online-ddl stage, DM will have a problem in all mode. Specifically, ghost table has been created, is executing the DDL, but is not renamed yet. In this case, DM will report an error when the ghost table is renamed after the dump phase. 
You can learn more about online-ddl [here](https://docs.pingcap.com/tidb-data-migration/stable/feature-online-ddl). * For schema_of_shard_tables, whatever pessimistic task and optimistic task, we all check it by comparing all sharding tables’ structures for consistency simply. For optimistic mode, we can do better. @@ -26,7 +26,6 @@ If we have a large number of tables in source, we will take too much time in che 1. Support concurrent check - table_schema - schema_of_shard_tables - - auto_increment_ID 2. Use mydumper.threads as **source_connection_concurrency**, which should update in our document. #### How to speed up? @@ -35,7 +34,11 @@ Since every checker is concurrent, we can split tables to **source_connection_co ### Optimize some check -1. We needn’t check **auto_increment_ID**, if the column of auto increment ID in upstream does not has an unique constraint in downstream. +1. Move **auto_increment_ID** from **schema_of_shard_tables** to **table_schema**. + - Only check if tables exist auto increment ID; + - If table exist, report a warning to user and tell them the method that can resolve the PK/UK conflict; + 1. If you set PK to AUTO_INCREMENT, you must make sure that the primary key in sharding tables is not duplicated; + 2. If sharding tables exit duplicated PK, you can refer to [document](https://docs.pingcap.com/tidb-data-migration/stable/shard-merge-best-practices#handle-conflicts-of-auto-increment-primary-key). 2. Dump_privilege will check different privileges according to different [consistency](https://docs.pingcap.com/tidb/stable/dumpling-overview#adjust-dumplings-data-consistency-options) and downstream on source. - For all consistency, we will check - REPLICATION CLIENT (global) @@ -67,8 +70,7 @@ Since every checker is concurrent, we can split tables to **source_connection_co - binlog_row_image - online_ddl(new added) 4. 
If task is full/increment/all mode, the following items will be forced to check: - - table_schema - - auto_increment_ID + - table_schema(contain auto_increment_ID) 5. Make the fail state more gentle, which is from `StateFailure` to `StateWarning`. - checkAutoIncrementKey(same as auto_increment_ID) - checkPK/UK From 9844a0311f609efc4b3fe224b58bbcc560f91c84 Mon Sep 17 00:00:00 2001 From: okJiang <819421878@qq.com> Date: Wed, 8 Dec 2021 19:15:18 +0800 Subject: [PATCH 16/28] add omissive version check --- dm/docs/RFCS/20211130_enhanced_pre_checker.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dm/docs/RFCS/20211130_enhanced_pre_checker.md b/dm/docs/RFCS/20211130_enhanced_pre_checker.md index 7bdd2e3907d..23cadf924ae 100644 --- a/dm/docs/RFCS/20211130_enhanced_pre_checker.md +++ b/dm/docs/RFCS/20211130_enhanced_pre_checker.md @@ -70,8 +70,10 @@ Since every checker is concurrent, we can split tables to **source_connection_co - binlog_row_image - online_ddl(new added) 4. If task is full/increment/all mode, the following items will be forced to check: + - version - table_schema(contain auto_increment_ID) 5. Make the fail state more gentle, which is from `StateFailure` to `StateWarning`. 
+ - checkVersion(same as version) - checkAutoIncrementKey(same as auto_increment_ID) - checkPK/UK From 89267d9116c0e50f33086c207546853d905eb514 Mon Sep 17 00:00:00 2001 From: okJiang <819421878@qq.com> Date: Fri, 10 Dec 2021 10:37:16 +0800 Subject: [PATCH 17/28] Apply suggestions from code review Co-authored-by: lance6716 --- dm/docs/RFCS/20211130_enhanced_pre_checker.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dm/docs/RFCS/20211130_enhanced_pre_checker.md b/dm/docs/RFCS/20211130_enhanced_pre_checker.md index 23cadf924ae..4a58522283f 100644 --- a/dm/docs/RFCS/20211130_enhanced_pre_checker.md +++ b/dm/docs/RFCS/20211130_enhanced_pre_checker.md @@ -6,18 +6,18 @@ Before the DM’s task starts, we should check some items to avoid start-task fa ### Bad user habits -We allow users to ignore all check-items, in which case the user's authority is too large to perform unexpected operations. +We allow users to ignore all check-items, in which case the user's privilege is too large to perform unexpected operations. ### Too much time overhead -If we have a large number of tables in source, we will take too much time in checking table schema, sharding table consistency and sharding table auto increment key. +If we have a large number of tables in source, we will take too much time in checking table schema, sharding table consistency and sharding table auto increment ID. ### Inadequate check -* Now we check it by `mapping` which is deprecated. If we don't set the `mapping` and don't ignore the **auto_increment_ID**, the pre-check will report an error. PS: **auto_increment_ID** is only checked when **schema_of_shard_tables** is true. +* Now we check auto increment ID by `column-mapping` which is deprecated. If we don't set the `column-mapping` and don't ignore the **auto_increment_ID**, the pre-check will report an error. PS: **auto_increment_ID** is only checked when **schema_of_shard_tables** is true. 
* Dump privilege only checks RELOAD and SELECT. However, Dumpling supports different [consistency configurations](https://docs.pingcap.com/tidb/stable/dumpling-overview#adjust-dumplings-data-consistency-options), which need more privileges. -* If online-ddl is set by true and a DDL is in online-ddl stage, DM will have a problem in all mode. Specifically, ghost table has been created, is executing the DDL, but is not renamed yet. In this case, DM will report an error when the ghost table is renamed after the dump phase. You can learn more about online-ddl [here](https://docs.pingcap.com/tidb-data-migration/stable/feature-online-ddl). -* For schema_of_shard_tables, whatever pessimistic task and optimistic task, we all check it by comparing all sharding tables’ structures for consistency simply. For optimistic mode, we can do better. +* If online-ddl is set by true and a DDL is in online-ddl stage, DM will have a problem in all mode. Specifically, when ghost table has been created but not been renamed, DM will report an error when the ghost table is renamed during the incremental phase. You can learn more about online-ddl [here](https://docs.pingcap.com/tidb-data-migration/stable/feature-online-ddl). +* For schema_of_shard_tables, whether it's a pessimistic task or an optimistic task, we all simply check consistency of schema by comparing all sharding tables’ structures. For optimistic mode, we can do better. ## Proposal @@ -38,7 +38,7 @@ Since every checker is concurrent, we can split tables to **source_connection_co - Only check if tables exist auto increment ID; - If table exist, report a warning to user and tell them the method that can resolve the PK/UK conflict; 1. If you set PK to AUTO_INCREMENT, you must make sure that the primary key in sharding tables is not duplicated; - 2. 
If sharding tables exit duplicated PK, you can refer to [document](https://docs.pingcap.com/tidb-data-migration/stable/shard-merge-best-practices#handle-conflicts-of-auto-increment-primary-key). + 2. If sharding tables have duplicated PK, you can refer to [document](https://docs.pingcap.com/tidb-data-migration/stable/shard-merge-best-practices#handle-conflicts-of-auto-increment-primary-key). 2. Dump_privilege will check different privileges according to different [consistency](https://docs.pingcap.com/tidb/stable/dumpling-overview#adjust-dumplings-data-consistency-options) and downstream on source. - For all consistency, we will check - REPLICATION CLIENT (global) From ee361ab684026c0e6f57e560a5848c24962cf0c6 Mon Sep 17 00:00:00 2001 From: okJiang <819421878@qq.com> Date: Fri, 10 Dec 2021 11:12:28 +0800 Subject: [PATCH 18/28] address comment --- dm/docs/RFCS/20211130_enhanced_pre_checker.md | 27 +++++++++---------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/dm/docs/RFCS/20211130_enhanced_pre_checker.md b/dm/docs/RFCS/20211130_enhanced_pre_checker.md index 4a58522283f..982d7f1911a 100644 --- a/dm/docs/RFCS/20211130_enhanced_pre_checker.md +++ b/dm/docs/RFCS/20211130_enhanced_pre_checker.md @@ -26,24 +26,20 @@ If we have a large number of tables in source, we will take too much time in che 1. Support concurrent check - table_schema - schema_of_shard_tables -2. Use mydumper.threads as **source_connection_concurrency**, which should update in our document. - -#### How to speed up? - -Since every checker is concurrent, we can split tables to **source_connection_concurrency** part, and create a checker for every part. + - auto_increment_ID +2. We casn adjust the concurrency by table numbers. ### Optimize some check -1. Move **auto_increment_ID** from **schema_of_shard_tables** to **table_schema**. 
- - Only check if tables exist auto increment ID; - - If table exist, report a warning to user and tell them the method that can resolve the PK/UK conflict; +1. **auto_increment_ID** only checked in sharding mode. + - If table exist auto increment ID, report a warning to user and tell them the method that can resolve the PK/UK conflict; 1. If you set PK to AUTO_INCREMENT, you must make sure that the primary key in sharding tables is not duplicated; 2. If sharding tables have duplicated PK, you can refer to [document](https://docs.pingcap.com/tidb-data-migration/stable/shard-merge-best-practices#handle-conflicts-of-auto-increment-primary-key). 2. Dump_privilege will check different privileges according to different [consistency](https://docs.pingcap.com/tidb/stable/dumpling-overview#adjust-dumplings-data-consistency-options) and downstream on source. - For all consistency, we will check - REPLICATION CLIENT (global) - SELECT (only dump table) - - For flush consistency: + - For flush consistency: - RELOAD (global) - For flush/lock consistency: - LOCK TABLES (only tables to dump) @@ -51,17 +47,17 @@ Since every checker is concurrent, we can split tables to **source_connection_co - PROCESS (global) 3. Add OnlineDDLChecker to check if a DDL of tables in allow list exists in online-ddl stage when DM task is all mode and online-ddl is true. 4. Enhance schema_of_shard_tables. - - If a task has passed the pre-checking when starting and exited, DM should keep the consistency during the task running. So we don't check it when restart the task. + - If a task has passed the pre-checking when starting and exited, DM should keep the consistency during the task running. So we **don't check it** when restart the task. - If not exit checkpoint: - - For all/full mode (pessimistic task): we keep the original check; - - For all/full mode (optimistic task): we check whether the shard tables schema meets the definition of [Optimistic Schema Compatibility](20191209_optimistic_ddl.md). 
If that meets, we can create tables by the compatible schema in the dump stage. - - For incremental mode: not check the sharding tables’ schema, because the table schema obtained from show create table is not the schema at the point of binlog. + - For all/full mode (pessimistic task): we keep **the original check**; + - For all/full mode (optimistic task): we check whether the shard tables schema meets **the definition of [Optimistic Schema Compatibility](20191209_optimistic_ddl.md)**. If that meets, we can create tables by the compatible schema in the dump stage. + - For incremental mode: **not check** the sharding tables’ schema, because the table schema obtained from show create table is not the schema at the point of binlog. ### Restrict user usage 1. Remove all `ignore_check_items` settings from the [document](https://docs.pingcap.com/tidb-data-migration/stable/precheck#disable-checking-items). If the following items are detected to be set in the configuration, a warning will be reported. 2. If task is full/all mode, the following items will be forced to check (correspondingly, it will not be check in increment mode): - dump_privilege - - schema_of_shard_tables + - schema_of_shard_tables(only for sharding mode) 3. If task is increment/all mode, the following items will be forced to check (correspondingly, it will not be check in full mode): - replication_privilege - server_id @@ -71,7 +67,8 @@ Since every checker is concurrent, we can split tables to **source_connection_co - online_ddl(new added) 4. If task is full/increment/all mode, the following items will be forced to check: - version - - table_schema(contain auto_increment_ID) + - table_schema + - auto_increment_ID(only for sharding mode) 5. Make the fail state more gentle, which is from `StateFailure` to `StateWarning`. 
- checkVersion(same as version) - checkAutoIncrementKey(same as auto_increment_ID) From 12c9fed61f991f1bae71c4ea6573f463ec724af8 Mon Sep 17 00:00:00 2001 From: okJiang <819421878@qq.com> Date: Tue, 14 Dec 2021 11:54:51 +0800 Subject: [PATCH 19/28] address comment --- dm/docs/RFCS/20211130_enhanced_pre_checker.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/dm/docs/RFCS/20211130_enhanced_pre_checker.md b/dm/docs/RFCS/20211130_enhanced_pre_checker.md index 982d7f1911a..2d9e301136d 100644 --- a/dm/docs/RFCS/20211130_enhanced_pre_checker.md +++ b/dm/docs/RFCS/20211130_enhanced_pre_checker.md @@ -38,17 +38,18 @@ If we have a large number of tables in source, we will take too much time in che 2. Dump_privilege will check different privileges according to different [consistency](https://docs.pingcap.com/tidb/stable/dumpling-overview#adjust-dumplings-data-consistency-options) and downstream on source. - For all consistency, we will check - REPLICATION CLIENT (global) - - SELECT (only dump table) + - SELECT (only INFORMATION_SCHEMA's tables and dump tables) - For flush consistency: - RELOAD (global) - For flush/lock consistency: - - LOCK TABLES (only tables to dump) - - For TiDB source databases: - - PROCESS (global) + - LOCK TABLES (only dump tables) + + As we know, TiDB is different from MySQL in some place. If source is TiDB, we also need: + - PROCESS (global) 3. Add OnlineDDLChecker to check if a DDL of tables in allow list exists in online-ddl stage when DM task is all mode and online-ddl is true. 4. Enhance schema_of_shard_tables. - If a task has passed the pre-checking when starting and exited, DM should keep the consistency during the task running. So we **don't check it** when restart the task. 
- - If not exit checkpoint: + - If not exist checkpoint: - For all/full mode (pessimistic task): we keep **the original check**; - For all/full mode (optimistic task): we check whether the shard tables schema meets **the definition of [Optimistic Schema Compatibility](20191209_optimistic_ddl.md)**. If that meets, we can create tables by the compatible schema in the dump stage. - For incremental mode: **not check** the sharding tables’ schema, because the table schema obtained from show create table is not the schema at the point of binlog. From f90c8ac7cc9bc1d094348531d6fc0094c4e963dd Mon Sep 17 00:00:00 2001 From: okJiang <819421878@qq.com> Date: Wed, 15 Dec 2021 11:50:30 +0800 Subject: [PATCH 20/28] adjust version checker --- dm/docs/RFCS/20211130_enhanced_pre_checker.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dm/docs/RFCS/20211130_enhanced_pre_checker.md b/dm/docs/RFCS/20211130_enhanced_pre_checker.md index 2d9e301136d..1c3db7e5e3c 100644 --- a/dm/docs/RFCS/20211130_enhanced_pre_checker.md +++ b/dm/docs/RFCS/20211130_enhanced_pre_checker.md @@ -18,6 +18,7 @@ If we have a large number of tables in source, we will take too much time in che * Dump privilege only checks RELOAD and SELECT. However, Dumpling supports different [consistency configurations](https://docs.pingcap.com/tidb/stable/dumpling-overview#adjust-dumplings-data-consistency-options), which need more privileges. * If online-ddl is set by true and a DDL is in online-ddl stage, DM will have a problem in all mode. Specifically, when ghost table has been created but not been renamed, DM will report an error when the ghost table is renamed during the incremental phase. You can learn more about online-ddl [here](https://docs.pingcap.com/tidb-data-migration/stable/feature-online-ddl). * For schema_of_shard_tables, whether it's a pessimistic task or an optimistic task, we all simply check consistency of schema by comparing all sharding tables’ structures. For optimistic mode, we can do better. 
+* Previously, the version checker checked that MySQLVersion >= 5.6.0 and MariadbVersion >= 10.1.2. However, we find more and more incompatibility problems in MySQLVersion >= 8.0.0 and Mariadb. Since support for MySQL 8.0 and Mariadb is still experimental, the checker will report a warning for them. ## Proposal @@ -53,6 +54,10 @@ If we have a large number of tables in source, we will take too much time in che - For all/full mode (pessimistic task): we keep **the original check**; - For all/full mode (optimistic task): we check whether the shard tables schema meets **the definition of [Optimistic Schema Compatibility](20191209_optimistic_ddl.md)**. If that meets, we can create tables by the compatible schema in the dump stage. - For incremental mode: **not check** the sharding tables’ schema, because the table schema obtained from show create table is not the schema at the point of binlog. +5. Version checker will report a warning in the following cases: + - MySQL < 5.6.0 + - MySQL >= 8.0.0 + - Mariadb ### Restrict user usage 1. Remove all `ignore_check_items` settings from the [document](https://docs.pingcap.com/tidb-data-migration/stable/precheck#disable-checking-items). If the following items are detected to be set in the configuration, a warning will be reported.
From 320b0091b60b6de74b671765d55c337ccd6a99aa Mon Sep 17 00:00:00 2001 From: okJiang <819421878@qq.com> Date: Tue, 21 Dec 2021 11:07:36 +0800 Subject: [PATCH 21/28] Apply suggestions from code review Co-authored-by: lance6716 --- dm/docs/RFCS/20211130_enhanced_pre_checker.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/dm/docs/RFCS/20211130_enhanced_pre_checker.md b/dm/docs/RFCS/20211130_enhanced_pre_checker.md index 1c3db7e5e3c..f19fa74d672 100644 --- a/dm/docs/RFCS/20211130_enhanced_pre_checker.md +++ b/dm/docs/RFCS/20211130_enhanced_pre_checker.md @@ -28,15 +28,15 @@ If we have a large number of tables in source, we will take too much time in che - table_schema - schema_of_shard_tables - auto_increment_ID -2. We casn adjust the concurrency by table numbers. +2. We can adjust the concurrency by table numbers. ### Optimize some check -1. **auto_increment_ID** only checked in sharding mode. - - If table exist auto increment ID, report a warning to user and tell them the method that can resolve the PK/UK conflict; +1. Auto_increment_ID only be checked in sharding mode. + - If table exists auto increment ID, report a warning to user and tell them the method that can resolve the PK/UK conflict; 1. If you set PK to AUTO_INCREMENT, you must make sure that the primary key in sharding tables is not duplicated; 2. If sharding tables have duplicated PK, you can refer to [document](https://docs.pingcap.com/tidb-data-migration/stable/shard-merge-best-practices#handle-conflicts-of-auto-increment-primary-key). -2. Dump_privilege will check different privileges according to different [consistency](https://docs.pingcap.com/tidb/stable/dumpling-overview#adjust-dumplings-data-consistency-options) and downstream on source. +2. Dump_privilege will check different privileges according to different [consistency](https://docs.pingcap.com/tidb/stable/dumpling-overview#adjust-dumplings-data-consistency-options) on source. 
- For all consistency, we will check - REPLICATION CLIENT (global) - SELECT (only INFORMATION_SCHEMA's tables and dump tables) @@ -50,11 +50,11 @@ If we have a large number of tables in source, we will take too much time in che 3. Add OnlineDDLChecker to check if a DDL of tables in allow list exists in online-ddl stage when DM task is all mode and online-ddl is true. 4. Enhance schema_of_shard_tables. - If a task has passed the pre-checking when starting and exited, DM should keep the consistency during the task running. So we **don't check it** when restart the task. - - If not exist checkpoint: + - If there does not exist checkpoints: - For all/full mode (pessimistic task): we keep **the original check**; - For all/full mode (optimistic task): we check whether the shard tables schema meets **the definition of [Optimistic Schema Compatibility](20191209_optimistic_ddl.md)**. If that meets, we can create tables by the compatible schema in the dump stage. - For incremental mode: **not check** the sharding tables’ schema, because the table schema obtained from show create table is not the schema at the point of binlog. -5. Version checker will report a warning in the following cases: +5. Version checker will report a warning in the following cases: - MySQL < 5.6.0 - MySQL >= 8.0.0 - Mariadb @@ -71,7 +71,7 @@ If we have a large number of tables in source, we will take too much time in che - binlog_format - binlog_row_image - online_ddl(new added) -4. If task is full/increment/all mode, the following items will be forced to check: +4. The following items will be forced to check: - version - table_schema - auto_increment_ID(only for sharding mode) @@ -82,7 +82,7 @@ If we have a large number of tables in source, we will take too much time in che ### Move checker from [tidb-tools](https://github.com/pingcap/tidb-tools/tree/master/pkg/check) to DM -After this change, checker is deeply coupled to DM, both with dump Privilege and optimistic pessimistic coordination. 
And checker is only used by DM (TiCDC and TiDB all don’t use it). So removing checkers from tidb-tools to DM is more convenient for development work。 +After this change, checker is deeply coupled to DM, both with dump privilege checking and optimistic pessimistic coordination. And checker is only used by DM (TiCDC and TiDB all don’t use it). So moving checkers from tidb-tools to DM is more convenient for development work. In detail, we do not take the initiative to submit pr to the tidb-tools repository. Instead, we will replace the checker in tidb-tools step by step during the development of this feature. From cc13850417465bbca53c7180502c1ef607d6def3 Mon Sep 17 00:00:00 2001 From: okJiang <819421878@qq.com> Date: Tue, 21 Dec 2021 11:21:26 +0800 Subject: [PATCH 22/28] address comment --- dm/docs/RFCS/20211130_enhanced_pre_checker.md | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/dm/docs/RFCS/20211130_enhanced_pre_checker.md b/dm/docs/RFCS/20211130_enhanced_pre_checker.md index f19fa74d672..7efa404bbbf 100644 --- a/dm/docs/RFCS/20211130_enhanced_pre_checker.md +++ b/dm/docs/RFCS/20211130_enhanced_pre_checker.md @@ -34,19 +34,15 @@ If we have a large number of tables in source, we will take too much time in che 1. Auto_increment_ID only be checked in sharding mode. - If table exists auto increment ID, report a warning to user and tell them the method that can resolve the PK/UK conflict; - 1. If you set PK to AUTO_INCREMENT, you must make sure that the primary key in sharding tables is not duplicated; + 1. If you set PK to AUTO INCREMENT, you must make sure that the primary key in sharding tables is not duplicated; 2. If sharding tables have duplicated PK, you can refer to [document](https://docs.pingcap.com/tidb-data-migration/stable/shard-merge-best-practices#handle-conflicts-of-auto-increment-primary-key).
+ - And if they have finished resolving the conflict, for example by manually creating the table and removing AUTO INCREMENT, we should not report the warning. 2. Dump_privilege will check different privileges according to different [consistency](https://docs.pingcap.com/tidb/stable/dumpling-overview#adjust-dumplings-data-consistency-options) on source. - For all consistency, we will check - - REPLICATION CLIENT (global) - SELECT (only INFORMATION_SCHEMA's tables and dump tables) - For flush consistency: - RELOAD (global) - For flush/lock consistency: - - LOCK TABLES (only dump tables) - - As we know, TiDB is different from MySQL in some place. If source is TiDB, we also need: - - PROCESS (global) 3. Add OnlineDDLChecker to check if a DDL of tables in allow list exists in online-ddl stage when DM task is all mode and online-ddl is true. 4. Enhance schema_of_shard_tables. - If a task has passed the pre-checking when starting and exited, DM should keep the consistency during the task running. So we **don't check it** when restart the task. @@ -58,6 +54,7 @@ If we have a large number of tables in source, we will take too much time in che - MySQL < 5.6.0 - MySQL >= 8.0.0 - Mariadb + - Others ### Restrict user usage 1. Remove all `ignore_check_items` settings from the [document](https://docs.pingcap.com/tidb-data-migration/stable/precheck#disable-checking-items). If the following items are detected to be set in the configuration, a warning will be reported.
From f7992f7ee0b760f12833ec8f965dbaa2b4d98ba5 Mon Sep 17 00:00:00 2001 From: okJiang <819421878@qq.com> Date: Tue, 21 Dec 2021 16:00:24 +0800 Subject: [PATCH 23/28] address comment --- dm/docs/RFCS/20211130_enhanced_pre_checker.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dm/docs/RFCS/20211130_enhanced_pre_checker.md b/dm/docs/RFCS/20211130_enhanced_pre_checker.md index 7efa404bbbf..899aa4df1ae 100644 --- a/dm/docs/RFCS/20211130_enhanced_pre_checker.md +++ b/dm/docs/RFCS/20211130_enhanced_pre_checker.md @@ -73,9 +73,9 @@ If we have a large number of tables in source, we will take too much time in che - table_schema - auto_increment_ID(only for sharding mode) 5. Make the fail state more gentle, which is from `StateFailure` to `StateWarning`. - - checkVersion(same as version) - - checkAutoIncrementKey(same as auto_increment_ID) - - checkPK/UK + - VersionChecker(same as version) + - AutoIncrementKeyChecker(same as auto_increment_ID) + - PK/UKChecker ### Move checker from [tidb-tools](https://github.com/pingcap/tidb-tools/tree/master/pkg/check) to DM From 1f83940dbc9ec51482e5856e7ef883c1b68de359 Mon Sep 17 00:00:00 2001 From: okJiang <819421878@qq.com> Date: Wed, 22 Dec 2021 13:32:55 +0800 Subject: [PATCH 24/28] address comment --- dm/docs/RFCS/20211130_enhanced_pre_checker.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dm/docs/RFCS/20211130_enhanced_pre_checker.md b/dm/docs/RFCS/20211130_enhanced_pre_checker.md index 899aa4df1ae..8013b64583e 100644 --- a/dm/docs/RFCS/20211130_enhanced_pre_checker.md +++ b/dm/docs/RFCS/20211130_enhanced_pre_checker.md @@ -43,6 +43,7 @@ If we have a large number of tables in source, we will take too much time in che - For flush consistency: - RELOAD (global) - For flush/lock consistency: + - LOCK TABLES (only dump tables) 3. Add OnlineDDLChecker to check if a DDL of tables in allow list exists in online-ddl stage when DM task is all mode and online-ddl is true. 4. 
Enhance schema_of_shard_tables. - If a task has passed the pre-checking when starting and exited, DM should keep the consistency during the task running. So we **don't check it** when restart the task. @@ -54,7 +55,7 @@ If we have a large number of tables in source, we will take too much time in che - MySQL < 5.6.0 - MySQL >= 8.0.0 - Mariadb - - Others + - Others we don't support ### Restrict user usage 1. Remove all `ignore_check_items` settings from the [document](https://docs.pingcap.com/tidb-data-migration/stable/precheck#disable-checking-items). If the following items are detected to be set in the configuration, a warning will be reported. From a8dc9dafbd110ac421f9d28535b5c731b6f80d82 Mon Sep 17 00:00:00 2001 From: okJiang <819421878@qq.com> Date: Wed, 22 Dec 2021 13:34:45 +0800 Subject: [PATCH 25/28] Update dm/docs/RFCS/20211130_enhanced_pre_checker.md Co-authored-by: lance6716 --- dm/docs/RFCS/20211130_enhanced_pre_checker.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dm/docs/RFCS/20211130_enhanced_pre_checker.md b/dm/docs/RFCS/20211130_enhanced_pre_checker.md index 8013b64583e..27b7ccede63 100644 --- a/dm/docs/RFCS/20211130_enhanced_pre_checker.md +++ b/dm/docs/RFCS/20211130_enhanced_pre_checker.md @@ -69,7 +69,7 @@ If we have a large number of tables in source, we will take too much time in che - binlog_format - binlog_row_image - online_ddl(new added) -4. The following items will be forced to check: +4. 
The following items will always be forced to check: - version - table_schema - auto_increment_ID(only for sharding mode) From 93ae0023fe3da47525145094e8185f9cae998271 Mon Sep 17 00:00:00 2001 From: okJiang <819421878@qq.com> Date: Wed, 22 Dec 2021 13:42:38 +0800 Subject: [PATCH 26/28] update --- dm/docs/RFCS/20211130_enhanced_pre_checker.md | 1 - 1 file changed, 1 deletion(-) diff --git a/dm/docs/RFCS/20211130_enhanced_pre_checker.md b/dm/docs/RFCS/20211130_enhanced_pre_checker.md index 8013b64583e..f623188ed20 100644 --- a/dm/docs/RFCS/20211130_enhanced_pre_checker.md +++ b/dm/docs/RFCS/20211130_enhanced_pre_checker.md @@ -55,7 +55,6 @@ If we have a large number of tables in source, we will take too much time in che - MySQL < 5.6.0 - MySQL >= 8.0.0 - Mariadb - - Others we don't support ### Restrict user usage 1. Remove all `ignore_check_items` settings from the [document](https://docs.pingcap.com/tidb-data-migration/stable/precheck#disable-checking-items). If the following items are detected to be set in the configuration, a warning will be reported. From d5f8d752d8253cae2e617e4cdfd300c564315c51 Mon Sep 17 00:00:00 2001 From: okJiang <819421878@qq.com> Date: Wed, 22 Dec 2021 16:46:31 +0800 Subject: [PATCH 27/28] remove LOCK TABLES from flush consistency --- dm/docs/RFCS/20211130_enhanced_pre_checker.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dm/docs/RFCS/20211130_enhanced_pre_checker.md b/dm/docs/RFCS/20211130_enhanced_pre_checker.md index 26484e19484..1d7fa2bd3ae 100644 --- a/dm/docs/RFCS/20211130_enhanced_pre_checker.md +++ b/dm/docs/RFCS/20211130_enhanced_pre_checker.md @@ -42,7 +42,7 @@ If we have a large number of tables in source, we will take too much time in che - SELECT (only INFORMATION_SCHEMA's tables and dump tables) - For flush consistency: - RELOAD (global) - - For flush/lock consistency: + - For lock consistency: - LOCK TABLES (only dump tables) 3. 
Add OnlineDDLChecker to check if a DDL of tables in allow list exists in online-ddl stage when DM task is all mode and online-ddl is true. 4. Enhance schema_of_shard_tables. From 2ea1a63c2d7cedfb2dda6e9059be15807f674acf Mon Sep 17 00:00:00 2001 From: okJiang <819421878@qq.com> Date: Thu, 23 Dec 2021 12:07:18 +0800 Subject: [PATCH 28/28] add Binlog_Do_DB --- dm/docs/RFCS/20211130_enhanced_pre_checker.md | 1 + 1 file changed, 1 insertion(+) diff --git a/dm/docs/RFCS/20211130_enhanced_pre_checker.md b/dm/docs/RFCS/20211130_enhanced_pre_checker.md index 1d7fa2bd3ae..d43320fb4bc 100644 --- a/dm/docs/RFCS/20211130_enhanced_pre_checker.md +++ b/dm/docs/RFCS/20211130_enhanced_pre_checker.md @@ -68,6 +68,7 @@ If we have a large number of tables in source, we will take too much time in che - binlog_format - binlog_row_image - online_ddl(new added) + - binlog_do_db(new added) 4. The following items will always be forced to check: - version - table_schema