From 28191c08476847e22e48373349f1549203e89d30 Mon Sep 17 00:00:00 2001 From: Clemens Kolbitsch Date: Wed, 11 Mar 2020 20:21:09 -0700 Subject: [PATCH 1/8] add integration tests for fk data migration --- Makefile | 2 +- copydb/copydb.go | 24 ++++++++-- examples/copydb/conf.json | 16 +++---- go.sum | 11 +++++ table_schema_cache.go | 69 ++++++++++++++++++++++++++++ test/go/table_schema_cache_test.go | 51 ++++++++++++++++++++ test/helpers/db_helper.rb | 39 ++++++++++++++++ test/helpers/ghostferry_helper.rb | 4 ++ test/integration/foreign_key_test.rb | 20 ++++++++ test/lib/go/integrationferry.go | 5 ++ utils.go | 3 +- 11 files changed, 230 insertions(+), 14 deletions(-) create mode 100644 test/integration/foreign_key_test.rb diff --git a/Makefile b/Makefile index 0b19d9b33..0882d2d78 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ PROJECT_DEBS := $(foreach name,$(PROJECTS),$(name)-deb) # Target specific variable, set proj to have a valid value. PROJECT_PKG = ./$(proj)/cmd PROJECT_BIN = ghostferry-$(proj) -BIN_TARGET = $(GOBIN)/$(PROJECT_BIN) +BIN_TARGET = ./ DEB_TARGET = $(BUILD_DIR)/$(PROJECT_BIN)_$(VERSION_STR).deb PLATFORM := $(shell uname -s | tr A-Z a-z) diff --git a/copydb/copydb.go b/copydb/copydb.go index ffb03181d..e06374f07 100644 --- a/copydb/copydb.go +++ b/copydb/copydb.go @@ -58,15 +58,33 @@ func (this *CopydbFerry) Start() error { } func (this *CopydbFerry) CreateDatabasesAndTables() error { + logger := logrus.WithField("tag", "create_databases_and_tables") + // We need to create the same table/schemas on the target database // as the ones we are copying. 
- logrus.Info("creating databases and tables on target") - for _, tableName := range this.Ferry.Tables.GetTableListWithPriority(this.config.TablesToBeCreatedFirst) { + logger.Info("creating databases and tables on target") + var prioritzedTableNames []string + if len(this.config.TablesToBeCreatedFirst) > 0 { + // if specified, use what the config tells us + logger.Debug("config contains table creation order") + prioritzedTableNames = this.Ferry.Tables.GetTableListWithPriority(this.config.TablesToBeCreatedFirst) + } else { + // otherwise infer the right order ourselves + logger.Debug("inferring table creation order from source database") + var err error + prioritzedTableNames, err = this.Ferry.Tables.GetTableCreationOrder(this.Ferry.SourceDB) + if err != nil { + return err + } + } + + for _, tableName := range prioritzedTableNames { + logger.Debugf("creating database table %s", tableName) t := strings.Split(tableName, ".") err := this.createDatabaseIfExistsOnTarget(t[0]) if err != nil { - logrus.WithError(err).WithField("database", t[0]).Error("cannot create database, this may leave the target database in an insane state") + logger.WithField("database", t[0]).Error("cannot create database, this may leave the target database in an insane state") return err } diff --git a/examples/copydb/conf.json b/examples/copydb/conf.json index bdd0d59a9..28eea760d 100644 --- a/examples/copydb/conf.json +++ b/examples/copydb/conf.json @@ -6,7 +6,8 @@ "Pass": "", "Collation": "utf8mb4_unicode_ci", "Params": { - "charset": "utf8mb4" + "charset": "utf8mb4", + "foreign_key_checks": "0" } }, @@ -17,7 +18,8 @@ "Pass": "", "Collation": "utf8mb4_unicode_ci", "Params": { - "charset": "utf8mb4" + "charset": "utf8mb4", + "foreign_key_checks": "0" } }, @@ -29,12 +31,6 @@ "Blacklist": ["schema_migrations"] }, - "ForceIndexForVerification": { - "abc": { - "table1": "forced_index_name" - } - }, - "DumpStateOnSignal": true, "VerifierType": "ChecksumTable", @@ -44,5 +40,7 @@ "Custom Script 2": 
["examples/copydb/s2"] }, - "DumpStateToStdoutOnError": true + "DumpStateToStdoutOnError": true, + + "SkipForeignKeyConstraintsCheck": true } diff --git a/go.sum b/go.sum index 4735b4d6c..524754281 100644 --- a/go.sum +++ b/go.sum @@ -16,6 +16,7 @@ github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db h1:woRePGFeVFfLKN/pOkfl+p/TAqKOfFu+7KPlMVpok/w= github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/google/renameio v0.1.0 h1:GOZbcHa3HfsPKPlmyPyN2KEohoMXOhdMbHrvbpl2QaA= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/gorilla/context v1.1.1 h1:AWwleXJkX/nhcU9bZSnZoi3h/qGYqQAGhq6zZe/aQW8= github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg= @@ -23,10 +24,13 @@ github.com/gorilla/mux v1.6.1 h1:KOwqsTYZdeuMacU7CxjMNYEKeBvLbxW+psodrbcEa3A= github.com/gorilla/mux v1.6.1/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= github.com/jmoiron/sqlx v1.2.0 h1:41Ip0zITnmWNR/vHV+S4m+VoUivnWY5E4OJfLZjCJMA= github.com/jmoiron/sqlx v1.2.0/go.mod h1:1FEQNm3xlJgrMD+FBdI9+xvCksHtbpVBBw5dYhBSsks= +github.com/kisielk/gotool v1.0.0 h1:AV2c/EiW3KqPNT9ZKl07ehoAGi4C5/01Cfbblndcapg= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/konsorten/go-windows-terminal-sequences v1.0.1 h1:mweAR1A6xJ3oS2pRaGiHgQ4OO8tzTaLawm8vnODuwDk= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pty v1.1.1 h1:VkoXIwSboBpnk99O/KFauAEILuNHv5DVFKZMBN/gUgw= github.com/kr/pty v1.1.1/go.mod 
h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= @@ -51,12 +55,15 @@ github.com/pingcap/errors v0.11.0 h1:DCJQB8jrHbQ1VVlMFIrbj2ApScNNotVmkSNplu2yUt4 github.com/pingcap/errors v0.11.0/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9 h1:AJD9pZYm72vMgPcQDww9rkZ1DnWfl0pXV3BOWlkYIjA= github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= +github.com/pingcap/parser v0.0.0-20190506092653-e336082eb825 h1:U9Kdnknj4n2v76Mg7wazevZ5N9U1OIaMwSNRVLEcLX0= github.com/pingcap/parser v0.0.0-20190506092653-e336082eb825/go.mod h1:1FNvfp9+J0wvc4kl8eGNh7Rqrxveg15jJoWo/a0uHwA= +github.com/pingcap/tipb v0.0.0-20190428032612-535e1abaa330 h1:rRMLMjIMFulCX9sGKZ1hoov/iROMsKyC8Snc02nSukw= github.com/pingcap/tipb v0.0.0-20190428032612-535e1abaa330/go.mod h1:RtkHW8WbcNxj8lsbzjaILci01CtYnYbIkQhjyZWrWVI= github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.3.0 h1:RR9dF3JtopPvtkroDZuVD7qquD0bnHlKSqaQhgwt8yk= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/satori/go.uuid v1.2.0 h1:0uYX9dsZ2yD7q2RtLRtPSdGDWzjeM3TbMJP9utgA0ww= github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= @@ -73,6 +80,7 @@ github.com/sirupsen/logrus v1.4.1/go.mod h1:ni0Sbl8bgC9z8RoU9G6nDWqqs/fq4eDPysMB github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE= github.com/sirupsen/logrus v1.8.1/go.mod 
h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.1.1 h1:2vfRuCMp5sSVIDSqO8oNnWJq7mPa6KVP3iPIwFBuy8A= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= @@ -95,12 +103,14 @@ golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529 h1:iMGN4xG0cnqj3t+zOM8wUB golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/lint v0.0.0-20190930215403-16217165b5de h1:5hukYrvBGR8/eNkX5mdUezrA6JiaEZDtJb9Ei+1LlBs= golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e h1:JgcxKXxCjrA2tyDP/aNU9K0Ck5Czfk6C7e2tMw7+bSI= golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190620200207-3b0461eec859 h1:R/3boaszxrf1GEUWTVDzSKVwLmSJpwZ1yqXm8j0v2QI= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/sync v0.0.0-20190423024810-112230192c58 h1:8gQV6CLnAEikrhgkHFbMAEhagSSnXWGV915qUMm9mrU= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys 
v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -129,6 +139,7 @@ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8 gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU= gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/errgo.v2 v2.1.0 h1:0vLT13EuvQ0hNvakwLuFZ/jYrLp5F3kcWHXdRggjCE8= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/natefinch/lumberjack.v2 v2.0.0 h1:1Lc07Kr7qY4U2YPouBjpCLxpiyxIVoxqXgkXLknAOE8= gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k= diff --git a/table_schema_cache.go b/table_schema_cache.go index d1861633b..57c48ff42 100644 --- a/table_schema_cache.go +++ b/table_schema_cache.go @@ -342,6 +342,75 @@ func (c TableSchemaCache) GetTableListWithPriority(priorityList []string) (prior return } +func (c TableSchemaCache) GetTableCreationOrder(db *sql.DB) ([]string, error) { + visitedTables := map[string]bool{} + tableMap := map[string][]string{} + tableOrder := []string{} + + for table := range c { + t := strings.Split(table, ".") + referencedTables, err := getForeignKeyTablesOfTable(db, t[0], t[1]) + if err != nil { + logrus.WithField("table", table).Error("cannot fetch foreign keys") + } + tableMap[table] = referencedTables + } + + for table := range c { + if _, found := visitedTables[table]; !found { + visitedTables, tableOrder = getTableCreationOrderUtil(visitedTables, tableOrder, tableMap, table) + } + } + + return tableOrder, nil +} + +func getTableCreationOrderUtil(visitedTables map[string]bool, tableOrder []string, tableMap map[string][]string, currTable string) (map[string]bool, []string) { + visitedTables[currTable] = true + for _, referencedTable := range 
tableMap[currTable] { + if _, found := visitedTables[referencedTable]; !found { + visitedTables, tableOrder = getTableCreationOrderUtil(visitedTables, tableOrder, tableMap, referencedTable) + } + } + tableOrder = append(tableOrder, currTable) + return visitedTables, tableOrder +} + +func getForeignKeyTablesOfTable(db *sql.DB, schema string, table string) ([]string, error) { + fkTables := []string{} + visitedTables := map[string]bool{} + + query := fmt.Sprintf(` + SELECT CONCAT(REFERENCED_TABLE_SCHEMA, '.', REFERENCED_TABLE_NAME) + FROM information_schema.key_column_usage + WHERE TABLE_SCHEMA = '%s' AND TABLE_NAME = '%s' AND REFERENCED_TABLE_NAME IS NOT NULL + `, + schema, + table, + ) + + rows, err := db.Query(query) + if err != nil { + return fkTables, err + } + + var tableName string + for rows.Next() { + err = rows.Scan(&tableName) + if err != nil { + return fkTables, err + } + + if _, found := visitedTables[tableName]; !found && tableName != table { + fkTables = append(fkTables, tableName) + visitedTables[tableName] = true + } + + } + + return fkTables, nil +} + func showDatabases(c *sql.DB) ([]string, error) { rows, err := c.Query("show databases") if err != nil { diff --git a/test/go/table_schema_cache_test.go b/test/go/table_schema_cache_test.go index 4b51c2c08..54e4cefbb 100644 --- a/test/go/table_schema_cache_test.go +++ b/test/go/table_schema_cache_test.go @@ -3,6 +3,7 @@ package test import ( "fmt" "sort" + "strings" "testing" "github.com/Shopify/ghostferry" @@ -444,6 +445,56 @@ func (this *TableSchemaCacheTestSuite) TestTargetToSourceRewritesErrorsOnDuplica this.Require().Equal(err.Error(), "duplicate target to source rewrite detected") } +func (this *TableSchemaCacheTestSuite) TestGetTableCreationOrderWithoutForeignKeyConstraints() { + tables, err := ghostferry.LoadTables(this.Ferry.SourceDB, this.Ferry.TableFilter, nil, nil, nil, nil) + this.Require().Nil(err) + + creationOrder, err := tables.GetTableCreationOrder(this.Ferry.SourceDB) + 
this.Require().Nil(err) + + this.Require().Equal(len(creationOrder), 3) + this.Require().ElementsMatch(creationOrder, tables.AllTableNames()) +} + +func (this *TableSchemaCacheTestSuite) TestGetTableCreationOrderWithForeignKeyConstraints() { + _, err := this.Ferry.SourceDB.Exec(fmt.Sprintf("CREATE TABLE `%s`.`table1` (`id1` BIGINT, PRIMARY KEY (`id1`))", testhelpers.TestSchemaName)) + this.Require().Nil(err) + _, err = this.Ferry.SourceDB.Exec(fmt.Sprintf("CREATE TABLE `%s`.`table2` (`id2` BIGINT, PRIMARY KEY (`id2`), CONSTRAINT `fkc2` FOREIGN KEY (`id2`) REFERENCES `table1` (`id1`))", testhelpers.TestSchemaName)) + this.Require().Nil(err) + _, err = this.Ferry.SourceDB.Exec(fmt.Sprintf("CREATE TABLE `%s`.`table3` (`id3` BIGINT, PRIMARY KEY (`id3`), CONSTRAINT `fkc3_1` FOREIGN KEY (`id3`) REFERENCES `table1` (`id1`), CONSTRAINT `fkc3_2` FOREIGN KEY (`id3`) REFERENCES `table2` (`id2`))", testhelpers.TestSchemaName)) + this.Require().Nil(err) + + tables, err := ghostferry.LoadTables(this.Ferry.SourceDB, this.Ferry.TableFilter, nil, nil, nil, nil) + this.Require().Nil(err) + + creationOrder, err := tables.GetTableCreationOrder(this.Ferry.SourceDB) + this.Require().Nil(err) + + // 3 tests from the base test setup plus 3 added above + this.Require().Equal(len(creationOrder), 6) + this.Require().ElementsMatch(creationOrder, tables.AllTableNames()) + + // verify the order: all we care for is that table1 is created before + // table2, which is created before table3 + table1Index := -1 + table2Index := -1 + table3Index := -1 + for i, tableName := range creationOrder { + if strings.HasSuffix(tableName, ".table1") { + table1Index = i + } else if strings.HasSuffix(tableName, ".table2") { + table2Index = i + } else if strings.HasSuffix(tableName, ".table3") { + table3Index = i + } + } + this.Require().NotEqual(table1Index, -1) + this.Require().NotEqual(table2Index, -1) + this.Require().NotEqual(table3Index, -1) + this.Require().True(table1Index < table2Index) + 
this.Require().True(table2Index < table3Index) +} + func TestTableSchemaCache(t *testing.T) { testhelpers.SetupTest() suite.Run(t, &TableSchemaCacheTestSuite{GhostferryUnitTestSuite: &testhelpers.GhostferryUnitTestSuite{}}) diff --git a/test/helpers/db_helper.rb b/test/helpers/db_helper.rb index 68e6468c9..cf5fbc834 100644 --- a/test/helpers/db_helper.rb +++ b/test/helpers/db_helper.rb @@ -138,6 +138,45 @@ def seed_random_data(connection, database_name: DEFAULT_DB, table_name: DEFAULT_ end end + def disable_foreign_key_constraints + source_db.query("SET FOREIGN_KEY_CHECKS=0") + end + + def enable_foreign_key_constraints + source_db.query("SET FOREIGN_KEY_CHECKS=1") + end + + def seed_random_data_with_fk_constraints(connection, database_name: DEFAULT_DB, number_of_rows: 1111) + dbtable1 = full_table_name(database_name, "test_fk_table1") + dbtable2 = full_table_name(database_name, "test_fk_table2") + dbtable3 = full_table_name(database_name, "test_fk_table3") + + connection.query("CREATE DATABASE IF NOT EXISTS #{database_name}") + connection.query("CREATE TABLE IF NOT EXISTS #{dbtable1} (id1 bigint(20), primary key(id1))") + connection.query("CREATE TABLE IF NOT EXISTS #{dbtable2} (id2 bigint(20), primary key(id2), CONSTRAINT fkc2 foreign key(id2) REFERENCES #{dbtable1}(id1))") + connection.query("CREATE TABLE IF NOT EXISTS #{dbtable3} (id3 bigint(20), primary key(id3), CONSTRAINT fkc3 foreign key(id3) REFERENCES #{dbtable2}(id2))") + + return if number_of_rows == 0 + + [dbtable1, dbtable2, dbtable3].each do |dbtable| + transaction(connection) do + sqlargs = (["(?)"]*number_of_rows).join(", ") + sql = "INSERT INTO #{dbtable} VALUES #{sqlargs}" + insert_statement = connection.prepare(sql) + + rand_rows = [] + number_of_rows.times.each { |n| rand_rows << n } + + insert_statement.execute(*rand_rows) + end + end + end + + def seed_simple_database_with_fk_constraints + max_id = 1111 + seed_random_data_with_fk_constraints(source_db, number_of_rows: max_id) + end + def 
seed_simple_database_with_single_table # Setup the source database with data. max_id = 1111 diff --git a/test/helpers/ghostferry_helper.rb b/test/helpers/ghostferry_helper.rb index 4b9295bc9..053e48bfd 100644 --- a/test/helpers/ghostferry_helper.rb +++ b/test/helpers/ghostferry_helper.rb @@ -266,6 +266,10 @@ def start_ghostferry(resuming_state = nil) environment["GHOSTFERRY_SKIP_TARGET_VERIFICATION"] = @config[:skip_target_verification] end + if @config[:skip_foreign_key_constraints_check] + environment["SkipForeignKeyConstraintsCheck"] = @config[:skip_foreign_key_constraints_check] + end + if @config[:marginalia] environment["GHOSTFERRY_MARGINALIA"] = @config[:marginalia] end diff --git a/test/integration/foreign_key_test.rb b/test/integration/foreign_key_test.rb new file mode 100644 index 000000000..b9b5a136a --- /dev/null +++ b/test/integration/foreign_key_test.rb @@ -0,0 +1,20 @@ +require "test_helper" +require "json" + +class ForeignKeyTest < GhostferryTestCase + def setup + seed_simple_database_with_fk_constraints + disable_foreign_key_constraints + end + + def teardown + enable_foreign_key_constraints + end + + def test_copy_data_with_fk_constraints_writes_to_source + ghostferry = new_ghostferry(MINIMAL_GHOSTFERRY, config: { skip_foreign_key_constraints_check: "true" }) + + ghostferry.run + assert_test_table_is_identical + end +end diff --git a/test/lib/go/integrationferry.go b/test/lib/go/integrationferry.go index 9f43c3e30..cf785dc68 100644 --- a/test/lib/go/integrationferry.go +++ b/test/lib/go/integrationferry.go @@ -254,6 +254,11 @@ func NewStandardConfig() (*ghostferry.Config, error) { } } + skipForeignKeyConstraintsCheck := os.Getenv("SkipForeignKeyConstraintsCheck") + if skipForeignKeyConstraintsCheck == "true" { + config.SkipForeignKeyConstraintsCheck = true + } + return config, config.ValidateConfig() } diff --git a/utils.go b/utils.go index 3520812e4..e76c72799 100644 --- a/utils.go +++ b/utils.go @@ -6,11 +6,12 @@ import ( sqlorig "database/sql" 
"encoding/binary" "fmt" - sql "github.com/Shopify/ghostferry/sqlwrapper" "sync" "sync/atomic" "time" + sql "github.com/Shopify/ghostferry/sqlwrapper" + "github.com/siddontang/go-mysql/mysql" "github.com/sirupsen/logrus" ) From 91b2cc96c65ab830887d189dcd9e4115cf28f3b7 Mon Sep 17 00:00:00 2001 From: Manan007224 Date: Mon, 14 Jun 2021 22:15:03 -0700 Subject: [PATCH 2/8] integration tests for foreign key data --- ferry.go | 14 +++- test/go/ferry_test.go | 5 +- test/helpers/db_helper.rb | 22 ++--- test/helpers/ghostferry_helper.rb | 2 +- test/integration/foreign_key_test.rb | 117 +++++++++++++++++++++++++-- test/test_helper.rb | 36 +++++---- 6 files changed, 162 insertions(+), 34 deletions(-) diff --git a/ferry.go b/ferry.go index 4cb64d5ef..558aab6aa 100644 --- a/ferry.go +++ b/ferry.go @@ -424,7 +424,7 @@ func (f *Ferry) Initialize() (err error) { f.logger.WithError(err).Error("source master is a read replica") return err } - } else { + } else if !f.Config.SkipForeignKeyConstraintsCheck { isReplica, err := CheckDbIsAReplica(f.SourceDB) if err != nil { f.logger.WithError(err).Error("cannot check if source is a replica") @@ -484,6 +484,18 @@ func (f *Ferry) Initialize() (err error) { } } + if f.Config.SkipForeignKeyConstraintsCheck { + isReadOnly, err := CheckDbIsAReplica(f.SourceDB) + if err != nil { + return err + } + if !isReadOnly { + err = errors.New("Source DB must be read_only") + f.logger.WithError(err).Error("Source DB should be read_only to migrate tables with foreign key constraints") + return err + } + } + if f.Config.DataIterationBatchSizePerTableOverride != nil { err = f.Config.DataIterationBatchSizePerTableOverride.UpdateBatchSizes(f.SourceDB, f.Tables) if err != nil { diff --git a/test/go/ferry_test.go b/test/go/ferry_test.go index 3b343aab6..6bfd5383a 100644 --- a/test/go/ferry_test.go +++ b/test/go/ferry_test.go @@ -56,9 +56,12 @@ func (t *FerryTestSuite) TestSourceDatabaseWithForeignKeyConstraintFailsInitiali ferry = testhelpers.NewTestFerry().Ferry 
ferry.Config.SkipForeignKeyConstraintsCheck = true - err = ferry.Initialize() + _, err = t.Ferry.SourceDB.Exec("SET GLOBAL read_only = ON") + err = ferry.Initialize() t.Require().Nil(err) + + _, err = t.Ferry.SourceDB.Exec("SET GLOBAL read_only = OFF") } func TestFerryTestSuite(t *testing.T) { diff --git a/test/helpers/db_helper.rb b/test/helpers/db_helper.rb index cf5fbc834..883d3a219 100644 --- a/test/helpers/db_helper.rb +++ b/test/helpers/db_helper.rb @@ -9,6 +9,7 @@ module DbHelper DEFAULT_DB = "gftest" DEFAULT_TABLE = "test_table_1" + DEFAULT_TABLES_WITH_FK_CONSTRAINTS = ["test_fk_table1", "test_fk_table2"] class Mysql2::Client alias_method :query_without_maginalia, :query @@ -42,6 +43,10 @@ def self.rand_data(length: 32) end DEFAULT_FULL_TABLE_NAME = full_table_name(DEFAULT_DB, DEFAULT_TABLE) + DEFAULT_FULL_TABLE_NAMES_WITH_FK_CONSTRAINTS = [ + full_table_name(DEFAULT_DB, DEFAULT_TABLES_WITH_FK_CONSTRAINTS[0]), + full_table_name(DEFAULT_DB, DEFAULT_TABLES_WITH_FK_CONSTRAINTS[1]) + ] def full_table_name(db, table) DbHelper.full_table_name(db, table) @@ -139,33 +144,31 @@ def seed_random_data(connection, database_name: DEFAULT_DB, table_name: DEFAULT_ end def disable_foreign_key_constraints - source_db.query("SET FOREIGN_KEY_CHECKS=0") + target_db.query("SET GLOBAL FOREIGN_KEY_CHECKS=0") end def enable_foreign_key_constraints - source_db.query("SET FOREIGN_KEY_CHECKS=1") + target_db.query("SET GLOBAL FOREIGN_KEY_CHECKS=1") end - def seed_random_data_with_fk_constraints(connection, database_name: DEFAULT_DB, number_of_rows: 1111) + def seed_random_data_with_fk_constraints(connection, database_name: DEFAULT_DB, number_of_rows: 1000) dbtable1 = full_table_name(database_name, "test_fk_table1") dbtable2 = full_table_name(database_name, "test_fk_table2") - dbtable3 = full_table_name(database_name, "test_fk_table3") connection.query("CREATE DATABASE IF NOT EXISTS #{database_name}") connection.query("CREATE TABLE IF NOT EXISTS #{dbtable1} (id1 bigint(20), primary 
key(id1))") - connection.query("CREATE TABLE IF NOT EXISTS #{dbtable2} (id2 bigint(20), primary key(id2), CONSTRAINT fkc2 foreign key(id2) REFERENCES #{dbtable1}(id1))") - connection.query("CREATE TABLE IF NOT EXISTS #{dbtable3} (id3 bigint(20), primary key(id3), CONSTRAINT fkc3 foreign key(id3) REFERENCES #{dbtable2}(id2))") + connection.query("CREATE TABLE IF NOT EXISTS #{dbtable2} (id2 bigint(20), primary key(id2), CONSTRAINT fkc2 foreign key(id2) REFERENCES #{dbtable1}(id1) ON DELETE CASCADE ON UPDATE CASCADE)") return if number_of_rows == 0 - [dbtable1, dbtable2, dbtable3].each do |dbtable| + [dbtable1, dbtable2].each do |dbtable| transaction(connection) do sqlargs = (["(?)"]*number_of_rows).join(", ") sql = "INSERT INTO #{dbtable} VALUES #{sqlargs}" insert_statement = connection.prepare(sql) rand_rows = [] - number_of_rows.times.each { |n| rand_rows << n } + number_of_rows.times.each { |n| rand_rows << n+1 } insert_statement.execute(*rand_rows) end @@ -173,8 +176,9 @@ def seed_random_data_with_fk_constraints(connection, database_name: DEFAULT_DB, end def seed_simple_database_with_fk_constraints - max_id = 1111 + max_id = 1000 seed_random_data_with_fk_constraints(source_db, number_of_rows: max_id) + seed_random_data_with_fk_constraints(target_db, number_of_rows: 0) end def seed_simple_database_with_single_table diff --git a/test/helpers/ghostferry_helper.rb b/test/helpers/ghostferry_helper.rb index 053e48bfd..8b12e3f3e 100644 --- a/test/helpers/ghostferry_helper.rb +++ b/test/helpers/ghostferry_helper.rb @@ -267,7 +267,7 @@ def start_ghostferry(resuming_state = nil) end if @config[:skip_foreign_key_constraints_check] - environment["SkipForeignKeyConstraintsCheck"] = @config[:skip_foreign_key_constraints_check] + environment["SkipForeignKeyConstraintsCheck"] = "true" end if @config[:marginalia] diff --git a/test/integration/foreign_key_test.rb b/test/integration/foreign_key_test.rb index b9b5a136a..010e2e413 100644 --- a/test/integration/foreign_key_test.rb +++ 
b/test/integration/foreign_key_test.rb @@ -11,10 +11,117 @@ def teardown enable_foreign_key_constraints end - def test_copy_data_with_fk_constraints_writes_to_source - ghostferry = new_ghostferry(MINIMAL_GHOSTFERRY, config: { skip_foreign_key_constraints_check: "true" }) - + def test_foreign_key_copy_data_without_writes_to_source + ghostferry = new_ghostferry(MINIMAL_GHOSTFERRY, config: { skip_foreign_key_constraints_check: true }) ghostferry.run - assert_test_table_is_identical - end + + assert_test_table_is_identical(tables: DEFAULT_FULL_TABLE_NAMES_WITH_FK_CONSTRAINTS) + end + + def test_foreign_key_interrupt_resume_idempotence_without_writes_to_source + ghostferry = new_ghostferry(MINIMAL_GHOSTFERRY, config: { skip_foreign_key_constraints_check: true }) + ghostferry.on_status(Ghostferry::Status::AFTER_ROW_COPY) do + ghostferry.term_and_wait_for_exit + end + + dumped_state = ghostferry.run_expecting_interrupt + + ghostferry = new_ghostferry(MINIMAL_GHOSTFERRY, config: { skip_foreign_key_constraints_check: true }) + ghostferry.run_with_logs(dumped_state) + + assert_test_table_is_identical(tables: DEFAULT_FULL_TABLE_NAMES_WITH_FK_CONSTRAINTS) + + ghostferry.run_with_logs(dumped_state) + + assert_test_table_is_identical(tables: DEFAULT_FULL_TABLE_NAMES_WITH_FK_CONSTRAINTS) + assert_ghostferry_completed(ghostferry, times: 2) + end + + # Consider a scenario where rows in parent table on source db are updated during interrupt, and due to CASCADES + # the rows on child table are updated as well, but due to foreign key checks are disabled, the rows in child table + # on target db are not updated. This will lead to non-identical child tables in source and target db as a result + # we should avoid updating rows on source db during interrupt. 
+ def test_foreign_key_verification_fails_with_rows_changed_on_source_during_interrupt + ghostferry = new_ghostferry(MINIMAL_GHOSTFERRY, config: { skip_foreign_key_constraints_check: true }) + + batches_written = 0 + ghostferry.on_status(Ghostferry::Status::AFTER_ROW_COPY) do + batches_written += 1 + if batches_written > 7 + ghostferry.term_and_wait_for_exit + end + end + + dumped_state = ghostferry.run_expecting_interrupt + assert_basic_fields_exist_in_dumped_state(dumped_state) + + min_ids = [] + i = 0 + while i < DEFAULT_FULL_TABLE_NAMES_WITH_FK_CONSTRAINTS.length + i += 1 + result = target_db.query("SELECT MIN(id#{i}) FROM #{DEFAULT_FULL_TABLE_NAMES_WITH_FK_CONSTRAINTS[i-1]}") + min_ids << result.first["MIN(id#{i})"] + end + + # Due to ON UPDATE CASCADE on test_fk_table2 when we change the id of a row in test_fk_table1 the column + # id2 on test_fk_table2 will be automatically changed in source db, but not being written to the binlogs + # As a result the even after resuming ghostferry from the dumped state the column with id2 = min_id + 1000000 + # won't be present on test_fk_table2 in target db. 
+ choosen_id = min_ids.min + random_value = 1000000 + source_db.query("UPDATE #{DEFAULT_FULL_TABLE_NAMES_WITH_FK_CONSTRAINTS.first} SET id1 = #{choosen_id + random_value} WHERE id1 = #{choosen_id}") + + ghostferry = new_ghostferry(MINIMAL_GHOSTFERRY, config: { skip_foreign_key_constraints_check: true, verifier_type: "Inline" }) + ghostferry.run(dumped_state) + + source_result = source_db.query("SELECT MAX(id2) FROM #{DEFAULT_FULL_TABLE_NAMES_WITH_FK_CONSTRAINTS[1]}") + target_result = target_db.query("SELECT MAX(id2) FROM #{DEFAULT_FULL_TABLE_NAMES_WITH_FK_CONSTRAINTS[1]}") + refute_equal source_result.first["MAX(id2)"], target_result.first["MAX(id2)"] + end + + # Consider a scenario where rows in parent table on source db are changed during interrupt, and due to CASCADES + # the rows on child table are updated as well, but due to foreign key checks are disabled, the rows in child table + # on target db are not updated. Inline Verifier will still pass in this scenario even though the child table is clearly + # not identical on source and target db. The reason is the rows in child table on source db were verified inline before + # the interrupt happened and during the interrupt when these rows were changed, they were not emitted to the binlogs, + # so these changed rows during interrupt never got passed to reverifyStore, and eventually Inline verifier passed. + # This test is to depict the same scenario. 
+ def test_foreign_key_inline_verifier_passes_even_though_source_and_target_tables_are_not_identical + ghostferry = new_ghostferry(MINIMAL_GHOSTFERRY, config: { skip_foreign_key_constraints_check: true, verifier_type: "Inline" }) + + batches_written = 0 + ghostferry.on_status(Ghostferry::Status::AFTER_ROW_COPY) do + batches_written += 1 + if batches_written > 7 + ghostferry.term_and_wait_for_exit + end + end + + dumped_state = ghostferry.run_expecting_interrupt + assert_basic_fields_exist_in_dumped_state(dumped_state) + + min_ids = [] + i = 0 + while i < DEFAULT_FULL_TABLE_NAMES_WITH_FK_CONSTRAINTS.length + i += 1 + result = target_db.query("SELECT MIN(id#{i}) FROM #{DEFAULT_FULL_TABLE_NAMES_WITH_FK_CONSTRAINTS[i-1]}") + min_ids << result.first["MIN(id#{i})"] + end + + choosen_id = min_ids.min + random_value = 1000000 + source_db.query("UPDATE #{DEFAULT_FULL_TABLE_NAMES_WITH_FK_CONSTRAINTS.first} SET id1 = #{choosen_id + random_value} WHERE id1 = #{choosen_id}") + + verification_ran = false + incorrect_tables = [] + ghostferry = new_ghostferry(MINIMAL_GHOSTFERRY, config: { skip_foreign_key_constraints_check: true, verifier_type: "Inline" }) + ghostferry.on_status(Ghostferry::Status::VERIFIED) do |*tables| + verification_ran = true + incorrect_tables = tables + end + + ghostferry.run(dumped_state) + assert verification_ran + assert_equal incorrect_tables, [] + end end diff --git a/test/test_helper.rb b/test/test_helper.rb index f4cddca34..143140d36 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -136,23 +136,25 @@ def after_all # Assertion Helpers # ##################### - def assert_test_table_is_identical - source, target = source_and_target_table_metrics - - assert source[DEFAULT_FULL_TABLE_NAME][:row_count] > 0 - assert target[DEFAULT_FULL_TABLE_NAME][:row_count] > 0 - - assert_equal( - source[DEFAULT_FULL_TABLE_NAME][:row_count], - target[DEFAULT_FULL_TABLE_NAME][:row_count], - "source and target row count don't match", - ) - - assert_equal( - 
source[DEFAULT_FULL_TABLE_NAME][:checksum], - target[DEFAULT_FULL_TABLE_NAME][:checksum], - "source and target checksum don't match", - ) + def assert_test_table_is_identical(tables: [DEFAULT_FULL_TABLE_NAME]) + source, target = source_and_target_table_metrics(tables: tables) + + tables.each do |table| + assert source[table][:row_count] > 0 + assert target[table][:row_count] > 0 + + assert_equal( + source[table][:row_count], + target[table][:row_count], + "source and target row count don't match", + ) + + assert_equal( + source[table][:checksum], + target[table][:checksum], + "source and target checksum don't match", + ) + end end # Use this method to assert the validity of the structure of the dumped From 7a36ba736035276c4b7f76b9b35024d404fd0f08 Mon Sep 17 00:00:00 2001 From: Manan007224 Date: Mon, 14 Jun 2021 22:15:47 -0700 Subject: [PATCH 3/8] fix makefile path --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0882d2d78..0b19d9b33 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ PROJECT_DEBS := $(foreach name,$(PROJECTS),$(name)-deb) # Target specific variable, set proj to have a valid value. PROJECT_PKG = ./$(proj)/cmd PROJECT_BIN = ghostferry-$(proj) -BIN_TARGET = ./ +BIN_TARGET = $(GOBIN)/$(PROJECT_BIN) DEB_TARGET = $(BUILD_DIR)/$(PROJECT_BIN)_$(VERSION_STR).deb PLATFORM := $(shell uname -s | tr A-Z a-z) From b4b44e2672818831f15b98ee8b3cb0769275b988 Mon Sep 17 00:00:00 2001 From: Manan007224 Date: Tue, 15 Jun 2021 00:33:32 -0700 Subject: [PATCH 4/8] added documentation for foreign key migration --- docs/source/copydbforeignkey.rst | 25 +++++++++++++++++++++++++ docs/source/copydbinprod.rst | 3 ++- 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 docs/source/copydbforeignkey.rst diff --git a/docs/source/copydbforeignkey.rst b/docs/source/copydbforeignkey.rst new file mode 100644 index 000000000..55a056615 --- /dev/null +++ b/docs/source/copydbforeignkey.rst @@ -0,0 +1,25 @@ +.. 
_copydbforeignkey: + +=================================================================================== +Running ``ghostferry-copydb`` in production for tables with Foreign Key Constraints +=================================================================================== + +Migrating tables with foreign keys constraints is an experimental feature in copydb and should be used at your own risk in production. + + +Prerequisites +------------- + +Before migrating tables with foreign key constraints via copydb there are a couple of things to take care of + +- Ghostferry needs to be ran with `SkipForeignKeyConstraintsCheck = true`, which will disable ghostferry to check foreign key + constraints during initialization. + +- Source DB should be read_only. + +Limitations +------------- + +- While migrating tables with foreign key constraints the source db should be read_only as there are some fundamental issues when migrating tables with foreign key constraints at the same time when writes are occurring to the source database. This issue descibes briefly why the source database should be read_only during the migration - https://github.com/Shopify/katesql-migration-backend/issues/194. + +- Interrupt-Resume functionality can be used as long as source database is read_only also during the interrupt period diff --git a/docs/source/copydbinprod.rst b/docs/source/copydbinprod.rst index d9be4ca5b..bf289f135 100644 --- a/docs/source/copydbinprod.rst +++ b/docs/source/copydbinprod.rst @@ -35,7 +35,8 @@ to consider about this are: - There are no foreign key constraints in your tables. - - You should remove these constraints before running Ghostferry. + - You should remove these constraints before running Ghostferry or run Ghostferry with SkipForeignKeyConstraintsCheck = true and ensure + source database is read_only. - ``ghostferry-copydb`` can only copy a whole table at a time. 
From 7f794cb0f5afea4d21afdf11a03da524a520391a Mon Sep 17 00:00:00 2001 From: Manan007224 Date: Tue, 15 Jun 2021 00:34:39 -0700 Subject: [PATCH 5/8] remove custom config from example/conf.json --- examples/copydb/conf.json | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/examples/copydb/conf.json b/examples/copydb/conf.json index 28eea760d..bdd0d59a9 100644 --- a/examples/copydb/conf.json +++ b/examples/copydb/conf.json @@ -6,8 +6,7 @@ "Pass": "", "Collation": "utf8mb4_unicode_ci", "Params": { - "charset": "utf8mb4", - "foreign_key_checks": "0" + "charset": "utf8mb4" } }, @@ -18,8 +17,7 @@ "Pass": "", "Collation": "utf8mb4_unicode_ci", "Params": { - "charset": "utf8mb4", - "foreign_key_checks": "0" + "charset": "utf8mb4" } }, @@ -31,6 +29,12 @@ "Blacklist": ["schema_migrations"] }, + "ForceIndexForVerification": { + "abc": { + "table1": "forced_index_name" + } + }, + "DumpStateOnSignal": true, "VerifierType": "ChecksumTable", @@ -40,7 +44,5 @@ "Custom Script 2": ["examples/copydb/s2"] }, - "DumpStateToStdoutOnError": true, - - "SkipForeignKeyConstraintsCheck": true + "DumpStateToStdoutOnError": true } From 5a6591297d15b0a2910663a6edeb0925bb105168 Mon Sep 17 00:00:00 2001 From: Manan007224 Date: Tue, 15 Jun 2021 01:12:48 -0700 Subject: [PATCH 6/8] fix ruby integration tests --- docs/source/copydbforeignkey.rst | 4 +++- test/helpers/db_helper.rb | 8 ++++++++ test/integration/foreign_key_test.rb | 12 +++++++++--- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/docs/source/copydbforeignkey.rst b/docs/source/copydbforeignkey.rst index 55a056615..84f4bbaf0 100644 --- a/docs/source/copydbforeignkey.rst +++ b/docs/source/copydbforeignkey.rst @@ -22,4 +22,6 @@ Limitations - While migrating tables with foreign key constraints the source db should be read_only as there are some fundamental issues when migrating tables with foreign key constraints at the same time when writes are occurring to the source database. 
This issue descibes briefly why the source database should be read_only during the migration - https://github.com/Shopify/katesql-migration-backend/issues/194. -- Interrupt-Resume functionality can be used as long as source database is read_only also during the interrupt period +- `Interrupt-Resume functionality` can be used as long as source database is read_only also during the interrupt period + +- `Inline Verifier` can be used as long as it is ensured that the source database is read_only (even during the interrupt period) diff --git a/test/helpers/db_helper.rb b/test/helpers/db_helper.rb index 883d3a219..6c72dc1d7 100644 --- a/test/helpers/db_helper.rb +++ b/test/helpers/db_helper.rb @@ -143,6 +143,14 @@ def seed_random_data(connection, database_name: DEFAULT_DB, table_name: DEFAULT_ end end + def disable_writes_on_source + source_db.query("SET GLOBAL read_only = ON") + end + + def enable_writes_on_source + source_db.query("SET GLOBAL read_only = OFF") + end + def disable_foreign_key_constraints target_db.query("SET GLOBAL FOREIGN_KEY_CHECKS=0") end diff --git a/test/integration/foreign_key_test.rb b/test/integration/foreign_key_test.rb index 010e2e413..2c3f28b3a 100644 --- a/test/integration/foreign_key_test.rb +++ b/test/integration/foreign_key_test.rb @@ -5,10 +5,12 @@ class ForeignKeyTest < GhostferryTestCase def setup seed_simple_database_with_fk_constraints disable_foreign_key_constraints + disable_writes_on_source end def teardown enable_foreign_key_constraints + enable_writes_on_source end def test_foreign_key_copy_data_without_writes_to_source @@ -67,16 +69,18 @@ def test_foreign_key_verification_fails_with_rows_changed_on_source_during_inter # id2 on test_fk_table2 will be automatically changed in source db, but not being written to the binlogs # As a result the even after resuming ghostferry from the dumped state the column with id2 = min_id + 1000000 # won't be present on test_fk_table2 in target db. 
+ enable_writes_on_source choosen_id = min_ids.min random_value = 1000000 source_db.query("UPDATE #{DEFAULT_FULL_TABLE_NAMES_WITH_FK_CONSTRAINTS.first} SET id1 = #{choosen_id + random_value} WHERE id1 = #{choosen_id}") + disable_writes_on_source ghostferry = new_ghostferry(MINIMAL_GHOSTFERRY, config: { skip_foreign_key_constraints_check: true, verifier_type: "Inline" }) ghostferry.run(dumped_state) - source_result = source_db.query("SELECT MAX(id2) FROM #{DEFAULT_FULL_TABLE_NAMES_WITH_FK_CONSTRAINTS[1]}") - target_result = target_db.query("SELECT MAX(id2) FROM #{DEFAULT_FULL_TABLE_NAMES_WITH_FK_CONSTRAINTS[1]}") - refute_equal source_result.first["MAX(id2)"], target_result.first["MAX(id2)"] + source_result = source_db.query("CHECKSUM TABLE #{DEFAULT_FULL_TABLE_NAMES_WITH_FK_CONSTRAINTS[1]}") + target_result = target_db.query("CHECKSUM TABLE #{DEFAULT_FULL_TABLE_NAMES_WITH_FK_CONSTRAINTS[1]}") + refute_equal source_result.first["Checksum"], target_result.first["Checksum"] end # Consider a scenario where rows in parent table on source db are changed during interrupt, and due to CASCADES @@ -108,9 +112,11 @@ def test_foreign_key_inline_verifier_passes_even_though_source_and_target_tables min_ids << result.first["MIN(id#{i})"] end + enable_writes_on_source choosen_id = min_ids.min random_value = 1000000 source_db.query("UPDATE #{DEFAULT_FULL_TABLE_NAMES_WITH_FK_CONSTRAINTS.first} SET id1 = #{choosen_id + random_value} WHERE id1 = #{choosen_id}") + disable_writes_on_source verification_ran = false incorrect_tables = [] From 4ea968d2aa702593a4c1ee0f71260333ed88982d Mon Sep 17 00:00:00 2001 From: Manan007224 Date: Tue, 15 Jun 2021 01:33:28 -0700 Subject: [PATCH 7/8] mode documentation --- docs/source/copydbforeignkey.rst | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/docs/source/copydbforeignkey.rst b/docs/source/copydbforeignkey.rst index 84f4bbaf0..645f619ea 100644 --- a/docs/source/copydbforeignkey.rst +++ 
b/docs/source/copydbforeignkey.rst @@ -12,16 +12,28 @@ Prerequisites Before migrating tables with foreign key constraints via copydb there are a couple of things to take care of -- Ghostferry needs to be ran with `SkipForeignKeyConstraintsCheck = true`, which will disable ghostferry to check foreign key +- Ghostferry needs to be run with ``SkipForeignKeyConstraintsCheck = true``, which stops Ghostferry from checking foreign key constraints during initialization. -- Source DB should be read_only. +- Source DB should be ``read_only``. + +- Foreign key constraint checks must be disabled on the target DB by passing the following config for the target DB + + .. code-block:: json + + "Params": { + "foreign_key_checks": "0" + } + +- Even though foreign key constraint checks are disabled on the target DB, table and database creation must happen in a specific order (e.g. a parent table must be created + before its child tables). This creation order can be specified by passing ``TablesToBeCreatedFirst`` in the config; otherwise the table creation order is + inferred by copydb. Limitations ------------- - While migrating tables with foreign key constraints the source db should be read_only as there are some fundamental issues when migrating tables with foreign key constraints at the same time when writes are occurring to the source database.
-- `Interrupt-Resume functionality` can be used as long as source database is read_only also during the interrupt period +- ``Interrupt-Resume`` functionality can be used as long as source database is read_only also during the interrupt period -- `Inline Verifier` can be used as long as it is ensured that the source database is read_only (even during the interrupt period) +- ``Inline Verifier`` can be used as long as it is ensured that the source database is read_only (even during the interrupt period) From 988e6b2a9812f8a00b7b5493eebe502fcbdd2886 Mon Sep 17 00:00:00 2001 From: Manan007224 Date: Wed, 16 Jun 2021 14:42:12 -0700 Subject: [PATCH 8/8] refer to an issue in ghostferry instead of an internal one refer to an issue in ghostferry instead of an internal one --- docs/source/copydbforeignkey.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/copydbforeignkey.rst b/docs/source/copydbforeignkey.rst index 645f619ea..c46d048e2 100644 --- a/docs/source/copydbforeignkey.rst +++ b/docs/source/copydbforeignkey.rst @@ -4,7 +4,7 @@ Running ``ghostferry-copydb`` in production for tables with Foreign Key Constraints =================================================================================== -Migrating tables with foreign keys constraints is an experimental feature in copydb and should be used at your own risk in production. +Migrating tables with foreign keys constraints is an experimental feature in copydb and should be used at your own risk in production. Prerequisites @@ -32,7 +32,7 @@ Before migrating tables with foreign key constraints via copydb there are a coup Limitations ------------- -- While migrating tables with foreign key constraints the source db should be read_only as there are some fundamental issues when migrating tables with foreign key constraints at the same time when writes are occurring to the source database. 
This issue descibes briefly why the source database should be read_only during the migration - https://github.com/Shopify/katesql-migration-backend/issues/194. +- Currently, migrating tables with foreign key constraints is only possible if the source database is in read_only mode. Tables with foreign key constraints can have referential actions such as ``ON DELETE CASCADE`` and ``ON UPDATE CASCADE``, and the cascading deletes and updates these cause in child tables don't show up in binlogs because referential actions are handled internally by InnoDB. This issue briefly describes why the source database should be read_only during the migration - https://github.com/Shopify/ghostferry/issues/289. - ``Interrupt-Resume`` functionality can be used as long as source database is read_only also during the interrupt period