From 5f444f14cf905b7c8451d5776221915216e83782 Mon Sep 17 00:00:00 2001 From: Priya Bibra Date: Wed, 19 Jul 2023 11:50:47 -0700 Subject: [PATCH 1/7] check keyspace snapshot time if none specified for backup restores Signed-off-by: Priya Bibra --- go/vt/vttablet/tabletmanager/restore.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/go/vt/vttablet/tabletmanager/restore.go b/go/vt/vttablet/tabletmanager/restore.go index 833e19e9917..7822b361e62 100644 --- a/go/vt/vttablet/tabletmanager/restore.go +++ b/go/vt/vttablet/tabletmanager/restore.go @@ -185,6 +185,11 @@ func (tm *TabletManager) restoreDataLocked(ctx context.Context, logger logutil.L log.Infof("Using base_keyspace %v to restore keyspace %v using a backup time of %v", keyspace, tablet.Keyspace, logutil.ProtoToTime(request.BackupTime)) } + startTime := logutil.ProtoToTime(request.BackupTime) + if startTime.IsZero() { + startTime = logutil.ProtoToTime(keyspaceInfo.SnapshotTime) + } + params := mysqlctl.RestoreParams{ Cnf: tm.Cnf, Mysqld: tm.MysqlDaemon, @@ -195,7 +200,7 @@ func (tm *TabletManager) restoreDataLocked(ctx context.Context, logger logutil.L DbName: topoproto.TabletDbName(tablet), Keyspace: keyspace, Shard: tablet.Shard, - StartTime: logutil.ProtoToTime(request.BackupTime), + StartTime: startTime, DryRun: request.DryRun, Stats: backupstats.RestoreStats(), } From a2e9eb57a000e09f9e4861f1651c2ab28e3db698 Mon Sep 17 00:00:00 2001 From: Priya Bibra Date: Mon, 24 Jul 2023 16:16:25 -0700 Subject: [PATCH 2/7] update recovery test to use custom restore time Signed-off-by: Priya Bibra --- go/test/endtoend/recovery/recovery_util.go | 8 +- .../recovery/unshardedrecovery/recovery.go | 94 +++++++++---------- 2 files changed, 51 insertions(+), 51 deletions(-) diff --git a/go/test/endtoend/recovery/recovery_util.go b/go/test/endtoend/recovery/recovery_util.go index 66084e2f4b5..7eedda1d5de 100644 --- a/go/test/endtoend/recovery/recovery_util.go +++ b/go/test/endtoend/recovery/recovery_util.go 
@@ -51,17 +51,21 @@ func VerifyQueriesUsingVtgate(t *testing.T, session *vtgateconn.VTGateSession, q } // RestoreTablet performs a PITR restore. -func RestoreTablet(t *testing.T, localCluster *cluster.LocalProcessCluster, tablet *cluster.Vttablet, restoreKSName string, shardName string, keyspaceName string, commonTabletArg []string) { +func RestoreTablet(t *testing.T, localCluster *cluster.LocalProcessCluster, tablet *cluster.Vttablet, restoreKSName string, shardName string, keyspaceName string, commonTabletArg []string, restoreTime time.Time) { tablet.ValidateTabletRestart(t) replicaTabletArgs := commonTabletArg _, err := localCluster.VtctlProcess.ExecuteCommandWithOutput("GetKeyspace", restoreKSName) + if restoreTime.IsZero() { + restoreTime = time.Now().UTC() + } + if err != nil { tm := time.Now().UTC() _, err := localCluster.VtctlProcess.ExecuteCommandWithOutput("CreateKeyspace", "--", "--keyspace_type=SNAPSHOT", "--base_keyspace="+keyspaceName, - "--snapshot_time", tm.Format(time.RFC3339), restoreKSName) + "--snapshot_time", restoreTime.Format(time.RFC3339), restoreKSName) require.Nil(t, err) } diff --git a/go/test/endtoend/recovery/unshardedrecovery/recovery.go b/go/test/endtoend/recovery/unshardedrecovery/recovery.go index 612ab06f07c..4539bf4698e 100644 --- a/go/test/endtoend/recovery/unshardedrecovery/recovery.go +++ b/go/test/endtoend/recovery/unshardedrecovery/recovery.go @@ -185,24 +185,30 @@ func TestMainImpl(m *testing.M) { } // TestRecoveryImpl does following -// - create a shard with primary and replica1 only -// - run InitShardPrimary -// - insert some data -// - take a backup -// - insert more data on the primary -// - take another backup -// - create a recovery keyspace after first backup -// - bring up tablet_replica2 in the new keyspace -// - check that new tablet does not have data created after backup1 -// - create second recovery keyspace after second backup -// - bring up tablet_replica3 in second keyspace -// - check that new tablet has 
data created after backup1 but not data created after backup2 -// - check that vtgate queries work correctly +// 1. create a shard with primary and replica1 only +// - run InitShardPrimary +// - insert some data +// 2. take a backup +// 3.create a recovery keyspace after first backup +// - bring up tablet_replica2 in the new keyspace +// - check that new tablet has data from backup1 +// 4. insert more data on the primary +// 5. take another backup +// 6. create a recovery keyspace after second backup +// - bring up tablet_replica3 in the new keyspace +// - check that new tablet has data from backup2 +// 7. insert more data on the primary +// 8. take another backup +// 9. create a recovery keyspace after second backup again +// - bring up tablet_replica4 in the new keyspace +// - check that new tablet has data from backup2 but not backup3 +// 10. check that vtgate queries work correctly func TestRecoveryImpl(t *testing.T) { defer cluster.PanicHandler(t) defer tabletsTeardown() verifyInitialReplication(t) + // take first backup of value = test1 err := localCluster.VtctlclientProcess.ExecuteCommand("Backup", replica1.Alias) assert.NoError(t, err) @@ -210,10 +216,6 @@ func TestRecoveryImpl(t *testing.T) { require.Equal(t, len(backups), 1) assert.Contains(t, backups[0], replica1.Alias) - _, err = primary.VttabletProcess.QueryTablet("insert into vt_insert_test (msg) values ('test2')", keyspaceName, true) - assert.NoError(t, err) - cluster.VerifyRowsInTablet(t, replica1, keyspaceName, 2) - err = localCluster.VtctlclientProcess.ApplyVSchema(keyspaceName, vSchema) assert.NoError(t, err) @@ -221,64 +223,58 @@ func TestRecoveryImpl(t *testing.T) { assert.NoError(t, err) assert.Contains(t, output, "vt_insert_test") - recovery.RestoreTablet(t, localCluster, replica2, recoveryKS1, "0", keyspaceName, commonTabletArg) + // restore with latest backup + restoreTime := time.Now().UTC() + recovery.RestoreTablet(t, localCluster, replica2, recoveryKS1, "0", keyspaceName, 
commonTabletArg, restoreTime) output, err = localCluster.VtctlclientProcess.ExecuteCommandWithOutput("GetSrvVSchema", cell) assert.NoError(t, err) assert.Contains(t, output, keyspaceName) assert.Contains(t, output, recoveryKS1) - err = localCluster.VtctlclientProcess.ExecuteCommand("GetSrvKeyspace", cell, keyspaceName) - assert.NoError(t, err) - output, err = localCluster.VtctlclientProcess.ExecuteCommandWithOutput("GetVSchema", recoveryKS1) assert.NoError(t, err) assert.Contains(t, output, "vt_insert_test") cluster.VerifyRowsInTablet(t, replica2, keyspaceName, 1) + // verify that restored replica has value = test1 + qr, err := replica2.VttabletProcess.QueryTablet("select msg from vt_insert_test where id = 1", keyspaceName, true) + assert.NoError(t, err) + assert.Equal(t, "test1", qr.Rows[0][0].ToString()) + + // insert new row on primary + _, err = primary.VttabletProcess.QueryTablet("insert into vt_insert_test (msg) values ('test2')", keyspaceName, true) + assert.NoError(t, err) + cluster.VerifyRowsInTablet(t, replica1, keyspaceName, 2) + // update the original row in primary _, err = primary.VttabletProcess.QueryTablet("update vt_insert_test set msg = 'msgx1' where id = 1", keyspaceName, true) assert.NoError(t, err) // verify that primary has new value - qr, err := primary.VttabletProcess.QueryTablet("select msg from vt_insert_test where id = 1", keyspaceName, true) + qr, err = primary.VttabletProcess.QueryTablet("select msg from vt_insert_test where id = 1", keyspaceName, true) assert.NoError(t, err) assert.Equal(t, "msgx1", qr.Rows[0][0].ToString()) - // verify that restored replica has old value - qr, err = replica2.VttabletProcess.QueryTablet("select msg from vt_insert_test where id = 1", keyspaceName, true) - assert.NoError(t, err) - assert.Equal(t, "test1", qr.Rows[0][0].ToString()) - + // take second backup of value = msgx1 err = localCluster.VtctlclientProcess.ExecuteCommand("Backup", replica1.Alias) assert.NoError(t, err) - _, err = 
primary.VttabletProcess.QueryTablet("insert into vt_insert_test (msg) values ('test3')", keyspaceName, true) - assert.NoError(t, err) - cluster.VerifyRowsInTablet(t, replica1, keyspaceName, 3) - - recovery.RestoreTablet(t, localCluster, replica3, recoveryKS2, "0", keyspaceName, commonTabletArg) + // restore to first backup + recovery.RestoreTablet(t, localCluster, replica3, recoveryKS2, "0", keyspaceName, commonTabletArg, restoreTime) output, err = localCluster.VtctlclientProcess.ExecuteCommandWithOutput("GetVSchema", recoveryKS2) assert.NoError(t, err) assert.Contains(t, output, "vt_insert_test") - cluster.VerifyRowsInTablet(t, replica3, keyspaceName, 2) - - // update the original row in primary - _, err = primary.VttabletProcess.QueryTablet("update vt_insert_test set msg = 'msgx2' where id = 1", keyspaceName, true) - assert.NoError(t, err) + // only one row from first backup + cluster.VerifyRowsInTablet(t, replica3, keyspaceName, 1) - // verify that primary has new value - qr, err = primary.VttabletProcess.QueryTablet("select msg from vt_insert_test where id = 1", keyspaceName, true) - assert.NoError(t, err) - assert.Equal(t, "msgx2", qr.Rows[0][0].ToString()) - - // verify that restored replica has old value + //verify that restored replica has value = test1 qr, err = replica3.VttabletProcess.QueryTablet("select msg from vt_insert_test where id = 1", keyspaceName, true) assert.NoError(t, err) - assert.Equal(t, "msgx1", qr.Rows[0][0].ToString()) + assert.Equal(t, "test1", qr.Rows[0][0].ToString()) vtgateInstance := localCluster.NewVtgateInstance() vtgateInstance.TabletTypesToWait = "REPLICA" @@ -299,19 +295,19 @@ func TestRecoveryImpl(t *testing.T) { session := vtgateConn.Session("@replica", nil) // check that vtgate doesn't route queries to new tablet - recovery.VerifyQueriesUsingVtgate(t, session, "select count(*) from vt_insert_test", "INT64(3)") - recovery.VerifyQueriesUsingVtgate(t, session, "select msg from vt_insert_test where id = 1", `VARCHAR("msgx2")`) 
+ recovery.VerifyQueriesUsingVtgate(t, session, "select count(*) from vt_insert_test", "INT64(2)") + recovery.VerifyQueriesUsingVtgate(t, session, "select msg from vt_insert_test where id = 1", `VARCHAR("msgx1")`) recovery.VerifyQueriesUsingVtgate(t, session, fmt.Sprintf("select count(*) from %s.vt_insert_test", recoveryKS1), "INT64(1)") recovery.VerifyQueriesUsingVtgate(t, session, fmt.Sprintf("select msg from %s.vt_insert_test where id = 1", recoveryKS1), `VARCHAR("test1")`) - recovery.VerifyQueriesUsingVtgate(t, session, fmt.Sprintf("select count(*) from %s.vt_insert_test", recoveryKS2), "INT64(2)") - recovery.VerifyQueriesUsingVtgate(t, session, fmt.Sprintf("select msg from %s.vt_insert_test where id = 1", recoveryKS2), `VARCHAR("msgx1")`) + recovery.VerifyQueriesUsingVtgate(t, session, fmt.Sprintf("select count(*) from %s.vt_insert_test", recoveryKS2), "INT64(1)") + recovery.VerifyQueriesUsingVtgate(t, session, fmt.Sprintf("select msg from %s.vt_insert_test where id = 1", recoveryKS2), `VARCHAR("test1")`) // check that new keyspace is accessible with 'use ks' cluster.ExecuteQueriesUsingVtgate(t, session, "use "+recoveryKS1+"@replica") recovery.VerifyQueriesUsingVtgate(t, session, "select count(*) from vt_insert_test", "INT64(1)") cluster.ExecuteQueriesUsingVtgate(t, session, "use "+recoveryKS2+"@replica") - recovery.VerifyQueriesUsingVtgate(t, session, "select count(*) from vt_insert_test", "INT64(2)") + recovery.VerifyQueriesUsingVtgate(t, session, "select count(*) from vt_insert_test", "INT64(1)") // check that new tablet is accessible with use `ks:shard` cluster.ExecuteQueriesUsingVtgate(t, session, "use `"+recoveryKS1+":0@replica`") From 480e4599d7308cdefa4d28b572c5fc6ca7f0a598 Mon Sep 17 00:00:00 2001 From: Priya Bibra Date: Mon, 24 Jul 2023 17:06:16 -0700 Subject: [PATCH 3/7] fmt recovery.go Signed-off-by: Priya Bibra --- go/test/endtoend/recovery/recovery_util.go | 1 - .../recovery/unshardedrecovery/recovery.go | 20 +++++++++++-------- 2 files changed, 
12 insertions(+), 9 deletions(-) diff --git a/go/test/endtoend/recovery/recovery_util.go b/go/test/endtoend/recovery/recovery_util.go index 7eedda1d5de..cffae6a5005 100644 --- a/go/test/endtoend/recovery/recovery_util.go +++ b/go/test/endtoend/recovery/recovery_util.go @@ -62,7 +62,6 @@ func RestoreTablet(t *testing.T, localCluster *cluster.LocalProcessCluster, tabl } if err != nil { - tm := time.Now().UTC() _, err := localCluster.VtctlProcess.ExecuteCommandWithOutput("CreateKeyspace", "--", "--keyspace_type=SNAPSHOT", "--base_keyspace="+keyspaceName, "--snapshot_time", restoreTime.Format(time.RFC3339), restoreKSName) diff --git a/go/test/endtoend/recovery/unshardedrecovery/recovery.go b/go/test/endtoend/recovery/unshardedrecovery/recovery.go index 4539bf4698e..e950902cd39 100644 --- a/go/test/endtoend/recovery/unshardedrecovery/recovery.go +++ b/go/test/endtoend/recovery/unshardedrecovery/recovery.go @@ -186,22 +186,26 @@ func TestMainImpl(m *testing.M) { // TestRecoveryImpl does following // 1. create a shard with primary and replica1 only -// - run InitShardPrimary -// - insert some data +// - run InitShardPrimary +// - insert some data +// // 2. take a backup // 3.create a recovery keyspace after first backup -// - bring up tablet_replica2 in the new keyspace -// - check that new tablet has data from backup1 +// - bring up tablet_replica2 in the new keyspace +// - check that new tablet has data from backup1 +// // 4. insert more data on the primary // 5. take another backup // 6. create a recovery keyspace after second backup -// - bring up tablet_replica3 in the new keyspace -// - check that new tablet has data from backup2 +// - bring up tablet_replica3 in the new keyspace +// - check that new tablet has data from backup2 +// // 7. insert more data on the primary // 8. take another backup // 9. 
create a recovery keyspace after second backup again -// - bring up tablet_replica4 in the new keyspace -// - check that new tablet has data from backup2 but not backup3 +// - bring up tablet_replica4 in the new keyspace +// - check that new tablet has data from backup2 but not backup3 +// // 10. check that vtgate queries work correctly func TestRecoveryImpl(t *testing.T) { defer cluster.PanicHandler(t) From 4ccd55d0bd4178c0d5cd4ae287d915cb9dd468b8 Mon Sep 17 00:00:00 2001 From: Priya Bibra Date: Tue, 25 Jul 2023 09:20:30 -0700 Subject: [PATCH 4/7] fix test Signed-off-by: Priya Bibra --- go/test/endtoend/recovery/unshardedrecovery/recovery.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/test/endtoend/recovery/unshardedrecovery/recovery.go b/go/test/endtoend/recovery/unshardedrecovery/recovery.go index e950902cd39..bffd2594b0a 100644 --- a/go/test/endtoend/recovery/unshardedrecovery/recovery.go +++ b/go/test/endtoend/recovery/unshardedrecovery/recovery.go @@ -318,7 +318,7 @@ func TestRecoveryImpl(t *testing.T) { recovery.VerifyQueriesUsingVtgate(t, session, "select count(*) from vt_insert_test", "INT64(1)") cluster.ExecuteQueriesUsingVtgate(t, session, "use `"+recoveryKS2+":0@replica`") - recovery.VerifyQueriesUsingVtgate(t, session, "select count(*) from vt_insert_test", "INT64(2)") + recovery.VerifyQueriesUsingVtgate(t, session, "select count(*) from vt_insert_test", "INT64(1)") } // verifyInitialReplication will create schema in primary, insert some data to primary and verify the same data in replica. 
From a6c5a1cfa4333df2958aaaa5b3485b75f5155bf8 Mon Sep 17 00:00:00 2001 From: Priya Bibra Date: Tue, 25 Jul 2023 11:34:04 -0700 Subject: [PATCH 5/7] update comments Signed-off-by: Priya Bibra --- .../endtoend/recovery/unshardedrecovery/recovery.go | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/go/test/endtoend/recovery/unshardedrecovery/recovery.go b/go/test/endtoend/recovery/unshardedrecovery/recovery.go index bffd2594b0a..c001682ec10 100644 --- a/go/test/endtoend/recovery/unshardedrecovery/recovery.go +++ b/go/test/endtoend/recovery/unshardedrecovery/recovery.go @@ -195,18 +195,12 @@ func TestMainImpl(m *testing.M) { // - check that new tablet has data from backup1 // // 4. insert more data on the primary +// // 5. take another backup // 6. create a recovery keyspace after second backup // - bring up tablet_replica3 in the new keyspace // - check that new tablet has data from backup2 -// -// 7. insert more data on the primary -// 8. take another backup -// 9. create a recovery keyspace after second backup again -// - bring up tablet_replica4 in the new keyspace -// - check that new tablet has data from backup2 but not backup3 -// -// 10. check that vtgate queries work correctly +// 7. check that vtgate queries work correctly func TestRecoveryImpl(t *testing.T) { defer cluster.PanicHandler(t) defer tabletsTeardown() From 3cb7e610dfa7173ac179eba5c236d77f5017f2b2 Mon Sep 17 00:00:00 2001 From: Priya Bibra Date: Tue, 25 Jul 2023 11:47:01 -0700 Subject: [PATCH 6/7] fmt Signed-off-by: Priya Bibra --- go/test/endtoend/recovery/unshardedrecovery/recovery.go | 1 + 1 file changed, 1 insertion(+) diff --git a/go/test/endtoend/recovery/unshardedrecovery/recovery.go b/go/test/endtoend/recovery/unshardedrecovery/recovery.go index c001682ec10..2de27fd97da 100644 --- a/go/test/endtoend/recovery/unshardedrecovery/recovery.go +++ b/go/test/endtoend/recovery/unshardedrecovery/recovery.go @@ -200,6 +200,7 @@ func TestMainImpl(m *testing.M) { // 6. 
create a recovery keyspace after second backup // - bring up tablet_replica3 in the new keyspace // - check that new tablet has data from backup2 +// // 7. check that vtgate queries work correctly func TestRecoveryImpl(t *testing.T) { defer cluster.PanicHandler(t) From 28d2fc9e87306906006e452159eed16273c2dd63 Mon Sep 17 00:00:00 2001 From: Priya Bibra Date: Wed, 26 Jul 2023 15:01:57 -0700 Subject: [PATCH 7/7] add check for backup replica update in test Signed-off-by: Priya Bibra --- .../recovery/unshardedrecovery/recovery.go | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/go/test/endtoend/recovery/unshardedrecovery/recovery.go b/go/test/endtoend/recovery/unshardedrecovery/recovery.go index 2de27fd97da..f4db74bbf4e 100644 --- a/go/test/endtoend/recovery/unshardedrecovery/recovery.go +++ b/go/test/endtoend/recovery/unshardedrecovery/recovery.go @@ -256,6 +256,28 @@ func TestRecoveryImpl(t *testing.T) { assert.NoError(t, err) assert.Equal(t, "msgx1", qr.Rows[0][0].ToString()) + // check that replica1, used for the backup, has the new value + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + ticker := time.NewTicker(time.Second) + defer ticker.Stop() + + for { + qr, err = replica1.VttabletProcess.QueryTablet("select msg from vt_insert_test where id = 1", keyspaceName, true) + require.NoError(t, err) // stop here on a query error instead of indexing into the result below + if qr.Rows[0][0].ToString() == "msgx1" { + break + } + + select { + case <-ctx.Done(): + t.Error("timeout waiting for new value to be replicated on replica 1") + return // a bare break would only leave the select, so the for loop would spin forever after the timeout + case <-ticker.C: + } + } + // take second backup of value = msgx1 err = localCluster.VtctlclientProcess.ExecuteCommand("Backup", replica1.Alias) assert.NoError(t, err)