Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deflake TestPlannedReparentShardPromoteReplicaFail #13548

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions go/vt/wrangler/testlib/planned_reparent_shard_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -800,6 +800,9 @@ func TestPlannedReparentShardPromoteReplicaFail(t *testing.T) {
oldPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = newPrimary.FakeMysqlDaemon.WaitPrimaryPositions[0]
oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet))
oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
"FAKE SET MASTER",
"START SLAVE",
// We call SetReplicationSource explicitly in the test
"FAKE SET MASTER",
"START SLAVE",
// extra SetReplicationSource call due to retry
Expand Down Expand Up @@ -855,6 +858,13 @@ func TestPlannedReparentShardPromoteReplicaFail(t *testing.T) {
assert.True(t, newPrimary.FakeMysqlDaemon.ReadOnly, "newPrimary.FakeMysqlDaemon.ReadOnly")
assert.True(t, oldPrimary.FakeMysqlDaemon.ReadOnly, "oldPrimary.FakeMysqlDaemon.ReadOnly")

// After the first call to PRS has failed, we don't know whether `SetReplicationSource` RPC has succeeded on the oldPrimary or not.
// This causes the test to become non-deterministic. To prevent this, we call `SetReplicationSource` on the oldPrimary again, and make sure it has succeeded.
// We also wait until the oldPrimary has demoted itself to a replica type.
err = wr.TabletManagerClient().SetReplicationSource(context.Background(), oldPrimary.Tablet, newPrimary.Tablet.Alias, 0, "", false, false)
require.NoError(t, err)
waitForTabletType(t, wr, oldPrimary.Tablet.Alias, topodatapb.TabletType_REPLICA)

// retrying should work
newPrimary.FakeMysqlDaemon.PromoteError = nil
newPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = newPrimary.FakeMysqlDaemon.WaitPrimaryPositions[0]
Expand All @@ -868,6 +878,26 @@ func TestPlannedReparentShardPromoteReplicaFail(t *testing.T) {
assert.True(t, oldPrimary.FakeMysqlDaemon.ReadOnly, "oldPrimary.FakeMysqlDaemon.ReadOnly")
}

// waitForTabletType blocks until the tablet identified by tabletAlias is
// recorded in the topo server with the given tabletType.
//
// The tablet record is re-read via wr.TopoServer().GetTablet roughly every
// 100ms for up to 15s. The test is failed immediately if a read returns an
// error, or if the expected type has not been observed before the timeout.
func waitForTabletType(t *testing.T, wr *wrangler.Wrangler, tabletAlias *topodatapb.TabletAlias, tabletType topodatapb.TabletType) {
	// Attribute failures to the calling test rather than to this helper.
	t.Helper()

	timeout := time.After(15 * time.Second)
	ticker := time.NewTicker(100 * time.Millisecond)
	defer ticker.Stop()

	for {
		tablet, err := wr.TopoServer().GetTablet(context.Background(), tabletAlias)
		require.NoError(t, err)
		if tablet.Type == tabletType {
			return
		}

		// Wait for the next poll interval, or give up once the overall
		// deadline has passed. t.Fatalf stops the test goroutine, so no
		// explicit return is needed after it.
		select {
		case <-timeout:
			t.Fatalf("%s didn't reach the tablet type %v", topoproto.TabletAliasString(tabletAlias), tabletType.String())
		case <-ticker.C:
		}
	}
}

// TestPlannedReparentShardSamePrimary tests PRS with oldPrimary works correctly
// Simulate failure of previous PRS and oldPrimary is ReadOnly
// Verify that primary correctly gets set to ReadWrite
Expand Down