From d95cffd73bce8c7e58b07626d7af5d9b41d6ef8d Mon Sep 17 00:00:00 2001 From: William Lu <31415453+WilliamLu99@users.noreply.github.com> Date: Wed, 17 May 2023 01:51:33 -0400 Subject: [PATCH] Fix: convertBoolToSemiSyncAction method to account for all semi sync actions (#13075) * Fix convertBoolToSemiSyncAction method to account for all semi sync actions Signed-off-by: William Lu * Add VitessError VT09013 Signed-off-by: William Lu --------- Signed-off-by: William Lu Signed-off-by: William Lu --- go/cmd/vtcombo/main.go | 5 ++ .../reparent/newfeaturetest/reparent_test.go | 37 +++++++++++++++ go/vt/mysqlctl/fakemysqldaemon.go | 5 ++ go/vt/mysqlctl/mysql_daemon.go | 1 + go/vt/mysqlctl/replication.go | 13 ++++++ go/vt/vterrors/code.go | 2 + go/vt/vttablet/tabletmanager/rpc_actions.go | 30 ++++++++++-- .../vttablet/tabletmanager/rpc_replication.go | 46 +++++++++++++++---- go/vt/vttablet/tabletmanager/tm_init.go | 9 +++- 9 files changed, 134 insertions(+), 14 deletions(-) diff --git a/go/cmd/vtcombo/main.go b/go/cmd/vtcombo/main.go index a6870a017fb..10f8f3d3966 100644 --- a/go/cmd/vtcombo/main.go +++ b/go/cmd/vtcombo/main.go @@ -345,3 +345,8 @@ func (mysqld *vtcomboMysqld) StopReplication(hookExtraEnv map[string]string) err func (mysqld *vtcomboMysqld) SetSemiSyncEnabled(source, replica bool) error { return nil } + +// SemiSyncExtensionLoaded implements the MysqlDaemon interface +func (mysqld *vtcomboMysqld) SemiSyncExtensionLoaded() (bool, error) { + return true, nil +} diff --git a/go/test/endtoend/reparent/newfeaturetest/reparent_test.go b/go/test/endtoend/reparent/newfeaturetest/reparent_test.go index db7784f6459..d5f37dc8604 100644 --- a/go/test/endtoend/reparent/newfeaturetest/reparent_test.go +++ b/go/test/endtoend/reparent/newfeaturetest/reparent_test.go @@ -17,6 +17,8 @@ limitations under the License. 
package newfeaturetest import ( + "context" + "fmt" "testing" "github.com/stretchr/testify/require" @@ -109,3 +111,38 @@ func TestTabletRestart(t *testing.T) { err := tablets[1].VttabletProcess.Setup() require.NoError(t, err) } + +// Test ensures that ChangeTabletType works even when semi-sync plugins are not loaded. +func TestChangeTypeWithoutSemiSync(t *testing.T) { + defer cluster.PanicHandler(t) + clusterInstance := utils.SetupReparentCluster(t, "none") + defer utils.TeardownCluster(clusterInstance) + tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets + + ctx := context.Background() + + primary, replica := tablets[0], tablets[1] + + // Unload semi sync plugins + for _, tablet := range tablets[0:4] { + qr := utils.RunSQL(ctx, t, "select @@global.super_read_only", tablet) + result := fmt.Sprintf("%v", qr.Rows[0][0].ToString()) + if result == "1" { + utils.RunSQL(ctx, t, "set global super_read_only = 0", tablet) + } + + utils.RunSQL(ctx, t, "UNINSTALL PLUGIN rpl_semi_sync_slave;", tablet) + utils.RunSQL(ctx, t, "UNINSTALL PLUGIN rpl_semi_sync_master;", tablet) + } + + utils.ValidateTopology(t, clusterInstance, true) + utils.CheckPrimaryTablet(t, clusterInstance, primary) + + // Change replica's type to rdonly + err := clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeTabletType", replica.Alias, "rdonly") + require.NoError(t, err) + + // Change replica's type from rdonly back to replica + err = clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeTabletType", replica.Alias, "replica") + require.NoError(t, err) +} diff --git a/go/vt/mysqlctl/fakemysqldaemon.go b/go/vt/mysqlctl/fakemysqldaemon.go index 475d4251fbc..1ad9cf2d6c2 100644 --- a/go/vt/mysqlctl/fakemysqldaemon.go +++ b/go/vt/mysqlctl/fakemysqldaemon.go @@ -667,6 +667,11 @@ func (fmd *FakeMysqlDaemon) SemiSyncClients() uint32 { return 0 } +// SemiSyncExtensionLoaded is part of the MysqlDaemon interface. 
+func (fmd *FakeMysqlDaemon) SemiSyncExtensionLoaded() (bool, error) { + return true, nil +} + // SemiSyncSettings is part of the MysqlDaemon interface. func (fmd *FakeMysqlDaemon) SemiSyncSettings() (timeout uint64, numReplicas uint32) { return 10000000, 1 diff --git a/go/vt/mysqlctl/mysql_daemon.go b/go/vt/mysqlctl/mysql_daemon.go index 190b81fb001..7b21fb5cab9 100644 --- a/go/vt/mysqlctl/mysql_daemon.go +++ b/go/vt/mysqlctl/mysql_daemon.go @@ -57,6 +57,7 @@ type MysqlDaemon interface { GetGTIDPurged(ctx context.Context) (mysql.Position, error) SetSemiSyncEnabled(source, replica bool) error SemiSyncEnabled() (source, replica bool) + SemiSyncExtensionLoaded() (bool, error) SemiSyncStatus() (source, replica bool) SemiSyncClients() (count uint32) SemiSyncSettings() (timeout uint64, numReplicas uint32) diff --git a/go/vt/mysqlctl/replication.go b/go/vt/mysqlctl/replication.go index 10258b16d2c..2381fa7294f 100644 --- a/go/vt/mysqlctl/replication.go +++ b/go/vt/mysqlctl/replication.go @@ -767,3 +767,16 @@ func (mysqld *Mysqld) SemiSyncReplicationStatus() (bool, error) { } return false, nil } + +// SemiSyncExtensionLoaded returns whether semi-sync plugins are loaded. 
+func (mysqld *Mysqld) SemiSyncExtensionLoaded() (bool, error) { + qr, err := mysqld.FetchSuperQuery(context.Background(), "SELECT COUNT(*) > 0 AS plugin_loaded FROM information_schema.plugins WHERE plugin_name LIKE 'rpl_semi_sync%'") + if err != nil { + return false, err + } + pluginPresent, err := qr.Rows[0][0].ToBool() + if err != nil { + return false, err + } + return pluginPresent, nil +} diff --git a/go/vt/vterrors/code.go b/go/vt/vterrors/code.go index b046064c91b..5918abb2bfb 100644 --- a/go/vt/vterrors/code.go +++ b/go/vt/vterrors/code.go @@ -71,6 +71,7 @@ var ( VT09010 = errorWithoutState("VT09010", vtrpcpb.Code_FAILED_PRECONDITION, "SHOW VITESS_THROTTLER STATUS works only on primary tablet", "SHOW VITESS_THROTTLER STATUS works only on primary tablet.") VT09011 = errorWithState("VT09011", vtrpcpb.Code_FAILED_PRECONDITION, UnknownStmtHandler, "Unknown prepared statement handler (%s) given to %s", "The prepared statement is not available") VT09012 = errorWithoutState("VT09012", vtrpcpb.Code_FAILED_PRECONDITION, "%s statement with %s tablet not allowed", "This type of statement is not allowed on the given tablet.") + VT09013 = errorWithoutState("VT09013", vtrpcpb.Code_FAILED_PRECONDITION, "semi-sync plugins are not loaded", "Durability policy wants Vitess to use semi-sync, but the MySQL instances don't have the semi-sync plugin loaded.") VT10001 = errorWithoutState("VT10001", vtrpcpb.Code_ABORTED, "foreign key constraints are not allowed", "Foreign key constraints are not allowed, see https://vitess.io/blog/2021-06-15-online-ddl-why-no-fk/.") @@ -132,6 +133,7 @@ var ( VT09010, VT09011, VT09012, + VT09013, VT10001, VT12001, VT13001, diff --git a/go/vt/vttablet/tabletmanager/rpc_actions.go b/go/vt/vttablet/tabletmanager/rpc_actions.go index 1093c331a1a..d27cff5ebc8 100644 --- a/go/vt/vttablet/tabletmanager/rpc_actions.go +++ b/go/vt/vttablet/tabletmanager/rpc_actions.go @@ -82,7 +82,13 @@ func (tm *TabletManager) ChangeType(ctx context.Context, tabletType 
topodatapb.T return err } defer tm.unlock() - return tm.changeTypeLocked(ctx, tabletType, DBActionNone, convertBoolToSemiSyncAction(semiSync)) + + semiSyncAction, err := tm.convertBoolToSemiSyncAction(semiSync) + if err != nil { + return err + } + + return tm.changeTypeLocked(ctx, tabletType, DBActionNone, semiSyncAction) } // ChangeType changes the tablet type @@ -142,9 +148,23 @@ func (tm *TabletManager) RunHealthCheck(ctx context.Context) { tm.QueryServiceControl.BroadcastHealth() } -func convertBoolToSemiSyncAction(semiSync bool) SemiSyncAction { - if semiSync { - return SemiSyncActionSet +func (tm *TabletManager) convertBoolToSemiSyncAction(semiSync bool) (SemiSyncAction, error) { + semiSyncExtensionLoaded, err := tm.MysqlDaemon.SemiSyncExtensionLoaded() + if err != nil { + return SemiSyncActionNone, err + } + + if semiSyncExtensionLoaded { + if semiSync { + return SemiSyncActionSet, nil + } else { + return SemiSyncActionUnset, nil + } + } else { + if semiSync { + return SemiSyncActionNone, vterrors.VT09013() + } else { + return SemiSyncActionNone, nil + } } - return SemiSyncActionUnset } diff --git a/go/vt/vttablet/tabletmanager/rpc_replication.go b/go/vt/vttablet/tabletmanager/rpc_replication.go index 445e8d94c8b..c5cd9edb8ae 100644 --- a/go/vt/vttablet/tabletmanager/rpc_replication.go +++ b/go/vt/vttablet/tabletmanager/rpc_replication.go @@ -253,7 +253,12 @@ func (tm *TabletManager) StartReplication(ctx context.Context, semiSync bool) er } defer tm.unlock() - if err := tm.fixSemiSync(tm.Tablet().Type, convertBoolToSemiSyncAction(semiSync)); err != nil { + semiSyncAction, err := tm.convertBoolToSemiSyncAction(semiSync) + if err != nil { + return err + } + + if err := tm.fixSemiSync(tm.Tablet().Type, semiSyncAction); err != nil { return err } return tm.MysqlDaemon.StartReplication(tm.hookExtraEnv()) @@ -325,16 +330,21 @@ func (tm *TabletManager) InitPrimary(ctx context.Context, semiSync bool) (string return "", err } + semiSyncAction, err := 
tm.convertBoolToSemiSyncAction(semiSync) + if err != nil { + return "", err + } + // Set the server read-write, from now on we can accept real // client writes. Note that if semi-sync replication is enabled, // we'll still need some replicas to be able to commit transactions. - if err := tm.changeTypeLocked(ctx, topodatapb.TabletType_PRIMARY, DBActionSetReadWrite, convertBoolToSemiSyncAction(semiSync)); err != nil { + if err := tm.changeTypeLocked(ctx, topodatapb.TabletType_PRIMARY, DBActionSetReadWrite, semiSyncAction); err != nil { return "", err } // Enforce semi-sync after changing the tablet type to PRIMARY. Otherwise, the // primary will hang while trying to create the database. - if err := tm.fixSemiSync(topodatapb.TabletType_PRIMARY, convertBoolToSemiSyncAction(semiSync)); err != nil { + if err := tm.fixSemiSync(topodatapb.TabletType_PRIMARY, semiSyncAction); err != nil { return "", err } @@ -364,11 +374,16 @@ func (tm *TabletManager) InitReplica(ctx context.Context, parent *topodatapb.Tab } defer tm.unlock() + semiSyncAction, err := tm.convertBoolToSemiSyncAction(semiSync) + if err != nil { + return err + } + // If we were a primary type, switch our type to replica. This // is used on the old primary when using InitShardPrimary with // -force, and the new primary is different from the old primary. 
if tm.Tablet().Type == topodatapb.TabletType_PRIMARY { - if err := tm.changeTypeLocked(ctx, topodatapb.TabletType_REPLICA, DBActionNone, convertBoolToSemiSyncAction(semiSync)); err != nil { + if err := tm.changeTypeLocked(ctx, topodatapb.TabletType_REPLICA, DBActionNone, semiSyncAction); err != nil { return err } } @@ -389,7 +404,7 @@ func (tm *TabletManager) InitReplica(ctx context.Context, parent *topodatapb.Tab if tt == topodatapb.TabletType_PRIMARY { tt = topodatapb.TabletType_REPLICA } - if err := tm.fixSemiSync(tt, convertBoolToSemiSyncAction(semiSync)); err != nil { + if err := tm.fixSemiSync(tt, semiSyncAction); err != nil { return err } @@ -523,8 +538,13 @@ func (tm *TabletManager) UndoDemotePrimary(ctx context.Context, semiSync bool) e } defer tm.unlock() + semiSyncAction, err := tm.convertBoolToSemiSyncAction(semiSync) + if err != nil { + return err + } + // If using semi-sync, we need to enable source-side. - if err := tm.fixSemiSync(topodatapb.TabletType_PRIMARY, convertBoolToSemiSyncAction(semiSync)); err != nil { + if err := tm.fixSemiSync(topodatapb.TabletType_PRIMARY, semiSyncAction); err != nil { return err } @@ -581,9 +601,14 @@ func (tm *TabletManager) SetReplicationSource(ctx context.Context, parentAlias * } defer tm.unlock() + semiSyncAction, err := tm.convertBoolToSemiSyncAction(semiSync) + if err != nil { + return err + } + // setReplicationSourceLocked also fixes the semi-sync. In case the tablet type is primary it assumes that it will become a replica if SetReplicationSource // is called, so we always call fixSemiSync with a non-primary tablet type. This will always set the source side replication to false. 
- return tm.setReplicationSourceLocked(ctx, parentAlias, timeCreatedNS, waitPosition, forceStartReplication, convertBoolToSemiSyncAction(semiSync)) + return tm.setReplicationSourceLocked(ctx, parentAlias, timeCreatedNS, waitPosition, forceStartReplication, semiSyncAction) } func (tm *TabletManager) setReplicationSourceRepairReplication(ctx context.Context, parentAlias *topodatapb.TabletAlias, timeCreatedNS int64, waitPosition string, forceStartReplication bool) (err error) { @@ -839,8 +864,13 @@ func (tm *TabletManager) PromoteReplica(ctx context.Context, semiSync bool) (str return "", err } + semiSyncAction, err := tm.convertBoolToSemiSyncAction(semiSync) + if err != nil { + return "", err + } + // If using semi-sync, we need to enable it before going read-write. - if err := tm.fixSemiSync(topodatapb.TabletType_PRIMARY, convertBoolToSemiSyncAction(semiSync)); err != nil { + if err := tm.fixSemiSync(topodatapb.TabletType_PRIMARY, semiSyncAction); err != nil { return "", err } diff --git a/go/vt/vttablet/tabletmanager/tm_init.go b/go/vt/vttablet/tabletmanager/tm_init.go index 0ade89e376e..aea0fd67aa1 100644 --- a/go/vt/vttablet/tabletmanager/tm_init.go +++ b/go/vt/vttablet/tabletmanager/tm_init.go @@ -907,8 +907,15 @@ func (tm *TabletManager) initializeReplication(ctx context.Context, tabletType t } // If using semi-sync, we need to enable it before connecting to primary. // We should set the correct type, since it is used in replica semi-sync + tablet.Type = tabletType - if err := tm.fixSemiSync(tabletType, convertBoolToSemiSyncAction(reparentutil.IsReplicaSemiSync(durability, currentPrimary.Tablet, tablet))); err != nil { + + semiSyncAction, err := tm.convertBoolToSemiSyncAction(reparentutil.IsReplicaSemiSync(durability, currentPrimary.Tablet, tablet)) + if err != nil { + return nil, err + } + + if err := tm.fixSemiSync(tabletType, semiSyncAction); err != nil { return nil, err }