diff --git a/go/inst/analysis_dao.go b/go/inst/analysis_dao.go index 08ef452cc..62cb0167a 100644 --- a/go/inst/analysis_dao.go +++ b/go/inst/analysis_dao.go @@ -475,8 +475,8 @@ func GetReplicationAnalysis(clusterName string, hints *ReplicationAnalysisHints) a.IsReadOnly = m.GetUint("read_only") == 1 if !a.LastCheckValid { - analysisMessage := fmt.Sprintf("analysis: ClusterName: %+v, IsMaster: %+v, LastCheckValid: %+v, LastCheckPartialSuccess: %+v, CountReplicas: %+v, CountValidReplicatingReplicas: %+v, CountLaggingReplicas: %+v, CountDelayedReplicas: %+v, ", - a.ClusterDetails.ClusterName, a.IsMaster, a.LastCheckValid, a.LastCheckPartialSuccess, a.CountReplicas, a.CountValidReplicatingReplicas, a.CountLaggingReplicas, a.CountDelayedReplicas, + analysisMessage := fmt.Sprintf("analysis: ClusterName: %+v, IsMaster: %+v, LastCheckValid: %+v, LastCheckPartialSuccess: %+v, CountReplicas: %+v, CountValidReplicas: %+v, CountValidReplicatingReplicas: %+v, CountLaggingReplicas: %+v, CountDelayedReplicas: %+v, CountReplicasFailingToConnectToMaster: %+v", + a.ClusterDetails.ClusterName, a.IsMaster, a.LastCheckValid, a.LastCheckPartialSuccess, a.CountReplicas, a.CountValidReplicas, a.CountValidReplicatingReplicas, a.CountLaggingReplicas, a.CountDelayedReplicas, a.CountReplicasFailingToConnectToMaster, ) if util.ClearToLog("analysis_dao", analysisMessage) { log.Debugf(analysisMessage) @@ -484,7 +484,7 @@ func GetReplicationAnalysis(clusterName string, hints *ReplicationAnalysisHints) } if a.IsMaster && !a.LastCheckValid && a.CountReplicas == 0 { a.Analysis = DeadMasterWithoutReplicas - a.Description = "Master cannot be reached by orchestrator and has no slave" + a.Description = "Master cannot be reached by orchestrator and has no replica" // } else if a.IsMaster && !a.LastCheckValid && a.CountValidReplicas == a.CountReplicas && a.CountValidReplicatingReplicas == 0 { a.Analysis = DeadMaster @@ -503,6 +503,12 @@ func GetReplicationAnalysis(clusterName string, hints 
*ReplicationAnalysisHints) a.Description = "Master cannot be reached by orchestrator and all of its replicas are lagging" // } else if a.IsMaster && !a.LastCheckValid && !a.LastCheckPartialSuccess && a.CountValidReplicas > 0 && a.CountValidReplicatingReplicas > 0 { + // partial success is here to reduce noise + a.Analysis = UnreachableMaster + a.Description = "Master cannot be reached by orchestrator but it has replicating replicas; possibly a network/host issue" + // + } else if a.IsMaster && !a.LastCheckValid && a.LastCheckPartialSuccess && a.CountReplicasFailingToConnectToMaster > 0 && a.CountValidReplicas > 0 && a.CountValidReplicatingReplicas > 0 { + // there's partial success, but also at least one replica is failing to connect to master a.Analysis = UnreachableMaster a.Description = "Master cannot be reached by orchestrator but it has replicating replicas; possibly a network/host issue" // @@ -517,11 +523,11 @@ func GetReplicationAnalysis(clusterName string, hints *ReplicationAnalysisHints) // } else if a.IsMaster && a.LastCheckValid && a.CountReplicas == 1 && a.CountValidReplicas == a.CountReplicas && a.CountValidReplicatingReplicas == 0 { a.Analysis = MasterSingleReplicaNotReplicating - a.Description = "Master is reachable but its single slave is not replicating" + a.Description = "Master is reachable but its single replica is not replicating" // } else if a.IsMaster && a.LastCheckValid && a.CountReplicas == 1 && a.CountValidReplicas == 0 { a.Analysis = MasterSingleReplicaDead - a.Description = "Master is reachable but its single slave is dead" + a.Description = "Master is reachable but its single replica is dead" // } else if a.IsMaster && a.LastCheckValid && a.CountReplicas > 1 && a.CountValidReplicas == a.CountReplicas && a.CountValidReplicatingReplicas == 0 { a.Analysis = AllMasterReplicasNotReplicating @@ -549,13 +555,13 @@ func GetReplicationAnalysis(clusterName string, hints *ReplicationAnalysisHints) // } else /* intermediate-master */ if 
!a.IsMaster && !a.LastCheckValid && a.CountReplicas == 1 && a.CountValidReplicas == a.CountReplicas && a.CountReplicasFailingToConnectToMaster == a.CountReplicas && a.CountValidReplicatingReplicas == 0 { a.Analysis = DeadIntermediateMasterWithSingleReplicaFailingToConnect - a.Description = "Intermediate master cannot be reached by orchestrator and its (single) slave is failing to connect" + a.Description = "Intermediate master cannot be reached by orchestrator and its (single) replica is failing to connect" // - } else /* intermediate-master */ if !a.IsMaster && !a.LastCheckValid && a.CountReplicas == 1 && a.CountValidReplicas == a.CountReplicas && a.CountValidReplicatingReplicas == 0 { + } else if !a.IsMaster && !a.LastCheckValid && a.CountReplicas == 1 && a.CountValidReplicas == a.CountReplicas && a.CountValidReplicatingReplicas == 0 { a.Analysis = DeadIntermediateMasterWithSingleReplica - a.Description = "Intermediate master cannot be reached by orchestrator and its (single) slave is not replicating" + a.Description = "Intermediate master cannot be reached by orchestrator and its (single) replica is not replicating" // - } else /* intermediate-master */ if !a.IsMaster && !a.LastCheckValid && a.CountReplicas > 1 && a.CountValidReplicas == a.CountReplicas && a.CountValidReplicatingReplicas == 0 { + } else if !a.IsMaster && !a.LastCheckValid && a.CountReplicas > 1 && a.CountValidReplicas == a.CountReplicas && a.CountValidReplicatingReplicas == 0 { a.Analysis = DeadIntermediateMaster a.Description = "Intermediate master cannot be reached by orchestrator and none of its replicas is replicating" // @@ -594,7 +600,7 @@ func GetReplicationAnalysis(clusterName string, hints *ReplicationAnalysisHints) // } else if a.ReplicationDepth == 1 && a.IsFailingToConnectToMaster { a.Analysis = FirstTierReplicaFailingToConnectToMaster - a.Description = "1st tier slave (directly replicating from topology master) is unable to connect to the master" + a.Description = "1st tier replica 
(directly replicating from topology master) is unable to connect to the master" // } // else if a.IsMaster && a.CountReplicas == 0 { diff --git a/tests/integration/analysis-unreachable-master-partial-success-broken-replica/create.sql b/tests/integration/analysis-unreachable-master-partial-success-broken-replica/create.sql new file mode 100644 index 000000000..e7e8cfca0 --- /dev/null +++ b/tests/integration/analysis-unreachable-master-partial-success-broken-replica/create.sql @@ -0,0 +1,3 @@ +UPDATE database_instance SET last_seen=last_checked - interval 1 minute where port=22293; +UPDATE database_instance SET last_check_partial_success = 1 where port=22293; +UPDATE database_instance SET slave_io_running=0, last_io_error='error connecting to master' where port=22294; diff --git a/tests/integration/analysis-unreachable-master-partial-success-broken-replica/expect_output b/tests/integration/analysis-unreachable-master-partial-success-broken-replica/expect_output new file mode 100644 index 000000000..eaee1b1f4 --- /dev/null +++ b/tests/integration/analysis-unreachable-master-partial-success-broken-replica/expect_output @@ -0,0 +1,2 @@ +testhost:22293 (cluster testhost:22293): UnreachableMaster +testhost:22294 (cluster testhost:22293): FirstTierReplicaFailingToConnectToMaster diff --git a/tests/integration/analysis-unreachable-master-partial-success-broken-replica/extra_args b/tests/integration/analysis-unreachable-master-partial-success-broken-replica/extra_args new file mode 100644 index 000000000..e294ffff2 --- /dev/null +++ b/tests/integration/analysis-unreachable-master-partial-success-broken-replica/extra_args @@ -0,0 +1 @@ +-c replication-analysis diff --git a/tests/integration/test.sh b/tests/integration/test.sh index b5df73c49..d630817dc 100755 --- a/tests/integration/test.sh +++ b/tests/integration/test.sh @@ -190,6 +190,7 @@ generate_config_file() { cp ${tests_path}/orchestrator.conf.json ${test_config_file} sed -i -e "s/backend-db-placeholder/${db_type}/g" 
${test_config_file} sed -i -e "s^sqlite-data-file-placeholder^${sqlite_file}^g" ${test_config_file} + touch "$test_mysql_defaults_file" # required even for sqlite because config file references the my.cnf cfg file echo "- generate_config_file OK" } @@ -202,7 +203,7 @@ test_all() { echo "- deploy_internal_db OK" test_pattern="${1:-.}" - find $tests_path ! -path . -type d -mindepth 1 -maxdepth 1 | xargs ls -td1 | cut -d "/" -f 4 | egrep "$test_pattern" | while read test_name ; do + find $tests_path -mindepth 1 -maxdepth 1 ! -path . -type d | xargs ls -td1 | cut -d "/" -f 4 | egrep "$test_pattern" | while read test_name ; do test_single "$test_name" if [ $? -ne 0 ] ; then echo "+ FAIL"