Skip to content

Commit

Permalink
Try to deflake the across_db_versions workflow
Browse files Browse the repository at this point in the history
Signed-off-by: Matt Lord <mattalord@gmail.com>
  • Loading branch information
mattlord committed Sep 22, 2023
1 parent d57bd05 commit 41860c8
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 3 deletions.
2 changes: 1 addition & 1 deletion go/test/endtoend/cluster/mysqlctl_process.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ func (mysqlctl *MysqlctlProcess) Stop() (err error) {
// We first need to try to kill any associated mysqld_safe (or mariadbd-safe)
// process, or else it will immediately restart the mysqld process when we kill it.
mspidb, err := exec.Command("sh", "-c",
fmt.Sprintf("ps auxww | grep mysqld_safe | grep vt_%010d | awk '{print $2}'", mysqlctl.TabletUID)).Output()
fmt.Sprintf("ps auxww | grep -E 'mysqld_safe|mariadbd-safe' | grep vt_%010d | awk '{print $2}'", mysqlctl.TabletUID)).Output()
if err != nil {
return err
}
Expand Down
31 changes: 29 additions & 2 deletions go/test/endtoend/vreplication/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -573,12 +573,39 @@ func (vc *VitessCluster) AddShards(t *testing.T, cells []*Cell, keyspace *Keyspa
log.Infof("Waiting for mysql process for tablet %s", tablets[ind].Name)
if err := proc.Wait(); err != nil {
// Retry starting the database process before giving up.
t.Logf("%v :: Unable to start mysql server for %v. Will retry...", err, tablets[ind].Vttablet)
t.Logf("%v :: Unable to start mysql server for %v. Will cleanup files and processes, then retry...", err, tablets[ind].Vttablet)
tablets[ind].DbServer.CleanupFiles(tablets[ind].Vttablet.TabletUID)
time.Sleep(1 * time.Second)
// Kill any process that's listening on the port we want to
// use as that is the most common problem.
tablets[ind].DbServer.Stop()
killCmd := exec.Command("sudo", "fuser", "-n", "tcp", "-k", fmt.Sprintf("%d", tablets[ind].DbServer.MySQLPort))
if err := killCmd.Run(); err != nil {
log.Errorf("Failed to kill process listening on port %d: %v", tablets[ind].DbServer.MySQLPort, err)
}
// Sleep for the kernel's TCP TIME_WAIT timeout to avoid the
// "port already in use" error, which is the most common cause of
// the process failing to start. It's a long wait, but it's worth
// it to avoid the test/workflow failure that otherwise occurs.
time.Sleep(60 * time.Second)
dbcmd, err := tablets[ind].DbServer.StartProcess()
require.NoError(t, err)
if err = dbcmd.Wait(); err != nil {
// Get logs to help understand why it failed...
vtdataroot := os.Getenv("VTDATAROOT")
mysqlctlLog := path.Join(vtdataroot, "/tmp/mysqlctl.INFO")
logBytes, ferr := os.ReadFile(mysqlctlLog)
if ferr == nil {
log.Errorf("mysqlctl log contents:\n%s", string(logBytes))
} else {
log.Errorf("Failed to read the mysqlctl log file %q: %v", mysqlctlLog, ferr)
}
mysqldLog := path.Join(vtdataroot, fmt.Sprintf("/vt_%010d/error.log", tablets[ind].Vttablet.TabletUID))
logBytes, ferr = os.ReadFile(mysqldLog)
if ferr == nil {
log.Errorf("mysqld error log contents:\n%s", string(logBytes))
} else {
log.Errorf("Failed to read the mysqld error log file %q: %v", mysqldLog, ferr)
}
output, _ := dbcmd.CombinedOutput()
t.Fatalf("%v :: Unable to start mysql server for %v; Output: %s", err,
tablets[ind].Vttablet, string(output))
Expand Down

0 comments on commit 41860c8

Please sign in to comment.