Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[release-16.0] OnlineDDL: fix nil 'completed_timestamp' for cancelled migrations (#13928) #13936

Merged
merged 3 commits into from
Sep 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,9 @@ func TestSchemaChange(t *testing.T) {
t.Run("summary: validate sequential migration IDs", func(t *testing.T) {
onlineddl.ValidateSequentialMigrationIDs(t, &vtParams, shards)
})
t.Run("summary: validate completed_timestamp", func(t *testing.T) {
onlineddl.ValidateCompletedTimestamp(t, &vtParams)
})
}

func testScheduler(t *testing.T) {
Expand Down
13 changes: 8 additions & 5 deletions go/test/endtoend/onlineddl/vrepl/onlineddl_vrepl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ func TestMain(m *testing.M) {
if err != nil {
fmt.Printf("%v\n", err)
os.Exit(1)
} else {
} else { // nolint:revive
os.Exit(exitcode)
}

Expand Down Expand Up @@ -537,11 +537,11 @@ func TestSchemaChange(t *testing.T) {
onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusRunning)
})
t.Run("wait for vreplication to run on shard -80", func(t *testing.T) {
vreplStatus := onlineddl.WaitForVReplicationStatus(t, &vtParams, currentPrimaryTablet, uuid, normalMigrationWait, "Copying", "Running")
vreplStatus := onlineddl.WaitForVReplicationStatus(t, currentPrimaryTablet, uuid, normalMigrationWait, "Copying", "Running")
require.Contains(t, []string{"Copying", "Running"}, vreplStatus)
})
t.Run("wait for vreplication to run on shard 80-", func(t *testing.T) {
vreplStatus := onlineddl.WaitForVReplicationStatus(t, &vtParams, shards[1].Vttablets[0], uuid, normalMigrationWait, "Copying", "Running")
vreplStatus := onlineddl.WaitForVReplicationStatus(t, shards[1].Vttablets[0], uuid, normalMigrationWait, "Copying", "Running")
require.Contains(t, []string{"Copying", "Running"}, vreplStatus)
})
t.Run("check status again", func(t *testing.T) {
Expand Down Expand Up @@ -646,11 +646,11 @@ func TestSchemaChange(t *testing.T) {
onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusRunning)
})
t.Run("wait for vreplication to run on shard -80", func(t *testing.T) {
vreplStatus := onlineddl.WaitForVReplicationStatus(t, &vtParams, currentPrimaryTablet, uuid, normalMigrationWait, "Copying", "Running")
vreplStatus := onlineddl.WaitForVReplicationStatus(t, currentPrimaryTablet, uuid, normalMigrationWait, "Copying", "Running")
require.Contains(t, []string{"Copying", "Running"}, vreplStatus)
})
t.Run("wait for vreplication to run on shard 80-", func(t *testing.T) {
vreplStatus := onlineddl.WaitForVReplicationStatus(t, &vtParams, shards[1].Vttablets[0], uuid, normalMigrationWait, "Copying", "Running")
vreplStatus := onlineddl.WaitForVReplicationStatus(t, shards[1].Vttablets[0], uuid, normalMigrationWait, "Copying", "Running")
require.Contains(t, []string{"Copying", "Running"}, vreplStatus)
})
t.Run("check status again", func(t *testing.T) {
Expand Down Expand Up @@ -888,6 +888,9 @@ func TestSchemaChange(t *testing.T) {
t.Run("summary: validate sequential migration IDs", func(t *testing.T) {
onlineddl.ValidateSequentialMigrationIDs(t, &vtParams, shards)
})
t.Run("summary: validate completed_timestamp", func(t *testing.T) {
onlineddl.ValidateCompletedTimestamp(t, &vtParams)
})
}

func insertRow(t *testing.T) {
Expand Down
38 changes: 37 additions & 1 deletion go/test/endtoend/onlineddl/vtgate_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,14 @@ import (
"github.com/stretchr/testify/require"
)

var (
testsStartupTime time.Time
)

func init() {
testsStartupTime = time.Now()
}

// VtgateExecQuery runs a query on VTGate using given query params
func VtgateExecQuery(t *testing.T, vtParams *mysql.ConnParams, query string, expectError string) *sqltypes.Result {
t.Helper()
Expand Down Expand Up @@ -354,7 +362,7 @@ func WaitForThrottlerStatusEnabled(t *testing.T, tablet *cluster.Vttablet, timeo
jsonPath := "IsEnabled"
url := fmt.Sprintf("http://localhost:%d/throttler/status", tablet.HTTPPort)

ctx, cancel := context.WithTimeout(context.Background(), throttlerConfigTimeout)
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()

ticker := time.NewTicker(time.Second)
Expand Down Expand Up @@ -428,3 +436,31 @@ func ValidateSequentialMigrationIDs(t *testing.T, vtParams *mysql.ConnParams, sh
assert.Equalf(t, count, shardMax[shard]-shardMin[shard]+1, "mismatch: shared=%v, count=%v, min=%v, max=%v", shard, count, shardMin[shard], shardMax[shard])
}
}

// ValidateCompletedTimestamp ensures that any migration in `cancelled`, `completed`, `failed` statuses
// has a non-nil and valid `completed_timestamp` value.
func ValidateCompletedTimestamp(t *testing.T, vtParams *mysql.ConnParams) {
require.False(t, testsStartupTime.IsZero())
r := VtgateExecQuery(t, vtParams, "show vitess_migrations", "")

completedTimestampNumValidations := 0
for _, row := range r.Named().Rows {
migrationStatus := row.AsString("migration_status", "")
require.NotEmpty(t, migrationStatus)
switch migrationStatus {
case string(schema.OnlineDDLStatusComplete),
string(schema.OnlineDDLStatusFailed),
string(schema.OnlineDDLStatusCancelled):
{
assert.False(t, row["completed_timestamp"].IsNull())
// Also make sure the timestamp is "real", and that it is recent.
timestamp := row.AsString("completed_timestamp", "")
completedTime, err := time.Parse(sqltypes.TimestampFormat, timestamp)
assert.NoError(t, err)
assert.Greater(t, completedTime.Unix(), testsStartupTime.Unix())
completedTimestampNumValidations++
}
}
}
assert.NotZero(t, completedTimestampNumValidations)
}
3 changes: 1 addition & 2 deletions go/test/endtoend/onlineddl/vttablet_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import (
"testing"
"time"

"vitess.io/vitess/go/mysql"
"vitess.io/vitess/go/sqltypes"
"vitess.io/vitess/go/vt/sqlparser"

Expand All @@ -31,7 +30,7 @@ import (
)

// WaitForVReplicationStatus waits for a vreplication stream to be in one of given states, or timeout
func WaitForVReplicationStatus(t *testing.T, vtParams *mysql.ConnParams, tablet *cluster.Vttablet, uuid string, timeout time.Duration, expectStatuses ...string) (status string) {
func WaitForVReplicationStatus(t *testing.T, tablet *cluster.Vttablet, uuid string, timeout time.Duration, expectStatuses ...string) (status string) {

query, err := sqlparser.ParseAndBind("select state from _vt.vreplication where workflow=%a",
sqltypes.StringBindVariable(uuid),
Expand Down
23 changes: 17 additions & 6 deletions go/vt/vttablet/onlineddl/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,13 @@ var vexecInsertTemplates = []string{
'val1', 'val2', 'val3', 'val4', 'val5', 'val6', 'val7', 'val8', 'val9', FROM_UNIXTIME(0), 'vala', 'valb'
)`,
}
var (
// fixCompletedTimestampDone fixes a nil `completed_tiemstamp` columns, see
// https://github.com/vitessio/vitess/issues/13927
// The fix is in release-18.0
// TODO: remove in release-19.0
fixCompletedTimestampDone bool
)

var emptyResult = &sqltypes.Result{}
var acceptableDropTableIfExistsErrorCodes = []int{mysql.ERCantFindFile, mysql.ERNoSuchTable}
Expand Down Expand Up @@ -3852,13 +3859,17 @@ func (e *Executor) gcArtifacts(ctx context.Context) error {
e.migrationMutex.Lock()
defer e.migrationMutex.Unlock()

if _, err := e.execQuery(ctx, sqlFixCompletedTimestamp); err != nil {
// This query fixes a bug where stale migrations were marked as 'failed' without updating 'completed_timestamp'
// see https://github.com/vitessio/vitess/issues/8499
// Running this query retroactively sets completed_timestamp
// This 'if' clause can be removed in version v13
return err
// v18 fix (backported to 16). Remove in v19
if !fixCompletedTimestampDone {
if _, err := e.execQuery(ctx, sqlFixCompletedTimestamp); err != nil {
// This query fixes a bug where stale migrations were marked as 'cancelled' or 'failed' without updating 'completed_timestamp'
// Running this query retroactively sets completed_timestamp
// This fix is created in v18 and can be removed in v19
return err
}
fixCompletedTimestampDone = true
}

query, err := sqlparser.ParseAndBind(sqlSelectUncollectedArtifacts,
sqltypes.Int64BindVariable(int64((retainOnlineDDLTables).Seconds())),
)
Expand Down
5 changes: 3 additions & 2 deletions go/vt/vttablet/onlineddl/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ const (
migration_uuid=%a
`
sqlUpdateMigrationStatusFailedOrCancelled = `UPDATE _vt.schema_migrations
SET migration_status=IF(cancelled_timestamp IS NULL, 'failed', 'cancelled')
SET migration_status=IF(cancelled_timestamp IS NULL, 'failed', 'cancelled'),
completed_timestamp=NOW(6)
WHERE
migration_uuid=%a
`
Expand Down Expand Up @@ -349,7 +350,7 @@ const (
SET
completed_timestamp=NOW(6)
WHERE
migration_status='failed'
migration_status IN ('cancelled', 'failed')
AND cleanup_timestamp IS NULL
AND completed_timestamp IS NULL
`
Expand Down
Loading