-
Notifications
You must be signed in to change notification settings - Fork 188
bug-fix: use table-info-before always and fix bug for recover lock in optimistic #1518
Changes from 3 commits
0dfad92
5101a9d
60f5f43
08fe90f
21d75d9
0325de4
1cd95d0
0a67c32
f9153cd
cb236a0
bebd18b
0196510
65bd0af
2f5e68f
ff42ec6
5e04e8f
c660ba9
8450ef4
814d7a7
fb89276
d274e6b
fadfd2b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -72,7 +72,7 @@ func NewLock(ID, task, downSchema, downTable string, ti *model.TableInfo, tts [] | |
synced: true, | ||
versions: make(map[string]map[string]map[string]int64), | ||
} | ||
l.addTables(tts) | ||
l.addTables(l.joined, tts) | ||
metrics.ReportDDLPending(task, metrics.DDLPendingNone, metrics.DDLPendingSynced) | ||
|
||
return l | ||
|
@@ -149,13 +149,18 @@ func (l *Lock) TrySync(info Info, tts []TargetTable) (newDDLs []string, err erro | |
return ddls, terror.ErrMasterInconsistentOptimisticDDLsAndInfo.Generate(len(ddls), len(newTIs)) | ||
} | ||
|
||
// should not happen | ||
if info.TableInfoBefore == nil { | ||
return ddls, terror.ErrMasterOptimisticTableInfoBeforeNotExist.Generate(ddls) | ||
} | ||
oldTable := schemacmp.Encode(info.TableInfoBefore) | ||
// handle the case where <callerSource, callerSchema, callerTable> | ||
// is not in old source tables and current new source tables. | ||
// duplicate append is not a problem. | ||
tts = append(tts, newTargetTable(l.Task, callerSource, l.DownSchema, l.DownTable, | ||
map[string]map[string]struct{}{callerSchema: {callerTable: struct{}{}}})) | ||
// add any new source tables. | ||
l.addTables(tts) | ||
l.addTables(oldTable, tts) | ||
if val, ok := l.versions[callerSource][callerSchema][callerTable]; !ok || val < infoVersion { | ||
l.versions[callerSource][callerSchema][callerTable] = infoVersion | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. now we changed the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Did you mean joining all the tables to get the old joined? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm just not sure whether the correctness depends on "old joined should be the joined result of all l.tables", or on "old joined of the previous TrySync and new joined of the next TrySync should stay consistent" There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we assume the user runs start-task with the same table info in all upstreams, tableInfoBefore should be the same as oldJoined or oldtable. Otherwise, the oldJoined is not correct, and we may keep it and hope a later join returns an error? cc @lichunzhu There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If dm-master/dm-worker restarts, the table info in all upstreams is more likely to be different. What's the problem here? I don't understand this clearly. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But if the tableInfoBefore is not equal to l.tables[source][schema][table], that means l.tables[source][schema][table] and oldjoined may be wrong; do we need to join all the tables with tableInfoBefore as oldjoined? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I thought carefully about this situation. Could you help me check whether I'm wrong? When this happens, it means we are adding a table whose table schema is not equal to the current joined table info. When we init all the tables, we have three situations:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So you mean that if we get an info for a table whose table info we have already received, and their schemas are different, we should report an error? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. But I don't think we should report an error in situation 3. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
That means the table hasn't been received. In the unit tests, we have some idempotent TrySync calls; how do we deal with that case? 🤔 |
||
|
@@ -205,6 +210,7 @@ func (l *Lock) TrySync(info Info, tts []TargetTable) (newDDLs []string, err erro | |
// special case: if the DDL does not affect the schema at all, assume it is | ||
// idempotent and just execute the DDL directly. | ||
// if any real conflicts after joined exist, they will be detected by the following steps. | ||
// this often happens when executing `CREATE TABLE` statement | ||
var cmp int | ||
if cmp, err = nextTable.Compare(oldJoined); err == nil && cmp == 0 { | ||
newDDLs = append(newDDLs, ddls[idx]) | ||
|
@@ -465,7 +471,7 @@ func (l *Lock) tryRevertDone(source, schema, table string) { | |
} | ||
|
||
// addTables adds any not-existing tables into the lock. | ||
func (l *Lock) addTables(tts []TargetTable) { | ||
func (l *Lock) addTables(tb schemacmp.Table, tts []TargetTable) { | ||
for _, tt := range tts { | ||
if _, ok := l.tables[tt.Source]; !ok { | ||
l.tables[tt.Source] = make(map[string]map[string]schemacmp.Table) | ||
|
@@ -480,8 +486,7 @@ func (l *Lock) addTables(tts []TargetTable) { | |
} | ||
for table := range tables { | ||
if _, ok := l.tables[tt.Source][schema][table]; !ok { | ||
// NOTE: the newly added table uses the current table info. | ||
l.tables[tt.Source][schema][table] = l.joined | ||
l.tables[tt.Source][schema][table] = tb | ||
l.done[tt.Source][schema][table] = false | ||
l.versions[tt.Source][schema][table] = 0 | ||
log.L().Info("table added to the lock", zap.String("lock", l.ID), | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What if `tts` contains other tables whose schema didn't reach `l.joined`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Here l.joined equals tableInfoBefore. If it's a new lock (first table), tableInfoBefore = l.joined. Other, newer tables will be added in L163.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `tts` here is from https://github.com/pingcap/dm/blob/master/dm/master/shardddl/optimist.go#L267. `optimistic` will get all source tables from etcd, and this `tts` may contain not only the table in this info, I think.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
em... That's right, I will fix it