Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

br: wait tiflash replicas ready && fix unstable test #46301

Merged
merged 8 commits into from
Aug 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 22 additions & 5 deletions br/pkg/restore/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -1824,11 +1824,28 @@
zap.Stringer("table", tbl.OldTable.Info.Name),
zap.Stringer("db", tbl.OldTable.DB.Name))
for {
progress, err := infosync.CalculateTiFlashProgress(tbl.Table.ID, tbl.Table.TiFlashReplica.Count, tiFlashStores)
if err != nil {
log.Warn("failed to get tiflash replica progress, wait for next retry", zap.Error(err))
time.Sleep(time.Second)
continue
var progress float64
if pi := tbl.Table.GetPartitionInfo(); pi != nil && len(pi.Definitions) > 0 {
for _, p := range pi.Definitions {
progressOfPartition, err := infosync.MustGetTiFlashProgress(p.ID, tbl.Table.TiFlashReplica.Count, &tiFlashStores)
if err != nil {
log.Warn("failed to get progress for tiflash partition replica, retry it",
zap.Int64("tableID", tbl.Table.ID), zap.Int64("partitionID", p.ID), zap.Error(err))
time.Sleep(time.Second)
continue

Check warning on line 1835 in br/pkg/restore/client.go

View check run for this annotation

Codecov / codecov/patch

br/pkg/restore/client.go#L1829-L1835

Added lines #L1829 - L1835 were not covered by tests
}
progress += progressOfPartition

Check warning on line 1837 in br/pkg/restore/client.go

View check run for this annotation

Codecov / codecov/patch

br/pkg/restore/client.go#L1837

Added line #L1837 was not covered by tests
}
progress = progress / float64(len(pi.Definitions))

Check warning on line 1839 in br/pkg/restore/client.go

View check run for this annotation

Codecov / codecov/patch

br/pkg/restore/client.go#L1839

Added line #L1839 was not covered by tests
} else {
var err error
progress, err = infosync.MustGetTiFlashProgress(tbl.Table.ID, tbl.Table.TiFlashReplica.Count, &tiFlashStores)
if err != nil {
log.Warn("failed to get progress for tiflash replica, retry it",
zap.Int64("tableID", tbl.Table.ID), zap.Error(err))
time.Sleep(time.Second)
continue

Check warning on line 1847 in br/pkg/restore/client.go

View check run for this annotation

Codecov / codecov/patch

br/pkg/restore/client.go#L1844-L1847

Added lines #L1844 - L1847 were not covered by tests
}
}
// check until progress is 1
if progress == 1 {
Expand Down
15 changes: 15 additions & 0 deletions br/tests/br_tiflash/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,29 @@ run_sql "CREATE DATABASE $DB"

run_sql "CREATE TABLE $DB.kv(k varchar(256) primary key, v int)"

run_sql "CREATE TABLE $DB.partition_kv(\
k INT, \
v INT, \
PRIMARY KEY(k) CLUSTERED \
) PARTITION BY RANGE(k) (\
PARTITION p0 VALUES LESS THAN (200), \
PARTITION p1 VALUES LESS THAN (400), \
PARTITION p2 VALUES LESS THAN MAXVALUE)"

stmt="INSERT INTO $DB.kv(k, v) VALUES ('1-record', 1)"
parition_stmt="INSERT INTO $DB.partition_kv(k, v) VALUES (1, 1)"
for i in $(seq 2 $RECORD_COUNT); do
stmt="$stmt,('$i-record', $i)"
parition_stmt="$parition_stmt,($i, $i)"
done
run_sql "$stmt"
run_sql "$parition_stmt"

if ! run_sql "ALTER TABLE $DB.kv SET TIFLASH REPLICA 1"; then
# 10s should be enough for tiflash-proxy get started
sleep 10
run_sql "ALTER TABLE $DB.kv SET TIFLASH REPLICA 1"
run_sql "ALTER TABLE $DB.partition_kv SET TIFLASH REPLICA 1"
fi


Expand All @@ -54,6 +67,8 @@ run_sql "DROP DATABASE $DB"
run_br restore full -s "local://$TEST_DIR/$DB" --pd $PD_ADDR --wait-tiflash-ready=true

# check TiFlash sync
echo "wait 3 seconds for tiflash tick puller triggered"
sleep 3
if ! [ $(run_sql "select * from information_schema.tiflash_replica" | grep "PROGRESS" | sed "s/[^0-9]//g") -eq 1 ]; then
echo "restore didn't wait tiflash synced after set --wait-tiflash-ready=true."
exit 1
Expand Down
5 changes: 5 additions & 0 deletions br/tests/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ export TEST_DIR=/tmp/backup_restore_test
export COV_DIR="/tmp/group_cover"
source $CUR/_utils/run_services

# Create COV_DIR if not exists
if [ -d "$COV_DIR" ]; then
mkdir -p $COV_DIR
fi

# Reset TEST_DIR
rm -rf $TEST_DIR && mkdir -p $TEST_DIR

Expand Down