Skip to content

Commit

Permalink
Merge pull request #3489 from filecoin-project/fix/sched-deadlocks
Browse files (browse the repository at this point in the history)
sealing sched: Fix deadlock between worker.wndLk / workersLk
  • Loading branch information
magik6k authored Sep 2, 2020
2 parents cfbbcd4 + 5a2b439 commit 5f79ff3
Showing 1 changed file with 8 additions and 5 deletions.
13 changes: 8 additions & 5 deletions extern/sector-storage/sched.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -563,6 +563,7 @@ func (sh *scheduler) runWorker(wid WorkerID) {
return
}

sh.workersLk.RLock()
worker.wndLk.Lock()

windowsRequested -= sh.workerCompactWindows(worker, wid)
Expand All @@ -574,8 +575,6 @@ func (sh *scheduler) runWorker(wid WorkerID) {

// process tasks within a window, preferring tasks at lower indexes
for len(firstWindow.todo) > 0 {
sh.workersLk.RLock()

tidx := -1

worker.lk.Lock()
Expand All @@ -589,15 +588,13 @@ func (sh *scheduler) runWorker(wid WorkerID) {
worker.lk.Unlock()

if tidx == -1 {
sh.workersLk.RUnlock()
break assignLoop
}

todo := firstWindow.todo[tidx]

log.Debugf("assign worker sector %d", todo.sector.Number)
err := sh.assignWorker(taskDone, wid, worker, todo)
sh.workersLk.RUnlock()

if err != nil {
log.Error("assignWorker error: %+v", err)
Expand All @@ -618,6 +615,7 @@ func (sh *scheduler) runWorker(wid WorkerID) {
}

worker.wndLk.Unlock()
sh.workersLk.RUnlock()
}
}()
}
Expand Down Expand Up @@ -776,14 +774,19 @@ func (sh *scheduler) dropWorker(wid WorkerID) {
}

func (sh *scheduler) workerCleanup(wid WorkerID, w *workerHandle) {
if !w.cleanupStarted {
select {
case <-w.closingMgr:
default:
close(w.closingMgr)
}

sh.workersLk.Unlock()
select {
case <-w.closedMgr:
case <-time.After(time.Second):
log.Errorf("timeout closing worker manager goroutine %d", wid)
}
sh.workersLk.Lock()

if !w.cleanupStarted {
w.cleanupStarted = true
Expand Down

0 comments on commit 5f79ff3

Please sign in to comment.