Skip to content

Commit

Permalink
Merge pull request #391 from TommyLike/bug/fix_race_issue
Browse files Browse the repository at this point in the history
Fix race condition issue
  • Loading branch information
volcano-sh-bot authored Jul 30, 2019
2 parents 7e8fec6 + 5c44771 commit da80e4f
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 10 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ generate-code:
./hack/update-gencode.sh

unit-test:
go list ./... | grep -v e2e | xargs go test -v -cover -covermode atomic -coverprofile coverage.txt
go list ./... | grep -v e2e | xargs go test -v -cover -covermode atomic -coverprofile coverage.txt -race

e2e-test-kind:
./hack/run-e2e-kind.sh
Expand Down
26 changes: 17 additions & 9 deletions pkg/controllers/job/job_controller_actions.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"fmt"
"sort"
"sync"
"sync/atomic"

"github.com/golang/glog"

Expand Down Expand Up @@ -215,6 +216,13 @@ func (cc *Controller) syncJob(jobInfo *apis.JobInfo, updateStatus state.UpdateSt
var podToDelete []*v1.Pod
var creationErrs []error
var deletionErrs []error
appendMutex := sync.Mutex{}

appendError := func(container *[]error, err error) {
appendMutex.Lock()
defer appendMutex.Unlock()
*container = append(*container, err)
}

for _, ts := range job.Spec.Tasks {
ts.Template.Name = ts.Name
Expand All @@ -238,7 +246,7 @@ func (cc *Controller) syncJob(jobInfo *apis.JobInfo, updateStatus state.UpdateSt
delete(pods, podName)
if pod.DeletionTimestamp != nil {
glog.Infof("Pod <%s/%s> is terminating", pod.Namespace, pod.Name)
terminating++
atomic.AddInt32(&terminating, 1)
continue
}

Expand All @@ -263,7 +271,7 @@ func (cc *Controller) syncJob(jobInfo *apis.JobInfo, updateStatus state.UpdateSt
// So gang-scheduling could schedule the Job successfully
glog.Errorf("Failed to create pod %s for Job %s, err %#v",
pod.Name, job.Name, err)
creationErrs = append(creationErrs, fmt.Errorf("failed to create pod %s, err: %#v", pod.Name, err))
appendError(&creationErrs, fmt.Errorf("failed to create pod %s, err: %#v", pod.Name, err))
} else {
if err != nil && apierrors.IsAlreadyExists(err) {
cc.resyncTask(pod)
Expand Down Expand Up @@ -297,12 +305,12 @@ func (cc *Controller) syncJob(jobInfo *apis.JobInfo, updateStatus state.UpdateSt
// So gang-scheduling could schedule the Job successfully
glog.Errorf("Failed to delete pod %s for Job %s, err %#v",
pod.Name, job.Name, err)
deletionErrs = append(deletionErrs, err)
appendError(&deletionErrs, err)
cc.resyncTask(pod)
} else {
glog.V(3).Infof("Deleted Task <%s> of Job <%s/%s>",
pod.Name, job.Namespace, job.Name)
terminating++
atomic.AddInt32(&terminating, 1)
}
}(pod)
}
Expand Down Expand Up @@ -547,15 +555,15 @@ func (cc *Controller) initJobStatus(job *vkv1.Job) (*vkv1.Job, error) {
func classifyAndAddUpPodBaseOnPhase(pod *v1.Pod, pending, running, succeeded, failed, unknown *int32) {
switch pod.Status.Phase {
case v1.PodPending:
*pending++
atomic.AddInt32(pending, 1)
case v1.PodRunning:
*running++
atomic.AddInt32(running, 1)
case v1.PodSucceeded:
*succeeded++
atomic.AddInt32(succeeded, 1)
case v1.PodFailed:
*failed++
atomic.AddInt32(failed, 1)
default:
*unknown++
atomic.AddInt32(unknown, 1)
}
return
}

0 comments on commit da80e4f

Please sign in to comment.