diff --git a/pkg/apis/batch/v1alpha1/job.go b/pkg/apis/batch/v1alpha1/job.go index 9190cab4cc..888d87e026 100644 --- a/pkg/apis/batch/v1alpha1/job.go +++ b/pkg/apis/batch/v1alpha1/job.go @@ -285,15 +285,19 @@ type JobStatus struct { // +optional Terminating int32 `json:"terminating,omitempty" protobuf:"bytes,7,opt,name=terminating"` + // The number of pods which reached phase Unknown. + // +optional + Unknown int32 `json:"unknown,omitempty" protobuf:"bytes,8,opt,name=unknown"` + //Current version of job - Version int32 `json:"version,omitempty" protobuf:"bytes,8,opt,name=version"` + Version int32 `json:"version,omitempty" protobuf:"bytes,9,opt,name=version"` // The number of Job retries. // +optional - RetryCount int32 `json:"retryCount,omitempty" protobuf:"bytes,9,opt,name=retryCount"` + RetryCount int32 `json:"retryCount,omitempty" protobuf:"bytes,10,opt,name=retryCount"` // The resources that controlled by this job, e.g. Service, ConfigMap - ControlledResources map[string]string `json:"controlledResources,omitempty" protobuf:"bytes,8,opt,name=controlledResources"` + ControlledResources map[string]string `json:"controlledResources,omitempty" protobuf:"bytes,11,opt,name=controlledResources"` } // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object diff --git a/pkg/cli/job/list.go b/pkg/cli/job/list.go index 3ef742f1e9..f99cac6a93 100644 --- a/pkg/cli/job/list.go +++ b/pkg/cli/job/list.go @@ -65,6 +65,8 @@ const ( Version string = "Version" // Failed failed Failed string = "Failed" + // Unknown pod + Unknown string = "Unknown" // RetryCount retry count RetryCount string = "RetryCount" // JobType job type @@ -110,8 +112,8 @@ func ListJobs() error { // PrintJobs prints all jobs details func PrintJobs(jobs *v1alpha1.JobList, writer io.Writer) { maxNameLen := getMaxNameLen(jobs) - _, err := fmt.Fprintf(writer, fmt.Sprintf("%%-%ds%%-25s%%-12s%%-12s%%-12s%%-6s%%-10s%%-10s%%-12s%%-10s%%-12s\n", maxNameLen), - Name, Creation, Phase, JobType, Replicas, Min, Pending, Running, Succeeded, Failed, RetryCount) + _, err := fmt.Fprintf(writer, fmt.Sprintf("%%-%ds%%-25s%%-12s%%-12s%%-12s%%-6s%%-10s%%-10s%%-12s%%-10s%%-12s%%-10s\n", maxNameLen), + Name, Creation, Phase, JobType, Replicas, Min, Pending, Running, Succeeded, Failed, Unknown, RetryCount) if err != nil { fmt.Printf("Failed to print list command result: %s.\n", err) } @@ -131,9 +133,9 @@ func PrintJobs(jobs *v1alpha1.JobList, writer io.Writer) { if jobType == "" { jobType = "Batch" } - _, err = fmt.Fprintf(writer, fmt.Sprintf("%%-%ds%%-25s%%-12s%%-12s%%-12d%%-6d%%-10d%%-10d%%-12d%%-10d%%-12d\n", maxNameLen), + _, err = fmt.Fprintf(writer, fmt.Sprintf("%%-%ds%%-25s%%-12s%%-12s%%-12d%%-6d%%-10d%%-10d%%-12d%%-10d%%-12d%%-10d\n", maxNameLen), job.Name, job.CreationTimestamp.Format("2006-01-02 15:04:05"), job.Status.State.Phase, jobType, replicas, - job.Status.MinAvailable, job.Status.Pending, job.Status.Running, job.Status.Succeeded, job.Status.Failed, job.Status.RetryCount) + job.Status.MinAvailable, job.Status.Pending, job.Status.Running, job.Status.Succeeded, job.Status.Failed, job.Status.Unknown, job.Status.RetryCount) if err != nil { fmt.Printf("Failed to print list command result: %s.\n", err) } diff --git a/pkg/cli/job/view.go b/pkg/cli/job/view.go index d8130f3ea2..8dbbe7bb7e 100644 --- a/pkg/cli/job/view.go +++ b/pkg/cli/job/view.go @@ -174,6 +174,9 @@ func PrintJobInfo(job *v1alpha1.Job, writer io.Writer) { if job.Status.Terminating > 0 { WriteLine(writer, Level1, "Terminating: \t%d\n", job.Status.Terminating) } + if job.Status.Unknown > 0 { + WriteLine(writer, Level1, "Unknown: \t%d\n", job.Status.Unknown) + } if job.Status.RetryCount > 0 { WriteLine(writer, Level1, "RetryCount: \t%d\n", job.Status.RetryCount) } diff --git a/pkg/cli/job/view_test.go b/pkg/cli/job/view_test.go index f199ec1001..925e840cd9 100644 --- a/pkg/cli/job/view_test.go +++ b/pkg/cli/job/view_test.go @@ -76,6 +76,7 @@ func TestViewJob(t *testing.T) { Pending: 3, Running: 1, Failed: 2, + Unknown: 10, Terminating: 4, RetryCount: 5, MinAvailable: 6, diff --git a/pkg/controllers/job/job_controller_actions.go b/pkg/controllers/job/job_controller_actions.go index 7e7eb2ca84..a1d329bdf7 100644 --- a/pkg/controllers/job/job_controller_actions.go +++ b/pkg/controllers/job/job_controller_actions.go @@ -48,7 +48,7 @@ func (cc *Controller) killJob(jobInfo *apis.JobInfo, podRetainPhase state.PhaseM return nil } - var pending, running, terminating, succeeded, failed int32 + var pending, running, terminating, succeeded, failed, unknown int32 var errs []error var total int @@ -76,16 +76,7 @@ func (cc *Controller) killJob(jobInfo *apis.JobInfo, podRetainPhase state.PhaseM cc.resyncTask(pod) } - switch pod.Status.Phase { - case v1.PodRunning: - running++ - case v1.PodPending: - pending++ - case v1.PodSucceeded: - succeeded++ - case v1.PodFailed: - failed++ - } + classifyAndAddUpPodBaseOnPhase(pod, &pending, &running, &succeeded, &failed, &unknown) } } @@ -108,6 +99,7 @@ func (cc *Controller) killJob(jobInfo *apis.JobInfo, podRetainPhase state.PhaseM Succeeded: succeeded, Failed: failed, Terminating: terminating, + Unknown: unknown, Version: job.Status.Version, MinAvailable: int32(job.Spec.MinAvailable), RetryCount: job.Status.RetryCount, @@ -217,7 +209,7 @@ func (cc *Controller) syncJob(jobInfo *apis.JobInfo, updateStatus state.UpdateSt return nil } - var running, pending, terminating, succeeded, failed int32 + var running, pending, terminating, succeeded, failed, unknown int32 var podToCreate []*v1.Pod var podToDelete []*v1.Pod @@ -250,16 +242,7 @@ func (cc *Controller) syncJob(jobInfo *apis.JobInfo, updateStatus state.UpdateSt continue } - switch pod.Status.Phase { - case v1.PodPending: - pending++ - case v1.PodRunning: - running++ - case v1.PodSucceeded: - succeeded++ - case v1.PodFailed: - failed++ - } + classifyAndAddUpPodBaseOnPhase(pod, &pending, &running, &succeeded, &failed, &unknown) } } @@ -273,7 +256,7 @@ func (cc *Controller) syncJob(jobInfo *apis.JobInfo, updateStatus state.UpdateSt for _, pod := range podToCreate { go func(pod *v1.Pod) { defer waitCreationGroup.Done() - _, err := cc.kubeClients.CoreV1().Pods(pod.Namespace).Create(pod) + newPod, err := cc.kubeClients.CoreV1().Pods(pod.Namespace).Create(pod) if err != nil && !apierrors.IsAlreadyExists(err) { // Failed to create Pod, waitCreationGroup a moment and then create it again // This is to ensure all podsMap under the same Job created @@ -286,8 +269,7 @@ func (cc *Controller) syncJob(jobInfo *apis.JobInfo, updateStatus state.UpdateSt cc.resyncTask(pod) } - // TODO: maybe not pending status, maybe unknown. - pending++ + classifyAndAddUpPodBaseOnPhase(newPod, &pending, &running, &succeeded, &failed, &unknown) glog.V(3).Infof("Created Task <%s> of Job <%s/%s>", pod.Name, job.Namespace, job.Name) } @@ -340,6 +322,7 @@ func (cc *Controller) syncJob(jobInfo *apis.JobInfo, updateStatus state.UpdateSt Succeeded: succeeded, Failed: failed, Terminating: terminating, + Unknown: unknown, Version: job.Status.Version, MinAvailable: int32(job.Spec.MinAvailable), ControlledResources: job.Status.ControlledResources, @@ -559,3 +542,19 @@ func (cc *Controller) initJobStatus(job *vkv1.Job) (*vkv1.Job, error) { return newJob, nil } + +func classifyAndAddUpPodBaseOnPhase(pod *v1.Pod, pending, running, succeeded, failed, unknown *int32) { + switch pod.Status.Phase { + case v1.PodPending: + *pending++ + case v1.PodRunning: + *running++ + case v1.PodSucceeded: + *succeeded++ + case v1.PodFailed: + *failed++ + default: + *unknown++ + } + return +}