Skip to content

Commit

Permalink
Sort pod container statuses based on Step order in taskSpec
Browse files Browse the repository at this point in the history
This commit closes tektoncd#3239

Tekton determines the TaskRun status message of a failed TaskRun based
on the results of the first terminated Step (pod container). Until now,
Tekton sorted pod container statuses based on the FinishedAt and
StartedAt timestamps set by Kubernetes. Occasionally, a Step terminated
in response to the first terminated Step could have the same timestamps
as the first terminated Step. Therefore, Tekton was not always able to
correctly determine what the first terminated Step was, and as a result,
Tekton may set an incorrect TaskRun status message.

In this commit, pod container statuses are sorted based on the Step
order set in the taskSpec. This order ought to be correct as Tekton
enforces Steps to be scheduled in this order. In case Tekton adds extra
Steps (such as for pipelineresources), Tekton updates the taskSpec with
these Steps and makes the taskSpec availavle for sorting. Therefore,
Tekton accounts for these internally added Steps when sorting.
  • Loading branch information
Peaorl committed Sep 22, 2020
1 parent ebb25cc commit dd2bfae
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 154 deletions.
75 changes: 26 additions & 49 deletions pkg/pod/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ func MakeTaskRunStatus(logger *zap.SugaredLogger, tr v1beta1.TaskRun, pod *corev
MarkStatusRunning(trs, v1beta1.TaskRunReasonRunning.String(), "Not all Steps in the Task have finished executing")
}

sortPodContainerStatuses(pod.Status.ContainerStatuses, taskSpec.Steps)

complete := areStepsComplete(pod) || pod.Status.Phase == corev1.PodSucceeded || pod.Status.Phase == corev1.PodFailed

if complete {
Expand Down Expand Up @@ -136,9 +138,6 @@ func MakeTaskRunStatus(logger *zap.SugaredLogger, tr v1beta1.TaskRun, pod *corev

trs.TaskRunResults = removeDuplicateResults(trs.TaskRunResults)

// Sort step states according to the order specified in the TaskRun spec's steps.
trs.Steps = sortTaskRunStepOrder(trs.Steps, taskSpec.Steps)

return *trs, merr.ErrorOrNil()
}

Expand Down Expand Up @@ -329,29 +328,7 @@ func areStepsComplete(pod *corev1.Pod) bool {
return stepsComplete
}

//SortContainerStatuses sort ContainerStatuses based on "FinishedAt"
func SortContainerStatuses(podInstance *corev1.Pod) {
sort.Slice(podInstance.Status.ContainerStatuses, func(i, j int) bool {
var ifinish, istart, jfinish, jstart time.Time
if term := podInstance.Status.ContainerStatuses[i].State.Terminated; term != nil {
ifinish = term.FinishedAt.Time
istart = term.StartedAt.Time
}
if term := podInstance.Status.ContainerStatuses[j].State.Terminated; term != nil {
jfinish = term.FinishedAt.Time
jstart = term.StartedAt.Time
}

if ifinish.Equal(jfinish) {
return istart.Before(jstart)
}
return ifinish.Before(jfinish)
})

}

func getFailureMessage(pod *corev1.Pod) string {
SortContainerStatuses(pod)
// First, try to surface an error about the actual build step that failed.
for _, status := range pod.Status.ContainerStatuses {
term := status.State.Terminated
Expand Down Expand Up @@ -459,53 +436,53 @@ func MarkStatusSuccess(trs *v1beta1.TaskRunStatus) {
})
}

// sortTaskRunStepOrder sorts the StepStates in the same order as the original
// sortPodContainerStatuses sorts the pod container statuses in the same order as the original
// TaskSpec steps.
func sortTaskRunStepOrder(taskRunSteps []v1beta1.StepState, taskSpecSteps []v1beta1.Step) []v1beta1.StepState {
trt := &stepStateSorter{
taskRunSteps: taskRunSteps,
func sortPodContainerStatuses(podContainerStatuses []corev1.ContainerStatus, taskSpecSteps []v1beta1.Step) []corev1.ContainerStatus {
trt := &podContainerStatusSorter{
podContainerStatuses: podContainerStatuses,
}
trt.mapForSort = trt.constructTaskStepsSorter(taskSpecSteps)
trt.mapForSort = trt.constructPodContainerStatusesSorter(taskSpecSteps)
sort.Sort(trt)
return trt.taskRunSteps
return trt.podContainerStatuses
}

// stepStateSorter implements a sorting mechanism to align the order of the steps in TaskRun
// podContainerStatusSorter implements a sorting mechanism to align the order of the pod container statuses in the pod
// with the spec steps in Task.
type stepStateSorter struct {
taskRunSteps []v1beta1.StepState
mapForSort map[string]int
type podContainerStatusSorter struct {
podContainerStatuses []corev1.ContainerStatus
mapForSort map[string]int
}

// constructTaskStepsSorter constructs a map matching the names of
// the steps to their indices for a task.
func (trt *stepStateSorter) constructTaskStepsSorter(taskSpecSteps []v1beta1.Step) map[string]int {
// constructPodContainerStatusesSorter constructs a map matching the names of
// the containers to their step indices for a task.
func (trt *podContainerStatusSorter) constructPodContainerStatusesSorter(taskSpecSteps []v1beta1.Step) map[string]int {
sorter := make(map[string]int)
for index, step := range taskSpecSteps {
stepName := step.Name
if stepName == "" {
stepName = names.SimpleNameGenerator.RestrictLength(fmt.Sprintf("unnamed-%d", index))
containerName := "step-" + step.Name
if containerName == "step-" {
containerName = names.SimpleNameGenerator.RestrictLength(fmt.Sprintf("unnamed-%d", index))
}
sorter[stepName] = index
sorter[containerName] = index
}
return sorter
}

// changeIndex sorts the steps of the task run, based on the
// changeIndex sorts the containers of a pod, based on the
// order of the steps in the task. Instead of changing the element with the one next to it,
// we directly swap it with the desired index.
func (trt *stepStateSorter) changeIndex(index int) {
func (trt *podContainerStatusSorter) changeIndex(index int) {
// Check if the current index is equal to the desired index. If they are equal, do not swap; if they
// are not equal, swap index j with the desired index.
desiredIndex, exist := trt.mapForSort[trt.taskRunSteps[index].Name]
desiredIndex, exist := trt.mapForSort[trt.podContainerStatuses[index].Name]
if exist && index != desiredIndex {
trt.taskRunSteps[desiredIndex], trt.taskRunSteps[index] = trt.taskRunSteps[index], trt.taskRunSteps[desiredIndex]
trt.podContainerStatuses[desiredIndex], trt.podContainerStatuses[index] = trt.podContainerStatuses[index], trt.podContainerStatuses[desiredIndex]
}
}

func (trt *stepStateSorter) Len() int { return len(trt.taskRunSteps) }
func (trt *podContainerStatusSorter) Len() int { return len(trt.podContainerStatuses) }

func (trt *stepStateSorter) Swap(i, j int) {
func (trt *podContainerStatusSorter) Swap(i, j int) {
trt.changeIndex(j)
// The index j is unable to reach the last index.
// When i reaches the end of the array, we need to check whether the last one needs a swap.
Expand All @@ -514,7 +491,7 @@ func (trt *stepStateSorter) Swap(i, j int) {
}
}

func (trt *stepStateSorter) Less(i, j int) bool {
func (trt *podContainerStatusSorter) Less(i, j int) bool {
// Since the logic is complicated, we move it into the Swap function to decide whether
// and how to change the index. We set it to true here in order to iterate all the
// elements of the array in the Swap function.
Expand Down
176 changes: 81 additions & 95 deletions pkg/pod/status_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -878,6 +878,87 @@ func TestMakeTaskRunStatus(t *testing.T) {
CompletionTime: &metav1.Time{Time: time.Now()},
},
},
}, {
desc: "correct TaskRun step order regardless of pod container status order",
taskSpec: v1beta1.TaskSpec{
Steps: []v1beta1.Step{{Container: corev1.Container{
Name: "first",
}}, {Container: corev1.Container{
Name: "second",
}}, {Container: corev1.Container{
Name: "third",
}}, {Container: corev1.Container{
Name: "",
}}, {Container: corev1.Container{
Name: "fourth",
}}},
},
podStatus: corev1.PodStatus{
Phase: corev1.PodSucceeded,
ContainerStatuses: []corev1.ContainerStatus{{
Name: "step-second",
State: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{},
},
}, {
Name: "step-fourth",
State: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{},
},
}, {
Name: "step-",
State: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{},
},
}, {
Name: "step-first",
State: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{},
},
}, {
Name: "step-third",
State: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{},
},
}},
},
want: v1beta1.TaskRunStatus{
Status: duckv1beta1.Status{
Conditions: []apis.Condition{conditionSucceeded},
},
TaskRunStatusFields: v1beta1.TaskRunStatusFields{
Steps: []v1beta1.StepState{{
ContainerState: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{},
},
Name: "first",
ContainerName: "step-first",
}, {
ContainerState: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{}},
Name: "second",
ContainerName: "step-second",
}, {
ContainerState: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{}},
Name: "third",
ContainerName: "step-third",
}, {
ContainerState: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{}},
Name: "",
ContainerName: "step-",
}, {
ContainerState: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{}},
Name: "fourth",
ContainerName: "step-fourth",
}},
Sidecars: []v1beta1.SidecarState{},
// We don't actually care about the time, just that it's not nil
CompletionTime: &metav1.Time{Time: time.Now()},
},
},
}} {
t.Run(c.desc, func(t *testing.T) {
now := metav1.Now()
Expand Down Expand Up @@ -1148,101 +1229,6 @@ func TestSidecarsReady(t *testing.T) {
}
}

func TestSortTaskRunStepOrder(t *testing.T) {
steps := []v1beta1.Step{{Container: corev1.Container{
Name: "hello",
}}, {Container: corev1.Container{
Name: "exit",
}}, {Container: corev1.Container{
Name: "world",
}}}

stepStates := []v1beta1.StepState{{
ContainerState: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{
ExitCode: 0,
Reason: "Completed",
},
},
Name: "world",
}, {
ContainerState: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{
ExitCode: 1,
Reason: "Error",
},
},
Name: "exit",
}, {
ContainerState: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{
ExitCode: 0,
Reason: "Completed",
},
},
Name: "hello",
}, {
ContainerState: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{
ExitCode: 0,
Reason: "Completed",
},
},
Name: "nop",
}}

gotStates := sortTaskRunStepOrder(stepStates, steps)
var gotNames []string
for _, g := range gotStates {
gotNames = append(gotNames, g.Name)
}

want := []string{"hello", "exit", "world", "nop"}
if d := cmp.Diff(want, gotNames); d != "" {
t.Errorf("Unexpected step order %s", diff.PrintWantGot(d))
}
}

func TestSortContainerStatuses(t *testing.T) {
samplePod := corev1.Pod{
Status: corev1.PodStatus{
ContainerStatuses: []corev1.ContainerStatus{
{
Name: "hello",
State: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{
FinishedAt: metav1.Time{Time: time.Now()},
},
},
}, {
Name: "my",
State: corev1.ContainerState{
// No Terminated status, terminated == 0 (and no panic)
},
}, {
Name: "world",
State: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{
FinishedAt: metav1.Time{Time: time.Now().Add(time.Second * -5)},
},
},
},
},
},
}
SortContainerStatuses(&samplePod)
var gotNames []string
for _, status := range samplePod.Status.ContainerStatuses {
gotNames = append(gotNames, status.Name)
}

want := []string{"my", "world", "hello"}
if d := cmp.Diff(want, gotNames); d != "" {
t.Errorf("Unexpected step order %s", diff.PrintWantGot(d))
}

}

func TestMarkStatusRunning(t *testing.T) {
trs := v1beta1.TaskRunStatus{}
MarkStatusRunning(&trs, v1beta1.TaskRunReasonRunning.String(), "Not all Steps in the Task have finished executing")
Expand Down
Loading

0 comments on commit dd2bfae

Please sign in to comment.