Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switch PipelineRun timeout -> TaskRun logic to instead signal the TaskRuns to stop #5134

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 14 additions & 13 deletions docs/taskruns.md
Original file line number Diff line number Diff line change
Expand Up @@ -582,19 +582,20 @@ steps:

The following table shows how to read the overall status of a `TaskRun`:

`status`|`reason`|`completionTime` is set|Description
:-------|:-------|:---------------------:|--------------:
Unknown|Started|No|The TaskRun has just been picked up by the controller.
Unknown|Pending|No|The TaskRun is waiting on a Pod in status Pending.
Unknown|Running|No|The TaskRun has been validated and started to perform its work.
Unknown|TaskRunCancelled|No|The user requested the TaskRun to be cancelled. Cancellation has not been done yet.
True|Succeeded|Yes|The TaskRun completed successfully.
False|Failed|Yes|The TaskRun failed because one of the steps failed.
False|\[Error message\]|No|The TaskRun encountered a non-permanent error, and it's still running. It may ultimately succeed.
False|\[Error message\]|Yes|The TaskRun failed with a permanent error (usually validation).
False|TaskRunCancelled|Yes|The TaskRun was cancelled successfully.
False|TaskRunTimeout|Yes|The TaskRun timed out.
False|TaskRunImagePullFailed|Yes|The TaskRun failed due to one of its steps not being able to pull the image.
`status`|`reason`|`message`|`completionTime` is set|Description
:-------|:-------|:--|:---------------------:|--------------:
Unknown|Started|n/a|No|The TaskRun has just been picked up by the controller.
Unknown|Pending|n/a|No|The TaskRun is waiting on a Pod in status Pending.
Unknown|Running|n/a|No|The TaskRun has been validated and started to perform its work.
Unknown|TaskRunCancelled|n/a|No|The user requested the TaskRun to be cancelled. Cancellation has not been done yet.
True|Succeeded|n/a|Yes|The TaskRun completed successfully.
False|Failed|n/a|Yes|The TaskRun failed because one of the steps failed.
False|\[Error message\]|n/a|No|The TaskRun encountered a non-permanent error, and it's still running. It may ultimately succeed.
False|\[Error message\]|n/a|Yes|The TaskRun failed with a permanent error (usually validation).
False|TaskRunCancelled|n/a|Yes|The TaskRun was cancelled successfully.
False|TaskRunCancelled|TaskRun cancelled as the PipelineRun it belongs to has timed out.|Yes|The TaskRun was cancelled because the PipelineRun timed out.
False|TaskRunTimeout|n/a|Yes|The TaskRun timed out.
False|TaskRunImagePullFailed|n/a|Yes|The TaskRun failed due to one of its steps not being able to pull the image.

When a `TaskRun` changes status, [events](events.md#taskruns) are triggered accordingly.

Expand Down
2 changes: 2 additions & 0 deletions pkg/apis/pipeline/v1alpha1/run_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ const (
// RunCancelledByPipelineMsg indicates that the PipelineRun of which this Run was
// a part has been cancelled.
RunCancelledByPipelineMsg RunSpecStatusMessage = "Run cancelled as the PipelineRun it belongs to has been cancelled."
// RunCancelledByPipelineTimeoutMsg indicates that the Run was cancelled because the PipelineRun running it timed out.
RunCancelledByPipelineTimeoutMsg RunSpecStatusMessage = "Run cancelled as the PipelineRun it belongs to has timed out."
pritidesai marked this conversation as resolved.
Show resolved Hide resolved
)

// GetParam gets the Param from the RunSpec with the given name
Expand Down
12 changes: 12 additions & 0 deletions pkg/apis/pipeline/v1beta1/openapi_generated.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

44 changes: 44 additions & 0 deletions pkg/apis/pipeline/v1beta1/pipelinerun_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,40 @@ func (pr *PipelineRun) HasTimedOut(ctx context.Context, c clock.PassiveClock) bo
return false
}

// HaveTasksTimedOut reports whether the time elapsed since status.StartTime, as
// measured by the supplied clock, is greater than the duration returned by
// pr.TasksTimeout() (spec.Timeouts.Tasks). It returns false when the start time
// is zero, when no tasks timeout is available, or when the timeout is equal to
// config.NoTimeoutDuration.
func (pr *PipelineRun) HaveTasksTimedOut(ctx context.Context, c clock.PassiveClock) bool {
	limit := pr.TasksTimeout()
	started := pr.Status.StartTime

	// Both a start time and a timeout are needed for a comparison.
	if started.IsZero() || limit == nil {
		return false
	}
	// A timeout equal to NoTimeoutDuration is never reported as exceeded.
	if limit.Duration == config.NoTimeoutDuration {
		return false
	}
	return c.Since(started.Time) > limit.Duration
}

// HasFinallyTimedOut reports whether the time elapsed since
// status.FinallyStartTime, as measured by the supplied clock, is greater than
// the duration returned by pr.FinallyTimeout() (spec.Timeouts.Finally). It
// returns false when FinallyStartTime is nil or zero, when no finally timeout
// is available, or when the timeout is equal to config.NoTimeoutDuration.
func (pr *PipelineRun) HasFinallyTimedOut(ctx context.Context, c clock.PassiveClock) bool {
	limit := pr.FinallyTimeout()
	started := pr.Status.FinallyStartTime

	// Both a recorded finally start time and a timeout are needed for a comparison.
	if started == nil || started.IsZero() || limit == nil {
		return false
	}
	// A timeout equal to NoTimeoutDuration is never reported as exceeded.
	if limit.Duration == config.NoTimeoutDuration {
		return false
	}
	return c.Since(started.Time) > limit.Duration
}

// HasVolumeClaimTemplate returns true if PipelineRun contains volumeClaimTemplates that are
// used for creating PersistentVolumeClaims with an OwnerReference for each run
func (pr *PipelineRun) HasVolumeClaimTemplate() bool {
Expand Down Expand Up @@ -418,6 +452,10 @@ type PipelineRunStatusFields struct {
// +optional
// +listType=atomic
ChildReferences []ChildStatusReference `json:"childReferences,omitempty"`

// FinallyStartTime is when all non-finally tasks have been completed and only finally tasks are being executed.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm wondering if there's a way to look at any of the finally TaskRuns and get their start time? Or would this introduce more race conditions?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought about this, but at best it'd be messy, and I can't rule out the possibility of race conditions, inconsistent behavior, etc, so I opted for this. It's not bad information to have in general, I think - the dashboard or CLI or some other downstream consumer of PipelineRuns can just look at this field (plus StartTime and CompletionTime) to see how long just tasks took and how long finally tasks took, etc.

// +optional
FinallyStartTime *metav1.Time `json:"finallyStartTime,omitempty"`
}

// SkippedTask is used to describe the Tasks that were skipped due to their When Expressions
Expand Down Expand Up @@ -450,6 +488,12 @@ const (
GracefullyStoppedSkip SkippingReason = "PipelineRun was gracefully stopped"
// MissingResultsSkip means the task was skipped because it's missing necessary results
MissingResultsSkip SkippingReason = "Results were missing"
// PipelineTimedOutSkip means the task was skipped because the PipelineRun has passed its overall timeout.
PipelineTimedOutSkip SkippingReason = "PipelineRun timeout has been reached"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what is the existing behavior in terms what happens to the unscheduled tasks (dag and finally)? How is this different from what we have today?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right now, we create the tasks with a 1s timeout, so that they get created but immediately timeout. Skipping them instead is much more logical, imo.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't that be considered a behavior change? The users will start getting such tasks as part of the list of skipped tasks instead of taskRuns with cancelled status? 🤔

Skipping them instead is much more logical, imo.

skip was introduced for conditional tasks and since then we have extended its usage in many different scenarios which is fine for now.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is a slight behavior change, but not a functional one, since the end result is the same, just without the wasted steps of creating and immediately timing out the relevant task(s). I don’t think it’s an issue.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreeing with @abayer

// TasksTimedOutSkip means the task was skipped because the PipelineRun has passed its Timeouts.Tasks.
TasksTimedOutSkip SkippingReason = "PipelineRun Tasks timeout has been reached"
// FinallyTimedOutSkip means the task was skipped because the PipelineRun has passed its Timeouts.Finally.
FinallyTimedOutSkip SkippingReason = "PipelineRun Finally timeout has been reached"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How is this possible with the constraint we have documented?

timeouts.pipeline >= timeouts.tasks + timeouts.finally

Does this reason only apply to finally tasks? or can also be applied to dag tasks?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are a few ways - dag tasks finishing quickly enough that the remaining time until the pipeline timeout is greater than the finally timeout, no pipeline timeout at all, etc. We’re using a different skipping reason for finally vs dag tasks to make the specific timeout setting that caused the timeout clear.

// None means the task was not skipped
None SkippingReason = "None"
)
Expand Down
31 changes: 31 additions & 0 deletions pkg/apis/pipeline/v1beta1/pipelinerun_types_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,37 @@ func TestPipelineRunHasTimedOut(t *testing.T) {
t.Errorf("Expected HasTimedOut to be %t when using pipeline.timeouts.pipeline", tc.expected)
}
})
t.Run("pipeline.timeouts.tasks "+tc.name, func(t *testing.T) {
	// Only Timeouts.Tasks is configured, so this subtest exercises
	// HaveTasksTimedOut against tc.timeout measured from tc.starttime.
	pr := &v1beta1.PipelineRun{
		ObjectMeta: metav1.ObjectMeta{Name: "foo"},
		Spec: v1beta1.PipelineRunSpec{
			Timeouts: &v1beta1.TimeoutFields{Tasks: &metav1.Duration{Duration: tc.timeout}},
		},
		Status: v1beta1.PipelineRunStatus{PipelineRunStatusFields: v1beta1.PipelineRunStatusFields{
			StartTime: &metav1.Time{Time: tc.starttime},
		}},
	}

	// The failure message names the method and timeout field actually under test.
	if pr.HaveTasksTimedOut(context.Background(), testClock) != tc.expected {
		t.Errorf("Expected HaveTasksTimedOut to be %t when using pipeline.timeouts.tasks", tc.expected)
	}
})
t.Run("pipeline.timeouts.finally "+tc.name, func(t *testing.T) {
	// Only Timeouts.Finally is configured, and FinallyStartTime is set to
	// tc.starttime, so this subtest exercises HasFinallyTimedOut against
	// tc.timeout measured from that time.
	pr := &v1beta1.PipelineRun{
		ObjectMeta: metav1.ObjectMeta{Name: "foo"},
		Spec: v1beta1.PipelineRunSpec{
			Timeouts: &v1beta1.TimeoutFields{Finally: &metav1.Duration{Duration: tc.timeout}},
		},
		Status: v1beta1.PipelineRunStatus{PipelineRunStatusFields: v1beta1.PipelineRunStatusFields{
			StartTime:        &metav1.Time{Time: tc.starttime},
			FinallyStartTime: &metav1.Time{Time: tc.starttime},
		}},
	}

	// The failure message names the method and timeout field actually under test.
	if pr.HasFinallyTimedOut(context.Background(), testClock) != tc.expected {
		t.Errorf("Expected HasFinallyTimedOut to be %t when using pipeline.timeouts.finally", tc.expected)
	}
})
}
}

Expand Down
8 changes: 8 additions & 0 deletions pkg/apis/pipeline/v1beta1/swagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -1015,6 +1015,10 @@
"x-kubernetes-patch-merge-key": "type",
"x-kubernetes-patch-strategy": "merge"
},
"finallyStartTime": {
"description": "FinallyStartTime is when all non-finally tasks have been completed and only finally tasks are being executed.",
"$ref": "#/definitions/v1.Time"
},
"observedGeneration": {
"description": "ObservedGeneration is the 'Generation' of the Service that was last processed by the controller.",
"type": "integer",
Expand Down Expand Up @@ -1079,6 +1083,10 @@
"description": "CompletionTime is the time the PipelineRun completed.",
"$ref": "#/definitions/v1.Time"
},
"finallyStartTime": {
"description": "FinallyStartTime is when all non-finally tasks have been completed and only finally tasks are being executed.",
"$ref": "#/definitions/v1.Time"
},
"pipelineResults": {
"description": "PipelineResults are the list of results written out by the pipeline task's containers",
"type": "array",
Expand Down
2 changes: 2 additions & 0 deletions pkg/apis/pipeline/v1beta1/taskrun_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ const (
// TaskRunCancelledByPipelineMsg indicates that the PipelineRun of which this
// TaskRun was a part has been cancelled.
TaskRunCancelledByPipelineMsg TaskRunSpecStatusMessage = "TaskRun cancelled as the PipelineRun it belongs to has been cancelled."
// TaskRunCancelledByPipelineTimeoutMsg indicates that the TaskRun was cancelled because the PipelineRun running it timed out.
TaskRunCancelledByPipelineTimeoutMsg TaskRunSpecStatusMessage = "TaskRun cancelled as the PipelineRun it belongs to has timed out."
)

// TaskRunDebug defines the breakpoint config for a particular TaskRun
Expand Down
8 changes: 2 additions & 6 deletions pkg/apis/pipeline/v1beta1/taskrun_types_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ func TestTaskRunIsDone(t *testing.T) {
},
}
if !tr.IsDone() {
t.Fatal("Expected pipelinerun status to be done")
t.Fatal("Expected taskrun status to be done")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thank you for fixing this 🙏

}
}

Expand All @@ -131,11 +131,7 @@ func TestTaskRunIsCancelled(t *testing.T) {
},
}
if !tr.IsCancelled() {
t.Fatal("Expected pipelinerun status to be cancelled")
}
expected := ""
if string(tr.Spec.StatusMessage) != expected {
t.Fatalf("Expected StatusMessage is %s but got %s", expected, tr.Spec.StatusMessage)
t.Fatal("Expected taskrun status to be cancelled")
}
}

Expand Down
4 changes: 4 additions & 0 deletions pkg/apis/pipeline/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

43 changes: 27 additions & 16 deletions pkg/reconciler/pipelinerun/cancel.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ import (
"time"

"github.com/tektoncd/pipeline/pkg/apis/config"

"github.com/tektoncd/pipeline/pkg/apis/pipeline/v1alpha1"
"github.com/tektoncd/pipeline/pkg/apis/pipeline/v1beta1"
clientset "github.com/tektoncd/pipeline/pkg/client/clientset/versioned"
Expand All @@ -34,6 +33,7 @@ import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
"knative.dev/pkg/apis"
)

Expand Down Expand Up @@ -108,9 +108,14 @@ func cancelPipelineRun(ctx context.Context, logger *zap.SugaredLogger, pr *v1bet

// cancelPipelineTaskRuns patches the `TaskRun`s and `Run`s of the given PipelineRun with
// canceled status by delegating to cancelPipelineTaskRunsForTaskNames with an empty set of
// task names. It returns the error messages collected by that call.
func cancelPipelineTaskRuns(ctx context.Context, logger *zap.SugaredLogger, pr *v1beta1.PipelineRun, clientSet clientset.Interface) []string {
	// No task names are given, so nothing is filtered out by name.
	noTaskFilter := sets.NewString()
	return cancelPipelineTaskRunsForTaskNames(ctx, logger, pr, clientSet, noTaskFilter)
}

// cancelPipelineTaskRunsForTaskNames patches `TaskRun`s and `Run`s for the given task names, or all if no task names are given, with canceled status
func cancelPipelineTaskRunsForTaskNames(ctx context.Context, logger *zap.SugaredLogger, pr *v1beta1.PipelineRun, clientSet clientset.Interface, taskNames sets.String) []string {
errs := []string{}

trNames, runNames, err := getChildObjectsFromPRStatus(ctx, pr.Status)
trNames, runNames, err := getChildObjectsFromPRStatusForTaskNames(ctx, pr.Status, taskNames)
if err != nil {
errs = append(errs, err.Error())
}
Expand All @@ -136,9 +141,9 @@ func cancelPipelineTaskRuns(ctx context.Context, logger *zap.SugaredLogger, pr *
return errs
}

// getChildObjectsFromPRStatus returns taskruns and runs in the PipelineRunStatus's ChildReferences or TaskRuns/Runs,
// based on the value of the embedded status flag.
func getChildObjectsFromPRStatus(ctx context.Context, prs v1beta1.PipelineRunStatus) ([]string, []string, error) {
// getChildObjectsFromPRStatusForTaskNames returns taskruns and runs in the PipelineRunStatus's ChildReferences or TaskRuns/Runs,
// based on the value of the embedded status flag and the given set of PipelineTask names. If that set is empty, all are returned.
func getChildObjectsFromPRStatusForTaskNames(ctx context.Context, prs v1beta1.PipelineRunStatus, taskNames sets.String) ([]string, []string, error) {
cfg := config.FromContextOrDefaults(ctx)

var trNames []string
Expand All @@ -147,21 +152,27 @@ func getChildObjectsFromPRStatus(ctx context.Context, prs v1beta1.PipelineRunSta

if cfg.FeatureFlags.EmbeddedStatus != config.FullEmbeddedStatus {
for _, cr := range prs.ChildReferences {
switch cr.Kind {
case "TaskRun":
trNames = append(trNames, cr.Name)
case "Run":
runNames = append(runNames, cr.Name)
default:
unknownChildKinds[cr.Name] = cr.Kind
if taskNames.Len() == 0 || taskNames.Has(cr.PipelineTaskName) {
switch cr.Kind {
case "TaskRun":
trNames = append(trNames, cr.Name)
case "Run":
runNames = append(runNames, cr.Name)
default:
unknownChildKinds[cr.Name] = cr.Kind
}
}
}
} else {
for trName := range prs.TaskRuns {
trNames = append(trNames, trName)
for trName, trs := range prs.TaskRuns {
if taskNames.Len() == 0 || taskNames.Has(trs.PipelineTaskName) {
trNames = append(trNames, trName)
}
}
for runName := range prs.Runs {
runNames = append(runNames, runName)
for runName, runStatus := range prs.Runs {
if taskNames.Len() == 0 || taskNames.Has(runStatus.PipelineTaskName) {
runNames = append(runNames, runName)
}
}
}

Expand Down
22 changes: 20 additions & 2 deletions pkg/reconciler/pipelinerun/cancel_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ import (
"context"
"testing"

"k8s.io/apimachinery/pkg/util/sets"

"github.com/google/go-cmp/cmp"
"github.com/tektoncd/pipeline/pkg/apis/config"
"github.com/tektoncd/pipeline/test/diff"
Expand Down Expand Up @@ -274,11 +276,12 @@ func TestCancelPipelineRun(t *testing.T) {
}
}

func TestGetChildObjectsFromPRStatus(t *testing.T) {
func TestGetChildObjectsFromPRStatusForTaskNames(t *testing.T) {
testCases := []struct {
name string
embeddedStatus string
prStatus v1beta1.PipelineRunStatus
taskNames sets.String
expectedTRNames []string
expectedRunNames []string
hasError bool
Expand Down Expand Up @@ -319,6 +322,21 @@ func TestGetChildObjectsFromPRStatus(t *testing.T) {
expectedTRNames: []string{"t1"},
expectedRunNames: []string{"r1"},
hasError: false,
}, {
name: "taskrun and run, default embedded, just want taskrun",
embeddedStatus: config.DefaultEmbeddedStatus,
prStatus: v1beta1.PipelineRunStatus{PipelineRunStatusFields: v1beta1.PipelineRunStatusFields{
TaskRuns: map[string]*v1beta1.PipelineRunTaskRunStatus{
"t1": {PipelineTaskName: "task-1"},
},
Runs: map[string]*v1beta1.PipelineRunRunStatus{
"r1": {PipelineTaskName: "run-1"},
},
}},
taskNames: sets.NewString("task-1"),
expectedTRNames: []string{"t1"},
expectedRunNames: nil,
hasError: false,
}, {
name: "full embedded",
embeddedStatus: config.FullEmbeddedStatus,
Expand Down Expand Up @@ -402,7 +420,7 @@ func TestGetChildObjectsFromPRStatus(t *testing.T) {
cfg.OnConfigChanged(withCustomTasks(withEmbeddedStatus(newFeatureFlagsConfigMap(), tc.embeddedStatus)))
ctx = cfg.ToContext(ctx)

trNames, runNames, err := getChildObjectsFromPRStatus(ctx, tc.prStatus)
trNames, runNames, err := getChildObjectsFromPRStatusForTaskNames(ctx, tc.prStatus, tc.taskNames)

if tc.hasError {
if err == nil {
Expand Down
Loading