From a7e035025eb3162c4d52f82afde448da37e2659d Mon Sep 17 00:00:00 2001
From: chengjoey <30427474+chengjoey@users.noreply.github.com>
Date: Tue, 2 Nov 2021 14:43:36 +0800
Subject: [PATCH] Fix/pipeline apitest cost time (#2650)

* modify task wait op, listen executor channel

* apitest action don't set task pointers value

* api-test executor define add snyc.map to store running task
---
 .../actionexecutor/plugins/apitest/apitest.go | 86 +++++++++++++++---
 .../plugins/apitest/logic/meta.go             | 18 ++--
 .../pipengine/reconciler/reconcile.go         |  1 -
 .../pipengine/reconciler/taskrun/framework.go | 12 +--
 .../reconciler/taskrun/taskop/wait.go         | 90 +++++++++----------
 .../reconciler/taskrun/taskop/wait_test.go    | 76 ++++++++++++++++
 .../pipengine/reconciler/taskrun/taskrun.go   | 21 +++--
 7 files changed, 219 insertions(+), 85 deletions(-)
 create mode 100644 modules/pipeline/pipengine/reconciler/taskrun/taskop/wait_test.go

diff --git a/modules/pipeline/pipengine/actionexecutor/plugins/apitest/apitest.go b/modules/pipeline/pipengine/actionexecutor/plugins/apitest/apitest.go
index d69cb97839b..e3b14d7983f 100644
--- a/modules/pipeline/pipengine/actionexecutor/plugins/apitest/apitest.go
+++ b/modules/pipeline/pipengine/actionexecutor/plugins/apitest/apitest.go
@@ -17,6 +17,9 @@ package apitest
 import (
 	"context"
 	"fmt"
+	"sync"
+
+	"github.com/sirupsen/logrus"
 
 	"github.com/erda-project/erda/apistructs"
 	"github.com/erda-project/erda/modules/pipeline/dbclient"
@@ -28,9 +31,10 @@ import (
 var Kind = types.Kind(spec.PipelineTaskExecutorKindAPITest)
 
 type define struct {
-	name     types.Name
-	options  map[string]string
-	dbClient *dbclient.Client
+	name        types.Name
+	options     map[string]string
+	dbClient    *dbclient.Client
+	runningAPIs sync.Map
 }
 
 func (d *define) Kind() types.Kind { return Kind }
@@ -44,7 +48,11 @@ func (d *define) Exist(ctx context.Context, task *spec.PipelineTask) (created bo
 	case status == apistructs.PipelineStatusCreated:
 		return true, false, nil
 	case status == apistructs.PipelineStatusQueue, status == apistructs.PipelineStatusRunning:
-		return true, true, nil
+		// if apitest task is not procesing, should make status-started false
+		if _, alreadyProcessing := d.runningAPIs.Load(d.makeRunningApiKey(task)); alreadyProcessing {
+			return true, true, nil
+		}
+		return true, false, nil
 	case status.IsEndStatus():
 		return true, true, nil
 	default:
@@ -57,7 +65,46 @@ func (d *define) Create(ctx context.Context, task *spec.PipelineTask) (interface
 }
 
 func (d *define) Start(ctx context.Context, task *spec.PipelineTask) (interface{}, error) {
-	logic.Do(ctx, task)
+
+	go func(ctx context.Context, task *spec.PipelineTask) {
+		if _, alreadyProcessing := d.runningAPIs.LoadOrStore(d.makeRunningApiKey(task), task); alreadyProcessing {
+			logrus.Warnf("apitest: task: %d already processing", task.ID)
+			return
+		}
+		executorDoneCh, ok := ctx.Value(spec.MakeTaskExecutorCtxKey(task)).(chan interface{})
+		if !ok {
+			logrus.Warnf("apitest: failed to get executor channel, pipelineID: %d, taskID: %d", task.PipelineID, task.ID)
+		}
+
+		var status = apistructs.PipelineStatusFailed
+		defer func() {
+			if r := recover(); r != nil {
+				logrus.Errorf("api-test logic do panic recover:%s", r)
+			}
+			// if executor chan is nil, task framework can loop query meta get status
+			if executorDoneCh != nil {
+				executorDoneCh <- apistructs.PipelineStatusDesc{Status: status}
+			}
+			d.runningAPIs.Delete(d.makeRunningApiKey(task))
+		}()
+
+		logic.Do(ctx, task)
+
+		latestTask, err := d.dbClient.GetPipelineTask(task.ID)
+		if err != nil {
+			logrus.Errorf("failed to query latest task, err: %v \n", err)
+			return
+		}
+
+		meta := latestTask.Result.Metadata
+		for _, metaField := range meta {
+			if metaField.Name == logic.MetaKeyResult {
+				if metaField.Value == logic.ResultSuccess {
+					status = apistructs.PipelineStatusSuccess
+				}
+			}
+		}
+	}(ctx, task)
 	return nil, nil
 }
 
@@ -70,13 +117,13 @@ func (d *define) Status(ctx context.Context, task *spec.PipelineTask) (apistruct
 	if err != nil {
 		return apistructs.PipelineStatusDesc{}, fmt.Errorf("failed to query latest task, err: %v", err)
 	}
-	*task = latestTask
+	//*task = latestTask
 
 	if task.Status.IsEndStatus() {
 		return apistructs.PipelineStatusDesc{Status: task.Status}, nil
 	}
 
-	created, _, err := d.Exist(ctx, task)
+	created, started, err := d.Exist(ctx, task)
 	if err != nil {
 		return apistructs.PipelineStatusDesc{}, err
 	}
@@ -85,19 +132,27 @@ func (d *define) Status(ctx context.Context, task *spec.PipelineTask) (apistruct
 		return apistructs.PipelineStatusDesc{Status: apistructs.PipelineStatusAnalyzed}, nil
 	}
 
+	if !started && len(latestTask.Result.Metadata) == 0 {
+		return apistructs.PipelineStatusDesc{Status: apistructs.PipelineStatusBorn}, nil
+	}
+
 	// status according to api success or not
 	meta := latestTask.Result.Metadata
+	var status = apistructs.PipelineStatusFailed
 	for _, metaField := range meta {
 		if metaField.Name == logic.MetaKeyResult {
 			if metaField.Value == logic.ResultSuccess {
-				return apistructs.PipelineStatusDesc{Status: apistructs.PipelineStatusSuccess}, nil
+				status = apistructs.PipelineStatusSuccess
+			}
+			if metaField.Value == logic.ResultFailed {
+				status = apistructs.PipelineStatusFailed
 			}
-			return apistructs.PipelineStatusDesc{Status: apistructs.PipelineStatusFailed}, nil
+			return apistructs.PipelineStatusDesc{Status: status}, nil
 		}
 	}
 
 	// return created status to do start step
-	return apistructs.PipelineStatusDesc{Status: apistructs.PipelineStatusCreated}, nil
+	return apistructs.PipelineStatusDesc{Status: apistructs.PipelineStatusRunning}, nil
 }
 
 func (d *define) Inspect(ctx context.Context, task *spec.PipelineTask) (apistructs.TaskInspect, error) {
@@ -116,6 +171,10 @@ func (d *define) BatchDelete(ctx context.Context, actions []*spec.PipelineTask)
 	return nil, nil
 }
 
+func (d *define) makeRunningApiKey(task *spec.PipelineTask) string {
+	return fmt.Sprintf("%d-%d", task.PipelineID, task.ID)
+}
+
 func init() {
 	types.MustRegister(Kind, func(name types.Name, options map[string]string) (types.ActionExecutor, error) {
 		dbClient, err := dbclient.New()
@@ -123,9 +182,10 @@ func init() {
 			return nil, fmt.Errorf("failed to init dbclient, err: %v", err)
 		}
 		return &define{
-			name:     name,
-			options:  options,
-			dbClient: dbClient,
+			name:        name,
+			options:     options,
+			dbClient:    dbClient,
+			runningAPIs: sync.Map{},
 		}, nil
 	})
 }
diff --git a/modules/pipeline/pipengine/actionexecutor/plugins/apitest/logic/meta.go b/modules/pipeline/pipengine/actionexecutor/plugins/apitest/logic/meta.go
index 16bdd5b0db2..489795099f4 100644
--- a/modules/pipeline/pipengine/actionexecutor/plugins/apitest/logic/meta.go
+++ b/modules/pipeline/pipengine/actionexecutor/plugins/apitest/logic/meta.go
@@ -18,6 +18,7 @@ import (
 	"context"
 	"encoding/json"
 	"strconv"
+	"time"
 
 	"github.com/erda-project/erda/apistructs"
 	"github.com/erda-project/erda/modules/actionagent"
@@ -26,6 +27,7 @@ import (
 	"github.com/erda-project/erda/pkg/apitestsv2"
 	"github.com/erda-project/erda/pkg/apitestsv2/cookiejar"
 	"github.com/erda-project/erda/pkg/encoding/jsonparse"
+	"github.com/erda-project/erda/pkg/loop"
 )
 
 const (
@@ -112,11 +114,15 @@ func writeMetaFile(ctx context.Context, task *spec.PipelineTask, meta *Meta) {
 	cb.PipelineTaskID = task.ID
 
 	cbData, _ := json.Marshal(&cb)
-	err := pipelinefunc.CallbackActionFunc(cbData)
-	if err != nil {
-		log.Errorf("failed to callback, err: %v", err)
-		return
-	}
-
+	// apitest should ensure that callback to pipeline after doing request
+	_ = loop.New(loop.WithDeclineRatio(2), loop.WithDeclineLimit(10*time.Second)).
+		Do(func() (bool, error) {
+			err := pipelinefunc.CallbackActionFunc(cbData)
+			if err != nil {
+				log.Errorf("failed to callback, err: %v", err)
+				return false, err
+			}
+			return true, nil
+		})
 	return
 }
diff --git a/modules/pipeline/pipengine/reconciler/reconcile.go b/modules/pipeline/pipengine/reconciler/reconcile.go
index c96997bee98..fe7971148ce 100644
--- a/modules/pipeline/pipengine/reconciler/reconcile.go
+++ b/modules/pipeline/pipengine/reconciler/reconcile.go
@@ -104,7 +104,6 @@ func (r *Reconciler) reconcile(ctx context.Context, pipelineID uint64) error {
 			if err != nil {
 				return
 			}
-
 			tr := taskrun.New(ctx, task,
 				ctx.Value(ctxKeyPipelineExitCh).(chan struct{}), ctx.Value(ctxKeyPipelineExitChCancelFunc).(context.CancelFunc),
 				r.TaskThrottler, executor, &p, r.bdl, r.dbClient, r.js,
diff --git a/modules/pipeline/pipengine/reconciler/taskrun/framework.go b/modules/pipeline/pipengine/reconciler/taskrun/framework.go
index 0db3fba6db4..12b0dcfd4f3 100644
--- a/modules/pipeline/pipengine/reconciler/taskrun/framework.go
+++ b/modules/pipeline/pipengine/reconciler/taskrun/framework.go
@@ -27,7 +27,6 @@ import (
 	"github.com/erda-project/erda/modules/pipeline/conf"
 	"github.com/erda-project/erda/modules/pipeline/pipengine/reconciler/rlog"
 	"github.com/erda-project/erda/modules/pipeline/pkg/errorsx"
-	"github.com/erda-project/erda/modules/pipeline/spec"
 	"github.com/erda-project/erda/pkg/loop"
 	"github.com/erda-project/erda/pkg/strutil"
 )
@@ -35,8 +34,7 @@ import (
 func (tr *TaskRun) Do(itr TaskOp) error {
 	logrus.Infof("reconciler: pipelineID: %d, task %q begin %s", tr.P.ID, tr.Task.Name, itr.Op())
 
-	executorDoneCh := tr.Ctx.Value(spec.MakeTaskExecutorCtxKey(tr.Task)).(chan interface{})
-	o := &Elem{ErrCh: make(chan error), DoneCh: make(chan interface{}), ExitCh: make(chan struct{}), ExecutorDoneCh: executorDoneCh}
+	o := &Elem{ErrCh: make(chan error), DoneCh: make(chan interface{}), ExitCh: make(chan struct{})}
 	o.TimeoutCh, o.Cancel, o.Timeout = itr.TimeoutConfig()
 
 	// define op handle func
@@ -161,14 +159,6 @@ func (tr *TaskRun) waitOp(itr TaskOp, o *Elem) (result error) {
 		// aop
 		_ = aop.Handle(aop.NewContextForTask(*tr.Task, *tr.P, itr.TuneTriggers().AfterProcessing))
 
-	case data := <-o.ExecutorDoneCh:
-		tr.LogStep(itr.Op(), fmt.Sprintf("framework accept signal from executor %s, begin do WhenDone", tr.Executor.Name()))
-		defer tr.LogStep(itr.Op(), fmt.Sprintf("framework accept signal from executor %s, end do WhenDone", tr.Executor.Name()))
-		if err := itr.WhenDone(data); err != nil {
-			errs = append(errs, err.Error())
-		}
-		_ = aop.Handle(aop.NewContextForTask(*tr.Task, *tr.P, itr.TuneTriggers().AfterProcessing))
-
 	case err := <-o.ErrCh:
 		logrus.Errorf("reconciler: pipelineID: %d, task %q %s received error (%v)", tr.P.ID, tr.Task.Name, itr.Op(), err)
 		if errorsx.IsNetworkError(err) {
diff --git a/modules/pipeline/pipengine/reconciler/taskrun/taskop/wait.go b/modules/pipeline/pipengine/reconciler/taskrun/taskop/wait.go
index c3183c20de5..af391770850 100644
--- a/modules/pipeline/pipengine/reconciler/taskrun/taskop/wait.go
+++ b/modules/pipeline/pipengine/reconciler/taskrun/taskop/wait.go
@@ -17,6 +17,7 @@ package taskop
 import (
 	"context"
 	"errors"
+	"math"
 	"time"
 
 	"github.com/sirupsen/logrus"
@@ -25,13 +26,16 @@ import (
 	"github.com/erda-project/erda/modules/pipeline/aop/aoptypes"
 	"github.com/erda-project/erda/modules/pipeline/commonutil/costtimeutil"
 	"github.com/erda-project/erda/modules/pipeline/conf"
-	"github.com/erda-project/erda/modules/pipeline/pipengine/reconciler/rlog"
 	"github.com/erda-project/erda/modules/pipeline/pipengine/reconciler/taskrun"
-	"github.com/erda-project/erda/pkg/loop"
 )
 
 var err4EnableDeclineRatio = errors.New("enable decline ratio")
 
+var (
+	declineRatio float64       = 1.5
+	declineLimit time.Duration = 10 * time.Second
+)
+
 type wait taskrun.TaskRun
 
 func NewWait(tr *taskrun.TaskRun) *wait {
@@ -47,55 +51,42 @@ func (w *wait) TaskRun() *taskrun.TaskRun {
 }
 
 func (w *wait) Processing() (interface{}, error) {
-	stopWaitCh := make(chan struct{})
-	defer func() {
-		stopWaitCh <- struct{}{}
-	}()
-	go func() {
+	var (
+		data        interface{}
+		loopedTimes uint64
+	)
+
+	timer := time.NewTimer(w.calculateNextLoopTimeDuration(loopedTimes))
+	defer timer.Stop()
+	for {
 		select {
+		case doneData := <-w.ExecutorDoneCh:
+			logrus.Infof("%s: accept signal from executor %s, data: %v", w.Op(), w.Executor.Name(), doneData)
+			return doneData, nil
 		case <-w.Ctx.Done():
-			w.StopWaitLoop = true
-			return
+			return data, nil
 		case <-w.PExitCh:
 			logrus.Warnf("reconciler: pipeline exit, stop wait, pipelineID: %d, taskID: %d", w.P.ID, w.Task.ID)
-			return
-		case <-stopWaitCh:
-			rlog.TDebugf(w.P.ID, w.Task.ID, "stop wait")
-			close(stopWaitCh)
-			return
-		}
-	}()
-
-	var (
-		data interface{}
-	)
-
-	err := loop.New(loop.WithDeclineRatio(1.5), loop.WithDeclineLimit(time.Second*10)).Do(func() (abort bool, err error) {
-		if w.QuitWaitTimeout {
-			return true, nil
-		}
-
-		statusDesc, err := w.Executor.Status(w.Ctx, w.Task)
-		if err != nil {
-			logrus.Errorf("[alert] reconciler: pipelineID: %d, task %q wait get status failed, err: %v",
-				w.P.ID, w.Task.Name, err)
-			return true, err
-		}
-
-		if statusDesc.Status == apistructs.PipelineStatusUnknown {
-			logrus.Errorf("[alert] reconciler: pipelineID: %d, task %q wait get status %q, retry", w.P.ID, w.Task.Name, apistructs.PipelineStatusUnknown)
-			return false, err4EnableDeclineRatio
-		}
+			return data, nil
+		case <-timer.C:
+			statusDesc, err := w.Executor.Status(w.Ctx, w.Task)
+			if err != nil {
+				logrus.Errorf("[alert] reconciler: pipelineID: %d, task %q wait get status failed, err: %v",
+					w.P.ID, w.Task.Name, err)
+				return nil, err
+			}
+			if statusDesc.Status.IsEndStatus() {
+				data = statusDesc
+				return data, nil
+			}
+			if statusDesc.Status == apistructs.PipelineStatusUnknown {
+				logrus.Errorf("[alert] reconciler: pipelineID: %d, task %q wait get status %q, retry", w.P.ID, w.Task.Name, apistructs.PipelineStatusUnknown)
+			}
 
-		if statusDesc.Status.IsEndStatus() {
-			data = statusDesc
-			return true, nil
+			loopedTimes++
+			timer.Reset(w.calculateNextLoopTimeDuration(loopedTimes))
 		}
-
-		return w.StopWaitLoop, err4EnableDeclineRatio
-	})
-
-	return data, err
+	}
 }
 
 func (w *wait) WhenDone(data interface{}) error {
@@ -186,3 +177,12 @@ func (w *wait) TuneTriggers() taskrun.TaskOpTuneTriggers {
 		AfterProcessing:  aoptypes.TuneTriggerTaskAfterWait,
 	}
 }
+
+func (w *wait) calculateNextLoopTimeDuration(loopedTimes uint64) time.Duration {
+	lastSleepTime := time.Second
+	lastSleepTime = time.Duration(float64(lastSleepTime) * math.Pow(declineRatio, float64(loopedTimes)))
+	if lastSleepTime > declineLimit {
+		return declineLimit
+	}
+	return lastSleepTime
+}
diff --git a/modules/pipeline/pipengine/reconciler/taskrun/taskop/wait_test.go b/modules/pipeline/pipengine/reconciler/taskrun/taskop/wait_test.go
new file mode 100644
index 00000000000..069eaf47088
--- /dev/null
+++ b/modules/pipeline/pipengine/reconciler/taskrun/taskop/wait_test.go
@@ -0,0 +1,76 @@
+// Copyright (c) 2021 Terminus, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package taskop
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	"github.com/erda-project/erda/modules/pipeline/pipengine/reconciler/taskrun"
+)
+
+func TestCalculateNextLoopTimeDuration(t *testing.T) {
+	tt := []struct {
+		loopedTimes uint64
+		want        string
+	}{
+		{
+			loopedTimes: 0,
+			want:        "1s",
+		},
+		{
+			loopedTimes: 1,
+			want:        "1.5s",
+		},
+		{
+			loopedTimes: 2,
+			want:        "2.25s",
+		},
+		{
+			loopedTimes: 3,
+			want:        "3.375s",
+		},
+		{
+			loopedTimes: 4,
+			want:        "5.0625s",
+		},
+		{
+			loopedTimes: 5,
+			want:        "7.59375s",
+		},
+		{
+			loopedTimes: 6,
+			want:        "10s",
+		},
+		{
+			loopedTimes: 7,
+			want:        "10s",
+		},
+		{
+			loopedTimes: 8,
+			want:        "10s",
+		},
+		{
+			loopedTimes: 9,
+			want:        "10s",
+		},
+	}
+
+	w := NewWait(&taskrun.TaskRun{})
+	for i := range tt {
+		assert.Equal(t, tt[i].want, w.calculateNextLoopTimeDuration(tt[i].loopedTimes).String())
+	}
+}
diff --git a/modules/pipeline/pipengine/reconciler/taskrun/taskrun.go b/modules/pipeline/pipengine/reconciler/taskrun/taskrun.go
index 6f947ae411c..55b8475b4b4 100644
--- a/modules/pipeline/pipengine/reconciler/taskrun/taskrun.go
+++ b/modules/pipeline/pipengine/reconciler/taskrun/taskrun.go
@@ -50,9 +50,10 @@ type TaskRun struct {
 	StopQueueLoop bool
 	StopWaitLoop  bool
 
-	PExitCh       <-chan struct{}
-	PExitChCancel context.CancelFunc
-	PExit         bool
+	PExitCh        <-chan struct{}
+	PExitChCancel  context.CancelFunc
+	PExit          bool
+	ExecutorDoneCh chan interface{}
 
 	// 轮训状态间隔期间可能任务已经是终态，FakeTimeout = true
 	FakeTimeout bool
@@ -70,8 +71,10 @@ func New(ctx context.Context, task *spec.PipelineTask,
 	actionAgentSvc *actionagentsvc.ActionAgentSvc,
 	extMarketSvc *extmarketsvc.ExtMarketSvc,
 ) *TaskRun {
+	// make executor has buffer, don't block task framework
+	executorCh := make(chan interface{}, 1)
 	return &TaskRun{
-		Ctx:       context.WithValue(ctx, spec.MakeTaskExecutorCtxKey(task), make(chan interface{})),
+		Ctx:       context.WithValue(ctx, spec.MakeTaskExecutorCtxKey(task), executorCh),
 		Task:      task,
 		Executor:  executor,
 		Throttler: throttler,
@@ -87,8 +90,9 @@ func New(ctx context.Context, task *spec.PipelineTask,
 		StopQueueLoop: false,
 		StopWaitLoop:  false,
 
-		PExitCh:       pExitCh,
-		PExitChCancel: pExitChCancel,
+		PExitCh:        pExitCh,
+		PExitChCancel:  pExitChCancel,
+		ExecutorDoneCh: executorCh,
 
 		ActionAgentSvc: actionAgentSvc,
 		ExtMarketSvc:   extMarketSvc,
@@ -135,9 +139,8 @@ type Elem struct {
 	Cancel    context.CancelFunc
 	Timeout   time.Duration
 
-	ErrCh          chan error
-	DoneCh         chan interface{}
-	ExecutorDoneCh chan interface{} // executorDoneCh allow action executor return directly
+	ErrCh  chan error
+	DoneCh chan interface{}
 
 	ExitCh chan struct{}
 }