Skip to content

Commit

Permalink
add qps stats in report
Browse files Browse the repository at this point in the history
Signed-off-by: ii2day <ji.li@daocloud.io>
  • Loading branch information
ii2day committed Nov 1, 2023
1 parent 59b6879 commit ee4ac27
Show file tree
Hide file tree
Showing 21 changed files with 288 additions and 142 deletions.
3 changes: 0 additions & 3 deletions charts/crds/kdoctor.io_netdnses.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1132,12 +1132,10 @@ spec:
type: string
durationInSecond:
default: 2
format: int64
minimum: 1
type: integer
perRequestTimeoutInMS:
default: 5
format: int64
minimum: 1
type: integer
protocol:
Expand All @@ -1149,7 +1147,6 @@ spec:
type: string
qps:
default: 5
format: int64
minimum: 1
type: integer
type: object
Expand Down
6 changes: 3 additions & 3 deletions pkg/k8s/apis/kdoctor.io/v1beta1/netdns_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,17 +68,17 @@ type NetdnsRequest struct {
// +kubebuilder:validation:Optional
// +kubebuilder:default=2
// +kubebuilder:validation:Minimum=1
DurationInSecond *uint64 `json:"durationInSecond,omitempty"`
DurationInSecond int `json:"durationInSecond,omitempty"`

// +kubebuilder:validation:Optional
// +kubebuilder:default=5
// +kubebuilder:validation:Minimum=1
QPS *uint64 `json:"qps,omitempty"`
QPS int `json:"qps,omitempty"`

// +kubebuilder:validation:Optional
// +kubebuilder:default=5
// +kubebuilder:validation:Minimum=1
PerRequestTimeoutInMS *uint64 `json:"perRequestTimeoutInMS,omitempty"`
PerRequestTimeoutInMS int `json:"perRequestTimeoutInMS,omitempty"`

// +kubebuilder:default=kubernetes.default.svc.cluster.local
// +kubebuilder:validation:Optional
Expand Down
15 changes: 0 additions & 15 deletions pkg/k8s/apis/kdoctor.io/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 7 additions & 7 deletions pkg/k8s/apis/system/v1beta1/apphttphealthy.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@ import (
const AppHttpHealthyTaskName = "AppHttpHealthy"

type AppHttpHealthyTask struct {
TargetType string `json:"TargetType"`
TargetNumber int64 `json:"TargetNumber"`
FailureReason *string `json:"FailureReason,omitempty"`
Succeed bool `json:"Succeed"`
MaxCPU string `json:"MaxCPU"`
MaxMemory string `json:"MaxMemory"`
Detail []AppHttpHealthyTaskDetail `json:"Detail"`
TargetType string `json:"TargetType"`
TargetNumber int64 `json:"TargetNumber"`
FailureReason *string `json:"FailureReason,omitempty"`
Succeed bool `json:"Succeed"`
SystemResource SystemResource `json:"SystemResource"`
TotalRunningLoad TotalRunningLoad `json:"TotalRunningLoad"`
Detail []AppHttpHealthyTaskDetail `json:"Detail"`
}

type AppHttpHealthyTaskDetail struct {
Expand Down
11 changes: 11 additions & 0 deletions pkg/k8s/apis/system/v1beta1/common_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,14 @@ type LatencyDistribution struct {
// Mean is the mean request latency.
Mean float32 `json:"Mean_inMs"`
}

type TotalRunningLoad struct {
AppHttpHealthyQPS int64 `json:"AppHttpHealthyQPS"`
NetReachQPS int64 `json:"NetReachQPS"`
NetDnsQPS int64 `json:"NetDnsQPS"`
}

type SystemResource struct {
MaxCPU string `json:"MaxCPU"`
MaxMemory string `json:"MaxMemory"`
}
14 changes: 7 additions & 7 deletions pkg/k8s/apis/system/v1beta1/netdns.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@ import (
const NetDNSTaskName = "Netdns"

type NetDNSTask struct {
TargetType string `json:"targetType"`
TargetNumber int64 `json:"targetNumber"`
FailureReason *string `json:"failureReason,omitempty"`
Succeed bool `json:"succeed"`
MaxCPU string `json:"MaxCPU"`
MaxMemory string `json:"MaxMemory"`
Detail []NetDNSTaskDetail `json:"detail"`
TargetType string `json:"targetType"`
TargetNumber int64 `json:"targetNumber"`
FailureReason *string `json:"failureReason,omitempty"`
Succeed bool `json:"succeed"`
SystemResource SystemResource `json:"SystemResource"`
TotalRunningLoad TotalRunningLoad `json:"TotalRunningLoad"`
Detail []NetDNSTaskDetail `json:"detail"`
}

type NetDNSTaskDetail struct {
Expand Down
14 changes: 7 additions & 7 deletions pkg/k8s/apis/system/v1beta1/netreach.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ package v1beta1
const NetReachTaskName = "NetReach"

type NetReachTask struct {
TargetType string `json:"TargetType"`
TargetNumber int64 `json:"TargetNumber"`
FailureReason *string `json:"FailureReason,omitempty"`
Succeed bool `json:"Succeed"`
MaxCPU string `json:"MaxCPU"`
MaxMemory string `json:"MaxMemory"`
Detail []NetReachTaskDetail `json:"Detail"`
TargetType string `json:"TargetType"`
TargetNumber int64 `json:"TargetNumber"`
FailureReason *string `json:"FailureReason,omitempty"`
Succeed bool `json:"Succeed"`
SystemResource SystemResource `json:"SystemResource"`
TotalRunningLoad TotalRunningLoad `json:"TotalRunningLoad"`
Detail []NetReachTaskDetail `json:"Detail"`
}

type NetReachTaskDetail struct {
Expand Down
36 changes: 36 additions & 0 deletions pkg/k8s/apis/system/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 11 additions & 7 deletions pkg/pluginManager/agentManager.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package pluginManager
import (
"context"
"fmt"
"github.com/kdoctor-io/kdoctor/pkg/runningTask"
networkingv1 "k8s.io/api/networking/v1"
"time"

Expand Down Expand Up @@ -85,20 +86,23 @@ func (s *pluginManager) RunAgentController() {
logger.Sugar().Fatalf("unsupported TaskKind %s in %v", types.AgentConfig.TaskKind, types.TaskKinds)
}

runningTaskManager := runningTask.InitRunningTask()

for name, plugin := range s.chainingPlugins {
if name != types.AgentConfig.TaskKind && !types.AgentConfig.DefaultAgent {
continue
}

logger.Sugar().Infof("run controller for plugin %v", name)
k := &pluginAgentReconciler{
logger: logger.Named(name + "Reconciler"),
plugin: plugin,
client: mgr.GetClient(),
crdKind: name,
taskRoundData: taskStatusManager.NewTaskStatus(),
localNodeName: types.AgentConfig.LocalNodeName,
fm: fm,
logger: logger.Named(name + "Reconciler"),
plugin: plugin,
client: mgr.GetClient(),
crdKind: name,
taskRoundData: taskStatusManager.NewTaskStatus(),
localNodeName: types.AgentConfig.LocalNodeName,
fm: fm,
runningTaskManager: runningTaskManager,
}
if e := k.SetupWithManager(mgr); e != nil {
s.logger.Sugar().Fatalf("failed to builder reconcile for plugin %v, error=%v", name, e)
Expand Down
16 changes: 9 additions & 7 deletions pkg/pluginManager/agentReconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package pluginManager

import (
"context"
"github.com/kdoctor-io/kdoctor/pkg/runningTask"
"reflect"

"go.uber.org/zap"
Expand All @@ -20,13 +21,14 @@ import (
)

type pluginAgentReconciler struct {
client client.Client
plugin plugintypes.ChainingPlugin
logger *zap.Logger
crdKind string
localNodeName string
taskRoundData taskStatusManager.TaskStatus
fm fileManager.FileManager
client client.Client
plugin plugintypes.ChainingPlugin
logger *zap.Logger
crdKind string
localNodeName string
taskRoundData taskStatusManager.TaskStatus
fm fileManager.FileManager
runningTaskManager *runningTask.RunningTask
}

var _ reconcile.Reconciler = &pluginAgentReconciler{}
Expand Down
60 changes: 58 additions & 2 deletions pkg/pluginManager/agentTools.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"context"
"encoding/json"
"fmt"
"github.com/kdoctor-io/kdoctor/pkg/runningTask"
"strings"
"time"

Expand All @@ -34,6 +35,60 @@ func (s *pluginAgentReconciler) CallPluginImplementRoundTask(logger *zap.Logger,
defer cancel()
taskSucceed := make(chan bool)
logger.Sugar().Infof("plugin begins to implement, expect deadline %v, ", roundDuration.String())
// get task qps
var qps int
switch s.crdKind {
case KindNameAppHttpHealthy:
app := obj.(*crd.AppHttpHealthy)
qps = app.Spec.Request.QPS
case KindNameNetReach:
app := obj.(*crd.NetReach)
caseNum := 0
// Multiple use cases of netreach are executed simultaneously, so qps needs to be multiplied by the number of use cases
if *app.Spec.Target.ClusterIP {
if *app.Spec.Target.IPv4 {
caseNum += 1
}
if *app.Spec.Target.IPv6 {
caseNum += 1
}
}
if *app.Spec.Target.LoadBalancer {
if *app.Spec.Target.IPv4 {
caseNum += 1
}
if *app.Spec.Target.IPv6 {
caseNum += 1
}
}
if *app.Spec.Target.Endpoint {
if *app.Spec.Target.IPv4 {
caseNum += 1
}
if *app.Spec.Target.IPv6 {
caseNum += 1
}
}
if *app.Spec.Target.NodePort {
if *app.Spec.Target.IPv4 {
caseNum += 1
}
if *app.Spec.Target.IPv6 {
caseNum += 1
}
}
if *app.Spec.Target.Ingress {
if *app.Spec.Target.IPv4 {
caseNum += 1
}
}
qps = app.Spec.Request.QPS * caseNum
case KindNameNetdns:
app := obj.(*crd.Netdns)
qps = app.Spec.Request.QPS
}
logger.Sugar().Debugf("Before the current task starts, the total qps of the tasks being executed is %v", s.runningTaskManager.QpsStats())
s.runningTaskManager.SetTask(runningTask.Task{Name: taskName, Kind: s.crdKind, Qps: qps})

go func() {
// process mem cpu stats
Expand All @@ -49,7 +104,7 @@ func (s *pluginAgentReconciler) CallPluginImplementRoundTask(logger *zap.Logger,
StartTimeStamp: startTime,
ReportType: plugintypes.ReportTypeAgent,
}
failureReason, report, e := s.plugin.AgentExecuteTask(logger, ctx, obj, resourceStats)
failureReason, report, e := s.plugin.AgentExecuteTask(logger, ctx, obj, resourceStats, s.runningTaskManager)

if e != nil {
logger.Sugar().Errorf("plugin failed to implement the round task, error=%v", e)
Expand Down Expand Up @@ -129,6 +184,8 @@ func (s *pluginAgentReconciler) CallPluginImplementRoundTask(logger *zap.Logger,
}
}

s.runningTaskManager.DeleteTask(taskName)

// delete data
go func() {
time.Sleep(roundDuration)
Expand Down Expand Up @@ -179,7 +236,6 @@ func (s *pluginAgentReconciler) HandleAgentTaskRound(logger *zap.Logger, ctx con
if status, existed := s.taskRoundData.CheckTask(taskRoundName); !existed {
// mark to started it
s.taskRoundData.SetTask(taskRoundName, taskStatusManager.RoundStatusOngoing)

// we still have not reported the result for an ongoing round. do it
go s.CallPluginImplementRoundTask(logger.Named(taskRoundName), obj, schedulePlan, taskName, latestRecord.RoundNumber, crdObjSpec)
logger.Sugar().Infof("task %v , trigger to implement task round, and try to poll report after %v second", taskRoundName, types.AgentConfig.Configmap.TaskPollIntervalInSecond)
Expand Down
9 changes: 5 additions & 4 deletions pkg/pluginManager/apphttphealthy/agentExecuteTask.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"github.com/kdoctor-io/kdoctor/pkg/loadRequest/loadHttp"
"github.com/kdoctor-io/kdoctor/pkg/pluginManager/types"
"github.com/kdoctor-io/kdoctor/pkg/resource"
"github.com/kdoctor-io/kdoctor/pkg/runningTask"
"go.uber.org/zap"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/utils/pointer"
Expand Down Expand Up @@ -68,7 +69,7 @@ type TestTarget struct {
Method loadHttp.HttpMethod
}

func (s *PluginAppHttpHealthy) AgentExecuteTask(logger *zap.Logger, ctx context.Context, obj runtime.Object, r *resource.UsedResource) (finalfailureReason string, finalReport types.Task, err error) {
func (s *PluginAppHttpHealthy) AgentExecuteTask(logger *zap.Logger, ctx context.Context, obj runtime.Object, r *resource.UsedResource, rt *runningTask.RunningTask) (finalfailureReason string, finalReport types.Task, err error) {
finalfailureReason = ""
task := &v1beta1.AppHttpHealthyTask{}
err = nil
Expand Down Expand Up @@ -172,12 +173,12 @@ func (s *PluginAppHttpHealthy) AgentExecuteTask(logger *zap.Logger, ctx context.
} else {
task.Succeed = true
}
mem, cpu := r.Stats()
task.MaxMemory = fmt.Sprintf("%.2fMB", float64(mem/(1024*1024)))
task.MaxCPU = fmt.Sprintf("%.3f%%", cpu)
task.SystemResource = r.Stats()
// every round done clean cpu mem stats
r.CleanStats()

task.TotalRunningLoad = rt.QpsStats()

return finalfailureReason, task, err

}
Expand Down
Loading

0 comments on commit ee4ac27

Please sign in to comment.