Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update enable schedule logic; update PodQOS match method #1

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions cmd/crane-agent/app/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,13 @@ func Run(ctx context.Context, opts *options.Options) error {
nodeInformer := nodeInformerFactory.Core().V1().Nodes()

craneInformerFactory := craneinformers.NewSharedInformerFactory(craneClient, informerSyncPeriod)
nepInformer := craneInformerFactory.Ensurance().V1alpha1().NodeQOSEnsurancePolicies()
nodeQOSInformer := craneInformerFactory.Ensurance().V1alpha1().NodeQOSs()
podQOSInformer := craneInformerFactory.Ensurance().V1alpha1().PodQOSs()
actionInformer := craneInformerFactory.Ensurance().V1alpha1().AvoidanceActions()
tspInformer := craneInformerFactory.Prediction().V1alpha1().TimeSeriesPredictions()

newAgent, err := agent.NewAgent(ctx, hostname, opts.RuntimeEndpoint, opts.CgroupDriver, kubeClient, craneClient, podInformer, nodeInformer,
nepInformer, actionInformer, tspInformer, opts.NodeResourceReserved, opts.Ifaces, healthCheck, opts.CollectInterval, opts.ExecuteExcess)
nodeQOSInformer, podQOSInformer, actionInformer, tspInformer, opts.NodeResourceReserved, opts.Ifaces, healthCheck, opts.CollectInterval, opts.ExecuteExcess)

if err != nil {
return err
Expand Down
4 changes: 2 additions & 2 deletions cmd/crane-agent/app/options/option.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ type Options struct {
// Ifaces is the network devices to collect metric
Ifaces []string
NodeResourceReserved map[string]string
// ExecuteExcess is the percentage of executions that exceed the gap between current usage and waterlines
// ExecuteExcess is the percentage of executions that exceed the gap between current usage and watermarks
ExecuteExcess string
}

Expand Down Expand Up @@ -56,5 +56,5 @@ func (o *Options) AddFlags(flags *pflag.FlagSet) {
flags.StringArrayVar(&o.Ifaces, "ifaces", []string{"eth0"}, "The network devices to collect metric, use comma to separated, default: eth0")
flags.Var(cliflag.NewMapStringString(&o.NodeResourceReserved), "node-resource-reserved", "A set of ResourceName=Percent (e.g. cpu=40%,memory=40%)")
flags.DurationVar(&o.MaxInactivity, "max-inactivity", 5*time.Minute, "Maximum time from last recorded activity before automatic restart, default: 5min")
flags.StringVar(&o.ExecuteExcess, "execute-excess", "10%", "The percentage of executions that exceed the gap between current usage and waterlines, default: 10%.")
flags.StringVar(&o.ExecuteExcess, "execute-excess", "10%", "The percentage of executions that exceed the gap between current usage and watermarks, default: 10%.")
}
51 changes: 13 additions & 38 deletions deploy/crane-agent/rbac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,73 +6,48 @@ rules:
- apiGroups:
- ""
resources:
- pods/eviction
- pods/status
- pods
- pods/status
- nodes
- nodes/status
- nodes/finalizers
verbs:
- create
- get
- list
- watch
- update
- patch
- delete
- apiGroups:
- ""
resources:
- configmaps
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
- events
- pods/eviction
verbs:
- "*"
- create
- apiGroups:
- ""
resources:
- nodes
- nodes/status
- nodes/finalizers
verbs:
- get
- list
- watch
- update
- patch
- apiGroups:
- "ensurance.crane.io"
resources:
- nodeqosensurancepolicies
- configmaps
verbs:
- get
- list
- watch
- update
- patch
- apiGroups:
- "ensurance.crane.io"
- ""
resources:
- podqosensurancepolicies
- events
verbs:
- get
- list
- watch
- update
- patch
- "*"
- apiGroups:
- "ensurance.crane.io"
resources:
- podqoss
- nodeqoss
- avoidanceactions
verbs:
- get
- list
- watch
- update
- patch
- apiGroups:
- "prediction.crane.io"
resources:
Expand All @@ -83,8 +58,8 @@ rules:
- list
- watch
- create
- delete
- update
- patch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
Expand Down
47 changes: 47 additions & 0 deletions examples/ensurance/evict-on-cpu-usage-total/be-rules.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# PodQOS example: selects pods whose QOSClass is BestEffort and lists
# `eviction` under allowedActions for them.
apiVersion: ensurance.crane.io/v1alpha1
kind: PodQOS
metadata:
name: all-be-pods
spec:
# `eviction` matches the name of the AvoidanceAction defined later in this file;
# presumably only actions listed here may be applied to the selected pods — confirm.
allowedActions:
- eviction
resourceQOS:
cpuQOS:
# NOTE(review): the scale and meaning of cpuPriority are not visible here —
# confirm against the PodQOS API type before changing this value.
cpuPriority: 7
htIsolation:
enable: false
# Scope-based pod selection: QOSClass In [BestEffort].
scopeSelector:
matchExpressions:
- operator: In
scopeName: QOSClass
values:
- BestEffort
---
# NodeQOS example: a single rule on the cpu_total_usage metric that names the
# `eviction` action.
apiVersion: ensurance.crane.io/v1alpha1
kind: NodeQOS
metadata:
name: eviction-on-high-usage
spec:
# Probe settings: node-local metric source with a 60 s cache TTL and a 10 s timeout.
nodeQualityProbe:
nodeLocalGet:
localCacheTTLSeconds: 60
timeoutSeconds: 10
rules:
# `eviction` matches the name of the AvoidanceAction defined in this file.
# NOTE(review): avoidanceThreshold / restoreThreshold presumably count consecutive
# probe results needed to trigger / restore — confirm against the NodeQOS API type.
- actionName: eviction
avoidanceThreshold: 2
metricRule:
# presumably 5000 is in millicores (i.e. 5 cores) — TODO confirm the unit.
name: cpu_total_usage
value: 5000
name: cpu-usage
restoreThreshold: 2
strategy: None
---
# AvoidanceAction example: the `eviction` action referenced by name from the
# PodQOS (allowedActions) and NodeQOS (rules[].actionName) documents in this file.
apiVersion: ensurance.crane.io/v1alpha1
kind: AvoidanceAction
metadata:
name: eviction
spec:
# NOTE(review): presumably the minimum interval between two executions of this
# action (300 s) — confirm against the AvoidanceAction API type.
coolDownSeconds: 300
description: evict low priority pods
eviction:
# Grace period, in seconds, configured for evicted pods.
terminationGracePeriodSeconds: 30
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ go 1.17
require (
github.com/evanphx/json-patch v4.11.0+incompatible
github.com/go-echarts/go-echarts/v2 v2.2.4
github.com/gocrane/api v0.6.1-0.20220812033255-887f4b4e7d8b
github.com/gocrane/api v0.7.1-0.20220819080332-e4c0d60e812d
github.com/google/cadvisor v0.39.2
github.com/mjibson/go-dsp v0.0.0-20180508042940-11479a337f12
github.com/prometheus/client_golang v1.11.0
Expand Down
6 changes: 2 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -310,10 +310,8 @@ github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og=
github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw=
github.com/gobwas/ws v1.1.0-rc.5 h1:QOAag7FoBaBYYHRqzqkhhd8fq5RTubvI4v3Ft/gDVVQ=
github.com/gobwas/ws v1.1.0-rc.5/go.mod h1:nzvNcVha5eUziGrbxFCo6qFIojQHjJV5cLYIbezhfL0=
github.com/gocrane/api v0.6.1-0.20220809112454-68f0199a774e h1:pIocbZM7LchSMG7XBbfD9K+Im7zZtMZjVU7paVJOv6I=
github.com/gocrane/api v0.6.1-0.20220809112454-68f0199a774e/go.mod h1:GxI+t9AW8+NsHkz2JkPBIJN//9eLUjTZl1ScYAbXMbk=
github.com/gocrane/api v0.6.1-0.20220812033255-887f4b4e7d8b h1:ELyVltbne39izU2XaFrgJtqnhdeV+hBt+JBKooN7N4w=
github.com/gocrane/api v0.6.1-0.20220812033255-887f4b4e7d8b/go.mod h1:GxI+t9AW8+NsHkz2JkPBIJN//9eLUjTZl1ScYAbXMbk=
github.com/gocrane/api v0.7.1-0.20220819080332-e4c0d60e812d h1:qqPrNx1AETykgX80aWAmna/eQMDVWnUdSemWlfaZUNM=
github.com/gocrane/api v0.7.1-0.20220819080332-e4c0d60e812d/go.mod h1:GxI+t9AW8+NsHkz2JkPBIJN//9eLUjTZl1ScYAbXMbk=
github.com/godbus/dbus/v5 v5.0.3/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/godbus/dbus/v5 v5.0.4 h1:9349emZab16e7zQvpmsbtjc18ykshndd8y2PG3sgJbA=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
Expand Down
7 changes: 4 additions & 3 deletions pkg/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ func NewAgent(ctx context.Context,
craneClient *craneclientset.Clientset,
podInformer coreinformers.PodInformer,
nodeInformer coreinformers.NodeInformer,
nepInformer v1alpha1.NodeQOSEnsurancePolicyInformer,
nodeQOSInformer v1alpha1.NodeQOSInformer,
podQOSInformer v1alpha1.PodQOSInformer,
actionInformer v1alpha1.AvoidanceActionInformer,
tspInformer predictionv1.TimeSeriesPredictionInformer,
nodeResourceReserved map[string]string,
Expand All @@ -85,9 +86,9 @@ func NewAgent(ctx context.Context,
exclusiveCPUSet = cpuManager.GetExclusiveCpu
managers = appendManagerIfNotNil(managers, cpuManager)
}
stateCollector := collector.NewStateCollector(nodeName, nepInformer.Lister(), podInformer.Lister(), nodeInformer.Lister(), ifaces, healthCheck, CollectInterval, exclusiveCPUSet, cadvisorManager)
stateCollector := collector.NewStateCollector(nodeName, nodeQOSInformer.Lister(), podInformer.Lister(), nodeInformer.Lister(), ifaces, healthCheck, CollectInterval, exclusiveCPUSet, cadvisorManager)
managers = appendManagerIfNotNil(managers, stateCollector)
analyzerManager := analyzer.NewAnormalyAnalyzer(kubeClient, nodeName, podInformer, nodeInformer, nepInformer, actionInformer, stateCollector.AnalyzerChann, noticeCh)
analyzerManager := analyzer.NewAnomalyAnalyzer(kubeClient, nodeName, podInformer, nodeInformer, nodeQOSInformer, podQOSInformer, actionInformer, stateCollector.AnalyzerChann, noticeCh)
managers = appendManagerIfNotNil(managers, analyzerManager)
avoidanceManager := executor.NewActionExecutor(kubeClient, nodeName, podInformer, nodeInformer, noticeCh, runtimeEndpoint, stateCollector.State, executeExcess)
managers = appendManagerIfNotNil(managers, avoidanceManager)
Expand Down
Loading