Add plugin for networkTopology and score logic
Signed-off-by: Monokaix <changxuzheng@huawei.com>
Monokaix committed Dec 26, 2024
1 parent 90d1c52 commit c01ead5
Showing 5 changed files with 186 additions and 58 deletions.
124 changes: 77 additions & 47 deletions pkg/scheduler/actions/allocate/allocate.go
@@ -35,12 +35,18 @@ type Action struct {
// configured flag for error cache
enablePredicateErrorCache bool
hyperNodesTiers []int

// hyperNodeScoresByJob stores each job's total score for every available hyperNode; it is used to accumulate
// the scores of all nodes within each available hyperNode, and only when the job has hard network topology constraints.
// jobUID -> hyperNodeName -> score
hyperNodeScoresByJob map[string]map[string]float64
}

func New() *Action {
return &Action{
enablePredicateErrorCache: true, // enabled by default
hyperNodesTiers: []int{},
hyperNodeScoresByJob: make(map[string]map[string]float64),
}
}

@@ -306,7 +312,7 @@ func (alloc *Action) selectBestHyperNode(jobStmts map[string]*framework.Statemen
candidateHyperNodeGroups[hyperNodeName] = ssn.HyperNodes[hyperNodeName]
}

hyperNodeScores, err := util.PrioritizeHyperNodes(candidateHyperNodeGroups, job, ssn.HyperNodeOrderMapFn)
hyperNodeScores, err := util.PrioritizeHyperNodes(candidateHyperNodeGroups, alloc.hyperNodeScoresByJob[string(job.UID)], job, ssn.HyperNodeOrderMapFn)
if err != nil {
klog.V(3).ErrorS(err, "Failed to allocate resource for job", "jobName", job.UID)
return nil, bestHyperNodeName
@@ -379,54 +385,12 @@ func (alloc *Action) allocateResourcesForTasks(tasks *util.PriorityQueue, job *a
}
}

// Candidate nodes are divided into two gradients:
// - the first gradient: free nodes whose idle resources satisfy the task's resource request;
// - the second gradient: nodes whose idle resources plus future idle resources satisfy the task's resource request.
// Score the first gradient first; if it yields a suitable node, ignore the second gradient,
// otherwise score the second gradient and select a suitable node from it.
var candidateNodes [][]*api.NodeInfo
var idleCandidateNodes []*api.NodeInfo
var futureIdleCandidateNodes []*api.NodeInfo
for _, n := range predicateNodes {
if task.InitResreq.LessEqual(n.Idle, api.Zero) {
idleCandidateNodes = append(idleCandidateNodes, n)
} else if task.InitResreq.LessEqual(n.FutureIdle(), api.Zero) {
futureIdleCandidateNodes = append(futureIdleCandidateNodes, n)
} else {
klog.V(5).Infof("Predicate filtered node %v, idle: %v and future idle: %v do not meet the requirements of task: %v",
n.Name, n.Idle, n.FutureIdle(), task.Name)
}
}
candidateNodes = append(candidateNodes, idleCandidateNodes)
candidateNodes = append(candidateNodes, futureIdleCandidateNodes)

var bestNode *api.NodeInfo
for index, nodes := range candidateNodes {
if klog.V(5).Enabled() {
for _, node := range nodes {
klog.V(5).Infof("node %v, idle: %v, future idle: %v", node.Name, node.Idle, node.FutureIdle())
}
}
switch {
case len(nodes) == 0:
klog.V(5).Infof("Task: %v, no matching node is found in the candidateNodes(index: %d) list.", task.Name, index)
case len(nodes) == 1: // If only one node after predicate, just use it.
bestNode = nodes[0]
case len(nodes) > 1: // If more than one node remains after predicate, use "the best" one
nodeScores := util.PrioritizeNodes(task, nodes, ssn.BatchNodeOrderFn, ssn.NodeOrderMapFn, ssn.NodeOrderReduceFn)

bestNode = ssn.BestNodeFn(task, nodeScores)
if bestNode == nil {
bestNode = util.SelectBestNode(nodeScores)
}
}

// If a proper node is found in idleCandidateNodes, skip futureIdleCandidateNodes and directly return the node information.
if bestNode != nil {
break
}
bestNode, highestScore := alloc.prioritizeNodes(ssn, task, predicateNodes)
if bestNode == nil {
continue
}

alloc.sumNodeScoresInHyperNode(string(job.UID), hyperNode, highestScore)
alloc.allocateResourcesForTask(stmt, task, bestNode, job)

if ssn.JobReady(job) && !tasks.Empty() {
@@ -445,6 +409,72 @@ func (alloc *Action) allocateResourcesForTasks(tasks *util.PriorityQueue, job *a
}
}

func (alloc *Action) sumNodeScoresInHyperNode(jobUID, hyperNode string, score float64) {
// A normal vc job without networkTopology has no hyperNode; skip node score accumulation.
if hyperNode == "" {
return
}

if alloc.hyperNodeScoresByJob[jobUID] == nil {
alloc.hyperNodeScoresByJob[jobUID] = make(map[string]float64)
}

alloc.hyperNodeScoresByJob[jobUID][hyperNode] += score
}
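
To make the bookkeeping concrete, here is a minimal, self-contained Go sketch of the same jobUID -> hyperNodeName -> score accumulation, using hypothetical job and hyperNode names in place of real scheduler objects:

package main

import "fmt"

func main() {
	// jobUID -> hyperNodeName -> accumulated node-level score.
	hyperNodeScoresByJob := make(map[string]map[string]float64)

	sum := func(jobUID, hyperNode string, score float64) {
		if hyperNode == "" {
			return // a job without network topology has no hyperNode
		}
		if hyperNodeScoresByJob[jobUID] == nil {
			hyperNodeScoresByJob[jobUID] = make(map[string]float64)
		}
		hyperNodeScoresByJob[jobUID][hyperNode] += score
	}

	// Two tasks of job "j1" land on nodes inside hyperNode "s0";
	// their best-node scores accumulate per hyperNode.
	sum("j1", "s0", 3.5)
	sum("j1", "s0", 2.0)
	sum("j1", "", 9.9) // ignored: no hyperNode

	fmt.Println(hyperNodeScoresByJob["j1"]["s0"]) // 5.5
}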

// prioritizeNodes selects the node with the highest score and returns that node together with its score.
func (alloc *Action) prioritizeNodes(ssn *framework.Session, task *api.TaskInfo, predicateNodes []*api.NodeInfo) (*api.NodeInfo, float64) {
// Candidate nodes are divided into two gradients:
// - the first gradient: free nodes whose idle resources satisfy the task's resource request;
// - the second gradient: nodes whose idle resources plus future idle resources satisfy the task's resource request.
// Score the first gradient first; if it yields a suitable node, ignore the second gradient,
// otherwise score the second gradient and select a suitable node from it.
var candidateNodes [][]*api.NodeInfo
var idleCandidateNodes []*api.NodeInfo
var futureIdleCandidateNodes []*api.NodeInfo
for _, n := range predicateNodes {
if task.InitResreq.LessEqual(n.Idle, api.Zero) {
idleCandidateNodes = append(idleCandidateNodes, n)
} else if task.InitResreq.LessEqual(n.FutureIdle(), api.Zero) {
futureIdleCandidateNodes = append(futureIdleCandidateNodes, n)
} else {
klog.V(5).Infof("Predicate filtered node %v, idle: %v and future idle: %v do not meet the requirements of task: %v",
n.Name, n.Idle, n.FutureIdle(), task.Name)
}
}
candidateNodes = append(candidateNodes, idleCandidateNodes)
candidateNodes = append(candidateNodes, futureIdleCandidateNodes)

var bestNode *api.NodeInfo
var highestScore float64
for index, nodes := range candidateNodes {
if klog.V(5).Enabled() {
for _, node := range nodes {
klog.V(5).Infof("node %v, idle: %v, future idle: %v", node.Name, node.Idle, node.FutureIdle())
}
}
switch {
case len(nodes) == 0:
klog.V(5).Infof("Task: %v, no matching node is found in the candidateNodes(index: %d) list.", task.Name, index)
case len(nodes) == 1: // If only one node after predicate, just use it.
bestNode = nodes[0]
case len(nodes) > 1: // If more than one node remains after predicate, use "the best" one
nodeScores := util.PrioritizeNodes(task, nodes, ssn.BatchNodeOrderFn, ssn.NodeOrderMapFn, ssn.NodeOrderReduceFn)

bestNode = ssn.BestNodeFn(task, nodeScores)
if bestNode == nil {
bestNode, highestScore = util.SelectBestNodeAndScore(nodeScores)
}
}

// If a proper node is found in idleCandidateNodes, skip futureIdleCandidateNodes and directly return the node information.
if bestNode != nil {
break
}
}
return bestNode, highestScore
}
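
As an illustration of the two-gradient selection, a hedged, self-contained sketch with a simplified stand-in node type (single CPU values instead of full resource lists; the real code compares via task.InitResreq.LessEqual and scores each gradient with the node-order plugins):

package main

import "fmt"

// node is a simplified stand-in for api.NodeInfo.
type node struct {
	name       string
	idle       float64 // CPU currently free
	futureIdle float64 // CPU free once releasing pods finish
}

func main() {
	request := 4.0 // the task's CPU request
	predicateNodes := []node{
		{"n1", 2, 6}, // only future idle fits -> second gradient
		{"n2", 5, 5}, // idle fits -> first gradient
		{"n3", 1, 2}, // neither fits -> filtered out
	}

	var idle, futureIdle []node
	for _, n := range predicateNodes {
		switch {
		case request <= n.idle:
			idle = append(idle, n)
		case request <= n.futureIdle:
			futureIdle = append(futureIdle, n)
		}
	}

	// Score the first gradient first; fall back to the second only when it is empty.
	for _, gradient := range [][]node{idle, futureIdle} {
		if len(gradient) > 0 {
			fmt.Println("chosen from gradient:", gradient[0].name) // n2
			break
		}
	}
}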

func (alloc *Action) allocateResourcesForTask(stmt *framework.Statement, task *api.TaskInfo, node *api.NodeInfo, job *api.JobInfo) {
// Allocate idle resource to the task.
if task.InitResreq.LessEqual(node.Idle, api.Zero) {
82 changes: 82 additions & 0 deletions pkg/scheduler/actions/allocate/allocate_test.go
@@ -40,6 +40,7 @@ import (
"volcano.sh/volcano/pkg/scheduler/cache"
"volcano.sh/volcano/pkg/scheduler/conf"
"volcano.sh/volcano/pkg/scheduler/framework"
"volcano.sh/volcano/pkg/scheduler/plugins/binpack"
"volcano.sh/volcano/pkg/scheduler/plugins/drf"
"volcano.sh/volcano/pkg/scheduler/plugins/gang"
"volcano.sh/volcano/pkg/scheduler/plugins/nodeorder"
@@ -477,6 +478,87 @@ func TestAllocateWithNetWorkTopologies(t *testing.T) {
}
}

func TestNodeLevelScoreWithNetWorkTopologies(t *testing.T) {
plugins := map[string]framework.PluginBuilder{
predicates.PluginName: predicates.New,
gang.PluginName: gang.New,
binpack.PluginName: binpack.New,
}

tests := []uthelper.TestCommonStruct{
{
Name: "hard network topology constrain, allocate job to highest score hypeNode with node level binpack",
PodGroups: []*schedulingv1.PodGroup{
util.BuildPodGroupWithNetWorkTopologies("pg1", "c1", "q1", 2, nil, schedulingv1.PodGroupInqueue, "hard", 1),
util.BuildPodGroupWithNetWorkTopologies("pg2", "c1", "q1", 2, nil, schedulingv1.PodGroupRunning, "", 1),
},
Pods: []*v1.Pod{
// Tasks should use different roles, because the allocate action enables the predicate role cache by default.
util.BuildPod("c1", "p1", "", v1.PodPending, api.BuildResourceList("2", "4Gi"), "pg1", map[string]string{"volcano.sh/task-spec": "master"}, nil),
util.BuildPod("c1", "p2", "", v1.PodPending, api.BuildResourceList("4", "8Gi"), "pg1", map[string]string{"volcano.sh/task-spec": "worker"}, nil),

util.BuildPod("c1", "p3", "s0-n1", v1.PodRunning, api.BuildResourceList("2", "4Gi"), "pg2", map[string]string{"volcano.sh/task-spec": "master"}, nil),
util.BuildPod("c1", "p4", "s0-n2", v1.PodRunning, api.BuildResourceList("4", "8Gi"), "pg2", map[string]string{"volcano.sh/task-spec": "worker"}, nil),
},
Nodes: []*v1.Node{
util.BuildNode("s0-n1", api.BuildResourceList("4", "8Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil),
util.BuildNode("s0-n2", api.BuildResourceList("8", "16Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil),
util.BuildNode("s1-n3", api.BuildResourceList("4", "8Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil),
util.BuildNode("s1-n4", api.BuildResourceList("8", "16Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil),
},
HyperNodesListByTier: map[int][]string{0: {"s0", "s1"}},
HyperNodes: map[string]sets.Set[string]{
"s0": sets.New[string]("s0-n1", "s0-n2"),
"s1": sets.New[string]("s1-n3", "s1-n4"),
"s2": sets.New[string]("s0-n1", "s0-n2", "s1-n3", "s1-n4"),
},
Queues: []*schedulingv1.Queue{
util.BuildQueue("q1", 1, nil),
},
ExpectBindsNum: 2,
// "s0-n1" and "s0-n2" nodes have running pods, so get higher score when enable binpack.
ExpectBindMap: map[string]string{
"c1/p1": "s0-n1",
"c1/p2": "s0-n2",
},
},
}

trueValue := true
tiers := []conf.Tier{
{
Plugins: []conf.PluginOption{
{
Name: gang.PluginName,
EnabledJobOrder: &trueValue,
EnabledJobReady: &trueValue,
EnabledJobPipelined: &trueValue,
EnabledJobStarving: &trueValue,
},
{
Name: predicates.PluginName,
EnabledPredicate: &trueValue,
},
{
Name: binpack.PluginName,
EnabledNodeOrder: &trueValue,
},
},
},
}
for i, test := range tests {
t.Run(test.Name, func(t *testing.T) {
test.Plugins = plugins
test.RegisterSession(tiers, nil)
defer test.Close()
test.Run([]framework.Action{New()})
if err := test.CheckAll(i); err != nil {
t.Fatal(err)
}
})
}
}

func TestFareShareAllocate(t *testing.T) {
plugins := map[string]framework.PluginBuilder{
drf.PluginName: drf.New,
2 changes: 1 addition & 1 deletion pkg/scheduler/actions/backfill/backfill.go
@@ -85,7 +85,7 @@ func (backfill *Action) Execute(ssn *framework.Session) {
nodeScores := util.PrioritizeNodes(task, predicateNodes, ssn.BatchNodeOrderFn, ssn.NodeOrderMapFn, ssn.NodeOrderReduceFn)
node = ssn.BestNodeFn(task, nodeScores)
if node == nil {
node = util.SelectBestNode(nodeScores)
node, _ = util.SelectBestNodeAndScore(nodeScores)
}
}

28 changes: 19 additions & 9 deletions pkg/scheduler/util/scheduler_helper.go
@@ -127,27 +127,37 @@ func PrioritizeNodes(task *api.TaskInfo, nodes []*api.NodeInfo, batchFn api.Batc
return nodeScores
}

// PrioritizeHyperNodes prioritizes hyperNode scores across all plugins for a job and returns the hyperNode name with the highest score.
func PrioritizeHyperNodes(candidateHyperNodes map[string][]*api.NodeInfo, job *api.JobInfo, fn api.HyperNodeOrderMapFn) (map[float64][]string, error) {
pluginHyperNodesScoreMap := make(map[string]float64)
// PrioritizeHyperNodes returns a map whose keys are hyperNode scores and whose values are the corresponding hyperNodes.
// It accumulates two parts of the score:
// 1. node-level scores of each hyperNode from the NodeOrder extension point;
// 2. hyperNode-level scores from the HyperNodeOrder extension point.
func PrioritizeHyperNodes(candidateHyperNodes map[string][]*api.NodeInfo, nodeScoresInHyperNode map[string]float64, job *api.JobInfo, fn api.HyperNodeOrderMapFn) (map[float64][]string, error) {
hyperNodesScoreMap := make(map[string]float64)
mapScores, err := fn(job, candidateHyperNodes)
if err != nil {
return nil, err
}

// Accumulate each plugin's scores per hyperNode.
for pluginName, scores := range mapScores {
for hyperNode, score := range scores {
klog.V(5).InfoS("Add plugin score at hypeNode", "jobName", job.UID, "pluginName", pluginName, "hyperNodeName", hyperNode, "score", score)
pluginHyperNodesScoreMap[hyperNode] += score
hyperNodesScoreMap[hyperNode] += score
}
}

// Accumulate node-level scores from NodeOrder with the hyperNode's own score to produce the final score of each hyperNode.
for hyperNodeName, score := range nodeScoresInHyperNode {
klog.V(5).InfoS("Add node level scores to final hyperNode score", "jobName", job.UID, "hyperNodeName", hyperNodeName, "score", score)
hyperNodesScoreMap[hyperNodeName] += score
}

hyperNodeScores := make(map[float64][]string)
hyperNodeScoreMap := make(map[string]float64)
for hyperNodeName := range candidateHyperNodes {
// If no plugin scored this hyperNode, the default is 0.0.
score := 0.0
if value, ok := pluginHyperNodesScoreMap[hyperNodeName]; ok {
if value, ok := hyperNodesScoreMap[hyperNodeName]; ok {
score += value
}
hyperNodeScores[score] = append(hyperNodeScores[score], hyperNodeName)
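
A hedged sketch of the two-part accumulation, with hypothetical literal scores standing in for the HyperNodeOrderMapFn result and for the node-level scores collected by the allocate action:

package main

import "fmt"

func main() {
	// Hypothetical plugin output: pluginName -> hyperNode -> score.
	mapScores := map[string]map[string]float64{
		"networkTopologyAware": {"s0": 10, "s1": 4},
	}
	// Node-level scores accumulated per hyperNode (see sumNodeScoresInHyperNode).
	nodeScoresInHyperNode := map[string]float64{"s0": 5.5}

	total := make(map[string]float64)
	for _, scores := range mapScores {
		for hn, s := range scores {
			total[hn] += s
		}
	}
	for hn, s := range nodeScoresInHyperNode {
		total[hn] += s
	}

	// Invert to score -> hyperNodes, the shape PrioritizeHyperNodes returns.
	byScore := make(map[float64][]string)
	for hn, s := range total {
		byScore[s] = append(byScore[s], hn)
	}
	fmt.Println(byScore) // map[4:[s1] 15.5:[s0]]
}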
@@ -176,8 +186,8 @@ func SortNodes(nodeScores map[float64][]*api.NodeInfo) []*api.NodeInfo {
return nodesInorder
}

// SelectBestNode returns the best node, i.e. the one with the highest score, picking one randomly if several nodes tie.
func SelectBestNode(nodeScores map[float64][]*api.NodeInfo) *api.NodeInfo {
// SelectBestNodeAndScore returns the node with the highest score together with that score, picking one randomly if several nodes tie.
func SelectBestNodeAndScore(nodeScores map[float64][]*api.NodeInfo) (*api.NodeInfo, float64) {
var bestNodes []*api.NodeInfo
maxScore := -1.0
for score, nodes := range nodeScores {
@@ -188,10 +198,10 @@ func SelectBestNode(nodeScores map[float64][]*api.NodeInfo) *api.NodeInfo {
}

if len(bestNodes) == 0 {
return nil
return nil, 0
}

return bestNodes[rand.Intn(len(bestNodes))]
return bestNodes[rand.Intn(len(bestNodes))], maxScore
}
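
The same select-and-score pattern in a minimal form over plain strings, showing the random tie-break among the highest-scoring entries (toy data rather than the scheduler's node types):

package main

import (
	"fmt"
	"math/rand"
)

func selectBest(scores map[float64][]string) (string, float64) {
	var best []string
	maxScore := -1.0
	for score, names := range scores {
		if score > maxScore {
			maxScore = score
			best = names
		}
	}
	if len(best) == 0 {
		return "", 0
	}
	// Pick randomly among entries that share the highest score.
	return best[rand.Intn(len(best))], maxScore
}

func main() {
	name, score := selectBest(map[float64][]string{
		1.0: {"n1"},
		2.0: {"n2", "n3"}, // tie: n2 or n3 chosen at random
	})
	fmt.Println(name, score)
}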

// SelectBestHyperNode returns the name of the best hyperNode, i.e. the one with the highest score, picking one randomly if several hyperNodes tie.
8 changes: 7 additions & 1 deletion pkg/scheduler/util/scheduler_helper_test.go
@@ -32,13 +32,15 @@ func TestSelectBestNode(t *testing.T) {
NodeScores map[float64][]*api.NodeInfo
// Expected node is one of ExpectedNodes
ExpectedNodes []*api.NodeInfo
ExpectedScore float64
}{
{
NodeScores: map[float64][]*api.NodeInfo{
1.0: {&api.NodeInfo{Name: "node1"}, &api.NodeInfo{Name: "node2"}},
2.0: {&api.NodeInfo{Name: "node3"}, &api.NodeInfo{Name: "node4"}},
},
ExpectedNodes: []*api.NodeInfo{{Name: "node3"}, {Name: "node4"}},
ExpectedScore: 2.0,
},
{
NodeScores: map[float64][]*api.NodeInfo{
@@ -47,6 +49,7 @@
2.0: {&api.NodeInfo{Name: "node4"}, &api.NodeInfo{Name: "node5"}},
},
ExpectedNodes: []*api.NodeInfo{{Name: "node3"}},
ExpectedScore: 3.0,
},
{
NodeScores: map[float64][]*api.NodeInfo{},
@@ -63,10 +66,13 @@
return false
}
for i, test := range cases {
result := SelectBestNode(test.NodeScores)
result, score := SelectBestNodeAndScore(test.NodeScores)
if !oneOf(result, test.ExpectedNodes) {
t.Errorf("Failed test case #%d, expected: %#v, got %#v", i, test.ExpectedNodes, result)
}
if score != test.ExpectedScore {
t.Errorf("Failed test case #%d, expected: %#v, got %#v", i, test.ExpectedScore, score)
}
}
}

