From b3efb1fe79890fac6f8281028cf1776aa2b23de2 Mon Sep 17 00:00:00 2001 From: JesseStutler Date: Mon, 23 Dec 2024 16:30:15 +0800 Subject: [PATCH] fix: hierarchical queue webhook validation use listing podgroups instead --- .../queues/validate/validate_queue.go | 21 +++++++++++++++---- .../queues/validate/validate_queue_test.go | 21 ++++++++++++++++--- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/pkg/webhooks/admission/queues/validate/validate_queue.go b/pkg/webhooks/admission/queues/validate/validate_queue.go index b315f7baae..616a019776 100644 --- a/pkg/webhooks/admission/queues/validate/validate_queue.go +++ b/pkg/webhooks/admission/queues/validate/validate_queue.go @@ -17,6 +17,7 @@ limitations under the License. package validate import ( + "context" "fmt" "strconv" "strings" @@ -29,6 +30,7 @@ import ( "k8s.io/klog/v2" schedulingv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" + "volcano.sh/volcano/pkg/cli/podgroup" "volcano.sh/volcano/pkg/webhooks/router" "volcano.sh/volcano/pkg/webhooks/schema" "volcano.sh/volcano/pkg/webhooks/util" @@ -255,10 +257,21 @@ func validateHierarchicalQueue(queue *schedulingv1beta1.Queue) error { return fmt.Errorf("failed to get parent queue of queue %s: %v", queue.Name, err) } - if parentQueue.Status.Pending+parentQueue.Status.Running+parentQueue.Status.Unknown+parentQueue.Status.Inqueue > 0 { - return fmt.Errorf("queue %s cannot be the parent queue of queue %s because it has PodGroups (pending: %d, running: %d, unknown: %d, inqueue: %d)", - parentQueue.Name, queue.Name, parentQueue.Status.Pending, - parentQueue.Status.Running, parentQueue.Status.Unknown, parentQueue.Status.Inqueue) + pgList, err := config.VolcanoClient.SchedulingV1beta1().PodGroups("").List(context.Background(), metav1.ListOptions{}) + if err != nil { + return fmt.Errorf("failed to get all podgroups from api-server: %v", err) + } + + pgStats := &podgroup.PodGroupStatistics{} + for _, pg := range pgList.Items { + if pg.Spec.Queue == parentQueue.Name { + pgStats.StatPodGroupCountsForQueue(&pg) + } + } + + if pgStats.Pending+pgStats.Running+pgStats.Unknown+pgStats.Inqueue+pgStats.Completed > 0 { + return fmt.Errorf("queue %s cannot be the parent queue of queue %s because it has PodGroups (pending: %d, running: %d, unknown: %d, inqueue: %d, completed: %d)", + parentQueue.Name, queue.Name, pgStats.Pending, pgStats.Running, pgStats.Unknown, pgStats.Inqueue, pgStats.Completed) } klog.V(3).Infof("Validation passed for hierarchical queue %s with parent queue %s", diff --git a/pkg/webhooks/admission/queues/validate/validate_queue_test.go b/pkg/webhooks/admission/queues/validate/validate_queue_test.go index 039e644bcd..73062ac380 100644 --- a/pkg/webhooks/admission/queues/validate/validate_queue_test.go +++ b/pkg/webhooks/admission/queues/validate/validate_queue_test.go @@ -1049,8 +1049,18 @@ func TestAdmitHierarchicalQueues(t *testing.T) { Spec: schedulingv1beta1.QueueSpec{ Parent: "root", }, - Status: schedulingv1beta1.QueueStatus{ - Running: 2, + } + + jobInQueue := schedulingv1beta1.PodGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job-in-queue", + Namespace: "test", + }, + Spec: schedulingv1beta1.PodGroupSpec{ + Queue: "queue-with-jobs", + }, + Status: schedulingv1beta1.PodGroupStatus{ + Phase: schedulingv1beta1.PodGroupRunning, }, } @@ -1077,6 +1087,11 @@ func TestAdmitHierarchicalQueues(t *testing.T) { t.Errorf("Create queue with jobs failed for %v.", err) } + _, err = config.VolcanoClient.SchedulingV1beta1().PodGroups("test").Create(context.TODO(), &jobInQueue, metav1.CreateOptions{}) + if err != nil { + t.Errorf("Create job %s/%s failed for %v", jobInQueue.Namespace, jobInQueue.Name, err) + } + _, err = config.VolcanoClient.SchedulingV1beta1().Queues().Create(context.TODO(), &queueWithoutJobs, metav1.CreateOptions{}) if err != nil { t.Errorf("Create queue without jobs failed for %v.", err) @@ -1130,7 +1145,7 @@ func TestAdmitHierarchicalQueues(t *testing.T) { reviewResponse: &admissionv1.AdmissionResponse{ Allowed: false, Result: &metav1.Status{ - Message: "queue queue-with-jobs cannot be the parent queue of queue parent-queue-with-jobs because it has PodGroups (pending: 0, running: 2, unknown: 0, inqueue: 0)", + Message: "queue queue-with-jobs cannot be the parent queue of queue parent-queue-with-jobs because it has PodGroups (pending: 0, running: 1, unknown: 0, inqueue: 0, completed: 0)", }, }, },