From 74f17764d98c3c2594e2ef5c06022839c7701d6e Mon Sep 17 00:00:00 2001 From: kimeunju108 Date: Tue, 11 May 2021 18:56:44 +0000 Subject: [PATCH 01/12] changed klog.Infof to klog.V(4).Infof --- .../pkg/scheduler/eventhandlers.go | 40 ++++----- .../plugins/defaultbinder/default_binder.go | 6 +- .../siteavailability/siteavailability.go | 2 +- globalscheduler/pkg/scheduler/scheduler.go | 88 +++++++++---------- .../scheduler/sitecacheinfo/sitecache_info.go | 2 +- 5 files changed, 68 insertions(+), 70 deletions(-) diff --git a/globalscheduler/pkg/scheduler/eventhandlers.go b/globalscheduler/pkg/scheduler/eventhandlers.go index 1ed0fae2b..9322054b1 100644 --- a/globalscheduler/pkg/scheduler/eventhandlers.go +++ b/globalscheduler/pkg/scheduler/eventhandlers.go @@ -138,7 +138,7 @@ func responsibleForPod(pod *v1.Pod, schedulerName string) bool { // addPodToCache add pod to the stack cache of the scheduler func (sched *Scheduler) addPodToCache(obj interface{}) { pod, ok := obj.(*v1.Pod) - klog.Infof("Add a pod: %v", pod) + klog.V(4).Infof("Add a pod: %v", pod.Name) if !ok { klog.Errorf("cannot convert to *v1.Pod: %v", obj) return @@ -160,7 +160,7 @@ func (sched *Scheduler) updatePodInCache(oldObj, newObj interface{}) { return } newPod, ok := newObj.(*v1.Pod) - klog.Infof("Update a pod: %v", newPod) + klog.V(4).Infof("Update a pod: %v", newPod) if !ok { klog.Errorf("cannot convert newObj to *v1.Pod: %v", newObj) return @@ -178,7 +178,7 @@ func (sched *Scheduler) deletePodFromCache(obj interface{}) { switch t := obj.(type) { case *v1.Pod: pod = t - klog.Infof("Delete a pod: %v", pod) + klog.V(4).Infof("Delete a pod: %v", pod.Name) case cache.DeletedFinalStateUnknown: var ok bool pod, ok = t.Obj.(*v1.Pod) @@ -301,15 +301,13 @@ func (sched *Scheduler) updatePodInSchedulingQueue(oldObj, newObj interface{}) { return } newPod, ok := newObj.(*v1.Pod) - klog.Infof("updatePodToSchedulingQueue : %v", newPod) + klog.V(4).Infof("updatePodToSchedulingQueue : %v", newPod) if !ok { klog.Errorf("cannot convert newObj to *v1.Pod: %v", newObj) return } - oldStack := getStackFromPod(oldPod) newStack := getStackFromPod(newPod) - if sched.skipStackUpdate(newStack) { return } @@ -323,7 +321,7 @@ func (sched *Scheduler) deletePodFromSchedulingQueue(obj interface{}) { switch t := obj.(type) { case *v1.Pod: pod = obj.(*v1.Pod) - klog.Infof("deletePodToSchedulingQueue : %v", pod) + klog.V(4).Infof("deletePodToSchedulingQueue : %v", pod.Name) case cache.DeletedFinalStateUnknown: var ok bool pod, ok = t.Obj.(*v1.Pod) @@ -372,14 +370,14 @@ func (sched *Scheduler) skipStackUpdate(stack *types.Stack) bool { if !reflect.DeepEqual(assumedStackCopy, stackCopy) { return false } - klog.V(3).Infof("Skipping stack %s/%s/%s update", stack.Tenant, stack.PodNamespace, stack.PodName) + klog.V(4).Infof("Skipping stack %s/%s/%s update", stack.Tenant, stack.PodNamespace, stack.PodName) return true } func (sched *Scheduler) bindStacks(assumedStacks []types.Stack) { - klog.Infof("assumedStacks: %v", assumedStacks) + klog.V(4).Infof("assumedStacks: %v", assumedStacks) for _, newStack := range assumedStacks { - klog.Infof("newStack: %v", newStack) + klog.V(4).Infof("newStack: %v", newStack) clusterName := newStack.Selected.ClusterName sched.bindToSite(clusterName, &newStack) } @@ -398,7 +396,7 @@ func (sched *Scheduler) setPodScheduleErr(reqStack *types.Stack) error { newStatus := v1.PodStatus{ Phase: v1.PodNoSchedule, } - klog.Infof("Attempting to update pod status from %v to %v", pod.Status, newStatus) + klog.V(4).Infof("Attempting to update pod 
status from %v to %v", pod.Status, newStatus) _, _, err = statusutil.PatchPodStatus(sched.Client, reqStack.Tenant, reqStack.PodNamespace, reqStack.PodName, pod.Status, newStatus) if err != nil { klog.Warningf("PatchPodStatus for pod %q: %v", reqStack.PodName+"/"+reqStack.PodNamespace+"/"+ @@ -406,7 +404,7 @@ func (sched *Scheduler) setPodScheduleErr(reqStack *types.Stack) error { return err } - klog.Infof("Update pod status from %v to %v success", pod.Status, newStatus) + klog.V(4).Infof("Update pod status from %v to %v success", pod.Status, newStatus) return nil } @@ -424,9 +422,9 @@ func (sched *Scheduler) bindToSite(clusterName string, assumedStack *types.Stack }, } - klog.V(3).Infof("binding: %v", binding) + klog.V(4).Infof("binding: %v", binding) // do api server update here - klog.Infof("Attempting to bind %v to %v", binding.Name, binding.Target.Name) + klog.V(4).Infof("Attempting to bind %v to %v", binding.Name, binding.Target.Name) err := sched.Client.CoreV1().PodsWithMultiTenancy(binding.Namespace, binding.Tenant).Bind(binding) if err != nil { klog.Errorf("Failed to bind stack: %v/%v/%v", assumedStack.Tenant, assumedStack.PodNamespace, @@ -444,7 +442,7 @@ func (sched *Scheduler) addCluster(object interface{}) { resource := object.(*clusterv1.Cluster) clusterCopy := resource.DeepCopy() if sched.verifyClusterInfo(clusterCopy) == false { - klog.Infof(" Cluster data is not correct: %v", clusterCopy) + klog.V(4).Infof(" Cluster data is not correct: %v", clusterCopy) } key, err := controller.KeyFunc(object) if err != nil { @@ -452,7 +450,7 @@ func (sched *Scheduler) addCluster(object interface{}) { return } sched.Enqueue(key, EventType_Create) - klog.Infof("Enqueue Create cluster: %v", key) + klog.V(4).Infof("Enqueue Create cluster: %v", key) } func (sched *Scheduler) updateCluster(oldObject, newObject interface{}) { @@ -461,7 +459,7 @@ func (sched *Scheduler) updateCluster(oldObject, newObject interface{}) { oldClusterCopy := oldResource.DeepCopy() newClusterCopy := newResource.DeepCopy() if sched.verifyClusterInfo(newClusterCopy) { - klog.Infof(" Cluster data is not correct: %v", newResource) + klog.V(4).Infof(" Cluster data is not correct: %v", newResource) } key1, err1 := controller.KeyFunc(oldObject) key2, err2 := controller.KeyFunc(newObject) @@ -478,13 +476,13 @@ func (sched *Scheduler) updateCluster(oldObject, newObject interface{}) { switch eventType { case ClusterUpdateNo: { - klog.Infof("No actual change in clusters, discarding: %v", newClusterCopy.Name) + klog.V(4).Infof("No actual change in clusters, discarding: %v", newClusterCopy.Name) break } case ClusterUpdateYes: { sched.Enqueue(key2, EventType_Update) - klog.Infof("Enqueue Update Cluster: %v", key2) + klog.V(4).Infof("Enqueue Update Cluster: %v", key2) break } default: @@ -499,7 +497,7 @@ func (sched *Scheduler) deleteCluster(object interface{}) { resource := object.(*clusterv1.Cluster) clusterCopy := resource.DeepCopy() if sched.verifyClusterInfo(clusterCopy) == false { - klog.Infof(" Cluster data is not correct: %v", clusterCopy) + klog.V(4).Infof(" Cluster data is not correct: %v", clusterCopy) return } key, err := controller.KeyFunc(object) @@ -510,7 +508,7 @@ func (sched *Scheduler) deleteCluster(object interface{}) { sched.Enqueue(key, EventType_Delete) siteID := clusterCopy.Spec.Region.Region + constants.SiteDelimiter + clusterCopy.Spec.Region.AvailabilityZone sched.deletedClusters[key] = siteID - klog.Infof("Enqueue Delete Cluster: %v", key) + klog.V(4).Infof("Enqueue Delete Cluster: %v", key) } // Enqueue 
puts key of the cluster object in the work queue diff --git a/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go b/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go index 89366bd1a..a8391ea44 100644 --- a/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go +++ b/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go @@ -70,7 +70,7 @@ func (b DefaultBinder) Bind(ctx context.Context, state *interfaces.CycleState, s klog.Errorf("Gettng site selector state failed! err: %s", err) return interfaces.NewStatus(interfaces.Error, fmt.Sprintf("getting site %q info failed: %v", siteID, err)) } - klog.Errorf("GetSiteSelectorState: %v", siteSelectedInfo) + klog.Errorf("site selector info: %v", siteSelectedInfo) if len(stack.Resources) != len(siteSelectedInfo.Flavors) { klog.Errorf("flavor count not equal to server count! err: %s", err) return interfaces.NewStatus(interfaces.Error, fmt.Sprintf("siteID(%s) flavor count not equal to "+ @@ -85,7 +85,7 @@ func (b DefaultBinder) Bind(ctx context.Context, state *interfaces.CycleState, s klog.Warningf("flavor %s not found in region(%s)", flavorID, region) continue } - klog.Infof("flavor %s : %v", flavorID, flv) + klog.V(4).Infof("flavor %s : %v", flavorID, flv) vCPUInt, err := strconv.ParseInt(flv.Vcpus, 10, 64) if err != nil || vCPUInt <= 0 { klog.Warningf("flavor %s is invalid in region(%s)", flavorID, region) @@ -111,6 +111,6 @@ func (b DefaultBinder) Bind(ctx context.Context, state *interfaces.CycleState, s regionFlavors = map[string]*typed.RegionFlavor{} } siteCacheInfo.DeductSiteResInfo(resInfo, regionFlavors) - klog.Infof("Resource state after deduction: %v", siteCacheInfo) + klog.V(4).Infof("Resource state after deduction: %v", siteCacheInfo) return nil } diff --git a/globalscheduler/pkg/scheduler/framework/plugins/siteavailability/siteavailability.go b/globalscheduler/pkg/scheduler/framework/plugins/siteavailability/siteavailability.go index e85ddddda..3a323f4fd 100644 --- a/globalscheduler/pkg/scheduler/framework/plugins/siteavailability/siteavailability.go +++ b/globalscheduler/pkg/scheduler/framework/plugins/siteavailability/siteavailability.go @@ -44,7 +44,7 @@ func (pl *SiteAvailability) Name() string { // Filter invoked at the filter extension point. 
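Most of this first patch demotes routine scheduler logging from klog.Infof to klog.V(4).Infof, so the messages appear only when the process runs with verbosity 4 or higher. A minimal sketch of that gating, with illustrative flag wiring (not part of the patch):

package main

import (
	"flag"

	"k8s.io/klog"
)

func main() {
	klog.InitFlags(nil)
	_ = flag.Set("v", "2") // pretend the scheduler was started with -v=2
	flag.Parse()
	defer klog.Flush()

	klog.Infof("always printed")                 // unconditional, operator-facing
	klog.V(4).Infof("debug-only detail: %v", 42) // dropped here; emitted only when -v >= 4
}
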
func (pl *SiteAvailability) Filter(ctx context.Context, cycleState *interfaces.CycleState, stack *types.Stack, siteCacheInfo *sitecacheinfo.SiteCacheInfo) *interfaces.Status { - klog.Infof("Filter- siteCacheInfo: %v", siteCacheInfo) + klog.V(4).Infof("Filter- siteCacheInfo: %v", siteCacheInfo) if siteCacheInfo.GetSite().Status == constants.SiteStatusOffline || siteCacheInfo.GetSite().Status == constants.SiteStatusSellout { msg := fmt.Sprintf("Site(%s) status is %s, not available!", siteCacheInfo.GetSite().SiteID, siteCacheInfo.GetSite().Status) klog.Info(msg) diff --git a/globalscheduler/pkg/scheduler/scheduler.go b/globalscheduler/pkg/scheduler/scheduler.go index 497fc4189..47cab45bd 100644 --- a/globalscheduler/pkg/scheduler/scheduler.go +++ b/globalscheduler/pkg/scheduler/scheduler.go @@ -128,7 +128,7 @@ var once sync.Once func NewScheduler(gsconfig *types.GSSchedulerConfiguration, stopCh <-chan struct{}) (*Scheduler, error) { stopEverything := stopCh - klog.Infof("stopEverything to check : %v", stopEverything) + klog.V(4).Infof("stopEverything to check : %v", stopEverything) if stopEverything == nil { stopEverything = wait.NeverStop } @@ -150,7 +150,7 @@ func NewScheduler(gsconfig *types.GSSchedulerConfiguration, stopCh <-chan struct //build entire FlavorMap map sched.UpdateFlavor() - klog.Infof("FlavorMap: %v", sched.siteCacheInfoSnapshot.FlavorMap) + klog.V(4).Infof("FlavorMap: %v", sched.siteCacheInfoSnapshot.FlavorMap) // init pod, cluster, and scheduler informers for scheduler err = sched.initPodClusterSchedulerInformers(gsconfig, stopEverything) if err != nil { @@ -190,14 +190,14 @@ func (sched *Scheduler) StartInformersAndRun(stopCh <-chan struct{}) { } // start pod informers if sched.PodInformer != nil && sched.InformerFactory != nil { - klog.Infof("Starting scheduler %s informer", sched.SchedulerName) + klog.V(4).Infof("Starting scheduler %s informer", sched.SchedulerName) sched.InformerFactory.Start(stopCh) // Wait for all caches to sync before scheduling. sched.InformerFactory.WaitForCacheSync(stopCh) } // start scheduler informer if sched.schedulerInformer != nil { - klog.Infof("Starting scheduler informer for scheduler %s", sched.SchedulerName) + klog.V(4).Infof("Starting scheduler informer for scheduler %s", sched.SchedulerName) go sched.schedulerInformer.Run(stopCh) } // Do scheduling @@ -207,13 +207,13 @@ func (sched *Scheduler) StartInformersAndRun(stopCh <-chan struct{}) { // Run begins watching and scheduling. It waits for cache to be synced, then starts scheduling // and blocked until the context is done. 
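StartInformersAndRun above starts the shared informer factory and blocks on WaitForCacheSync before the scheduling loop begins. The same start-then-sync pattern reduced to plain client-go calls; the function and package names here are illustrative:

package schedulerutil

import (
	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/cache"
	"k8s.io/klog"
)

func startAndSync(client kubernetes.Interface, stopCh <-chan struct{}) {
	factory := informers.NewSharedInformerFactory(client, 0)
	podInformer := factory.Core().V1().Pods().Informer()

	factory.Start(stopCh) // non-blocking; each informer runs in its own goroutine
	if !cache.WaitForCacheSync(stopCh, podInformer.HasSynced) {
		klog.Errorf("failed to wait for caches to sync")
		return
	}
	klog.V(4).Infof("caches synced; safe to start scheduling")
}
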
func (sched *Scheduler) Run(clusterWorkers int, podWorkers int, stopCh <-chan struct{}) { - klog.Infof("Starting scheduler %s", sched.SchedulerName) + klog.V(4).Infof("Starting scheduler %s", sched.SchedulerName) defer utilruntime.HandleCrash() //cluster if clusterWorkers > 0 { defer sched.ClusterQueue.ShutDown() - klog.Infof("Waiting informer caches to sync") + klog.V(4).Infof("Waiting informer caches to sync") if ok := cache.WaitForCacheSync(sched.StopEverything, sched.ClusterSynced); !ok { klog.Errorf("failed to wait for caches to sync") } @@ -225,7 +225,7 @@ func (sched *Scheduler) Run(clusterWorkers int, podWorkers int, stopCh <-chan st } defer sched.StackQueue.Close() - klog.Infof("Waiting informer caches to sync") + klog.V(4).Infof("Waiting informer caches to sync") if ok := cache.WaitForCacheSync(sched.StopEverything, sched.PodSynced); !ok { klog.Errorf("failed to wait for caches to sync") } @@ -236,7 +236,7 @@ func (sched *Scheduler) Run(clusterWorkers int, podWorkers int, stopCh <-chan st } klog.Info("Started cluster & pod workers") <-stopCh - klog.Infof("Shutting down scheduler %s", sched.SchedulerName) + klog.V(4).Infof("Shutting down scheduler %s", sched.SchedulerName) } // Cache returns the cache in scheduler for test to check the data in scheduler. @@ -261,15 +261,15 @@ func (sched *Scheduler) scheduleOne() bool { if shutdown != nil { return false } - klog.Infof("1. Stack: %v, stack selector: %v", stack, stack.Selector) + klog.V(4).Infof("1. Stack: %v, stack selector: %v", stack, stack.Selector) allocation, err := sched.generateAllocationFromStack(stack) - klog.Infof("2. Allocation: %v, allocation selector: %v", allocation, allocation.Selector) + klog.V(4).Infof("2. Allocation: %v, allocation selector: %v", allocation, allocation.Selector) if err != nil { return false } start := stack.CreateTime end := time.Now().UnixNano() - klog.Infof("=== done pop queue, time consumption: %v ms ===", (end-start)/int64(time.Millisecond)) + klog.V(4).Infof("=== done pop queue, time consumption: %v ms ===", (end-start)/int64(time.Millisecond)) // 2.do scheduling process start = end @@ -281,21 +281,21 @@ func (sched *Scheduler) scheduleOne() bool { return true } end = time.Now().UnixNano() - klog.Infof("=== done Scheduling pipline, time consumption: %vms ===", (end-start)/int64(time.Millisecond)) - klog.Infof("Schedule result: %v", result) //result is assumed stacks - klog.Infof("3. Assumed Stacks: %v", result) + klog.V(4).Infof("=== done Scheduling pipline, time consumption: %vms ===", (end-start)/int64(time.Millisecond)) + klog.V(4).Infof("Schedule result: %v", result) //result is assumed stacks + klog.V(4).Infof("3. 
Assumed Stacks: %v", result) // 3.bind scheduler result to pod start = end - klog.Infof("Try to bind to site, stacks:%v", result.Stacks) + klog.V(4).Infof("Try to bind to site, stacks:%v", result.Stacks) sched.bindStacks(result.Stacks) end = time.Now().UnixNano() - klog.Infof("=== done bind pod to cluster, time consumption: %vms ===", (end-start)/int64(time.Millisecond)) + klog.V(4).Infof("=== done bind pod to cluster, time consumption: %vms ===", (end-start)/int64(time.Millisecond)) // log the elapsed time for the entire schedule if stack.CreateTime != 0 { spendTime := time.Now().UnixNano() - stack.CreateTime - klog.Infof("@@@ Finished Schedule, time consumption: %vms @@@", spendTime/int64(time.Millisecond)) + klog.V(4).Infof("@@@ Finished Schedule, time consumption: %vms @@@", spendTime/int64(time.Millisecond)) } return true } @@ -368,7 +368,7 @@ func (sched *Scheduler) findSitesThatPassFilters(ctx context.Context, state *int err = fmt.Errorf("SiteCacheInfoMap of %v is null", siteID) return nil, err } - klog.Infof("allSiteCacheInfos: %v", len(allSiteCacheInfos)) + klog.V(4).Infof("allSiteCacheInfos: %v", len(allSiteCacheInfos)) // Create filtered list with enough space to avoid growing it // and allow assigning. filtered := make([]*types.Site, len(allSiteCacheInfos)) @@ -454,7 +454,7 @@ func (sched *Scheduler) prioritizeSites( // sort by score. sort.Sort(sort.Reverse(result)) - klog.Infof("score sites: %v", result) + klog.V(4).Infof("score sites: %v", result) return result, nil } @@ -502,14 +502,14 @@ func (sched *Scheduler) bind(ctx context.Context, stack *types.Stack, targetSite // Schedule Run begins watching and scheduling. It waits for cache to be synced , // then starts scheduling and blocked until the context is done. func (sched *Scheduler) Schedule(ctx context.Context, allocation *types.Allocation) (result ScheduleResult, err error) { - klog.Infof("Attempting to schedule allocation: %v", allocation.ID) + klog.V(4).Infof("Attempting to schedule allocation: %v", allocation.ID) state := interfaces.NewCycleState() schedulingCycleCtx, cancel := context.WithCancel(ctx) defer cancel() // 1. Snapshot site resource cache start := time.Now() - klog.Infof("[START] snapshot site...") + klog.V(4).Infof("[START] snapshot site...") ///UpdateFlavorMap updates FlavorCache.RegionFlavorMap, FlavorCache.FlavorMap) ///FlavorMap is updated when scheduler starts, RegionFlavorMap is updated @@ -519,16 +519,16 @@ func (sched *Scheduler) Schedule(ctx context.Context, allocation *types.Allocati // 2. Run "prefilter" plugins. start = time.Now() - klog.Infof("[START] Running prefilter plugins...") + klog.V(4).Infof("[START] Running prefilter plugins...") preFilterStatus := sched.SchedFrame.RunPreFilterPlugins(schedulingCycleCtx, state, &allocation.Stack) if !preFilterStatus.IsSuccess() { return result, preFilterStatus.AsError() } - klog.Infof("[DONE] Running prefilter plugins, use_time: %s", time.Since(start).String()) + klog.V(4).Infof("[DONE] Running prefilter plugins, use_time: %s", time.Since(start).String()) // 3. Run "filter" plugins. 
start = time.Now() - klog.Infof("[START] Running filter plugins...") + klog.V(4).Infof("[START] Running filter plugins...") filteredSitesStatuses := make(interfaces.SiteToStatusMap) allocation.Stack.Selector = allocation.Selector filteredSites, err := sched.findSitesThatPassFilters(ctx, state, &allocation.Stack, filteredSitesStatuses) @@ -536,9 +536,9 @@ func (sched *Scheduler) Schedule(ctx context.Context, allocation *types.Allocati klog.Errorf("findSitesThatPassFilters failed! err: %s", err) return result, err } - klog.Infof("[DONE] Running filter plugins, use_time: %s", time.Since(start).String()) + klog.V(4).Infof("[DONE] Running filter plugins, use_time: %s", time.Since(start).String()) - klog.Infof("filteredSitesStatuses = %v", filteredSitesStatuses.ToString()) + klog.V(4).Infof("filteredSitesStatuses = %v", filteredSitesStatuses.ToString()) if len(filteredSites) <= 0 { err := fmt.Errorf("filter none site. resultStatus: %s", filteredSitesStatuses.ToString()) klog.Error(err) @@ -547,33 +547,33 @@ func (sched *Scheduler) Schedule(ctx context.Context, allocation *types.Allocati // 4. Run "prescore" plugins. start = time.Now() - klog.Infof("[START] Running preScore plugins...") + klog.V(4).Infof("[START] Running preScore plugins...") prescoreStatus := sched.SchedFrame.RunPreScorePlugins(ctx, state, &allocation.Stack, filteredSites) if !prescoreStatus.IsSuccess() { return result, prescoreStatus.AsError() } - klog.Infof("[DONE] Running preScore plugins, use_time: %s", time.Since(start).String()) + klog.V(4).Infof("[DONE] Running preScore plugins, use_time: %s", time.Since(start).String()) // 5. Run "prioritizeSites" plugins. start = time.Now() - klog.Infof("[START] Running prioritizeSites plugins...") + klog.V(4).Infof("[START] Running prioritizeSites plugins...") priorityList, err := sched.prioritizeSites(ctx, state, &allocation.Stack, filteredSites) if err != nil { klog.Errorf("prioritizeSites failed! err: %s", err) return result, err } - klog.Infof("[DONE] Running prioritizeSites plugins, use_time: %s", time.Since(start).String()) + klog.V(4).Infof("[DONE] Running prioritizeSites plugins, use_time: %s", time.Since(start).String()) // 6. Run "strategy" plugins. start = time.Now() - klog.Infof("[START] Running strategy plugins...") + klog.V(4).Infof("[START] Running strategy plugins...") siteCount, strategyStatus := sched.SchedFrame.RunStrategyPlugins(ctx, state, allocation, priorityList) if !strategyStatus.IsSuccess() { klog.Errorf("RunStrategyPlugins failed! err: %s", err) return result, err } - klog.Infof("[DONE] Running StrategyPlugins plugins, use_time: %s", time.Since(start).String()) - klog.Infof("selected Hosts : %#v", siteCount) + klog.V(4).Infof("[DONE] Running StrategyPlugins plugins, use_time: %s", time.Since(start).String()) + klog.V(4).Infof("selected Hosts : %#v", siteCount) // 7. 
reserve resource start = time.Now() @@ -605,7 +605,7 @@ func (sched *Scheduler) Schedule(ctx context.Context, allocation *types.Allocati klog.Errorf("not find suit host") return result, fmt.Errorf("not find suit host") } - klog.Infof("reserve resource(%s) success, use_time: %s", allocation.ID, time.Since(start).String()) + klog.V(4).Infof("reserve resource(%s) success, use_time: %s", allocation.ID, time.Since(start).String()) return } @@ -686,7 +686,7 @@ func (sched *Scheduler) initPodClusterSchedulerInformers(gsconfig *types.GSSched sched.schedulerInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ DeleteFunc: func(obj interface{}) { if sched, ok := obj.(*schedulerv1.Scheduler); ok { - klog.Infof("The scheduler %s process is going to be killed...", sched.Name) + klog.V(4).Infof("The scheduler %s process is going to be killed...", sched.Name) os.Exit(0) } else { klog.Fatalf("The deleted object %v failed to convert to scheduler", obj) @@ -730,7 +730,7 @@ func (sched *Scheduler) processNextClusterItem() bool { if shutdown { return false } - klog.Infof("Process an item in work queue %v ", workItem) + klog.V(4).Infof("Process an item in work queue %v ", workItem) eventKey := workItem.(KeyWithEventType) key := eventKey.Key defer sched.ClusterQueue.Done(key) @@ -739,7 +739,7 @@ func (sched *Scheduler) processNextClusterItem() bool { utilruntime.HandleError(fmt.Errorf("Handle %v of key %v failed with %v", "serivce", key, err)) } sched.ClusterQueue.Forget(key) - klog.Infof("Successfully processed & synced %s", key) + klog.V(4).Infof("Successfully processed & synced %s", key) return true } @@ -749,7 +749,7 @@ func (sched *Scheduler) clusterSyncHandler(keyWithEventType KeyWithEventType) er return err } key := keyWithEventType.Key - klog.Infof("sync cache for key %v", key) + klog.V(4).Infof("sync cache for key %v", key) startTime := time.Now() defer func() { klog.V(4).Infof("Finished syncing %q (%v)", key, time.Since(startTime)) @@ -757,16 +757,16 @@ func (sched *Scheduler) clusterSyncHandler(keyWithEventType KeyWithEventType) er nameSpace, clusterName, err := cache.SplitMetaNamespaceKey(key) //This performs controller logic - create site's static info - klog.Infof("cluster processing - event: %v, cluster name: %v", keyWithEventType.EventType, clusterName) + klog.V(4).Infof("cluster processing - event: %v, cluster name: %v", keyWithEventType.EventType, clusterName) result, err := sched.updateStaticSiteResourceInfo(key, keyWithEventType.EventType, nameSpace, clusterName) if !result { klog.Errorf("Failed a cluster processing - event: %v, key: %v, error: %v", keyWithEventType, key, err) sched.ClusterQueue.AddRateLimited(keyWithEventType) } else { - klog.Infof(" Processed a cluster: %v", key) + klog.V(4).Infof(" Processed a cluster: %v", key) sched.ClusterQueue.Forget(key) } - klog.Infof("Cluster was handled by ClusterController - event: %v, cluster name: %v", keyWithEventType.EventType, clusterName) + klog.V(4).Infof("Cluster was handled by ClusterController - event: %v, cluster name: %v", keyWithEventType.EventType, clusterName) if keyWithEventType.EventType != EventType_Delete { cluster, err := sched.ClusterLister.Clusters(nameSpace).Get(clusterName) clusterCopy := cluster.DeepCopy() @@ -821,7 +821,7 @@ func (sched *Scheduler) updateStaticSiteResourceInfo(key string, event EventType klog.Errorf("Failed to retrieve cluster in local cache by cluster name: %s", clusterName) return false, err } - klog.Infof("create a site static info, cluster profile: %v", clusterCopy) + klog.V(4).Infof("create a 
site static info, cluster profile: %v", clusterCopy) clusterCopy.Status = ClusterStatusCreated site := convertClusterToSite(clusterCopy) siteCacheInfo := schedulersitecacheinfo.NewSiteCacheInfo() @@ -839,7 +839,7 @@ func (sched *Scheduler) updateStaticSiteResourceInfo(key string, event EventType klog.Errorf("Failed to retrieve cluster in local cache by cluster name - %s", clusterName) return false, err } - klog.Infof("update a site static info, cluster profile: %v", clusterCopy) + klog.V(4).Infof("update a site static info, cluster profile: %v", clusterCopy) clusterCopy.Status = ClusterStatusUpdated site := convertClusterToSite(clusterCopy) siteCacheInfo := schedulersitecacheinfo.NewSiteCacheInfo() @@ -871,7 +871,7 @@ func (sched *Scheduler) updateStaticSiteResourceInfo(key string, event EventType //This function updates sites' dynamic resource informaton func (sched *Scheduler) UpdateSiteDynamicResource(region string, resource *types.SiteResource) (err error) { //reset total(available) resource - klog.Infof("UpdateSiteDynamicResource region: %s, resource:%v", region, resource) + klog.V(4).Infof("UpdateSiteDynamicResource region: %s, resource:%v", region, resource) var siteID string for _, siteresource := range resource.CPUMemResources { siteID = region + constants.SiteDelimiter + siteresource.AvailabilityZone diff --git a/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go b/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go index ddf4627fc..361961b1d 100644 --- a/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go +++ b/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go @@ -568,7 +568,7 @@ func (n *SiteCacheInfo) DeductSiteResInfo(resInfo types.AllResInfo, regionFlavor for reqType, reqRes := range n.RequestedResources { resTypes := strings.Split(reqType, constants.FlavorDelimiter) if !utils.IsContain(resTypes, resType) { - klog.Infof("!utils.IsContain: %v", !utils.IsContain(resTypes, resType)) + klog.V(4).Infof("!utils.IsContain: %v", !utils.IsContain(resTypes, resType)) continue } reqRes.VCPU += res.VCPU From 207f22974e833d2f5b0ddf35ac0cd617f66c16c1 Mon Sep 17 00:00:00 2001 From: kimeunju108 Date: Tue, 11 May 2021 19:18:23 +0000 Subject: [PATCH 02/12] updated code - removed else and added continue --- .../pkg/scheduler/eventhandlers.go | 2 -- .../scheduler/sitecacheinfo/sitecache_info.go | 25 ++++++++++--------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/globalscheduler/pkg/scheduler/eventhandlers.go b/globalscheduler/pkg/scheduler/eventhandlers.go index 9322054b1..abca84f72 100644 --- a/globalscheduler/pkg/scheduler/eventhandlers.go +++ b/globalscheduler/pkg/scheduler/eventhandlers.go @@ -421,7 +421,6 @@ func (sched *Scheduler) bindToSite(clusterName string, assumedStack *types.Stack Name: clusterName, }, } - klog.V(4).Infof("binding: %v", binding) // do api server update here klog.V(4).Infof("Attempting to bind %v to %v", binding.Name, binding.Target.Name) @@ -432,7 +431,6 @@ func (sched *Scheduler) bindToSite(clusterName string, assumedStack *types.Stack if err := sched.SchedulerCache.ForgetStack(assumedStack); err != nil { klog.Errorf("scheduler cache ForgetStack failed: %v", err) } - return err } return nil diff --git a/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go b/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go index 361961b1d..2deceb0ae 100644 --- a/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go +++ b/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go @@ -556,7 
+556,7 @@ func GetStackKey(stack *types.Stack) (string, error) { func (n *SiteCacheInfo) DeductSiteResInfo(resInfo types.AllResInfo, regionFlavorMap map[string]*typed.RegionFlavor) error { var resourceTypes []string for resType, res := range resInfo.CpuAndMem { - //binding a pod for the first + //resource type is null, assign default resource type (e.g. when binding a pod for the first time) if resType == "" { resType = string(DefaultResourceType) resourceTypes = append(resourceTypes, resType) @@ -564,17 +564,17 @@ func (n *SiteCacheInfo) DeductSiteResInfo(resInfo types.AllResInfo, regionFlavor if len(n.RequestedResources) == 0 { reqRes := types.CPUAndMemory{VCPU: res.VCPU, Memory: res.Memory} n.RequestedResources[resType] = &reqRes - } else { - for reqType, reqRes := range n.RequestedResources { - resTypes := strings.Split(reqType, constants.FlavorDelimiter) - if !utils.IsContain(resTypes, resType) { - klog.V(4).Infof("!utils.IsContain: %v", !utils.IsContain(resTypes, resType)) - continue - } - reqRes.VCPU += res.VCPU - reqRes.Memory += res.Memory - n.RequestedResources[resType] = reqRes + continue + } + for reqType, reqRes := range n.RequestedResources { + resTypes := strings.Split(reqType, constants.FlavorDelimiter) + if !utils.IsContain(resTypes, resType) { + klog.V(4).Infof("!utils.IsContain: %v", !utils.IsContain(resTypes, resType)) + continue } + reqRes.VCPU += res.VCPU + reqRes.Memory += res.Memory + n.RequestedResources[resType] = reqRes } } for volType, used := range resInfo.Storage { @@ -625,7 +625,8 @@ func (n *SiteCacheInfo) updateSiteFlavor(resourceTypes []string, regionFlavors m if totalRes == nil { n.deductFlavor() return - } else if requestRes == nil { + } + if requestRes == nil { requestRes = &types.CPUAndMemory{VCPU: 0, Memory: 0} } count := (totalRes.VCPU - requestRes.VCPU) / vCPUInt From c75e43847dd6d95497beda118b803667a5c9fc9c Mon Sep 17 00:00:00 2001 From: kimeunju108 Date: Tue, 18 May 2021 09:18:53 +0000 Subject: [PATCH 03/12] implemented withdraw reserved resource for pod --- globalscheduler/controllers/dispatcher/BUILD | 2 + .../dispatcher/dispatcher_process.go | 27 ++- globalscheduler/controllers/distributor/BUILD | 7 +- .../distributor/distributor_test.go | 8 +- .../pkg/scheduler/eventhandlers.go | 169 +++++++++++++++- .../pkg/scheduler/factory/factory.go | 6 +- .../framework/interfaces/framework.go | 29 +++ .../framework/interfaces/interface.go | 4 + .../plugins/defaultbinder/default_binder.go | 92 ++++++++- .../framework/plugins/flavor/flavor.go | 2 +- .../pkg/scheduler/internal/cache/snapshot.go | 13 ++ globalscheduler/pkg/scheduler/scheduler.go | 129 +++++++----- .../scheduler/sitecacheinfo/sitecache_info.go | 184 +++++++++++++++++- 13 files changed, 602 insertions(+), 70 deletions(-) diff --git a/globalscheduler/controllers/dispatcher/BUILD b/globalscheduler/controllers/dispatcher/BUILD index 5cf8b5c91..1b8dcdde5 100644 --- a/globalscheduler/controllers/dispatcher/BUILD +++ b/globalscheduler/controllers/dispatcher/BUILD @@ -31,8 +31,10 @@ go_library( "//staging/src/k8s.io/apimachinery/pkg/util/runtime:go_default_library", "//staging/src/k8s.io/client-go/kubernetes:go_default_library", "//staging/src/k8s.io/client-go/kubernetes/scheme:go_default_library", + "//staging/src/k8s.io/client-go/kubernetes/typed/core/v1:go_default_library", "//staging/src/k8s.io/client-go/rest:go_default_library", "//staging/src/k8s.io/client-go/tools/cache:go_default_library", + "//staging/src/k8s.io/client-go/tools/record:go_default_library", 
"//vendor/k8s.io/klog:go_default_library", ], ) diff --git a/globalscheduler/controllers/dispatcher/dispatcher_process.go b/globalscheduler/controllers/dispatcher/dispatcher_process.go index 8f3cdeaab..aec245671 100644 --- a/globalscheduler/controllers/dispatcher/dispatcher_process.go +++ b/globalscheduler/controllers/dispatcher/dispatcher_process.go @@ -19,8 +19,11 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/fields" "k8s.io/client-go/kubernetes" + "k8s.io/client-go/kubernetes/scheme" + typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1" "k8s.io/client-go/rest" "k8s.io/client-go/tools/cache" + "k8s.io/client-go/tools/record" "k8s.io/klog" "k8s.io/kubernetes/globalscheduler/cmd/conf" "k8s.io/kubernetes/globalscheduler/controllers/util" @@ -50,6 +53,7 @@ type Process struct { totalDeleteLatency int64 totalPodCreateNum int totalPodDeleteNum int + recorder record.EventRecorder } func NewProcess(config *rest.Config, namespace string, name string, quit chan struct{}) Process { @@ -74,6 +78,11 @@ func NewProcess(config *rest.Config, namespace string, name string, quit chan st if err != nil { klog.Fatal(err) } + eventBroadcaster := record.NewBroadcaster() + eventBroadcaster.StartLogging(klog.Infof) + eventBroadcaster.StartRecordingToSink( + &typedcorev1.EventSinkImpl{Interface: clientset.CoreV1().Events("")}) + recorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: name}) return Process{ namespace: namespace, @@ -89,6 +98,7 @@ func NewProcess(config *rest.Config, namespace string, name string, quit chan st totalDeleteLatency: 0, totalPodCreateNum: 0, totalPodDeleteNum: 0, + recorder: recorder, } } @@ -217,12 +227,13 @@ func (p *Process) SendPodToCluster(pod *v1.Pod) { go func() { instanceId, err := openstack.ServerCreate(host, token, &pod.Spec) if err == nil { - klog.V(3).Infof("The openstack vm for the pod %v has been created at the host %v", pod.ObjectMeta.Name, host) + klog.Infof("The openstack vm for the pod %v has been created at the host %v", pod.ObjectMeta.Name, host) pod.Status.ClusterInstanceId = instanceId - pod.Status.Phase = v1.ClusterScheduled + //pod.Status.Phase = v1.ClusterScheduled + pod.Status.Phase = v1.PodFailed updatedPod, err := p.clientset.CoreV1().Pods(pod.ObjectMeta.Namespace).UpdateStatus(pod) if err == nil { - klog.V(3).Infof("The pod %v has been updated its apiserver database status to scheduled successfully with the instance id %v", updatedPod, instanceId) + klog.Infof("The pod %v has been updated its apiserver database status to scheduled successfully with the instance id %v", updatedPod, instanceId) } else { klog.Warningf("The pod %v failed to update its apiserver database status to scheduled with the error %v", pod.ObjectMeta.Name, err) @@ -234,6 +245,16 @@ func (p *Process) SendPodToCluster(pod *v1.Pod) { klog.Warningf("The pod %v failed to update its apiserver dtatbase status to failed with the error %v", pod.ObjectMeta.Name, err) } } + ///for test + klog.Warningf("The openstack vm for the pod %v failed to create with the error", pod.ObjectMeta.Name) + pod.Status.Phase = v1.PodFailed + if _, err := p.clientset.CoreV1().Pods(pod.ObjectMeta.Namespace).UpdateStatus(pod); err != nil { + klog.Warningf("The pod %v failed to update its apiserver dtatbase status to failed with the error %v", pod.ObjectMeta.Name, err) + } + klog.Infof("+++ The pod info %s, %#v, %#v", pod.ObjectMeta.Name, pod.Status) + //p.recorder.Event(pod, corev1.EventTypeNormal, SuccessSynched, MessageResourceSynched) + 
//p.recorder.Event(pod, v1.EventTypeWarning, "Failed", "Failed to create vm") + // util.CheckTime(pod.Name, "dispatcher", "CreatePod-End", 2) }() } diff --git a/globalscheduler/controllers/distributor/BUILD b/globalscheduler/controllers/distributor/BUILD index 0d06146ed..0363e31f8 100644 --- a/globalscheduler/controllers/distributor/BUILD +++ b/globalscheduler/controllers/distributor/BUILD @@ -59,9 +59,14 @@ filegroup( go_test( name = "go_default_test", - srcs = ["distributor_controller_test.go"], + srcs = [ + "distributor_controller_test.go", + "distributor_test.go", + ], embed = [":go_default_library"], deps = [ + "//globalscheduler/pkg/apis/allocation/v1:go_default_library", + "//globalscheduler/pkg/apis/cluster/v1:go_default_library", "//globalscheduler/pkg/apis/distributor/client/clientset/versioned/fake:go_default_library", "//globalscheduler/pkg/apis/distributor/client/informers/externalversions:go_default_library", "//globalscheduler/pkg/apis/distributor/v1:go_default_library", diff --git a/globalscheduler/controllers/distributor/distributor_test.go b/globalscheduler/controllers/distributor/distributor_test.go index 241418516..da63747b9 100644 --- a/globalscheduler/controllers/distributor/distributor_test.go +++ b/globalscheduler/controllers/distributor/distributor_test.go @@ -22,9 +22,9 @@ import ( ) type Testcase struct { - SchedulerGeoLocation clustercrdv1.GeolocationInfo - AllocationGeoLocation allocv1.GeoLocation - ExpectedResult bool + SchedulerGeoLocation clustercrdv1.GeolocationInfo + AllocationGeoLocation allocv1.GeoLocation + ExpectedResult bool } func createSchedulerGeoLocation(city, province, area, country string) clustercrdv1.GeolocationInfo { @@ -106,7 +106,7 @@ func TestIsAllocationGeoLocationMatched(t *testing.T) { res := isAllocationGeoLocationMatched(&testcase.SchedulerGeoLocation, testcase.AllocationGeoLocation) if res != testcase.ExpectedResult { t.Errorf("The isAllocationGeoLocationMatched test result %v is not empty as expected with geoLocations %v, %v", - res, testcase.SchedulerGeoLocation, testcase.AllocationGeoLocation ) + res, testcase.SchedulerGeoLocation, testcase.AllocationGeoLocation) } } } diff --git a/globalscheduler/pkg/scheduler/eventhandlers.go b/globalscheduler/pkg/scheduler/eventhandlers.go index abca84f72..b6930e73d 100644 --- a/globalscheduler/pkg/scheduler/eventhandlers.go +++ b/globalscheduler/pkg/scheduler/eventhandlers.go @@ -26,11 +26,13 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" apitypes "k8s.io/apimachinery/pkg/types" utilruntime "k8s.io/apimachinery/pkg/util/runtime" + //"k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/cache" "k8s.io/klog" clusterv1 "k8s.io/kubernetes/globalscheduler/pkg/apis/cluster/v1" "k8s.io/kubernetes/globalscheduler/pkg/scheduler/common/constants" "k8s.io/kubernetes/globalscheduler/pkg/scheduler/types" + "k8s.io/kubernetes/globalscheduler/pkg/scheduler/utils" "k8s.io/kubernetes/pkg/controller" statusutil "k8s.io/kubernetes/pkg/util/pod" ) @@ -70,6 +72,15 @@ func AddAllEventHandlers(sched *Scheduler) { FilterFunc: func(obj interface{}) bool { switch t := obj.(type) { case *v1.Pod: + //klog.Infof("###111Pod: %#v", t) + klog.Infof("###111PodStatus: %#v", t.Status) + pod := obj.(*v1.Pod) + klog.Infof("#: %#v", pod.Name) + ppp, err := sched.Client.CoreV1().Pods("default").Get(pod.Name, metav1.GetOptions{}) + if err == nil { + //klog.Infof("###pppPod: %#v", ppp) + klog.Infof("###pppPodStatus: %#v", ppp.Status) + } return assignedPod(t) && responsibleForPod(t, sched.SchedulerName) case 
cache.DeletedFinalStateUnknown: if pod, ok := t.Obj.(*v1.Pod); ok { @@ -94,6 +105,16 @@ func AddAllEventHandlers(sched *Scheduler) { FilterFunc: func(obj interface{}) bool { switch t := obj.(type) { case *v1.Pod: + //klog.Infof("###222Pod: %#v", t) + klog.Infof("###222PodStatus: %#v", t.Status) + pod := obj.(*v1.Pod) + klog.Infof("##: %#v", pod.Name) + + pp, err := sched.Client.CoreV1().Pods("default").Get(pod.Name, metav1.GetOptions{}) + if err == nil { + //klog.Infof("###222ppPod: %#v", pp) + klog.Infof("###222ppPodStatus: %#v", pp.Status) + } return needToSchedule(t) && responsibleForPod(t, sched.SchedulerName) case cache.DeletedFinalStateUnknown: if pod, ok := t.Obj.(*v1.Pod); ok { @@ -113,6 +134,42 @@ func AddAllEventHandlers(sched *Scheduler) { }, }, ) + // failed pod queue + sched.PodInformer.Informer().AddEventHandler( + cache.FilteringResourceEventHandler{ + FilterFunc: func(obj interface{}) bool { + switch t := obj.(type) { + case *v1.Pod: + pod := obj.(*v1.Pod) + klog.Infof("###: %#v", pod.Name) + p, err := sched.Client.CoreV1().Pods("default").Get(pod.Name, metav1.GetOptions{}) + klog.Infof("###333Pod: %#v", err) + //klog.Infof("###333Pod: %#v", t) + klog.Infof("###333PodStatus: %#v", t.Status) + //p := sched.clientset.CoreV1().Pods(pod.ObjectMeta.Namespace).Get(pod.Name, metav1.GetOptions{}) + if err == nil { + //klog.Infof("###pppPod: %#v", p) + klog.Infof("###pppPodStatus: %#v", p.Status) + } + return failedToSchedule(t) && responsibleForPod(t, sched.SchedulerName) + case cache.DeletedFinalStateUnknown: + if pod, ok := t.Obj.(*v1.Pod); ok { + return failedToSchedule(pod) && responsibleForPod(pod, sched.SchedulerName) + } + utilruntime.HandleError(fmt.Errorf("unable to convert object %T to *v1.Pod in %T", obj, sched)) + return false + default: + utilruntime.HandleError(fmt.Errorf("unable to handle object in %T: %T", sched, obj)) + return false + } + }, + Handler: cache.ResourceEventHandlerFuncs{ + AddFunc: sched.addPodWithdrawResource, + UpdateFunc: sched.updatePodWithdrawResource, + DeleteFunc: sched.deletePodWithdrawResource, + }, + }, + ) sched.ClusterInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: sched.addCluster, UpdateFunc: sched.updateCluster, @@ -122,23 +179,36 @@ func AddAllEventHandlers(sched *Scheduler) { // needToSchedule selects pods that need to be scheduled func needToSchedule(pod *v1.Pod) bool { + klog.Infof("$$$$$$$needToSchedule: %v", pod.Name) + klog.Infof("$$$$$$$needToSchedule: %v", pod.Spec.VirtualMachine != nil && pod.Status.Phase == v1.PodAssigned) return pod.Spec.VirtualMachine != nil && pod.Status.Phase == v1.PodAssigned } // assignedPod selects pods that are assigned (scheduled and running). func assignedPod(pod *v1.Pod) bool { + klog.Infof("$$$$$$$assignedPod: %v", pod.Name) + klog.Infof("$$$$$$$assignedPod: %v", pod.Spec.VirtualMachine != nil && pod.Status.Phase == v1.PodBound) return pod.Spec.VirtualMachine != nil && pod.Status.Phase == v1.PodBound } // responsibleForPod returns true if the pod has asked to be scheduled by the given scheduler. 
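The new failed-pod handler above uses client-go's FilteringResourceEventHandler: FilterFunc gates every event, and the Handler callbacks fire only for objects that pass. The same shape in isolation, with a placeholder callback:

package scheduler

import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/client-go/tools/cache"
)

func newFailedPodHandler(onAdd func(interface{})) cache.ResourceEventHandler {
	return cache.FilteringResourceEventHandler{
		FilterFunc: func(obj interface{}) bool {
			// Deleted objects can arrive as cache.DeletedFinalStateUnknown,
			// which is why the real handler switches on the type first.
			pod, ok := obj.(*v1.Pod)
			return ok && pod.Status.Phase == v1.PodFailed
		},
		Handler: cache.ResourceEventHandlerFuncs{
			AddFunc: onAdd,
		},
	}
}
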
func responsibleForPod(pod *v1.Pod, schedulerName string) bool {
+	klog.Infof("$$$$$$$responsibleForPod: %v", pod.Name)
+	klog.Infof("$$$$$$$responsibleForPod: %v", schedulerName == pod.Status.AssignedScheduler.Name)
 	return schedulerName == pod.Status.AssignedScheduler.Name
 }
 
+// failedToSchedule selects pods that were scheduled but failed to create a VM
+func failedToSchedule(pod *v1.Pod) bool {
+	klog.Infof("$$$$$$$failedToSchedule: %v", pod.Name)
+	klog.Infof("$$$$$$$failedToSchedule: %v", pod.Spec.VirtualMachine != nil && pod.Status.Phase == v1.PodFailed)
+	return pod.Spec.VirtualMachine != nil && pod.Status.Phase == v1.PodFailed
+}
+
 // addPodToCache add pod to the stack cache of the scheduler
 func (sched *Scheduler) addPodToCache(obj interface{}) {
 	pod, ok := obj.(*v1.Pod)
-	klog.V(4).Infof("Add a pod: %v", pod.Name)
+	klog.Infof("Add a pod: %v", pod.Name)
 	if !ok {
 		klog.Errorf("cannot convert to *v1.Pod: %v", obj)
 		return
@@ -433,6 +503,7 @@ func (sched *Scheduler) bindToSite(clusterName string, assumedStack *types.Stack
 		}
 		return err
 	}
+	//
 	return nil
 }
@@ -528,3 +599,99 @@ func (sched *Scheduler) verifyClusterInfo(cluster *clusterv1.Cluster) (verified
 	verified = true
 	return verified
 }
+
+func (sched *Scheduler) verifyPodInfo(pod *v1.Pod) (verified bool) {
+	verified = false
+	name := pod.Name
+	flavors := pod.Spec.VirtualMachine.Flavors
+	if pod.Name == "" || flavors == nil {
+		klog.Errorf("pod name:%s, flavors:%v is null", name, flavors)
+		return verified
+	}
+	verified = true
+	return verified
+}
+
+func (sched *Scheduler) addPodWithdrawResource(object interface{}) {
+	pod, ok := object.(*v1.Pod)
+	if !ok {
+		klog.Errorf("cannot convert to *v1.Pod: %v", object)
+		return
+	}
+	klog.Infof("Add a pod to withdraw resource: %v", pod.Name)
+	podCopy := pod.DeepCopy()
+	if sched.verifyPodInfo(podCopy) == false {
+		klog.Infof(" Pod data is not correct: %v", podCopy)
+	}
+	err := sched.withdrawResource(pod.Name)
+	if err != nil {
+		klog.Errorf("withdraw resource of pod %s failed", pod.Name)
+	}
+}
+
+func (sched *Scheduler) updatePodWithdrawResource(oldObj, newObj interface{}) {
+	oldPod, ok := oldObj.(*v1.Pod)
+	if !ok {
+		klog.Errorf("cannot convert oldObj to *v1.Pod: %v", oldObj)
+		return
+	}
+	newPod, ok := newObj.(*v1.Pod)
+	if !ok {
+		klog.Errorf("cannot convert newObj to *v1.Pod: %v", newObj)
+		return
+	}
+	klog.Infof("Update a pod: %v", newPod.Name)
+	if oldPod.Name != newPod.Name {
+		klog.Errorf("old pod name and new pod name should be equal: %s, %s", oldPod.Name, newPod.Name)
+		return
+	}
+	err := sched.withdrawResource(newPod.Name)
+	if err != nil {
+		klog.Errorf("withdraw resource of pod %s failed", oldPod.Name)
+	}
+}
+
+func (sched *Scheduler) deletePodWithdrawResource(obj interface{}) {
+	var pod *v1.Pod
+	switch t := obj.(type) {
+	case *v1.Pod:
+		pod = t
+		klog.Infof("Delete a pod: %v", pod.Name)
+	case cache.DeletedFinalStateUnknown:
+		var ok bool
+		pod, ok = t.Obj.(*v1.Pod)
+		if !ok {
+			klog.Errorf("cannot convert to *v1.Pod: %v", t.Obj)
+			return
+		}
+	default:
+		klog.Errorf("cannot convert to *v1.Pod: %v", t)
+		return
+	}
+
+	err := sched.withdrawResource(pod.Name)
+	if err != nil {
+		klog.Errorf("withdraw resource of pod %s failed", pod.Name)
+	}
+}
+
+// withdrawResource returns a pod's reserved resources to the site cache so other pods can use them
+func (sched *Scheduler) withdrawResource(podName string) error {
+	resource := sched.ResourceAllocationMap[podName]
+	//allResInfo := types.AllResInfo{CpuAndMem: resource.CpuMem, Storage: resource.Storage, eipNum: 0}
+	if resource == nil {
+		klog.Infof("there is 
no preserved resource for pod: %s", podName) + return nil + } + allResInfo := resource.Resource + regionName := utils.GetRegionName(resource.SiteID) + regionFlavor, err := sched.siteCacheInfoSnapshot.GetRegionFlavors(regionName) + if err != nil { + klog.Errorf("there is no valid flavor for region: %s", regionName) + return err + } + siteCacheInfo := sched.siteCacheInfoSnapshot.SiteCacheInfoMap[resource.SiteID] + siteCacheInfo.WithdrawSiteResInfo(allResInfo, regionFlavor) + delete(sched.ResourceAllocationMap, podName) + return nil +} diff --git a/globalscheduler/pkg/scheduler/factory/factory.go b/globalscheduler/pkg/scheduler/factory/factory.go index a817abff6..0992ecec3 100644 --- a/globalscheduler/pkg/scheduler/factory/factory.go +++ b/globalscheduler/pkg/scheduler/factory/factory.go @@ -44,9 +44,11 @@ func (i *podInformer) Lister() corelisters.PodLister { // NewPodInformer creates a shared index informer that returns only non-terminal pods. func NewPodInformer(schedulerName string, client clientset.Interface, resyncPeriod time.Duration) coreinformers.PodInformer { + /*selector := fields.ParseSelectorOrDie( + "status.phase=" + string(v1.PodAssigned) + + ",status.assignedScheduler.name=" + schedulerName)*/ selector := fields.ParseSelectorOrDie( - "status.phase=" + string(v1.PodAssigned) + - ",status.assignedScheduler.name=" + schedulerName) + "status.assignedScheduler.name=" + schedulerName) lw := cache.NewListWatchFromClient(client.CoreV1(), string(v1.ResourcePods), metav1.NamespaceAll, selector) return &podInformer{ informer: cache.NewSharedIndexInformer(lw, &v1.Pod{}, resyncPeriod, diff --git a/globalscheduler/pkg/scheduler/framework/interfaces/framework.go b/globalscheduler/pkg/scheduler/framework/interfaces/framework.go index c782d910f..01203fd49 100644 --- a/globalscheduler/pkg/scheduler/framework/interfaces/framework.go +++ b/globalscheduler/pkg/scheduler/framework/interfaces/framework.go @@ -425,11 +425,40 @@ func (f *framework) RunBindPlugins(ctx context.Context, state *CycleState, stack return status } +//resource func (f *framework) runBindPlugin(ctx context.Context, bp BindPlugin, state *CycleState, stack *types.Stack, siteCacheInfo *schedulersitecacheinfo.SiteCacheInfo) *Status { return bp.Bind(ctx, state, stack, siteCacheInfo) } +/// RunBindResourcePlugins runs the set of configured bind plugins until one returns a non `Skip` status. 
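withdrawResource above keys each reservation by pod name and deletes the entry after handing the resources back to the site cache. That bookkeeping in isolation; the mutex is an assumption added for illustration, since the patch does not show locking around ResourceAllocationMap:

package scheduler

import "sync"

type allocation struct {
	SiteID string
	Flavor string
}

type reservations struct {
	mu sync.Mutex
	m  map[string]*allocation // pod name -> reserved site resources
}

// withdraw removes and returns a pod's reservation; nil means nothing was reserved.
func (r *reservations) withdraw(pod string) *allocation {
	r.mu.Lock()
	defer r.mu.Unlock()
	a := r.m[pod]
	delete(r.m, pod)
	return a
}
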
+func (f *framework) RunBindResourcePlugins(ctx context.Context, state *CycleState, stack *types.Stack, + siteCacheInfo *schedulersitecacheinfo.SiteCacheInfo) (status *Status, siteId string, flavor string, resInfo *types.AllResInfo) { + if len(f.bindPlugins) == 0 { + return NewStatus(Skip, ""), "", "", nil + } + for _, bp := range f.bindPlugins { + status, siteId, flavor, resInfo = f.runBindResourcePlugin(ctx, bp, state, stack, siteCacheInfo) + if status != nil && status.Code() == Skip { + continue + } + if !status.IsSuccess() { + msg := fmt.Sprintf("plugin %q failed to bind pod \"%v\": %v", bp.Name(), stack.PodName, status.Message()) + klog.Errorf("%s", msg) + status = NewStatus(Error, msg) + return status, siteId, flavor, resInfo + } + return status, siteId, flavor, resInfo + } + return status, siteId, flavor, resInfo +} + +///added for resource bind & revoke +func (f *framework) runBindResourcePlugin(ctx context.Context, bp BindPlugin, state *CycleState, stack *types.Stack, + siteCacheInfo *schedulersitecacheinfo.SiteCacheInfo) (*Status, string, string, *types.AllResInfo) { + return bp.BindResource(ctx, state, stack, siteCacheInfo) +} + // RunPostBindPlugins runs the set of configured postbind plugins. func (f *framework) RunPostBindPlugins(ctx context.Context, state *CycleState, stack *types.Stack, siteID string) { for _, pl := range f.postBindPlugins { diff --git a/globalscheduler/pkg/scheduler/framework/interfaces/interface.go b/globalscheduler/pkg/scheduler/framework/interfaces/interface.go index d83daf7bc..b330dacf3 100644 --- a/globalscheduler/pkg/scheduler/framework/interfaces/interface.go +++ b/globalscheduler/pkg/scheduler/framework/interfaces/interface.go @@ -386,6 +386,7 @@ type BindPlugin interface { // it must return Skip in its Status code. If a bind plugin returns an Error, the // pod is rejected and will not be bound. Bind(ctx context.Context, state *CycleState, p *types.Stack, siteCacheInfo *schedulersitecacheinfo.SiteCacheInfo) *Status + BindResource(ctx context.Context, state *CycleState, p *types.Stack, siteCacheInfo *schedulersitecacheinfo.SiteCacheInfo) (*Status, string, string, *types.AllResInfo) } // StrategyPlugin is an interface that must be implemented by "strategy" plugins. strategy @@ -462,6 +463,9 @@ type Framework interface { RunBindPlugins(ctx context.Context, state *CycleState, stack *types.Stack, siteCacheInfo *schedulersitecacheinfo.SiteCacheInfo) *Status + RunBindResourcePlugins(ctx context.Context, state *CycleState, stack *types.Stack, + siteCacheInfo *schedulersitecacheinfo.SiteCacheInfo) (*Status, string, string, *types.AllResInfo) + //RunStrategyPlugins runs the set of configured strategy plugins. 
RunStrategyPlugins(ctx context.Context, state *CycleState, allocations *types.Allocation, siteScoreList SiteScoreList) (SiteScoreList, *Status) diff --git a/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go b/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go index a8391ea44..fa80d219f 100644 --- a/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go +++ b/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go @@ -23,7 +23,7 @@ import ( "k8s.io/kubernetes/globalscheduler/pkg/scheduler/sitecacheinfo" "strconv" - "k8s.io/kubernetes/globalscheduler/pkg/scheduler/client/typed" + _ "k8s.io/kubernetes/globalscheduler/pkg/scheduler/client/typed" "k8s.io/kubernetes/globalscheduler/pkg/scheduler/framework/interfaces" "k8s.io/kubernetes/globalscheduler/pkg/scheduler/internal/cache" "k8s.io/kubernetes/globalscheduler/pkg/scheduler/types" @@ -70,7 +70,7 @@ func (b DefaultBinder) Bind(ctx context.Context, state *interfaces.CycleState, s klog.Errorf("Gettng site selector state failed! err: %s", err) return interfaces.NewStatus(interfaces.Error, fmt.Sprintf("getting site %q info failed: %v", siteID, err)) } - klog.Errorf("site selector info: %v", siteSelectedInfo) + klog.Infof("site selector info: %v", siteSelectedInfo) if len(stack.Resources) != len(siteSelectedInfo.Flavors) { klog.Errorf("flavor count not equal to server count! err: %s", err) return interfaces.NewStatus(interfaces.Error, fmt.Sprintf("siteID(%s) flavor count not equal to "+ @@ -102,15 +102,97 @@ func (b DefaultBinder) Bind(ctx context.Context, state *interfaces.CycleState, s resInfo.CpuAndMem[flv.OsExtraSpecs.ResourceType] = reqRes } b.handle.Cache().UpdateSiteWithResInfo(siteID, resInfo) - regionFlavors, err := b.handle.SnapshotSharedLister().SiteCacheInfos().GetFlavors() + /*regionFlavors, err := b.handle.SnapshotSharedLister().SiteCacheInfos().GetFlavors() if err != nil { klog.Errorf("Getting region's flavor failed: %s", err) return interfaces.NewStatus(interfaces.Error, fmt.Sprintf("getting site %q info failed: %v", siteID, err)) } if regionFlavors == nil || err != nil { regionFlavors = map[string]*typed.RegionFlavor{} - } - siteCacheInfo.DeductSiteResInfo(resInfo, regionFlavors) + }*/ + /*siteCacheInfo.DeductSiteResInfo(resInfo, regionFlavors) klog.V(4).Infof("Resource state after deduction: %v", siteCacheInfo) + return nil*/ + + /*klog.Infof("111 Resource state before deduction: %#v", siteCacheInfo) + klog.Infof("222 resInfo: %#v, regionFlavors:%#v", resInfo, regionFlavors) + siteCacheInfo.DeductSiteResInfo(resInfo, regionFlavors) + klog.Infof("333 Resource state after deduction: %#v", siteCacheInfo)*/ return nil } + +// Bind binds pods to site using the k8s client. 
+// Same as Bind except that it also returns the bound resource info
+func (b DefaultBinder) BindResource(ctx context.Context, state *interfaces.CycleState, stack *types.Stack,
+	siteCacheInfo *sitecacheinfo.SiteCacheInfo) (*interfaces.Status, string, string, *types.AllResInfo) {
+	region := siteCacheInfo.GetSite().RegionAzMap.Region
+
+	//eipNum : private data
+	resInfo := types.AllResInfo{CpuAndMem: map[string]types.CPUAndMemory{}, Storage: map[string]float64{}}
+	siteID := siteCacheInfo.Site.SiteID
+
+	stack.Selected.SiteID = siteID
+	stack.Selected.Region = region
+	stack.Selected.AvailabilityZone = siteCacheInfo.GetSite().RegionAzMap.AvailabilityZone
+	stack.Selected.ClusterName = siteCacheInfo.Site.ClusterName
+	stack.Selected.ClusterNamespace = siteCacheInfo.Site.ClusterNamespace
+	flavorID := ""
+	//siteSelectedInfo is type of SiteSelectorInfo at cycle_state.go
+	siteSelectedInfo, err := interfaces.GetSiteSelectorState(state, siteID)
+	if err != nil {
+		klog.Errorf("Getting site selector state failed! err: %s", err)
+		status := interfaces.NewStatus(interfaces.Error, fmt.Sprintf("getting site %q info failed: %v", siteID, err))
+		return status, siteID, flavorID, &resInfo
+	}
+	klog.Infof("site selector info: %v", siteSelectedInfo)
+	klog.Infof("stack.Resources: %#v", stack.Resources)
+	klog.Infof("siteSelectedInfo.Flavors: %#v", siteSelectedInfo.Flavors)
+	if len(stack.Resources) != len(siteSelectedInfo.Flavors) {
+		klog.Errorf("flavor count not equal to server count!")
+		return interfaces.NewStatus(interfaces.Error, fmt.Sprintf("siteID(%s) flavor count not equal to "+
+			"server count!", siteID)), siteID, flavorID, nil
+	}
+	for i := 0; i < len(stack.Resources); i++ {
+		flavorID = siteSelectedInfo.Flavors[i].FlavorID
+		stack.Resources[i].FlavorIDSelected = flavorID
+		klog.Infof("GetFlavor - flavorID: %s, region: %s", flavorID, region)
+		flv, ok := cache.FlavorCache.GetFlavor(flavorID, region)
+		if !ok {
+			klog.Warningf("flavor %s not found in region(%s)", flavorID, region)
+			continue
+		}
+		klog.Infof("flavor %s : %v", flavorID, flv)
+		vCPUInt, err := strconv.ParseInt(flv.Vcpus, 10, 64)
+		if err != nil || vCPUInt <= 0 {
+			klog.Warningf("flavor %s is invalid in region(%s)", flavorID, region)
+			continue
+		}
+		reqRes, ok := resInfo.CpuAndMem[flv.OsExtraSpecs.ResourceType]
+		if !ok {
+			reqRes = types.CPUAndMemory{VCPU: 0, Memory: 0}
+		}
+		reqRes.VCPU += vCPUInt * int64(stack.Resources[i].Count)
+		reqRes.Memory += flv.Ram * int64(stack.Resources[i].Count)
+
+		//put them all to resInfo
+		resInfo.CpuAndMem[flv.OsExtraSpecs.ResourceType] = reqRes
+		break
+	}
+	klog.Infof("UpdateSiteWithResInfo - siteID: %s, resInfo: %#v", siteID, resInfo)
+	b.handle.Cache().UpdateSiteWithResInfo(siteID, resInfo)
+	/*regionFlavors, err := b.handle.SnapshotSharedLister().SiteCacheInfos().GetFlavors()
+	if err != nil {
+		klog.Errorf("Getting region's flavor failed: %s", err)
+		return interfaces.NewStatus(interfaces.Error, fmt.Sprintf("getting site %q info failed: %v", siteID, err)), siteID, flavorID, nil
+	}
+	if regionFlavors == nil || err != nil {
+		regionFlavors = map[string]*typed.RegionFlavor{}
+	}
+
+	klog.Infof("111 Resource state before deduction: %#v", siteCacheInfo)
+	klog.Infof("222 resInfo: %#v, regionFlavors:%#v", resInfo, regionFlavors)
+	siteCacheInfo.DeductSiteResInfo(resInfo, regionFlavors)
+	klog.Infof("333 Resource state after deduction: %#v", siteCacheInfo)*/
+	klog.Infof("UpdateSiteWithResInfo - return")
+	return nil, siteID, flavorID, &resInfo
+}
diff --git 
a/globalscheduler/pkg/scheduler/framework/plugins/flavor/flavor.go b/globalscheduler/pkg/scheduler/framework/plugins/flavor/flavor.go index 87bfcb38a..e7f0572a4 100644 --- a/globalscheduler/pkg/scheduler/framework/plugins/flavor/flavor.go +++ b/globalscheduler/pkg/scheduler/framework/plugins/flavor/flavor.go @@ -310,7 +310,7 @@ func (f *Flavor) Filter(ctx context.Context, cycleState *interfaces.CycleState, var isCommonMatch, _ = isComFlavorMatch(flavorMap, siteCacheInfo) var isSpotMatch, _ = isSpotFlavorMatch(spotFlavorMap, siteCacheInfo) if isCommonMatch && isSpotMatch { - klog.Infof("*** isCommonMatch:%v, isSpotMatch:%v ", isCommonMatch, isSpotMatch) + klog.Infof("isCommonMatch:%v, isSpotMatch:%v ", isCommonMatch, isSpotMatch) return nil } } diff --git a/globalscheduler/pkg/scheduler/internal/cache/snapshot.go b/globalscheduler/pkg/scheduler/internal/cache/snapshot.go index c6e9d6b0c..4015a678c 100644 --- a/globalscheduler/pkg/scheduler/internal/cache/snapshot.go +++ b/globalscheduler/pkg/scheduler/internal/cache/snapshot.go @@ -20,6 +20,7 @@ package cache import ( "fmt" "k8s.io/kubernetes/globalscheduler/pkg/scheduler/client/typed" + "k8s.io/kubernetes/globalscheduler/pkg/scheduler/common/constants" schedulerlisters "k8s.io/kubernetes/globalscheduler/pkg/scheduler/listers" schedulersitecacheinfo "k8s.io/kubernetes/globalscheduler/pkg/scheduler/sitecacheinfo" "k8s.io/kubernetes/globalscheduler/pkg/scheduler/types" @@ -135,3 +136,15 @@ func (s *Snapshot) Get(siteID string) (*schedulersitecacheinfo.SiteCacheInfo, er func (s *Snapshot) GetFlavors() (map[string]*typed.RegionFlavor, error) { return s.RegionFlavorMap, nil } + +func (s *Snapshot) GetRegionFlavors(region string) (map[string]*typed.RegionFlavor, error) { + regionFlavorMap := make(map[string]*typed.RegionFlavor) + for flavorId := range s.FlavorMap { + key := region + constants.FlavorDelimiter + flavorId + regionFlavor := s.RegionFlavorMap[key] + if regionFlavor != nil { + regionFlavorMap[key] = regionFlavor + } + } + return regionFlavorMap, nil +} diff --git a/globalscheduler/pkg/scheduler/scheduler.go b/globalscheduler/pkg/scheduler/scheduler.go index 47cab45bd..c81d78df3 100644 --- a/globalscheduler/pkg/scheduler/scheduler.go +++ b/globalscheduler/pkg/scheduler/scheduler.go @@ -78,6 +78,13 @@ type ScheduleResult struct { FeasibleSites int // Number of feasible site on one stack scheduled } +type PodSiteResourceAllocation struct { + PodName string + SiteID string + Flavor string + Resource types.AllResInfo +} + // Scheduler watches for new unscheduled pods. It attempts to find // site that they fit on and writes bindings back to the api server. 
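GetRegionFlavors above filters the snapshot's RegionFlavorMap down to one region by rebuilding each key as region + constants.FlavorDelimiter + flavorId. The key scheme in isolation, with an assumed delimiter value:

package main

import "fmt"

const flavorDelimiter = "--" // assumed value; the real one lives in the scheduler constants package

func regionFlavorKey(region, flavorID string) string {
	return region + flavorDelimiter + flavorID
}

func main() {
	regionFlavorMap := map[string]string{
		regionFlavorKey("region1", "42"): "flavor-42-in-region1", // illustrative entry
	}
	fmt.Println(regionFlavorMap[regionFlavorKey("region1", "42")])
}
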
type Scheduler struct { @@ -108,7 +115,8 @@ type Scheduler struct { mu sync.RWMutex //Cluster - KubeClientset clientset.Interface //kubernetes.Interface + //KubeClientset clientset.Interface //kubernetes.Interface + KubeClientset *clientset.Clientset ApiextensionsClientset apiextensionsclientset.Interface ClusterClientset clusterclientset.Interface ClusterInformerFactory externalinformers.SharedInformerFactory @@ -120,6 +128,9 @@ type Scheduler struct { schedulerClientset schedulerclientset.Interface schedulerInformer cache.SharedIndexInformer workerNumber int + + // table to withdraw site resource + ResourceAllocationMap map[string]*PodSiteResourceAllocation } // single scheduler instance @@ -128,7 +139,7 @@ var once sync.Once func NewScheduler(gsconfig *types.GSSchedulerConfiguration, stopCh <-chan struct{}) (*Scheduler, error) { stopEverything := stopCh - klog.V(4).Infof("stopEverything to check : %v", stopEverything) + klog.Infof("stopEverything to check : %v", stopEverything) if stopEverything == nil { stopEverything = wait.NeverStop } @@ -141,8 +152,8 @@ func NewScheduler(gsconfig *types.GSSchedulerConfiguration, stopCh <-chan struct ConfigFilePath: gsconfig.ConfigFilePath, deletedClusters: make(map[string]string), workerNumber: 1, - } - + ResourceAllocationMap: make(map[string]*PodSiteResourceAllocation), + } err := sched.buildFramework() if err != nil { return nil, fmt.Errorf("buildFramework by %s failed! err: %v", types.SchedulerDefaultProviderName, err) @@ -150,7 +161,7 @@ func NewScheduler(gsconfig *types.GSSchedulerConfiguration, stopCh <-chan struct //build entire FlavorMap map sched.UpdateFlavor() - klog.V(4).Infof("FlavorMap: %v", sched.siteCacheInfoSnapshot.FlavorMap) + klog.Infof("FlavorMap: %v", sched.siteCacheInfoSnapshot.FlavorMap) // init pod, cluster, and scheduler informers for scheduler err = sched.initPodClusterSchedulerInformers(gsconfig, stopEverything) if err != nil { @@ -190,14 +201,14 @@ func (sched *Scheduler) StartInformersAndRun(stopCh <-chan struct{}) { } // start pod informers if sched.PodInformer != nil && sched.InformerFactory != nil { - klog.V(4).Infof("Starting scheduler %s informer", sched.SchedulerName) + klog.Infof("Starting scheduler %s informer", sched.SchedulerName) sched.InformerFactory.Start(stopCh) // Wait for all caches to sync before scheduling. sched.InformerFactory.WaitForCacheSync(stopCh) } // start scheduler informer if sched.schedulerInformer != nil { - klog.V(4).Infof("Starting scheduler informer for scheduler %s", sched.SchedulerName) + klog.Infof("Starting scheduler informer for scheduler %s", sched.SchedulerName) go sched.schedulerInformer.Run(stopCh) } // Do scheduling @@ -207,13 +218,13 @@ func (sched *Scheduler) StartInformersAndRun(stopCh <-chan struct{}) { // Run begins watching and scheduling. It waits for cache to be synced, then starts scheduling // and blocked until the context is done. 
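Run, shown next, fans the work out to clusterWorkers and podWorkers goroutines once the informer caches have synced; the loop bodies themselves sit outside the hunks in this patch. Assuming the conventional workqueue-consumer shape (the name runClusterWorker is an assumption, not taken from this diff), each worker amounts to:

	// Re-invoke the queue consumer once per second until shutdown is signaled.
	for i := 0; i < clusterWorkers; i++ {
		go wait.Until(sched.runClusterWorker, time.Second, stopCh)
	}

where runClusterWorker would simply loop over processNextClusterItem until the queue shuts down.
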
func (sched *Scheduler) Run(clusterWorkers int, podWorkers int, stopCh <-chan struct{}) { - klog.V(4).Infof("Starting scheduler %s", sched.SchedulerName) + klog.Infof("Starting scheduler %s", sched.SchedulerName) defer utilruntime.HandleCrash() //cluster if clusterWorkers > 0 { defer sched.ClusterQueue.ShutDown() - klog.V(4).Infof("Waiting informer caches to sync") + klog.Infof("Waiting informer caches to sync") if ok := cache.WaitForCacheSync(sched.StopEverything, sched.ClusterSynced); !ok { klog.Errorf("failed to wait for caches to sync") } @@ -225,7 +236,7 @@ func (sched *Scheduler) Run(clusterWorkers int, podWorkers int, stopCh <-chan st } defer sched.StackQueue.Close() - klog.V(4).Infof("Waiting informer caches to sync") + klog.Infof("Waiting informer caches to sync") if ok := cache.WaitForCacheSync(sched.StopEverything, sched.PodSynced); !ok { klog.Errorf("failed to wait for caches to sync") } @@ -236,7 +247,7 @@ func (sched *Scheduler) Run(clusterWorkers int, podWorkers int, stopCh <-chan st } klog.Info("Started cluster & pod workers") <-stopCh - klog.V(4).Infof("Shutting down scheduler %s", sched.SchedulerName) + klog.Infof("Shutting down scheduler %s", sched.SchedulerName) } // Cache returns the cache in scheduler for test to check the data in scheduler. @@ -261,15 +272,15 @@ func (sched *Scheduler) scheduleOne() bool { if shutdown != nil { return false } - klog.V(4).Infof("1. Stack: %v, stack selector: %v", stack, stack.Selector) + klog.Infof("1. Stack: %v, stack selector: %v", stack, stack.Selector) allocation, err := sched.generateAllocationFromStack(stack) - klog.V(4).Infof("2. Allocation: %v, allocation selector: %v", allocation, allocation.Selector) + klog.Infof("2. Allocation: %v, allocation selector: %v", allocation, allocation.Selector) if err != nil { return false } start := stack.CreateTime end := time.Now().UnixNano() - klog.V(4).Infof("=== done pop queue, time consumption: %v ms ===", (end-start)/int64(time.Millisecond)) + klog.Infof("=== done pop queue, time consumption: %v ms ===", (end-start)/int64(time.Millisecond)) // 2.do scheduling process start = end @@ -281,21 +292,21 @@ func (sched *Scheduler) scheduleOne() bool { return true } end = time.Now().UnixNano() - klog.V(4).Infof("=== done Scheduling pipline, time consumption: %vms ===", (end-start)/int64(time.Millisecond)) - klog.V(4).Infof("Schedule result: %v", result) //result is assumed stacks - klog.V(4).Infof("3. Assumed Stacks: %v", result) + klog.Infof("=== done Scheduling pipline, time consumption: %vms ===", (end-start)/int64(time.Millisecond)) + klog.Infof("Schedule result: %v", result) //result is assumed stacks + klog.Infof("3. 
Assumed Stacks: %v", result) // 3.bind scheduler result to pod start = end - klog.V(4).Infof("Try to bind to site, stacks:%v", result.Stacks) + klog.Infof("Try to bind to site, stacks:%v", result.Stacks) sched.bindStacks(result.Stacks) end = time.Now().UnixNano() - klog.V(4).Infof("=== done bind pod to cluster, time consumption: %vms ===", (end-start)/int64(time.Millisecond)) + klog.Infof("=== done bind pod to cluster, time consumption: %vms ===", (end-start)/int64(time.Millisecond)) // log the elapsed time for the entire schedule if stack.CreateTime != 0 { spendTime := time.Now().UnixNano() - stack.CreateTime - klog.V(4).Infof("@@@ Finished Schedule, time consumption: %vms @@@", spendTime/int64(time.Millisecond)) + klog.Infof("@@@ Finished Schedule, time consumption: %vms @@@", spendTime/int64(time.Millisecond)) } return true } @@ -335,13 +346,11 @@ func (sched *Scheduler) stackPassesFiltersOnSite( info *schedulersitecacheinfo.SiteCacheInfo, ) (bool, *interfaces.Status, error) { var status *interfaces.Status - statusMap := sched.SchedFrame.RunFilterPlugins(ctx, state, stack, info) status = statusMap.Merge() if !status.IsSuccess() && !status.IsUnschedulable() { return false, status, status.AsError() } - return status.IsSuccess(), status, nil } @@ -368,7 +377,7 @@ func (sched *Scheduler) findSitesThatPassFilters(ctx context.Context, state *int err = fmt.Errorf("SiteCacheInfoMap of %v is null", siteID) return nil, err } - klog.V(4).Infof("allSiteCacheInfos: %v", len(allSiteCacheInfos)) + klog.Infof("allSiteCacheInfos: %v", len(allSiteCacheInfos)) // Create filtered list with enough space to avoid growing it // and allow assigning. filtered := make([]*types.Site, len(allSiteCacheInfos)) @@ -454,7 +463,7 @@ func (sched *Scheduler) prioritizeSites( // sort by score. sort.Sort(sort.Reverse(result)) - klog.V(4).Infof("score sites: %v", result) + klog.Infof("score sites: %v", result) return result, nil } @@ -488,9 +497,27 @@ func (sched *Scheduler) selectHost(siteScoreList interfaces.SiteScoreList) (stri // We expect this to run asynchronously, so we handle binding metrics internally. 
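The change to bind, shown next, threads resource information out of the bind plugins: RunBindResourcePlugins now returns the selected site and flavor together with the aggregated resource request, bind records that in ResourceAllocationMap, and then deducts it from the cached site capacity. Stripped of logging, the added logic amounts to this sketch (not a verbatim excerpt from the diff):

	status, siteID, flavorID, resInfo := sched.SchedFrame.RunBindResourcePlugins(ctx, state, stack,
		sched.siteCacheInfoSnapshot.SiteCacheInfoMap[targetSiteID])
	if status.IsSuccess() {
		// Remember the reservation so a later VM failure can withdraw it.
		sched.ResourceAllocationMap[stack.PodName] = &PodSiteResourceAllocation{stack.PodName, siteID, flavorID, *resInfo}
		regionFlavors, err := sched.siteCacheInfoSnapshot.GetRegionFlavors(utils.GetRegionName(siteID))
		if err != nil {
			return err
		}
		// Deduct what was just promised from the cached site capacity.
		sched.siteCacheInfoSnapshot.SiteCacheInfoMap[targetSiteID].DeductSiteResInfo(*resInfo, regionFlavors)
	}
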
 func (sched *Scheduler) bind(ctx context.Context, stack *types.Stack, targetSiteID string, state *interfaces.CycleState) (err error) {
-	bindStatus := sched.SchedFrame.RunBindPlugins(ctx, state, stack,
+	bindStatus, siteId, flavorId, resInfo := sched.SchedFrame.RunBindResourcePlugins(ctx, state, stack,
 		sched.siteCacheInfoSnapshot.SiteCacheInfoMap[targetSiteID])
 	if bindStatus.IsSuccess() {
+		podResourceAlloc := PodSiteResourceAllocation{stack.PodName, siteId, flavorId, *resInfo}
+		klog.Infof("bind - podResourceAlloc: %#v", podResourceAlloc)
+		klog.Infof("bind111 - sched.ResourceAllocationMap[stack.PodName]: %#v, stack.PodName:%s", sched.ResourceAllocationMap, stack.PodName)
+		sched.ResourceAllocationMap[stack.PodName] = &podResourceAlloc
+		/*if (sched.ResourceAllocationMap[stack.PodName] == nil) {
+			sched.ResourceAllocationMap[stack.PodName] = append(sched.ResourceAllocationMap[stack.PodName], &podResourceAlloc)
+		} else {
+			sched.ResourceAllocationMap[stack.PodName] = &podResourceAlloc
+		}*/
+		klog.Infof("bind222 - sched.ResourceAllocationMap[stack.PodName]: %#v, stack.PodName:%s", sched.ResourceAllocationMap, stack.PodName)
+		region := utils.GetRegionName(siteId)
+		regionFlavors, err := sched.siteCacheInfoSnapshot.GetRegionFlavors(region)
+		if err != nil {
+			klog.Errorf("There are no valid flavors in region: %s", region)
+			return err
+		}
+		siteCacheInfo := sched.siteCacheInfoSnapshot.SiteCacheInfoMap[targetSiteID]
+		siteCacheInfo.DeductSiteResInfo(*resInfo, regionFlavors)
 		return nil
 	}
 	if bindStatus.Code() == interfaces.Error {
@@ -502,14 +529,14 @@ func (sched *Scheduler) bind(ctx context.Context, stack *types.Stack, targetSite
 // Schedule Run begins watching and scheduling. It waits for cache to be synced ,
 // then starts scheduling and blocked until the context is done.
 func (sched *Scheduler) Schedule(ctx context.Context, allocation *types.Allocation) (result ScheduleResult, err error) {
-	klog.V(4).Infof("Attempting to schedule allocation: %v", allocation.ID)
+	klog.Infof("Attempting to schedule allocation: %v", allocation.ID)
 	state := interfaces.NewCycleState()
 	schedulingCycleCtx, cancel := context.WithCancel(ctx)
 	defer cancel()
 
 	// 1. Snapshot site resource cache
 	start := time.Now()
-	klog.V(4).Infof("[START] snapshot site...")
+	klog.Infof("[START] snapshot site...")
 
 	///UpdateFlavorMap updates FlavorCache.RegionFlavorMap, FlavorCache.FlavorMap)
 	///FlavorMap is updated when scheduler starts, RegionFlavorMap is updated
@@ -519,16 +546,16 @@ func (sched *Scheduler) Schedule(ctx context.Context, allocation *types.Allocati
 
 	// 2. Run "prefilter" plugins.
 	start = time.Now()
-	klog.V(4).Infof("[START] Running prefilter plugins...")
+	klog.Infof("[START] Running prefilter plugins...")
 	preFilterStatus := sched.SchedFrame.RunPreFilterPlugins(schedulingCycleCtx, state, &allocation.Stack)
 	if !preFilterStatus.IsSuccess() {
 		return result, preFilterStatus.AsError()
 	}
-	klog.V(4).Infof("[DONE] Running prefilter plugins, use_time: %s", time.Since(start).String())
+	klog.Infof("[DONE] Running prefilter plugins, use_time: %s", time.Since(start).String())
 
 	// 3. Run "filter" plugins.
start = time.Now() - klog.V(4).Infof("[START] Running filter plugins...") + klog.Infof("[START] Running filter plugins...") filteredSitesStatuses := make(interfaces.SiteToStatusMap) allocation.Stack.Selector = allocation.Selector filteredSites, err := sched.findSitesThatPassFilters(ctx, state, &allocation.Stack, filteredSitesStatuses) @@ -536,9 +563,9 @@ func (sched *Scheduler) Schedule(ctx context.Context, allocation *types.Allocati klog.Errorf("findSitesThatPassFilters failed! err: %s", err) return result, err } - klog.V(4).Infof("[DONE] Running filter plugins, use_time: %s", time.Since(start).String()) + klog.Infof("[DONE] Running filter plugins, use_time: %s", time.Since(start).String()) - klog.V(4).Infof("filteredSitesStatuses = %v", filteredSitesStatuses.ToString()) + klog.Infof("filteredSitesStatuses = %v", filteredSitesStatuses.ToString()) if len(filteredSites) <= 0 { err := fmt.Errorf("filter none site. resultStatus: %s", filteredSitesStatuses.ToString()) klog.Error(err) @@ -547,33 +574,33 @@ func (sched *Scheduler) Schedule(ctx context.Context, allocation *types.Allocati // 4. Run "prescore" plugins. start = time.Now() - klog.V(4).Infof("[START] Running preScore plugins...") + klog.Infof("[START] Running preScore plugins...") prescoreStatus := sched.SchedFrame.RunPreScorePlugins(ctx, state, &allocation.Stack, filteredSites) if !prescoreStatus.IsSuccess() { return result, prescoreStatus.AsError() } - klog.V(4).Infof("[DONE] Running preScore plugins, use_time: %s", time.Since(start).String()) + klog.Infof("[DONE] Running preScore plugins, use_time: %s", time.Since(start).String()) // 5. Run "prioritizeSites" plugins. start = time.Now() - klog.V(4).Infof("[START] Running prioritizeSites plugins...") + klog.Infof("[START] Running prioritizeSites plugins...") priorityList, err := sched.prioritizeSites(ctx, state, &allocation.Stack, filteredSites) if err != nil { klog.Errorf("prioritizeSites failed! err: %s", err) return result, err } - klog.V(4).Infof("[DONE] Running prioritizeSites plugins, use_time: %s", time.Since(start).String()) + klog.Infof("[DONE] Running prioritizeSites plugins, use_time: %s", time.Since(start).String()) // 6. Run "strategy" plugins. start = time.Now() - klog.V(4).Infof("[START] Running strategy plugins...") + klog.Infof("[START] Running strategy plugins...") siteCount, strategyStatus := sched.SchedFrame.RunStrategyPlugins(ctx, state, allocation, priorityList) if !strategyStatus.IsSuccess() { klog.Errorf("RunStrategyPlugins failed! err: %s", err) return result, err } - klog.V(4).Infof("[DONE] Running StrategyPlugins plugins, use_time: %s", time.Since(start).String()) - klog.V(4).Infof("selected Hosts : %#v", siteCount) + klog.Infof("[DONE] Running StrategyPlugins plugins, use_time: %s", time.Since(start).String()) + klog.Infof("selected Hosts : %#v", siteCount) // 7. 
reserve resource start = time.Now() @@ -605,7 +632,7 @@ func (sched *Scheduler) Schedule(ctx context.Context, allocation *types.Allocati klog.Errorf("not find suit host") return result, fmt.Errorf("not find suit host") } - klog.V(4).Infof("reserve resource(%s) success, use_time: %s", allocation.ID, time.Since(start).String()) + klog.Infof("reserve resource(%s) success, use_time: %s", allocation.ID, time.Since(start).String()) return } @@ -686,7 +713,7 @@ func (sched *Scheduler) initPodClusterSchedulerInformers(gsconfig *types.GSSched sched.schedulerInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ DeleteFunc: func(obj interface{}) { if sched, ok := obj.(*schedulerv1.Scheduler); ok { - klog.V(4).Infof("The scheduler %s process is going to be killed...", sched.Name) + klog.Infof("The scheduler %s process is going to be killed...", sched.Name) os.Exit(0) } else { klog.Fatalf("The deleted object %v failed to convert to scheduler", obj) @@ -730,7 +757,7 @@ func (sched *Scheduler) processNextClusterItem() bool { if shutdown { return false } - klog.V(4).Infof("Process an item in work queue %v ", workItem) + klog.Infof("Process an item in work queue %v ", workItem) eventKey := workItem.(KeyWithEventType) key := eventKey.Key defer sched.ClusterQueue.Done(key) @@ -739,7 +766,7 @@ func (sched *Scheduler) processNextClusterItem() bool { utilruntime.HandleError(fmt.Errorf("Handle %v of key %v failed with %v", "serivce", key, err)) } sched.ClusterQueue.Forget(key) - klog.V(4).Infof("Successfully processed & synced %s", key) + klog.Infof("Successfully processed & synced %s", key) return true } @@ -749,24 +776,24 @@ func (sched *Scheduler) clusterSyncHandler(keyWithEventType KeyWithEventType) er return err } key := keyWithEventType.Key - klog.V(4).Infof("sync cache for key %v", key) + klog.Infof("sync cache for key %v", key) startTime := time.Now() defer func() { - klog.V(4).Infof("Finished syncing %q (%v)", key, time.Since(startTime)) + klog.Infof("Finished syncing %q (%v)", key, time.Since(startTime)) }() nameSpace, clusterName, err := cache.SplitMetaNamespaceKey(key) //This performs controller logic - create site's static info - klog.V(4).Infof("cluster processing - event: %v, cluster name: %v", keyWithEventType.EventType, clusterName) + klog.Infof("cluster processing - event: %v, cluster name: %v", keyWithEventType.EventType, clusterName) result, err := sched.updateStaticSiteResourceInfo(key, keyWithEventType.EventType, nameSpace, clusterName) if !result { klog.Errorf("Failed a cluster processing - event: %v, key: %v, error: %v", keyWithEventType, key, err) sched.ClusterQueue.AddRateLimited(keyWithEventType) } else { - klog.V(4).Infof(" Processed a cluster: %v", key) + klog.Infof(" Processed a cluster: %v", key) sched.ClusterQueue.Forget(key) } - klog.V(4).Infof("Cluster was handled by ClusterController - event: %v, cluster name: %v", keyWithEventType.EventType, clusterName) + klog.Infof("Cluster was handled by ClusterController - event: %v, cluster name: %v", keyWithEventType.EventType, clusterName) if keyWithEventType.EventType != EventType_Delete { cluster, err := sched.ClusterLister.Clusters(nameSpace).Get(clusterName) clusterCopy := cluster.DeepCopy() @@ -821,7 +848,7 @@ func (sched *Scheduler) updateStaticSiteResourceInfo(key string, event EventType klog.Errorf("Failed to retrieve cluster in local cache by cluster name: %s", clusterName) return false, err } - klog.V(4).Infof("create a site static info, cluster profile: %v", clusterCopy) + klog.Infof("create a site static info, cluster 
profile: %v", clusterCopy) clusterCopy.Status = ClusterStatusCreated site := convertClusterToSite(clusterCopy) siteCacheInfo := schedulersitecacheinfo.NewSiteCacheInfo() @@ -839,7 +866,7 @@ func (sched *Scheduler) updateStaticSiteResourceInfo(key string, event EventType klog.Errorf("Failed to retrieve cluster in local cache by cluster name - %s", clusterName) return false, err } - klog.V(4).Infof("update a site static info, cluster profile: %v", clusterCopy) + klog.Infof("update a site static info, cluster profile: %v", clusterCopy) clusterCopy.Status = ClusterStatusUpdated site := convertClusterToSite(clusterCopy) siteCacheInfo := schedulersitecacheinfo.NewSiteCacheInfo() @@ -871,7 +898,7 @@ func (sched *Scheduler) updateStaticSiteResourceInfo(key string, event EventType //This function updates sites' dynamic resource informaton func (sched *Scheduler) UpdateSiteDynamicResource(region string, resource *types.SiteResource) (err error) { //reset total(available) resource - klog.V(4).Infof("UpdateSiteDynamicResource region: %s, resource:%v", region, resource) + klog.Infof("UpdateSiteDynamicResource region: %s, resource:%v", region, resource) var siteID string for _, siteresource := range resource.CPUMemResources { siteID = region + constants.SiteDelimiter + siteresource.AvailabilityZone diff --git a/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go b/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go index 2deceb0ae..2b935d5f7 100644 --- a/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go +++ b/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go @@ -552,8 +552,54 @@ func GetStackKey(stack *types.Stack) (string, error) { return uid, nil } +//deduct or add +func (n *SiteCacheInfo) UpdateSiteResInfo(resInfo types.AllResInfo, regionFlavorMap map[string]*typed.RegionFlavor, deduct bool) error { + var resourceTypes []string + klog.Infof("444 UpdateSiteResInfo - resInfo: %#v, regionFlavors:%#v", resInfo, regionFlavorMap) + for resType, res := range resInfo.CpuAndMem { + //resource type is null, assign default resource type (e.g. 
when binding a pod for the first time) + if resType == "" { + resType = string(DefaultResourceType) + resourceTypes = append(resourceTypes, resType) + } + if len(n.RequestedResources) == 0 { + reqRes := types.CPUAndMemory{VCPU: res.VCPU, Memory: res.Memory} + n.RequestedResources[resType] = &reqRes + continue + } + for reqType, reqRes := range n.RequestedResources { + klog.Infof("555 UpdateSiteResInfo - reqType: %#v, reqRes:%#v", reqType, reqRes) + resTypes := strings.Split(reqType, constants.FlavorDelimiter) + if !utils.IsContain(resTypes, resType) { + klog.Infof("!utils.IsContain: %v", !utils.IsContain(resTypes, resType)) + continue + } + reqRes.VCPU += res.VCPU + reqRes.Memory += res.Memory + klog.Infof("666 UpdateSiteResInfo Before - RequestedResources[%#v]: %#v", resType, n.RequestedResources[resType]) + n.RequestedResources[resType] = reqRes + klog.Infof("777 UpdateSiteResInfo After - RequestedResources[%#v]: %#v", resType, n.RequestedResources[resType]) + + } + } + for volType, used := range resInfo.Storage { + klog.Infof("888 UpdateSiteResInfo Before - RequestedStorage[%#v]: %#v", volType, n.RequestedStorage[volType]) + reqVol, ok := n.RequestedStorage[volType] + if !ok { + reqVol = 0 + } + reqVol += used + n.RequestedStorage[volType] = reqVol + klog.Infof("888 UpdateSiteResInfo After - RequestedStorage[%#v]: %#v", volType, n.RequestedStorage[volType]) + + } + n.updateSiteFlavor(resourceTypes, regionFlavorMap, deduct) + n.generation = nextGeneration() + return nil +} + // DeductSiteResInfo deduct site's resource info -func (n *SiteCacheInfo) DeductSiteResInfo(resInfo types.AllResInfo, regionFlavorMap map[string]*typed.RegionFlavor) error { +/*func (n *SiteCacheInfo) DeductSiteResInfo2(resInfo types.AllResInfo, regionFlavorMap map[string]*typed.RegionFlavor) error { var resourceTypes []string for resType, res := range resInfo.CpuAndMem { //resource type is null, assign default resource type (e.g. 
when binding a pod for the first time)
@@ -588,6 +634,74 @@ func (n *SiteCacheInfo) DeductSiteResInfo(resInfo types.AllResInfo, regionFlavor
 	n.updateSiteFlavor(resourceTypes, regionFlavorMap)
 	n.generation = nextGeneration()
 	return nil
+}*/
+
+func (n *SiteCacheInfo) updateSiteFlavor(resourceTypes []string, regionFlavors map[string]*typed.RegionFlavor, deduct bool) {
+	n.mu.Lock()
+	defer n.mu.Unlock()
+
+	klog.Infof("999 updateSiteFlavor Before - resourceTypes: %#v, regionFlavors:%#v", resourceTypes, regionFlavors)
+	for k, v := range regionFlavors {
+		klog.Infof("updateSiteFlavor Before - key: %#v, regionFlavor:%#v", k, v)
+	}
+
+	if n.AllocatableFlavor == nil {
+		n.AllocatableFlavor = map[string]int64{}
+	}
+	supportFlavors := n.AllocatableFlavor
+	regionName := utils.GetRegionName(n.Site.SiteID)
+	for flavorid := range supportFlavors {
+		regionFalvorKey := regionName + constants.FlavorDelimiter + flavorid
+		flv := regionFlavors[regionFalvorKey]
+		klog.Infof("000 updateSiteFlavor - flv: %#v", flv)
+		if flv == nil {
+			n.deductFlavor()
+			klog.Infof("n.AllocatableFlavor After - n.AllocatableFlavor[%#v]: %#v", flavorid, n.AllocatableFlavor[flavorid])
+			return
+		}
+		vCPUInt, err := strconv.ParseInt(flv.Vcpus, 10, 64)
+		if err != nil {
+			n.deductFlavor()
+			klog.Infof("n.AllocatableFlavor After - n.AllocatableFlavor[%#v]: %#v", flavorid, n.AllocatableFlavor[flavorid])
+			return
+		}
+		for _, resourceType := range resourceTypes {
+			klog.Infof("121 updateSiteFlavor Before - resourceType:%#v, n.TotalResources: %#v, RequestedResources: %#v", resourceType, n.TotalResources[resourceType], n.RequestedResources[resourceType])
+			totalRes := n.TotalResources[resourceType]
+			requestRes := n.RequestedResources[resourceType]
+			if totalRes == nil {
+				klog.Infof("updateSiteFlavor - totalRes is nil")
+				n.deductFlavor()
+				klog.Infof("n.AllocatableFlavor After - n.AllocatableFlavor[%#v]: %#v", flavorid, n.AllocatableFlavor[flavorid])
+				return
+			}
+			if requestRes == nil {
+				klog.Infof("updateSiteFlavor - requestRes is nil")
+				requestRes = &types.CPUAndMemory{VCPU: 0, Memory: 0}
+			}
+			// whole-flavor counts supported by vCPU and by memory; the smaller one wins
+			var count, memCount int64
+			if deduct {
+				count = (totalRes.VCPU - requestRes.VCPU) / vCPUInt
+				memCount = (totalRes.Memory - requestRes.Memory) / flv.Ram
+			} else {
+				count = (totalRes.VCPU + requestRes.VCPU) / vCPUInt
+				memCount = (totalRes.Memory + requestRes.Memory) / flv.Ram
+			}
+			if count > memCount {
+				count = memCount
+			}
+			if _, ok := n.AllocatableFlavor[flavorid]; !ok {
+				n.AllocatableFlavor[flavorid] = 0
+			}
+			klog.Infof("121 n.AllocatableFlavor Before - n.AllocatableFlavor[%#v]: %#v", flavorid, n.AllocatableFlavor[flavorid])
+			if n.AllocatableFlavor[flavorid] > count {
+				n.AllocatableFlavor[flavorid] = count
+			}
+			klog.Infof("121 n.AllocatableFlavor After - n.AllocatableFlavor[%#v]: %#v", flavorid, n.AllocatableFlavor[flavorid])
+			klog.Infof("121 updateSiteFlavor After - resourceType:%#v, n.TotalResources: %#v, RequestedResources: %#v", resourceType, n.TotalResources[resourceType], n.RequestedResources[resourceType])
+		}
+	}
 }
 
 /*
 updateFlavor():
 /home/ubuntu/go/src/k8s.io/arktos/conf/flavors.json
 global scheduler flavor config file:
 /home/ubuntu/go/src/k8s.io/arktos/conf/flavor_config.yaml
 */
-func (n *SiteCacheInfo) updateSiteFlavor(resourceTypes []string, regionFlavors map[string]*typed.RegionFlavor) {
+func (n *SiteCacheInfo) updateSiteFlavor2(resourceTypes []string, regionFlavors map[string]*typed.RegionFlavor) {
 	n.mu.Lock()
 	defer n.mu.Unlock()
 
@@ -661,3 +775,73 @@ func (n *SiteCacheInfo) deductFlavor() {
 		}
 	}
 }
+
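The arithmetic in updateSiteFlavor above caps AllocatableFlavor at the number of instances of each flavor that still fit: remaining capacity (total minus requested when deducting, total plus requested when adding back) divided by the flavor's size, keeping the smaller of the vCPU-based and memory-based counts. A worked example with assumed numbers (not taken from this patch), for a site with 96 vCPUs / 262144 MB total, 24 vCPUs / 40960 MB already requested, and a 4-vCPU / 8192-MB flavor:

	count    = (96 - 24) / 4           = 18   // vCPU-limited
	memCount = (262144 - 40960) / 8192 = 27   // memory-limited

so that flavor's allocatable count is lowered to min(18, 27) = 18 if it currently advertises more.
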
+// updateFlavor adjusts the per-flavor bookkeeping by one instance:
+// deduct=true consumes one allocatable unit, deduct=false gives it back.
+func (n *SiteCacheInfo) updateFlavor(deduct bool) {
+	delta := int64(-1)
+	if deduct {
+		delta = 1
+	}
+	if n.AllocatableFlavor == nil {
+		n.AllocatableFlavor = map[string]int64{}
+	}
+	for key, value := range n.AllocatableFlavor {
+		n.AllocatableFlavor[key] = value - delta
+		if n.RequestedFlavor == nil {
+			n.RequestedFlavor = make(map[string]int64)
+		}
+		requested, ok := n.RequestedFlavor[key]
+		if !ok {
+			n.RequestedFlavor[key] = delta
+		} else {
+			n.RequestedFlavor[key] = requested + delta
+		}
+	}
+}
+
+//revoke the site resources bound to a pod because the pod's VM creation failed
+func (n *SiteCacheInfo) WithdrawSiteResInfo(resInfo types.AllResInfo, regionFlavorMap map[string]*typed.RegionFlavor) error {
+	var resourceTypes []string
+	klog.Infof("444 WithdrawSiteResInfo - resInfo: %#v, regionFlavors:%#v", resInfo, regionFlavorMap)
+	for resType, res := range resInfo.CpuAndMem {
+		//resource type is null, assign default resource type (e.g. when binding a pod for the first time)
+		if resType == "" {
+			resType = string(DefaultResourceType)
+			resourceTypes = append(resourceTypes, resType)
+		}
+		if len(n.RequestedResources) == 0 {
+			reqRes := types.CPUAndMemory{VCPU: res.VCPU, Memory: res.Memory}
+			n.RequestedResources[resType] = &reqRes
+			continue
+		}
+		for reqType, reqRes := range n.RequestedResources {
+			klog.Infof("555 WithdrawSiteResInfo - reqType: %#v, reqRes:%#v", reqType, reqRes)
+			resTypes := strings.Split(reqType, constants.FlavorDelimiter)
+			if !utils.IsContain(resTypes, resType) {
+				klog.Infof("!utils.IsContain: %v", !utils.IsContain(resTypes, resType))
+				continue
+			}
+			reqRes.VCPU += res.VCPU
+			reqRes.Memory += res.Memory
+			klog.Infof("666 WithdrawSiteResInfo Before - RequestedResources[%#v]: %#v", resType, n.RequestedResources[resType])
+			n.RequestedResources[resType] = reqRes
+			klog.Infof("777 WithdrawSiteResInfo After - RequestedResources[%#v]: %#v", resType, n.RequestedResources[resType])
+
+		}
+	}
+	for volType, used := range resInfo.Storage {
+		klog.Infof("888 WithdrawSiteResInfo Before - RequestedStorage[%#v]: %#v", volType, n.RequestedStorage[volType])
+		reqVol, ok := n.RequestedStorage[volType]
+		if !ok {
+			reqVol = 0
+		}
+		reqVol += used
+		n.RequestedStorage[volType] = reqVol
+		klog.Infof("888 WithdrawSiteResInfo After - RequestedStorage[%#v]: %#v", volType, n.RequestedStorage[volType])
+
+	}
+	n.updateSiteFlavor(resourceTypes, regionFlavorMap, false)
+	n.generation = nextGeneration()
+	return nil
+}

From 0c14cbe1e8854ddbfe96044b9c838a69cc1bc586 Mon Sep 17 00:00:00 2001
From: kimeunju108
Date: Tue, 18 May 2021 19:50:27 +0000
Subject: [PATCH 04/12] implemented resource revocation when VM creation failed

---
 .../dispatcher/dispatcher_process.go          |  26 +--
 .../pkg/scheduler/eventhandlers.go            |  57 +----
 .../plugins/defaultbinder/default_binder.go   |  31 +--
 globalscheduler/pkg/scheduler/scheduler.go    | 106 ++++-----
 .../scheduler/sitecacheinfo/sitecache_info.go | 217 +++---------------
 5 files changed, 93 insertions(+), 344 deletions(-)

diff --git a/globalscheduler/controllers/dispatcher/dispatcher_process.go b/globalscheduler/controllers/dispatcher/dispatcher_process.go
index aec245671..8ba07f6c3 100644
--- a/globalscheduler/controllers/dispatcher/dispatcher_process.go
+++ b/globalscheduler/controllers/dispatcher/dispatcher_process.go
@@ -19,11 +19,8 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/fields"
 	"k8s.io/client-go/kubernetes"
-	"k8s.io/client-go/kubernetes/scheme"
-	typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
 	"k8s.io/client-go/rest"
"k8s.io/client-go/tools/cache" - "k8s.io/client-go/tools/record" "k8s.io/klog" "k8s.io/kubernetes/globalscheduler/cmd/conf" "k8s.io/kubernetes/globalscheduler/controllers/util" @@ -53,7 +50,6 @@ type Process struct { totalDeleteLatency int64 totalPodCreateNum int totalPodDeleteNum int - recorder record.EventRecorder } func NewProcess(config *rest.Config, namespace string, name string, quit chan struct{}) Process { @@ -78,12 +74,6 @@ func NewProcess(config *rest.Config, namespace string, name string, quit chan st if err != nil { klog.Fatal(err) } - eventBroadcaster := record.NewBroadcaster() - eventBroadcaster.StartLogging(klog.Infof) - eventBroadcaster.StartRecordingToSink( - &typedcorev1.EventSinkImpl{Interface: clientset.CoreV1().Events("")}) - recorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: name}) - return Process{ namespace: namespace, name: name, @@ -98,7 +88,6 @@ func NewProcess(config *rest.Config, namespace string, name string, quit chan st totalDeleteLatency: 0, totalPodCreateNum: 0, totalPodDeleteNum: 0, - recorder: recorder, } } @@ -107,7 +96,6 @@ func (p *Process) Run(quit chan struct{}) { dispatcherSelector := fields.ParseSelectorOrDie("metadata.name=" + p.name) dispatcherLW := cache.NewListWatchFromClient(p.dispatcherClientset.GlobalschedulerV1(), "dispatchers", p.namespace, dispatcherSelector) - dispatcherInformer := cache.NewSharedIndexInformer(dispatcherLW, &dispatcherv1.Dispatcher{}, 0, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}) dispatcherInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ @@ -229,8 +217,7 @@ func (p *Process) SendPodToCluster(pod *v1.Pod) { if err == nil { klog.Infof("The openstack vm for the pod %v has been created at the host %v", pod.ObjectMeta.Name, host) pod.Status.ClusterInstanceId = instanceId - //pod.Status.Phase = v1.ClusterScheduled - pod.Status.Phase = v1.PodFailed + pod.Status.Phase = v1.ClusterScheduled updatedPod, err := p.clientset.CoreV1().Pods(pod.ObjectMeta.Namespace).UpdateStatus(pod) if err == nil { klog.Infof("The pod %v has been updated its apiserver database status to scheduled successfully with the instance id %v", updatedPod, instanceId) @@ -245,17 +232,6 @@ func (p *Process) SendPodToCluster(pod *v1.Pod) { klog.Warningf("The pod %v failed to update its apiserver dtatbase status to failed with the error %v", pod.ObjectMeta.Name, err) } } - ///for test - klog.Warningf("The openstack vm for the pod %v failed to create with the error", pod.ObjectMeta.Name) - pod.Status.Phase = v1.PodFailed - if _, err := p.clientset.CoreV1().Pods(pod.ObjectMeta.Namespace).UpdateStatus(pod); err != nil { - klog.Warningf("The pod %v failed to update its apiserver dtatbase status to failed with the error %v", pod.ObjectMeta.Name, err) - } - klog.Infof("+++ The pod info %s, %#v, %#v", pod.ObjectMeta.Name, pod.Status) - //p.recorder.Event(pod, corev1.EventTypeNormal, SuccessSynched, MessageResourceSynched) - //p.recorder.Event(pod, v1.EventTypeWarning, "Failed", "Failed to create vm") - - // util.CheckTime(pod.Name, "dispatcher", "CreatePod-End", 2) }() } } diff --git a/globalscheduler/pkg/scheduler/eventhandlers.go b/globalscheduler/pkg/scheduler/eventhandlers.go index b6930e73d..9b16fe8b1 100644 --- a/globalscheduler/pkg/scheduler/eventhandlers.go +++ b/globalscheduler/pkg/scheduler/eventhandlers.go @@ -72,15 +72,6 @@ func AddAllEventHandlers(sched *Scheduler) { FilterFunc: func(obj interface{}) bool { switch t := obj.(type) { case *v1.Pod: - //klog.Infof("###111Pod: %#v", t) - 
klog.Infof("###111PodStatus: %#v", t.Status) - pod := obj.(*v1.Pod) - klog.Infof("#: %#v", pod.Name) - ppp, err := sched.Client.CoreV1().Pods("default").Get(pod.Name, metav1.GetOptions{}) - if err == nil { - //klog.Infof("###pppPod: %#v", ppp) - klog.Infof("###pppPodStatus: %#v", ppp.Status) - } return assignedPod(t) && responsibleForPod(t, sched.SchedulerName) case cache.DeletedFinalStateUnknown: if pod, ok := t.Obj.(*v1.Pod); ok { @@ -105,16 +96,6 @@ func AddAllEventHandlers(sched *Scheduler) { FilterFunc: func(obj interface{}) bool { switch t := obj.(type) { case *v1.Pod: - //klog.Infof("###222Pod: %#v", t) - klog.Infof("###222PodStatus: %#v", t.Status) - pod := obj.(*v1.Pod) - klog.Infof("##: %#v", pod.Name) - - pp, err := sched.Client.CoreV1().Pods("default").Get(pod.Name, metav1.GetOptions{}) - if err == nil { - //klog.Infof("###222ppPod: %#v", pp) - klog.Infof("###222ppPodStatus: %#v", pp.Status) - } return needToSchedule(t) && responsibleForPod(t, sched.SchedulerName) case cache.DeletedFinalStateUnknown: if pod, ok := t.Obj.(*v1.Pod); ok { @@ -140,17 +121,6 @@ func AddAllEventHandlers(sched *Scheduler) { FilterFunc: func(obj interface{}) bool { switch t := obj.(type) { case *v1.Pod: - pod := obj.(*v1.Pod) - klog.Infof("###: %#v", pod.Name) - p, err := sched.Client.CoreV1().Pods("default").Get(pod.Name, metav1.GetOptions{}) - klog.Infof("###333Pod: %#v", err) - //klog.Infof("###333Pod: %#v", t) - klog.Infof("###333PodStatus: %#v", t.Status) - //p := sched.clientset.CoreV1().Pods(pod.ObjectMeta.Namespace).Get(pod.Name, metav1.GetOptions{}) - if err == nil { - //klog.Infof("###pppPod: %#v", p) - klog.Infof("###pppPodStatus: %#v", p.Status) - } return failedToSchedule(t) && responsibleForPod(t, sched.SchedulerName) case cache.DeletedFinalStateUnknown: if pod, ok := t.Obj.(*v1.Pod); ok { @@ -179,36 +149,28 @@ func AddAllEventHandlers(sched *Scheduler) { // needToSchedule selects pods that need to be scheduled func needToSchedule(pod *v1.Pod) bool { - klog.Infof("$$$$$$$needToSchedule: %v", pod.Name) - klog.Infof("$$$$$$$needToSchedule: %v", pod.Spec.VirtualMachine != nil && pod.Status.Phase == v1.PodAssigned) return pod.Spec.VirtualMachine != nil && pod.Status.Phase == v1.PodAssigned } // assignedPod selects pods that are assigned (scheduled and running). func assignedPod(pod *v1.Pod) bool { - klog.Infof("$$$$$$$assignedPod: %v", pod.Name) - klog.Infof("$$$$$$$assignedPod: %v", pod.Spec.VirtualMachine != nil && pod.Status.Phase == v1.PodBound) return pod.Spec.VirtualMachine != nil && pod.Status.Phase == v1.PodBound } // responsibleForPod returns true if the pod has asked to be scheduled by the given scheduler. 
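With the debug logging removed, the filters in this file reduce to plain phase checks: assignedPod selects v1.PodBound, needToSchedule selects v1.PodAssigned, and failedToSchedule (below) selects v1.PodFailed, which is what routes a pod whose VM creation failed into the new withdraw handlers. The dispatch can be pictured as (a condensed sketch, not code from this patch):

	switch pod.Status.Phase {
	case v1.PodAssigned: // needToSchedule: run the scheduling pipeline
	case v1.PodBound:    // assignedPod: keep the stack cache in sync
	case v1.PodFailed:   // failedToSchedule: return the reserved resources
		_ = sched.withdrawResource(pod.Name)
	}
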
func responsibleForPod(pod *v1.Pod, schedulerName string) bool { - klog.Infof("$$$$$$$responsibleForPod: %v", pod.Name) - klog.Infof("$$$$$$$responsibleForPod: %v", schedulerName == pod.Status.AssignedScheduler.Name) return schedulerName == pod.Status.AssignedScheduler.Name } // failedToSchedule selects pods that scheduled but failed to create vm func failedToSchedule(pod *v1.Pod) bool { - klog.Infof("$$$$$$$failedToSchedule: %v", pod.Name) - klog.Infof("$$$$$$$failedToSchedule: %v", pod.Spec.VirtualMachine != nil && pod.Status.Phase == v1.PodFailed) - return pod.Spec.VirtualMachine != nil && pod.Status.Phase == v1.PodFailed + return pod.Status.Phase == v1.PodFailed } // addPodToCache add pod to the stack cache of the scheduler func (sched *Scheduler) addPodToCache(obj interface{}) { pod, ok := obj.(*v1.Pod) - klog.Infof("Add a pod: %v", pod.Name) + klog.V(4).Infof("Add a pod: %v", pod.Name) if !ok { klog.Errorf("cannot convert to *v1.Pod: %v", obj) return @@ -614,14 +576,14 @@ func (sched *Scheduler) verifyPodInfo(pod *v1.Pod) (verified bool) { func (sched *Scheduler) addPodWithdrawResource(object interface{}) { pod, ok := object.(*v1.Pod) - klog.Infof("Add a pod to withdraw resource: %v", pod.Name) + klog.V(4).Infof("Add a pod to withdraw resource: %v", pod.Name) if !ok { klog.Errorf("cannot convert to *v1.Pod: %v", object) return } podCopy := pod.DeepCopy() if sched.verifyPodInfo(podCopy) == false { - klog.Infof(" Pod data is not correct: %v", podCopy) + klog.V(4).Infof(" Pod data is not correct: %v", podCopy) } err := sched.withdrawResource(pod.Name) if err != nil { @@ -636,7 +598,7 @@ func (sched *Scheduler) updatePodWithdrawResource(oldObj, newObj interface{}) { return } newPod, ok := newObj.(*v1.Pod) - klog.Infof("Update a pod: %v", newPod) + klog.V(4).Infof("Update a pod: %v", newPod) if !ok { klog.Errorf("cannot convert newObj to *v1.Pod: %v", newObj) return @@ -656,7 +618,7 @@ func (sched *Scheduler) deletePodWithdrawResource(obj interface{}) { switch t := obj.(type) { case *v1.Pod: pod = t - klog.Infof("Delete a pod: %v", pod.Name) + klog.V(4).Infof("Delete a pod: %v", pod.Name) case cache.DeletedFinalStateUnknown: var ok bool pod, ok = t.Obj.(*v1.Pod) @@ -678,9 +640,8 @@ func (sched *Scheduler) deletePodWithdrawResource(obj interface{}) { //withdraw reserved resources to a pod & add it to cash to other pods func (sched *Scheduler) withdrawResource(podName string) error { resource := sched.ResourceAllocationMap[podName] - //allResInfo := types.AllResInfo{CpuAndMem: resource.CpuMem, Storage: resource.Storage, eipNum: 0} - if (resource == nil){ - klog.Infof("there is no preserved resource for pod: %s", podName) + if resource == nil { + klog.V(4).Infof("there is no preserved resource for pod: %s", podName) return nil } allResInfo := resource.Resource @@ -691,7 +652,7 @@ func (sched *Scheduler) withdrawResource(podName string) error { return err } siteCacheInfo := sched.siteCacheInfoSnapshot.SiteCacheInfoMap[resource.SiteID] - siteCacheInfo.WithdrawSiteResInfo(allResInfo, regionFlavor) + siteCacheInfo.UpdateSiteResInfo(allResInfo, regionFlavor, false) delete(sched.ResourceAllocationMap, podName) return nil } diff --git a/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go b/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go index fa80d219f..af9493ccd 100644 --- a/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go +++ b/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go 
@@ -70,7 +70,7 @@ func (b DefaultBinder) Bind(ctx context.Context, state *interfaces.CycleState, s klog.Errorf("Gettng site selector state failed! err: %s", err) return interfaces.NewStatus(interfaces.Error, fmt.Sprintf("getting site %q info failed: %v", siteID, err)) } - klog.Infof("site selector info: %v", siteSelectedInfo) + klog.V(4).Infof("site selector info: %v", siteSelectedInfo) if len(stack.Resources) != len(siteSelectedInfo.Flavors) { klog.Errorf("flavor count not equal to server count! err: %s", err) return interfaces.NewStatus(interfaces.Error, fmt.Sprintf("siteID(%s) flavor count not equal to "+ @@ -114,10 +114,10 @@ func (b DefaultBinder) Bind(ctx context.Context, state *interfaces.CycleState, s klog.V(4).Infof("Resource state after deduction: %v", siteCacheInfo) return nil*/ - /*klog.Infof("111 Resource state before deduction: %#v", siteCacheInfo) - klog.Infof("222 resInfo: %#v, regionFlavors:%#v", resInfo, regionFlavors) + /*klog.V(4).Infof("111 Resource state before deduction: %#v", siteCacheInfo) + klog.V(4).Infof("222 resInfo: %#v, regionFlavors:%#v", resInfo, regionFlavors) siteCacheInfo.DeductSiteResInfo(resInfo, regionFlavors) - klog.Infof("333 Resource state after deduction: %#v", siteCacheInfo)*/ + klog.V(4).Infof("333 Resource state after deduction: %#v", siteCacheInfo)*/ return nil } @@ -144,9 +144,6 @@ func (b DefaultBinder) BindResource(ctx context.Context, state *interfaces.Cycle status := interfaces.NewStatus(interfaces.Error, fmt.Sprintf("getting site %q info failed: %v", siteID, err)) return status, siteID, flavorID, &resInfo } - klog.Infof("site selector info: %v", siteSelectedInfo) - klog.Infof("stack.Resources: %#v", stack.Resources) - klog.Infof("siteSelectedInfo.Flavors: %#v", siteSelectedInfo.Flavors) if len(stack.Resources) != len(siteSelectedInfo.Flavors) { klog.Errorf("flavor count not equal to server count! 
err: %s", err) return interfaces.NewStatus(interfaces.Error, fmt.Sprintf("siteID(%s) flavor count not equal to "+ @@ -155,13 +152,13 @@ func (b DefaultBinder) BindResource(ctx context.Context, state *interfaces.Cycle for i := 0; i < len(stack.Resources); i++ { flavorID = siteSelectedInfo.Flavors[i].FlavorID stack.Resources[i].FlavorIDSelected = flavorID - klog.Infof("GetFlavor - flavorID: %s, region: %s", flavorID, region) + klog.V(4).Infof("GetFlavor - flavorID: %s, region: %s", flavorID, region) flv, ok := cache.FlavorCache.GetFlavor(flavorID, region) if !ok { klog.Warningf("flavor %s not found in region(%s)", flavorID, region) continue } - klog.Infof("flavor %s : %v", flavorID, flv) + klog.V(4).Infof("flavor %s : %v", flavorID, flv) vCPUInt, err := strconv.ParseInt(flv.Vcpus, 10, 64) if err != nil || vCPUInt <= 0 { klog.Warningf("flavor %s is invalid in region(%s)", flavorID, region) @@ -178,21 +175,7 @@ func (b DefaultBinder) BindResource(ctx context.Context, state *interfaces.Cycle resInfo.CpuAndMem[flv.OsExtraSpecs.ResourceType] = reqRes break } - klog.Infof("UpdateSiteWithResInfo - siteID: %s, resInfo: %#v", siteID, resInfo) + klog.V(4).Infof("UpdateSiteWithResInfo - siteID: %s, resInfo: %#v", siteID, resInfo) b.handle.Cache().UpdateSiteWithResInfo(siteID, resInfo) - /*regionFlavors, err := b.handle.SnapshotSharedLister().SiteCacheInfos().GetFlavors() - if err != nil { - klog.Errorf("Getting region's flavor failed: %s", err) - return interfaces.NewStatus(interfaces.Error, fmt.Sprintf("getting site %q info failed: %v", siteID, err)), siteID, flavorID, nil - } - if regionFlavors == nil || err != nil { - regionFlavors = map[string]*typed.RegionFlavor{} - } - - klog.Infof("111 Resource state before deduction: %#v", siteCacheInfo) - klog.Infof("222 resInfo: %#v, regionFlavors:%#v", resInfo, regionFlavors) - siteCacheInfo.DeductSiteResInfo(resInfo, regionFlavors) - klog.Infof("333 Resource state after deduction: %#v", siteCacheInfo)*/ - klog.Infof("UpdateSiteWithResInfo - return") return nil, siteID, flavorID, &resInfo } diff --git a/globalscheduler/pkg/scheduler/scheduler.go b/globalscheduler/pkg/scheduler/scheduler.go index c81d78df3..a5732371d 100644 --- a/globalscheduler/pkg/scheduler/scheduler.go +++ b/globalscheduler/pkg/scheduler/scheduler.go @@ -116,7 +116,7 @@ type Scheduler struct { //Cluster //KubeClientset clientset.Interface //kubernetes.Interface - KubeClientset *clientset.Clientset + KubeClientset *clientset.Clientset ApiextensionsClientset apiextensionsclientset.Interface ClusterClientset clusterclientset.Interface ClusterInformerFactory externalinformers.SharedInformerFactory @@ -139,7 +139,7 @@ var once sync.Once func NewScheduler(gsconfig *types.GSSchedulerConfiguration, stopCh <-chan struct{}) (*Scheduler, error) { stopEverything := stopCh - klog.Infof("stopEverything to check : %v", stopEverything) + klog.V(4).Infof("stopEverything to check : %v", stopEverything) if stopEverything == nil { stopEverything = wait.NeverStop } @@ -152,8 +152,8 @@ func NewScheduler(gsconfig *types.GSSchedulerConfiguration, stopCh <-chan struct ConfigFilePath: gsconfig.ConfigFilePath, deletedClusters: make(map[string]string), workerNumber: 1, - ResourceAllocationMap: make(map[string]*PodSiteResourceAllocation), - } + ResourceAllocationMap: make(map[string]*PodSiteResourceAllocation), + } err := sched.buildFramework() if err != nil { return nil, fmt.Errorf("buildFramework by %s failed! 
err: %v", types.SchedulerDefaultProviderName, err) @@ -161,7 +161,7 @@ func NewScheduler(gsconfig *types.GSSchedulerConfiguration, stopCh <-chan struct //build entire FlavorMap map sched.UpdateFlavor() - klog.Infof("FlavorMap: %v", sched.siteCacheInfoSnapshot.FlavorMap) + klog.V(4).Infof("FlavorMap: %v", sched.siteCacheInfoSnapshot.FlavorMap) // init pod, cluster, and scheduler informers for scheduler err = sched.initPodClusterSchedulerInformers(gsconfig, stopEverything) if err != nil { @@ -201,14 +201,14 @@ func (sched *Scheduler) StartInformersAndRun(stopCh <-chan struct{}) { } // start pod informers if sched.PodInformer != nil && sched.InformerFactory != nil { - klog.Infof("Starting scheduler %s informer", sched.SchedulerName) + klog.V(4).Infof("Starting scheduler %s informer", sched.SchedulerName) sched.InformerFactory.Start(stopCh) // Wait for all caches to sync before scheduling. sched.InformerFactory.WaitForCacheSync(stopCh) } // start scheduler informer if sched.schedulerInformer != nil { - klog.Infof("Starting scheduler informer for scheduler %s", sched.SchedulerName) + klog.V(4).Infof("Starting scheduler informer for scheduler %s", sched.SchedulerName) go sched.schedulerInformer.Run(stopCh) } // Do scheduling @@ -218,13 +218,13 @@ func (sched *Scheduler) StartInformersAndRun(stopCh <-chan struct{}) { // Run begins watching and scheduling. It waits for cache to be synced, then starts scheduling // and blocked until the context is done. func (sched *Scheduler) Run(clusterWorkers int, podWorkers int, stopCh <-chan struct{}) { - klog.Infof("Starting scheduler %s", sched.SchedulerName) + klog.V(4).Infof("Starting scheduler %s", sched.SchedulerName) defer utilruntime.HandleCrash() //cluster if clusterWorkers > 0 { defer sched.ClusterQueue.ShutDown() - klog.Infof("Waiting informer caches to sync") + klog.V(4).Infof("Waiting informer caches to sync") if ok := cache.WaitForCacheSync(sched.StopEverything, sched.ClusterSynced); !ok { klog.Errorf("failed to wait for caches to sync") } @@ -236,7 +236,7 @@ func (sched *Scheduler) Run(clusterWorkers int, podWorkers int, stopCh <-chan st } defer sched.StackQueue.Close() - klog.Infof("Waiting informer caches to sync") + klog.V(4).Infof("Waiting informer caches to sync") if ok := cache.WaitForCacheSync(sched.StopEverything, sched.PodSynced); !ok { klog.Errorf("failed to wait for caches to sync") } @@ -247,7 +247,7 @@ func (sched *Scheduler) Run(clusterWorkers int, podWorkers int, stopCh <-chan st } klog.Info("Started cluster & pod workers") <-stopCh - klog.Infof("Shutting down scheduler %s", sched.SchedulerName) + klog.V(4).Infof("Shutting down scheduler %s", sched.SchedulerName) } // Cache returns the cache in scheduler for test to check the data in scheduler. @@ -272,15 +272,15 @@ func (sched *Scheduler) scheduleOne() bool { if shutdown != nil { return false } - klog.Infof("1. Stack: %v, stack selector: %v", stack, stack.Selector) + klog.V(4).Infof("1. Stack: %v, stack selector: %v", stack, stack.Selector) allocation, err := sched.generateAllocationFromStack(stack) - klog.Infof("2. Allocation: %v, allocation selector: %v", allocation, allocation.Selector) + klog.V(4).Infof("2. 
Allocation: %v, allocation selector: %v", allocation, allocation.Selector) if err != nil { return false } start := stack.CreateTime end := time.Now().UnixNano() - klog.Infof("=== done pop queue, time consumption: %v ms ===", (end-start)/int64(time.Millisecond)) + klog.V(4).Infof("=== done pop queue, time consumption: %v ms ===", (end-start)/int64(time.Millisecond)) // 2.do scheduling process start = end @@ -292,21 +292,21 @@ func (sched *Scheduler) scheduleOne() bool { return true } end = time.Now().UnixNano() - klog.Infof("=== done Scheduling pipline, time consumption: %vms ===", (end-start)/int64(time.Millisecond)) - klog.Infof("Schedule result: %v", result) //result is assumed stacks - klog.Infof("3. Assumed Stacks: %v", result) + klog.V(4).Infof("=== done Scheduling pipline, time consumption: %vms ===", (end-start)/int64(time.Millisecond)) + klog.V(4).Infof("Schedule result: %v", result) //result is assumed stacks + klog.V(4).Infof("3. Assumed Stacks: %v", result) // 3.bind scheduler result to pod start = end - klog.Infof("Try to bind to site, stacks:%v", result.Stacks) + klog.V(4).Infof("Try to bind to site, stacks:%v", result.Stacks) sched.bindStacks(result.Stacks) end = time.Now().UnixNano() - klog.Infof("=== done bind pod to cluster, time consumption: %vms ===", (end-start)/int64(time.Millisecond)) + klog.V(4).Infof("=== done bind pod to cluster, time consumption: %vms ===", (end-start)/int64(time.Millisecond)) // log the elapsed time for the entire schedule if stack.CreateTime != 0 { spendTime := time.Now().UnixNano() - stack.CreateTime - klog.Infof("@@@ Finished Schedule, time consumption: %vms @@@", spendTime/int64(time.Millisecond)) + klog.V(4).Infof("@@@ Finished Schedule, time consumption: %vms @@@", spendTime/int64(time.Millisecond)) } return true } @@ -377,7 +377,7 @@ func (sched *Scheduler) findSitesThatPassFilters(ctx context.Context, state *int err = fmt.Errorf("SiteCacheInfoMap of %v is null", siteID) return nil, err } - klog.Infof("allSiteCacheInfos: %v", len(allSiteCacheInfos)) + klog.V(4).Infof("allSiteCacheInfos: %v", len(allSiteCacheInfos)) // Create filtered list with enough space to avoid growing it // and allow assigning. filtered := make([]*types.Site, len(allSiteCacheInfos)) @@ -463,7 +463,7 @@ func (sched *Scheduler) prioritizeSites( // sort by score. 
 	sort.Sort(sort.Reverse(result))
-	klog.Infof("score sites: %v", result)
+	klog.V(4).Infof("score sites: %v", result)
 
 	return result, nil
 }
@@ -501,15 +501,7 @@ func (sched *Scheduler) bind(ctx context.Context, stack *types.Stack, targetSite
 		sched.siteCacheInfoSnapshot.SiteCacheInfoMap[targetSiteID])
 	if bindStatus.IsSuccess() {
 		podResourceAlloc := PodSiteResourceAllocation{stack.PodName, siteId, flavorId, *resInfo}
-		klog.Infof("bind - podResourceAlloc: %#v", podResourceAlloc)
-		klog.Infof("bind111 - sched.ResourceAllocationMap[stack.PodName]: %#v, stack.PodName:%s", sched.ResourceAllocationMap, stack.PodName)
 		sched.ResourceAllocationMap[stack.PodName] = &podResourceAlloc
-		/*if (sched.ResourceAllocationMap[stack.PodName] == nil) {
-			sched.ResourceAllocationMap[stack.PodName] = append(sched.ResourceAllocationMap[stack.PodName], &podResourceAlloc)
-		} else {
-			sched.ResourceAllocationMap[stack.PodName] = &podResourceAlloc
-		}*/
-		klog.Infof("bind222 - sched.ResourceAllocationMap[stack.PodName]: %#v, stack.PodName:%s", sched.ResourceAllocationMap, stack.PodName)
 		region := utils.GetRegionName(siteId)
 		regionFlavors, err := sched.siteCacheInfoSnapshot.GetRegionFlavors(region)
 		if err != nil {
@@ -517,7 +509,7 @@ func (sched *Scheduler) bind(ctx context.Context, stack *types.Stack, targetSite
 			return err
 		}
 		siteCacheInfo := sched.siteCacheInfoSnapshot.SiteCacheInfoMap[targetSiteID]
-		siteCacheInfo.DeductSiteResInfo(*resInfo, regionFlavors)
+		siteCacheInfo.UpdateSiteResInfo(*resInfo, regionFlavors, true)
 		return nil
 	}
 	if bindStatus.Code() == interfaces.Error {
@@ -529,14 +521,14 @@ func (sched *Scheduler) bind(ctx context.Context, stack *types.Stack, targetSite
 // Schedule Run begins watching and scheduling. It waits for cache to be synced ,
 // then starts scheduling and blocked until the context is done.
 func (sched *Scheduler) Schedule(ctx context.Context, allocation *types.Allocation) (result ScheduleResult, err error) {
-	klog.Infof("Attempting to schedule allocation: %v", allocation.ID)
+	klog.V(4).Infof("Attempting to schedule allocation: %v", allocation.ID)
 	state := interfaces.NewCycleState()
 	schedulingCycleCtx, cancel := context.WithCancel(ctx)
 	defer cancel()
 
 	// 1. Snapshot site resource cache
 	start := time.Now()
-	klog.Infof("[START] snapshot site...")
+	klog.V(4).Infof("[START] snapshot site...")
 
 	///UpdateFlavorMap updates FlavorCache.RegionFlavorMap, FlavorCache.FlavorMap)
 	///FlavorMap is updated when scheduler starts, RegionFlavorMap is updated
@@ -546,16 +538,16 @@ func (sched *Scheduler) Schedule(ctx context.Context, allocation *types.Allocati
 
 	// 2. Run "prefilter" plugins.
 	start = time.Now()
-	klog.Infof("[START] Running prefilter plugins...")
+	klog.V(4).Infof("[START] Running prefilter plugins...")
 	preFilterStatus := sched.SchedFrame.RunPreFilterPlugins(schedulingCycleCtx, state, &allocation.Stack)
 	if !preFilterStatus.IsSuccess() {
 		return result, preFilterStatus.AsError()
 	}
-	klog.Infof("[DONE] Running prefilter plugins, use_time: %s", time.Since(start).String())
+	klog.V(4).Infof("[DONE] Running prefilter plugins, use_time: %s", time.Since(start).String())
 
 	// 3. Run "filter" plugins.
start = time.Now() - klog.Infof("[START] Running filter plugins...") + klog.V(4).Infof("[START] Running filter plugins...") filteredSitesStatuses := make(interfaces.SiteToStatusMap) allocation.Stack.Selector = allocation.Selector filteredSites, err := sched.findSitesThatPassFilters(ctx, state, &allocation.Stack, filteredSitesStatuses) @@ -563,9 +555,9 @@ func (sched *Scheduler) Schedule(ctx context.Context, allocation *types.Allocati klog.Errorf("findSitesThatPassFilters failed! err: %s", err) return result, err } - klog.Infof("[DONE] Running filter plugins, use_time: %s", time.Since(start).String()) + klog.V(4).Infof("[DONE] Running filter plugins, use_time: %s", time.Since(start).String()) - klog.Infof("filteredSitesStatuses = %v", filteredSitesStatuses.ToString()) + klog.V(4).Infof("filteredSitesStatuses = %v", filteredSitesStatuses.ToString()) if len(filteredSites) <= 0 { err := fmt.Errorf("filter none site. resultStatus: %s", filteredSitesStatuses.ToString()) klog.Error(err) @@ -574,33 +566,33 @@ func (sched *Scheduler) Schedule(ctx context.Context, allocation *types.Allocati // 4. Run "prescore" plugins. start = time.Now() - klog.Infof("[START] Running preScore plugins...") + klog.V(4).Infof("[START] Running preScore plugins...") prescoreStatus := sched.SchedFrame.RunPreScorePlugins(ctx, state, &allocation.Stack, filteredSites) if !prescoreStatus.IsSuccess() { return result, prescoreStatus.AsError() } - klog.Infof("[DONE] Running preScore plugins, use_time: %s", time.Since(start).String()) + klog.V(4).Infof("[DONE] Running preScore plugins, use_time: %s", time.Since(start).String()) // 5. Run "prioritizeSites" plugins. start = time.Now() - klog.Infof("[START] Running prioritizeSites plugins...") + klog.V(4).Infof("[START] Running prioritizeSites plugins...") priorityList, err := sched.prioritizeSites(ctx, state, &allocation.Stack, filteredSites) if err != nil { klog.Errorf("prioritizeSites failed! err: %s", err) return result, err } - klog.Infof("[DONE] Running prioritizeSites plugins, use_time: %s", time.Since(start).String()) + klog.V(4).Infof("[DONE] Running prioritizeSites plugins, use_time: %s", time.Since(start).String()) // 6. Run "strategy" plugins. start = time.Now() - klog.Infof("[START] Running strategy plugins...") + klog.V(4).Infof("[START] Running strategy plugins...") siteCount, strategyStatus := sched.SchedFrame.RunStrategyPlugins(ctx, state, allocation, priorityList) if !strategyStatus.IsSuccess() { klog.Errorf("RunStrategyPlugins failed! err: %s", err) return result, err } - klog.Infof("[DONE] Running StrategyPlugins plugins, use_time: %s", time.Since(start).String()) - klog.Infof("selected Hosts : %#v", siteCount) + klog.V(4).Infof("[DONE] Running StrategyPlugins plugins, use_time: %s", time.Since(start).String()) + klog.V(4).Infof("selected Hosts : %#v", siteCount) // 7. 
reserve resource start = time.Now() @@ -632,7 +624,7 @@ func (sched *Scheduler) Schedule(ctx context.Context, allocation *types.Allocati klog.Errorf("not find suit host") return result, fmt.Errorf("not find suit host") } - klog.Infof("reserve resource(%s) success, use_time: %s", allocation.ID, time.Since(start).String()) + klog.V(4).Infof("reserve resource(%s) success, use_time: %s", allocation.ID, time.Since(start).String()) return } @@ -713,7 +705,7 @@ func (sched *Scheduler) initPodClusterSchedulerInformers(gsconfig *types.GSSched sched.schedulerInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ DeleteFunc: func(obj interface{}) { if sched, ok := obj.(*schedulerv1.Scheduler); ok { - klog.Infof("The scheduler %s process is going to be killed...", sched.Name) + klog.V(4).Infof("The scheduler %s process is going to be killed...", sched.Name) os.Exit(0) } else { klog.Fatalf("The deleted object %v failed to convert to scheduler", obj) @@ -757,7 +749,7 @@ func (sched *Scheduler) processNextClusterItem() bool { if shutdown { return false } - klog.Infof("Process an item in work queue %v ", workItem) + klog.V(4).Infof("Process an item in work queue %v ", workItem) eventKey := workItem.(KeyWithEventType) key := eventKey.Key defer sched.ClusterQueue.Done(key) @@ -766,7 +758,7 @@ func (sched *Scheduler) processNextClusterItem() bool { utilruntime.HandleError(fmt.Errorf("Handle %v of key %v failed with %v", "serivce", key, err)) } sched.ClusterQueue.Forget(key) - klog.Infof("Successfully processed & synced %s", key) + klog.V(4).Infof("Successfully processed & synced %s", key) return true } @@ -776,24 +768,24 @@ func (sched *Scheduler) clusterSyncHandler(keyWithEventType KeyWithEventType) er return err } key := keyWithEventType.Key - klog.Infof("sync cache for key %v", key) + klog.V(4).Infof("sync cache for key %v", key) startTime := time.Now() defer func() { - klog.Infof("Finished syncing %q (%v)", key, time.Since(startTime)) + klog.V(4).Infof("Finished syncing %q (%v)", key, time.Since(startTime)) }() nameSpace, clusterName, err := cache.SplitMetaNamespaceKey(key) //This performs controller logic - create site's static info - klog.Infof("cluster processing - event: %v, cluster name: %v", keyWithEventType.EventType, clusterName) + klog.V(4).Infof("cluster processing - event: %v, cluster name: %v", keyWithEventType.EventType, clusterName) result, err := sched.updateStaticSiteResourceInfo(key, keyWithEventType.EventType, nameSpace, clusterName) if !result { klog.Errorf("Failed a cluster processing - event: %v, key: %v, error: %v", keyWithEventType, key, err) sched.ClusterQueue.AddRateLimited(keyWithEventType) } else { - klog.Infof(" Processed a cluster: %v", key) + klog.V(4).Infof(" Processed a cluster: %v", key) sched.ClusterQueue.Forget(key) } - klog.Infof("Cluster was handled by ClusterController - event: %v, cluster name: %v", keyWithEventType.EventType, clusterName) + klog.V(4).Infof("Cluster was handled by ClusterController - event: %v, cluster name: %v", keyWithEventType.EventType, clusterName) if keyWithEventType.EventType != EventType_Delete { cluster, err := sched.ClusterLister.Clusters(nameSpace).Get(clusterName) clusterCopy := cluster.DeepCopy() @@ -848,7 +840,7 @@ func (sched *Scheduler) updateStaticSiteResourceInfo(key string, event EventType klog.Errorf("Failed to retrieve cluster in local cache by cluster name: %s", clusterName) return false, err } - klog.Infof("create a site static info, cluster profile: %v", clusterCopy) + klog.V(4).Infof("create a site static info, cluster 
@@ -848,7 +840,7 @@ func (sched *Scheduler) updateStaticSiteResourceInfo(key string, event EventType
 			klog.Errorf("Failed to retrieve cluster in local cache by cluster name: %s", clusterName)
 			return false, err
 		}
-		klog.Infof("create a site static info, cluster profile: %v", clusterCopy)
+		klog.V(4).Infof("create a site static info, cluster profile: %v", clusterCopy)
 		clusterCopy.Status = ClusterStatusCreated
 		site := convertClusterToSite(clusterCopy)
 		siteCacheInfo := schedulersitecacheinfo.NewSiteCacheInfo()
@@ -866,7 +858,7 @@ func (sched *Scheduler) updateStaticSiteResourceInfo(key string, event EventType
 			klog.Errorf("Failed to retrieve cluster in local cache by cluster name - %s", clusterName)
 			return false, err
 		}
-		klog.Infof("update a site static info, cluster profile: %v", clusterCopy)
+		klog.V(4).Infof("update a site static info, cluster profile: %v", clusterCopy)
 		clusterCopy.Status = ClusterStatusUpdated
 		site := convertClusterToSite(clusterCopy)
 		siteCacheInfo := schedulersitecacheinfo.NewSiteCacheInfo()
@@ -898,7 +890,7 @@ func (sched *Scheduler) updateStaticSiteResourceInfo(key string, event EventType
 
 //This function updates sites' dynamic resource information
 func (sched *Scheduler) UpdateSiteDynamicResource(region string, resource *types.SiteResource) (err error) {
 	//reset total(available) resource
-	klog.Infof("UpdateSiteDynamicResource region: %s, resource:%v", region, resource)
+	klog.V(4).Infof("UpdateSiteDynamicResource region: %s, resource:%v", region, resource)
 	var siteID string
 	for _, siteresource := range resource.CPUMemResources {
 		siteID = region + constants.SiteDelimiter + siteresource.AvailabilityZone
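UpdateSiteDynamicResource above keys its per-site state by an ID composed from the region and the availability zone. A small sketch of that composition; the delimiter value is an assumption for illustration, the real one lives in the scheduler's constants package:

package main

import "fmt"

// SiteDelimiter mirrors constants.SiteDelimiter in the scheduler; the
// concrete delimiter character here is assumed, not taken from the patch.
const SiteDelimiter = "|"

func siteID(region, availabilityZone string) string {
	return region + SiteDelimiter + availabilityZone
}

func main() {
	// one site ID per CPUMemResources entry, as in UpdateSiteDynamicResource
	fmt.Println(siteID("region-east", "az-1"))
}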
diff --git a/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go b/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go
index 2b935d5f7..0397c486d 100644
--- a/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go
+++ b/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go
@@ -552,10 +552,9 @@ func GetStackKey(stack *types.Stack) (string, error) {
 	return uid, nil
 }
 
-//deduct or add 
+//deduct or add
 func (n *SiteCacheInfo) UpdateSiteResInfo(resInfo types.AllResInfo, regionFlavorMap map[string]*typed.RegionFlavor, deduct bool) error {
 	var resourceTypes []string
-	klog.Infof("444 UpdateSiteResInfo - resInfo: %#v, regionFlavors:%#v", resInfo, regionFlavorMap)
 	for resType, res := range resInfo.CpuAndMem {
 		//resource type is null, assign default resource type (e.g. when binding a pod for the first time)
 		if resType == "" {
@@ -568,140 +567,29 @@ func (n *SiteCacheInfo) UpdateSiteResInfo(resInfo types.AllResInfo, regionFlavor
 			continue
 		}
 		for reqType, reqRes := range n.RequestedResources {
-			klog.Infof("555 UpdateSiteResInfo - reqType: %#v, reqRes:%#v", reqType, reqRes)
 			resTypes := strings.Split(reqType, constants.FlavorDelimiter)
 			if !utils.IsContain(resTypes, resType) {
-				klog.Infof("!utils.IsContain: %v", !utils.IsContain(resTypes, resType))
+				klog.V(4).Infof("!utils.IsContain: %v", !utils.IsContain(resTypes, resType))
 				continue
 			}
 			reqRes.VCPU += res.VCPU
 			reqRes.Memory += res.Memory
-			klog.Infof("666 UpdateSiteResInfo Before - RequestedResources[%#v]: %#v", resType, n.RequestedResources[resType])
 			n.RequestedResources[resType] = reqRes
-			klog.Infof("777 UpdateSiteResInfo After - RequestedResources[%#v]: %#v", resType, n.RequestedResources[resType])
-
 		}
 	}
 	for volType, used := range resInfo.Storage {
-		klog.Infof("888 UpdateSiteResInfo Before - RequestedStorage[%#v]: %#v", volType, n.RequestedStorage[volType])
 		reqVol, ok := n.RequestedStorage[volType]
 		if !ok {
 			reqVol = 0
 		}
 		reqVol += used
 		n.RequestedStorage[volType] = reqVol
-		klog.Infof("888 UpdateSiteResInfo After - RequestedStorage[%#v]: %#v", volType, n.RequestedStorage[volType])
-
 	}
 	n.updateSiteFlavor(resourceTypes, regionFlavorMap, deduct)
 	n.generation = nextGeneration()
 	return nil
 }
 
-// DeductSiteResInfo deduct site's resource info
-/*func (n *SiteCacheInfo) DeductSiteResInfo2(resInfo types.AllResInfo, regionFlavorMap map[string]*typed.RegionFlavor) error {
-	var resourceTypes []string
-	for resType, res := range resInfo.CpuAndMem {
-		//resource type is null, assign default resource type (e.g. when binding a pod for the first time)
-		if resType == "" {
-			resType = string(DefaultResourceType)
-			resourceTypes = append(resourceTypes, resType)
-		}
-		if len(n.RequestedResources) == 0 {
-			reqRes := types.CPUAndMemory{VCPU: res.VCPU, Memory: res.Memory}
-			n.RequestedResources[resType] = &reqRes
-			continue
-		}
-		for reqType, reqRes := range n.RequestedResources {
-			resTypes := strings.Split(reqType, constants.FlavorDelimiter)
-			if !utils.IsContain(resTypes, resType) {
-				klog.V(4).Infof("!utils.IsContain: %v", !utils.IsContain(resTypes, resType))
-				continue
-			}
-			reqRes.VCPU += res.VCPU
-			reqRes.Memory += res.Memory
-			n.RequestedResources[resType] = reqRes
-		}
-	}
-	for volType, used := range resInfo.Storage {
-		reqVol, ok := n.RequestedStorage[volType]
-		if !ok {
-			reqVol = 0
-		}
-		reqVol += used
-		n.RequestedStorage[volType] = reqVol
-	}
-	n.updateSiteFlavor(resourceTypes, regionFlavorMap)
-	n.generation = nextGeneration()
-	return nil
-}*/
-
%#v", flavorid, n.AllocatableFlavor[flavorid]) - return - } - vCPUInt, err := strconv.ParseInt(flv.Vcpus, 10, 64) - if err != nil { - n.deductFlavor() - klog.Infof("n.AllocatableFlavor After - n.AllocatableFlavor[%#v]: %#v", flavorid, n.AllocatableFlavor[flavorid]) - return - } - for _, resourceType := range resourceTypes { - klog.Infof("121 updateSiteFlavor Before - resourceType:%#v, n.TotalResources: %#v, RequestedResources: %#v", resourceType, n.TotalResources[resourceType], n.RequestedResources[resourceType]) - totalRes := n.TotalResources[resourceType] - requestRes := n.RequestedResources[resourceType] - if totalRes == nil { - klog.Infof("updateSiteFlavor - totalRes is nil") - n.deductFlavor() - klog.Infof("n.AllocatableFlavor After - n.AllocatableFlavor[%#v]: %#v", flavorid, n.AllocatableFlavor[flavorid]) - return - } - if requestRes == nil { - klog.Infof("updateSiteFlavor - requestRes is nil") - requestRes = &types.CPUAndMemory{VCPU: 0, Memory: 0} - } - if(deduct == true) { - count := (totalRes.VCPU - requestRes.VCPU) / vCPUInt - memCount := (totalRes.Memory - requestRes.Memory) / flv.Ram - } else { - count := (totalRes.VCPU + requestRes.VCPU) / vCPUInt - memCount := (totalRes.Memory + requestRes.Memory) / flv.Ram - } - if count > memCount { - count = memCount - } - if _, ok := n.AllocatableFlavor[flavorid]; !ok { - n.AllocatableFlavor[flavorid] = 0 - } - klog.Infof("121 n.AllocatableFlavor Before - n.AllocatableFlavor[%#v]: %#v", flavorid, n.AllocatableFlavor[flavorid]) - if n.AllocatableFlavor[flavorid] > count { - n.AllocatableFlavor[flavorid] = count - } - klog.Infof("121 n.AllocatableFlavor After - n.AllocatableFlavor[%#v]: %#v", flavorid, n.AllocatableFlavor[flavorid]) - klog.Infof("121 updateSiteFlavor After - resourceType:%#v, n.TotalResources: %#v, RequestedResources: %#v", resourceType, n.TotalResources[resourceType], n.RequestedResources[resourceType]) - } - } -} - /* updateSiteFlavor() is equal with updateFlavor() functionally. 
 /* updateSiteFlavor() is equal with updateFlavor() functionally.
 But due to the difference between flavor files and data,
@@ -710,10 +598,14 @@ updateFlavor(): /home/ubuntu/go/src/k8s.io/arktos/conf/flavors.json
 global scheduler flavor config file: /home/ubuntu/go/src/k8s.io/arktos/conf/flavor_config.yaml
 */
-func (n *SiteCacheInfo) updateSiteFlavor2(resourceTypes []string, regionFlavors map[string]*typed.RegionFlavor) {
+func (n *SiteCacheInfo) updateSiteFlavor(resourceTypes []string, regionFlavors map[string]*typed.RegionFlavor, deduct bool) {
 	n.mu.Lock()
 	defer n.mu.Unlock()
 
+	var count, memCount int64
+	for k, v := range regionFlavors {
+		klog.V(4).Infof("updateSiteFlavor Before - key: %#v, regionFlavor:%#v", k, v)
+	}
 	if n.AllocatableFlavor == nil {
 		n.AllocatableFlavor = map[string]int64{}
 	}
@@ -723,26 +615,31 @@ func (n *SiteCacheInfo) updateSiteFlavor2(resourceTypes []string, regionFlavors
 		regionFalvorKey := regionName + constants.FlavorDelimiter + flavorid
 		flv := regionFlavors[regionFalvorKey]
 		if flv == nil {
-			n.deductFlavor()
+			n.updateFlavorCount(deduct)
 			return
 		}
 		vCPUInt, err := strconv.ParseInt(flv.Vcpus, 10, 64)
 		if err != nil {
-			n.deductFlavor()
+			n.updateFlavorCount(deduct)
 			return
 		}
 		for _, resourceType := range resourceTypes {
 			totalRes := n.TotalResources[resourceType]
 			requestRes := n.RequestedResources[resourceType]
 			if totalRes == nil {
-				n.deductFlavor()
+				n.updateFlavorCount(deduct)
 				return
 			}
 			if requestRes == nil {
 				requestRes = &types.CPUAndMemory{VCPU: 0, Memory: 0}
 			}
-			count := (totalRes.VCPU - requestRes.VCPU) / vCPUInt
-			memCount := (totalRes.Memory - requestRes.Memory) / flv.Ram
+			if deduct == true {
+				count = (totalRes.VCPU - requestRes.VCPU) / vCPUInt
+				memCount = (totalRes.Memory - requestRes.Memory) / flv.Ram
+			} else {
+				count = (totalRes.VCPU + requestRes.VCPU) / vCPUInt
+				memCount = (totalRes.Memory + requestRes.Memory) / flv.Ram
+			}
 			if count > memCount {
 				count = memCount
 			}
@@ -756,88 +653,28 @@ func (n *SiteCacheInfo) updateSiteFlavor2(resourceTypes []string, regionFlavors
 	}
 }
 
-func (n *SiteCacheInfo) deductFlavor() {
-	if n.AllocatableFlavor == nil {
-		n.AllocatableFlavor = map[string]int64{}
-	}
-	for key, value := range n.AllocatableFlavor {
-		n.AllocatableFlavor[key] = value - 1
-		if n.RequestedFlavor == nil {
-			n.RequestedFlavor = make(map[string]int64)
-		}
-		requested, ok := n.RequestedFlavor[key]
-		if !ok {
-			n.RequestedFlavor[key] = 1
-		} else {
-			n.RequestedFlavor[key] = requested + 1
-		}
-	}
-}
-
-func (n *SiteCacheInfo) updateFlavor(deduct bool) {
-	n := -1
+func (n *SiteCacheInfo) updateFlavorCount(deduct bool) {
+	var m int64
+	m = 1 //add
 	if deduct == true {
-		n=1
+		m = -1 //deduct
 	}
 	if n.AllocatableFlavor == nil {
 		n.AllocatableFlavor = map[string]int64{}
 	}
 	for key, value := range n.AllocatableFlavor {
-		n.AllocatableFlavor[key] = value - 1
+		n.AllocatableFlavor[key] = value + m
 		if n.RequestedFlavor == nil {
 			n.RequestedFlavor = make(map[string]int64)
 		}
 		requested, ok := n.RequestedFlavor[key]
 		if !ok {
-			n.RequestedFlavor[key] = 1
-		} else {
-			n.RequestedFlavor[key] = requested + 1
-		}
-	}
-}
-
-//revoke bound site's resource to pod because pod creation failed
-func (n *SiteCacheInfo) WithdrawSiteResInfo(resInfo types.AllResInfo, regionFlavorMap map[string]*typed.RegionFlavor) error {
-	var resourceTypes []string
-	klog.Infof("444 WithdrawSiteResInfo - resInfo: %#v, regionFlavors:%#v", resInfo, regionFlavorMap)
-	for resType, res := range resInfo.CpuAndMem {
-		//resource type is null, assign default resource type (e.g. when binding a pod for the first time)
-		if resType == "" {
-			resType = string(DefaultResourceType)
-			resourceTypes = append(resourceTypes, resType)
-		}
-		if len(n.RequestedResources) == 0 {
-			reqRes := types.CPUAndMemory{VCPU: res.VCPU, Memory: res.Memory}
-			n.RequestedResources[resType] = &reqRes
-			continue
-		}
-		for reqType, reqRes := range n.RequestedResources {
-			klog.Infof("555 WithdrawSiteResInfo - reqType: %#v, reqRes:%#v", reqType, reqRes)
-			resTypes := strings.Split(reqType, constants.FlavorDelimiter)
-			if !utils.IsContain(resTypes, resType) {
-				klog.Infof("!utils.IsContain: %v", !utils.IsContain(resTypes, resType))
-				continue
+			n.RequestedFlavor[key] = 0
+			if deduct == true {
+				n.RequestedFlavor[key] = 1
 			}
-			reqRes.VCPU += res.VCPU
-			reqRes.Memory += res.Memory
-			klog.Infof("666 WithdrawSiteResInfo Before - RequestedResources[%#v]: %#v", resType, n.RequestedResources[resType])
-			n.RequestedResources[resType] = reqRes
-			klog.Infof("777 WithdrawSiteResInfo After - RequestedResources[%#v]: %#v", resType, n.RequestedResources[resType])
-
-		}
-	}
-	for volType, used := range resInfo.Storage {
-		klog.Infof("888 WithdrawSiteResInfo Before - RequestedStorage[%#v]: %#v", volType, n.RequestedStorage[volType])
-		reqVol, ok := n.RequestedStorage[volType]
-		if !ok {
-			reqVol = 0
+		} else {
+			n.RequestedFlavor[key] = requested - m
 		}
-		reqVol += used
-		n.RequestedStorage[volType] = reqVol
-		klog.Infof("888 WithdrawSiteResInfo After - RequestedStorage[%#v]: %#v", volType, n.RequestedStorage[volType])
-	}
-	n.updateSiteFlavor(resourceTypes, regionFlavorMap)
-	n.generation = nextGeneration()
-	return nil
 }
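The updateSiteFlavor change in this patch decides how many more instances of a flavor a site can host: subtract the requested resources from the totals when reserving (deduct), add them back when revoking, and take the minimum of the vCPU-based and memory-based counts. A self-contained sketch of that arithmetic with the types flattened to plain integers (names simplified from the patch):

package main

import "fmt"

// allocatableCount returns how many instances of a flavor with the given
// vCPU/RAM size fit into the remaining capacity. With deduct=true the
// requested resources are subtracted (reserve); with deduct=false they are
// added back (revoke), mirroring updateSiteFlavor in this series.
func allocatableCount(totalVCPU, totalMem, reqVCPU, reqMem, flavorVCPU, flavorRAM int64, deduct bool) int64 {
	var count, memCount int64
	if deduct {
		count = (totalVCPU - reqVCPU) / flavorVCPU
		memCount = (totalMem - reqMem) / flavorRAM
	} else {
		count = (totalVCPU + reqVCPU) / flavorVCPU
		memCount = (totalMem + reqMem) / flavorRAM
	}
	// the scarcer of the two dimensions bounds the flavor count
	if count > memCount {
		count = memCount
	}
	return count
}

func main() {
	// 64 vCPU / 256 GiB total, 8 vCPU / 32 GiB already requested,
	// flavor size 4 vCPU / 16 GiB -> 14 instances still fit.
	fmt.Println(allocatableCount(64, 256, 8, 32, 4, 16, true))
}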
From 7773eeb5b55e5d20df1483f36427e0161aa71aed Mon Sep 17 00:00:00 2001
From: kimeunju108
Date: Tue, 18 May 2021 20:42:39 +0000
Subject: [PATCH 05/12] implemented resource revocation when vm creation failed

---
 globalscheduler/controllers/dispatcher/BUILD | 2 --
 globalscheduler/pkg/scheduler/scheduler.go   | 6 ------
 2 files changed, 8 deletions(-)

diff --git a/globalscheduler/controllers/dispatcher/BUILD b/globalscheduler/controllers/dispatcher/BUILD
index 1b8dcdde5..5cf8b5c91 100644
--- a/globalscheduler/controllers/dispatcher/BUILD
+++ b/globalscheduler/controllers/dispatcher/BUILD
@@ -31,10 +31,8 @@ go_library(
         "//staging/src/k8s.io/apimachinery/pkg/util/runtime:go_default_library",
         "//staging/src/k8s.io/client-go/kubernetes:go_default_library",
         "//staging/src/k8s.io/client-go/kubernetes/scheme:go_default_library",
-        "//staging/src/k8s.io/client-go/kubernetes/typed/core/v1:go_default_library",
         "//staging/src/k8s.io/client-go/rest:go_default_library",
         "//staging/src/k8s.io/client-go/tools/cache:go_default_library",
-        "//staging/src/k8s.io/client-go/tools/record:go_default_library",
         "//vendor/k8s.io/klog:go_default_library",
     ],
 )
diff --git a/globalscheduler/pkg/scheduler/scheduler.go b/globalscheduler/pkg/scheduler/scheduler.go
index 3b7e91ae2..0f315173f 100644
--- a/globalscheduler/pkg/scheduler/scheduler.go
+++ b/globalscheduler/pkg/scheduler/scheduler.go
@@ -165,15 +165,9 @@ func NewScheduler(gsconfig *types.GSSchedulerConfiguration, stopCh <-chan struct
 	//build entire FlavorMap map
 	sched.UpdateFlavor()
-<<<<<<< HEAD
-	klog.V(4).Infof("FlavorMap: %v", sched.siteCacheInfoSnapshot.FlavorMap)
-	// init pod, cluster, and scheduler informers for scheduler
-	err = sched.initPodClusterSchedulerInformers(gsconfig, stopEverything)
-=======
 	klog.Infof("FlavorMap: %v", sched.siteCacheInfoSnapshot.FlavorMap)
 	// init pod, cluster, scheduler, and allocation informers for scheduler
 	err = sched.initPodClusterSchedulerAllocationInformers(gsconfig, stopEverything)
->>>>>>> f1c13358f172faaa2a74119bbf5395594ebea393
 	if err != nil {
 		return nil, err
 	}

From 3b5a1c36699bc495e161747a58cab4eba53f9fe4 Mon Sep 17 00:00:00 2001
From: kimeunju108
Date: Tue, 18 May 2021 22:37:23 +0000
Subject: [PATCH 06/12] applied review

---
 .../dispatcher/dispatcher_process.go          |  6 ++---
 .../pkg/scheduler/eventhandlers.go            |  4 ++--
 .../plugins/defaultbinder/default_binder.go   | 22 +++----------------
 3 files changed, 8 insertions(+), 24 deletions(-)

diff --git a/globalscheduler/controllers/dispatcher/dispatcher_process.go b/globalscheduler/controllers/dispatcher/dispatcher_process.go
index 8ba07f6c3..6490444df 100644
--- a/globalscheduler/controllers/dispatcher/dispatcher_process.go
+++ b/globalscheduler/controllers/dispatcher/dispatcher_process.go
@@ -100,7 +100,7 @@ func (p *Process) Run(quit chan struct{}) {
 	dispatcherInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
 		DeleteFunc: func(obj interface{}) {
-			klog.Infof("The dispatcher %s process is going to be killed...", p.name)
+			klog.V(3).Infof("The dispatcher %s process is going to be killed...", p.name)
 			os.Exit(0)
 		},
 		UpdateFunc: func(old, new interface{}) {
@@ -215,12 +215,12 @@ func (p *Process) SendPodToCluster(pod *v1.Pod) {
 		go func() {
 			instanceId, err := openstack.ServerCreate(host, token, &pod.Spec)
 			if err == nil {
-				klog.Infof("The openstack vm for the pod %v has been created at the host %v", pod.ObjectMeta.Name, host)
+				klog.V(3).Infof("The openstack vm for the pod %v has been created at the host %v", pod.ObjectMeta.Name, host)
 				pod.Status.ClusterInstanceId = instanceId
 				pod.Status.Phase = v1.ClusterScheduled
 				updatedPod, err := p.clientset.CoreV1().Pods(pod.ObjectMeta.Namespace).UpdateStatus(pod)
 				if err == nil {
-					klog.Infof("The pod %v has been updated its apiserver database status to scheduled successfully with the instance id %v", updatedPod, instanceId)
+					klog.V(3).Infof("The pod %v has been updated its apiserver database status to scheduled successfully with the instance id %v", updatedPod, instanceId)
 				} else {
 					klog.Warningf("The pod %v failed to update its apiserver database status to scheduled with the error %v",
 						pod.ObjectMeta.Name, err)
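SendPodToCluster above creates the OpenStack server inside a goroutine and only then writes the instance ID back into the pod status, so the dispatcher loop is never blocked on VM creation. A reduced sketch of that fire-and-forget pattern; createServer and updateStatus are stand-ins for openstack.ServerCreate and the clientset UpdateStatus call, not the real APIs:

package main

import (
	"sync"

	"k8s.io/klog"
)

// Stand-ins for the dispatcher's OpenStack and apiserver calls.
func createServer(host, podName string) (string, error) { return "instance-123", nil }
func updateStatus(podName, instanceID string) error     { return nil }

var wg sync.WaitGroup

// sendPodToCluster mirrors the dispatcher's asynchronous create-then-record flow.
func sendPodToCluster(host, podName string) {
	wg.Add(1)
	go func() {
		defer wg.Done()
		instanceID, err := createServer(host, podName)
		if err != nil {
			klog.Warningf("vm creation for pod %v failed with the error %v", podName, err)
			return
		}
		klog.V(3).Infof("The openstack vm for the pod %v has been created at the host %v", podName, host)
		if err := updateStatus(podName, instanceID); err != nil {
			klog.Warningf("The pod %v failed to update its status with the error %v", podName, err)
		}
	}()
}

func main() {
	sendPodToCluster("host-a", "pod-0")
	wg.Wait() // keep the process alive until the goroutine finishes
}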
err: %s", err) + klog.Errorf("Getting site selector state failed! err: %s", err) return interfaces.NewStatus(interfaces.Error, fmt.Sprintf("getting site %q info failed: %v", siteID, err)) } klog.V(4).Infof("site selector info: %v", siteSelectedInfo) @@ -102,22 +102,6 @@ func (b DefaultBinder) Bind(ctx context.Context, state *interfaces.CycleState, s resInfo.CpuAndMem[flv.OsExtraSpecs.ResourceType] = reqRes } b.handle.Cache().UpdateSiteWithResInfo(siteID, resInfo) - /*regionFlavors, err := b.handle.SnapshotSharedLister().SiteCacheInfos().GetFlavors() - if err != nil { - klog.Errorf("Getting region's flavor failed: %s", err) - return interfaces.NewStatus(interfaces.Error, fmt.Sprintf("getting site %q info failed: %v", siteID, err)) - } - if regionFlavors == nil || err != nil { - regionFlavors = map[string]*typed.RegionFlavor{} - }*/ - /*siteCacheInfo.DeductSiteResInfo(resInfo, regionFlavors) - klog.V(4).Infof("Resource state after deduction: %v", siteCacheInfo) - return nil*/ - - /*klog.V(4).Infof("111 Resource state before deduction: %#v", siteCacheInfo) - klog.V(4).Infof("222 resInfo: %#v, regionFlavors:%#v", resInfo, regionFlavors) - siteCacheInfo.DeductSiteResInfo(resInfo, regionFlavors) - klog.V(4).Infof("333 Resource state after deduction: %#v", siteCacheInfo)*/ return nil } @@ -140,12 +124,12 @@ func (b DefaultBinder) BindResource(ctx context.Context, state *interfaces.Cycle //siteSelectedInfo is type of SiteSelectorInfo at cycle_state.go siteSelectedInfo, err := interfaces.GetSiteSelectorState(state, siteID) if err != nil { - klog.Errorf("Gettng site selector state failed! err: %s", err) + klog.Errorf("Gettng site selector state failed! err: %v", err) status := interfaces.NewStatus(interfaces.Error, fmt.Sprintf("getting site %q info failed: %v", siteID, err)) return status, siteID, flavorID, &resInfo } if len(stack.Resources) != len(siteSelectedInfo.Flavors) { - klog.Errorf("flavor count not equal to server count! err: %s", err) + klog.Errorf("flavor count not equal to server count! 
err: %v", err) return interfaces.NewStatus(interfaces.Error, fmt.Sprintf("siteID(%s) flavor count not equal to "+ "server count!", siteID)), siteID, flavorID, nil } From f1e154250c562835fee9588c30520f389ffc4f22 Mon Sep 17 00:00:00 2001 From: kimeunju108 Date: Tue, 18 May 2021 22:49:24 +0000 Subject: [PATCH 07/12] updated according to review --- globalscheduler/pkg/scheduler/eventhandlers.go | 1 - 1 file changed, 1 deletion(-) diff --git a/globalscheduler/pkg/scheduler/eventhandlers.go b/globalscheduler/pkg/scheduler/eventhandlers.go index ce6b0ba9f..1b13e907a 100644 --- a/globalscheduler/pkg/scheduler/eventhandlers.go +++ b/globalscheduler/pkg/scheduler/eventhandlers.go @@ -565,7 +565,6 @@ func (sched *Scheduler) verifyClusterInfo(cluster *clusterv1.Cluster) (verified func (sched *Scheduler) verifyPodInfo(pod *v1.Pod) (verified bool) { verified = false name := pod.Name - flavors := pod.Spec.VirtualMachine.Flavors if pod.Name == "" { klog.Errorf("pod name:%s is null", name) return verified From fe2ba14d0321824c44b4def9122a23441cd4986a Mon Sep 17 00:00:00 2001 From: kimeunju108 Date: Wed, 19 May 2021 00:09:26 +0000 Subject: [PATCH 08/12] updated resource data structure --- globalscheduler/pkg/scheduler/eventhandlers.go | 4 ++-- globalscheduler/pkg/scheduler/scheduler.go | 11 ++++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/globalscheduler/pkg/scheduler/eventhandlers.go b/globalscheduler/pkg/scheduler/eventhandlers.go index 1b13e907a..f4e0fa385 100644 --- a/globalscheduler/pkg/scheduler/eventhandlers.go +++ b/globalscheduler/pkg/scheduler/eventhandlers.go @@ -638,7 +638,7 @@ func (sched *Scheduler) deletePodWithdrawResource(obj interface{}) { //withdraw reserved resources to a pod & add it to cash to other pods func (sched *Scheduler) withdrawResource(podName string) error { - resource := sched.ResourceAllocationMap[podName] + resource := sched.PodSiteResourceMap[podName] if resource == nil { klog.V(4).Infof("there is no preserved resource for pod: %s", podName) return nil @@ -652,6 +652,6 @@ func (sched *Scheduler) withdrawResource(podName string) error { } siteCacheInfo := sched.siteCacheInfoSnapshot.SiteCacheInfoMap[resource.SiteID] siteCacheInfo.UpdateSiteResInfo(allResInfo, regionFlavor, false) - delete(sched.ResourceAllocationMap, podName) + delete(sched.PodSiteResourceMap, podName) return nil } diff --git a/globalscheduler/pkg/scheduler/scheduler.go b/globalscheduler/pkg/scheduler/scheduler.go index 0f315173f..0a4ae2175 100644 --- a/globalscheduler/pkg/scheduler/scheduler.go +++ b/globalscheduler/pkg/scheduler/scheduler.go @@ -80,7 +80,8 @@ type ScheduleResult struct { FeasibleSites int // Number of feasible site on one stack scheduled } -type PodSiteResourceAllocation struct { +//perserved site resource for pod +type PodSiteResource struct { PodName string SiteID string Flavor string @@ -134,7 +135,7 @@ type Scheduler struct { workerNumber int // table to withdraw site resource - ResourceAllocationMap map[string]*PodSiteResourceAllocation + PodSiteResourceMap map[string]*PodSiteResource } // single scheduler instance @@ -156,7 +157,7 @@ func NewScheduler(gsconfig *types.GSSchedulerConfiguration, stopCh <-chan struct ConfigFilePath: gsconfig.ConfigFilePath, deletedClusters: make(map[string]string), workerNumber: 1, - ResourceAllocationMap: make(map[string]*PodSiteResourceAllocation), + PodSiteResourceMap: make(map[string]*PodSiteResource), } err := sched.buildFramework() if err != nil { @@ -509,8 +510,8 @@ func (sched *Scheduler) bind(ctx context.Context, stack 
From ec4735bb5483352256c02dbd869daec1b477fefe Mon Sep 17 00:00:00 2001
From: kimeunju108
Date: Wed, 19 May 2021 01:41:31 +0000
Subject: [PATCH 09/12] applied review

---
 globalscheduler/pkg/scheduler/eventhandlers.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/globalscheduler/pkg/scheduler/eventhandlers.go b/globalscheduler/pkg/scheduler/eventhandlers.go
index f4e0fa385..c7428e714 100644
--- a/globalscheduler/pkg/scheduler/eventhandlers.go
+++ b/globalscheduler/pkg/scheduler/eventhandlers.go
@@ -629,7 +629,7 @@ func (sched *Scheduler) deletePodWithdrawResource(obj interface{}) {
 		klog.Errorf("cannot convert to *v1.Pod: %v", t)
 		return
 	}
-
 	err := sched.withdrawResource(pod.Name)
 	if err != nil {
 		klog.Errorf("withdraw resource of pod %s failed", pod.Name)
@@ -655,3 +654,4 @@ func (sched *Scheduler) withdrawResource(podName string) error {
 	delete(sched.PodSiteResourceMap, podName)
 	return nil
 }
+

From 971a2c82eec347aea69830660e2f49e3e25cbe97 Mon Sep 17 00:00:00 2001
From: kimeunju108
Date: Wed, 19 May 2021 05:01:15 +0000
Subject: [PATCH 10/12] changed data structure name

---
 globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go b/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go
index 0397c486d..b97b1ef0b 100644
--- a/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go
+++ b/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go
@@ -487,17 +487,14 @@ func (n *SiteCacheInfo) UpdateSiteWithRatio(ratios []types.AllocationRatio) erro
 		var usedMem = int64(float64(totalRes.Memory) * memRatio)
 		n.updateRequestResourceByResType(resType, &types.CPUAndMemory{VCPU: usedCpu, Memory: usedMem})
 	}
-	n.updateFlavor()
 	n.generation = nextGeneration()
-
 	return nil
 }
 
 //UpdateSpotResources update spot resources
 func (n *SiteCacheInfo) UpdateSpotResources(spotRes map[string]types.SpotResource) error {
 	n.AllocatableSpotFlavor = spotRes
-
 	n.generation = nextGeneration()
 	return nil
 }

From f6caa13251ee7ad098320acb22147cf722124d0f Mon Sep 17 00:00:00 2001
From: kimeunju108
Date: Wed, 19 May 2021 05:12:33 +0000
Subject: [PATCH 11/12] changed data structure name

---
 globalscheduler/pkg/scheduler/eventhandlers.go | 1 -
 1 file changed, 1 deletion(-)

diff --git a/globalscheduler/pkg/scheduler/eventhandlers.go b/globalscheduler/pkg/scheduler/eventhandlers.go
index c7428e714..ebd4ce68a 100644
--- a/globalscheduler/pkg/scheduler/eventhandlers.go
+++ b/globalscheduler/pkg/scheduler/eventhandlers.go
@@ -654,4 +654,3 @@ func (sched *Scheduler) withdrawResource(podName string) error {
 	delete(sched.PodSiteResourceMap, podName)
 	return nil
 }
-
From df58264dfcef960026f8e6b691df90d519836d2a Mon Sep 17 00:00:00 2001
From: kimeunju108
Date: Wed, 19 May 2021 07:22:24 +0000
Subject: [PATCH 12/12] performed cicd test

---
 globalscheduler/pkg/scheduler/scheduler.go | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/globalscheduler/pkg/scheduler/scheduler.go b/globalscheduler/pkg/scheduler/scheduler.go
index 0a4ae2175..3fbc60855 100644
--- a/globalscheduler/pkg/scheduler/scheduler.go
+++ b/globalscheduler/pkg/scheduler/scheduler.go
@@ -148,7 +148,6 @@ func NewScheduler(gsconfig *types.GSSchedulerConfiguration, stopCh <-chan struct
 	if stopEverything == nil {
 		stopEverything = wait.NeverStop
 	}
-
 	sched := &Scheduler{
 		SchedulerName:           gsconfig.SchedulerName,
 		ResourceCollectorApiUrl: gsconfig.ResourceCollectorApiUrl,
@@ -163,7 +162,6 @@ func NewScheduler(gsconfig *types.GSSchedulerConfiguration, stopCh <-chan struct
 	if err != nil {
 		return nil, fmt.Errorf("buildFramework by %s failed! err: %v", types.SchedulerDefaultProviderName, err)
 	}
-
 	//build entire FlavorMap map
 	sched.UpdateFlavor()
 	klog.Infof("FlavorMap: %v", sched.siteCacheInfoSnapshot.FlavorMap)
@@ -172,7 +170,6 @@ func NewScheduler(gsconfig *types.GSSchedulerConfiguration, stopCh <-chan struct
 	if err != nil {
 		return nil, err
 	}
-
 	// add event handler
 	AddAllEventHandlers(sched)
 	return sched, nil
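The recurring change across this series is demoting routine logs from klog.Infof to klog.V(4).Infof, which only emits when the process runs with verbosity 4 or higher. A small sketch of how that threshold behaves with the standard klog API (the verbosity value chosen here is illustrative):

package main

import (
	"flag"

	"k8s.io/klog"
)

func main() {
	klog.InitFlags(nil) // register klog's flags, including -v, on flag.CommandLine
	flag.Set("v", "3")  // e.g. a production setting below the debug level
	flag.Parse()
	defer klog.Flush()

	klog.Infof("always printed")
	klog.V(3).Infof("printed at -v=3 and above")
	klog.V(4).Infof("suppressed at -v=3; needs -v=4, like the logs demoted in this series")
}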