diff --git a/pkg/controllers/clusterresourceplacement/controller.go b/pkg/controllers/clusterresourceplacement/controller.go index 181e2a63b..3f76fd00c 100644 --- a/pkg/controllers/clusterresourceplacement/controller.go +++ b/pkg/controllers/clusterresourceplacement/controller.go @@ -1,3 +1,9 @@ +/* +Copyright (c) Microsoft Corporation. +Licensed under the MIT license. +*/ + +// Package clusterresourceplacement features a controller to reconcile the clusterResourcePlacement changes. package clusterresourceplacement import ( @@ -17,6 +23,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" fleetv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" + "go.goms.io/fleet/pkg/utils" "go.goms.io/fleet/pkg/utils/controller" ) @@ -86,7 +93,18 @@ func (r *Reconciler) deleteClusterResourceSnapshots(ctx context.Context, crp *fl // clusterSchedulingPolicySnapshot status and work status. // If the error type is ErrUnexpectedBehavior, the controller will skip the reconciling. func (r *Reconciler) handleUpdate(ctx context.Context, crp *fleetv1beta1.ClusterResourcePlacement) (ctrl.Result, error) { - _, err := r.getOrCreateClusterSchedulingPolicySnapshot(ctx, crp) + revisionLimit := fleetv1beta1.RevisionHistoryLimitDefaultValue + if crp.Spec.RevisionHistoryLimit != nil { + revisionLimit = *crp.Spec.RevisionHistoryLimit + if revisionLimit <= 0 { + err := fmt.Errorf("invalid clusterResourcePlacement %s: invalid revisionHistoryLimit %d", crp.Name, revisionLimit) + klog.ErrorS(controller.NewUnexpectedBehaviorError(err), "Invalid revisionHistoryLimit value and using default value instead", "clusterResourcePlacement", klog.KObj(crp)) + // use the default value instead + revisionLimit = fleetv1beta1.RevisionHistoryLimitDefaultValue + } + } + + _, err := r.getOrCreateClusterSchedulingPolicySnapshot(ctx, crp, int(revisionLimit)) if err != nil { return ctrl.Result{}, err } @@ -97,7 +115,7 @@ func (r *Reconciler) handleUpdate(ctx context.Context, crp *fleetv1beta1.Cluster resourceSnapshotSpec := fleetv1beta1.ResourceSnapshotSpec{ SelectedResources: selectedResources, } - _, err = r.getOrCreateClusterResourceSnapshot(ctx, crp, &resourceSnapshotSpec) + _, err = r.getOrCreateClusterResourceSnapshot(ctx, crp, &resourceSnapshotSpec, int(revisionLimit)) if err != nil { return ctrl.Result{}, err } @@ -107,11 +125,13 @@ func (r *Reconciler) handleUpdate(ctx context.Context, crp *fleetv1beta1.Cluster return ctrl.Result{}, nil } -func (r *Reconciler) getOrCreateClusterSchedulingPolicySnapshot(ctx context.Context, crp *fleetv1beta1.ClusterResourcePlacement) (*fleetv1beta1.ClusterSchedulingPolicySnapshot, error) { +func (r *Reconciler) getOrCreateClusterSchedulingPolicySnapshot(ctx context.Context, crp *fleetv1beta1.ClusterResourcePlacement, revisionHistoryLimit int) (*fleetv1beta1.ClusterSchedulingPolicySnapshot, error) { crpKObj := klog.KObj(crp) - schedulingPolicy := *crp.Spec.Policy // will exclude the numberOfClusters - schedulingPolicy.NumberOfClusters = nil - policyHash, err := generatePolicyHash(&schedulingPolicy) + schedulingPolicy := crp.Spec.Policy.DeepCopy() + if schedulingPolicy != nil { + schedulingPolicy.NumberOfClusters = nil // will exclude the numberOfClusters + } + policyHash, err := generatePolicyHash(schedulingPolicy) if err != nil { klog.ErrorS(err, "Failed to generate policy hash of crp", "clusterResourcePlacement", crpKObj) return nil, controller.NewUnexpectedBehaviorError(err) @@ -145,7 +165,7 @@ func (r *Reconciler) getOrCreateClusterSchedulingPolicySnapshot(ctx context.Cont // delete 
redundant snapshot revisions before creating a new snapshot to guarantee that the number of snapshots // won't exceed the limit. - if err := r.deleteRedundantSchedulingPolicySnapshots(ctx, crp); err != nil { + if err := r.deleteRedundantSchedulingPolicySnapshots(ctx, crp, revisionHistoryLimit); err != nil { return nil, err } @@ -161,7 +181,7 @@ func (r *Reconciler) getOrCreateClusterSchedulingPolicySnapshot(ctx context.Cont }, }, Spec: fleetv1beta1.SchedulingPolicySnapshotSpec{ - Policy: &schedulingPolicy, + Policy: schedulingPolicy, PolicyHash: []byte(policyHash), }, } @@ -187,38 +207,83 @@ func (r *Reconciler) getOrCreateClusterSchedulingPolicySnapshot(ctx context.Cont return latestPolicySnapshot, nil } -func (r *Reconciler) deleteRedundantSchedulingPolicySnapshots(ctx context.Context, crp *fleetv1beta1.ClusterResourcePlacement) error { +func (r *Reconciler) deleteRedundantSchedulingPolicySnapshots(ctx context.Context, crp *fleetv1beta1.ClusterResourcePlacement, revisionHistoryLimit int) error { sortedList, err := r.listSortedClusterSchedulingPolicySnapshots(ctx, crp) if err != nil { return err } + if len(sortedList.Items) < revisionHistoryLimit { + return nil + } - crpKObj := klog.KObj(crp) - // respect the revisionHistoryLimit field - revisionLimit := fleetv1beta1.RevisionHistoryLimitDefaultValue - if crp.Spec.RevisionHistoryLimit != nil { - revisionLimit = *crp.Spec.RevisionHistoryLimit - if revisionLimit <= 0 { - err := fmt.Errorf("invalid clusterResourcePlacement %s: invalid revisionHistoryLimit %d", crpKObj, revisionLimit) - klog.ErrorS(controller.NewExpectedBehaviorError(err), "Invalid revisionHistoryLimit value and using default value instead", "clusterResourcePlacement", crpKObj) - // use the default value instead - revisionLimit = fleetv1beta1.RevisionHistoryLimitDefaultValue + if len(sortedList.Items)-revisionHistoryLimit > 0 { + // We always delete before creating a new snapshot, so the number of snapshots should never exceed the limit + // as there is no finalizer added and the objects are deleted immediately. + klog.Warningf("The number of clusterSchedulingPolicySnapshots %d owned by clusterResourcePlacement %s exceeds the revisionHistoryLimit %d, which should never happen", len(sortedList.Items), klog.KObj(crp), revisionHistoryLimit) + } + + // In normal situations, len(sortedList) is at most revisionHistoryLimit, so we only need to delete one + // policySnapshot before creating a new one. + // As defensive programming, delete any redundant snapshots, which could be more than one. + for i := 0; i <= len(sortedList.Items)-revisionHistoryLimit; i++ { // need to reserve one slot for the new snapshot + if err := r.Client.Delete(ctx, &sortedList.Items[i]); err != nil && !errors.IsNotFound(err) { + klog.ErrorS(err, "Failed to delete clusterSchedulingPolicySnapshot", "clusterResourcePlacement", klog.KObj(crp), "clusterSchedulingPolicySnapshot", klog.KObj(&sortedList.Items[i])) + return controller.NewAPIServerError(false, err) } } - if len(sortedList.Items) < int(revisionLimit) { + return nil +} + +// deleteRedundantResourceSnapshots handles multiple snapshots in a group.
+func (r *Reconciler) deleteRedundantResourceSnapshots(ctx context.Context, crp *fleetv1beta1.ClusterResourcePlacement, revisionHistoryLimit int) error { + sortedList, err := r.listSortedResourceSnapshots(ctx, crp) + if err != nil { + return err + } + + if len(sortedList.Items) < revisionHistoryLimit { + // If the number of existing snapshots is less than the limit, no matter how many snapshots are in a group, + // we don't need to delete any snapshots. + // Skip the checking and deleting. return nil } - for i := 0; i <= len(sortedList.Items)-int(revisionLimit); i++ { + + crpKObj := klog.KObj(crp) + lastGroupIndex := -1 + groupCounter := 0 + + // Iterate from the end (the latest snapshots), as there could be multiple snapshots in a group, so that the + // newest groups are kept and the older ones are deleted. + for i := len(sortedList.Items) - 1; i >= 0; i-- { + snapshotKObj := klog.KObj(&sortedList.Items[i]) + ii, err := parseResourceIndexFromLabel(&sortedList.Items[i]) + if err != nil { + klog.ErrorS(err, "Failed to parse the resource index label", "clusterResourcePlacement", crpKObj, "clusterResourceSnapshot", snapshotKObj) + return controller.NewUnexpectedBehaviorError(err) + } + if ii != lastGroupIndex { + groupCounter++ + lastGroupIndex = ii + } + if groupCounter < revisionHistoryLimit { // need to reserve one slot for the new snapshot + // When the number of groups seen so far is less than the revision limit, skip deleting the snapshot. + continue + } if err := r.Client.Delete(ctx, &sortedList.Items[i]); err != nil && !errors.IsNotFound(err) { - klog.ErrorS(err, "Failed to delete clusterSchedulingPolicySnapshot", "clusterResourcePlacement", crpKObj, "clusterSchedulingPolicySnapshot", klog.KObj(&sortedList.Items[i])) + klog.ErrorS(err, "Failed to delete clusterResourceSnapshot", "clusterResourcePlacement", crpKObj, "clusterResourceSnapshot", snapshotKObj) return controller.NewAPIServerError(false, err) } } + if groupCounter-revisionHistoryLimit > 0 { + // We always delete before creating a new snapshot, so the number of snapshot groups should never exceed the + // limit as there is no finalizer added and the objects are deleted immediately. + klog.Warningf("The number of clusterResourceSnapshot groups %d owned by clusterResourcePlacement %s exceeds the revisionHistoryLimit %d, which should never happen", groupCounter, crpKObj, revisionHistoryLimit) + } return nil } // TODO handle all the resources selected by placement larger than 1MB size limit of k8s objects. -func (r *Reconciler) getOrCreateClusterResourceSnapshot(ctx context.Context, crp *fleetv1beta1.ClusterResourcePlacement, resourceSnapshotSpec *fleetv1beta1.ResourceSnapshotSpec) (*fleetv1beta1.ClusterResourceSnapshot, error) { +func (r *Reconciler) getOrCreateClusterResourceSnapshot(ctx context.Context, crp *fleetv1beta1.ClusterResourcePlacement, resourceSnapshotSpec *fleetv1beta1.ResourceSnapshotSpec, revisionHistoryLimit int) (*fleetv1beta1.ClusterResourceSnapshot, error) { resourceHash, err := generateResourceHash(resourceSnapshotSpec) if err != nil { klog.ErrorS(err, "Failed to generate resource hash of crp", "clusterResourcePlacement", klog.KObj(crp)) @@ -259,6 +324,11 @@ func (r *Reconciler) getOrCreateClusterResourceSnapshot(ctx context.Context, crp return nil, controller.NewAPIServerError(false, err) } } + // delete redundant snapshot revisions before creating a new snapshot to guarantee that the number of snapshots + // won't exceed the limit.
+ if err := r.deleteRedundantResourceSnapshots(ctx, crp, revisionHistoryLimit); err != nil { + return nil, err + } // create a new resource snapshot latestResourceSnapshotIndex++ @@ -272,6 +342,8 @@ func (r *Reconciler) getOrCreateClusterResourceSnapshot(ctx context.Context, crp }, Annotations: map[string]string{ fleetv1beta1.ResourceGroupHashAnnotation: resourceHash, + // TODO: needs to be updated once we support multiple snapshots + fleetv1beta1.NumberOfResourceSnapshotsAnnotation: "1", }, }, Spec: *resourceSnapshotSpec, @@ -307,7 +379,7 @@ func (r *Reconciler) ensureLatestPolicySnapshot(ctx context.Context, crp *fleetv if crp.Spec.Policy != nil && crp.Spec.Policy.PlacementType == fleetv1beta1.PickNPlacementType && crp.Spec.Policy.NumberOfClusters != nil { - oldCount, err := parseNumberOfClustersFromAnnotation(latest) + oldCount, err := utils.ExtractNumOfClustersFromPolicySnapshot(latest) if err != nil { klog.ErrorS(err, "Failed to parse the numberOfClusterAnnotation", "clusterSchedulingPolicySnapshot", klog.KObj(latest)) return controller.NewUnexpectedBehaviorError(err) @@ -463,29 +535,85 @@ func (r *Reconciler) lookupLatestResourceSnapshot(ctx context.Context, crp *flee klog.ErrorS(err, "Invalid clusterResourceSnapshots", "clusterResourcePlacement", crpKObj) return nil, -1, controller.NewUnexpectedBehaviorError(err) } - // When there are no active snapshots, find the one who has the largest resource index. - if err := r.Client.List(ctx, snapshotList, client.MatchingLabels{fleetv1beta1.CRPTrackingLabel: crp.Name}); err != nil { - klog.ErrorS(err, "Failed to list all clusterResourceSnapshots", "clusterResourcePlacement", crpKObj) - return nil, -1, controller.NewAPIServerError(false, err) + // When there are no active snapshots, find the snapshot that has the largest resource index. + // This should be rare and only happens when the controller crashed before creating the new active snapshot. + sortedList, err := r.listSortedResourceSnapshots(ctx, crp) + if err != nil { + return nil, -1, err } - if len(snapshotList.Items) == 0 { + if len(sortedList.Items) == 0 { // The resource index of the first snapshot will start from 0. return nil, -1, nil } - index := -1 // the index of the cluster resource snapshot array - lastResourceIndex := -1 // the assigned resource index of the cluster resource snapshot - for i := range snapshotList.Items { - resourceIndex, err := parseResourceIndexFromLabel(&snapshotList.Items[i]) + latestSnapshot := &sortedList.Items[len(sortedList.Items)-1] + resourceIndex, err := parseResourceIndexFromLabel(latestSnapshot) + if err != nil { + klog.ErrorS(err, "Failed to parse the resource index label", "clusterResourcePlacement", crpKObj, "clusterResourceSnapshot", klog.KObj(latestSnapshot)) + return nil, -1, controller.NewUnexpectedBehaviorError(err) + } + return latestSnapshot, resourceIndex, nil +} + +// listSortedResourceSnapshots returns the resource snapshots sorted by their index and subindex. +// A resourceSnapshot is less than another when its resourceIndex is less. +// When the resourceIndex is equal, order by the subindex. +// Note: the snapshot that does not have a subindex is the largest of a group, and there should be only one such snapshot in a group.
+func (r *Reconciler) listSortedResourceSnapshots(ctx context.Context, crp *fleetv1beta1.ClusterResourcePlacement) (*fleetv1beta1.ClusterResourceSnapshotList, error) { + snapshotList := &fleetv1beta1.ClusterResourceSnapshotList{} + crpKObj := klog.KObj(crp) + if err := r.Client.List(ctx, snapshotList, client.MatchingLabels{fleetv1beta1.CRPTrackingLabel: crp.Name}); err != nil { + klog.ErrorS(err, "Failed to list all clusterResourceSnapshots", "clusterResourcePlacement", crpKObj) + return nil, controller.NewAPIServerError(false, err) + } + var errs []error + sort.Slice(snapshotList.Items, func(i, j int) bool { + iKObj := klog.KObj(&snapshotList.Items[i]) + jKObj := klog.KObj(&snapshotList.Items[j]) + ii, err := parseResourceIndexFromLabel(&snapshotList.Items[i]) if err != nil { - klog.ErrorS(err, "Failed to parse the resource index label", "clusterResourceSnapshot", klog.KObj(&snapshotList.Items[i])) - return nil, -1, controller.NewUnexpectedBehaviorError(err) + klog.ErrorS(err, "Failed to parse the resource index label", "clusterResourcePlacement", crpKObj, "clusterResourceSnapshot", iKObj) + errs = append(errs, err) + } + ji, err := parseResourceIndexFromLabel(&snapshotList.Items[j]) + if err != nil { + klog.ErrorS(err, "Failed to parse the resource index label", "clusterResourcePlacement", crpKObj, "clusterResourceSnapshot", jKObj) + errs = append(errs, err) } - if lastResourceIndex < resourceIndex { - index = i - lastResourceIndex = resourceIndex + if ii != ji { + return ii < ji } + + iDoesExist, iSubindex, err := utils.ExtractSubindexFromClusterResourceSnapshot(&snapshotList.Items[i]) + if err != nil { + klog.ErrorS(err, "Failed to parse the subindex annotation", "clusterResourcePlacement", crpKObj, "clusterResourceSnapshot", iKObj) + errs = append(errs, err) + } + jDoesExist, jSubindex, err := utils.ExtractSubindexFromClusterResourceSnapshot(&snapshotList.Items[j]) + if err != nil { + klog.ErrorS(err, "Failed to parse the subindex annotation", "clusterResourcePlacement", crpKObj, "clusterResourceSnapshot", jKObj) + errs = append(errs, err) + } + + // Neither of the snapshots has a subindex, which should not happen. + if !iDoesExist && !jDoesExist { + err := fmt.Errorf("more than one resource snapshot in group %d has no subindex", ii) + klog.ErrorS(err, "More than one resource snapshot in a group has no subindex", "clusterResourcePlacement", crpKObj, "clusterResourceSnapshot", iKObj, "clusterResourceSnapshot", jKObj) + errs = append(errs, err) + } + + if !iDoesExist { // the snapshot without a subindex is the largest of its group + return false + } + if !jDoesExist { // the snapshot without a subindex is the largest of its group + return true + } + return iSubindex < jSubindex + }) + + if len(errs) > 0 { + return nil, controller.NewUnexpectedBehaviorError(utilerrors.NewAggregate(errs)) } - return &snapshotList.Items[index], lastResourceIndex, nil + + return snapshotList, nil } // parsePolicyIndexFromLabel returns error when parsing the label which should never return error in production. @@ -508,16 +636,6 @@ func parseResourceIndexFromLabel(s *fleetv1beta1.ClusterResourceSnapshot) (int, return v, nil } -// parseNumberOfClustersFromAnnotation returns error when parsing the annotation which should never return error in production.
-func parseNumberOfClustersFromAnnotation(s *fleetv1beta1.ClusterSchedulingPolicySnapshot) (int, error) { - n := s.Annotations[fleetv1beta1.NumberOfClustersAnnotation] - v, err := strconv.Atoi(n) - if err != nil || v < 0 { - return -1, fmt.Errorf("invalid numberOfCluster %q, error: %w", n, err) - } - return v, nil -} - func generatePolicyHash(policy *fleetv1beta1.PlacementPolicy) (string, error) { jsonBytes, err := json.Marshal(policy) if err != nil { diff --git a/pkg/controllers/clusterresourceplacement/controller_test.go b/pkg/controllers/clusterresourceplacement/controller_test.go index 42ceb8cf4..845ffc309 100644 --- a/pkg/controllers/clusterresourceplacement/controller_test.go +++ b/pkg/controllers/clusterresourceplacement/controller_test.go @@ -1,3 +1,7 @@ +/* +Copyright (c) Microsoft Corporation. +Licensed under the MIT license. +*/ package clusterresourceplacement import ( @@ -33,7 +37,16 @@ var ( fleetAPIVersion = fleetv1beta1.GroupVersion.String() cmpOptions = []cmp.Option{ cmpopts.IgnoreFields(metav1.ObjectMeta{}, "ResourceVersion"), + cmpopts.SortSlices(func(p1, p2 fleetv1beta1.ClusterSchedulingPolicySnapshot) bool { + return p1.Name < p2.Name + }), + cmpopts.SortSlices(func(r1, r2 fleetv1beta1.ClusterResourceSnapshot) bool { + return r1.Name < r2.Name + }), } + singleRevisionLimit = int32(1) + multipleRevisionLimit = int32(2) + invalidRevisionLimit = int32(0) ) func serviceScheme(t *testing.T) *runtime.Scheme { @@ -72,7 +85,6 @@ func clusterResourcePlacementForTest() *fleetv1beta1.ClusterResourcePlacement { Name: testName, }, Spec: fleetv1beta1.ClusterResourcePlacementSpec{ - Policy: placementPolicyForTest(), ResourceSelectors: []fleetv1beta1.ClusterResourceSelector{ { Group: corev1.GroupName, @@ -83,14 +95,15 @@ func clusterResourcePlacementForTest() *fleetv1beta1.ClusterResourcePlacement { }, }, }, + Policy: placementPolicyForTest(), }, } } func TestGetOrCreateClusterSchedulingPolicySnapshot(t *testing.T) { - wantPolicy := placementPolicyForTest() - wantPolicy.NumberOfClusters = nil - jsonBytes, err := json.Marshal(wantPolicy) + testPolicy := placementPolicyForTest() + testPolicy.NumberOfClusters = nil + jsonBytes, err := json.Marshal(testPolicy) if err != nil { t.Fatalf("failed to create the policy hash: %v", err) } @@ -100,18 +113,17 @@ func TestGetOrCreateClusterSchedulingPolicySnapshot(t *testing.T) { t.Fatalf("failed to create the policy hash: %v", err) } unspecifiedPolicyHash := []byte(fmt.Sprintf("%x", sha256.Sum256(jsonBytes))) - singleRevisionLimit := int32(1) - multipleRevisionLimit := int32(2) - invalidRevisionLimit := int32(0) tests := []struct { name string + policy *fleetv1beta1.PlacementPolicy revisionHistoryLimit *int32 policySnapshots []fleetv1beta1.ClusterSchedulingPolicySnapshot wantPolicySnapshots []fleetv1beta1.ClusterSchedulingPolicySnapshot wantLatestSnapshotIndex int // index of the wantPolicySnapshots array }{ { - name: "new clusterResourcePolicy and no existing policy snapshots owned by my-crp", + name: "new clusterResourcePolicy and no existing policy snapshots owned by my-crp", + policy: placementPolicyForTest(), policySnapshots: []fleetv1beta1.ClusterSchedulingPolicySnapshot{ { ObjectMeta: metav1.ObjectMeta{ @@ -158,7 +170,7 @@ func TestGetOrCreateClusterSchedulingPolicySnapshot(t *testing.T) { }, }, Spec: fleetv1beta1.SchedulingPolicySnapshotSpec{ - Policy: wantPolicy, + Policy: testPolicy, PolicyHash: policyHash, }, }, @@ -166,8 +178,7 @@ func TestGetOrCreateClusterSchedulingPolicySnapshot(t *testing.T) { wantLatestSnapshotIndex: 1, }, { - 
name: "new clusterResourcePolicy with invalidRevisionLimit and no existing policy snapshots owned by my-crp", - revisionHistoryLimit: &invalidRevisionLimit, + name: "new clusterResourcePolicy (unspecified policy) and no existing policy snapshots owned by my-crp", policySnapshots: []fleetv1beta1.ClusterSchedulingPolicySnapshot{ { ObjectMeta: metav1.ObjectMeta{ @@ -209,13 +220,9 @@ func TestGetOrCreateClusterSchedulingPolicySnapshot(t *testing.T) { Kind: "ClusterResourcePlacement", }, }, - Annotations: map[string]string{ - fleetv1beta1.NumberOfClustersAnnotation: strconv.Itoa(3), - }, }, Spec: fleetv1beta1.SchedulingPolicySnapshotSpec{ - Policy: wantPolicy, - PolicyHash: policyHash, + PolicyHash: unspecifiedPolicyHash, }, }, }, @@ -223,6 +230,7 @@ func TestGetOrCreateClusterSchedulingPolicySnapshot(t *testing.T) { }, { name: "crp policy has no change", + policy: placementPolicyForTest(), revisionHistoryLimit: &singleRevisionLimit, policySnapshots: []fleetv1beta1.ClusterSchedulingPolicySnapshot{ { @@ -247,7 +255,7 @@ func TestGetOrCreateClusterSchedulingPolicySnapshot(t *testing.T) { }, }, Spec: fleetv1beta1.SchedulingPolicySnapshotSpec{ - Policy: wantPolicy, + Policy: testPolicy, PolicyHash: policyHash, }, }, @@ -275,7 +283,7 @@ func TestGetOrCreateClusterSchedulingPolicySnapshot(t *testing.T) { }, }, Spec: fleetv1beta1.SchedulingPolicySnapshotSpec{ - Policy: wantPolicy, + Policy: testPolicy, PolicyHash: policyHash, }, }, @@ -286,6 +294,7 @@ func TestGetOrCreateClusterSchedulingPolicySnapshot(t *testing.T) { name: "crp policy has changed and there is no active snapshot", // It happens when last reconcile loop fails after setting the latest label to false and // before creating a new policy snapshot. + policy: placementPolicyForTest(), revisionHistoryLimit: &multipleRevisionLimit, policySnapshots: []fleetv1beta1.ClusterSchedulingPolicySnapshot{ { @@ -381,7 +390,7 @@ func TestGetOrCreateClusterSchedulingPolicySnapshot(t *testing.T) { }, }, Spec: fleetv1beta1.SchedulingPolicySnapshotSpec{ - Policy: wantPolicy, + Policy: testPolicy, PolicyHash: policyHash, }, }, @@ -390,6 +399,7 @@ func TestGetOrCreateClusterSchedulingPolicySnapshot(t *testing.T) { }, { name: "crp policy has changed and there is an active snapshot", + policy: placementPolicyForTest(), revisionHistoryLimit: &singleRevisionLimit, policySnapshots: []fleetv1beta1.ClusterSchedulingPolicySnapshot{ { @@ -439,7 +449,7 @@ func TestGetOrCreateClusterSchedulingPolicySnapshot(t *testing.T) { }, }, Spec: fleetv1beta1.SchedulingPolicySnapshotSpec{ - Policy: wantPolicy, + Policy: testPolicy, PolicyHash: policyHash, }, }, @@ -447,7 +457,8 @@ func TestGetOrCreateClusterSchedulingPolicySnapshot(t *testing.T) { wantLatestSnapshotIndex: 0, }, { - name: "crp policy has been changed and reverted back and there is no active snapshot", + name: "crp policy has been changed and reverted back and there is no active snapshot", + policy: placementPolicyForTest(), policySnapshots: []fleetv1beta1.ClusterSchedulingPolicySnapshot{ { ObjectMeta: metav1.ObjectMeta{ @@ -493,7 +504,7 @@ func TestGetOrCreateClusterSchedulingPolicySnapshot(t *testing.T) { }, }, Spec: fleetv1beta1.SchedulingPolicySnapshotSpec{ - Policy: wantPolicy, + Policy: testPolicy, PolicyHash: policyHash, }, }, @@ -544,7 +555,7 @@ func TestGetOrCreateClusterSchedulingPolicySnapshot(t *testing.T) { }, }, Spec: fleetv1beta1.SchedulingPolicySnapshotSpec{ - Policy: wantPolicy, + Policy: testPolicy, PolicyHash: policyHash, }, }, @@ -554,6 +565,7 @@ func TestGetOrCreateClusterSchedulingPolicySnapshot(t 
*testing.T) { { name: "crp policy has not been changed and only the numberOfCluster is changed", // cause no new policy snapshot is created, it does not trigger the history limit check. + policy: placementPolicyForTest(), revisionHistoryLimit: &singleRevisionLimit, policySnapshots: []fleetv1beta1.ClusterSchedulingPolicySnapshot{ { @@ -601,7 +613,7 @@ func TestGetOrCreateClusterSchedulingPolicySnapshot(t *testing.T) { }, }, Spec: fleetv1beta1.SchedulingPolicySnapshotSpec{ - Policy: wantPolicy, + Policy: testPolicy, PolicyHash: policyHash, }, }, @@ -652,7 +664,7 @@ func TestGetOrCreateClusterSchedulingPolicySnapshot(t *testing.T) { }, }, Spec: fleetv1beta1.SchedulingPolicySnapshotSpec{ - Policy: wantPolicy, + Policy: testPolicy, PolicyHash: policyHash, }, }, @@ -664,6 +676,7 @@ func TestGetOrCreateClusterSchedulingPolicySnapshot(t *testing.T) { t.Run(tc.name, func(t *testing.T) { ctx := context.Background() crp := clusterResourcePlacementForTest() + crp.Spec.Policy = tc.policy crp.Spec.RevisionHistoryLimit = tc.revisionHistoryLimit objects := []client.Object{crp} for i := range tc.policySnapshots { @@ -675,7 +688,11 @@ func TestGetOrCreateClusterSchedulingPolicySnapshot(t *testing.T) { WithObjects(objects...). Build() r := Reconciler{Client: fakeClient, Scheme: scheme} - got, err := r.getOrCreateClusterSchedulingPolicySnapshot(ctx, crp) + limit := fleetv1beta1.RevisionHistoryLimitDefaultValue + if tc.revisionHistoryLimit != nil { + limit = *tc.revisionHistoryLimit + } + got, err := r.getOrCreateClusterSchedulingPolicySnapshot(ctx, crp, int(limit)) if err != nil { t.Fatalf("failed to getOrCreateClusterSchedulingPolicySnapshot: %v", err) } @@ -705,9 +722,8 @@ func TestGetOrCreateClusterSchedulingPolicySnapshot_failure(t *testing.T) { } policyHash := []byte(fmt.Sprintf("%x", sha256.Sum256(jsonBytes))) tests := []struct { - name string - revisionHistoryLimit *int32 - policySnapshots []fleetv1beta1.ClusterSchedulingPolicySnapshot + name string + policySnapshots []fleetv1beta1.ClusterSchedulingPolicySnapshot }{ { // Should never hit this case unless there is a bug in the controller or customers manually modify the clusterPolicySnapshot. @@ -932,7 +948,7 @@ func TestGetOrCreateClusterSchedulingPolicySnapshot_failure(t *testing.T) { WithObjects(objects...). 
Build() r := Reconciler{Client: fakeClient, Scheme: scheme} - _, err := r.getOrCreateClusterSchedulingPolicySnapshot(ctx, crp) + _, err := r.getOrCreateClusterSchedulingPolicySnapshot(ctx, crp, 1) if err == nil { // if error is nil t.Fatal("getOrCreateClusterResourceSnapshot() = nil, want err") } @@ -998,6 +1014,7 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { tests := []struct { name string resourceSnapshotSpec *fleetv1beta1.ResourceSnapshotSpec + revisionHistoryLimit *int32 resourceSnapshots []fleetv1beta1.ClusterResourceSnapshot wantResourceSnapshots []fleetv1beta1.ClusterResourceSnapshot wantLatestSnapshotIndex int // index of the wantPolicySnapshots array @@ -1005,6 +1022,7 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { { name: "new resourceSnapshot and no existing snapshots owned by my-crp", resourceSnapshotSpec: resourceSnapshotSpecA, + revisionHistoryLimit: &invalidRevisionLimit, resourceSnapshots: []fleetv1beta1.ClusterResourceSnapshot{ { ObjectMeta: metav1.ObjectMeta{ @@ -1015,7 +1033,8 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { fleetv1beta1.CRPTrackingLabel: "another-crp", }, Annotations: map[string]string{ - fleetv1beta1.ResourceGroupHashAnnotation: "abc", + fleetv1beta1.ResourceGroupHashAnnotation: "abc", + fleetv1beta1.NumberOfResourceSnapshotsAnnotation: "1", }, }, }, @@ -1030,7 +1049,8 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { fleetv1beta1.CRPTrackingLabel: "another-crp", }, Annotations: map[string]string{ - fleetv1beta1.ResourceGroupHashAnnotation: "abc", + fleetv1beta1.ResourceGroupHashAnnotation: "abc", + fleetv1beta1.NumberOfResourceSnapshotsAnnotation: "1", }, }, }, @@ -1053,7 +1073,8 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { }, }, Annotations: map[string]string{ - fleetv1beta1.ResourceGroupHashAnnotation: resourceSnapshotAHash, + fleetv1beta1.ResourceGroupHashAnnotation: resourceSnapshotAHash, + fleetv1beta1.NumberOfResourceSnapshotsAnnotation: "1", }, }, Spec: *resourceSnapshotSpecA, @@ -1064,6 +1085,7 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { { name: "resource has no change", resourceSnapshotSpec: resourceSnapshotSpecA, + revisionHistoryLimit: &singleRevisionLimit, resourceSnapshots: []fleetv1beta1.ClusterResourceSnapshot{ { ObjectMeta: metav1.ObjectMeta{ @@ -1083,7 +1105,8 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { }, }, Annotations: map[string]string{ - fleetv1beta1.ResourceGroupHashAnnotation: resourceSnapshotAHash, + fleetv1beta1.ResourceGroupHashAnnotation: resourceSnapshotAHash, + fleetv1beta1.NumberOfResourceSnapshotsAnnotation: "1", }, }, Spec: *resourceSnapshotSpecA, @@ -1108,7 +1131,8 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { }, }, Annotations: map[string]string{ - fleetv1beta1.ResourceGroupHashAnnotation: resourceSnapshotAHash, + fleetv1beta1.ResourceGroupHashAnnotation: resourceSnapshotAHash, + fleetv1beta1.NumberOfResourceSnapshotsAnnotation: "1", }, }, Spec: *resourceSnapshotSpecA, @@ -1117,10 +1141,11 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { wantLatestSnapshotIndex: 0, }, { - name: "resource has changed and there is no active snapshot", + name: "resource has changed and there is no active snapshot with single revisionLimit", // It happens when last reconcile loop fails after setting the latest label to false and // before creating a new resource snapshot. 
resourceSnapshotSpec: resourceSnapshotSpecB, + revisionHistoryLimit: &singleRevisionLimit, resourceSnapshots: []fleetv1beta1.ClusterResourceSnapshot{ { ObjectMeta: metav1.ObjectMeta{ @@ -1139,16 +1164,15 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { }, }, Annotations: map[string]string{ - fleetv1beta1.ResourceGroupHashAnnotation: resourceSnapshotAHash, + fleetv1beta1.ResourceGroupHashAnnotation: resourceSnapshotAHash, + fleetv1beta1.NumberOfResourceSnapshotsAnnotation: "3", }, }, Spec: *resourceSnapshotSpecA, }, - }, - wantResourceSnapshots: []fleetv1beta1.ClusterResourceSnapshot{ { ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameFmt, testName, 0), + Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameWithSubindexFmt, testName, 0, 0), Labels: map[string]string{ fleetv1beta1.ResourceIndexLabel: "0", fleetv1beta1.CRPTrackingLabel: testName, @@ -1163,17 +1187,64 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { }, }, Annotations: map[string]string{ - fleetv1beta1.ResourceGroupHashAnnotation: resourceSnapshotAHash, + fleetv1beta1.SubindexOfResourceSnapshotAnnotation: "0", + }, + }, + Spec: *resourceSnapshotSpecB, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameWithSubindexFmt, testName, 0, 1), + Labels: map[string]string{ + fleetv1beta1.ResourceIndexLabel: "0", + fleetv1beta1.CRPTrackingLabel: testName, + }, + OwnerReferences: []metav1.OwnerReference{ + { + Name: testName, + BlockOwnerDeletion: pointer.Bool(true), + Controller: pointer.Bool(true), + APIVersion: fleetAPIVersion, + Kind: "ClusterResourcePlacement", + }, + }, + Annotations: map[string]string{ + fleetv1beta1.SubindexOfResourceSnapshotAnnotation: "1", + }, + }, + Spec: *resourceSnapshotSpecB, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameFmt, testName, 1), + Labels: map[string]string{ + fleetv1beta1.ResourceIndexLabel: "1", + fleetv1beta1.CRPTrackingLabel: testName, + }, + OwnerReferences: []metav1.OwnerReference{ + { + Name: testName, + BlockOwnerDeletion: pointer.Bool(true), + Controller: pointer.Bool(true), + APIVersion: fleetAPIVersion, + Kind: "ClusterResourcePlacement", + }, + }, + Annotations: map[string]string{ + fleetv1beta1.ResourceGroupHashAnnotation: resourceSnapshotAHash, + fleetv1beta1.NumberOfResourceSnapshotsAnnotation: "1", }, }, Spec: *resourceSnapshotSpecA, }, + }, + wantResourceSnapshots: []fleetv1beta1.ClusterResourceSnapshot{ // new resource snapshot { ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameFmt, testName, 1), + Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameFmt, testName, 2), Labels: map[string]string{ - fleetv1beta1.ResourceIndexLabel: "1", + fleetv1beta1.ResourceIndexLabel: "2", fleetv1beta1.IsLatestSnapshotLabel: "true", fleetv1beta1.CRPTrackingLabel: testName, }, @@ -1187,17 +1258,19 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { }, }, Annotations: map[string]string{ - fleetv1beta1.ResourceGroupHashAnnotation: resourceSnapshotBHash, + fleetv1beta1.ResourceGroupHashAnnotation: resourceSnapshotBHash, + fleetv1beta1.NumberOfResourceSnapshotsAnnotation: "1", }, }, Spec: *resourceSnapshotSpecB, }, }, - wantLatestSnapshotIndex: 1, + wantLatestSnapshotIndex: 0, }, { - name: "resource has changed and there is an active snapshot", + name: "resource has changed and there is an active snapshot with multiple revisionLimit", resourceSnapshotSpec: resourceSnapshotSpecB, + revisionHistoryLimit: 
&multipleRevisionLimit, resourceSnapshots: []fleetv1beta1.ClusterResourceSnapshot{ { ObjectMeta: metav1.ObjectMeta{ @@ -1217,13 +1290,102 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { }, }, Annotations: map[string]string{ - fleetv1beta1.ResourceGroupHashAnnotation: resourceSnapshotAHash, + fleetv1beta1.ResourceGroupHashAnnotation: resourceSnapshotAHash, + fleetv1beta1.NumberOfResourceSnapshotsAnnotation: "3", }, }, Spec: *resourceSnapshotSpecA, }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameWithSubindexFmt, testName, 0, 0), + Labels: map[string]string{ + fleetv1beta1.ResourceIndexLabel: "0", + fleetv1beta1.CRPTrackingLabel: testName, + }, + OwnerReferences: []metav1.OwnerReference{ + { + Name: testName, + BlockOwnerDeletion: pointer.Bool(true), + Controller: pointer.Bool(true), + APIVersion: fleetAPIVersion, + Kind: "ClusterResourcePlacement", + }, + }, + Annotations: map[string]string{ + fleetv1beta1.SubindexOfResourceSnapshotAnnotation: "0", + }, + }, + Spec: *resourceSnapshotSpecB, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameWithSubindexFmt, testName, 0, 1), + Labels: map[string]string{ + fleetv1beta1.ResourceIndexLabel: "0", + fleetv1beta1.CRPTrackingLabel: testName, + }, + OwnerReferences: []metav1.OwnerReference{ + { + Name: testName, + BlockOwnerDeletion: pointer.Bool(true), + Controller: pointer.Bool(true), + APIVersion: fleetAPIVersion, + Kind: "ClusterResourcePlacement", + }, + }, + Annotations: map[string]string{ + fleetv1beta1.SubindexOfResourceSnapshotAnnotation: "1", + }, + }, + Spec: *resourceSnapshotSpecB, + }, }, wantResourceSnapshots: []fleetv1beta1.ClusterResourceSnapshot{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameWithSubindexFmt, testName, 0, 0), + Labels: map[string]string{ + fleetv1beta1.ResourceIndexLabel: "0", + fleetv1beta1.CRPTrackingLabel: testName, + }, + OwnerReferences: []metav1.OwnerReference{ + { + Name: testName, + BlockOwnerDeletion: pointer.Bool(true), + Controller: pointer.Bool(true), + APIVersion: fleetAPIVersion, + Kind: "ClusterResourcePlacement", + }, + }, + Annotations: map[string]string{ + fleetv1beta1.SubindexOfResourceSnapshotAnnotation: "0", + }, + }, + Spec: *resourceSnapshotSpecB, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameWithSubindexFmt, testName, 0, 1), + Labels: map[string]string{ + fleetv1beta1.ResourceIndexLabel: "0", + fleetv1beta1.CRPTrackingLabel: testName, + }, + OwnerReferences: []metav1.OwnerReference{ + { + Name: testName, + BlockOwnerDeletion: pointer.Bool(true), + Controller: pointer.Bool(true), + APIVersion: fleetAPIVersion, + Kind: "ClusterResourcePlacement", + }, + }, + Annotations: map[string]string{ + fleetv1beta1.SubindexOfResourceSnapshotAnnotation: "1", + }, + }, + Spec: *resourceSnapshotSpecB, + }, { ObjectMeta: metav1.ObjectMeta{ Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameFmt, testName, 0), @@ -1242,7 +1404,8 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { }, }, Annotations: map[string]string{ - fleetv1beta1.ResourceGroupHashAnnotation: resourceSnapshotAHash, + fleetv1beta1.ResourceGroupHashAnnotation: resourceSnapshotAHash, + fleetv1beta1.NumberOfResourceSnapshotsAnnotation: "3", }, }, Spec: *resourceSnapshotSpecA, @@ -1266,13 +1429,14 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { }, }, Annotations: map[string]string{ - fleetv1beta1.ResourceGroupHashAnnotation: 
resourceSnapshotBHash, + fleetv1beta1.ResourceGroupHashAnnotation: resourceSnapshotBHash, + fleetv1beta1.NumberOfResourceSnapshotsAnnotation: "1", }, }, Spec: *resourceSnapshotSpecB, }, }, - wantLatestSnapshotIndex: 1, + wantLatestSnapshotIndex: 3, }, { name: "resource has been changed and reverted back and there is no active snapshot", @@ -1295,13 +1459,58 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { }, }, Annotations: map[string]string{ - fleetv1beta1.ResourceGroupHashAnnotation: resourceSnapshotAHash, + fleetv1beta1.ResourceGroupHashAnnotation: resourceSnapshotAHash, + fleetv1beta1.NumberOfResourceSnapshotsAnnotation: "2", + }, + }, + Spec: *resourceSnapshotSpecA, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameWithSubindexFmt, testName, 0, 0), + Labels: map[string]string{ + fleetv1beta1.ResourceIndexLabel: "0", + fleetv1beta1.CRPTrackingLabel: testName, + }, + OwnerReferences: []metav1.OwnerReference{ + { + Name: testName, + BlockOwnerDeletion: pointer.Bool(true), + Controller: pointer.Bool(true), + APIVersion: fleetAPIVersion, + Kind: "ClusterResourcePlacement", + }, + }, + Annotations: map[string]string{ + fleetv1beta1.SubindexOfResourceSnapshotAnnotation: "0", }, }, Spec: *resourceSnapshotSpecA, }, }, wantResourceSnapshots: []fleetv1beta1.ClusterResourceSnapshot{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameWithSubindexFmt, testName, 0, 0), + Labels: map[string]string{ + fleetv1beta1.ResourceIndexLabel: "0", + fleetv1beta1.CRPTrackingLabel: testName, + }, + OwnerReferences: []metav1.OwnerReference{ + { + Name: testName, + BlockOwnerDeletion: pointer.Bool(true), + Controller: pointer.Bool(true), + APIVersion: fleetAPIVersion, + Kind: "ClusterResourcePlacement", + }, + }, + Annotations: map[string]string{ + fleetv1beta1.SubindexOfResourceSnapshotAnnotation: "0", + }, + }, + Spec: *resourceSnapshotSpecA, + }, { ObjectMeta: metav1.ObjectMeta{ Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameFmt, testName, 0), @@ -1320,19 +1529,21 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { }, }, Annotations: map[string]string{ - fleetv1beta1.ResourceGroupHashAnnotation: resourceSnapshotAHash, + fleetv1beta1.ResourceGroupHashAnnotation: resourceSnapshotAHash, + fleetv1beta1.NumberOfResourceSnapshotsAnnotation: "2", }, }, Spec: *resourceSnapshotSpecA, }, }, - wantLatestSnapshotIndex: 0, + wantLatestSnapshotIndex: 1, }, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { ctx := context.Background() crp := clusterResourcePlacementForTest() + crp.Spec.RevisionHistoryLimit = tc.revisionHistoryLimit objects := []client.Object{crp} for i := range tc.resourceSnapshots { objects = append(objects, &tc.resourceSnapshots[i]) @@ -1346,7 +1557,11 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { Client: fakeClient, Scheme: scheme, } - got, err := r.getOrCreateClusterResourceSnapshot(ctx, crp, tc.resourceSnapshotSpec) + limit := fleetv1beta1.RevisionHistoryLimitDefaultValue + if tc.revisionHistoryLimit != nil { + limit = *tc.revisionHistoryLimit + } + got, err := r.getOrCreateClusterResourceSnapshot(ctx, crp, tc.resourceSnapshotSpec, int(limit)) if err != nil { t.Fatalf("failed to handle getOrCreateClusterResourceSnapshot: %v", err) } @@ -1435,7 +1650,7 @@ func TestGetOrCreateClusterResourceSnapshot_failure(t *testing.T) { }, { // Should never hit this case unless there is a bug in the controller or customers manually modify the clusterResourceSnapshot. 
- name: "no active policy snapshot exists and policySnapshot with invalid resourceIndex label", + name: "no active resource snapshot exists and resourceSnapshot with invalid resourceIndex label", resourceSnapshots: []fleetv1beta1.ClusterResourceSnapshot{ { ObjectMeta: metav1.ObjectMeta{ @@ -1453,7 +1668,185 @@ func TestGetOrCreateClusterResourceSnapshot_failure(t *testing.T) { }, { // Should never hit this case unless there is a bug in the controller or customers manually modify the clusterResourceSnapshot. - name: "multiple active policy snapshot exist", + name: "no active resource snapshot exists and multiple resourceSnapshots with invalid resourceIndex label", + resourceSnapshots: []fleetv1beta1.ClusterResourceSnapshot{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameFmt, testName, 0), + Labels: map[string]string{ + fleetv1beta1.CRPTrackingLabel: testName, + fleetv1beta1.ResourceIndexLabel: "abc", + }, + Annotations: map[string]string{ + fleetv1beta1.ResourceGroupHashAnnotation: "abc", + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameFmt, testName, 1), + Labels: map[string]string{ + fleetv1beta1.CRPTrackingLabel: testName, + fleetv1beta1.ResourceIndexLabel: "abc", + }, + Annotations: map[string]string{ + fleetv1beta1.ResourceGroupHashAnnotation: "abc", + }, + }, + }, + }, + }, + { + // Should never hit this case unless there is a bug in the controller or customers manually modify the clusterResourceSnapshot. + name: "no active resource snapshot exists and multiple resourceSnapshots with invalid subindex annotation", + resourceSnapshots: []fleetv1beta1.ClusterResourceSnapshot{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameFmt, testName, 0), + Labels: map[string]string{ + fleetv1beta1.CRPTrackingLabel: testName, + fleetv1beta1.ResourceIndexLabel: "0", + }, + Annotations: map[string]string{ + fleetv1beta1.ResourceGroupHashAnnotation: "0", + fleetv1beta1.NumberOfResourceSnapshotsAnnotation: "2", + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameWithSubindexFmt, testName, 0, 0), + Labels: map[string]string{ + fleetv1beta1.ResourceIndexLabel: "0", + fleetv1beta1.CRPTrackingLabel: testName, + }, + OwnerReferences: []metav1.OwnerReference{ + { + Name: testName, + BlockOwnerDeletion: pointer.Bool(true), + Controller: pointer.Bool(true), + APIVersion: fleetAPIVersion, + Kind: "ClusterResourcePlacement", + }, + }, + Annotations: map[string]string{ + fleetv1beta1.SubindexOfResourceSnapshotAnnotation: "abc", + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameFmt, testName, 1), + Labels: map[string]string{ + fleetv1beta1.CRPTrackingLabel: testName, + fleetv1beta1.ResourceIndexLabel: "1", + }, + Annotations: map[string]string{ + fleetv1beta1.ResourceGroupHashAnnotation: "abc", + fleetv1beta1.NumberOfResourceSnapshotsAnnotation: "2", + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameWithSubindexFmt, testName, 1, 0), + Labels: map[string]string{ + fleetv1beta1.ResourceIndexLabel: "0", + fleetv1beta1.CRPTrackingLabel: testName, + }, + OwnerReferences: []metav1.OwnerReference{ + { + Name: testName, + BlockOwnerDeletion: pointer.Bool(true), + Controller: pointer.Bool(true), + APIVersion: fleetAPIVersion, + Kind: "ClusterResourcePlacement", + }, + }, + Annotations: map[string]string{ + 
fleetv1beta1.SubindexOfResourceSnapshotAnnotation: "abc", + }, + }, + }, + }, + }, + { + // Should never hit this case unless there is a bug in the controller or customers manually modify the clusterResourceSnapshot. + name: "no active resource snapshot exists and multiple resourceSnapshots with invalid subindex (<0) annotation", + resourceSnapshots: []fleetv1beta1.ClusterResourceSnapshot{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameFmt, testName, 0), + Labels: map[string]string{ + fleetv1beta1.CRPTrackingLabel: testName, + fleetv1beta1.ResourceIndexLabel: "0", + }, + Annotations: map[string]string{ + fleetv1beta1.ResourceGroupHashAnnotation: "0", + fleetv1beta1.NumberOfResourceSnapshotsAnnotation: "2", + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameWithSubindexFmt, testName, 0, 0), + Labels: map[string]string{ + fleetv1beta1.ResourceIndexLabel: "0", + fleetv1beta1.CRPTrackingLabel: testName, + }, + OwnerReferences: []metav1.OwnerReference{ + { + Name: testName, + BlockOwnerDeletion: pointer.Bool(true), + Controller: pointer.Bool(true), + APIVersion: fleetAPIVersion, + Kind: "ClusterResourcePlacement", + }, + }, + Annotations: map[string]string{ + fleetv1beta1.SubindexOfResourceSnapshotAnnotation: "-1", + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameFmt, testName, 1), + Labels: map[string]string{ + fleetv1beta1.CRPTrackingLabel: testName, + fleetv1beta1.ResourceIndexLabel: "1", + }, + Annotations: map[string]string{ + fleetv1beta1.ResourceGroupHashAnnotation: "abc", + fleetv1beta1.NumberOfResourceSnapshotsAnnotation: "2", + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameWithSubindexFmt, testName, 1, 0), + Labels: map[string]string{ + fleetv1beta1.ResourceIndexLabel: "0", + fleetv1beta1.CRPTrackingLabel: testName, + }, + OwnerReferences: []metav1.OwnerReference{ + { + Name: testName, + BlockOwnerDeletion: pointer.Bool(true), + Controller: pointer.Bool(true), + APIVersion: fleetAPIVersion, + Kind: "ClusterResourcePlacement", + }, + }, + Annotations: map[string]string{ + fleetv1beta1.SubindexOfResourceSnapshotAnnotation: "-1", + }, + }, + }, + }, + }, + { + // Should never hit this case unless there is a bug in the controller or customers manually modify the clusterResourceSnapshot. 
+ name: "multiple active resource snapshot exist", resourceSnapshots: []fleetv1beta1.ClusterResourceSnapshot{ { ObjectMeta: metav1.ObjectMeta{ @@ -1549,7 +1942,7 @@ func TestGetOrCreateClusterResourceSnapshot_failure(t *testing.T) { Client: fakeClient, Scheme: scheme, } - _, err := r.getOrCreateClusterResourceSnapshot(ctx, crp, resourceSnapshotSpecA) + _, err := r.getOrCreateClusterResourceSnapshot(ctx, crp, resourceSnapshotSpecA, 1) if err == nil { // if error is nil t.Fatal("getOrCreateClusterResourceSnapshot() = nil, want err") } diff --git a/pkg/scheduler/framework/framework_test.go b/pkg/scheduler/framework/framework_test.go index 75632b3d0..839dbf052 100644 --- a/pkg/scheduler/framework/framework_test.go +++ b/pkg/scheduler/framework/framework_test.go @@ -53,7 +53,7 @@ var ( lessFuncCluster = func(cluster1, cluster2 *fleetv1beta1.MemberCluster) bool { return cluster1.Name < cluster2.Name } - lessFuncScoredCluster = func(scored1, scored2 ScoredCluster) bool { + lessFuncScoredCluster = func(scored1, scored2 *ScoredCluster) bool { return scored1.Cluster.Name < scored2.Cluster.Name } lessFuncFilteredCluster = func(filtered1, filtered2 *filteredClusterWithStatus) bool { diff --git a/pkg/utils/common.go b/pkg/utils/common.go index 998300038..6241422a2 100644 --- a/pkg/utils/common.go +++ b/pkg/utils/common.go @@ -275,3 +275,17 @@ func ExtractNumOfClustersFromPolicySnapshot(policy *fleetv1beta1.ClusterScheduli return numOfClusters, nil } + +// ExtractSubindexFromClusterResourceSnapshot extracts the subindex value from the annotations from a clusterResourceSnapshot. +func ExtractSubindexFromClusterResourceSnapshot(snapshot *fleetv1beta1.ClusterResourceSnapshot) (doesExist bool, subindex int, err error) { + subindexStr, ok := snapshot.Annotations[fleetv1beta1.SubindexOfResourceSnapshotAnnotation] + if !ok { + return false, -1, nil + } + subindex, err = strconv.Atoi(subindexStr) + if err != nil || subindex < 0 { + return true, -1, fmt.Errorf("invalid annotation %s: %s is invalid: %w", fleetv1beta1.SubindexOfResourceSnapshotAnnotation, subindexStr, err) + } + + return true, subindex, nil +} diff --git a/pkg/utils/common_test.go b/pkg/utils/common_test.go index bb0b25cde..bc16663f7 100644 --- a/pkg/utils/common_test.go +++ b/pkg/utils/common_test.go @@ -86,3 +86,79 @@ func TestExtractNumOfClustersFromPolicySnapshot(t *testing.T) { }) } } + +func TestExtractSubindexFromClusterResourceSnapshot(t *testing.T) { + snapshotName := "test-snapshot" + + testCases := []struct { + name string + snapshot *fleetv1beta1.ClusterResourceSnapshot + wantExist bool + wantSubindex int + wantError bool + }{ + { + name: "valid annotation", + snapshot: &fleetv1beta1.ClusterResourceSnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: snapshotName, + Annotations: map[string]string{ + fleetv1beta1.SubindexOfResourceSnapshotAnnotation: "1", + }, + }, + }, + wantExist: true, + wantSubindex: 1, + }, + { + name: "no annotation", + snapshot: &fleetv1beta1.ClusterResourceSnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: snapshotName, + }, + }, + wantExist: false, + wantSubindex: -1, + }, + { + name: "invalid annotation: not an integer", + snapshot: &fleetv1beta1.ClusterResourceSnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: snapshotName, + Annotations: map[string]string{ + fleetv1beta1.SubindexOfResourceSnapshotAnnotation: "abc", + }, + }, + }, + wantError: true, + }, + { + name: "invalid annotation: negative integer", + snapshot: &fleetv1beta1.ClusterResourceSnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: snapshotName, + 
Annotations: map[string]string{ + fleetv1beta1.SubindexOfResourceSnapshotAnnotation: "-1", + }, + }, + }, + wantError: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + gotExist, gotSubindex, err := ExtractSubindexFromClusterResourceSnapshot(tc.snapshot) + if tc.wantError { + if err == nil { + t.Fatalf("ExtractSubindexFromClusterResourceSnapshot() = %v, %v, want error", gotExist, gotSubindex) + } + return + } + + if gotExist != tc.wantExist || gotSubindex != tc.wantSubindex { + t.Fatalf("ExtractSubindexFromClusterResourceSnapshot() = %v, %v, want %v, %v", gotExist, gotSubindex, tc.wantExist, tc.wantSubindex) + } + }) + } +}
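
Note for reviewers: below is a minimal, standalone sketch (with simplified stand-in types, not the fleet API) of the index/subindex ordering that listSortedResourceSnapshots relies on. The snapshot without a subindex annotation is treated as the last (largest) member of its group, so the tail of the sorted list is always the group's un-subindexed snapshot.

package main

import (
	"fmt"
	"sort"
)

// snapshotKey is a hypothetical stand-in for the index/subindex metadata carried
// by a ClusterResourceSnapshot's labels and annotations; it is for illustration only.
type snapshotKey struct {
	name          string
	resourceIndex int
	hasSubindex   bool
	subindex      int
}

// less mirrors the comparator sketched in the diff: order primarily by resource
// index, then by subindex, with the snapshot that has no subindex sorting last
// within its group.
func less(a, b snapshotKey) bool {
	if a.resourceIndex != b.resourceIndex {
		return a.resourceIndex < b.resourceIndex
	}
	if !a.hasSubindex { // the un-subindexed snapshot is the largest of its group
		return false
	}
	if !b.hasSubindex {
		return true
	}
	return a.subindex < b.subindex
}

func main() {
	snapshots := []snapshotKey{
		{name: "crp-1-snapshot", resourceIndex: 1, hasSubindex: false},
		{name: "crp-0-snapshot-1", resourceIndex: 0, hasSubindex: true, subindex: 1},
		{name: "crp-0-snapshot", resourceIndex: 0, hasSubindex: false},
		{name: "crp-0-snapshot-0", resourceIndex: 0, hasSubindex: true, subindex: 0},
	}
	sort.Slice(snapshots, func(i, j int) bool { return less(snapshots[i], snapshots[j]) })
	for _, s := range snapshots {
		fmt.Println(s.name)
	}
	// Prints group 0 first (subindex 0, subindex 1, then the group's un-subindexed
	// snapshot), followed by group 1.
}

This ordering is also why deleteRedundantResourceSnapshots can walk the sorted list backwards and detect group boundaries simply by watching the resource index change.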