From 9c2be0fb86753d5e5f8e038e38608fa1d8b801ce Mon Sep 17 00:00:00 2001 From: "David J. M. Karlsen" Date: Thu, 21 Jan 2021 00:04:13 +0100 Subject: [PATCH] Make un-registration more robust (#167) --- controllers/githubactionrunner_controller.go | 83 ++++++++++++-------- controllers/podrunner_types.go | 18 +++-- controllers/podrunner_types_test.go | 14 ++-- 3 files changed, 71 insertions(+), 44 deletions(-) diff --git a/controllers/githubactionrunner_controller.go b/controllers/githubactionrunner_controller.go index e4318336..0fec70ea 100644 --- a/controllers/githubactionrunner_controller.go +++ b/controllers/githubactionrunner_controller.go @@ -106,7 +106,8 @@ func (r *GithubActionRunnerReconciler) handleScaling(ctx context.Context, instan } // safety guard - always look for finalizers in order to unregister runners for pods about to delete - if err = r.unregisterRunners(ctx, instance, podRunnerPairs); err != nil { + // pods could have been deleted by user directly and not through operator + if err = r.handleFinalization(ctx, instance, podRunnerPairs); err != nil { return r.manageOutcome(ctx, instance, err) } @@ -134,24 +135,36 @@ func (r *GithubActionRunnerReconciler) handleScaling(ctx context.Context, instan return r.manageOutcome(ctx, instance, err) } else if shouldScaleDown(podRunnerPairs, instance) { logger.Info("Scaling down", "runners at github", podRunnerPairs.numRunners(), "maxrunners in CR", instance.Spec.MaxRunners) + err := r.scaleDown(ctx, podRunnerPairs, instance) + return r.manageOutcome(ctx, instance, err) + } - idlePods := podRunnerPairs.getIdlePods(instance.Spec.DeletionOrder) - if len(idlePods) > 0 { - pod := idlePods[0] - err := r.DeleteResourceIfExists(ctx, &pod) - if err == nil { - r.GetRecorder().Event(instance, corev1.EventTypeNormal, "Scaling", fmt.Sprintf("Deleted pod %s/%s", pod.Namespace, pod.Name)) - instance.Status.CurrentSize-- - if err := r.GetClient().Status().Update(ctx, instance); err != nil { - return r.manageOutcome(ctx, instance, err) - } - } + return r.manageOutcome(ctx, instance, err) +} + +// scaleDown will scale down an idle runner based on policy in CR +func (r *GithubActionRunnerReconciler) scaleDown(ctx context.Context, podRunnerPairs podRunnerPairList, instance *garov1alpha1.GithubActionRunner) error { + idles := podRunnerPairs.getIdles(instance.Spec.DeletionOrder) + for _, pair := range idles { + err := r.unregisterRunner(ctx, instance, pair) + if err != nil { // should be improved, here we just assume it's because it's running a job and cannot be removed, skip to next candidate + continue } - return r.manageOutcome(ctx, instance, err) + //then actually delete the pod + err = r.DeleteResourceIfExists(ctx, &pair.pod) + if err != nil { + return err + } + + r.GetRecorder().Event(instance, corev1.EventTypeNormal, "Scaling", pair.getNamespacedName()) + instance.Status.CurrentSize-- + err = r.GetClient().Status().Update(ctx, instance) + + return err } - return r.manageOutcome(ctx, instance, err) + return nil } func shouldScaleUp(podRunnerPairs podRunnerPairList, instance *garov1alpha1.GithubActionRunner) bool { @@ -321,27 +334,35 @@ func (r *GithubActionRunnerReconciler) listRelatedPods(ctx context.Context, cr * return podList, nil } -// unregisterRunners will remove runner from github based on presence of finalizer -func (r *GithubActionRunnerReconciler) unregisterRunners(ctx context.Context, cr *garov1alpha1.GithubActionRunner, list podRunnerPairList) error { - for _, item := range list.getPodsBeingDeleted() { - if util.HasFinalizer(&item.pod, finalizer) { - - if item.runner.GetName() != "" && item.runner.GetID() != 0 { - logr.FromContext(ctx).Info("Unregistering runner", "name", item.runner.GetName(), "id", item.runner.GetID()) - token, err := r.tokenForRef(ctx, cr) - if err != nil { - return err - } - if err = r.GithubAPI.UnregisterRunner(ctx, cr.Spec.Organization, cr.Spec.Repository, token, *item.runner.ID); err != nil { - return err - } +func (r *GithubActionRunnerReconciler) unregisterRunner(ctx context.Context, cr *garov1alpha1.GithubActionRunner, pair podRunnerPair) error { + if util.HasFinalizer(&pair.pod, finalizer) { + if pair.runner.GetName() != "" && pair.runner.GetID() != 0 { + logr.FromContext(ctx).Info("Unregistering runner", "name", pair.runner.GetName(), "id", pair.runner.GetID()) + token, err := r.tokenForRef(ctx, cr) + if err != nil { + return err } - - util.RemoveFinalizer(&item.pod, finalizer) - if err := r.GetClient().Update(ctx, &item.pod); err != nil { + if err = r.GithubAPI.UnregisterRunner(ctx, cr.Spec.Organization, cr.Spec.Repository, token, *pair.runner.ID); err != nil { return err } } + + util.RemoveFinalizer(&pair.pod, finalizer) + if err := r.GetClient().Update(ctx, &pair.pod); err != nil { + return err + } + } + + return nil +} + +// handleFinalization will remove runner from github based on presence of finalizer +func (r *GithubActionRunnerReconciler) handleFinalization(ctx context.Context, cr *garov1alpha1.GithubActionRunner, list podRunnerPairList) error { + for _, item := range list.getPodsBeingDeleted() { + // TODO - cause of failure should be checked more closely, if it does not exist we can ignore it. If it is a comms error we should stick around + if err := r.unregisterRunner(ctx, cr, item); err != nil { + return err + } } return nil diff --git a/controllers/podrunner_types.go b/controllers/podrunner_types.go index aa69645f..e4da004a 100644 --- a/controllers/podrunner_types.go +++ b/controllers/podrunner_types.go @@ -1,6 +1,7 @@ package controllers import ( + "fmt" "github.com/evryfs/github-actions-runner-operator/api/v1alpha1" "github.com/google/go-github/v33/github" "github.com/redhat-cop/operator-utils/pkg/util" @@ -14,6 +15,10 @@ type podRunnerPair struct { runner github.Runner } +func (r *podRunnerPair) getNamespacedName() string { + return fmt.Sprintf("%s/%s", r.pod.Namespace, r.pod.Name) +} + type podRunnerPairList struct { pairs []podRunnerPair podList corev1.PodList @@ -68,22 +73,19 @@ func (r podRunnerPairList) inSync() bool { return r.numPods() == r.numRunners() } -func (r podRunnerPairList) getIdlePods(sortOrder v1alpha1.SortOrder) []corev1.Pod { +func (r podRunnerPairList) getIdles(sortOrder v1alpha1.SortOrder) []podRunnerPair { idles := funk.Filter(r.pairs, func(pair podRunnerPair) bool { return !(pair.runner.GetBusy() || util.IsBeingDeleted(&pair.pod)) }).([]podRunnerPair) - pods := funk.Map(idles, func(pair podRunnerPair) corev1.Pod { - return pair.pod - }).([]corev1.Pod) - sort.SliceStable(pods, func(i, j int) bool { + sort.SliceStable(idles, func(i, j int) bool { if sortOrder == v1alpha1.LeastRecent { - return pods[i].CreationTimestamp.Unix() < pods[j].CreationTimestamp.Unix() + return idles[i].pod.CreationTimestamp.Unix() < idles[j].pod.CreationTimestamp.Unix() } - return pods[i].CreationTimestamp.Unix() > pods[j].CreationTimestamp.Unix() + return idles[i].pod.CreationTimestamp.Unix() > idles[j].pod.CreationTimestamp.Unix() }) - return pods + return idles } func (r podRunnerPairList) getPodsBeingDeleted() []podRunnerPair { diff --git a/controllers/podrunner_types_test.go b/controllers/podrunner_types_test.go index 89178b9a..8d5a3f14 100644 --- a/controllers/podrunner_types_test.go +++ b/controllers/podrunner_types_test.go @@ -76,14 +76,18 @@ func TestSort(t *testing.T) { testCases := []struct { sortOrder v1alpha1.SortOrder podRunnerPairList podRunnerPairList - podList []v1.Pod + podRunnerPair []podRunnerPair }{ - {v1alpha1.LeastRecent, from(&podList, runners), []v1.Pod{podList.Items[0], podList.Items[1]}}, - {v1alpha1.MostRecent, from(&podList, runners), []v1.Pod{podList.Items[1], podList.Items[0]}}, + {v1alpha1.LeastRecent, from(&podList, runners), []podRunnerPair{ + {podList.Items[0], *runners[0]}, + {podList.Items[1], *runners[1]}}}, + {v1alpha1.MostRecent, from(&podList, runners), []podRunnerPair{ + {podList.Items[1], *runners[1]}, + {podList.Items[0], *runners[0]}}}, } for _, tc := range testCases { - podList := tc.podRunnerPairList.getIdlePods(tc.sortOrder) - assert.Equal(t, podList, tc.podList) + podList := tc.podRunnerPairList.getIdles(tc.sortOrder) + assert.Equal(t, podList, tc.podRunnerPair) } }