From 504e9746919ae77306cc731bc7f8997c68903bf0 Mon Sep 17 00:00:00 2001 From: Cezary Zawadka Date: Tue, 5 Apr 2022 14:37:52 +0200 Subject: [PATCH] Improve service deletion: - remove service annotations to allow ILB<->XLB migration - try to delete the instance group to cleanup the instance group, this will - succeed when the last Service/LB using the instance group is deleted - fail if other L4/L7 load balancer is using the instance group otherwise the IG can leak --- pkg/l4lb/l4netlbcontroller.go | 21 +++++++ pkg/l4lb/l4netlbcontroller_test.go | 91 +++++++++++++++++++++++++++--- 2 files changed, 103 insertions(+), 9 deletions(-) diff --git a/pkg/l4lb/l4netlbcontroller.go b/pkg/l4lb/l4netlbcontroller.go index 0b2b717d96..4818f8f6c9 100644 --- a/pkg/l4lb/l4netlbcontroller.go +++ b/pkg/l4lb/l4netlbcontroller.go @@ -403,24 +403,45 @@ func (lc *L4NetLBController) garbageCollectRBSNetLB(key string, svc *v1.Service) l4netLB := loadbalancers.NewL4NetLB(svc, lc.ctx.Cloud, meta.Regional, lc.namer, lc.ctx.Recorder(svc.Namespace), &lc.sharedResourcesLock) lc.ctx.Recorder(svc.Namespace).Eventf(svc, v1.EventTypeNormal, "DeletingLoadBalancer", "Deleting L4 External LoadBalancer for %s", key) + result := l4netLB.EnsureLoadBalancerDeleted(svc) if result.Error != nil { lc.ctx.Recorder(svc.Namespace).Eventf(svc, v1.EventTypeWarning, "DeleteLoadBalancerFailed", "Error deleting L4 External LoadBalancer, err: %v", result.Error) return result } + if err := updateServiceStatus(lc.ctx, svc, &v1.LoadBalancerStatus{}); err != nil { lc.ctx.Recorder(svc.Namespace).Eventf(svc, v1.EventTypeWarning, "DeleteLoadBalancer", "Error reseting L4 External LoadBalancer status to empty, err: %v", err) result.Error = fmt.Errorf("Failed to reset L4 External LoadBalancer status, err: %w", err) return result } + + // Remove LB annotations from the Service when processing the finalizer. + if err := updateAnnotations(lc.ctx, svc, nil); err != nil { + lc.ctx.Recorder(svc.Namespace).Eventf(svc, v1.EventTypeWarning, "DeleteLoadBalancer", + "Error removing resource annotations: %v: %v", err) + result.Error = fmt.Errorf("Failed to reset resource annotations, err: %w", err) + return result + } + if err := common.EnsureDeleteServiceFinalizer(svc, common.NetLBFinalizerV2, lc.ctx.KubeClient); err != nil { lc.ctx.Recorder(svc.Namespace).Eventf(svc, v1.EventTypeWarning, "DeleteLoadBalancerFailed", "Error removing finalizer from L4 External LoadBalancer, err: %v", err) result.Error = fmt.Errorf("Failed to remove L4 External LoadBalancer finalizer, err: %w", err) return result } + + // Try to delete instance group, instancePool.DeleteInstanceGroup ignores errors if resource is in use or not found. + // TODO(cezarygerard) replace with multi-IG management + if err := lc.instancePool.DeleteInstanceGroup(lc.namer.InstanceGroup()); err != nil { + lc.ctx.Recorder(svc.Namespace).Eventf(svc, v1.EventTypeWarning, "DeleteInstanceGroupFailed", + "Error deleting delete Instance Group from L4 External LoadBalancer, err: %v", err) + result.Error = fmt.Errorf("Failed to delete Instance Group, err: %w", err) + return result + } + lc.ctx.Recorder(svc.Namespace).Eventf(svc, v1.EventTypeNormal, "DeletedLoadBalancer", "Deleted L4 External LoadBalancer") return result } diff --git a/pkg/l4lb/l4netlbcontroller_test.go b/pkg/l4lb/l4netlbcontroller_test.go index 3a5811e093..8422938741 100644 --- a/pkg/l4lb/l4netlbcontroller_test.go +++ b/pkg/l4lb/l4netlbcontroller_test.go @@ -19,6 +19,7 @@ package l4lb import ( "context" "fmt" + "google.golang.org/api/googleapi" "math/rand" "net/http" "reflect" @@ -61,6 +62,16 @@ const ( userAddrName = "UserStaticAddress" ) +var ( + serviceAnnotationKeys = []string{ + annotations.FirewallRuleKey, + annotations.BackendServiceKey, + annotations.HealthcheckKey, + annotations.TCPForwardingRuleKey, + annotations.FirewallRuleForHealthcheckKey, + } +) + func getExternalIPS() []string { return []string{"34.122.234.156", "34.122.234.157"} } @@ -240,11 +251,8 @@ func validateNetLBSvcStatus(svc *v1.Service, t *testing.T) { } func validateAnnotations(svc *v1.Service) error { - expectedAnnotationKeys := []string{annotations.FirewallRuleKey, annotations.BackendServiceKey, annotations.HealthcheckKey, - annotations.TCPForwardingRuleKey, annotations.FirewallRuleForHealthcheckKey} - missingKeys := []string{} - for _, key := range expectedAnnotationKeys { + for _, key := range serviceAnnotationKeys { if _, ok := svc.Annotations[key]; !ok { missingKeys = append(missingKeys, key) } @@ -255,6 +263,19 @@ func validateAnnotations(svc *v1.Service) error { return nil } +func validateAnnotationsDeleted(svc *v1.Service) error { + unexpectedKeys := []string{} + for _, key := range serviceAnnotationKeys { + if _, exists := svc.Annotations[key]; exists { + unexpectedKeys = append(unexpectedKeys, key) + } + } + if len(unexpectedKeys) != 0 { + return fmt.Errorf("Unexpeceted annotations: %v, Service annotations %v", unexpectedKeys, svc.Annotations) + } + return nil +} + func TestProcessMultipleNetLBServices(t *testing.T) { backoff := retry.DefaultRetry backoff.Duration = 1 * time.Second @@ -454,17 +475,18 @@ func addUsersStaticAddress(lc *L4NetLBController, netTier cloud.NetworkTier) { func TestProcessServiceDeletion(t *testing.T) { svc, lc := createAndSyncNetLBSvc(t) + if !common.HasGivenFinalizer(svc.ObjectMeta, common.NetLBFinalizerV2) { - t.Fatalf("Expected L4 External LoadBalancer finalizer") + t.Errorf("Expected L4 External LoadBalancer finalizer") } if lc.needsDeletion(svc) { - t.Fatalf("Service should not be marked for deletion") + t.Errorf("Service should not be marked for deletion") } // Mark the service for deletion by updating timestamp svc.DeletionTimestamp = &metav1.Time{} updateNetLBService(lc, svc) if !lc.needsDeletion(svc) { - t.Fatalf("Service should be marked for deletion") + t.Errorf("Service should be marked for deletion") } key, _ := common.KeyFunc(svc) err := lc.sync(key) @@ -476,14 +498,65 @@ func TestProcessServiceDeletion(t *testing.T) { t.Errorf("Failed to lookup service %s, err %v", svc.Name, err) } if len(svc.Status.LoadBalancer.Ingress) > 0 { - t.Fatalf("Expected LoadBalancer status be deleted - %+v", svc.Status.LoadBalancer) + t.Errorf("Expected LoadBalancer status be deleted - %+v", svc.Status.LoadBalancer) } if common.HasGivenFinalizer(svc.ObjectMeta, common.NetLBFinalizerV2) { - t.Fatalf("Unexpected LoadBalancer finalizer %v", svc.ObjectMeta.Finalizers) + t.Errorf("Unexpected LoadBalancer finalizer %v", svc.ObjectMeta.Finalizers) } + + if err = validateAnnotationsDeleted(svc); err != nil { + t.Errorf("RBS Service annotations have NOT been deleted. Error: %v", err) + } + + igName := lc.namer.InstanceGroup() + _, err = lc.ctx.Cloud.GetInstanceGroup(igName, testGCEZone) + if !utils.IsNotFoundError(err) { + t.Errorf("Failed to delete Instance Group %v, err: %v", igName, err) + } + deleteNetLBService(lc, svc) } +func TestServiceDeletionWhenInstanceGroupInUse(t *testing.T) { + svc, lc := createAndSyncNetLBSvc(t) + + (lc.ctx.Cloud.Compute().(*cloud.MockGCE)).MockInstanceGroups.DeleteHook = func(ctx context.Context, key *meta.Key, m *cloud.MockInstanceGroups) (bool, error) { + err := &googleapi.Error{ + Code: http.StatusBadRequest, + Message: "GetErrorInstanceGroupHook: Cannot delete instance group being used by another service", + } + return true, err + } + + svc.DeletionTimestamp = &metav1.Time{} + updateNetLBService(lc, svc) + key, _ := common.KeyFunc(svc) + err := lc.sync(key) + if err != nil { + t.Errorf("Failed to sync service %s, err %v", svc.Name, err) + } + svc, err = lc.ctx.KubeClient.CoreV1().Services(svc.Namespace).Get(context.TODO(), svc.Name, metav1.GetOptions{}) + if err != nil { + t.Errorf("Failed to lookup service %s, err %v", svc.Name, err) + } + if len(svc.Status.LoadBalancer.Ingress) > 0 { + t.Errorf("Expected LoadBalancer status be deleted - %+v", svc.Status.LoadBalancer) + } + if common.HasGivenFinalizer(svc.ObjectMeta, common.NetLBFinalizerV2) { + t.Errorf("Unexpected LoadBalancer finalizer %v", svc.ObjectMeta.Finalizers) + } + + if err = validateAnnotationsDeleted(svc); err != nil { + t.Errorf("RBS Service annotations have NOT been deleted. Error: %v", err) + } + + igName := lc.namer.InstanceGroup() + _, err = lc.ctx.Cloud.GetInstanceGroup(igName, testGCEZone) + if err != nil { + t.Errorf("Failed to get Instance Group named %v. Group should be present. Error: %v", igName, err) + } +} + func TestInternalLoadBalancerShouldNotBeProcessByL4NetLBController(t *testing.T) { lc := newL4NetLBServiceController() ilbSvc := test.NewL4ILBService(false, 8080)