Skip to content

Commit

Permalink
Only Delete LoadBalancerMachine after a grace period (#267)
Browse files Browse the repository at this point in the history
* WIP

* fix setCondition

* Add more tests than is probably needed

* unfocus

* iterate over required conditions first

* configure yawollet exponential backoff

* feedbackc omments

* rewrite condition helper to mimic apimachinery/meta

* cap and document yawollet backoff

* final PR feedback

* cleanup

* Build images on all branches

* Use status subresource client where necessary

* Bump chart version in preparation for release

* Deflake test

---------

Co-authored-by: Tim Ebert <timebertt@gmail.com>
  • Loading branch information
maboehm and timebertt authored Dec 5, 2023
1 parent 07bbca9 commit f949e16
Show file tree
Hide file tree
Showing 16 changed files with 529 additions and 139 deletions.
4 changes: 1 addition & 3 deletions .github/workflows/images.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ name: Container Images
on:
push:
branches:
- main
- feature/*
- '*'
tags:
- v*

Expand Down Expand Up @@ -36,4 +35,3 @@ jobs:
run: earthly --version
- name: Run build
run: earthly --ci --push +ci

11 changes: 4 additions & 7 deletions .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@ linters-settings:
funlen:
lines: 100
statements: 50
goconst:
min-len: 2
min-occurrences: 2
gocritic:
enabled-tags:
- diagnostic
Expand All @@ -23,7 +20,8 @@ linters-settings:
- hugeParam

gocyclo:
min-complexity: 30 # decrease this
min-complexity:
30 # decrease this
# gci:
# local-prefixes: insert your package name here
# goimports:
Expand All @@ -36,7 +34,7 @@ linters-settings:
settings:
mnd:
# don't include the "operation" and "assign"
checks: [argument,case,condition,return]
checks: [argument, case, condition, return]
govet:
check-shadowing: false
lll:
Expand All @@ -58,7 +56,6 @@ linters:
enable:
- bodyclose
- dogsled
- goconst
- gocritic
- gofmt
- goimports
Expand Down Expand Up @@ -128,7 +125,7 @@ issues:

# silence stupid linter errors
exclude:
- directive `// nolint.*` should be written without leading space
- directive `// nolint.*` should be written without leading space

run:
timeout: 10m
Expand Down
2 changes: 1 addition & 1 deletion Earthfile
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ generate:
COPY +controller-gen/bin/controller-gen $BINPATH
COPY --dir api/ .
RUN controller-gen object paths="./..."
RUN controller-gen crd rbac:roleName=manager-role webhook paths="./..." output:crd:artifacts:config="crds"
RUN controller-gen crd webhook paths="./..." output:crd:artifacts:config="crds"
SAVE ARTIFACT ./api/* AS LOCAL api/
SAVE ARTIFACT ./crds/* AS LOCAL charts/yawol-controller/crds/

Expand Down
4 changes: 2 additions & 2 deletions charts/yawol-controller/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ description: Helm chart for yawol-controller
name: yawol-controller
sources:
- https://github.com/stackitcloud/yawol
version: "0.18.2"
appVersion: v0.18.2
version: "0.19.0"
appVersion: v0.19.0
15 changes: 9 additions & 6 deletions cmd/yawol-controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ func main() {
var lbMachineController bool

var yawolletRequeueTime int
var lbmDeletionGracePeriod time.Duration

var openstackTimeout time.Duration

Expand Down Expand Up @@ -103,6 +104,9 @@ func main() {
"yawollet requeue time in seconds for reconcile if object was successful reconciled. "+
"Values less than 5 are set to 5 and greater than 170 are set to 170. "+
"If unset the default from yawollet is used.")
flag.DurationVar(&lbmDeletionGracePeriod, "lbm-deletion-grace-period", 2*time.Minute,
"Grace period before deleting a load balancer machine AFTER the machine has first been identified as unready.",
)

flag.DurationVar(&openstackTimeout, "openstack-timeout", 20*time.Second, "Timeout for all requests against Openstack.")

Expand Down Expand Up @@ -228,12 +232,11 @@ func main() {
setupLog.Error(err, "unable to create controller", "controller", "LoadBalancerSet")
os.Exit(1)
}
if err := (&loadbalancerset.LoadBalancerMachineStatusReconciler{
Client: loadBalancerSetMgr.GetClient(),
Log: ctrl.Log.WithName("controller").WithName("LoadBalancerMachineStatus"),
Scheme: loadBalancerSetMgr.GetScheme(),
WorkerCount: concurrentWorkersPerReconciler,
RateLimiter: rateLimiter,
if err := (&loadbalancerset.LBMStatusReconciler{
Client: loadBalancerSetMgr.GetClient(),
WorkerCount: concurrentWorkersPerReconciler,
RateLimiter: rateLimiter,
DeletionGracePeriod: lbmDeletionGracePeriod,
}).SetupWithManager(loadBalancerSetMgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "LoadBalancerSet")
os.Exit(1)
Expand Down
3 changes: 2 additions & 1 deletion cmd/yawollet/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"log"
"net"
"os"
"time"

"github.com/envoyproxy/go-control-plane/pkg/resource/v3"
"k8s.io/apimachinery/pkg/fields"
Expand Down Expand Up @@ -214,7 +215,7 @@ func main() {
LoadbalancerMachineName: loadbalancerMachineName,
EnvoyCache: envoyCache,
ListenAddress: listenAddress,
RequeueTime: requeueTime,
RequeueDuration: time.Duration(requeueTime) * time.Second,
KeepalivedStatsFile: keepalivedStatsFile,
Recorder: mgr.GetEventRecorderFor("yawollet"),
RecorderLB: mgr.GetEventRecorderFor("yawol-service"),
Expand Down
118 changes: 118 additions & 0 deletions controllers/yawol-controller/loadbalancerset/helper.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
package loadbalancerset

import (
"fmt"

yawolv1beta1 "github.com/stackitcloud/yawol/api/v1beta1"
"github.com/stackitcloud/yawol/internal/helper"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

var relevantLBMConditionslForLBS = []helper.LoadbalancerCondition{
helper.ConfigReady,
helper.EnvoyReady,
helper.EnvoyUpToDate,
}

// areRelevantConditionsMet checks if all required conditions (from the
// perspective of the LoadBalancerSet) are both `True` and up-to-date, according
// to the passed expiration time. If `stableConditions` is set, a condition is
// only considered `False` if it has been in that state since the expiration
// time.
func areRelevantConditionsMet(
machine *yawolv1beta1.LoadBalancerMachine,
expiration metav1.Time,
stableConditions bool,
) (ok bool, reason string) {
if machine.Status.Conditions == nil {
return false, "no conditions set"
}

// constuct lookup map
conditions := *machine.Status.Conditions
condMap := make(map[helper.LoadbalancerCondition]corev1.NodeCondition, len(conditions))
for i := range conditions {
condMap[helper.LoadbalancerCondition(conditions[i].Type)] = conditions[i]
}

for _, typ := range relevantLBMConditionslForLBS {
condition, found := condMap[typ]
if !found {
return false, fmt.Sprintf("required condition %s not present on machine", typ)
}

conditionIsStable := true
if stableConditions {
conditionIsStable = condition.LastTransitionTime.Before(&expiration)
}
if conditionIsStable && condition.Status != corev1.ConditionTrue {
return false, fmt.Sprintf(
"condition %s is in status %s with reason: %v, message: %v, lastTransitionTime: %v",
condition.Type, condition.Status, condition.Reason, condition.Message, condition.LastTransitionTime,
)
}
if condition.LastHeartbeatTime.Before(&expiration) {
return false, fmt.Sprintf("condition %s heartbeat is stale", condition.Type)
}
}

return true, ""
}

func findDeletionCondition(machine *yawolv1beta1.LoadBalancerMachine) *corev1.NodeCondition {
if machine.Status.Conditions == nil {
return nil
}
conditions := *machine.Status.Conditions
for i := range conditions {
if conditions[i].Type == helper.DeletionMarkerCondition {
return &conditions[i]
}
}
return nil
}

func setDeletionCondition(machine *yawolv1beta1.LoadBalancerMachine, newCondition corev1.NodeCondition) {
if machine.Status.Conditions == nil {
machine.Status.Conditions = &[]corev1.NodeCondition{}
}

newCondition.Type = helper.DeletionMarkerCondition

existingCondition := findDeletionCondition(machine)
if existingCondition == nil {
if newCondition.LastTransitionTime.IsZero() {
newCondition.LastTransitionTime = metav1.Now()
}
*machine.Status.Conditions = append(*machine.Status.Conditions, newCondition)
return
}

if existingCondition.Status != newCondition.Status {
existingCondition.Status = newCondition.Status
if !newCondition.LastTransitionTime.IsZero() {
existingCondition.LastTransitionTime = newCondition.LastTransitionTime
} else {
existingCondition.LastTransitionTime = metav1.Now()
}
}

existingCondition.Reason = newCondition.Reason
existingCondition.Message = newCondition.Message
existingCondition.LastHeartbeatTime = newCondition.LastHeartbeatTime
}

func removeDeletionCondition(machine *yawolv1beta1.LoadBalancerMachine) {
if machine.Status.Conditions == nil || len(*machine.Status.Conditions) == 0 {
return
}
newConditions := make([]corev1.NodeCondition, 0, len(*machine.Status.Conditions)-1)
for _, condition := range *machine.Status.Conditions {
if condition.Type != helper.DeletionMarkerCondition {
newConditions = append(newConditions, condition)
}
}

*machine.Status.Conditions = newConditions
}
Loading

0 comments on commit f949e16

Please sign in to comment.