fix: correctly determine master nodes by label for SNO
also adds tests for various node configurations found in the wild

Signed-off-by: jakobmoellerdev <jmoller@redhat.com>
jakobmoellerdev committed Aug 3, 2023
1 parent 36a785c commit cc4ef90
Showing 3 changed files with 190 additions and 40 deletions.
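
In short, the change switches single-node (SNO) detection from counting every node in the cluster to counting only nodes that carry the control-plane role label, so adding worker nodes to a single-node cluster no longer flips the operator back to multi-node leader election settings. Below is a minimal standalone sketch of that label-filtered lookup, mirroring the new pkg/cluster code in this commit; the package name, the helper isSingleControlPlane, and the pre-configured client are illustrative only, not part of the change.

package snocheck

import (
	"context"

	corev1 "k8s.io/api/core/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// isSingleControlPlane reports whether the cluster has exactly one node labeled as
// control plane, which is the condition used to pick the SNO leader election profile.
// The client c is assumed to be an already configured controller-runtime client.
func isSingleControlPlane(ctx context.Context, c client.Client) (bool, error) {
	nodes := &corev1.NodeList{}
	// The label selector restricts the list to control-plane nodes only.
	if err := c.List(ctx, nodes, client.MatchingLabels{
		"node-role.kubernetes.io/control-plane": "",
	}); err != nil {
		return false, err
	}
	return len(nodes.Items) == 1, nil
}

On a live cluster, kubectl get nodes -l node-role.kubernetes.io/control-plane lists the same set of nodes the resolver counts.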
main.go (51 changes: 11 additions & 40 deletions)
@@ -26,26 +26,24 @@ import (
 	// to ensure that exec-entrypoint and run can make use of them.
 	_ "k8s.io/client-go/plugin/pkg/client/auth"

-	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
 	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
 	"k8s.io/klog/v2"

 	ctrl "sigs.k8s.io/controller-runtime"
-	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/healthz"
 	"sigs.k8s.io/controller-runtime/pkg/log/zap"

 	snapapi "github.com/kubernetes-csi/external-snapshotter/client/v4/apis/volumesnapshot/v1"
-	configv1 "github.com/openshift/api/config/v1"
 	secv1client "github.com/openshift/client-go/security/clientset/versioned/typed/security/v1"
 	"github.com/openshift/library-go/pkg/config/leaderelection"

 	lvmv1alpha1 "github.com/openshift/lvm-operator/api/v1alpha1"
 	"github.com/openshift/lvm-operator/controllers"
 	persistent_volume "github.com/openshift/lvm-operator/controllers/persistent-volume"
 	persistent_volume_claim "github.com/openshift/lvm-operator/controllers/persistent-volume-claim"
+	"github.com/openshift/lvm-operator/pkg/cluster"
 	topolvmv1 "github.com/topolvm/topolvm/api/v1"
 	//+kubebuilder:scaffold:imports
 )
@@ -91,8 +89,16 @@ func main() {
 	}
 	setupLog.Info("Watching namespace", "Namespace", operatorNamespace)

-	leaderElectionConfig := resolveLeaderElectionConfig(enableLeaderElection, operatorNamespace)
-
+	leaderElectionResolver, err := cluster.NewLeaderElectionResolver(ctrl.GetConfigOrDie(), scheme, enableLeaderElection, operatorNamespace)
+	if err != nil {
+		setupLog.Error(err, "unable to setup leader election")
+		os.Exit(1)
+	}
+	leaderElectionConfig, err := leaderElectionResolver.Resolve(context.Background())
+	if err != nil {
+		setupLog.Error(err, "unable to resolve leader election config")
+		os.Exit(1)
+	}
 	le, err := leaderelection.ToLeaderElectionWithLease(
 		ctrl.GetConfigOrDie(), leaderElectionConfig, "lvms", "")
 	if err != nil {
@@ -156,41 +162,6 @@ func main() {
 	}
 }

-// resolveLeaderElectionConfig returns the correct LeaderElection Settings for Multi- or SNO-Clusters based
-// on the amount of nodes discovered in the cluster.
-func resolveLeaderElectionConfig(enableLeaderElection bool, operatorNamespace string) configv1.LeaderElection {
-	leaderElectionConfig := leaderelection.LeaderElectionDefaulting(configv1.LeaderElection{
-		Disable: !enableLeaderElection,
-	}, operatorNamespace, "1136b8a6.topolvm.io")
-
-	snoChecker, err := client.New(ctrl.GetConfigOrDie(), client.Options{
-		Scheme: scheme,
-	})
-	if err != nil {
-		setupLog.Error(err, "unable to setup SNO check with lease configuration")
-		os.Exit(1)
-	}
-	nodes := &corev1.NodeList{}
-	if err := snoChecker.List(context.Background(), nodes); err != nil {
-		setupLog.Error(err, "unable to retrieve nodes for SNO check with lease configuration")
-		os.Exit(1)
-	}
-	if len(nodes.Items) == 1 {
-		setupLog.Info("Overwriting defaults with SNO leader election config as only a single node was discovered",
-			"node", nodes.Items[0].GetName())
-		leaderElectionConfig = leaderelection.LeaderElectionSNOConfig(leaderElectionConfig)
-	}
-	setupLog.Info("leader election config setup succeeded",
-		"retryPeriod", leaderElectionConfig.RetryPeriod,
-		"leaseDuration", leaderElectionConfig.LeaseDuration,
-		"renewDeadline", leaderElectionConfig.RenewDeadline,
-		"election-namespace", leaderElectionConfig.Namespace,
-		"election-name", leaderElectionConfig.Name,
-		"disable", leaderElectionConfig.Disable,
-	)
-	return leaderElectionConfig
-}
-
 // getOperatorNamespace returns the Namespace the operator should be watching for changes
 func getOperatorNamespace() (string, error) {
 	// The env variable POD_NAMESPACE which specifies the Namespace the pod is running in
pkg/cluster/leaderelection.go (77 changes: 77 additions & 0 deletions)
@@ -0,0 +1,77 @@
package cluster

import (
	"context"
	"fmt"

	configv1 "github.com/openshift/api/config/v1"
	"github.com/openshift/library-go/pkg/config/leaderelection"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/client-go/rest"
	"sigs.k8s.io/controller-runtime/pkg/client"
	log "sigs.k8s.io/controller-runtime/pkg/log"
)

// ControlPlaneIDLabel identifies a control plane node by the given label,
// see https://kubernetes.io/docs/reference/labels-annotations-taints/#node-role-kubernetes-io-control-plane
const ControlPlaneIDLabel = "node-role.kubernetes.io/control-plane"

// LeaderElectionResolver resolves the LeaderElection configuration the operator should run with,
// either the multi-node defaults or a configuration tuned for single-node (SNO) clusters.
type LeaderElectionResolver interface {
Resolve(ctx context.Context) (configv1.LeaderElection, error)
}

// NewLeaderElectionResolver returns a LeaderElectionResolver that picks the correct LeaderElection settings
// for multi-node or single-node (SNO) clusters based on the number of control-plane nodes discovered in the
// cluster. If exactly one control-plane/master node is found, the resolved settings are optimized for SNO deployments.
func NewLeaderElectionResolver(
config *rest.Config,
scheme *runtime.Scheme,
enableLeaderElection bool,
operatorNamespace string,
) (LeaderElectionResolver, error) {
leaderElectionClient, err := client.New(config, client.Options{Scheme: scheme})
if err != nil {
return nil, fmt.Errorf("cannot create leader election client: %w", err)
}

defaultElectionConfig := leaderelection.LeaderElectionDefaulting(configv1.LeaderElection{
Disable: !enableLeaderElection,
}, operatorNamespace, "1136b8a6.topolvm.io")

return &nodeLookupSNOLeaderElection{
clnt: leaderElectionClient,
defaultElectionConfig: defaultElectionConfig,
}, nil
}

type nodeLookupSNOLeaderElection struct {
clnt client.Client
defaultElectionConfig configv1.LeaderElection
}

func (le *nodeLookupSNOLeaderElection) Resolve(ctx context.Context) (configv1.LeaderElection, error) {
	logger := log.FromContext(ctx)
	nodes := &corev1.NodeList{}
	if err := le.clnt.List(ctx, nodes, client.MatchingLabels{
		ControlPlaneIDLabel: "",
	}); err != nil {
		logger.Error(err, "unable to retrieve control-plane nodes for SNO check with lease configuration")
		return configv1.LeaderElection{}, fmt.Errorf("unable to list control-plane nodes: %w", err)
	}
if len(nodes.Items) != 1 {
return le.defaultElectionConfig, nil
}
logger.Info("Overwriting defaults with SNO leader election config as only a single node was discovered",
"node", nodes.Items[0].GetName())
config := leaderelection.LeaderElectionSNOConfig(le.defaultElectionConfig)
logger.Info("leader election config setup succeeded",
"retry-period", config.RetryPeriod,
"lease-duration", config.LeaseDuration,
"renew-deadline", config.RenewDeadline,
"election-namespace", config.Namespace,
"election-name", config.Name,
"disable", config.Disable,
)
return config, nil
}
pkg/cluster/leaderelection_test.go (102 changes: 102 additions & 0 deletions)
@@ -0,0 +1,102 @@
package cluster

import (
	"context"
	"testing"
	"time"

	configv1 "github.com/openshift/api/config/v1"
	"github.com/openshift/library-go/pkg/config/leaderelection"
	"github.com/stretchr/testify/assert"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/client/fake"
)

func Test_nodeLookupSNOLeaderElection_Resolve(t *testing.T) {
MultiNodeAssertion := func(a *assert.Assertions, le configv1.LeaderElection) bool {
return a.Equal(137*time.Second, le.LeaseDuration.Duration) &&
a.Equal(107*time.Second, le.RenewDeadline.Duration) &&
a.Equal(26*time.Second, le.RetryPeriod.Duration)
}

SNOAssertion := func(a *assert.Assertions, le configv1.LeaderElection) bool {
return a.Equal(270*time.Second, le.LeaseDuration.Duration) &&
a.Equal(240*time.Second, le.RenewDeadline.Duration) &&
a.Equal(60*time.Second, le.RetryPeriod.Duration)
}

tests := []struct {
name string
nodes []client.Object
resolveFn func(a *assert.Assertions, le configv1.LeaderElection) bool
errorFn func(a *assert.Assertions, err error) bool
}{
{
name: "LeaderElection Test Multi-Master",
nodes: []client.Object{
&corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "worker1"}},
&corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "worker2"}},
&corev1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "master1", Labels: map[string]string{ControlPlaneIDLabel: ""}},
},
&corev1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "master2", Labels: map[string]string{ControlPlaneIDLabel: ""}},
},
&corev1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "master3", Labels: map[string]string{ControlPlaneIDLabel: ""}},
},
},
resolveFn: MultiNodeAssertion,
errorFn: func(a *assert.Assertions, err error) bool {
return a.NoError(err)
},
},
{
name: "LeaderElection Test SNO",
nodes: []client.Object{
&corev1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "master1", Labels: map[string]string{ControlPlaneIDLabel: ""}},
},
},
resolveFn: SNOAssertion,
errorFn: func(a *assert.Assertions, err error) bool {
return a.NoError(err)
},
},
{
name: "LeaderElection Test SNO (added workers)",
nodes: []client.Object{
&corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "worker1"}},
&corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "worker2"}},
&corev1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "master1", Labels: map[string]string{ControlPlaneIDLabel: ""}},
},
},
resolveFn: SNOAssertion,
errorFn: func(a *assert.Assertions, err error) bool {
return a.NoError(err)
},
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
clnt := fake.NewClientBuilder().WithObjects(tt.nodes...).Build()
le := &nodeLookupSNOLeaderElection{
clnt: clnt,
defaultElectionConfig: leaderelection.LeaderElectionDefaulting(configv1.LeaderElection{},
"test", "test-leader-id"),
}
got, err := le.Resolve(context.Background())
assertions := assert.New(t)

if !tt.errorFn(assertions, err) {
return
}
if !tt.resolveFn(assertions, got) {
return
}
})
}
}
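
These cases exercise the resolver against the controller-runtime fake client, so no running cluster is required. Assuming the repository's standard Go module layout, they can be run with the usual tooling, for example:

go test ./pkg/cluster/ -run Test_nodeLookupSNOLeaderElection_Resolve -v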
