diff --git a/cmd/manager/main.go b/cmd/manager/main.go index d769cc9d1ca..ff44afbc9ff 100644 --- a/cmd/manager/main.go +++ b/cmd/manager/main.go @@ -12,6 +12,7 @@ import ( "net/http/pprof" "os" "path/filepath" + "strconv" "strings" "time" @@ -22,8 +23,11 @@ import ( "go.elastic.co/apm" "go.uber.org/automaxprocs/maxprocs" corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/discovery" "k8s.io/client-go/kubernetes" clientgoscheme "k8s.io/client-go/kubernetes/scheme" _ "k8s.io/client-go/plugin/pkg/client/auth/gcp" // allow gcp authentication @@ -284,10 +288,10 @@ func Command() *cobra.Command { DefaultWebhookName, "Name of the Kubernetes ValidatingWebhookConfiguration resource. Only used when enable-webhook is true.", ) - cmd.Flags().Bool( + cmd.Flags().String( operator.SetDefaultSecurityContextFlag, - true, - "Enables setting the default security context with fsGroup=1000 for Elasticsearch 8.0+ Pods. Ignored pre-8.0.", + "auto-detect", + "Enables setting the default security context with fsGroup=1000 for Elasticsearch 8.0+ Pods. Ignored pre-8.0. Possible values: true, false, auto-detect", ) // hide development mode flags from the usage message @@ -544,6 +548,12 @@ func startOperator(ctx context.Context) error { return err } + setDefaultSecurityContext, err := determineSetDefaultSecurityContext(viper.GetString(operator.SetDefaultSecurityContextFlag), clientset) + if err != nil { + log.Error(err, "failed to determine how to set default security context") + return err + } + params := operator.Parameters{ Dialer: dialer, ExposedNodeLabels: exposedNodeLabels, @@ -559,7 +569,7 @@ func startOperator(ctx context.Context) error { RotateBefore: certRotateBefore, }, MaxConcurrentReconciles: viper.GetInt(operator.MaxConcurrentReconcilesFlag), - SetDefaultSecurityContext: viper.GetBool(operator.SetDefaultSecurityContextFlag), + SetDefaultSecurityContext: setDefaultSecurityContext, ValidateStorageClass: viper.GetBool(operator.ValidateStorageClassFlag), Tracer: tracer, } @@ -676,6 +686,54 @@ func chooseAndValidateIPFamily(ipFamilyStr string, ipFamilyDefault corev1.IPFami } } +// determineSetDefaultSecurityContext determines what settings we need to use for security context by using the following rules: +// 1. If the setDefaultSecurityContext is explicitly set to either true, or false, use this value. +// 2. use OpenShift detection to determine whether or not we are running within an OpenShift cluster. +// If we determine we are on an OpenShift cluster, and since OpenShift automatically sets security context, return false, +// otherwise, return true as we'll need to set this security context on non-OpenShift clusters. +func determineSetDefaultSecurityContext(setDefaultSecurityContext string, clientset kubernetes.Interface) (bool, error) { + if setDefaultSecurityContext == "auto-detect" { + openshift, err := isOpenShift(clientset) + return !openshift, err + } + return strconv.ParseBool(setDefaultSecurityContext) +} + +// isOpenShift detects whether we are running on OpenShift. Detection inspired by kubevirt +// https://github.com/kubevirt/kubevirt/blob/f71e9c9615a6c36178169d66814586a93ba515b5/pkg/util/cluster/cluster.go#L21 +func isOpenShift(clientset kubernetes.Interface) (bool, error) { + openshiftSecurityGroupVersion := schema.GroupVersion{Group: "security.openshift.io", Version: "v1"} + apiResourceList, err := clientset.Discovery().ServerResourcesForGroupVersion(openshiftSecurityGroupVersion.String()) + if err != nil { + // In case of an error, check if security.openshift.io is the reason (unlikely). + var e *discovery.ErrGroupDiscoveryFailed + if ok := errors.As(err, &e); ok { + if _, exists := e.Groups[openshiftSecurityGroupVersion]; exists { + // If security.openshift.io is the reason for the error, we are absolutely on OpenShift + return true, nil + } + } + // If the security.openshift.io group isn't found, we are not on OpenShift + if apierrors.IsNotFound(err) { + return false, nil + } + return false, err + } + + // Search for "securitycontextconstraints" within the cluster's API resources, + // since this is an OpenShift specific API resource that does not exist outside of OpenShift. + for _, apiResource := range apiResourceList.APIResources { + if apiResource.Name == "securitycontextconstraints" { + // we have determined we are absolutely running on OpenShift + return true, nil + } + } + + // We could not determine that we are running on an OpenShift cluster, + // so we will behave as if "setDefaultSecurityContext" was set to true. + return false, nil +} + func registerControllers(mgr manager.Manager, params operator.Parameters, accessReviewer rbac.AccessReviewer) error { controllers := []struct { name string diff --git a/cmd/manager/main_test.go b/cmd/manager/main_test.go index d60ddda432e..6d20455572c 100644 --- a/cmd/manager/main_test.go +++ b/cmd/manager/main_test.go @@ -6,6 +6,7 @@ package manager import ( "context" + "fmt" "testing" "github.com/stretchr/testify/require" @@ -13,7 +14,11 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/discovery" + "k8s.io/client-go/kubernetes" + _ "k8s.io/client-go/plugin/pkg/client/auth/gcp" logf "sigs.k8s.io/controller-runtime/pkg/log" apmv1 "github.com/elastic/cloud-on-k8s/pkg/apis/apm/v1" @@ -192,3 +197,144 @@ func Test_garbageCollectSoftOwnedSecrets(t *testing.T) { }) } } + +func Test_determineSetDefaultSecurityContext(t *testing.T) { + type args struct { + setDefaultSecurityContext string + clientset kubernetes.Interface + } + tests := []struct { + name string + args args + want bool + wantErr bool + }{ + { + "auto-detect on OpenShift cluster does not set security context", + args{ + "auto-detect", + newFakeK8sClientsetWithDiscovery([]*metav1.APIResourceList{ + { + GroupVersion: schema.GroupVersion{Group: "security.openshift.io", Version: "v1"}.String(), + APIResources: []metav1.APIResource{ + { + Name: "securitycontextconstraints", + }, + }, + }, + }, nil), + }, + false, + false, + }, + { + "auto-detect on OpenShift cluster, returning group discovery failed error for OpenShift security group+version, does not set security context", + args{ + "auto-detect", + newFakeK8sClientsetWithDiscovery([]*metav1.APIResourceList{}, &discovery.ErrGroupDiscoveryFailed{ + Groups: map[schema.GroupVersion]error{ + {Group: "security.openshift.io", Version: "v1"}: nil, + }, + }), + }, + false, + false, + }, + { + "auto-detect on non-OpenShift cluster, returning not found error, sets security context", + args{ + "auto-detect", + newFakeK8sClientsetWithDiscovery([]*metav1.APIResourceList{}, apierrors.NewNotFound(schema.GroupResource{ + Group: "security.openshift.io", + Resource: "none", + }, "fake")), + }, + true, + false, + }, + { + "auto-detect on non-OpenShift cluster, returning random error, returns error", + args{ + "auto-detect", + newFakeK8sClientsetWithDiscovery([]*metav1.APIResourceList{}, fmt.Errorf("random error")), + }, + true, + true, + }, + { + "true set, returning no error, will set security context", + args{ + "true", + newFakeK8sClientsetWithDiscovery([]*metav1.APIResourceList{}, nil), + }, + true, + false, + }, { + "false set, returning no error, will not set security context", + args{ + "false", + newFakeK8sClientsetWithDiscovery([]*metav1.APIResourceList{}, nil), + }, + false, + false, + }, { + "invalid bool set, returns error", + args{ + "invalid", + newFakeK8sClientsetWithDiscovery([]*metav1.APIResourceList{}, nil), + }, + false, + true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := determineSetDefaultSecurityContext(tt.args.setDefaultSecurityContext, tt.args.clientset) + if (err != nil) != tt.wantErr { + t.Errorf("determineSetDefaultSecurityContext() error = %v, wantErr %v", err, tt.wantErr) + return + } + if got != tt.want { + t.Errorf("determineSetDefaultSecurityContext() = %v, want %v", got, tt.want) + } + }) + } +} + +type fakeClientset struct { + kubernetes.Interface + discovery discovery.DiscoveryInterface +} + +type fakeDiscovery struct { + discovery.DiscoveryInterface + resources []*metav1.APIResourceList + errServerResourcesForGroupVersion error +} + +func (c *fakeClientset) Discovery() discovery.DiscoveryInterface { + return c.discovery +} + +func (d *fakeDiscovery) ServerResourcesForGroupVersion(groupVersion string) (*metav1.APIResourceList, error) { + if d.errServerResourcesForGroupVersion != nil { + return nil, d.errServerResourcesForGroupVersion + } + for _, resourceList := range d.resources { + if resourceList.GroupVersion == groupVersion { + return resourceList, nil + } + } + return nil, fmt.Errorf("GroupVersion %q not found", groupVersion) +} + +func newFakeK8sClientsetWithDiscovery(resources []*metav1.APIResourceList, discoveryError error) kubernetes.Interface { + discoveryClient := &fakeDiscovery{ + resources: resources, + errServerResourcesForGroupVersion: discoveryError, + } + client := &fakeClientset{ + discovery: discoveryClient, + } + return client +} diff --git a/deploy/eck-operator/values.yaml b/deploy/eck-operator/values.yaml index 10b2c38c67c..a708632c3bd 100644 --- a/deploy/eck-operator/values.yaml +++ b/deploy/eck-operator/values.yaml @@ -159,7 +159,12 @@ config: exposedNodeLabels: [ "topology.kubernetes.io/.*", "failure-domain.beta.kubernetes.io/.*" ] # setDefaultSecurityContext determines whether a default security context is set on application containers created by the operator. - setDefaultSecurityContext: true + # *note* that the default option now is "auto-detect" to attempt to set this properly automatically when both running + # in an openshift cluster, and a standard kubernetes cluster. Valid values are as follows: + # "auto-detect" : auto detect + # "true" : set pod security context when creating resources. + # "false" : do not set pod security context when creating resources. + setDefaultSecurityContext: "auto-detect" # kubeClientTimeout sets the request timeout for Kubernetes API calls made by the operator. kubeClientTimeout: 60s