Skip to content

Commit

Permalink
Merge remote-tracking branch 'apache/main' into ingress-scaling
Browse files Browse the repository at this point in the history
  • Loading branch information
HoustonPutman committed Apr 3, 2024
2 parents 6c85674 + 938a7da commit 373dca3
Show file tree
Hide file tree
Showing 17 changed files with 161 additions and 48 deletions.
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ GO_LICENSES_VERSION=v1.6.0
GINKGO_VERSION = $(shell cat go.mod | grep 'github.com/onsi/ginkgo' | sed 's/.*\(v.*\)$$/\1/g')
KIND_VERSION=v0.20.0
YQ_VERSION=v4.33.3
CONTROLLER_RUNTIME_VERSION = $(shell cat go.mod | grep 'sigs.k8s.io/controller-runtime' | sed 's/.*\(v\(.*\)\.[^.]*\)$$/\2/g')
# ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
ENVTEST_K8S_VERSION ?= 1.25.0

Expand Down Expand Up @@ -375,7 +376,7 @@ SETUP_ENVTEST = $(LOCALBIN)/setup-envtest
.PHONY: setup-envtest
setup-envtest: $(SETUP_ENVTEST) ## Download setup-envtest locally if necessary.
$(SETUP_ENVTEST): $(LOCALBIN)
$(call go-get-tool,$(SETUP_ENVTEST),sigs.k8s.io/controller-runtime/tools/setup-envtest@latest)
$(call go-get-tool,$(SETUP_ENVTEST),sigs.k8s.io/controller-runtime/tools/setup-envtest@release-$(CONTROLLER_RUNTIME_VERSION))

# go-get-tool will 'go get' any package $2 and install it to $1.
define go-get-tool
Expand Down
4 changes: 2 additions & 2 deletions api/v1beta1/solrcloud_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -1115,15 +1115,15 @@ type SolrCloudStatus struct {
//+listMapKey:=name
SolrNodes []SolrNodeStatus `json:"solrNodes"`

// Replicas is the number of desired replicas in the cluster
// Replicas is the number of pods created by the StatefulSet
// +kubebuilder:validation:Minimum=0
// +kubebuilder:default=0
Replicas int32 `json:"replicas"`

// PodSelector for SolrCloud pods, required by the HPA
PodSelector string `json:"podSelector"`

// ReadyReplicas is the number of ready replicas in the cluster
// ReadyReplicas is the number of ready pods in the cluster
// +kubebuilder:validation:Minimum=0
// +kubebuilder:default=0
ReadyReplicas int32 `json:"readyReplicas"`
Expand Down
5 changes: 2 additions & 3 deletions config/crd/bases/solr.apache.org_solrclouds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16674,14 +16674,13 @@ spec:
type: string
readyReplicas:
default: 0
description: ReadyReplicas is the number of ready replicas in the
cluster
description: ReadyReplicas is the number of ready pods in the cluster
format: int32
minimum: 0
type: integer
replicas:
default: 0
description: Replicas is the number of desired replicas in the cluster
description: Replicas is the number of pods created by the StatefulSet
format: int32
minimum: 0
type: integer
Expand Down
4 changes: 2 additions & 2 deletions controllers/solr_cluster_ops_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -426,8 +426,8 @@ func handleManagedCloudRollingUpdate(ctx context.Context, r *SolrCloudReconciler
return
}

// cleanupManagedCloudScaleDown does the logic of cleaning-up an incomplete scale down operation.
// This will remove any bad readinessConditions that the scaleDown might have set when trying to scaleDown pods.
// cleanupManagedCloudRollingUpdate does the logic of cleaning-up an incomplete rolling update operation.
// This will remove any bad readinessConditions that the rollingUpdate might have set when trying to restart pods.
func cleanupManagedCloudRollingUpdate(ctx context.Context, r *SolrCloudReconciler, podList []corev1.Pod, logger logr.Logger) (err error) {
// First though, the scaleDown op might have set some pods to be "unready" before deletion. Undo that.
// Before doing anything to the pod, make sure that the pods do not have a stopped readiness condition
Expand Down
17 changes: 12 additions & 5 deletions controllers/solrcloud_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -437,7 +437,7 @@ func (r *SolrCloudReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
// Do not reconcile the storage finalizer unless we have PVC Labels that we know the Solr data PVCs are using.
// Otherwise it will delete all PVCs possibly
if len(statefulSet.Spec.Selector.MatchLabels) > 0 {
if err = r.reconcileStorageFinalizer(ctx, instance, statefulSet.Spec.Selector.MatchLabels, logger); err != nil {
if err = r.reconcileStorageFinalizer(ctx, instance, statefulSet, logger); err != nil {
logger.Error(err, "Cannot delete PVCs while garbage collecting after deletion.")
updateRequeueAfter(&requeueOrNot, time.Second*15)
}
Expand Down Expand Up @@ -490,6 +490,7 @@ func (r *SolrCloudReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
err = clearClusterOpLockWithPatch(ctx, r, statefulSet, "clusterOp not supported", logger)
}
if operationFound {
err = nil
if operationComplete {
if nextClusterOperation == nil {
// Once the operation is complete, finish the cluster operation by deleting the statefulSet annotations
Expand Down Expand Up @@ -935,9 +936,10 @@ func (r *SolrCloudReconciler) reconcileZk(ctx context.Context, logger logr.Logge
// Logic derived from:
// - https://book.kubebuilder.io/reference/using-finalizers.html
// - https://github.com/pravega/zookeeper-operator/blob/v0.2.9/pkg/controller/zookeepercluster/zookeepercluster_controller.go#L629
func (r *SolrCloudReconciler) reconcileStorageFinalizer(ctx context.Context, cloud *solrv1beta1.SolrCloud, pvcLabelSelector map[string]string, logger logr.Logger) error {
func (r *SolrCloudReconciler) reconcileStorageFinalizer(ctx context.Context, cloud *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, logger logr.Logger) error {
// If persistentStorage is being used by the cloud, and the reclaim policy is set to "Delete",
// then set a finalizer for the storage on the cloud, and delete the PVCs if the solrcloud has been deleted.
pvcLabelSelector := statefulSet.Spec.Selector.MatchLabels

if cloud.Spec.StorageOptions.PersistentStorage != nil && cloud.Spec.StorageOptions.PersistentStorage.VolumeReclaimPolicy == solrv1beta1.VolumeReclaimPolicyDelete {
if cloud.ObjectMeta.DeletionTimestamp.IsZero() {
Expand All @@ -949,7 +951,7 @@ func (r *SolrCloudReconciler) reconcileStorageFinalizer(ctx context.Context, clo
return err
}
}
return r.cleanupOrphanPVCs(ctx, cloud, pvcLabelSelector, logger)
return r.cleanupOrphanPVCs(ctx, cloud, statefulSet, pvcLabelSelector, logger)
} else if util.ContainsString(cloud.ObjectMeta.Finalizers, util.SolrStorageFinalizer) {
// The object is being deleted
logger.Info("Deleting PVCs for SolrCloud")
Expand Down Expand Up @@ -986,17 +988,22 @@ func (r *SolrCloudReconciler) getPVCCount(ctx context.Context, cloud *solrv1beta
return pvcCount, nil
}

func (r *SolrCloudReconciler) cleanupOrphanPVCs(ctx context.Context, cloud *solrv1beta1.SolrCloud, pvcLabelSelector map[string]string, logger logr.Logger) (err error) {
func (r *SolrCloudReconciler) cleanupOrphanPVCs(ctx context.Context, cloud *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, pvcLabelSelector map[string]string, logger logr.Logger) (err error) {
// this check should make sure we do not delete the PVCs before the STS has scaled down
if cloud.Status.ReadyReplicas == cloud.Status.Replicas {
pvcList, err := r.getPVCList(ctx, cloud, pvcLabelSelector)
if err != nil {
return err
}
// We only want to delete PVCs if we will not use them in the future, as in the user has asked for less replicas.
// Even if the statefulSet currently has less replicas, we don't want to delete them if we will eventually scale back up.
if len(pvcList.Items) > int(*cloud.Spec.Replicas) {
for _, pvcItem := range pvcList.Items {
// delete only Orphan PVCs
if util.IsPVCOrphan(pvcItem.Name, *cloud.Spec.Replicas) {
// for orphans, we will use the status replicas (which is derived from the statefulSet)
// Don't use the Spec replicas here, because we might be rolling down 1-by-1 and the PVCs for
// soon-to-be-deleted pods should not be deleted until the pod is deleted.
if util.IsPVCOrphan(pvcItem.Name, *statefulSet.Spec.Replicas) {
r.deletePVC(ctx, pvcItem, logger)
}
}
Expand Down
7 changes: 4 additions & 3 deletions controllers/solrcloud_controller_basic_auth_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ package controllers
import (
"context"
"fmt"

solrv1beta1 "github.com/apache/solr-operator/api/v1beta1"
"github.com/apache/solr-operator/controllers/util"
. "github.com/onsi/ginkgo/v2"
Expand Down Expand Up @@ -289,10 +290,10 @@ func expectBasicAuthConfigOnPodTemplateWithGomega(g Gomega, solrCloud *solrv1bet
g.Expect(basicAuthSecretVolMount.MountPath).To(Equal("/etc/secrets/"+secretName), "Wrong path used to mount Basic Auth volume")

expLivenessProbeCmd := fmt.Sprintf("JAVA_TOOL_OPTIONS=\"-Dbasicauth=$(cat /etc/secrets/%s-solrcloud-basic-auth/username):$(cat /etc/secrets/%s-solrcloud-basic-auth/password) -Dsolr.httpclient.builder.factory=org.apache.solr.client.solrj.impl.PreemptiveBasicAuthClientBuilderFactory\" "+
"solr api -get \"http://${SOLR_HOST}:8983%s\"",
"solr api -get \"http://${SOLR_HOST}:8983%s\" 2>&1 | grep -v JAVA_TOOL_OPTIONS",
solrCloud.Name, solrCloud.Name, expLivenessProbePath)
expReadinessProbeCmd := fmt.Sprintf("JAVA_TOOL_OPTIONS=\"-Dbasicauth=$(cat /etc/secrets/%s-solrcloud-basic-auth/username):$(cat /etc/secrets/%s-solrcloud-basic-auth/password) -Dsolr.httpclient.builder.factory=org.apache.solr.client.solrj.impl.PreemptiveBasicAuthClientBuilderFactory\" "+
"solr api -get \"http://${SOLR_HOST}:8983%s\"",
"solr api -get \"http://${SOLR_HOST}:8983%s\" 2>&1 | grep -v JAVA_TOOL_OPTIONS",
solrCloud.Name, solrCloud.Name, expReadinessProbePath)

g.Expect(mainContainer.LivenessProbe).To(Not(BeNil()), "main container should have a liveness probe defined")
Expand Down Expand Up @@ -350,7 +351,7 @@ func expectBasicAuthConfigOnPodTemplateWithGomega(g Gomega, solrCloud *solrv1bet

func expectPutSecurityJsonInZkCmd(g Gomega, expInitContainer *corev1.Container) {
g.Expect(expInitContainer).To(Not(BeNil()), "Didn't find the setup-zk InitContainer in the sts!")
expCmd := "ZK_SECURITY_JSON=$(/opt/solr/server/scripts/cloud-scripts/zkcli.sh -zkhost ${ZK_HOST} -cmd get /security.json); " +
expCmd := "ZK_SECURITY_JSON=$(/opt/solr/server/scripts/cloud-scripts/zkcli.sh -zkhost ${ZK_HOST} -cmd get /security.json || echo 'failed-to-get-security.json'); " +
"if [ ${#ZK_SECURITY_JSON} -lt 3 ]; then " +
"echo $SECURITY_JSON > /tmp/security.json; " +
"/opt/solr/server/scripts/cloud-scripts/zkcli.sh -zkhost ${ZK_HOST} -cmd putfile /security.json /tmp/security.json; echo \"put security.json in ZK\"; fi"
Expand Down
5 changes: 3 additions & 2 deletions controllers/util/prometheus_exporter_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,8 +207,9 @@ func GenerateSolrPrometheusExporterDeployment(solrPrometheusExporter *solr.SolrP
defaultProbeHandler := corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Scheme: corev1.URISchemeHTTP,
Path: "/metrics",
Port: intstr.FromInt(SolrMetricsPort),
// TODO: When 9.0 is the minimum supported version, this can be "/-/healthy"
Path: "/metrics?names[]=",
Port: intstr.FromInt(SolrMetricsPort),
},
}

Expand Down
4 changes: 2 additions & 2 deletions controllers/util/solr_scale_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ func BalanceReplicasForCluster(ctx context.Context, solrCloud *solr.SolrCloud, s
if !balanceComplete && err == nil {
logger.Info("Started balancing replicas across cluster.", "requestId", requestId)
requestInProgress = true
} else if err == nil {
} else if err != nil {
logger.Error(err, "Could not balance replicas across the cluster. Will try again.")
}
}
Expand All @@ -88,7 +88,7 @@ func BalanceReplicasForCluster(ctx context.Context, solrCloud *solr.SolrCloud, s

// Delete the async request Id if the async request is successful or failed.
// If the request failed, this will cause a retry since the next reconcile won't find the async requestId in Solr.
if asyncState == "completed" || asyncState == "failed" {
if !requestInProgress {
if _, err = solr_api.DeleteAsyncRequest(ctx, solrCloud, requestId); err != nil {
logger.Error(err, "Could not delete Async request status.", "requestId", requestId)
balanceComplete = false
Expand Down
7 changes: 5 additions & 2 deletions controllers/util/solr_security_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ func addHostHeaderToProbe(httpGet *corev1.HTTPGetAction, host string) {

func cmdToPutSecurityJsonInZk() string {
scriptsDir := "/opt/solr/server/scripts/cloud-scripts"
cmd := " ZK_SECURITY_JSON=$(%s/zkcli.sh -zkhost ${ZK_HOST} -cmd get /security.json); "
cmd := " ZK_SECURITY_JSON=$(%s/zkcli.sh -zkhost ${ZK_HOST} -cmd get /security.json || echo 'failed-to-get-security.json'); "
cmd += "if [ ${#ZK_SECURITY_JSON} -lt 3 ]; then echo $SECURITY_JSON > /tmp/security.json; %s/zkcli.sh -zkhost ${ZK_HOST} -cmd putfile /security.json /tmp/security.json; echo \"put security.json in ZK\"; fi"
return fmt.Sprintf(cmd, scriptsDir, scriptsDir)
}
Expand Down Expand Up @@ -496,13 +496,16 @@ func useSecureProbe(solrCloud *solr.SolrCloud, probe *corev1.Probe, mountPath st

// Future work - SOLR_TOOL_OPTIONS is only in 9.4.0, use JAVA_TOOL_OPTIONS until that is the minimum supported version
var javaToolOptionsStr string
var javaToolOptionsOutputFilter string
if len(javaToolOptions) > 0 {
javaToolOptionsStr = fmt.Sprintf("JAVA_TOOL_OPTIONS=%q ", strings.Join(javaToolOptions, " "))
javaToolOptionsOutputFilter = " 2>&1 | grep -v JAVA_TOOL_OPTIONS"
} else {
javaToolOptionsStr = ""
javaToolOptionsOutputFilter = ""
}

probeCommand := fmt.Sprintf("%ssolr api -get \"%s://${SOLR_HOST}:%d%s\"", javaToolOptionsStr, solrCloud.UrlScheme(false), probe.HTTPGet.Port.IntVal, probe.HTTPGet.Path)
probeCommand := fmt.Sprintf("%ssolr api -get \"%s://${SOLR_HOST}:%d%s\"%s", javaToolOptionsStr, solrCloud.UrlScheme(false), probe.HTTPGet.Port.IntVal, probe.HTTPGet.Path, javaToolOptionsOutputFilter)
probeCommand = regexp.MustCompile(`\s+`).ReplaceAllString(strings.TrimSpace(probeCommand), " ")

// use an Exec instead of an HTTP GET
Expand Down
2 changes: 1 addition & 1 deletion dependency_licenses.csv
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ golang.org/x/term,https://cs.opensource.google/go/x/term/+/v0.13.0:LICENSE,BSD-3
golang.org/x/text,https://cs.opensource.google/go/x/text/+/v0.13.0:LICENSE,BSD-3-Clause
golang.org/x/time/rate,https://cs.opensource.google/go/x/time/+/v0.3.0:LICENSE,BSD-3-Clause
gomodules.xyz/jsonpatch/v2,https://github.com/gomodules/jsonpatch/blob/v2.4.0/v2/LICENSE,Apache-2.0
google.golang.org/protobuf,https://github.com/protocolbuffers/protobuf-go/blob/v1.31.0/LICENSE,BSD-3-Clause
google.golang.org/protobuf,https://github.com/protocolbuffers/protobuf-go/blob/v1.33.0/LICENSE,BSD-3-Clause
gopkg.in/inf.v0,https://github.com/go-inf/inf/blob/v0.9.1/LICENSE,BSD-3-Clause
gopkg.in/yaml.v2,https://github.com/go-yaml/yaml/blob/v2.4.0/LICENSE,Apache-2.0
gopkg.in/yaml.v3,https://github.com/go-yaml/yaml/blob/v3.0.1/LICENSE,MIT
Expand Down
9 changes: 8 additions & 1 deletion docs/solr-cloud/solr-cloud-crd.md
Original file line number Diff line number Diff line change
Expand Up @@ -1000,6 +1000,12 @@ Take a moment to review these authorization rules so that you're aware of the ro
"name": "k8s-probe-0",
"role": null,
"collection": null,
"path": "/admin/info/system"
},
{
"name": "k8s-probe-1",
"role": null,
"collection": null,
"path": "/admin/info/health"
},
{
Expand Down Expand Up @@ -1057,7 +1063,7 @@ A few aspects of the default `security.json` configuration warrant a closer look
"name": "k8s-probe-0",
"role": null,
"collection": null,
"path": "/admin/info/health"
"path": "/admin/info/system"
}
```
In this case, the `"role":null` indicates this endpoint allows anonymous access by unknown users.
Expand Down Expand Up @@ -1153,6 +1159,7 @@ _Note: be sure to use a stronger password for real deployments_

Users need to ensure their `security.json` contains the user supplied in the `basicAuthSecret` has read access to the following endpoints:
```
/admin/info/system
/admin/info/health
/admin/collections
/admin/metrics
Expand Down
18 changes: 11 additions & 7 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,18 @@ require (
github.com/Masterminds/semver/v3 v3.2.1 // indirect
github.com/Masterminds/sprig/v3 v3.2.3 // indirect
github.com/Masterminds/squirrel v1.5.4 // indirect
github.com/Microsoft/hcsshim v0.11.0 // indirect
github.com/Microsoft/hcsshim v0.11.4 // indirect
github.com/asaskevich/govalidator v0.0.0-20200428143746-21a406dcc535 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/chai2010/gettext-go v1.0.2 // indirect
github.com/containerd/containerd v1.7.6 // indirect
github.com/containerd/containerd v1.7.11 // indirect
github.com/containerd/log v0.1.0 // indirect
github.com/cyphar/filepath-securejoin v0.2.4 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/docker/cli v24.0.6+incompatible // indirect
github.com/docker/distribution v2.8.2+incompatible // indirect
github.com/docker/docker v24.0.6+incompatible // indirect
github.com/docker/docker v24.0.9+incompatible // indirect
github.com/docker/docker-credential-helpers v0.7.0 // indirect
github.com/docker/go-connections v0.4.0 // indirect
github.com/docker/go-metrics v0.0.1 // indirect
Expand All @@ -50,6 +51,7 @@ require (
github.com/evanphx/json-patch/v5 v5.6.0 // indirect
github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d // indirect
github.com/fatih/color v1.15.0 // indirect
github.com/felixge/httpsnoop v1.0.3 // indirect
github.com/go-errors/errors v1.4.2 // indirect
github.com/go-gorp/gorp/v3 v3.0.5 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
Expand Down Expand Up @@ -121,8 +123,10 @@ require (
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
github.com/xeipuuv/gojsonschema v1.2.0 // indirect
github.com/xlab/treeprint v1.2.0 // indirect
go.opentelemetry.io/otel v1.15.0 // indirect
go.opentelemetry.io/otel/trace v1.15.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0 // indirect
go.opentelemetry.io/otel v1.19.0 // indirect
go.opentelemetry.io/otel/metric v1.19.0 // indirect
go.opentelemetry.io/otel/trace v1.19.0 // indirect
go.starlark.net v0.0.0-20230525235612-a134d8f9ddca // indirect
go.uber.org/multierr v1.11.0 // indirect
go.uber.org/zap v1.25.0 // indirect
Expand All @@ -137,8 +141,8 @@ require (
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20230911183012-2d3300fd4832 // indirect
google.golang.org/grpc v1.57.0 // indirect
google.golang.org/protobuf v1.31.0 // indirect
google.golang.org/grpc v1.58.3 // indirect
google.golang.org/protobuf v1.33.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
Expand Down
Loading

0 comments on commit 373dca3

Please sign in to comment.