Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Kibana to terminate all Pods before restarting during version change #2137

Merged
merged 7 commits into from
Nov 27, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/kibana.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,8 @@ In this case all the instances must share the same encryption key.

This can be done by setting the `xpack.security.encryptionKey` property using a secure setting as described in the next section.

Note that while most reconfigurations of your Kibana instances will be carried out in rolling upgrade fashion, all version upgrades will cause Kibana downtime. This is due to the link:https://www.elastic.co/guide/en/kibana/current/upgrade.html[requirement] to run only a single version of Kibana at any given time.

[float]
[id="{p}-kibana-secure-settings"]
=== Secure Settings
Expand Down
1 change: 1 addition & 0 deletions pkg/controller/apmserver/apmserver_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,7 @@ func (r *ReconcileApmServer) deploymentParams(
Selector: labels.NewLabels(as.Name),
Labels: labels.NewLabels(as.Name),
PodTemplateSpec: podSpec,
Strategy: appsv1.RollingUpdateDeploymentStrategyType,
}, nil
}

Expand Down
2 changes: 2 additions & 0 deletions pkg/controller/apmserver/apmserver_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"github.com/elastic/cloud-on-k8s/pkg/utils/k8s"
"github.com/go-test/deep"
"github.com/stretchr/testify/require"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -88,6 +89,7 @@ func expectedDeploymentParams() testParams {
Namespace: "",
Selector: map[string]string{"apm.k8s.elastic.co/name": "test-apm-server", "common.k8s.elastic.co/type": "apm-server"},
Labels: map[string]string{"apm.k8s.elastic.co/name": "test-apm-server", "common.k8s.elastic.co/type": "apm-server"},
Strategy: appsv1.RollingUpdateDeploymentStrategyType,
PodTemplateSpec: corev1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
Expand Down
4 changes: 4 additions & 0 deletions pkg/controller/common/deployment/reconcile.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ type Params struct {
Labels map[string]string
PodTemplateSpec corev1.PodTemplateSpec
Replicas int32
Strategy appsv1.DeploymentStrategyType
}

// New creates a Deployment from the given params.
Expand All @@ -45,6 +46,9 @@ func New(params Params) appsv1.Deployment {
},
Template: params.PodTemplateSpec,
Replicas: &params.Replicas,
Strategy: appsv1.DeploymentStrategy{
Type: params.Strategy,
},
},
}
}
Expand Down
34 changes: 33 additions & 1 deletion pkg/controller/kibana/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,12 @@ import (
"github.com/elastic/cloud-on-k8s/pkg/controller/kibana/version/version7"
"github.com/elastic/cloud-on-k8s/pkg/controller/kibana/volume"
"github.com/elastic/cloud-on-k8s/pkg/utils/k8s"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/record"
"sigs.k8s.io/controller-runtime/pkg/client"
)

// initContainersParameters is used to generate the init container that will load the secure settings into a keystore
Expand Down Expand Up @@ -87,6 +89,29 @@ func secretWatchFinalizer(kibana kbtype.Kibana, watches watches.DynamicWatches)
}
}

// getStrategyType decides which deployment strategy (RollingUpdate or Recreate) to use based on whether the version
// upgrade is in progress. Kibana does not support a smooth rolling upgrade from one version to another:
// running multiple versions simultaneously may lead to concurrency bugs and data corruption.
func (d *driver) getStrategyType(kb *kbtype.Kibana) (appsv1.DeploymentStrategyType, error) {
var pods corev1.PodList
var labels client.MatchingLabels = map[string]string{label.KibanaNameLabelName: kb.Name}
if err := d.client.List(&pods, client.InNamespace(kb.Namespace), labels); err != nil {
return "", err
}

for _, pod := range pods.Items {
ver, ok := pod.Labels[label.KibanaVersionLabelName]
// if label is missing we assume that the last reconciliation was done by previous version of the operator
// to be safe, we assume the Kibana version has changed when operator was offline and use Recreate,
// otherwise we may run into data corruption/data loss.
if !ok || ver != kb.Spec.Version {
return appsv1.RecreateDeploymentStrategyType, nil
}
}

return appsv1.RollingUpdateDeploymentStrategyType, nil
}

func (d *driver) deploymentParams(kb *kbtype.Kibana) (deployment.Params, error) {
// setup a keystore with secure settings in an init container, if specified by the user
keystoreResources, err := keystore.NewResources(
Expand Down Expand Up @@ -157,7 +182,6 @@ func (d *driver) deploymentParams(kb *kbtype.Kibana) (deployment.Params, error)
kibanaPodSpec.Spec.InitContainers[i].VolumeMounts = append(kibanaPodSpec.Spec.InitContainers[i].VolumeMounts,
esCertsVolume.VolumeMount())
}

}

if kb.Spec.HTTP.TLS.Enabled() {
Expand Down Expand Up @@ -197,13 +221,20 @@ func (d *driver) deploymentParams(kb *kbtype.Kibana) (deployment.Params, error)
// changes, which will trigger a rolling update)
kibanaPodSpec.Labels[configChecksumLabel] = fmt.Sprintf("%x", configChecksum.Sum(nil))

// decide the strategy type
strategyType, err := d.getStrategyType(kb)
if err != nil {
return deployment.Params{}, err
}

return deployment.Params{
Name: kbname.KBNamer.Suffix(kb.Name),
Namespace: kb.Namespace,
Replicas: kb.Spec.Count,
Selector: label.NewLabels(kb.Name),
Labels: label.NewLabels(kb.Name),
PodTemplateSpec: kibanaPodSpec,
Strategy: strategyType,
}, nil
}

Expand Down Expand Up @@ -245,6 +276,7 @@ func (d *driver) Reconcile(
if err != nil {
return results.WithError(err)
}

expectedDp := deployment.New(deploymentParams)
reconciledDp, err := deployment.Reconcile(d.client, d.scheme, expectedDp, kb)
if err != nil {
Expand Down
190 changes: 184 additions & 6 deletions pkg/controller/kibana/driver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
package kibana

import (
"errors"
"fmt"
"testing"

"github.com/elastic/cloud-on-k8s/pkg/apis/common/v1beta1"
Expand All @@ -14,24 +16,197 @@ import (
"github.com/elastic/cloud-on-k8s/pkg/controller/common/deployment"
"github.com/elastic/cloud-on-k8s/pkg/controller/common/version"
"github.com/elastic/cloud-on-k8s/pkg/controller/common/watches"
"github.com/elastic/cloud-on-k8s/pkg/controller/kibana/label"
"github.com/elastic/cloud-on-k8s/pkg/controller/kibana/pod"
"github.com/elastic/cloud-on-k8s/pkg/controller/kibana/volume"
"github.com/elastic/cloud-on-k8s/pkg/utils/k8s"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/tools/record"
"sigs.k8s.io/controller-runtime/pkg/client"
)

var customResourceLimits = corev1.ResourceRequirements{
Limits: corev1.ResourceList{corev1.ResourceMemory: resource.MustParse("2Gi")},
}

type failingClient struct {
k8s.Client
}

func (f *failingClient) List(list runtime.Object, opts ...client.ListOption) error {
return errors.New("client error")
}

func Test_getStrategyType(t *testing.T) {
// creates `count` of pods belonging to `kbName` Kibana and to `rs-kbName-version` ReplicaSet
getPods := func(kbName string, podCount int, version string) []runtime.Object {
var result []runtime.Object
for i := 0; i < podCount; i++ {
result = append(result, &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
OwnerReferences: []metav1.OwnerReference{
{Name: fmt.Sprintf("rs-%v-%v", kbName, version)},
},
Name: fmt.Sprintf("pod-%v-%v-%v", kbName, version, i),
Namespace: "default",
Labels: map[string]string{
label.KibanaNameLabelName: kbName,
label.KibanaVersionLabelName: version,
},
},
})
}
return result
}

clearVersionLabels := func(objects []runtime.Object) []runtime.Object {
for _, object := range objects {
pod, ok := object.(*corev1.Pod)
if !ok {
t.FailNow()
}

delete(pod.Labels, label.KibanaVersionLabelName)
}

return objects
}

tests := []struct {
name string
expectedKbName string
expectedVersion string
initialObjects []runtime.Object
clientError bool
wantErr bool
wantStrategy appsv1.DeploymentStrategyType
}{
{
name: "Pods not created yet",
expectedVersion: "7.4.0",
expectedKbName: "test",
initialObjects: []runtime.Object{},
clientError: false,
wantErr: false,
wantStrategy: appsv1.RollingUpdateDeploymentStrategyType,
},
{
name: "Versions match",
expectedVersion: "7.4.0",
expectedKbName: "test",
initialObjects: getPods("test", 3, "7.4.0"),
clientError: false,
wantErr: false,
wantStrategy: appsv1.RollingUpdateDeploymentStrategyType,
},
{
name: "Versions match - multiple kibana deployments",
expectedVersion: "7.5.0",
expectedKbName: "test2",
initialObjects: append(getPods("test", 3, "7.4.0"), getPods("test2", 3, "7.5.0")...),
clientError: false,
wantErr: false,
wantStrategy: appsv1.RollingUpdateDeploymentStrategyType,
},
{
name: "Version mismatch - single kibana deployment",
expectedVersion: "7.5.0",
expectedKbName: "test",
initialObjects: getPods("test", 3, "7.4.0"),
clientError: false,
wantErr: false,
wantStrategy: appsv1.RecreateDeploymentStrategyType,
},
{
name: "Version mismatch - pods partially behind",
expectedVersion: "7.5.0",
expectedKbName: "test",
initialObjects: append(getPods("test", 2, "7.5.0"), getPods("test", 1, "7.4.0")...),
clientError: false,
wantErr: false,
wantStrategy: appsv1.RecreateDeploymentStrategyType,
},
{
name: "Version mismatch - multiple kibana deployments",
expectedVersion: "7.5.0",
expectedKbName: "test2",
initialObjects: append(getPods("test", 3, "7.5.0"), getPods("test2", 3, "7.4.0")...),
clientError: false,
wantErr: false,
wantStrategy: appsv1.RecreateDeploymentStrategyType,
},
{
name: "Version mismatch - multiple versions in flight",
expectedVersion: "7.5.0",
expectedKbName: "test",
initialObjects: append(
getPods("test", 1, "7.5.0"),
append(
getPods("test", 1, "7.4.0"),
getPods("test", 1, "7.3.0")...)...),
clientError: false,
wantErr: false,
wantStrategy: appsv1.RecreateDeploymentStrategyType,
},
{
name: "Version label missing (operator upgrade case), should assume spec changed",
expectedVersion: "7.5.0",
expectedKbName: "test",
initialObjects: clearVersionLabels(getPods("test", 3, "7.5.0")),
clientError: false,
wantErr: false,
wantStrategy: appsv1.RecreateDeploymentStrategyType,
},
{
name: "Client error",
expectedVersion: "7.4.0",
expectedKbName: "test",
initialObjects: getPods("test", 2, "7.4.0"),
clientError: true,
wantErr: true,
wantStrategy: "",
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
w := watches.NewDynamicWatches()
err := w.Secrets.InjectScheme(scheme.Scheme)
assert.NoError(t, err)

kb := kibanaFixture()
kb.Name = tt.expectedKbName
kb.Spec.Version = tt.expectedVersion
kbVersion, err := version.Parse(kb.Spec.Version)
assert.NoError(t, err)

client := k8s.WrappedFakeClient(tt.initialObjects...)
if tt.clientError {
client = &failingClient{}
}

d, err := newDriver(client, scheme.Scheme, *kbVersion, w, record.NewFakeRecorder(100))
assert.NoError(t, err)

strategy, err := d.getStrategyType(kb)
if tt.wantErr {
assert.Empty(t, strategy)
assert.Error(t, err)
} else {
assert.Equal(t, tt.wantStrategy, strategy)
}
})
}
}

func TestDriverDeploymentParams(t *testing.T) {
type args struct {
kb func() *kbtype.Kibana
Expand Down Expand Up @@ -148,11 +323,7 @@ func TestDriverDeploymentParams(t *testing.T) {
},
want: func() deployment.Params {
p := expectedDeploymentParams()
p.PodTemplateSpec.Labels = map[string]string{
"common.k8s.elastic.co/type": "kibana",
"kibana.k8s.elastic.co/name": "test",
"kibana.k8s.elastic.co/config-checksum": "c5496152d789682387b90ea9b94efcd82a2c6f572f40c016fb86c0d7",
}
p.PodTemplateSpec.Labels["kibana.k8s.elastic.co/config-checksum"] = "c5496152d789682387b90ea9b94efcd82a2c6f572f40c016fb86c0d7"
return p
}(),
wantErr: false,
Expand All @@ -169,6 +340,7 @@ func TestDriverDeploymentParams(t *testing.T) {
},
want: func() deployment.Params {
p := expectedDeploymentParams()
p.PodTemplateSpec.Labels["kibana.k8s.elastic.co/version"] = "6.5.0"
return p
}(),
wantErr: false,
Expand All @@ -183,7 +355,11 @@ func TestDriverDeploymentParams(t *testing.T) {
},
initialObjects: defaultInitialObjects,
},
want: expectedDeploymentParams(),
want: func() deployment.Params {
p := expectedDeploymentParams()
p.PodTemplateSpec.Labels["kibana.k8s.elastic.co/version"] = "6.6.0"
return p
}(),
wantErr: false,
},
}
Expand Down Expand Up @@ -221,12 +397,14 @@ func expectedDeploymentParams() deployment.Params {
Selector: map[string]string{"common.k8s.elastic.co/type": "kibana", "kibana.k8s.elastic.co/name": "test"},
Labels: map[string]string{"common.k8s.elastic.co/type": "kibana", "kibana.k8s.elastic.co/name": "test"},
Replicas: 1,
Strategy: appsv1.RollingUpdateDeploymentStrategyType,
PodTemplateSpec: corev1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"common.k8s.elastic.co/type": "kibana",
"kibana.k8s.elastic.co/name": "test",
"kibana.k8s.elastic.co/config-checksum": "c530a02188193a560326ce91e34fc62dcbd5722b45534a3f60957663",
"kibana.k8s.elastic.co/version": "7.0.0",
},
},
Spec: corev1.PodSpec{
Expand Down
3 changes: 3 additions & 0 deletions pkg/controller/kibana/label/labels.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ const (
// KibanaNameLabelName used to represent a Kibana in k8s resources
KibanaNameLabelName = "kibana.k8s.elastic.co/name"

// KibanaVersionLabelName used to propagate Kibana version from the spec to the pods
KibanaVersionLabelName = "kibana.k8s.elastic.co/version"

// Type represents the Kibana type
Type = "kibana"
)
Expand Down
Loading