From a93f475cb13c56b1d8d2fe3dbc712057ec91c193 Mon Sep 17 00:00:00 2001 From: Sebastien Guilloux Date: Fri, 27 Sep 2019 09:49:54 +0200 Subject: [PATCH] Don't monitor health if single node rolling upgrade in E2E tests (#1804) * Don't monitor health if single node rolling upgrade Disable continuous health check monitoring if we're doing a rolling upgrade of a 1-node cluster. This test was flappy, since a 1-node cluster upgrade will temporarily be Red while shards are initializing after the upgrade. The PR also introduces an optional "Skip" field in the TestStep. If defined, a function returning True will disable the associated test. * Check one-data-node clusters instead of one-node clusters * Log skipped tests --- test/e2e/test/elasticsearch/steps_mutation.go | 30 +++++++++++++++++-- test/e2e/test/step.go | 5 ++++ 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/test/e2e/test/elasticsearch/steps_mutation.go b/test/e2e/test/elasticsearch/steps_mutation.go index 5584fba0c0..c749cb02b6 100644 --- a/test/e2e/test/elasticsearch/steps_mutation.go +++ b/test/e2e/test/elasticsearch/steps_mutation.go @@ -50,9 +50,15 @@ func (b Builder) MutationTestSteps(k *test.K8sClient) test.StepList { }, test.Step{ Name: "Start querying Elasticsearch cluster health while mutation is going on", + Skip: func() bool { + // Don't monitor cluster health if we're doing a rolling upgrade of a one data node cluster. + // The cluster will become unavailable at some point, then its health will be red + // after the upgrade while shards are initializing. + return IsOneDataNodeRollingUpgrade(b) + }, Test: func(t *testing.T) { var err error - continuousHealthChecks, err = NewContinuousHealthCheck(b.Elasticsearch, k) + continuousHealthChecks, err = NewContinuousHealthCheck(b, k) require.NoError(t, err) continuousHealthChecks.Start() }, @@ -80,6 +86,9 @@ func (b Builder) MutationTestSteps(k *test.K8sClient) test.StepList { }, test.Step{ Name: "Elasticsearch cluster health should not have been red during mutation process", + Skip: func() bool { + return IsOneDataNodeRollingUpgrade(b) + }, Test: func(t *testing.T) { continuousHealthChecks.Stop() assert.Equal(t, 0, continuousHealthChecks.FailureCount) @@ -97,6 +106,21 @@ func (b Builder) MutationTestSteps(k *test.K8sClient) test.StepList { }) } +func IsOneDataNodeRollingUpgrade(b Builder) bool { + if b.MutatedFrom == nil { + return false + } + initial := b.MutatedFrom.Elasticsearch + mutated := b.Elasticsearch + // consider we're in the 1-node rolling upgrade scenario if we mutate + // from one data node to one data node with the same name + if MustNumDataNodes(initial) == 1 && MustNumDataNodes(mutated) == 1 && + initial.Spec.Nodes[0].Name == mutated.Spec.Nodes[0].Name { + return true + } + return false +} + // ContinuousHealthCheckFailure represents an health check failure type ContinuousHealthCheckFailure struct { err error @@ -114,8 +138,8 @@ type ContinuousHealthCheck struct { } // NewContinuousHealthCheck sets up a ContinuousHealthCheck struct -func NewContinuousHealthCheck(es estype.Elasticsearch, k *test.K8sClient) (*ContinuousHealthCheck, error) { - esClient, err := NewElasticsearchClient(es, k) +func NewContinuousHealthCheck(b Builder, k *test.K8sClient) (*ContinuousHealthCheck, error) { + esClient, err := NewElasticsearchClient(b.Elasticsearch, k) if err != nil { return nil, err } diff --git a/test/e2e/test/step.go b/test/e2e/test/step.go index c404219daf..b280744c9b 100644 --- a/test/e2e/test/step.go +++ b/test/e2e/test/step.go @@ -15,6 +15,7 @@ import ( type Step struct { Name string Test func(t *testing.T) + Skip func() bool // returns true if the test should be skipped } // StepList defines a list of Step @@ -33,6 +34,10 @@ func (l StepList) WithStep(testStep Step) StepList { // RunSequential runs the StepList sequentially, and fails fast on first error. func (l StepList) RunSequential(t *testing.T) { for _, ts := range l { + if ts.Skip != nil && ts.Skip() { + log.Info("Skipping test", "name", ts.Name) + continue + } if !t.Run(ts.Name, ts.Test) { logf.Log.Error(errors.New("test failure"), "stopping early") t.FailNow()