Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

kill tidb-operator pods randomly in e2e #2125

Merged
merged 3 commits into from
Apr 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions ci/pingcap_tidb_operator_build_kind.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -238,13 +238,13 @@ def call(BUILD_BRANCH, CREDENTIALS_ID, CODECOV_CREDENTIALS_ID) {
def MIRRORS = "DOCKER_IO_MIRROR=http://172.16.4.143:5000 QUAY_IO_MIRROR=http://172.16.4.143:5001"
def builds = [:]
builds["E2E v1.12.10"] = {
build("${MIRRORS} RUNNER_SUITE_NAME=e2e-v1.12 IMAGE_TAG=${GITHASH} SKIP_BUILD=y GINKGO_NODES=6 KUBE_VERSION=v1.12.10 REPORT_DIR=\$(pwd)/artifacts REPORT_PREFIX=v1.12.10_ ./hack/e2e.sh -- --preload-images", artifacts)
build("${MIRRORS} RUNNER_SUITE_NAME=e2e-v1.12 IMAGE_TAG=${GITHASH} SKIP_BUILD=y GINKGO_NODES=6 KUBE_VERSION=v1.12.10 REPORT_DIR=\$(pwd)/artifacts REPORT_PREFIX=v1.12.10_ ./hack/e2e.sh -- --preload-images --operator-killer", artifacts)
}
builds["E2E v1.12.10 AdvancedStatefulSet"] = {
build("${MIRRORS} RUNNER_SUITE_NAME=e2e-v1.12-advanced-statefulset IMAGE_TAG=${GITHASH} SKIP_BUILD=y GINKGO_NODES=6 KUBE_VERSION=v1.12.10 REPORT_DIR=\$(pwd)/artifacts REPORT_PREFIX=v1.12.10_advanced_statefulset ./hack/e2e.sh -- --preload-images --operator-features AdvancedStatefulSet=true", artifacts)
}
builds["E2E v1.18.0"] = {
build("${MIRRORS} RUNNER_SUITE_NAME=e2e-v1.18 IMAGE_TAG=${GITHASH} SKIP_BUILD=y GINKGO_NODES=6 KUBE_VERSION=v1.18.0 REPORT_DIR=\$(pwd)/artifacts REPORT_PREFIX=v1.18.0_ ./hack/e2e.sh -- -preload-images", artifacts)
build("${MIRRORS} RUNNER_SUITE_NAME=e2e-v1.18 IMAGE_TAG=${GITHASH} SKIP_BUILD=y GINKGO_NODES=6 KUBE_VERSION=v1.18.0 REPORT_DIR=\$(pwd)/artifacts REPORT_PREFIX=v1.18.0_ ./hack/e2e.sh -- -preload-images --operator-killer", artifacts)
}
builds["E2E v1.12.10 Serial"] = {
build("${MIRRORS} RUNNER_SUITE_NAME=e2e-v1.12-serial IMAGE_TAG=${GITHASH} SKIP_BUILD=y KUBE_VERSION=v1.12.10 REPORT_DIR=\$(pwd)/artifacts REPORT_PREFIX=v1.12.10_serial_ ./hack/e2e.sh -- --preload-images --ginkgo.focus='\\[Serial\\]' --install-operator=false", artifacts)
Expand Down
7 changes: 4 additions & 3 deletions tests/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,9 @@ import (
"os"
"strings"

"github.com/pingcap/tidb-operator/tests/slack"

utiloperator "github.com/pingcap/tidb-operator/tests/e2e/util/operator"
"github.com/pingcap/tidb-operator/tests/pkg/blockwriter"

"github.com/pingcap/tidb-operator/tests/slack"
"gopkg.in/yaml.v2"
"k8s.io/klog"
)
Expand Down Expand Up @@ -77,6 +76,8 @@ type Config struct {
E2EImage string `yaml:"e2e_image" json:"e2e_image"`

PreloadImages bool `yaml:"preload_images" json:"preload_images"`

OperatorKiller utiloperator.OperatorKillerConfig
}

// Nodes defines a series of nodes that belong to the same physical node.
Expand Down
4 changes: 4 additions & 0 deletions tests/e2e/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"flag"
"fmt"
"io/ioutil"
"time"

"github.com/pingcap/tidb-operator/tests"
v1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -46,6 +47,9 @@ func RegisterTiDBOperatorFlags(flags *flag.FlagSet) {
flags.StringVar(&TestConfig.ChartDir, "chart-dir", "", "chart dir")
flags.BoolVar(&TestConfig.PreloadImages, "preload-images", false, "if set, preload images in the bootstrap of e2e process")
flags.StringVar(&TestConfig.BackupImage, "backup-image", "", "backup image")
flags.BoolVar(&TestConfig.OperatorKiller.Enabled, "operator-killer", false, "whether to enable operator kill")
flags.DurationVar(&TestConfig.OperatorKiller.Interval, "operator-killer-interval", 5*time.Minute, "interval between operator kills")
flags.Float64Var(&TestConfig.OperatorKiller.JitterFactor, "operator-killer-jitter-factor", 1, "factor used to jitter operator kills")
}

func AfterReadingAllFlags() error {
Expand Down
24 changes: 24 additions & 0 deletions tests/e2e/e2e.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,12 @@ import (
e2econfig "github.com/pingcap/tidb-operator/tests/e2e/config"
utilimage "github.com/pingcap/tidb-operator/tests/e2e/util/image"
utilnode "github.com/pingcap/tidb-operator/tests/e2e/util/node"
utiloperator "github.com/pingcap/tidb-operator/tests/e2e/util/operator"
v1 "k8s.io/api/core/v1"
storagev1 "k8s.io/api/storage/v1"
apiextensionsclientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
runtimeutils "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes"
Expand All @@ -58,6 +60,10 @@ import (
_ "k8s.io/kubernetes/test/e2e/framework/providers/gce"
)

var (
operatorKillerStopCh chan struct{}
)

// This is modified from framework.SetupSuite().
// setupSuite is the boilerplate that can be used to setup ginkgo test suites, on the SynchronizedBeforeSuite step.
// There are certain operations we only want to run once per overall test invocation
Expand Down Expand Up @@ -256,6 +262,21 @@ var _ = ginkgo.SynchronizedBeforeSuite(func() []byte {
ginkgo.By("Installing tidb-operator")
oa.CleanOperatorOrDie(ocfg)
oa.DeployOperatorOrDie(ocfg)
if e2econfig.TestConfig.OperatorKiller.Enabled {
operatorKiller := utiloperator.NewOperatorKiller(e2econfig.TestConfig.OperatorKiller, kubeCli, func() ([]v1.Pod, error) {
podList, err := kubeCli.CoreV1().Pods(ocfg.Namespace).List(metav1.ListOptions{
LabelSelector: labels.SelectorFromSet(map[string]string{
"app.kubernetes.io/name": "tidb-operator",
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is hard-coded, but I think it's fine in e2e...

}).String(),
})
if err != nil {
return nil, err
}
return podList.Items, nil
})
// Assign to the package-level operatorKillerStopCh with "=" rather than
// declaring a new local with ":=". A local would shadow the package variable,
// leaving it nil, so the after-suite hook would never close the channel and
// the operator killer goroutine would never be told to stop.
operatorKillerStopCh = make(chan struct{})
go operatorKiller.Run(operatorKillerStopCh)
}
} else {
ginkgo.By("Skip installing tidb-operator")
}
Expand All @@ -269,6 +290,9 @@ var _ = ginkgo.SynchronizedAfterSuite(func() {
framework.CleanupSuite()
}, func() {
framework.AfterSuiteActions()
if operatorKillerStopCh != nil {
close(operatorKillerStopCh)
}
})

// RunE2ETests checks configuration parameters (specified through flags) and then runs
Expand Down
70 changes: 70 additions & 0 deletions tests/e2e/util/operator/operator.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Copyright 2020 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package operator

import (
"time"

v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/test/e2e/framework"
)

// OperatorKillerConfig holds the settings that control the operator killer.
type OperatorKillerConfig struct {
	// Enabled switches the operator killer on or off.
	Enabled bool

	// Interval is the base amount of time between two rounds of operator
	// pod deletions.
	Interval time.Duration

	// JitterFactor widens the gap between rounds: the operator pods are
	// deleted after a delay in the range
	// [Interval, Interval * (1.0 + JitterFactor)].
	JitterFactor float64
}

// OperatorKiller simulates tidb-operator failures by deleting the operator's
// pods.
type OperatorKiller struct {
	// config carries the enable switch, interval and jitter settings.
	config OperatorKillerConfig
	// client is the Kubernetes client used to delete pods.
	client kubernetes.Interface
	// podLister returns the pods that should be deleted in each round.
	podLister func() ([]v1.Pod, error)
}

// NewOperatorKiller builds an OperatorKiller from the given configuration,
// Kubernetes client and pod lister. The lister is called on every round to
// obtain the pods to delete.
func NewOperatorKiller(config OperatorKillerConfig, client kubernetes.Interface, podLister func() ([]v1.Pod, error)) *OperatorKiller {
	killer := new(OperatorKiller)
	killer.config = config
	killer.client = client
	killer.podLister = podLister
	return killer
}

// Run deletes every pod returned by the pod lister once per (jittered)
// interval until stopCh is closed. It blocks until then, so callers normally
// invoke it in its own goroutine.
//
// The first round is delayed by one interval so that the operator is not
// killed immediately after Run is called. Closing stopCh during that initial
// delay makes Run return right away. Failures to list or delete pods are
// logged and do not stop the loop.
func (k *OperatorKiller) Run(stopCh <-chan struct{}) {
	// wait.JitterUntil starts work immediately, so wait first.
	//
	// wait.Jitter substitutes a factor of 1.0 when given a non-positive
	// factor, whereas wait.JitterUntil disables jitter in that case. Only
	// jitter the initial delay for a positive factor so the first round
	// follows the same schedule as the later ones.
	initialDelay := k.config.Interval
	if k.config.JitterFactor > 0 {
		initialDelay = wait.Jitter(k.config.Interval, k.config.JitterFactor)
	}

	// Use a timer instead of time.Sleep so that a stop request is honoured
	// during the initial delay as well.
	timer := time.NewTimer(initialDelay)
	defer timer.Stop()
	select {
	case <-stopCh:
		return
	case <-timer.C:
	}

	wait.JitterUntil(func() {
		pods, err := k.podLister()
		if err != nil {
			framework.Logf("failed to list operator pods: %v", err)
			return
		}
		// Index into the slice to avoid copying each Pod value.
		for i := range pods {
			pod := &pods[i]
			if err := k.client.CoreV1().Pods(pod.Namespace).Delete(pod.Name, &metav1.DeleteOptions{}); err != nil {
				framework.Logf("failed to delete pod %s/%s: %v", pod.Namespace, pod.Name, err)
				continue
			}
			framework.Logf("successfully deleted tidb-operator pod %s/%s", pod.Namespace, pod.Name)
		}
	}, k.config.Interval, k.config.JitterFactor, true, stopCh)
}