From f6937cbd9cc2bcb21c1a6c3f75068181fab854fb Mon Sep 17 00:00:00 2001 From: Yecheng Fu Date: Mon, 30 Mar 2020 17:18:47 +0800 Subject: [PATCH] Automated cherry pick of #2013: add spec.paused field to pause the tidb cluster syncing (#2064) * add spec.paused field to pause the tidb cluster syncing * support auto-failover when paused is enabled * Revert "support auto-failover when paused is enabled" This reverts commit 346f94e4c31dfea5147e6024c817693965085307. * fix --- docs/api-references/docs.html | 26 ++++++++ manifests/crd.yaml | 4 ++ .../pingcap/v1alpha1/openapi_generated.go | 7 ++ pkg/apis/pingcap/v1alpha1/types.go | 5 ++ pkg/manager/member/pd_member_manager.go | 29 +++++++-- pkg/manager/member/pump_member_manager.go | 23 +++++-- pkg/manager/member/tidb_member_manager.go | 28 ++++++-- pkg/manager/member/tikv_member_manager.go | 23 +++++-- tests/e2e/tidbcluster/tidbcluster.go | 64 +++++++++++++++++++ 9 files changed, 192 insertions(+), 17 deletions(-) diff --git a/docs/api-references/docs.html b/docs/api-references/docs.html index 900a1d105b..e8fbc31d28 100644 --- a/docs/api-references/docs.html +++ b/docs/api-references/docs.html @@ -739,6 +739,19 @@

TidbCluster +paused
+ +bool + + + +(Optional) +

Indicates that the tidb cluster is paused and will not be processed by +the controller.

+ + + + version
string @@ -12114,6 +12127,19 @@

TidbClusterSpec +paused
+ +bool + + + +(Optional) +

Indicates that the tidb cluster is paused and will not be processed by +the controller.

+ + + + version
string diff --git a/manifests/crd.yaml b/manifests/crd.yaml index f30ef00f70..21ddc555c9 100644 --- a/manifests/crd.yaml +++ b/manifests/crd.yaml @@ -700,6 +700,10 @@ spec: description: Base node selectors of TiDB cluster Pods, components may add or override selectors upon this respectively type: object + paused: + description: Indicates that the tidb cluster is paused and will not + be processed by the controller. + type: boolean pd: description: PDSpec contains details of PD members properties: diff --git a/pkg/apis/pingcap/v1alpha1/openapi_generated.go b/pkg/apis/pingcap/v1alpha1/openapi_generated.go index ecdab749b9..f8e4924117 100644 --- a/pkg/apis/pingcap/v1alpha1/openapi_generated.go +++ b/pkg/apis/pingcap/v1alpha1/openapi_generated.go @@ -6367,6 +6367,13 @@ func schema_pkg_apis_pingcap_v1alpha1_TidbClusterSpec(ref common.ReferenceCallba Ref: ref("github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.HelperSpec"), }, }, + "paused": { + SchemaProps: spec.SchemaProps{ + Description: "Indicates that the tidb cluster is paused and will not be processed by the controller.", + Type: []string{"boolean"}, + Format: "", + }, + }, "version": { SchemaProps: spec.SchemaProps{ Description: "TiDB cluster version", diff --git a/pkg/apis/pingcap/v1alpha1/types.go b/pkg/apis/pingcap/v1alpha1/types.go index 008769a6d8..5bd0dfed89 100644 --- a/pkg/apis/pingcap/v1alpha1/types.go +++ b/pkg/apis/pingcap/v1alpha1/types.go @@ -119,6 +119,11 @@ type TidbClusterSpec struct { // +optional Helper *HelperSpec `json:"helper,omitempty"` + // Indicates that the tidb cluster is paused and will not be processed by + // the controller. + // +optional + Paused bool `json:"paused,omitempty"` + // TODO: remove optional after defaulting logic introduced // TiDB cluster version // +optional diff --git a/pkg/manager/member/pd_member_manager.go b/pkg/manager/member/pd_member_manager.go index 75dd9cd46b..ac78b88342 100644 --- a/pkg/manager/member/pd_member_manager.go +++ b/pkg/manager/member/pd_member_manager.go @@ -109,6 +109,11 @@ func (pmm *pdMemberManager) Sync(tc *v1alpha1.TidbCluster) error { } func (pmm *pdMemberManager) syncPDServiceForTidbCluster(tc *v1alpha1.TidbCluster) error { + if tc.Spec.Paused { + klog.V(4).Infof("tidb cluster %s/%s is paused, skip syncing for pd service", tc.GetNamespace(), tc.GetName()) + return nil + } + ns := tc.GetNamespace() tcName := tc.GetName() @@ -148,6 +153,11 @@ func (pmm *pdMemberManager) syncPDServiceForTidbCluster(tc *v1alpha1.TidbCluster } func (pmm *pdMemberManager) syncPDHeadlessServiceForTidbCluster(tc *v1alpha1.TidbCluster) error { + if tc.Spec.Paused { + klog.V(4).Infof("tidb cluster %s/%s is paused, skip syncing for pd headless service", tc.GetNamespace(), tc.GetName()) + return nil + } + ns := tc.GetNamespace() tcName := tc.GetName() @@ -193,6 +203,16 @@ func (pmm *pdMemberManager) syncPDStatefulSetForTidbCluster(tc *v1alpha1.TidbClu setNotExist := errors.IsNotFound(err) oldPDSet := oldPDSetTmp.DeepCopy() + + if err := pmm.syncTidbClusterStatus(tc, oldPDSet); err != nil { + klog.Errorf("failed to sync TidbCluster: [%s/%s]'s status, error: %v", ns, tcName, err) + } + + if tc.Spec.Paused { + klog.V(4).Infof("tidb cluster %s/%s is paused, skip syncing for pd statefulset", tc.GetNamespace(), tc.GetName()) + return nil + } + cm, err := pmm.syncPDConfigMap(tc, oldPDSet) if err != nil { return err @@ -213,10 +233,6 @@ func (pmm *pdMemberManager) syncPDStatefulSetForTidbCluster(tc *v1alpha1.TidbClu return controller.RequeueErrorf("TidbCluster: [%s/%s], waiting for PD cluster running", ns, tcName) } - if err := pmm.syncTidbClusterStatus(tc, oldPDSet); err != nil { - klog.Errorf("failed to sync TidbCluster: [%s/%s]'s status, error: %v", ns, tcName, err) - } - if !tc.Status.PD.Synced { force := NeedForceUpgrade(tc) if force { @@ -251,6 +267,11 @@ func (pmm *pdMemberManager) syncPDStatefulSetForTidbCluster(tc *v1alpha1.TidbClu } func (pmm *pdMemberManager) syncTidbClusterStatus(tc *v1alpha1.TidbCluster, set *apps.StatefulSet) error { + if set == nil { + // skip if not created yet + return nil + } + ns := tc.GetNamespace() tcName := tc.GetName() diff --git a/pkg/manager/member/pump_member_manager.go b/pkg/manager/member/pump_member_manager.go index 995012aa9c..f7352de914 100644 --- a/pkg/manager/member/pump_member_manager.go +++ b/pkg/manager/member/pump_member_manager.go @@ -95,6 +95,16 @@ func (pmm *pumpMemberManager) syncPumpStatefulSetForTidbCluster(tc *v1alpha1.Tid notFound := errors.IsNotFound(err) oldPumpSet := oldPumpSetTemp.DeepCopy() + if err := pmm.syncTiDBClusterStatus(tc, oldPumpSet); err != nil { + klog.Errorf("failed to sync TidbCluster: [%s/%s]'s status, error: %v", tc.Namespace, tc.Name, err) + return err + } + + if tc.Spec.Paused { + klog.V(4).Infof("tikv cluster %s/%s is paused, skip syncing for pump statefulset", tc.GetNamespace(), tc.GetName()) + return nil + } + cm, err := pmm.syncConfigMap(tc, oldPumpSet) if err != nil { return err @@ -112,15 +122,14 @@ func (pmm *pumpMemberManager) syncPumpStatefulSetForTidbCluster(tc *v1alpha1.Tid return pmm.setControl.CreateStatefulSet(tc, newPumpSet) } - if err := pmm.syncTiDBClusterStatus(tc, oldPumpSet); err != nil { - klog.Errorf("failed to sync TidbCluster: [%s/%s]'s status, error: %v", tc.Namespace, tc.Name, err) - return err - } - return updateStatefulSet(pmm.setControl, tc, newPumpSet, oldPumpSet) } func (pmm *pumpMemberManager) syncTiDBClusterStatus(tc *v1alpha1.TidbCluster, set *apps.StatefulSet) error { + if set == nil { + // skip if not created yet + return nil + } tc.Status.Pump.StatefulSet = &set.Status @@ -142,6 +151,10 @@ func (pmm *pumpMemberManager) syncTiDBClusterStatus(tc *v1alpha1.TidbCluster, se } func (pmm *pumpMemberManager) syncHeadlessService(tc *v1alpha1.TidbCluster) error { + if tc.Spec.Paused { + klog.V(4).Infof("tikv cluster %s/%s is paused, skip syncing for pump headless service", tc.GetNamespace(), tc.GetName()) + return nil + } newSvc := getNewPumpHeadlessService(tc) oldSvc, err := pmm.svcLister.Services(newSvc.Namespace).Get(newSvc.Name) diff --git a/pkg/manager/member/tidb_member_manager.go b/pkg/manager/member/tidb_member_manager.go index 8c89511078..e67b2c4788 100644 --- a/pkg/manager/member/tidb_member_manager.go +++ b/pkg/manager/member/tidb_member_manager.go @@ -34,6 +34,7 @@ import ( "k8s.io/apimachinery/pkg/util/uuid" v1 "k8s.io/client-go/listers/apps/v1" corelisters "k8s.io/client-go/listers/core/v1" + "k8s.io/klog" "k8s.io/utils/pointer" ) @@ -122,6 +123,11 @@ func (tmm *tidbMemberManager) Sync(tc *v1alpha1.TidbCluster) error { } func (tmm *tidbMemberManager) syncTiDBHeadlessServiceForTidbCluster(tc *v1alpha1.TidbCluster) error { + if tc.Spec.Paused { + klog.V(4).Infof("tidb cluster %s/%s is paused, skip syncing for tidb headless service", tc.GetNamespace(), tc.GetName()) + return nil + } + ns := tc.GetNamespace() tcName := tc.GetName() @@ -169,6 +175,15 @@ func (tmm *tidbMemberManager) syncTiDBStatefulSetForTidbCluster(tc *v1alpha1.Tid setNotExist := errors.IsNotFound(err) oldTiDBSet := oldTiDBSetTemp.DeepCopy() + if err = tmm.syncTidbClusterStatus(tc, oldTiDBSet); err != nil { + return err + } + + if tc.Spec.Paused { + klog.V(4).Infof("tidb cluster %s/%s is paused, skip syncing for tidb statefulset", tc.GetNamespace(), tc.GetName()) + return nil + } + cm, err := tmm.syncTiDBConfigMap(tc, oldTiDBSet) if err != nil { return err @@ -188,10 +203,6 @@ func (tmm *tidbMemberManager) syncTiDBStatefulSetForTidbCluster(tc *v1alpha1.Tid return nil } - if err = tmm.syncTidbClusterStatus(tc, oldTiDBSet); err != nil { - return err - } - if !templateEqual(newTiDBSet, oldTiDBSet) || tc.Status.TiDB.Phase == v1alpha1.UpgradePhase { if err := tmm.tidbUpgrader.Upgrade(tc, oldTiDBSet, newTiDBSet); err != nil { return err @@ -215,6 +226,10 @@ func (tmm *tidbMemberManager) syncTiDBStatefulSetForTidbCluster(tc *v1alpha1.Tid } func (tmm *tidbMemberManager) syncTiDBService(tc *v1alpha1.TidbCluster) error { + if tc.Spec.Paused { + klog.V(4).Infof("tidb cluster %s/%s is paused, skip syncing for tidb service", tc.GetNamespace(), tc.GetName()) + return nil + } newSvc := getNewTiDBServiceOrNil(tc) // TODO: delete tidb service if user remove the service spec deliberately @@ -701,6 +716,11 @@ func getNewTiDBSetForTidbCluster(tc *v1alpha1.TidbCluster, cm *corev1.ConfigMap) } func (tmm *tidbMemberManager) syncTidbClusterStatus(tc *v1alpha1.TidbCluster, set *apps.StatefulSet) error { + if set == nil { + // skip if not created yet + return nil + } + tc.Status.TiDB.StatefulSet = &set.Status upgrading, err := tmm.tidbStatefulSetIsUpgradingFn(tmm.podLister, set, tc) diff --git a/pkg/manager/member/tikv_member_manager.go b/pkg/manager/member/tikv_member_manager.go index 519ae3fa35..fed2a2b47e 100644 --- a/pkg/manager/member/tikv_member_manager.go +++ b/pkg/manager/member/tikv_member_manager.go @@ -134,6 +134,11 @@ func (tkmm *tikvMemberManager) Sync(tc *v1alpha1.TidbCluster) error { } func (tkmm *tikvMemberManager) syncServiceForTidbCluster(tc *v1alpha1.TidbCluster, svcConfig SvcConfig) error { + if tc.Spec.Paused { + klog.V(4).Infof("tikv cluster %s/%s is paused, skip syncing for tikv service", tc.GetNamespace(), tc.GetName()) + return nil + } + ns := tc.GetNamespace() tcName := tc.GetName() @@ -183,6 +188,16 @@ func (tkmm *tikvMemberManager) syncStatefulSetForTidbCluster(tc *v1alpha1.TidbCl setNotExist := errors.IsNotFound(err) oldSet := oldSetTmp.DeepCopy() + + if err := tkmm.syncTidbClusterStatus(tc, oldSet); err != nil { + return err + } + + if tc.Spec.Paused { + klog.V(4).Infof("tikv cluster %s/%s is paused, skip syncing for tikv statefulset", tc.GetNamespace(), tc.GetName()) + return nil + } + cm, err := tkmm.syncTiKVConfigMap(tc, oldSet) if err != nil { return err @@ -205,10 +220,6 @@ func (tkmm *tikvMemberManager) syncStatefulSetForTidbCluster(tc *v1alpha1.TidbCl return nil } - if err := tkmm.syncTidbClusterStatus(tc, oldSet); err != nil { - return err - } - if _, err := tkmm.setStoreLabelsForTiKV(tc); err != nil { return err } @@ -554,6 +565,10 @@ func labelTiKV(tc *v1alpha1.TidbCluster) label.Label { } func (tkmm *tikvMemberManager) syncTidbClusterStatus(tc *v1alpha1.TidbCluster, set *apps.StatefulSet) error { + if set == nil { + // skip if not created yet + return nil + } tc.Status.TiKV.StatefulSet = &set.Status upgrading, err := tkmm.tikvStatefulSetIsUpgradingFn(tkmm.podLister, tkmm.pdControl, set, tc) if err != nil { diff --git a/tests/e2e/tidbcluster/tidbcluster.go b/tests/e2e/tidbcluster/tidbcluster.go index a5d1437ccd..4614289512 100644 --- a/tests/e2e/tidbcluster/tidbcluster.go +++ b/tests/e2e/tidbcluster/tidbcluster.go @@ -38,6 +38,7 @@ import ( "github.com/pingcap/tidb-operator/tests/apiserver" e2econfig "github.com/pingcap/tidb-operator/tests/e2e/config" utilimage "github.com/pingcap/tidb-operator/tests/e2e/util/image" + utilpod "github.com/pingcap/tidb-operator/tests/e2e/util/pod" "github.com/pingcap/tidb-operator/tests/e2e/util/portforward" "github.com/pingcap/tidb-operator/tests/pkg/apimachinery" "github.com/pingcap/tidb-operator/tests/pkg/blockwriter" @@ -46,8 +47,10 @@ import ( v1 "k8s.io/api/core/v1" apiextensionsclientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" "k8s.io/apimachinery/pkg/api/errors" + apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" utilversion "k8s.io/apimachinery/pkg/util/version" "k8s.io/apimachinery/pkg/util/wait" @@ -863,6 +866,67 @@ var _ = ginkgo.Describe("[tidb-operator] TiDBCluster", func() { framework.ExpectNoError(err) }) + ginkgo.It("TiDB cluster can be paused and unpaused", func() { + tcName := "paused" + tc := fixture.GetTidbCluster(ns, tcName, utilimage.TiDBV3Version) + tc.Spec.PD.Replicas = 1 + tc.Spec.TiKV.Replicas = 1 + tc.Spec.TiDB.Replicas = 1 + err := genericCli.Create(context.TODO(), tc) + framework.ExpectNoError(err) + err = oa.WaitForTidbClusterReady(tc, 30*time.Minute, 15*time.Second) + framework.ExpectNoError(err) + + podListBeforePaused, err := c.CoreV1().Pods(ns).List(metav1.ListOptions{}) + framework.ExpectNoError(err) + + ginkgo.By("Pause the tidb cluster") + err = controller.GuaranteedUpdate(genericCli, tc, func() error { + tc.Spec.Paused = true + return nil + }) + framework.ExpectNoError(err) + ginkgo.By("Make a change") + err = controller.GuaranteedUpdate(genericCli, tc, func() error { + tc.Spec.Version = utilimage.TiDBV3UpgradeVersion + return nil + }) + framework.ExpectNoError(err) + + ginkgo.By("Check pods are not changed when the tidb cluster is paused") + err = utilpod.WaitForPodsAreChanged(c, podListBeforePaused.Items, time.Minute*5) + framework.ExpectEqual(err, wait.ErrWaitTimeout, "Pods are changed when the tidb cluster is paused") + + ginkgo.By("Unpause the tidb cluster") + err = controller.GuaranteedUpdate(genericCli, tc, func() error { + tc.Spec.Paused = false + return nil + }) + framework.ExpectNoError(err) + + ginkgo.By("Check the tidb cluster will be upgraded now") + listOptions := metav1.ListOptions{ + LabelSelector: labels.SelectorFromSet(label.New().Instance(tcName).Component(label.TiKVLabelVal).Labels()).String(), + } + err = wait.PollImmediate(5*time.Second, 15*time.Minute, func() (bool, error) { + podList, err := c.CoreV1().Pods(ns).List(listOptions) + if err != nil && !apierrors.IsNotFound(err) { + return false, err + } + for _, pod := range podList.Items { + for _, c := range pod.Spec.Containers { + if c.Name == v1alpha1.TiKVMemberType.String() { + if c.Image == tc.TiKVImage() { + return true, nil + } + } + } + } + return false, nil + }) + framework.ExpectNoError(err) + }) + ginkgo.It("tidb-scale: clear TiDB failureMembers when scale TiDB to zero", func() { cluster := newTidbClusterConfig(e2econfig.TestConfig, ns, "tidb-scale", "admin", "") cluster.Resources["pd.replicas"] = "3"