diff --git a/docs/api-references/docs.md b/docs/api-references/docs.md index 8f470743bc6..8912417fb95 100644 --- a/docs/api-references/docs.md +++ b/docs/api-references/docs.md @@ -1546,6 +1546,13 @@ TidbMonitorStatus +

AutoScalerPhase

+

+(Appears on: +BasicAutoScalerStatus) +

+

+

BRConfig

(Appears on: @@ -2350,6 +2357,18 @@ If not set, the default value is 5.

+phase
+ + +AutoScalerPhase + + + + + + + + metrics
diff --git a/go.mod b/go.mod index d5097d79354..d0d5f59d541 100644 --- a/go.mod +++ b/go.mod @@ -39,6 +39,7 @@ require ( github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4 // indirect github.com/grpc-ecosystem/grpc-gateway v1.13.0 // indirect github.com/imdario/mergo v0.3.7 // indirect + github.com/jonboulle/clockwork v0.1.0 github.com/juju/errors v0.0.0-20180806074554-22422dad46e1 github.com/juju/loggo v0.0.0-20180524022052-584905176618 // indirect github.com/juju/testing v0.0.0-20180920084828-472a3e8b2073 // indirect diff --git a/manifests/crd.yaml b/manifests/crd.yaml index e4106e817f3..0637e6c772d 100644 --- a/manifests/crd.yaml +++ b/manifests/crd.yaml @@ -10884,6 +10884,8 @@ spec: - thresholdValue type: object type: array + phase: + type: string recommendedReplicas: description: RecommendedReplicas describes the calculated replicas in the last auto-scaling reconciliation for the component(tidb/tikv) @@ -10931,6 +10933,8 @@ spec: - thresholdValue type: object type: array + phase: + type: string recommendedReplicas: description: RecommendedReplicas describes the calculated replicas in the last auto-scaling reconciliation for the component(tidb/tikv) diff --git a/pkg/apis/pingcap/v1alpha1/openapi_generated.go b/pkg/apis/pingcap/v1alpha1/openapi_generated.go index c4a0d3d5332..14453d98a4c 100644 --- a/pkg/apis/pingcap/v1alpha1/openapi_generated.go +++ b/pkg/apis/pingcap/v1alpha1/openapi_generated.go @@ -894,6 +894,12 @@ func schema_pkg_apis_pingcap_v1alpha1_BasicAutoScalerStatus(ref common.Reference Description: "BasicAutoScalerStatus describe the basic auto-scaling status", Type: []string{"object"}, Properties: map[string]spec.Schema{ + "phase": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, "metrics": { SchemaProps: spec.SchemaProps{ Description: "MetricsStatusList describes the metrics status in the last auto-scaling reconciliation", @@ -7061,6 +7067,12 @@ func schema_pkg_apis_pingcap_v1alpha1_TidbAutoScalerStatus(ref common.ReferenceC Description: "TidbAutoScalerStatus describe the auto-scaling status of tidb", Type: []string{"object"}, Properties: map[string]spec.Schema{ + "phase": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, "metrics": { SchemaProps: spec.SchemaProps{ Description: "MetricsStatusList describes the metrics status in the last auto-scaling reconciliation", @@ -8083,6 +8095,12 @@ func schema_pkg_apis_pingcap_v1alpha1_TikvAutoScalerStatus(ref common.ReferenceC Description: "TikvAutoScalerStatus describe the auto-scaling status of tikv", Type: []string{"object"}, Properties: map[string]spec.Schema{ + "phase": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, "metrics": { SchemaProps: spec.SchemaProps{ Description: "MetricsStatusList describes the metrics status in the last auto-scaling reconciliation", diff --git a/pkg/apis/pingcap/v1alpha1/tidbclusterautoscaler_types.go b/pkg/apis/pingcap/v1alpha1/tidbclusterautoscaler_types.go index e38a570fa1a..118d7b09ebf 100644 --- a/pkg/apis/pingcap/v1alpha1/tidbclusterautoscaler_types.go +++ b/pkg/apis/pingcap/v1alpha1/tidbclusterautoscaler_types.go @@ -18,6 +18,14 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) +type AutoScalerPhase string + +const ( + NormalAutoScalerPhase AutoScalerPhase = "Normal" + ReadyToScaleOutAutoScalerPhase AutoScalerPhase = "ReadyToScaleOut" + ReadyToScaleInAutoScalerPhase AutoScalerPhase = "ReadyToScaleIn" +) + // +genclient // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object @@ -175,6 +183,7 @@ type TikvAutoScalerStatus struct { // +k8s:openapi-gen=true // BasicAutoScalerStatus describe the basic auto-scaling status type BasicAutoScalerStatus struct { + Phase AutoScalerPhase `json:"phase,omitempty"` // MetricsStatusList describes the metrics status in the last auto-scaling reconciliation // +optional MetricsStatusList []MetricsStatus `json:"metrics,omitempty"` diff --git a/pkg/autoscaler/autoscaler/tidb_autoscaler.go b/pkg/autoscaler/autoscaler/tidb_autoscaler.go index 9b130ec8ce6..cf8ad80404f 100644 --- a/pkg/autoscaler/autoscaler/tidb_autoscaler.go +++ b/pkg/autoscaler/autoscaler/tidb_autoscaler.go @@ -47,6 +47,7 @@ func (am *autoScalerManager) syncTiDB(tc *v1alpha1.TidbCluster, tac *v1alpha1.Ti } targetReplicas = limitTargetReplicas(targetReplicas, tac, v1alpha1.TiDBMemberType) if targetReplicas == tc.Spec.TiDB.Replicas { + tac.Status.TiDB.Phase = v1alpha1.NormalAutoScalerPhase return nil } return syncTiDBAfterCalculated(tc, tac, currentReplicas, targetReplicas, sts) @@ -60,6 +61,7 @@ func syncTiDBAfterCalculated(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbCluster if recommendedReplicas > currentReplicas { intervalSeconds = tac.Spec.TiDB.ScaleOutIntervalSeconds } + ableToScale, err := checkStsAutoScalingInterval(tac, *intervalSeconds, v1alpha1.TiDBMemberType) if err != nil { return err @@ -72,6 +74,7 @@ func syncTiDBAfterCalculated(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbCluster // Currently we didnt' record the auto-scaling out slot for tidb, because it is pointless for now. func updateTcTiDBIfScale(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAutoScaler, recommendedReplicas int32) error { + tac.Status.TiDB.Phase = v1alpha1.NormalAutoScalerPhase tac.Annotations[label.AnnTiDBLastAutoScalingTimestamp] = fmt.Sprintf("%d", time.Now().Unix()) tc.Spec.TiDB.Replicas = recommendedReplicas tac.Status.TiDB.RecommendedReplicas = &recommendedReplicas diff --git a/pkg/autoscaler/autoscaler/tikv_autoscaler.go b/pkg/autoscaler/autoscaler/tikv_autoscaler.go index b36c00e6017..562e5b18a10 100644 --- a/pkg/autoscaler/autoscaler/tikv_autoscaler.go +++ b/pkg/autoscaler/autoscaler/tikv_autoscaler.go @@ -59,12 +59,24 @@ func (am *autoScalerManager) syncTiKV(tc *v1alpha1.TidbCluster, tac *v1alpha1.Ti // The currentReplicas of TiKV calculated in auto-scaling is the count of the StateUp TiKV instance, so we need to // add the number of other state tikv instance replicas when we update the TidbCluster.Spec.TiKV.Replicas func syncTiKVAfterCalculated(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAutoScaler, currentReplicas, recommendedReplicas int32, sts *appsv1.StatefulSet) error { - intervalSeconds := tac.Spec.TiKV.ScaleInIntervalSeconds - if recommendedReplicas > tc.Spec.TiKV.Replicas { - intervalSeconds = tac.Spec.TiKV.ScaleOutIntervalSeconds + if recommendedReplicas > currentReplicas { + if tac.Status.TiKV.Phase != v1alpha1.ReadyToScaleOutAutoScalerPhase { + tac.Status.TiKV.Phase = v1alpha1.ReadyToScaleOutAutoScalerPhase + // phase could change from Normal to ReadyToScaleOut, ReadyToScaleIn to ReadyToScaleOut, + // reset timestamp in both cases. + tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] = fmt.Sprintf("%d", time.Now().Unix()) + } + } else { + if tac.Status.TiKV.Phase != v1alpha1.ReadyToScaleInAutoScalerPhase { + tac.Status.TiKV.Phase = v1alpha1.ReadyToScaleInAutoScalerPhase + // phase could change from Normal to ReadyToScaleIn, ReadyToScaleOut to ReadyToScaleIn, + // reset timestamp in both cases. + tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] = fmt.Sprintf("%d", time.Now().Unix()) + } } - ableToScale, err := checkStsAutoScalingInterval(tac, *intervalSeconds, v1alpha1.TiKVMemberType) + + ableToScale, err := checkStsAutoScaling(tac, 123, *intervalSeconds, v1alpha1.TiKVMemberType) if err != nil { return err } @@ -87,7 +99,6 @@ func filterTiKVInstances(tc *v1alpha1.TidbCluster) []string { // we record the auto-scaling out slot for tikv, in order to add special hot labels when they are created func updateTcTiKVIfScale(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAutoScaler, currentReplicas, recommendedReplicas int32, sts *appsv1.StatefulSet) error { - tac.Annotations[label.AnnTiKVLastAutoScalingTimestamp] = fmt.Sprintf("%d", time.Now().Unix()) if recommendedReplicas > currentReplicas { newlyScaleOutOrdinalSets := helper.GetPodOrdinals(recommendedReplicas, sts).Difference(helper.GetPodOrdinals(currentReplicas, sts)) if newlyScaleOutOrdinalSets.Len() > 0 { @@ -102,6 +113,8 @@ func updateTcTiKVIfScale(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAuto tc.Annotations[label.AnnTiKVAutoScalingOutOrdinals] = v } } + tac.Status.TiDB.Phase = v1alpha1.NormalAutoScalerPhase + tac.Annotations[label.AnnTiKVLastAutoScalingTimestamp] = fmt.Sprintf("%d", time.Now().Unix()) tc.Spec.TiKV.Replicas = recommendedReplicas tac.Status.TiKV.RecommendedReplicas = &recommendedReplicas return nil diff --git a/pkg/autoscaler/autoscaler/util.go b/pkg/autoscaler/autoscaler/util.go index 9f21156475d..1fa14e2b556 100644 --- a/pkg/autoscaler/autoscaler/util.go +++ b/pkg/autoscaler/autoscaler/util.go @@ -14,10 +14,12 @@ package autoscaler import ( + "errors" "fmt" "strconv" "time" + "github.com/jonboulle/clockwork" "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1" "github.com/pingcap/tidb-operator/pkg/label" operatorUtils "github.com/pingcap/tidb-operator/pkg/util" @@ -57,11 +59,79 @@ func checkStsAutoScalingPrerequisites(set *appsv1.StatefulSet) bool { return true } -// checkStsAutoScalingInterval would check whether there is enough interval duration between every two auto-scaling -func checkStsAutoScalingInterval(tac *v1alpha1.TidbClusterAutoScaler, intervalSeconds int32, memberType v1alpha1.MemberType) (bool, error) { +func checkStsAutoScaling(tac *v1alpha1.TidbClusterAutoScaler, thresholdSeconds, intervalSeconds int32, memberType v1alpha1.MemberType) (bool, error) { + realClock := clockwork.NewRealClock() + if tac.Annotations == nil { + tac.Annotations = map[string]string{} + } + ableToScale, err := checkStsLastSyncTimestamp(tac, 30*3, realClock) + if err != nil { + return false, err + } + if !ableToScale { + return false, nil + } + ableToScale, err = checkStsReadyAutoScalingTimestamp(tac, thresholdSeconds, realClock) + if err != nil { + return false, err + } + if !ableToScale { + return false, nil + } + ableToScale, err = checkStsAutoScalingInterval(tac, intervalSeconds, memberType) + if err != nil { + return false, err + } + if !ableToScale { + return false, nil + } + return true, nil +} + +// checkStsLastSyncTimestamp reset TiKV phase if last sync timestamp is longer than thresholdSec +func checkStsLastSyncTimestamp(tac *v1alpha1.TidbClusterAutoScaler, thresholdSec int32, clock clockwork.Clock) (bool, error) { if tac.Annotations == nil { tac.Annotations = map[string]string{} } + + lastSyncTimestamp, existed := tac.Annotations[label.AnnStsLastSyncTimestamp] + if !existed { + // record sync timestamp should always happen before checkStsLastSyncTimestamp + return false, errors.New("tidb.pingcap.com/sync-timestamp label is missing") + } + t, err := strconv.ParseInt(lastSyncTimestamp, 10, 64) + if err != nil { + return false, err + } + // if last sync is longer than n * resync during, reset TiKV phase to Normal + if int32(clock.Now().Sub(time.Unix(t, 0)).Seconds()) > thresholdSec { + tac.Status.TiKV.Phase = v1alpha1.NormalAutoScalerPhase + return false, nil + } + return true, nil +} + +// checkStsReadyAutoScalingTimestamp would check whether there is enough time window after ready to scale +func checkStsReadyAutoScalingTimestamp(tac *v1alpha1.TidbClusterAutoScaler, thresholdSeconds int32, clock clockwork.Clock) (bool, error) { + readyAutoScalingTimestamp, existed := tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] + + if !existed { + tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] = fmt.Sprintf("%d", clock.Now().Unix()) + return false, nil + } + t, err := strconv.ParseInt(readyAutoScalingTimestamp, 10, 32) + if err != nil { + return false, err + } + readyAutoScalingSec := int32(clock.Now().Sub(time.Unix(t, 0)).Seconds()) + if thresholdSeconds > readyAutoScalingSec { + return false, nil + } + return true, nil +} + +// checkStsAutoScalingInterval would check whether there is enough interval duration between every two auto-scaling +func checkStsAutoScalingInterval(tac *v1alpha1.TidbClusterAutoScaler, intervalSeconds int32, memberType v1alpha1.MemberType) (bool, error) { lastAutoScalingTimestamp, existed := tac.Annotations[label.AnnTiDBLastAutoScalingTimestamp] if memberType == v1alpha1.TiKVMemberType { lastAutoScalingTimestamp, existed = tac.Annotations[label.AnnTiKVLastAutoScalingTimestamp] @@ -197,6 +267,7 @@ func checkAndUpdateTacAnn(tac *v1alpha1.TidbClusterAutoScaler) { resetAutoScalingAnn(tac) return } + tac.Annotations[label.AnnStsLastSyncTimestamp] = fmt.Sprintf("%d", time.Now().Unix()) name := tac.Annotations[label.AnnAutoScalingTargetName] namespace := tac.Annotations[label.AnnAutoScalingTargetNamespace] if name == tac.Spec.Cluster.Name && namespace == tac.Spec.Cluster.Namespace { diff --git a/pkg/autoscaler/autoscaler/util_test.go b/pkg/autoscaler/autoscaler/util_test.go index 6668dcf070f..1279767f010 100644 --- a/pkg/autoscaler/autoscaler/util_test.go +++ b/pkg/autoscaler/autoscaler/util_test.go @@ -18,6 +18,7 @@ import ( "testing" "time" + "github.com/jonboulle/clockwork" . "github.com/onsi/gomega" "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1" "github.com/pingcap/tidb-operator/pkg/label" @@ -26,6 +27,52 @@ import ( "k8s.io/utils/pointer" ) +func TestCheckStsReadyAutoScalingTimestamp(t *testing.T) { + g := NewGomegaWithT(t) + c := clockwork.NewFakeClockAt(time.Now()) + tests := []struct { + name string + withTimestmap bool + readyAutoScalingSec int + expectedPermitScaling bool + }{ + { + name: "tikv, no timestamp", + withTimestmap: false, + readyAutoScalingSec: 0, + expectedPermitScaling: false, + }, + { + name: "tikv, ready autoscaling 60s", + withTimestmap: true, + readyAutoScalingSec: 60, + expectedPermitScaling: false, + }, + { + name: "tikv, ready autoscaling 120s", + withTimestmap: true, + readyAutoScalingSec: 120, + expectedPermitScaling: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + thresholdSec := int32(100) + tac := newTidbClusterAutoScaler() + d := time.Duration(tt.readyAutoScalingSec) * time.Second + if tt.withTimestmap { + tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] = fmt.Sprintf("%d", time.Now().Add(-d).Unix()) + } else { + tac.Annotations = map[string]string{} + } + r, err := checkStsReadyAutoScalingTimestamp(tac, thresholdSec, c) + g.Expect(err).Should(BeNil()) + g.Expect(r).Should(Equal(tt.expectedPermitScaling)) + }) + } +} + func TestCheckStsAutoScalingInterval(t *testing.T) { g := NewGomegaWithT(t) tests := []struct { diff --git a/pkg/label/label.go b/pkg/label/label.go index 776be784930..d71e07c51ee 100644 --- a/pkg/label/label.go +++ b/pkg/label/label.go @@ -106,6 +106,11 @@ const ( // AnnTiKVLastAutoScalingTimestamp is annotation key of tidbclusterto which ordinal is created by tikv auto-scaling AnnTiKVLastAutoScalingTimestamp = "tikv.tidb.pingcap.com/last-autoscaling-timestamp" + // AnnTiDBReadyToScaleTimestamp records timestamp when tidb ready to scale + AnnTiDBReadyToScaleTimestamp = "tidb.tidb.pingcap.com/ready-to-scale-timestamp" + // AnnTiKVReadyToScaleTimestamp records timestamp when tikv ready to scale + AnnTiKVReadyToScaleTimestamp = "tikv.tidb.pingcap.com/ready-to-scale-timestamp" + // AnnTiDBConsecutiveScaleOutCount describes the least consecutive count to scale-out for tidb AnnTiDBConsecutiveScaleOutCount = "tidb.tidb.pingcap.com/consecutive-scale-out-count" // AnnTiDBConsecutiveScaleInCount describes the least consecutive count to scale-in for tidb