diff --git a/docs/api-references/docs.md b/docs/api-references/docs.md index 8f470743bc6..8912417fb95 100644 --- a/docs/api-references/docs.md +++ b/docs/api-references/docs.md @@ -1546,6 +1546,13 @@ TidbMonitorStatus +
+(Appears on: +BasicAutoScalerStatus) +
++
(Appears on: @@ -2350,6 +2357,18 @@ If not set, the default value is 5.
phase
+
+
+AutoScalerPhase
+
+
+metrics
diff --git a/go.mod b/go.mod
index d5097d79354..d0d5f59d541 100644
--- a/go.mod
+++ b/go.mod
@@ -39,6 +39,7 @@ require (
github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4 // indirect
github.com/grpc-ecosystem/grpc-gateway v1.13.0 // indirect
github.com/imdario/mergo v0.3.7 // indirect
+ github.com/jonboulle/clockwork v0.1.0
github.com/juju/errors v0.0.0-20180806074554-22422dad46e1
github.com/juju/loggo v0.0.0-20180524022052-584905176618 // indirect
github.com/juju/testing v0.0.0-20180920084828-472a3e8b2073 // indirect
diff --git a/manifests/crd.yaml b/manifests/crd.yaml
index e4106e817f3..0637e6c772d 100644
--- a/manifests/crd.yaml
+++ b/manifests/crd.yaml
@@ -10884,6 +10884,8 @@ spec:
- thresholdValue
type: object
type: array
+ phase:
+ type: string
recommendedReplicas:
description: RecommendedReplicas describes the calculated replicas
in the last auto-scaling reconciliation for the component(tidb/tikv)
@@ -10931,6 +10933,8 @@ spec:
- thresholdValue
type: object
type: array
+ phase:
+ type: string
recommendedReplicas:
description: RecommendedReplicas describes the calculated replicas
in the last auto-scaling reconciliation for the component(tidb/tikv)
diff --git a/pkg/apis/pingcap/v1alpha1/openapi_generated.go b/pkg/apis/pingcap/v1alpha1/openapi_generated.go
index c4a0d3d5332..14453d98a4c 100644
--- a/pkg/apis/pingcap/v1alpha1/openapi_generated.go
+++ b/pkg/apis/pingcap/v1alpha1/openapi_generated.go
@@ -894,6 +894,12 @@ func schema_pkg_apis_pingcap_v1alpha1_BasicAutoScalerStatus(ref common.Reference
Description: "BasicAutoScalerStatus describe the basic auto-scaling status",
Type: []string{"object"},
Properties: map[string]spec.Schema{
+ "phase": {
+ SchemaProps: spec.SchemaProps{
+ Type: []string{"string"},
+ Format: "",
+ },
+ },
"metrics": {
SchemaProps: spec.SchemaProps{
Description: "MetricsStatusList describes the metrics status in the last auto-scaling reconciliation",
@@ -7061,6 +7067,12 @@ func schema_pkg_apis_pingcap_v1alpha1_TidbAutoScalerStatus(ref common.ReferenceC
Description: "TidbAutoScalerStatus describe the auto-scaling status of tidb",
Type: []string{"object"},
Properties: map[string]spec.Schema{
+ "phase": {
+ SchemaProps: spec.SchemaProps{
+ Type: []string{"string"},
+ Format: "",
+ },
+ },
"metrics": {
SchemaProps: spec.SchemaProps{
Description: "MetricsStatusList describes the metrics status in the last auto-scaling reconciliation",
@@ -8083,6 +8095,12 @@ func schema_pkg_apis_pingcap_v1alpha1_TikvAutoScalerStatus(ref common.ReferenceC
Description: "TikvAutoScalerStatus describe the auto-scaling status of tikv",
Type: []string{"object"},
Properties: map[string]spec.Schema{
+ "phase": {
+ SchemaProps: spec.SchemaProps{
+ Type: []string{"string"},
+ Format: "",
+ },
+ },
"metrics": {
SchemaProps: spec.SchemaProps{
Description: "MetricsStatusList describes the metrics status in the last auto-scaling reconciliation",
diff --git a/pkg/apis/pingcap/v1alpha1/tidbclusterautoscaler_types.go b/pkg/apis/pingcap/v1alpha1/tidbclusterautoscaler_types.go
index e38a570fa1a..118d7b09ebf 100644
--- a/pkg/apis/pingcap/v1alpha1/tidbclusterautoscaler_types.go
+++ b/pkg/apis/pingcap/v1alpha1/tidbclusterautoscaler_types.go
@@ -18,6 +18,14 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
+type AutoScalerPhase string
+
+const (
+ NormalAutoScalerPhase AutoScalerPhase = "Normal"
+ ReadyToScaleOutAutoScalerPhase AutoScalerPhase = "ReadyToScaleOut"
+ ReadyToScaleInAutoScalerPhase AutoScalerPhase = "ReadyToScaleIn"
+)
+
// +genclient
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
@@ -175,6 +183,7 @@ type TikvAutoScalerStatus struct {
// +k8s:openapi-gen=true
// BasicAutoScalerStatus describe the basic auto-scaling status
type BasicAutoScalerStatus struct {
+ Phase AutoScalerPhase `json:"phase,omitempty"`
// MetricsStatusList describes the metrics status in the last auto-scaling reconciliation
// +optional
MetricsStatusList []MetricsStatus `json:"metrics,omitempty"`
diff --git a/pkg/autoscaler/autoscaler/tidb_autoscaler.go b/pkg/autoscaler/autoscaler/tidb_autoscaler.go
index 9b130ec8ce6..cf8ad80404f 100644
--- a/pkg/autoscaler/autoscaler/tidb_autoscaler.go
+++ b/pkg/autoscaler/autoscaler/tidb_autoscaler.go
@@ -47,6 +47,7 @@ func (am *autoScalerManager) syncTiDB(tc *v1alpha1.TidbCluster, tac *v1alpha1.Ti
}
targetReplicas = limitTargetReplicas(targetReplicas, tac, v1alpha1.TiDBMemberType)
if targetReplicas == tc.Spec.TiDB.Replicas {
+ tac.Status.TiDB.Phase = v1alpha1.NormalAutoScalerPhase
return nil
}
return syncTiDBAfterCalculated(tc, tac, currentReplicas, targetReplicas, sts)
@@ -60,6 +61,7 @@ func syncTiDBAfterCalculated(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbCluster
if recommendedReplicas > currentReplicas {
intervalSeconds = tac.Spec.TiDB.ScaleOutIntervalSeconds
}
+
ableToScale, err := checkStsAutoScalingInterval(tac, *intervalSeconds, v1alpha1.TiDBMemberType)
if err != nil {
return err
@@ -72,6 +74,7 @@ func syncTiDBAfterCalculated(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbCluster
// Currently we didnt' record the auto-scaling out slot for tidb, because it is pointless for now.
func updateTcTiDBIfScale(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAutoScaler, recommendedReplicas int32) error {
+ tac.Status.TiDB.Phase = v1alpha1.NormalAutoScalerPhase
tac.Annotations[label.AnnTiDBLastAutoScalingTimestamp] = fmt.Sprintf("%d", time.Now().Unix())
tc.Spec.TiDB.Replicas = recommendedReplicas
tac.Status.TiDB.RecommendedReplicas = &recommendedReplicas
diff --git a/pkg/autoscaler/autoscaler/tikv_autoscaler.go b/pkg/autoscaler/autoscaler/tikv_autoscaler.go
index b36c00e6017..562e5b18a10 100644
--- a/pkg/autoscaler/autoscaler/tikv_autoscaler.go
+++ b/pkg/autoscaler/autoscaler/tikv_autoscaler.go
@@ -59,12 +59,24 @@ func (am *autoScalerManager) syncTiKV(tc *v1alpha1.TidbCluster, tac *v1alpha1.Ti
// The currentReplicas of TiKV calculated in auto-scaling is the count of the StateUp TiKV instance, so we need to
// add the number of other state tikv instance replicas when we update the TidbCluster.Spec.TiKV.Replicas
func syncTiKVAfterCalculated(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAutoScaler, currentReplicas, recommendedReplicas int32, sts *appsv1.StatefulSet) error {
-
intervalSeconds := tac.Spec.TiKV.ScaleInIntervalSeconds
- if recommendedReplicas > tc.Spec.TiKV.Replicas {
- intervalSeconds = tac.Spec.TiKV.ScaleOutIntervalSeconds
+ if recommendedReplicas > currentReplicas {
+ if tac.Status.TiKV.Phase != v1alpha1.ReadyToScaleOutAutoScalerPhase {
+ tac.Status.TiKV.Phase = v1alpha1.ReadyToScaleOutAutoScalerPhase
+ // phase could change from Normal to ReadyToScaleOut, ReadyToScaleIn to ReadyToScaleOut,
+ // reset timestamp in both cases.
+ tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] = fmt.Sprintf("%d", time.Now().Unix())
+ }
+ } else {
+ if tac.Status.TiKV.Phase != v1alpha1.ReadyToScaleInAutoScalerPhase {
+ tac.Status.TiKV.Phase = v1alpha1.ReadyToScaleInAutoScalerPhase
+ // phase could change from Normal to ReadyToScaleIn, ReadyToScaleOut to ReadyToScaleIn,
+ // reset timestamp in both cases.
+ tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] = fmt.Sprintf("%d", time.Now().Unix())
+ }
}
- ableToScale, err := checkStsAutoScalingInterval(tac, *intervalSeconds, v1alpha1.TiKVMemberType)
+
+ ableToScale, err := checkStsAutoScaling(tac, 123, *intervalSeconds, v1alpha1.TiKVMemberType)
if err != nil {
return err
}
@@ -87,7 +99,6 @@ func filterTiKVInstances(tc *v1alpha1.TidbCluster) []string {
// we record the auto-scaling out slot for tikv, in order to add special hot labels when they are created
func updateTcTiKVIfScale(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAutoScaler, currentReplicas, recommendedReplicas int32, sts *appsv1.StatefulSet) error {
- tac.Annotations[label.AnnTiKVLastAutoScalingTimestamp] = fmt.Sprintf("%d", time.Now().Unix())
if recommendedReplicas > currentReplicas {
newlyScaleOutOrdinalSets := helper.GetPodOrdinals(recommendedReplicas, sts).Difference(helper.GetPodOrdinals(currentReplicas, sts))
if newlyScaleOutOrdinalSets.Len() > 0 {
@@ -102,6 +113,8 @@ func updateTcTiKVIfScale(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAuto
tc.Annotations[label.AnnTiKVAutoScalingOutOrdinals] = v
}
}
+ tac.Status.TiDB.Phase = v1alpha1.NormalAutoScalerPhase
+ tac.Annotations[label.AnnTiKVLastAutoScalingTimestamp] = fmt.Sprintf("%d", time.Now().Unix())
tc.Spec.TiKV.Replicas = recommendedReplicas
tac.Status.TiKV.RecommendedReplicas = &recommendedReplicas
return nil
diff --git a/pkg/autoscaler/autoscaler/util.go b/pkg/autoscaler/autoscaler/util.go
index 9f21156475d..1fa14e2b556 100644
--- a/pkg/autoscaler/autoscaler/util.go
+++ b/pkg/autoscaler/autoscaler/util.go
@@ -14,10 +14,12 @@
package autoscaler
import (
+ "errors"
"fmt"
"strconv"
"time"
+ "github.com/jonboulle/clockwork"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/label"
operatorUtils "github.com/pingcap/tidb-operator/pkg/util"
@@ -57,11 +59,79 @@ func checkStsAutoScalingPrerequisites(set *appsv1.StatefulSet) bool {
return true
}
-// checkStsAutoScalingInterval would check whether there is enough interval duration between every two auto-scaling
-func checkStsAutoScalingInterval(tac *v1alpha1.TidbClusterAutoScaler, intervalSeconds int32, memberType v1alpha1.MemberType) (bool, error) {
+func checkStsAutoScaling(tac *v1alpha1.TidbClusterAutoScaler, thresholdSeconds, intervalSeconds int32, memberType v1alpha1.MemberType) (bool, error) {
+ realClock := clockwork.NewRealClock()
+ if tac.Annotations == nil {
+ tac.Annotations = map[string]string{}
+ }
+ ableToScale, err := checkStsLastSyncTimestamp(tac, 30*3, realClock)
+ if err != nil {
+ return false, err
+ }
+ if !ableToScale {
+ return false, nil
+ }
+ ableToScale, err = checkStsReadyAutoScalingTimestamp(tac, thresholdSeconds, realClock)
+ if err != nil {
+ return false, err
+ }
+ if !ableToScale {
+ return false, nil
+ }
+ ableToScale, err = checkStsAutoScalingInterval(tac, intervalSeconds, memberType)
+ if err != nil {
+ return false, err
+ }
+ if !ableToScale {
+ return false, nil
+ }
+ return true, nil
+}
+
+// checkStsLastSyncTimestamp reset TiKV phase if last sync timestamp is longer than thresholdSec
+func checkStsLastSyncTimestamp(tac *v1alpha1.TidbClusterAutoScaler, thresholdSec int32, clock clockwork.Clock) (bool, error) {
if tac.Annotations == nil {
tac.Annotations = map[string]string{}
}
+
+ lastSyncTimestamp, existed := tac.Annotations[label.AnnStsLastSyncTimestamp]
+ if !existed {
+ // record sync timestamp should always happen before checkStsLastSyncTimestamp
+ return false, errors.New("tidb.pingcap.com/sync-timestamp label is missing")
+ }
+ t, err := strconv.ParseInt(lastSyncTimestamp, 10, 64)
+ if err != nil {
+ return false, err
+ }
+ // if last sync is longer than n * resync during, reset TiKV phase to Normal
+ if int32(clock.Now().Sub(time.Unix(t, 0)).Seconds()) > thresholdSec {
+ tac.Status.TiKV.Phase = v1alpha1.NormalAutoScalerPhase
+ return false, nil
+ }
+ return true, nil
+}
+
+// checkStsReadyAutoScalingTimestamp would check whether there is enough time window after ready to scale
+func checkStsReadyAutoScalingTimestamp(tac *v1alpha1.TidbClusterAutoScaler, thresholdSeconds int32, clock clockwork.Clock) (bool, error) {
+ readyAutoScalingTimestamp, existed := tac.Annotations[label.AnnTiKVReadyToScaleTimestamp]
+
+ if !existed {
+ tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] = fmt.Sprintf("%d", clock.Now().Unix())
+ return false, nil
+ }
+ t, err := strconv.ParseInt(readyAutoScalingTimestamp, 10, 32)
+ if err != nil {
+ return false, err
+ }
+ readyAutoScalingSec := int32(clock.Now().Sub(time.Unix(t, 0)).Seconds())
+ if thresholdSeconds > readyAutoScalingSec {
+ return false, nil
+ }
+ return true, nil
+}
+
+// checkStsAutoScalingInterval would check whether there is enough interval duration between every two auto-scaling
+func checkStsAutoScalingInterval(tac *v1alpha1.TidbClusterAutoScaler, intervalSeconds int32, memberType v1alpha1.MemberType) (bool, error) {
lastAutoScalingTimestamp, existed := tac.Annotations[label.AnnTiDBLastAutoScalingTimestamp]
if memberType == v1alpha1.TiKVMemberType {
lastAutoScalingTimestamp, existed = tac.Annotations[label.AnnTiKVLastAutoScalingTimestamp]
@@ -197,6 +267,7 @@ func checkAndUpdateTacAnn(tac *v1alpha1.TidbClusterAutoScaler) {
resetAutoScalingAnn(tac)
return
}
+ tac.Annotations[label.AnnStsLastSyncTimestamp] = fmt.Sprintf("%d", time.Now().Unix())
name := tac.Annotations[label.AnnAutoScalingTargetName]
namespace := tac.Annotations[label.AnnAutoScalingTargetNamespace]
if name == tac.Spec.Cluster.Name && namespace == tac.Spec.Cluster.Namespace {
diff --git a/pkg/autoscaler/autoscaler/util_test.go b/pkg/autoscaler/autoscaler/util_test.go
index 6668dcf070f..1279767f010 100644
--- a/pkg/autoscaler/autoscaler/util_test.go
+++ b/pkg/autoscaler/autoscaler/util_test.go
@@ -18,6 +18,7 @@ import (
"testing"
"time"
+ "github.com/jonboulle/clockwork"
. "github.com/onsi/gomega"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/label"
@@ -26,6 +27,52 @@ import (
"k8s.io/utils/pointer"
)
+func TestCheckStsReadyAutoScalingTimestamp(t *testing.T) {
+ g := NewGomegaWithT(t)
+ c := clockwork.NewFakeClockAt(time.Now())
+ tests := []struct {
+ name string
+ withTimestmap bool
+ readyAutoScalingSec int
+ expectedPermitScaling bool
+ }{
+ {
+ name: "tikv, no timestamp",
+ withTimestmap: false,
+ readyAutoScalingSec: 0,
+ expectedPermitScaling: false,
+ },
+ {
+ name: "tikv, ready autoscaling 60s",
+ withTimestmap: true,
+ readyAutoScalingSec: 60,
+ expectedPermitScaling: false,
+ },
+ {
+ name: "tikv, ready autoscaling 120s",
+ withTimestmap: true,
+ readyAutoScalingSec: 120,
+ expectedPermitScaling: true,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ thresholdSec := int32(100)
+ tac := newTidbClusterAutoScaler()
+ d := time.Duration(tt.readyAutoScalingSec) * time.Second
+ if tt.withTimestmap {
+ tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] = fmt.Sprintf("%d", time.Now().Add(-d).Unix())
+ } else {
+ tac.Annotations = map[string]string{}
+ }
+ r, err := checkStsReadyAutoScalingTimestamp(tac, thresholdSec, c)
+ g.Expect(err).Should(BeNil())
+ g.Expect(r).Should(Equal(tt.expectedPermitScaling))
+ })
+ }
+}
+
func TestCheckStsAutoScalingInterval(t *testing.T) {
g := NewGomegaWithT(t)
tests := []struct {
diff --git a/pkg/label/label.go b/pkg/label/label.go
index 776be784930..d71e07c51ee 100644
--- a/pkg/label/label.go
+++ b/pkg/label/label.go
@@ -106,6 +106,11 @@ const (
// AnnTiKVLastAutoScalingTimestamp is annotation key of tidbclusterto which ordinal is created by tikv auto-scaling
AnnTiKVLastAutoScalingTimestamp = "tikv.tidb.pingcap.com/last-autoscaling-timestamp"
+ // AnnTiDBReadyToScaleTimestamp records timestamp when tidb ready to scale
+ AnnTiDBReadyToScaleTimestamp = "tidb.tidb.pingcap.com/ready-to-scale-timestamp"
+ // AnnTiKVReadyToScaleTimestamp records timestamp when tikv ready to scale
+ AnnTiKVReadyToScaleTimestamp = "tikv.tidb.pingcap.com/ready-to-scale-timestamp"
+
// AnnTiDBConsecutiveScaleOutCount describes the least consecutive count to scale-out for tidb
AnnTiDBConsecutiveScaleOutCount = "tidb.tidb.pingcap.com/consecutive-scale-out-count"
// AnnTiDBConsecutiveScaleInCount describes the least consecutive count to scale-in for tidb