Skip to content

Commit

Permalink
auto scaling noise reduction support
Browse files Browse the repository at this point in the history
* add Normal, ReadyToScaleOut and ReadyToScaleIn three AutoScalerPhase
* add AnnTiDBReadyToScaleTimestamp, AnnTiKVReadyToScaleTimestamp
timestamp labels to record AutoScalerPhase timestamp
* add checkStsReadyAutoScalingTimestamp to check AutoScalerPhase
timestamp
* add checkStsAutoScaling combine checkStsReadyAutoScalingTimestamp and
checkStsAutoScalingInterval
* add tests
* update doc
  • Loading branch information
vincent178 committed Apr 29, 2020
1 parent d17fa91 commit 5140412
Show file tree
Hide file tree
Showing 10 changed files with 197 additions and 7 deletions.
19 changes: 19 additions & 0 deletions docs/api-references/docs.md
Original file line number Diff line number Diff line change
Expand Up @@ -1546,6 +1546,13 @@ TidbMonitorStatus
</tr>
</tbody>
</table>
<h3 id="autoscalerphase">AutoScalerPhase</h3>
<p>
(<em>Appears on:</em>
<a href="#basicautoscalerstatus">BasicAutoScalerStatus</a>)
</p>
<p>
</p>
<h3 id="brconfig">BRConfig</h3>
<p>
(<em>Appears on:</em>
Expand Down Expand Up @@ -2350,6 +2357,18 @@ If not set, the default value is 5.</p>
<tbody>
<tr>
<td>
<code>phase</code></br>
<em>
<a href="#autoscalerphase">
AutoScalerPhase
</a>
</em>
</td>
<td>
</td>
</tr>
<tr>
<td>
<code>metrics</code></br>
<em>
<a href="#metricsstatus">
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ require (
github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4 // indirect
github.com/grpc-ecosystem/grpc-gateway v1.13.0 // indirect
github.com/imdario/mergo v0.3.7 // indirect
github.com/jonboulle/clockwork v0.1.0
github.com/juju/errors v0.0.0-20180806074554-22422dad46e1
github.com/juju/loggo v0.0.0-20180524022052-584905176618 // indirect
github.com/juju/testing v0.0.0-20180920084828-472a3e8b2073 // indirect
Expand Down
4 changes: 4 additions & 0 deletions manifests/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10884,6 +10884,8 @@ spec:
- thresholdValue
type: object
type: array
phase:
type: string
recommendedReplicas:
description: RecommendedReplicas describes the calculated replicas
in the last auto-scaling reconciliation for the component(tidb/tikv)
Expand Down Expand Up @@ -10931,6 +10933,8 @@ spec:
- thresholdValue
type: object
type: array
phase:
type: string
recommendedReplicas:
description: RecommendedReplicas describes the calculated replicas
in the last auto-scaling reconciliation for the component(tidb/tikv)
Expand Down
18 changes: 18 additions & 0 deletions pkg/apis/pingcap/v1alpha1/openapi_generated.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions pkg/apis/pingcap/v1alpha1/tidbclusterautoscaler_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,14 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

type AutoScalerPhase string

const (
NormalAutoScalerPhase AutoScalerPhase = "Normal"
ReadyToScaleOutAutoScalerPhase AutoScalerPhase = "ReadyToScaleOut"
ReadyToScaleInAutoScalerPhase AutoScalerPhase = "ReadyToScaleIn"
)

// +genclient
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object

Expand Down Expand Up @@ -175,6 +183,7 @@ type TikvAutoScalerStatus struct {
// +k8s:openapi-gen=true
// BasicAutoScalerStatus describe the basic auto-scaling status
type BasicAutoScalerStatus struct {
Phase AutoScalerPhase `json:"phase,omitempty"`
// MetricsStatusList describes the metrics status in the last auto-scaling reconciliation
// +optional
MetricsStatusList []MetricsStatus `json:"metrics,omitempty"`
Expand Down
3 changes: 3 additions & 0 deletions pkg/autoscaler/autoscaler/tidb_autoscaler.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ func (am *autoScalerManager) syncTiDB(tc *v1alpha1.TidbCluster, tac *v1alpha1.Ti
}
targetReplicas = limitTargetReplicas(targetReplicas, tac, v1alpha1.TiDBMemberType)
if targetReplicas == tc.Spec.TiDB.Replicas {
tac.Status.TiDB.Phase = v1alpha1.NormalAutoScalerPhase
return nil
}
return syncTiDBAfterCalculated(tc, tac, currentReplicas, targetReplicas, sts)
Expand All @@ -60,6 +61,7 @@ func syncTiDBAfterCalculated(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbCluster
if recommendedReplicas > currentReplicas {
intervalSeconds = tac.Spec.TiDB.ScaleOutIntervalSeconds
}

ableToScale, err := checkStsAutoScalingInterval(tac, *intervalSeconds, v1alpha1.TiDBMemberType)
if err != nil {
return err
Expand All @@ -72,6 +74,7 @@ func syncTiDBAfterCalculated(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbCluster

// Currently we didnt' record the auto-scaling out slot for tidb, because it is pointless for now.
func updateTcTiDBIfScale(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAutoScaler, recommendedReplicas int32) error {
tac.Status.TiDB.Phase = v1alpha1.NormalAutoScalerPhase
tac.Annotations[label.AnnTiDBLastAutoScalingTimestamp] = fmt.Sprintf("%d", time.Now().Unix())
tc.Spec.TiDB.Replicas = recommendedReplicas
tac.Status.TiDB.RecommendedReplicas = &recommendedReplicas
Expand Down
23 changes: 18 additions & 5 deletions pkg/autoscaler/autoscaler/tikv_autoscaler.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,24 @@ func (am *autoScalerManager) syncTiKV(tc *v1alpha1.TidbCluster, tac *v1alpha1.Ti
// The currentReplicas of TiKV calculated in auto-scaling is the count of the StateUp TiKV instance, so we need to
// add the number of other state tikv instance replicas when we update the TidbCluster.Spec.TiKV.Replicas
func syncTiKVAfterCalculated(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAutoScaler, currentReplicas, recommendedReplicas int32, sts *appsv1.StatefulSet) error {

intervalSeconds := tac.Spec.TiKV.ScaleInIntervalSeconds
if recommendedReplicas > tc.Spec.TiKV.Replicas {
intervalSeconds = tac.Spec.TiKV.ScaleOutIntervalSeconds
if recommendedReplicas > currentReplicas {
if tac.Status.TiKV.Phase != v1alpha1.ReadyToScaleOutAutoScalerPhase {
tac.Status.TiKV.Phase = v1alpha1.ReadyToScaleOutAutoScalerPhase
// phase could change from Normal to ReadyToScaleOut, ReadyToScaleIn to ReadyToScaleOut,
// reset timestamp in both cases.
tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] = fmt.Sprintf("%d", time.Now().Unix())
}
} else {
if tac.Status.TiKV.Phase != v1alpha1.ReadyToScaleInAutoScalerPhase {
tac.Status.TiKV.Phase = v1alpha1.ReadyToScaleInAutoScalerPhase
// phase could change from Normal to ReadyToScaleIn, ReadyToScaleOut to ReadyToScaleIn,
// reset timestamp in both cases.
tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] = fmt.Sprintf("%d", time.Now().Unix())
}
}
ableToScale, err := checkStsAutoScalingInterval(tac, *intervalSeconds, v1alpha1.TiKVMemberType)

ableToScale, err := checkStsAutoScaling(tac, 123, *intervalSeconds, v1alpha1.TiKVMemberType)
if err != nil {
return err
}
Expand All @@ -87,7 +99,6 @@ func filterTiKVInstances(tc *v1alpha1.TidbCluster) []string {

// we record the auto-scaling out slot for tikv, in order to add special hot labels when they are created
func updateTcTiKVIfScale(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAutoScaler, currentReplicas, recommendedReplicas int32, sts *appsv1.StatefulSet) error {
tac.Annotations[label.AnnTiKVLastAutoScalingTimestamp] = fmt.Sprintf("%d", time.Now().Unix())
if recommendedReplicas > currentReplicas {
newlyScaleOutOrdinalSets := helper.GetPodOrdinals(recommendedReplicas, sts).Difference(helper.GetPodOrdinals(currentReplicas, sts))
if newlyScaleOutOrdinalSets.Len() > 0 {
Expand All @@ -102,6 +113,8 @@ func updateTcTiKVIfScale(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAuto
tc.Annotations[label.AnnTiKVAutoScalingOutOrdinals] = v
}
}
tac.Status.TiDB.Phase = v1alpha1.NormalAutoScalerPhase
tac.Annotations[label.AnnTiKVLastAutoScalingTimestamp] = fmt.Sprintf("%d", time.Now().Unix())
tc.Spec.TiKV.Replicas = recommendedReplicas
tac.Status.TiKV.RecommendedReplicas = &recommendedReplicas
return nil
Expand Down
75 changes: 73 additions & 2 deletions pkg/autoscaler/autoscaler/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@
package autoscaler

import (
"errors"
"fmt"
"strconv"
"time"

"github.com/jonboulle/clockwork"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/label"
operatorUtils "github.com/pingcap/tidb-operator/pkg/util"
Expand Down Expand Up @@ -57,11 +59,79 @@ func checkStsAutoScalingPrerequisites(set *appsv1.StatefulSet) bool {
return true
}

// checkStsAutoScalingInterval would check whether there is enough interval duration between every two auto-scaling
func checkStsAutoScalingInterval(tac *v1alpha1.TidbClusterAutoScaler, intervalSeconds int32, memberType v1alpha1.MemberType) (bool, error) {
func checkStsAutoScaling(tac *v1alpha1.TidbClusterAutoScaler, thresholdSeconds, intervalSeconds int32, memberType v1alpha1.MemberType) (bool, error) {
realClock := clockwork.NewRealClock()
if tac.Annotations == nil {
tac.Annotations = map[string]string{}
}
ableToScale, err := checkStsLastSyncTimestamp(tac, 30*3, realClock)
if err != nil {
return false, err
}
if !ableToScale {
return false, nil
}
ableToScale, err = checkStsReadyAutoScalingTimestamp(tac, thresholdSeconds, realClock)
if err != nil {
return false, err
}
if !ableToScale {
return false, nil
}
ableToScale, err = checkStsAutoScalingInterval(tac, intervalSeconds, memberType)
if err != nil {
return false, err
}
if !ableToScale {
return false, nil
}
return true, nil
}

// checkStsLastSyncTimestamp reset TiKV phase if last sync timestamp is longer than thresholdSec
func checkStsLastSyncTimestamp(tac *v1alpha1.TidbClusterAutoScaler, thresholdSec int32, clock clockwork.Clock) (bool, error) {
if tac.Annotations == nil {
tac.Annotations = map[string]string{}
}

lastSyncTimestamp, existed := tac.Annotations[label.AnnStsLastSyncTimestamp]
if !existed {
// record sync timestamp should always happen before checkStsLastSyncTimestamp
return false, errors.New("tidb.pingcap.com/sync-timestamp label is missing")
}
t, err := strconv.ParseInt(lastSyncTimestamp, 10, 64)
if err != nil {
return false, err
}
// if last sync is longer than n * resync during, reset TiKV phase to Normal
if int32(clock.Now().Sub(time.Unix(t, 0)).Seconds()) > thresholdSec {
tac.Status.TiKV.Phase = v1alpha1.NormalAutoScalerPhase
return false, nil
}
return true, nil
}

// checkStsReadyAutoScalingTimestamp would check whether there is enough time window after ready to scale
func checkStsReadyAutoScalingTimestamp(tac *v1alpha1.TidbClusterAutoScaler, thresholdSeconds int32, clock clockwork.Clock) (bool, error) {
readyAutoScalingTimestamp, existed := tac.Annotations[label.AnnTiKVReadyToScaleTimestamp]

if !existed {
tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] = fmt.Sprintf("%d", clock.Now().Unix())
return false, nil
}
t, err := strconv.ParseInt(readyAutoScalingTimestamp, 10, 32)
if err != nil {
return false, err
}
readyAutoScalingSec := int32(clock.Now().Sub(time.Unix(t, 0)).Seconds())
if thresholdSeconds > readyAutoScalingSec {
return false, nil
}
return true, nil
}

// checkStsAutoScalingInterval would check whether there is enough interval duration between every two auto-scaling
func checkStsAutoScalingInterval(tac *v1alpha1.TidbClusterAutoScaler, intervalSeconds int32, memberType v1alpha1.MemberType) (bool, error) {
lastAutoScalingTimestamp, existed := tac.Annotations[label.AnnTiDBLastAutoScalingTimestamp]
if memberType == v1alpha1.TiKVMemberType {
lastAutoScalingTimestamp, existed = tac.Annotations[label.AnnTiKVLastAutoScalingTimestamp]
Expand Down Expand Up @@ -197,6 +267,7 @@ func checkAndUpdateTacAnn(tac *v1alpha1.TidbClusterAutoScaler) {
resetAutoScalingAnn(tac)
return
}
tac.Annotations[label.AnnStsLastSyncTimestamp] = fmt.Sprintf("%d", time.Now().Unix())
name := tac.Annotations[label.AnnAutoScalingTargetName]
namespace := tac.Annotations[label.AnnAutoScalingTargetNamespace]
if name == tac.Spec.Cluster.Name && namespace == tac.Spec.Cluster.Namespace {
Expand Down
47 changes: 47 additions & 0 deletions pkg/autoscaler/autoscaler/util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"testing"
"time"

"github.com/jonboulle/clockwork"
. "github.com/onsi/gomega"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/label"
Expand All @@ -26,6 +27,52 @@ import (
"k8s.io/utils/pointer"
)

func TestCheckStsReadyAutoScalingTimestamp(t *testing.T) {
g := NewGomegaWithT(t)
c := clockwork.NewFakeClockAt(time.Now())
tests := []struct {
name string
withTimestmap bool
readyAutoScalingSec int
expectedPermitScaling bool
}{
{
name: "tikv, no timestamp",
withTimestmap: false,
readyAutoScalingSec: 0,
expectedPermitScaling: false,
},
{
name: "tikv, ready autoscaling 60s",
withTimestmap: true,
readyAutoScalingSec: 60,
expectedPermitScaling: false,
},
{
name: "tikv, ready autoscaling 120s",
withTimestmap: true,
readyAutoScalingSec: 120,
expectedPermitScaling: true,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
thresholdSec := int32(100)
tac := newTidbClusterAutoScaler()
d := time.Duration(tt.readyAutoScalingSec) * time.Second
if tt.withTimestmap {
tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] = fmt.Sprintf("%d", time.Now().Add(-d).Unix())
} else {
tac.Annotations = map[string]string{}
}
r, err := checkStsReadyAutoScalingTimestamp(tac, thresholdSec, c)
g.Expect(err).Should(BeNil())
g.Expect(r).Should(Equal(tt.expectedPermitScaling))
})
}
}

func TestCheckStsAutoScalingInterval(t *testing.T) {
g := NewGomegaWithT(t)
tests := []struct {
Expand Down
5 changes: 5 additions & 0 deletions pkg/label/label.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,11 @@ const (
// AnnTiKVLastAutoScalingTimestamp is annotation key of tidbclusterto which ordinal is created by tikv auto-scaling
AnnTiKVLastAutoScalingTimestamp = "tikv.tidb.pingcap.com/last-autoscaling-timestamp"

// AnnTiDBReadyToScaleTimestamp records timestamp when tidb ready to scale
AnnTiDBReadyToScaleTimestamp = "tidb.tidb.pingcap.com/ready-to-scale-timestamp"
// AnnTiKVReadyToScaleTimestamp records timestamp when tikv ready to scale
AnnTiKVReadyToScaleTimestamp = "tikv.tidb.pingcap.com/ready-to-scale-timestamp"

// AnnTiDBConsecutiveScaleOutCount describes the least consecutive count to scale-out for tidb
AnnTiDBConsecutiveScaleOutCount = "tidb.tidb.pingcap.com/consecutive-scale-out-count"
// AnnTiDBConsecutiveScaleInCount describes the least consecutive count to scale-in for tidb
Expand Down

0 comments on commit 5140412

Please sign in to comment.