Add more events for tidbcluster and autoscaler #2150

Merged 14 commits on Apr 14, 2020
15 changes: 13 additions & 2 deletions pkg/autoscaler/autoscaler/autoscaler_manager.go
@@ -15,12 +15,14 @@ package autoscaler

import (
"fmt"
"strings"

"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/client/clientset/versioned"
informers "github.com/pingcap/tidb-operator/pkg/client/informers/externalversions"
v1alpha1listers "github.com/pingcap/tidb-operator/pkg/client/listers/pingcap/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/controller"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
@@ -80,7 +82,7 @@ func (am *autoScalerManager) Sync(tac *v1alpha1.TidbClusterAutoScaler) error {
if err := am.syncAutoScaling(tc, tac); err != nil {
return err
}
if err := am.syncTidbClusterReplicas(tc, oldTc); err != nil {
if err := am.syncTidbClusterReplicas(tac, tc, oldTc); err != nil {
return err
}
return am.syncAutoScalingStatus(tc, oldTc, tac)
@@ -102,7 +104,7 @@ func (am *autoScalerManager) syncAutoScaling(tc *v1alpha1.TidbCluster, tac *v1al
return nil
}

func (am *autoScalerManager) syncTidbClusterReplicas(tc *v1alpha1.TidbCluster, oldTc *v1alpha1.TidbCluster) error {
func (am *autoScalerManager) syncTidbClusterReplicas(tac *v1alpha1.TidbClusterAutoScaler, tc *v1alpha1.TidbCluster, oldTc *v1alpha1.TidbCluster) error {
if tc.Spec.TiDB.Replicas == oldTc.Spec.TiDB.Replicas && tc.Spec.TiKV.Replicas == oldTc.Spec.TiKV.Replicas {
return nil
}
@@ -111,6 +113,15 @@ func (am *autoScalerManager) syncTidbClusterReplicas(tc *v1alpha1.TidbCluster, o
if err != nil {
return err
}
reason := fmt.Sprintf("Successful %s", strings.Title("auto-scaling"))
msg := ""
if tc.Spec.TiDB.Replicas != oldTc.Spec.TiDB.Replicas {
msg = fmt.Sprintf("%s auto-scaling tidb from %d to %d", msg, oldTc.Spec.TiDB.Replicas, tc.Spec.TiDB.Replicas)
}
if tc.Spec.TiKV.Replicas != oldTc.Spec.TiKV.Replicas {
msg = fmt.Sprintf("%s auto-scaling tikv from %d to %d", msg, oldTc.Spec.TiKV.Replicas, tc.Spec.TiKV.Replicas)
}
am.recorder.Event(tac, corev1.EventTypeNormal, reason, msg)
return nil
}

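For illustration only (not part of the PR): a minimal, self-contained sketch of what the new auto-scaling event looks like when observed through client-go's `FakeRecorder`, which is what the operator's unit tests use. The replica numbers and the target object are made up, and the reason string mirrors what `strings.Title("auto-scaling")` produces.

```go
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/client-go/tools/record"
)

func main() {
	// FakeRecorder buffers "<type> <reason> <message>" strings on a channel.
	recorder := record.NewFakeRecorder(10)

	// Any runtime.Object can be the event target; a bare Pod stands in for the
	// TidbClusterAutoScaler here.
	obj := &corev1.Pod{}

	// Mirrors the message construction above: one clause appended per component
	// whose replica count changed (note the leading space left by the "%s" prefix).
	msg := ""
	msg = fmt.Sprintf("%s auto-scaling tidb from %d to %d", msg, 2, 3)
	msg = fmt.Sprintf("%s auto-scaling tikv from %d to %d", msg, 3, 5)

	recorder.Event(obj, corev1.EventTypeNormal, "Successful Auto-Scaling", msg)

	fmt.Println(<-recorder.Events)
	// Normal Successful Auto-Scaling  auto-scaling tidb from 2 to 3 auto-scaling tikv from 3 to 5
}
```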
4 changes: 2 additions & 2 deletions pkg/controller/tidbcluster/tidb_cluster_controller.go
@@ -107,8 +107,8 @@ func NewController(
pdScaler := mm.NewPDScaler(pdControl, pvcInformer.Lister(), pvcControl)
tikvScaler := mm.NewTiKVScaler(pdControl, pvcInformer.Lister(), pvcControl, podInformer.Lister())
pdFailover := mm.NewPDFailover(cli, pdControl, pdFailoverPeriod, podInformer.Lister(), podControl, pvcInformer.Lister(), pvcControl, pvInformer.Lister(), recorder)
tikvFailover := mm.NewTiKVFailover(tikvFailoverPeriod)
tidbFailover := mm.NewTiDBFailover(tidbFailoverPeriod)
tikvFailover := mm.NewTiKVFailover(tikvFailoverPeriod, recorder)
tidbFailover := mm.NewTiDBFailover(tidbFailoverPeriod, recorder)
pdUpgrader := mm.NewPDUpgrader(pdControl, podControl, podInformer.Lister())
tikvUpgrader := mm.NewTiKVUpgrader(pdControl, podControl, podInformer.Lister())
tidbUpgrader := mm.NewTiDBUpgrader(tidbControl, podInformer.Lister())
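The controller now threads the same `recorder` into the TiKV and TiDB failover constructors. This diff does not show how that recorder is built, so the sketch below is only the usual client-go wiring; the component name, logging call, and event sink are assumptions, not code from this PR.

```go
package main

import (
	corev1 "k8s.io/api/core/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/kubernetes/scheme"
	typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
	"k8s.io/client-go/tools/record"
	"k8s.io/klog"
)

// newRecorder shows the typical client-go wiring for an EventRecorder like the
// one passed into NewTiKVFailover/NewTiDBFailover above. The component name is
// an assumption for illustration.
func newRecorder(kubeCli kubernetes.Interface) record.EventRecorder {
	broadcaster := record.NewBroadcaster()
	broadcaster.StartLogging(klog.Infof)
	broadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{
		Interface: kubeCli.CoreV1().Events(""),
	})
	return broadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: "tidb-controller-manager"})
}

// main is intentionally empty; the function above is the point of the sketch.
func main() {}
```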
5 changes: 5 additions & 0 deletions pkg/manager/member/failover.go
@@ -15,6 +15,11 @@ package member

import "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"

const (
unHealthEventReason = "Unhealthy"
unHealthEventMsgPattern = "%s pod[%s] is unhealthy, msg:%s"
)

// Failover implements the logic for pd/tikv/tidb's failover and recovery.
type Failover interface {
Failover(*v1alpha1.TidbCluster) error
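A quick, self-contained illustration of how these shared constants render into the event messages emitted by the failover implementations further down; the pod name and store ID are made up.

```go
package main

import "fmt"

const (
	unHealthEventReason     = "Unhealthy"
	unHealthEventMsgPattern = "%s pod[%s] is unhealthy, msg:%s"
)

func main() {
	// Hypothetical TiKV pod name and store ID, for illustration only.
	msg := fmt.Sprintf("store[%s] is Down", "5")
	fmt.Println(unHealthEventReason, fmt.Sprintf(unHealthEventMsgPattern, "tikv", "basic-tikv-2", msg))
	// Output: Unhealthy tikv pod[basic-tikv-2] is unhealthy, msg:store[5] is Down
}
```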
4 changes: 2 additions & 2 deletions pkg/manager/member/pd_failover.go
@@ -140,8 +140,8 @@ func (pf *pdFailover) tryToMarkAPeerAsFailure(tc *v1alpha1.TidbCluster) error {
return err
}

pf.recorder.Eventf(tc, apiv1.EventTypeWarning, "PDMemberMarkedAsFailure",
"%s(%s) marked as a failure member", podName, pdMember.ID)
msg := fmt.Sprintf("pd member[%s] is unhealthy", pdMember.ID)
pf.recorder.Event(tc, apiv1.EventTypeWarning, unHealthEventReason, fmt.Sprintf(unHealthEventMsgPattern, "pd", podName, msg))

tc.Status.PD.FailureMembers[podName] = v1alpha1.PDFailureMember{
PodName: podName,
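The change above replaces `Eventf`, which formats the message itself, with `Event` plus an explicit `fmt.Sprintf`, so PD, TiKV, and TiDB failover all share the `Unhealthy` reason and message pattern. A minimal sketch of the difference, reusing the values asserted by the test below; the bare Pod is a stand-in for the TidbCluster, and `FakeRecorder` is used only to make the output visible.

```go
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/client-go/tools/record"
)

const (
	unHealthEventReason     = "Unhealthy"
	unHealthEventMsgPattern = "%s pod[%s] is unhealthy, msg:%s"
)

func main() {
	recorder := record.NewFakeRecorder(10)
	obj := &corev1.Pod{} // stand-in for the TidbCluster; FakeRecorder ignores the object

	podName, memberID := "test-pd-1", "12891273174085095651"

	// Old style: Eventf formats the message from a pattern plus arguments.
	recorder.Eventf(obj, corev1.EventTypeWarning, "PDMemberMarkedAsFailure",
		"%s(%s) marked as a failure member", podName, memberID)

	// New style: pre-format the message, then record it under the shared reason.
	msg := fmt.Sprintf("pd member[%s] is unhealthy", memberID)
	recorder.Event(obj, corev1.EventTypeWarning, unHealthEventReason,
		fmt.Sprintf(unHealthEventMsgPattern, "pd", podName, msg))

	fmt.Println(<-recorder.Events) // Warning PDMemberMarkedAsFailure test-pd-1(12891273174085095651) marked as a failure member
	fmt.Println(<-recorder.Events) // Warning Unhealthy pd pod[test-pd-1] is unhealthy, msg:pd member[12891273174085095651] is unhealthy
}
```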
2 changes: 1 addition & 1 deletion pkg/manager/member/pd_failover_test.go
@@ -275,7 +275,7 @@ func TestPDFailoverFailover(t *testing.T) {
events := collectEvents(recorder.Events)
g.Expect(events).To(HaveLen(2))
g.Expect(events[0]).To(ContainSubstring("test-pd-1(12891273174085095651) is unhealthy"))
g.Expect(events[1]).To(ContainSubstring("test-pd-1(12891273174085095651) marked as a failure member"))
g.Expect(events[1]).To(ContainSubstring("Unhealthy pd pod[test-pd-1] is unhealthy, msg:pd member[12891273174085095651] is unhealthy"))
},
},
{
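`collectEvents` is referenced by the test above but not shown in this diff; an assumed reconstruction of such a helper (drain whatever the FakeRecorder has buffered, without blocking) could look like this:

```go
package main

import (
	"fmt"

	"k8s.io/client-go/tools/record"
)

// collectEvents is an assumed reconstruction, not code from this PR: it drains
// whatever is currently buffered on the FakeRecorder's channel into a slice.
func collectEvents(source <-chan string) []string {
	events := []string{}
	for {
		select {
		case e := <-source:
			events = append(events, e)
		default:
			return events
		}
	}
}

func main() {
	recorder := record.NewFakeRecorder(10)
	// FakeRecorder ignores the object argument, so nil is fine for this sketch.
	recorder.Event(nil, "Warning", "Unhealthy", "example message")
	fmt.Println(collectEvents(recorder.Events)) // [Warning Unhealthy example message]
}
```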
2 changes: 1 addition & 1 deletion pkg/manager/member/pd_scaler_test.go
@@ -15,11 +15,11 @@ package member

import (
"fmt"
"github.com/pingcap/kvproto/pkg/pdpb"
"testing"
"time"

. "github.com/onsi/gomega"
"github.com/pingcap/kvproto/pkg/pdpb"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/controller"
"github.com/pingcap/tidb-operator/pkg/label"
12 changes: 10 additions & 2 deletions pkg/manager/member/tidb_failover.go
@@ -14,21 +14,26 @@
package member

import (
"fmt"
"time"

"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/tools/record"
"k8s.io/klog"
)

type tidbFailover struct {
tidbFailoverPeriod time.Duration
recorder record.EventRecorder
}

// NewTiDBFailover returns a tidbFailover instance
func NewTiDBFailover(failoverPeriod time.Duration) Failover {
func NewTiDBFailover(failoverPeriod time.Duration, recorder record.EventRecorder) Failover {
return &tidbFailover{
tidbFailoverPeriod: failoverPeriod,
recorder: recorder,
}
}

@@ -59,6 +64,8 @@ func (tf *tidbFailover) Failover(tc *v1alpha1.TidbCluster) error {
PodName: tidbMember.Name,
CreatedAt: metav1.Now(),
}
msg := fmt.Sprintf("tidb[%s] is unhealthy", tidbMember.Name)
tf.recorder.Event(tc, corev1.EventTypeWarning, unHealthEventReason, fmt.Sprintf(unHealthEventMsgPattern, "tidb", tidbMember.Name, msg))
break
}
}
@@ -71,7 +78,8 @@ func (tf *tidbFailover) Recover(tc *v1alpha1.TidbCluster) {
tc.Status.TiDB.FailureMembers = nil
}

type fakeTiDBFailover struct{}
type fakeTiDBFailover struct {
}

// NewFakeTiDBFailover returns a fake Failover
func NewFakeTiDBFailover() Failover {
4 changes: 3 additions & 1 deletion pkg/manager/member/tidb_failover_test.go
@@ -19,6 +19,7 @@ import (

. "github.com/onsi/gomega"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/record"
"k8s.io/utils/pointer"

"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
@@ -382,7 +383,8 @@ func TestFakeTiDBFailoverRecover(t *testing.T) {
}

func newTiDBFailover() Failover {
return &tidbFailover{tidbFailoverPeriod: time.Duration(5 * time.Minute)}
recorder := record.NewFakeRecorder(100)
return &tidbFailover{tidbFailoverPeriod: time.Duration(5 * time.Minute), recorder: recorder}
}

func newTidbClusterForTiDBFailover() *v1alpha1.TidbCluster {
10 changes: 8 additions & 2 deletions pkg/manager/member/tikv_failover.go
@@ -14,20 +14,24 @@
package member

import (
"fmt"
"time"

"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/tools/record"
"k8s.io/klog"
)

type tikvFailover struct {
tikvFailoverPeriod time.Duration
recorder record.EventRecorder
}

// NewTiKVFailover returns a tikv Failover
func NewTiKVFailover(tikvFailoverPeriod time.Duration) Failover {
return &tikvFailover{tikvFailoverPeriod}
func NewTiKVFailover(tikvFailoverPeriod time.Duration, recorder record.EventRecorder) Failover {
return &tikvFailover{tikvFailoverPeriod, recorder}
}

func (tf *tikvFailover) Failover(tc *v1alpha1.TidbCluster) error {
@@ -62,6 +66,8 @@ func (tf *tikvFailover) Failover(tc *v1alpha1.TidbCluster) error {
StoreID: store.ID,
CreatedAt: metav1.Now(),
}
msg := fmt.Sprintf("store[%s] is Down", store.ID)
tf.recorder.Event(tc, corev1.EventTypeWarning, unHealthEventReason, fmt.Sprintf(unHealthEventMsgPattern, "tikv", podName, msg))
}
}
}
4 changes: 3 additions & 1 deletion pkg/manager/member/tikv_failover_test.go
@@ -20,6 +20,7 @@ import (
. "github.com/onsi/gomega"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/tools/record"
"k8s.io/utils/pointer"
)

@@ -304,5 +305,6 @@ func TestTiKVFailoverFailover(t *testing.T) {
}

func newFakeTiKVFailover() *tikvFailover {
return &tikvFailover{1 * time.Hour}
recorder := record.NewFakeRecorder(100)
return &tikvFailover{1 * time.Hour, recorder}
}