Skip to content

Commit

Permalink
etcdserver: add learner metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
WIZARD-CXY committed Jun 4, 2019
1 parent cdca488 commit 4bb39ad
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 0 deletions.
15 changes: 15 additions & 0 deletions docs/metrics/latest
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,21 @@ etcd_server_is_leader
# type: "counter"
etcd_server_leader_changes_seen_total

# name: "etcd_server_is_learner"
# description: "Whether or not this member is a learner. 1 if is, 0 otherwise."
# type: "gauge"
etcd_server_is_learner

# name: "etcd_server_learner_promote_failures"
# description: "The total number of learner promote failures (likely learner not ready) while this member is leader."
# type: "counter"
etcd_server_learner_promote_failures

# name: "etcd_server_learner_promote_successes"
# description: "The total number of successful learner promotions while this member is leader."
# type: "counter"
etcd_server_learner_promote_successes

# name: "etcd_server_proposals_applied_total"
# description: "The total number of consensus proposals applied."
# type: "gauge"
Expand Down
23 changes: 23 additions & 0 deletions etcdserver/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,26 @@ var (
Name: "leader_changes_seen_total",
Help: "The number of leader changes seen.",
})
// isLearner is the gauge built from namespace "etcd", subsystem "server" and
// name "is_learner". The applyConfChange hunk sets it to 1 when a
// ConfChangeAddLearnerNode entry names this server's own member ID, and to 0
// for any other conf change type handled by that branch that names this server.
isLearner = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "etcd",
Subsystem: "server",
Name: "is_learner",
Help: "Whether or not this member is a learner. 1 if is, 0 otherwise.",
})
// learnerPromoteFailed counts learner promotions that failed on this member,
// partitioned by a single "Reason" label. PromoteMember increments it only
// when the local promoteMember call returns an error other than ErrNotLeader,
// and passes err.Error() as the label value.
//
// NOTE(review): because the label value is the raw error string, the number of
// time series depends on how many distinct error messages promoteMember can
// produce — confirm that set is small and bounded.
// NOTE(review): Prometheus naming guidance prefers lowercase label names and a
// "_total" suffix on counters; renaming would change the exported metric, so
// treat it as a deliberate compatibility decision rather than a drive-by fix.
learnerPromoteFailed = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "etcd",
Subsystem: "server",
Name: "learner_promote_failures",
Help: "The total number of learner promote failures (likely learner not ready) while this member is leader.",
},
// Label names for the vector; one value is supplied per increment.
[]string{"Reason"},
)
// learnerPromoteSucceed counts learner promotions that succeeded on this
// member. PromoteMember increments it when the local promoteMember call
// returns a nil error; the increment comes before the code that handles
// ErrNotLeader, so promotions that take the ErrNotLeader path are not counted
// here.
learnerPromoteSucceed = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: "etcd",
Subsystem: "server",
Name: "learner_promote_successes",
Help: "The total number of successful learner promotions while this member is leader.",
})
heartbeatSendFailures = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: "etcd",
Subsystem: "server",
Expand Down Expand Up @@ -144,6 +164,9 @@ func init() {
prometheus.MustRegister(currentVersion)
prometheus.MustRegister(currentGoVersion)
prometheus.MustRegister(serverID)
prometheus.MustRegister(isLearner)
prometheus.MustRegister(learnerPromoteSucceed)
prometheus.MustRegister(learnerPromoteFailed)

currentVersion.With(prometheus.Labels{
"server_version": version.Version,
Expand Down
14 changes: 14 additions & 0 deletions etcdserver/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -1646,7 +1646,12 @@ func (s *EtcdServer) PromoteMember(ctx context.Context, id uint64) ([]*membershi
// fails with ErrNotLeader, forward the request to leader node via HTTP. If promoteMember call fails with error
// other than ErrNotLeader, return the error.
resp, err := s.promoteMember(ctx, id)
if err == nil {
learnerPromoteSucceed.Inc()
return resp, nil
}
if err != ErrNotLeader {
learnerPromoteFailed.WithLabelValues(err.Error()).Inc()
return resp, err
}

Expand Down Expand Up @@ -2259,6 +2264,15 @@ func (s *EtcdServer) applyConfChange(cc raftpb.ConfChange, confState *raftpb.Con
}
}

// update the isLearner metric when this server id is equal to the id in raft member confChange
if confChangeContext.Member.ID == s.id {
if cc.Type == raftpb.ConfChangeAddLearnerNode {
isLearner.Set(1)
} else {
isLearner.Set(0)
}
}

case raftpb.ConfChangeRemoveNode:
id := types.ID(cc.NodeID)
s.cluster.RemoveMember(id)
Expand Down

0 comments on commit 4bb39ad

Please sign in to comment.