Skip to content
/ etcd Public
forked from etcd-io/etcd

Commit

Permalink
etcdserver/api/rafthttp: add "etcd_network_raft_send_total_duration_s…
Browse files Browse the repository at this point in the history
…econds" metric

Currently, only the v2 metrics ("stats.FollowerStats") track Raft message
send latencies. Add a Prometheus histogram to track send latencies of Raft
messages for writes, since heartbeats are already probed (see etcd-io#10022)
and snapshots are already being tracked via etcd-io#9997.

```
etcd_network_raft_send_total_duration_seconds_bucket{To="7339c4e5e833c029",Type="MsgProp",le="0.0001"} 1
etcd_network_raft_send_total_duration_seconds_bucket{To="7339c4e5e833c029",Type="MsgProp",le="0.0002"} 1
etcd_network_raft_send_total_duration_seconds_bucket{To="729934363faa4a24",Type="MsgApp",le="0.0001"} 9
etcd_network_raft_send_total_duration_seconds_bucket{To="729934363faa4a24",Type="MsgApp",le="0.0002"} 9
etcd_network_raft_send_total_duration_seconds_bucket{To="7339c4e5e833c029",Type="MsgAppResp",le="0.0001"} 8
etcd_network_raft_send_total_duration_seconds_bucket{To="7339c4e5e833c029",Type="MsgAppResp",le="0.0002"} 8
```

Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
  • Loading branch information
gyuho committed Aug 18, 2018
1 parent 2f9fdb6 commit ca7d628
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 0 deletions.
14 changes: 14 additions & 0 deletions etcdserver/api/rafthttp/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,19 @@ var (
[]string{"From"},
)

// raftSendSeconds is a Prometheus histogram vector, exposed as
// "etcd_network_raft_send_total_duration_seconds", for Raft message send
// durations. Series are partitioned by the "Type" and "To" labels.
raftSendSeconds = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "etcd",
Subsystem: "network",
Name: "raft_send_total_duration_seconds",
Help: "Total latency distributions of Raft message sends",

// Bucket upper bounds grow exponentially by a factor of 2 (16 buckets):
// the lowest upper bound is 0.0001 sec (0.1 ms) and
// the highest upper bound is 0.0001 sec * 2^15 == 3.2768 sec
Buckets: prometheus.ExponentialBuckets(0.0001, 2, 16),
},
[]string{"Type", "To"},
)

rttSec = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "etcd",
Subsystem: "network",
Expand Down Expand Up @@ -162,5 +175,6 @@ func init() {
prometheus.MustRegister(snapshotReceiveFailures)
prometheus.MustRegister(snapshotReceiveSeconds)

prometheus.MustRegister(raftSendSeconds)
prometheus.MustRegister(rttSec)
}
8 changes: 8 additions & 0 deletions etcdserver/api/rafthttp/stream.go
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,10 @@ func (cw *streamWriter) run() {
heartbeatc, msgc = nil, nil

case m := <-msgc:
start := time.Now()
err := enc.encode(&m)
if err == nil {
took := time.Since(start)
unflushed += m.Size()

if len(msgc) == 0 || batched > streamBufSize/2 {
Expand All @@ -214,6 +216,12 @@ func (cw *streamWriter) run() {
batched++
}

// TODO: track other messages?
// snapshot sends are tracked via separate metrics
// heartbeats are tracked via prober https://github.com/coreos/etcd/pull/10022
if m.Type == raftpb.MsgProp || m.Type == raftpb.MsgApp || m.Type == raftpb.MsgAppResp {
raftSendSeconds.WithLabelValues(m.Type.String(), types.ID(m.To).String()).Observe(took.Seconds())
}
continue
}

Expand Down

0 comments on commit ca7d628

Please sign in to comment.