From 1c12460e6523caed85c9b5ea555ab1baf0923cf9 Mon Sep 17 00:00:00 2001 From: Marco Munizaga Date: Wed, 15 Jun 2022 14:12:25 -0700 Subject: [PATCH 01/21] Add multimetrics reporter --- metrics.go | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 115 insertions(+), 2 deletions(-) diff --git a/metrics.go b/metrics.go index a875415..c71ed90 100644 --- a/metrics.go +++ b/metrics.go @@ -47,10 +47,20 @@ type metrics struct { reporter MetricsReporter } -// WithMetrics is a resource manager option to enable metrics collection +// WithMetrics is a resource manager option to enable metrics collection. Can be +// called multiple times to add multiple reporters. func WithMetrics(reporter MetricsReporter) Option { return func(r *resourceManager) error { - r.metrics = &metrics{reporter: reporter} + if r.metrics == nil { + r.metrics = &metrics{reporter: reporter} + } else if multimetrics, ok := r.metrics.reporter.(*MultiMetricsReporter); ok { + multimetrics.reporters = append(multimetrics.reporters, reporter) + } else { + // This was a single reporter. Lets convert it to a multimetrics reporter + r.metrics = &metrics{ + reporter: &MultiMetricsReporter{reporters: []MetricsReporter{r.metrics.reporter, reporter}}, + } + } return nil } } @@ -166,3 +176,106 @@ func (m *metrics) BlockMemory(size int) { m.reporter.BlockMemory(size) } + +// MultiMetricsReporter is a helper that allows you to report to multiple metrics reporters. +type MultiMetricsReporter struct { + reporters []MetricsReporter +} + +// AllowConn is invoked when opening a connection is allowed +func (mmr *MultiMetricsReporter) AllowConn(dir network.Direction, usefd bool) { + for _, r := range mmr.reporters { + r.AllowConn(dir, usefd) + } +} + +// BlockConn is invoked when opening a connection is blocked +func (mmr *MultiMetricsReporter) BlockConn(dir network.Direction, usefd bool) { + for _, r := range mmr.reporters { + r.BlockConn(dir, usefd) + } +} + +// AllowStream is invoked when opening a stream is allowed +func (mmr *MultiMetricsReporter) AllowStream(p peer.ID, dir network.Direction) { + for _, r := range mmr.reporters { + r.AllowStream(p, dir) + } +} + +// BlockStream is invoked when opening a stream is blocked +func (mmr *MultiMetricsReporter) BlockStream(p peer.ID, dir network.Direction) { + for _, r := range mmr.reporters { + r.BlockStream(p, dir) + } +} + +// AllowPeer is invoked when attaching ac onnection to a peer is allowed +func (mmr *MultiMetricsReporter) AllowPeer(p peer.ID) { + for _, r := range mmr.reporters { + r.AllowPeer(p) + } +} + +// BlockPeer is invoked when attaching ac onnection to a peer is blocked +func (mmr *MultiMetricsReporter) BlockPeer(p peer.ID) { + for _, r := range mmr.reporters { + r.BlockPeer(p) + } +} + +// AllowProtocol is invoked when setting the protocol for a stream is allowed +func (mmr *MultiMetricsReporter) AllowProtocol(proto protocol.ID) { + for _, r := range mmr.reporters { + r.AllowProtocol(proto) + } +} + +// BlockProtocol is invoked when setting the protocol for a stream is blocked +func (mmr *MultiMetricsReporter) BlockProtocol(proto protocol.ID) { + for _, r := range mmr.reporters { + r.BlockProtocol(proto) + } +} + +// BlockedProtocolPeer is invoekd when setting the protocol for a stream is blocked at the per protocol peer scope +func (mmr *MultiMetricsReporter) BlockProtocolPeer(proto protocol.ID, p peer.ID) { + for _, r := range mmr.reporters { + r.BlockProtocolPeer(proto, p) + } +} + +// AllowPService is invoked when setting the protocol for a stream is allowed +func (mmr *MultiMetricsReporter) AllowService(svc string) { + for _, r := range mmr.reporters { + r.AllowService(svc) + } +} + +// BlockPService is invoked when setting the protocol for a stream is blocked +func (mmr *MultiMetricsReporter) BlockService(svc string) { + for _, r := range mmr.reporters { + r.BlockService(svc) + } +} + +// BlockedServicePeer is invoked when setting the service for a stream is blocked at the per service peer scope +func (mmr *MultiMetricsReporter) BlockServicePeer(svc string, p peer.ID) { + for _, r := range mmr.reporters { + r.BlockServicePeer(svc, p) + } +} + +// AllowMemory is invoked when a memory reservation is allowed +func (mmr *MultiMetricsReporter) AllowMemory(size int) { + for _, r := range mmr.reporters { + r.AllowMemory(size) + } +} + +// BlockMemory is invoked when a memory reservation is blocked +func (mmr *MultiMetricsReporter) BlockMemory(size int) { + for _, r := range mmr.reporters { + r.BlockMemory(size) + } +} From 35abda3b138ba5297e18ca22ef8645c88b70182a Mon Sep 17 00:00:00 2001 From: Marco Munizaga Date: Fri, 17 Jun 2022 11:37:53 -0700 Subject: [PATCH 02/21] Add trace reporter to expose traces to users --- trace.go | 119 +++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 75 insertions(+), 44 deletions(-) diff --git a/trace.go b/trace.go index 1018c2c..fb4103f 100644 --- a/trace.go +++ b/trace.go @@ -21,31 +21,54 @@ type trace struct { cancel func() closed chan struct{} - mx sync.Mutex - done bool - pend []interface{} + mx sync.Mutex + done bool + pendingWrites []interface{} + reporters []TraceReporter +} + +type TraceReporter interface { + // ConsumeEvent consumes a trace event. This is called synchronously, + // implementations should process the event quickly. + ConsumeEvent(TraceEvt) } func WithTrace(path string) Option { return func(r *resourceManager) error { - r.trace = &trace{path: path} + if r.trace == nil { + r.trace = &trace{path: path} + } else { + r.trace.path = path + } + return nil + } +} + +func WithTraceReporter(reporter TraceReporter) Option { + return func(r *resourceManager) error { + if r.trace == nil { + r.trace = &trace{reporters: []TraceReporter{reporter}} + } + r.trace.reporters = append(r.trace.reporters, reporter) return nil } } +type TraceEvtTyp string + const ( - traceStartEvt = "start" - traceCreateScopeEvt = "create_scope" - traceDestroyScopeEvt = "destroy_scope" - traceReserveMemoryEvt = "reserve_memory" - traceBlockReserveMemoryEvt = "block_reserve_memory" - traceReleaseMemoryEvt = "release_memory" - traceAddStreamEvt = "add_stream" - traceBlockAddStreamEvt = "block_add_stream" - traceRemoveStreamEvt = "remove_stream" - traceAddConnEvt = "add_conn" - traceBlockAddConnEvt = "block_add_conn" - traceRemoveConnEvt = "remove_conn" + TraceStartEvt TraceEvtTyp = "start" + TraceCreateScopeEvt TraceEvtTyp = "create_scope" + TraceDestroyScopeEvt TraceEvtTyp = "destroy_scope" + TraceReserveMemoryEvt TraceEvtTyp = "reserve_memory" + TraceBlockReserveMemoryEvt TraceEvtTyp = "block_reserve_memory" + TraceReleaseMemoryEvt TraceEvtTyp = "release_memory" + TraceAddStreamEvt TraceEvtTyp = "add_stream" + TraceBlockAddStreamEvt TraceEvtTyp = "block_add_stream" + TraceRemoveStreamEvt TraceEvtTyp = "remove_stream" + TraceAddConnEvt TraceEvtTyp = "add_conn" + TraceBlockAddConnEvt TraceEvtTyp = "block_add_conn" + TraceRemoveConnEvt TraceEvtTyp = "remove_conn" ) type scopeClass struct { @@ -163,7 +186,7 @@ func (s scopeClass) MarshalJSON() ([]byte, error) { type TraceEvt struct { Time string - Type string + Type TraceEvtTyp Scope *scopeClass `json:",omitempty"` Name string `json:",omitempty"` @@ -199,10 +222,16 @@ func (t *trace) push(evt TraceEvt) { evt.Scope = &scopeClass{name: evt.Name} } - t.pend = append(t.pend, evt) + for _, reporter := range t.reporters { + reporter.ConsumeEvent(evt) + } + + if t.path != "" { + t.pendingWrites = append(t.pendingWrites, evt) + } } -func (t *trace) background(out io.WriteCloser) { +func (t *trace) backgroundWriter(out io.WriteCloser) { defer close(t.closed) defer out.Close() @@ -218,8 +247,8 @@ func (t *trace) background(out io.WriteCloser) { getEvents := func() { t.mx.Lock() - tmp := t.pend - t.pend = pend[:0] + tmp := t.pendingWrites + t.pendingWrites = pend[:0] pend = tmp t.mx.Unlock() } @@ -288,15 +317,17 @@ func (t *trace) Start(limits Limiter) error { t.ctx, t.cancel = context.WithCancel(context.Background()) t.closed = make(chan struct{}) - out, err := os.OpenFile(t.path, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) - if err != nil { - return nil - } + if t.path != "" { + out, err := os.OpenFile(t.path, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) + if err != nil { + return nil + } - go t.background(out) + go t.backgroundWriter(out) + } t.push(TraceEvt{ - Type: traceStartEvt, + Type: TraceStartEvt, Limit: limits, }) @@ -329,7 +360,7 @@ func (t *trace) CreateScope(scope string, limit Limit) { } t.push(TraceEvt{ - Type: traceCreateScopeEvt, + Type: TraceCreateScopeEvt, Name: scope, Limit: limit, }) @@ -341,7 +372,7 @@ func (t *trace) DestroyScope(scope string) { } t.push(TraceEvt{ - Type: traceDestroyScopeEvt, + Type: TraceDestroyScopeEvt, Name: scope, }) } @@ -356,7 +387,7 @@ func (t *trace) ReserveMemory(scope string, prio uint8, size, mem int64) { } t.push(TraceEvt{ - Type: traceReserveMemoryEvt, + Type: TraceReserveMemoryEvt, Name: scope, Priority: prio, Delta: size, @@ -374,7 +405,7 @@ func (t *trace) BlockReserveMemory(scope string, prio uint8, size, mem int64) { } t.push(TraceEvt{ - Type: traceBlockReserveMemoryEvt, + Type: TraceBlockReserveMemoryEvt, Name: scope, Priority: prio, Delta: size, @@ -392,7 +423,7 @@ func (t *trace) ReleaseMemory(scope string, size, mem int64) { } t.push(TraceEvt{ - Type: traceReleaseMemoryEvt, + Type: TraceReleaseMemoryEvt, Name: scope, Delta: size, Memory: mem, @@ -412,7 +443,7 @@ func (t *trace) AddStream(scope string, dir network.Direction, nstreamsIn, nstre } t.push(TraceEvt{ - Type: traceAddStreamEvt, + Type: TraceAddStreamEvt, Name: scope, DeltaIn: deltaIn, DeltaOut: deltaOut, @@ -434,7 +465,7 @@ func (t *trace) BlockAddStream(scope string, dir network.Direction, nstreamsIn, } t.push(TraceEvt{ - Type: traceBlockAddStreamEvt, + Type: TraceBlockAddStreamEvt, Name: scope, DeltaIn: deltaIn, DeltaOut: deltaOut, @@ -456,7 +487,7 @@ func (t *trace) RemoveStream(scope string, dir network.Direction, nstreamsIn, ns } t.push(TraceEvt{ - Type: traceRemoveStreamEvt, + Type: TraceRemoveStreamEvt, Name: scope, DeltaIn: deltaIn, DeltaOut: deltaOut, @@ -475,7 +506,7 @@ func (t *trace) AddStreams(scope string, deltaIn, deltaOut, nstreamsIn, nstreams } t.push(TraceEvt{ - Type: traceAddStreamEvt, + Type: TraceAddStreamEvt, Name: scope, DeltaIn: deltaIn, DeltaOut: deltaOut, @@ -494,7 +525,7 @@ func (t *trace) BlockAddStreams(scope string, deltaIn, deltaOut, nstreamsIn, nst } t.push(TraceEvt{ - Type: traceBlockAddStreamEvt, + Type: TraceBlockAddStreamEvt, Name: scope, DeltaIn: deltaIn, DeltaOut: deltaOut, @@ -513,7 +544,7 @@ func (t *trace) RemoveStreams(scope string, deltaIn, deltaOut, nstreamsIn, nstre } t.push(TraceEvt{ - Type: traceRemoveStreamEvt, + Type: TraceRemoveStreamEvt, Name: scope, DeltaIn: -deltaIn, DeltaOut: -deltaOut, @@ -538,7 +569,7 @@ func (t *trace) AddConn(scope string, dir network.Direction, usefd bool, nconnsI } t.push(TraceEvt{ - Type: traceAddConnEvt, + Type: TraceAddConnEvt, Name: scope, DeltaIn: deltaIn, DeltaOut: deltaOut, @@ -565,7 +596,7 @@ func (t *trace) BlockAddConn(scope string, dir network.Direction, usefd bool, nc } t.push(TraceEvt{ - Type: traceBlockAddConnEvt, + Type: TraceBlockAddConnEvt, Name: scope, DeltaIn: deltaIn, DeltaOut: deltaOut, @@ -592,7 +623,7 @@ func (t *trace) RemoveConn(scope string, dir network.Direction, usefd bool, ncon } t.push(TraceEvt{ - Type: traceRemoveConnEvt, + Type: TraceRemoveConnEvt, Name: scope, DeltaIn: deltaIn, DeltaOut: deltaOut, @@ -613,7 +644,7 @@ func (t *trace) AddConns(scope string, deltaIn, deltaOut, deltafd, nconnsIn, nco } t.push(TraceEvt{ - Type: traceAddConnEvt, + Type: TraceAddConnEvt, Name: scope, DeltaIn: deltaIn, DeltaOut: deltaOut, @@ -634,7 +665,7 @@ func (t *trace) BlockAddConns(scope string, deltaIn, deltaOut, deltafd, nconnsIn } t.push(TraceEvt{ - Type: traceBlockAddConnEvt, + Type: TraceBlockAddConnEvt, Name: scope, DeltaIn: deltaIn, DeltaOut: deltaOut, @@ -655,7 +686,7 @@ func (t *trace) RemoveConns(scope string, deltaIn, deltaOut, deltafd, nconnsIn, } t.push(TraceEvt{ - Type: traceRemoveConnEvt, + Type: TraceRemoveConnEvt, Name: scope, DeltaIn: -deltaIn, DeltaOut: -deltaOut, From 9ce3760d70828ca4b0eb9f12803f89b37472582a Mon Sep 17 00:00:00 2001 From: Marco Munizaga Date: Mon, 20 Jun 2022 16:52:34 -0700 Subject: [PATCH 03/21] Flip sign of delta --- trace.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trace.go b/trace.go index fb4103f..464e8cf 100644 --- a/trace.go +++ b/trace.go @@ -425,7 +425,7 @@ func (t *trace) ReleaseMemory(scope string, size, mem int64) { t.push(TraceEvt{ Type: TraceReleaseMemoryEvt, Name: scope, - Delta: size, + Delta: -size, Memory: mem, }) } From 7efd81c617a4d3f46162ad871bf63822d5551148 Mon Sep 17 00:00:00 2001 From: Marco Munizaga Date: Mon, 20 Jun 2022 16:52:56 -0700 Subject: [PATCH 04/21] Add scope name helpers --- rcmgr.go | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 85 insertions(+), 4 deletions(-) diff --git a/rcmgr.go b/rcmgr.go index 65f330e..5d64f7d 100644 --- a/rcmgr.go +++ b/rcmgr.go @@ -3,6 +3,7 @@ package rcmgr import ( "context" "fmt" + "strings" "sync" "time" @@ -434,7 +435,7 @@ func newPeerScope(p peer.ID, limit Limit, rcmgr *resourceManager) *peerScope { return &peerScope{ resourceScope: newResourceScope(limit, []*resourceScope{rcmgr.system.resourceScope}, - fmt.Sprintf("peer:%s", p), rcmgr.trace, rcmgr.metrics), + peerScopeName(p), rcmgr.trace, rcmgr.metrics), peer: p, rcmgr: rcmgr, } @@ -444,7 +445,7 @@ func newConnectionScope(dir network.Direction, usefd bool, limit Limit, rcmgr *r return &connectionScope{ resourceScope: newResourceScope(limit, []*resourceScope{rcmgr.transient.resourceScope, rcmgr.system.resourceScope}, - fmt.Sprintf("conn-%d", rcmgr.nextConnId()), rcmgr.trace, rcmgr.metrics), + connScopeName(rcmgr.nextConnId()), rcmgr.trace, rcmgr.metrics), dir: dir, usefd: usefd, rcmgr: rcmgr, @@ -456,7 +457,7 @@ func newAllowListedConnectionScope(dir network.Direction, usefd bool, limit Limi return &connectionScope{ resourceScope: newResourceScope(limit, []*resourceScope{rcmgr.allowlistedTransient.resourceScope, rcmgr.allowlistedSystem.resourceScope}, - fmt.Sprintf("conn-%d", rcmgr.nextConnId()), rcmgr.trace, rcmgr.metrics), + connScopeName(rcmgr.nextConnId()), rcmgr.trace, rcmgr.metrics), dir: dir, usefd: usefd, rcmgr: rcmgr, @@ -469,13 +470,93 @@ func newStreamScope(dir network.Direction, limit Limit, peer *peerScope, rcmgr * return &streamScope{ resourceScope: newResourceScope(limit, []*resourceScope{peer.resourceScope, rcmgr.transient.resourceScope, rcmgr.system.resourceScope}, - fmt.Sprintf("stream-%d", rcmgr.nextStreamId()), rcmgr.trace, rcmgr.metrics), + streamScopeName(rcmgr.nextStreamId()), rcmgr.trace, rcmgr.metrics), dir: dir, rcmgr: peer.rcmgr, peer: peer, } } +func IsSystemScope(name string) bool { + return strings.HasPrefix(name, "system") +} + +func IsTransientScope(name string) bool { + return strings.HasPrefix(name, "transient") +} + +func streamScopeName(streamId int64) string { + return fmt.Sprintf("stream-%d", streamId) +} + +func IsStreamScope(name string) bool { + return strings.HasPrefix(name, "stream-") +} + +func connScopeName(streamId int64) string { + return fmt.Sprintf("conn-%d", streamId) +} + +func IsConnScope(name string) bool { + return strings.HasPrefix(name, "conn-") +} + +func peerScopeName(p peer.ID) string { + return fmt.Sprintf("peer:%s", p) +} + +// ParsePeerScopeName returns "" if name is not a peerScopeName +func ParsePeerScopeName(name string) peer.ID { + if !strings.HasPrefix(name, "peer:") { + return peer.ID("") + } + parts := strings.SplitN(name, "peer:", 2) + if len(parts) != 2 { + return peer.ID("") + } + p, err := peer.Decode(parts[1]) + if err != nil { + return peer.ID("") + } + return p +} + +// ParseServiceScopeName returns the service name if name is a serviceScopeName. +// Otherwise returns "" +func ParseServiceScopeName(name string) string { + if strings.HasPrefix(name, "service:") { + if strings.Contains(name, "peer:") { + // This is a service peer scope + return "" + } + parts := strings.SplitN(name, ":", 2) + if len(parts) != 2 { + return ("") + } + + return parts[1] + } + return "" +} + +// ParseProtocolScopeName returns the service name if name is a serviceScopeName. +// Otherwise returns "" +func ParseProtocolScopeName(name string) string { + if strings.HasPrefix(name, "protocol:") { + if strings.Contains(name, "peer:") { + // This is a protocol peer scope + return "" + } + parts := strings.SplitN(name, ":", 2) + if len(parts) != 2 { + return ("") + } + + return parts[1] + } + return "" +} + func (s *serviceScope) Name() string { return s.service } From 19b6d584b2b65773fbd6f56bf7293f228b2736c1 Mon Sep 17 00:00:00 2001 From: Marco Munizaga Date: Mon, 20 Jun 2022 16:56:01 -0700 Subject: [PATCH 05/21] Add obs to resource manager --- obs/stats.go | 341 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 341 insertions(+) create mode 100644 obs/stats.go diff --git a/obs/stats.go b/obs/stats.go new file mode 100644 index 0000000..cafc885 --- /dev/null +++ b/obs/stats.go @@ -0,0 +1,341 @@ +package obs + +import ( + "context" + "strings" + + logging "github.com/ipfs/go-log/v2" + rcmgr "github.com/libp2p/go-libp2p-resource-manager" + "go.opencensus.io/stats" + "go.opencensus.io/stats/view" + "go.opencensus.io/tag" +) + +var log = logging.Logger("rcmgrObs") + +var ( + systemOutboundConns = stats.Int64("system/outbound/conn", "Number of outbound Connections", stats.UnitDimensionless) + systemInboundConns = stats.Int64("system/inbound/conn", "Number of inbound Connections", stats.UnitDimensionless) + + conns = stats.Int64("connections", "Number of Connections", stats.UnitDimensionless) + + peerConns = stats.Int64("peer/connections", "Number of connections this peer has", stats.UnitDimensionless) + peerConnsNegative = stats.Int64("peer/connections_negative", "Number of connections this peer had", stats.UnitDimensionless) + + streams = stats.Int64("streams", "Number of Streams", stats.UnitDimensionless) + + peerStreams = stats.Int64("peer/streams", "Number of streams this peer has", stats.UnitDimensionless) + peerStreamsNegative = stats.Int64("peer/streams_negative", "Number of streams this peer had", stats.UnitDimensionless) + + memory = stats.Int64("memory", "Amount of memory reserved as reported to the Resource Manager", stats.UnitDimensionless) + peerMemory = stats.Int64("peer/memory", "Amount of memory currently reseved for peer", stats.UnitDimensionless) + peerMemoryNegative = stats.Int64("peer/memory_negative", "Amount of memory previously reseved for peer", stats.UnitDimensionless) + + connMemory = stats.Int64("conn/memory", "Amount of memory currently reseved for the connection", stats.UnitDimensionless) + connMemoryNegative = stats.Int64("conn/memory_negative", "Amount of memory previously reseved for the connection", stats.UnitDimensionless) + + fds = stats.Int64("fds", "Number of fds as reported to the Resource Manager", stats.UnitDimensionless) + + blockedResources = stats.Int64("blocked_resources", "Number of resource requests blocked", stats.UnitDimensionless) +) + +var ( + LessThanEq, _ = tag.NewKey("le") + Direction, _ = tag.NewKey("dir") + Scope, _ = tag.NewKey("scope") + Service, _ = tag.NewKey("service") + Protocol, _ = tag.NewKey("protocol") + Resource, _ = tag.NewKey("resource") +) + +var ( + SystemOutboundConnsView = &view.View{Measure: systemOutboundConns, Aggregation: view.Sum()} + SystemInboundConnsView = &view.View{Measure: systemInboundConns, Aggregation: view.Sum()} + + ConnView = &view.View{Measure: conns, Aggregation: view.Sum(), TagKeys: []tag.Key{Direction, Scope}} + + fibLikeDistribution = []float64{ + 1.1, 2.1, 3.1, 5.1, 8.1, 13.1, 21.1, 34.1, 55.1, 100.1, 200.1, + } + + PeerConnsView = &view.View{ + Measure: peerConns, + Aggregation: view.Distribution(fibLikeDistribution...), + TagKeys: []tag.Key{Direction}, + } + PeerConnsNegativeView = &view.View{ + Measure: peerConnsNegative, + Aggregation: view.Distribution(fibLikeDistribution...), + TagKeys: []tag.Key{Direction}, + } + + StreamView = &view.View{Measure: streams, Aggregation: view.Sum(), TagKeys: []tag.Key{Direction, Scope, Service, Protocol}} + PeerStreamsView = &view.View{Measure: peerStreams, Aggregation: view.Distribution(fibLikeDistribution...), TagKeys: []tag.Key{Direction}} + PeerStreamNegativeView = &view.View{Measure: peerStreamsNegative, Aggregation: view.Distribution(fibLikeDistribution...), TagKeys: []tag.Key{Direction}} + + MemoryView = &view.View{Measure: memory, Aggregation: view.Sum(), TagKeys: []tag.Key{Scope, Service, Protocol}} + + memDistribution = []float64{ + 1 << 10, // 1KB + 1 << 12, // 4KB + 1 << 15, // 32KB + 1 << 20, // 1MB + 1 << 25, // 32MB + 1 << 28, // 256MB + 1 << 29, // 512MB + 1 << 30, // 1GB + 1 << 31, // 2GB + 1 << 32, // 4GB + } + PeerMemoryView = &view.View{ + Measure: peerMemory, + Aggregation: view.Distribution(memDistribution...), + } + PeerMemoryNegativeView = &view.View{ + Measure: peerMemoryNegative, + Aggregation: view.Distribution(memDistribution...), + } + + // Not setup yet. Memory isn't attached to a given connection. + ConnMemoryView = &view.View{ + Measure: connMemory, + Aggregation: view.Distribution(memDistribution...), + } + ConnMemoryNegativeView = &view.View{ + Measure: connMemoryNegative, + Aggregation: view.Distribution(memDistribution...), + } + + FDsView = &view.View{Measure: fds, Aggregation: view.Sum(), TagKeys: []tag.Key{Scope}} + + BlockedResourcesView = &view.View{ + Measure: blockedResources, + Aggregation: view.Sum(), + TagKeys: []tag.Key{Scope, Resource}, + } +) + +var DefaultViews []*view.View = []*view.View{ + ConnView, + PeerConnsView, + PeerConnsNegativeView, + FDsView, + + StreamView, + PeerStreamsView, + PeerStreamNegativeView, + + MemoryView, + PeerMemoryView, + PeerMemoryNegativeView, + + BlockedResourcesView, +} + +// StatsTraceReporter reports stats on the resource manager using its traces. +type StatsTraceReporter struct{} + +func NewStatsTraceReporter() (StatsTraceReporter, error) { + return StatsTraceReporter{}, nil +} + +func (r StatsTraceReporter) ConsumeEvent(evt rcmgr.TraceEvt) { + ctx := context.Background() + + switch evt.Type { + case rcmgr.TraceAddStreamEvt, rcmgr.TraceRemoveStreamEvt: + if p := rcmgr.ParsePeerScopeName(evt.Scope); p.Validate() == nil { + // Aggregated peer stats. Counts how many peers have N number of streams open. + // Uses two buckets aggregations. One to count how many streams the + // peer has now. The other to count the negative value, or how many + // streams did the peer use to have. When looking at the data you + // take the difference from the two. + + oldStreamsOut := int64(evt.StreamsOut - evt.DeltaOut) + peerStreamsOut := int64(evt.StreamsOut) + if oldStreamsOut != peerStreamsOut { + if oldStreamsOut != 0 { + stats.RecordWithTags(ctx, []tag.Mutator{tag.Upsert(Direction, "outbound")}, peerStreamsNegative.M(oldStreamsOut)) + } + if peerStreamsOut != 0 { + stats.RecordWithTags(ctx, []tag.Mutator{tag.Upsert(Direction, "outbound")}, peerStreams.M(peerStreamsOut)) + } + } + + oldStreamsIn := int64(evt.StreamsIn - evt.DeltaIn) + peerStreamsIn := int64(evt.StreamsIn) + if oldStreamsIn != peerStreamsIn { + if oldStreamsIn != 0 { + stats.RecordWithTags(ctx, []tag.Mutator{tag.Upsert(Direction, "inbound")}, peerStreamsNegative.M(oldStreamsIn)) + } + if peerStreamsIn != 0 { + stats.RecordWithTags(ctx, []tag.Mutator{tag.Upsert(Direction, "inbound")}, peerStreams.M(peerStreamsIn)) + } + } + } else { + var tags []tag.Mutator + if rcmgr.IsSystemScope(evt.Scope) || rcmgr.IsTransientScope(evt.Scope) { + tags = append(tags, tag.Upsert(Scope, evt.Scope)) + } else if svc := rcmgr.ParseServiceScopeName(evt.Scope); svc != "" { + tags = append(tags, tag.Upsert(Scope, "service"), tag.Upsert(Service, svc)) + } else if proto := rcmgr.ParseProtocolScopeName(evt.Scope); proto != "" { + tags = append(tags, tag.Upsert(Scope, "protocol"), tag.Upsert(Protocol, proto)) + } else { + // Not measuring connscope, servicepeer and protocolpeer. Lots of data, and + // you can use aggregated peer stats + service stats to infer + // this. + break + } + + if evt.DeltaOut != 0 { + stats.RecordWithTags( + ctx, + append([]tag.Mutator{tag.Upsert(Direction, "outbound")}, tags...), + streams.M(int64(evt.DeltaOut)), + ) + } + + if evt.DeltaIn != 0 { + stats.RecordWithTags( + ctx, + append([]tag.Mutator{tag.Upsert(Direction, "inbound")}, tags...), + streams.M(int64(evt.DeltaIn)), + ) + } + } + + case rcmgr.TraceAddConnEvt, rcmgr.TraceRemoveConnEvt: + if p := rcmgr.ParsePeerScopeName(evt.Scope); p.Validate() == nil { + // Aggregated peer stats. Counts how many peers have N number of connections. + // Uses two buckets aggregations. One to count how many streams the + // peer has now. The other to count the negative value, or how many + // conns did the peer use to have. When looking at the data you + // take the difference from the two. + + oldConnsOut := int64(evt.ConnsOut - evt.DeltaOut) + connsOut := int64(evt.ConnsOut) + if oldConnsOut != connsOut { + if oldConnsOut != 0 { + stats.RecordWithTags(ctx, []tag.Mutator{tag.Upsert(Direction, "outbound")}, peerConnsNegative.M(oldConnsOut)) + } + if connsOut != 0 { + stats.RecordWithTags(ctx, []tag.Mutator{tag.Upsert(Direction, "outbound")}, peerConns.M(connsOut)) + } + } + + oldConnsIn := int64(evt.ConnsIn - evt.DeltaIn) + connsIn := int64(evt.ConnsIn) + if oldConnsIn != connsIn { + if oldConnsIn != 0 { + stats.RecordWithTags(ctx, []tag.Mutator{tag.Upsert(Direction, "inbound")}, peerConnsNegative.M(oldConnsIn)) + } + if connsIn != 0 { + stats.RecordWithTags(ctx, []tag.Mutator{tag.Upsert(Direction, "inbound")}, peerConns.M(connsIn)) + } + } + } else { + var tags []tag.Mutator + if rcmgr.IsSystemScope(evt.Scope) || rcmgr.IsTransientScope(evt.Scope) { + tags = append(tags, tag.Upsert(Scope, evt.Scope)) + } else if rcmgr.IsConnScope(evt.Scope) { + // Not measuring this. I don't think it's useful. + break + } else { + // There shouldn't be anything here. But we keep going so the metrics will tell us if we're wrong (scope="") + log.Debugf("unexpected event in stats: %s", evt.Scope) + } + + if evt.DeltaOut != 0 { + stats.RecordWithTags( + ctx, + append([]tag.Mutator{tag.Upsert(Direction, "outbound")}, tags...), + conns.M(int64(evt.DeltaOut)), + ) + } + + if evt.DeltaIn != 0 { + stats.RecordWithTags( + ctx, + append([]tag.Mutator{tag.Upsert(Direction, "inbound")}, tags...), + conns.M(int64(evt.DeltaIn)), + ) + } + + // Represents the delta in fds + if evt.Delta != 0 { + stats.RecordWithTags( + ctx, + tags, + fds.M(int64(evt.Delta)), + ) + } + } + case rcmgr.TraceReserveMemoryEvt, rcmgr.TraceReleaseMemoryEvt: + if p := rcmgr.ParsePeerScopeName(evt.Scope); p.Validate() == nil { + oldMem := evt.Memory - evt.Delta + if oldMem != evt.Memory { + if oldMem != 0 { + stats.Record(ctx, peerMemoryNegative.M(oldMem)) + } + if evt.Memory != 0 { + stats.Record(ctx, peerMemory.M(evt.Memory)) + } + } + } else if rcmgr.IsConnScope(evt.Scope) { + oldMem := evt.Memory - evt.Delta + if oldMem != evt.Memory { + if oldMem != 0 { + stats.Record(ctx, connMemoryNegative.M(oldMem)) + } + if evt.Memory != 0 { + stats.Record(ctx, connMemory.M(evt.Memory)) + } + } + } else { + var tags []tag.Mutator + if rcmgr.IsSystemScope(evt.Scope) || rcmgr.IsTransientScope(evt.Scope) { + tags = append(tags, tag.Upsert(Scope, evt.Scope)) + } else if svc := rcmgr.ParseServiceScopeName(evt.Scope); svc != "" { + tags = append(tags, tag.Upsert(Scope, "service"), tag.Upsert(Service, svc)) + } else if proto := rcmgr.ParseProtocolScopeName(evt.Scope); proto != "" { + tags = append(tags, tag.Upsert(Scope, "protocol"), tag.Upsert(Protocol, proto)) + } else { + // Not measuring connscope, servicepeer and protocolpeer. Lots of data, and + // you can use aggregated peer stats + service stats to infer + // this. + break + } + + if evt.Delta != 0 { + stats.RecordWithTags(ctx, tags, memory.M(int64(evt.Delta))) + } + } + + case rcmgr.TraceBlockAddConnEvt, rcmgr.TraceBlockAddStreamEvt, rcmgr.TraceBlockReserveMemoryEvt: + var resource string + if evt.Type == rcmgr.TraceBlockAddConnEvt { + resource = "connection" + } else if evt.Type == rcmgr.TraceBlockAddStreamEvt { + resource = "stream" + } else { + resource = "memory" + } + + // Only the top scope. We don't want to get the peerid here. + scope := strings.SplitN(evt.Scope, ":", 2)[0] + + tags := []tag.Mutator{tag.Upsert(Scope, scope), tag.Upsert(Resource, resource)} + + if evt.DeltaIn != 0 { + stats.RecordWithTags(ctx, tags, blockedResources.M(int64(evt.DeltaIn))) + } + + if evt.DeltaOut != 0 { + stats.RecordWithTags(ctx, tags, blockedResources.M(int64(evt.DeltaOut))) + } + + if evt.Delta != 0 { + stats.RecordWithTags(ctx, tags, blockedResources.M(evt.Delta)) + } + } +} From 3a06e88227f22afc605050a2a4f3738cb278141e Mon Sep 17 00:00:00 2001 From: Marco Munizaga Date: Mon, 20 Jun 2022 18:02:14 -0700 Subject: [PATCH 06/21] Add dep to go.mod/sum --- go.mod | 1 + go.sum | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/go.mod b/go.mod index 3c2ea2d..3071287 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,7 @@ require ( github.com/multiformats/go-multiaddr v0.6.0 github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 github.com/stretchr/testify v1.7.0 + go.opencensus.io v0.23.0 ) require ( diff --git a/go.sum b/go.sum index f4ec102..ccf6dda 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,5 @@ +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/aead/siphash v1.0.1/go.mod h1:Nywa3cDsYNNK3gaciGTWPwHt0wlpNV15vwmswBAUSII= github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= @@ -19,6 +21,9 @@ github.com/btcsuite/snappy-go v0.0.0-20151229074030-0bdef8d06723/go.mod h1:8woku github.com/btcsuite/snappy-go v1.0.0/go.mod h1:8woku9dyThutzjeg+3xrA5iCpBRH8XEEg3lh6TiUghc= github.com/btcsuite/websocket v0.0.0-20150119174127-31079b680792/go.mod h1:ghJtEyQwv5/p4Mg4C0fgbePVuGr935/5ddU9Z3TmDRY= github.com/btcsuite/winsvc v1.0.0/go.mod h1:jsenWakMcC0zFBFurPLEAyrnc/teJEM1O46fmI40EZs= +github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= github.com/davecgh/go-spew v0.0.0-20171005155431-ecdeabc65495/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -29,10 +34,34 @@ github.com/decred/dcrd/crypto/blake256 v1.0.0/go.mod h1:sQl2p6Y26YV+ZOcSTP6thNdn github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1 h1:YLtO71vCjJRCBcrPMtQ9nqBsqpA1m5sE92cU+pd5Mcc= github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1/go.mod h1:hyedUtir6IdtD/7lIxGeCxkaw7y45JueMRL4DIyJDKs= github.com/decred/dcrd/lru v1.0.0/go.mod h1:mxKOwFd7lFjN2GZYsiz/ecgqR6kkYAl+0pz0tEMk218= +github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= +github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/gogo/protobuf v1.3.1 h1:DqDEcV5aeaTmdFBePNpYsp3FlcVH/2ISVVM9Qf8PSls= github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY= +github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= +github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.3 h1:x95R7cp+rSeeqAMI2knLtQ0DKlaBhv2NrtrOvafPHRo= +github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/ipfs/go-cid v0.0.7/go.mod h1:6Ux9z5e+HpkQdckYoX1PG/6xqKspzlEIR5SDmgqgC/I= github.com/ipfs/go-cid v0.2.0 h1:01JTiihFq9en9Vz0lc0VDWvZe/uBonGpzo4THP0vcQ0= @@ -99,14 +128,18 @@ github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/spacemonkeygo/spacelog v0.0.0-20180420211403-2296661a0572 h1:RC6RW7j+1+HkWaX/Yh71Ee5ZHaHYt7ZP4sQgUrm6cDU= github.com/spacemonkeygo/spacelog v0.0.0-20180420211403-2296661a0572/go.mod h1:w0SWMsp6j9O/dk4/ZpIhL+3CkG8ofA2vuv7k+ltqUMc= github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +go.opencensus.io v0.23.0 h1:gqCw0LfLxScz8irSi8exQc7fyQ0fKQU/qnC/X8+V/1M= +go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/goleak v1.1.11-0.20210813005559-691160354723 h1:sHOAIxRGBp443oHZIPB+HsUGaksVCXVQENPxwTfQdH4= @@ -121,26 +154,39 @@ golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200115085410-6d4e4cb37c7d/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20200510223506-06a226fb4e37/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210220033148-5ea612d1eb83/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I= golang.org/x/crypto v0.0.0-20210506145944-38f3c27a63bf h1:B2n+Zi5QeYRDAEodEu72OS36gmTWjgpXr2+cWcBW90o= golang.org/x/crypto v0.0.0-20210506145944-38f3c27a63bf/go.mod h1:P+XmwS30IXTQdn5tA2iutPOUgjI07+tq3H3K9MVA1s8= +golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= +golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/net v0.0.0-20180719180050-a680a1efc54d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210309074719-68d13333faf2/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -153,12 +199,36 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= +google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= +google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= +google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= @@ -170,3 +240,5 @@ gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= From 1c172ace77b7e5c13e7a3d25ce1200d8bdbab733 Mon Sep 17 00:00:00 2001 From: Marco Munizaga Date: Mon, 20 Jun 2022 18:05:29 -0700 Subject: [PATCH 07/21] Revert "Add multimetrics reporter" This reverts commit 81fb49d3d68153b610b245d6f730f4d6b34b3706. --- metrics.go | 117 +---------------------------------------------------- 1 file changed, 2 insertions(+), 115 deletions(-) diff --git a/metrics.go b/metrics.go index c71ed90..a875415 100644 --- a/metrics.go +++ b/metrics.go @@ -47,20 +47,10 @@ type metrics struct { reporter MetricsReporter } -// WithMetrics is a resource manager option to enable metrics collection. Can be -// called multiple times to add multiple reporters. +// WithMetrics is a resource manager option to enable metrics collection func WithMetrics(reporter MetricsReporter) Option { return func(r *resourceManager) error { - if r.metrics == nil { - r.metrics = &metrics{reporter: reporter} - } else if multimetrics, ok := r.metrics.reporter.(*MultiMetricsReporter); ok { - multimetrics.reporters = append(multimetrics.reporters, reporter) - } else { - // This was a single reporter. Lets convert it to a multimetrics reporter - r.metrics = &metrics{ - reporter: &MultiMetricsReporter{reporters: []MetricsReporter{r.metrics.reporter, reporter}}, - } - } + r.metrics = &metrics{reporter: reporter} return nil } } @@ -176,106 +166,3 @@ func (m *metrics) BlockMemory(size int) { m.reporter.BlockMemory(size) } - -// MultiMetricsReporter is a helper that allows you to report to multiple metrics reporters. -type MultiMetricsReporter struct { - reporters []MetricsReporter -} - -// AllowConn is invoked when opening a connection is allowed -func (mmr *MultiMetricsReporter) AllowConn(dir network.Direction, usefd bool) { - for _, r := range mmr.reporters { - r.AllowConn(dir, usefd) - } -} - -// BlockConn is invoked when opening a connection is blocked -func (mmr *MultiMetricsReporter) BlockConn(dir network.Direction, usefd bool) { - for _, r := range mmr.reporters { - r.BlockConn(dir, usefd) - } -} - -// AllowStream is invoked when opening a stream is allowed -func (mmr *MultiMetricsReporter) AllowStream(p peer.ID, dir network.Direction) { - for _, r := range mmr.reporters { - r.AllowStream(p, dir) - } -} - -// BlockStream is invoked when opening a stream is blocked -func (mmr *MultiMetricsReporter) BlockStream(p peer.ID, dir network.Direction) { - for _, r := range mmr.reporters { - r.BlockStream(p, dir) - } -} - -// AllowPeer is invoked when attaching ac onnection to a peer is allowed -func (mmr *MultiMetricsReporter) AllowPeer(p peer.ID) { - for _, r := range mmr.reporters { - r.AllowPeer(p) - } -} - -// BlockPeer is invoked when attaching ac onnection to a peer is blocked -func (mmr *MultiMetricsReporter) BlockPeer(p peer.ID) { - for _, r := range mmr.reporters { - r.BlockPeer(p) - } -} - -// AllowProtocol is invoked when setting the protocol for a stream is allowed -func (mmr *MultiMetricsReporter) AllowProtocol(proto protocol.ID) { - for _, r := range mmr.reporters { - r.AllowProtocol(proto) - } -} - -// BlockProtocol is invoked when setting the protocol for a stream is blocked -func (mmr *MultiMetricsReporter) BlockProtocol(proto protocol.ID) { - for _, r := range mmr.reporters { - r.BlockProtocol(proto) - } -} - -// BlockedProtocolPeer is invoekd when setting the protocol for a stream is blocked at the per protocol peer scope -func (mmr *MultiMetricsReporter) BlockProtocolPeer(proto protocol.ID, p peer.ID) { - for _, r := range mmr.reporters { - r.BlockProtocolPeer(proto, p) - } -} - -// AllowPService is invoked when setting the protocol for a stream is allowed -func (mmr *MultiMetricsReporter) AllowService(svc string) { - for _, r := range mmr.reporters { - r.AllowService(svc) - } -} - -// BlockPService is invoked when setting the protocol for a stream is blocked -func (mmr *MultiMetricsReporter) BlockService(svc string) { - for _, r := range mmr.reporters { - r.BlockService(svc) - } -} - -// BlockedServicePeer is invoked when setting the service for a stream is blocked at the per service peer scope -func (mmr *MultiMetricsReporter) BlockServicePeer(svc string, p peer.ID) { - for _, r := range mmr.reporters { - r.BlockServicePeer(svc, p) - } -} - -// AllowMemory is invoked when a memory reservation is allowed -func (mmr *MultiMetricsReporter) AllowMemory(size int) { - for _, r := range mmr.reporters { - r.AllowMemory(size) - } -} - -// BlockMemory is invoked when a memory reservation is blocked -func (mmr *MultiMetricsReporter) BlockMemory(size int) { - for _, r := range mmr.reporters { - r.BlockMemory(size) - } -} From 2385a33f55c8e8352272abbe55700c43e9011473 Mon Sep 17 00:00:00 2001 From: Marco Munizaga Date: Mon, 20 Jun 2022 18:09:58 -0700 Subject: [PATCH 08/21] Remove connid from scopename in blocked conns --- obs/stats.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/obs/stats.go b/obs/stats.go index cafc885..d45284b 100644 --- a/obs/stats.go +++ b/obs/stats.go @@ -323,6 +323,8 @@ func (r StatsTraceReporter) ConsumeEvent(evt rcmgr.TraceEvt) { // Only the top scope. We don't want to get the peerid here. scope := strings.SplitN(evt.Scope, ":", 2)[0] + // Drop the connection or stream id + scope = strings.SplitN(scope, "-", 2)[0] tags := []tag.Mutator{tag.Upsert(Scope, scope), tag.Upsert(Resource, resource)} From c9413d73201da127dbe9e78df4f539f9cf0564f5 Mon Sep 17 00:00:00 2001 From: Marco Munizaga Date: Mon, 20 Jun 2022 18:20:29 -0700 Subject: [PATCH 09/21] Add Grafana dashboard --- obs/grafana-dashboards/README.md | 42 + obs/grafana-dashboards/resource-manager.json | 1702 ++++++++++++++++++ 2 files changed, 1744 insertions(+) create mode 100644 obs/grafana-dashboards/README.md create mode 100644 obs/grafana-dashboards/resource-manager.json diff --git a/obs/grafana-dashboards/README.md b/obs/grafana-dashboards/README.md new file mode 100644 index 0000000..2dad848 --- /dev/null +++ b/obs/grafana-dashboards/README.md @@ -0,0 +1,42 @@ +# Ready to go Grafana Dashboard + +Here are some prebuilt dashboards that you can add to your Grafana instance. To +import follow the Grafana docs [here](https://grafana.com/docs/grafana/latest/dashboards/export-import/#import-dashboard) + +## Setup + +To make sure you're emitting the correct metrics you'll have to hook up the +Opencensus views that `stats.go` exports. For Prometheus this looks like: + +``` go +import ( + // ... + ocprom "contrib.go.opencensus.io/exporter/prometheus" + + rcmgr "github.com/libp2p/go-libp2p-resource-manager" + rcmgrObs "github.com/libp2p/go-libp2p-resource-manager/obs" + + "github.com/prometheus/client_golang/prometheus" + "go.opencensus.io/stats/view" +) + + func SetupResourceManager() (network.ResourceManager, error) { + // Hook up the trace reporter metrics + view.Register(rcmgrObs.DefaultViews...) + ocprom.NewExporter(ocprom.Options{ + Registry: prometheus.DefaultRegisterer.(*prometheus.Registry), + Namespace: "rcmgr_trace_metrics", + }) + + str, err := rcmgrObs.NewStatsTraceReporter() + if err != nil { + return nil, err + } + + return rcmgr.NewResourceManager(limiter, rcmgr.WithTraceReporter(str)) + } +``` + +It should be fairly similar for other exporters. See the [OpenCensus +docs](https://opencensus.io/exporters/supported-exporters/go/) to see how to +export to another exporter. \ No newline at end of file diff --git a/obs/grafana-dashboards/resource-manager.json b/obs/grafana-dashboards/resource-manager.json new file mode 100644 index 0000000..ac22c51 --- /dev/null +++ b/obs/grafana-dashboards/resource-manager.json @@ -0,0 +1,1702 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 2, + "links": [], + "liveNow": false, + "panels": [ + { + "description": "This should be empty. If it's large you are running into your resource manager limits.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 67, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "count(rcmgr_trace_metrics_blocked_resources) by (resource,scope)", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Number of blocked resource requests", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 65, + "panels": [], + "title": "Blocked Resources", + "type": "row" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 42, + "panels": [], + "title": "Streams", + "type": "row" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 48, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "rcmgr_trace_metrics_streams{scope=\"system\"}", + "interval": "", + "legendFormat": "{{dir}}", + "refId": "A" + } + ], + "title": "System Streams", + "type": "timeseries" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 44, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "rcmgr_trace_metrics_streams{scope=\"transient\"}", + "interval": "", + "legendFormat": "{{dir}}", + "refId": "A" + } + ], + "title": "Transient Streams", + "type": "timeseries" + }, + { + "description": "How many streams does each service have open", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 43, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "rcmgr_trace_metrics_streams{scope=\"service\"}", + "interval": "", + "legendFormat": "{{dir}} {{service}}", + "refId": "A" + } + ], + "title": "Streams by service", + "type": "timeseries" + }, + { + "description": "How many streams does each protocol have open.\n\nA protocol is similiar to a service except there may be many protocols for a single service. A service may have \nmultiple protocols for backwards compatibility. For example, bitswap the service is a single entity, but it supports bitswap protocols v1.1 and v1.2.\n\nA service is attached to a stream manually by the protocol developer. A service may be missing if there is no `StreamScope.SetService` call.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 27 + }, + "id": 52, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "rcmgr_trace_metrics_streams{scope=\"protocol\"}", + "interval": "", + "legendFormat": "{{dir}} {{protocol}}", + "refId": "A" + } + ], + "title": "Streams by protocol", + "type": "timeseries" + }, + { + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 36 + }, + "id": 35, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "histogram_quantile(0.50, (rcmgr_trace_metrics_peer_streams_bucket - rcmgr_trace_metrics_peer_streams_negative_bucket)) - 0.1", + "interval": "", + "legendFormat": "p50 {{dir}} connections per peer", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "histogram_quantile(0.90, (rcmgr_trace_metrics_peer_streams_bucket - rcmgr_trace_metrics_peer_streams_negative_bucket)) - 0.1", + "hide": false, + "interval": "", + "legendFormat": "p90 {{dir}} connections per peer", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "histogram_quantile(1, (rcmgr_trace_metrics_peer_streams_bucket - rcmgr_trace_metrics_peer_streams_negative_bucket)) - 0.1", + "hide": false, + "interval": "", + "legendFormat": "max {{dir}} connections per peer", + "refId": "C" + } + ], + "title": "Streams per peer, aggregated", + "type": "timeseries" + }, + { + "description": "How many peers have N-0.1 streams open", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 46 + }, + "id": 46, + "options": { + "displayMode": "gradient", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showUnfilled": true + }, + "pluginVersion": "8.4.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": false, + "expr": "rcmgr_trace_metrics_peer_streams_bucket{dir=\"inbound\"}-rcmgr_trace_metrics_peer_streams_negative_bucket{dir=\"inbound\"}", + "format": "heatmap", + "hide": false, + "interval": "", + "legendFormat": "{{le}}", + "refId": "A" + } + ], + "title": "Current inbound streams per peer histogram", + "type": "bargauge" + }, + { + "description": "How many peers have N-0.1 streams open", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 46 + }, + "id": 47, + "options": { + "displayMode": "gradient", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showUnfilled": true + }, + "pluginVersion": "8.4.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": false, + "expr": "rcmgr_trace_metrics_peer_streams_bucket{dir=\"outbound\"}-rcmgr_trace_metrics_peer_streams_negative_bucket{dir=\"outbound\"}", + "format": "heatmap", + "hide": false, + "interval": "", + "legendFormat": "{{le}}", + "refId": "A" + } + ], + "title": "Current outbound streams per peer histogram", + "type": "bargauge" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 54 + }, + "id": 29, + "panels": [], + "title": "Connections", + "type": "row" + }, + { + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 55 + }, + "id": 31, + "options": { + "content": "# Libp2p Connections\n\nBroken down by [Resource Scope](https://github.com/libp2p/go-libp2p-resource-manager#resource-scopes). \nScopes represent what is imposing limits on this resource. For connections, we have three main scopes:\n\n1. System. The total number of connections owned by the process. Includes both application usable connections + the number of transient connections.\n2. Transient. The total number of connections that are being upgraded into usable connections in the process.\n3. Peer. The total number of connections associated with this peer. When a connection has this scope it is usable by the application.\n\nAn example of a System connection is a connection you can open a libp2p stream on and send data.\nA transient connection is not yet usable for application data since it may be negotiating \na security handshake or a multiplexer.\n\nConnections start in the transient scope and move over to the System and Peer scopes once they are ready to be used.\n\nIt would be unusual to see a lot of transient connections. It would also be unusal to see a peer with a lot of connections.", + "mode": "markdown" + }, + "pluginVersion": "8.4.5", + "title": "libp2p Connections", + "type": "text" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 64 + }, + "id": 33, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "rcmgr_trace_metrics_connections{scope=\"system\"}", + "interval": "", + "legendFormat": "{{dir}}", + "refId": "A" + } + ], + "title": "System Connections", + "type": "timeseries" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 64 + }, + "id": 36, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "rcmgr_trace_metrics_connections{scope=\"transient\"}", + "interval": "", + "legendFormat": "{{dir}}", + "refId": "A" + } + ], + "title": "Transient Connections", + "type": "timeseries" + }, + { + "gridPos": { + "h": 3, + "w": 24, + "x": 0, + "y": 72 + }, + "id": 38, + "options": { + "content": "These are aggregated stats. They are grouped by buckets. Each bucket represents how many peers have N number of connections.\n\nDue to a quirk in [opencensus](https://github.com/census-instrumentation/opencensus-go/blob/v0.23.0/stats/view/aggregation_data.go#L195) the bucket values have to be a bit bigger than the integer values.\nSo subtract 0.1 from the number to get the true number of connections. e.g. If a peer has 3 connections, it'll be put in the 3.1 bucket. \n", + "mode": "markdown" + }, + "pluginVersion": "8.4.5", + "title": "Connections per Peer", + "type": "text" + }, + { + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 75 + }, + "id": 45, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "histogram_quantile(0.50, (rcmgr_trace_metrics_peer_connections_bucket - rcmgr_trace_metrics_peer_connections_negative_bucket)) - 0.1", + "interval": "", + "legendFormat": "p50 {{dir}} connections per peer", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "histogram_quantile(0.90, (rcmgr_trace_metrics_peer_connections_bucket - rcmgr_trace_metrics_peer_connections_negative_bucket)) - 0.1", + "hide": false, + "interval": "", + "legendFormat": "p90 {{dir}} connections per peer", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "histogram_quantile(1, (rcmgr_trace_metrics_peer_connections_bucket - rcmgr_trace_metrics_peer_connections_negative_bucket)) - 0.1", + "hide": false, + "interval": "", + "legendFormat": "max {{dir}} connections per peer", + "refId": "C" + } + ], + "title": "Connections per peer, aggregated", + "type": "timeseries" + }, + { + "description": "How many peers have N-0.1 connections open", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 85 + }, + "id": 39, + "options": { + "displayMode": "gradient", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showUnfilled": true + }, + "pluginVersion": "8.4.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": false, + "expr": "rcmgr_trace_metrics_peer_connections_bucket{dir=\"inbound\"}-rcmgr_trace_metrics_peer_connections_negative_bucket{dir=\"inbound\"}", + "format": "heatmap", + "hide": false, + "interval": "", + "legendFormat": "{{le}}", + "refId": "A" + } + ], + "title": "Current inbound connections per peer histogram", + "type": "bargauge" + }, + { + "description": "How many peers have N-0.1 connections open", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 85 + }, + "id": 40, + "options": { + "displayMode": "gradient", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showUnfilled": true + }, + "pluginVersion": "8.4.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": false, + "expr": "rcmgr_trace_metrics_peer_connections_bucket{dir=\"outbound\"}-rcmgr_trace_metrics_peer_connections_negative_bucket{dir=\"outbound\"} ", + "format": "heatmap", + "hide": false, + "interval": "", + "legendFormat": "{{le}}", + "refId": "A" + } + ], + "title": "Curent outbound connections per peer histogram", + "type": "bargauge" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 93 + }, + "id": 54, + "panels": [], + "title": "Memory", + "type": "row" + }, + { + "description": "As reported to Resource Manager", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 94 + }, + "id": 56, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "rcmgr_trace_metrics_memory{scope=\"system\"}", + "interval": "", + "legendFormat": "Bytes Reserved", + "refId": "A" + } + ], + "title": "System memory reservations", + "type": "timeseries" + }, + { + "description": "As reported to Resource Manager", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 103 + }, + "id": 57, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "rcmgr_trace_metrics_memory{scope=\"protocol\"}", + "interval": "", + "legendFormat": "{{protocol}}", + "refId": "A" + } + ], + "title": "Memory reservations by protocol", + "type": "timeseries" + }, + { + "description": "As reported to Resource Manager", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 103 + }, + "id": 58, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "rcmgr_trace_metrics_memory{scope=\"service\"}", + "interval": "", + "legendFormat": "{{service}}", + "refId": "A" + } + ], + "title": "Memory reservations by service", + "type": "timeseries" + }, + { + "description": "As reported to the resource manager", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Number of peers aggregated" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-green", + "mode": "fixed" + } + }, + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit" + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 111 + }, + "id": 59, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "histogram_quantile(0.50, (rcmgr_trace_metrics_peer_memory_bucket - rcmgr_trace_metrics_peer_memory_negative_bucket)) - 0.1", + "interval": "", + "legendFormat": "p50 {{dir}} connections per peer", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "histogram_quantile(0.90, (rcmgr_trace_metrics_peer_memory_bucket - rcmgr_trace_metrics_peer_memory_negative_bucket)) - 0.1", + "hide": false, + "interval": "", + "legendFormat": "p90 {{dir}} connections per peer", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "histogram_quantile(1, (rcmgr_trace_metrics_peer_memory_bucket - rcmgr_trace_metrics_peer_memory_negative_bucket)) - 0.1", + "hide": false, + "interval": "", + "legendFormat": "max {{dir}} connections per peer", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "rcmgr_trace_metrics_peer_memory_count-rcmgr_trace_metrics_peer_memory_negative_count", + "hide": false, + "interval": "", + "legendFormat": "Number of peers aggregated", + "refId": "D" + } + ], + "title": "Memory reservations per peer, aggregated", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 121 + }, + "id": 62, + "panels": [], + "title": "File Descriptors", + "type": "row" + }, + { + "description": "As reported to the resource manager", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 122 + }, + "id": 60, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "rcmgr_trace_metrics_fds", + "interval": "", + "legendFormat": "{{scope}}", + "refId": "A" + } + ], + "title": "FDs in use", + "type": "timeseries" + } + ], + "refresh": false, + "schemaVersion": 35, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Resource Manager", + "uid": "MgmGIjjnk", + "version": 13, + "weekStart": "" + } \ No newline at end of file From 04b17931b06d28ed9bb33b9af12026311287660e Mon Sep 17 00:00:00 2001 From: Marco Munizaga Date: Mon, 20 Jun 2022 18:31:09 -0700 Subject: [PATCH 10/21] Fix merge conflicts --- obs/stats.go | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/obs/stats.go b/obs/stats.go index d45284b..7838a66 100644 --- a/obs/stats.go +++ b/obs/stats.go @@ -144,7 +144,7 @@ func (r StatsTraceReporter) ConsumeEvent(evt rcmgr.TraceEvt) { switch evt.Type { case rcmgr.TraceAddStreamEvt, rcmgr.TraceRemoveStreamEvt: - if p := rcmgr.ParsePeerScopeName(evt.Scope); p.Validate() == nil { + if p := rcmgr.ParsePeerScopeName(evt.Name); p.Validate() == nil { // Aggregated peer stats. Counts how many peers have N number of streams open. // Uses two buckets aggregations. One to count how many streams the // peer has now. The other to count the negative value, or how many @@ -174,11 +174,11 @@ func (r StatsTraceReporter) ConsumeEvent(evt rcmgr.TraceEvt) { } } else { var tags []tag.Mutator - if rcmgr.IsSystemScope(evt.Scope) || rcmgr.IsTransientScope(evt.Scope) { - tags = append(tags, tag.Upsert(Scope, evt.Scope)) - } else if svc := rcmgr.ParseServiceScopeName(evt.Scope); svc != "" { + if rcmgr.IsSystemScope(evt.Name) || rcmgr.IsTransientScope(evt.Name) { + tags = append(tags, tag.Upsert(Scope, evt.Name)) + } else if svc := rcmgr.ParseServiceScopeName(evt.Name); svc != "" { tags = append(tags, tag.Upsert(Scope, "service"), tag.Upsert(Service, svc)) - } else if proto := rcmgr.ParseProtocolScopeName(evt.Scope); proto != "" { + } else if proto := rcmgr.ParseProtocolScopeName(evt.Name); proto != "" { tags = append(tags, tag.Upsert(Scope, "protocol"), tag.Upsert(Protocol, proto)) } else { // Not measuring connscope, servicepeer and protocolpeer. Lots of data, and @@ -205,7 +205,7 @@ func (r StatsTraceReporter) ConsumeEvent(evt rcmgr.TraceEvt) { } case rcmgr.TraceAddConnEvt, rcmgr.TraceRemoveConnEvt: - if p := rcmgr.ParsePeerScopeName(evt.Scope); p.Validate() == nil { + if p := rcmgr.ParsePeerScopeName(evt.Name); p.Validate() == nil { // Aggregated peer stats. Counts how many peers have N number of connections. // Uses two buckets aggregations. One to count how many streams the // peer has now. The other to count the negative value, or how many @@ -235,14 +235,14 @@ func (r StatsTraceReporter) ConsumeEvent(evt rcmgr.TraceEvt) { } } else { var tags []tag.Mutator - if rcmgr.IsSystemScope(evt.Scope) || rcmgr.IsTransientScope(evt.Scope) { - tags = append(tags, tag.Upsert(Scope, evt.Scope)) - } else if rcmgr.IsConnScope(evt.Scope) { + if rcmgr.IsSystemScope(evt.Name) || rcmgr.IsTransientScope(evt.Name) { + tags = append(tags, tag.Upsert(Scope, evt.Name)) + } else if rcmgr.IsConnScope(evt.Name) { // Not measuring this. I don't think it's useful. break } else { // There shouldn't be anything here. But we keep going so the metrics will tell us if we're wrong (scope="") - log.Debugf("unexpected event in stats: %s", evt.Scope) + log.Debugf("unexpected event in stats: %s", evt.Name) } if evt.DeltaOut != 0 { @@ -271,7 +271,7 @@ func (r StatsTraceReporter) ConsumeEvent(evt rcmgr.TraceEvt) { } } case rcmgr.TraceReserveMemoryEvt, rcmgr.TraceReleaseMemoryEvt: - if p := rcmgr.ParsePeerScopeName(evt.Scope); p.Validate() == nil { + if p := rcmgr.ParsePeerScopeName(evt.Name); p.Validate() == nil { oldMem := evt.Memory - evt.Delta if oldMem != evt.Memory { if oldMem != 0 { @@ -281,7 +281,7 @@ func (r StatsTraceReporter) ConsumeEvent(evt rcmgr.TraceEvt) { stats.Record(ctx, peerMemory.M(evt.Memory)) } } - } else if rcmgr.IsConnScope(evt.Scope) { + } else if rcmgr.IsConnScope(evt.Name) { oldMem := evt.Memory - evt.Delta if oldMem != evt.Memory { if oldMem != 0 { @@ -293,11 +293,11 @@ func (r StatsTraceReporter) ConsumeEvent(evt rcmgr.TraceEvt) { } } else { var tags []tag.Mutator - if rcmgr.IsSystemScope(evt.Scope) || rcmgr.IsTransientScope(evt.Scope) { - tags = append(tags, tag.Upsert(Scope, evt.Scope)) - } else if svc := rcmgr.ParseServiceScopeName(evt.Scope); svc != "" { + if rcmgr.IsSystemScope(evt.Name) || rcmgr.IsTransientScope(evt.Name) { + tags = append(tags, tag.Upsert(Scope, evt.Name)) + } else if svc := rcmgr.ParseServiceScopeName(evt.Name); svc != "" { tags = append(tags, tag.Upsert(Scope, "service"), tag.Upsert(Service, svc)) - } else if proto := rcmgr.ParseProtocolScopeName(evt.Scope); proto != "" { + } else if proto := rcmgr.ParseProtocolScopeName(evt.Name); proto != "" { tags = append(tags, tag.Upsert(Scope, "protocol"), tag.Upsert(Protocol, proto)) } else { // Not measuring connscope, servicepeer and protocolpeer. Lots of data, and @@ -322,7 +322,7 @@ func (r StatsTraceReporter) ConsumeEvent(evt rcmgr.TraceEvt) { } // Only the top scope. We don't want to get the peerid here. - scope := strings.SplitN(evt.Scope, ":", 2)[0] + scope := strings.SplitN(evt.Name, ":", 2)[0] // Drop the connection or stream id scope = strings.SplitN(scope, "-", 2)[0] From bc012d37ffa41a948c900a0ee536cbb36bd5e769 Mon Sep 17 00:00:00 2001 From: Marco Munizaga Date: Tue, 21 Jun 2022 07:38:25 -0700 Subject: [PATCH 11/21] Update dashboard json --- obs/grafana-dashboards/resource-manager.json | 3106 +++++++++--------- 1 file changed, 1553 insertions(+), 1553 deletions(-) diff --git a/obs/grafana-dashboards/resource-manager.json b/obs/grafana-dashboards/resource-manager.json index ac22c51..e9cae6d 100644 --- a/obs/grafana-dashboards/resource-manager.json +++ b/obs/grafana-dashboards/resource-manager.json @@ -1,1702 +1,1702 @@ { - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 2, - "links": [], - "liveNow": false, - "panels": [ + "annotations": { + "list": [ { - "description": "This should be empty. If it's large you are running into your resource manager limits.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 2, + "links": [], + "liveNow": false, + "panels": [ + { + "description": "This should be empty. If it's large you are running into your resource manager limits.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 - }, - "id": 67, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single", - "sort": "none" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "exemplar": true, - "expr": "count(rcmgr_trace_metrics_blocked_resources) by (resource,scope)", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Number of blocked resource requests", - "type": "timeseries" + "overrides": [] }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 8 - }, - "id": 65, - "panels": [], - "title": "Blocked Resources", - "type": "row" + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 9 + "id": 67, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" }, - "id": 42, - "panels": [], - "title": "Streams", - "type": "row" + "tooltip": { + "mode": "single", + "sort": "none" + } }, - { - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 10 - }, - "id": 48, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" + "exemplar": true, + "expr": "count(rcmgr_trace_metrics_blocked_resources) by (resource,scope)", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Number of blocked resource requests", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 65, + "panels": [], + "title": "Blocked Resources", + "type": "row" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 42, + "panels": [], + "title": "Streams", + "type": "row" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "exemplar": true, - "expr": "rcmgr_trace_metrics_streams{scope=\"system\"}", - "interval": "", - "legendFormat": "{{dir}}", - "refId": "A" - } - ], - "title": "System Streams", - "type": "timeseries" - }, - { - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 10 + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 48, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" }, - "id": 44, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "exemplar": true, + "expr": "rcmgr_trace_metrics_streams{scope=\"system\"}", + "interval": "", + "legendFormat": "{{dir}}", + "refId": "A" + } + ], + "title": "System Streams", + "type": "timeseries" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "exemplar": true, - "expr": "rcmgr_trace_metrics_streams{scope=\"transient\"}", - "interval": "", - "legendFormat": "{{dir}}", - "refId": "A" - } - ], - "title": "Transient Streams", - "type": "timeseries" - }, - { - "description": "How many streams does each service have open", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } }, - "gridPos": { - "h": 9, - "w": 24, - "x": 0, - "y": 18 + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 44, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" }, - "id": 43, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "exemplar": true, + "expr": "rcmgr_trace_metrics_streams{scope=\"transient\"}", + "interval": "", + "legendFormat": "{{dir}}", + "refId": "A" + } + ], + "title": "Transient Streams", + "type": "timeseries" + }, + { + "description": "How many streams does each service have open", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "exemplar": true, - "expr": "rcmgr_trace_metrics_streams{scope=\"service\"}", - "interval": "", - "legendFormat": "{{dir}} {{service}}", - "refId": "A" - } - ], - "title": "Streams by service", - "type": "timeseries" - }, - { - "description": "How many streams does each protocol have open.\n\nA protocol is similiar to a service except there may be many protocols for a single service. A service may have \nmultiple protocols for backwards compatibility. For example, bitswap the service is a single entity, but it supports bitswap protocols v1.1 and v1.2.\n\nA service is attached to a stream manually by the protocol developer. A service may be missing if there is no `StreamScope.SetService` call.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } }, - "gridPos": { - "h": 9, - "w": 24, - "x": 0, - "y": 27 + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 43, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" }, - "id": 52, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "exemplar": true, + "expr": "rcmgr_trace_metrics_streams{scope=\"service\"}", + "interval": "", + "legendFormat": "{{dir}} {{service}}", + "refId": "A" + } + ], + "title": "Streams by service", + "type": "timeseries" + }, + { + "description": "How many streams does each protocol have open.\n\nA protocol is similiar to a service except there may be many protocols for a single service. A service may have \nmultiple protocols for backwards compatibility. For example, bitswap the service is a single entity, but it supports bitswap protocols v1.1 and v1.2.\n\nA service is attached to a stream manually by the protocol developer. A service may be missing if there is no `StreamScope.SetService` call.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "exemplar": true, - "expr": "rcmgr_trace_metrics_streams{scope=\"protocol\"}", - "interval": "", - "legendFormat": "{{dir}} {{protocol}}", - "refId": "A" - } - ], - "title": "Streams by protocol", - "type": "timeseries" - }, - { - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } }, - "gridPos": { - "h": 10, - "w": 24, - "x": 0, - "y": 36 + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 27 + }, + "id": 52, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" }, - "id": 35, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "exemplar": true, - "expr": "histogram_quantile(0.50, (rcmgr_trace_metrics_peer_streams_bucket - rcmgr_trace_metrics_peer_streams_negative_bucket)) - 0.1", - "interval": "", - "legendFormat": "p50 {{dir}} connections per peer", - "refId": "A" + "exemplar": true, + "expr": "rcmgr_trace_metrics_streams{scope=\"protocol\"}", + "interval": "", + "legendFormat": "{{dir}} {{protocol}}", + "refId": "A" + } + ], + "title": "Streams by protocol", + "type": "timeseries" + }, + { + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "exemplar": true, - "expr": "histogram_quantile(0.90, (rcmgr_trace_metrics_peer_streams_bucket - rcmgr_trace_metrics_peer_streams_negative_bucket)) - 0.1", - "hide": false, - "interval": "", - "legendFormat": "p90 {{dir}} connections per peer", - "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "exemplar": true, - "expr": "histogram_quantile(1, (rcmgr_trace_metrics_peer_streams_bucket - rcmgr_trace_metrics_peer_streams_negative_bucket)) - 0.1", - "hide": false, - "interval": "", - "legendFormat": "max {{dir}} connections per peer", - "refId": "C" - } - ], - "title": "Streams per peer, aggregated", - "type": "timeseries" - }, - { - "description": "How many peers have N-0.1 streams open", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 46 + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } }, - "id": 46, - "options": { - "displayMode": "gradient", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "showUnfilled": true + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 36 + }, + "id": 35, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" }, - "pluginVersion": "8.4.5", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "exemplar": false, - "expr": "rcmgr_trace_metrics_peer_streams_bucket{dir=\"inbound\"}-rcmgr_trace_metrics_peer_streams_negative_bucket{dir=\"inbound\"}", - "format": "heatmap", - "hide": false, - "interval": "", - "legendFormat": "{{le}}", - "refId": "A" - } - ], - "title": "Current inbound streams per peer histogram", - "type": "bargauge" + "tooltip": { + "mode": "single", + "sort": "none" + } }, - { - "description": "How many peers have N-0.1 streams open", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 46 + "exemplar": true, + "expr": "histogram_quantile(0.50, (rcmgr_trace_metrics_peer_streams_bucket - rcmgr_trace_metrics_peer_streams_negative_bucket)) - 0.1", + "interval": "", + "legendFormat": "p50 {{dir}} streams per peer", + "refId": "A" }, - "id": 47, - "options": { - "displayMode": "gradient", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" }, - "showUnfilled": true + "exemplar": true, + "expr": "histogram_quantile(0.90, (rcmgr_trace_metrics_peer_streams_bucket - rcmgr_trace_metrics_peer_streams_negative_bucket)) - 0.1", + "hide": false, + "interval": "", + "legendFormat": "p90 {{dir}} streams per peer", + "refId": "B" }, - "pluginVersion": "8.4.5", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "exemplar": false, - "expr": "rcmgr_trace_metrics_peer_streams_bucket{dir=\"outbound\"}-rcmgr_trace_metrics_peer_streams_negative_bucket{dir=\"outbound\"}", - "format": "heatmap", - "hide": false, - "interval": "", - "legendFormat": "{{le}}", - "refId": "A" + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "histogram_quantile(1, (rcmgr_trace_metrics_peer_streams_bucket - rcmgr_trace_metrics_peer_streams_negative_bucket)) - 0.1", + "hide": false, + "interval": "", + "legendFormat": "max {{dir}} streams per peer", + "refId": "C" + } + ], + "title": "Streams per peer, aggregated", + "type": "timeseries" + }, + { + "description": "How many peers have N-0.1 streams open", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } - ], - "title": "Current outbound streams per peer histogram", - "type": "bargauge" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 54 }, - "id": 29, - "panels": [], - "title": "Connections", - "type": "row" + "overrides": [] }, - { - "gridPos": { - "h": 9, - "w": 24, - "x": 0, - "y": 55 - }, - "id": 31, - "options": { - "content": "# Libp2p Connections\n\nBroken down by [Resource Scope](https://github.com/libp2p/go-libp2p-resource-manager#resource-scopes). \nScopes represent what is imposing limits on this resource. For connections, we have three main scopes:\n\n1. System. The total number of connections owned by the process. Includes both application usable connections + the number of transient connections.\n2. Transient. The total number of connections that are being upgraded into usable connections in the process.\n3. Peer. The total number of connections associated with this peer. When a connection has this scope it is usable by the application.\n\nAn example of a System connection is a connection you can open a libp2p stream on and send data.\nA transient connection is not yet usable for application data since it may be negotiating \na security handshake or a multiplexer.\n\nConnections start in the transient scope and move over to the System and Peer scopes once they are ready to be used.\n\nIt would be unusual to see a lot of transient connections. It would also be unusal to see a peer with a lot of connections.", - "mode": "markdown" + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 46 + }, + "id": 46, + "options": { + "displayMode": "gradient", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false }, - "pluginVersion": "8.4.5", - "title": "libp2p Connections", - "type": "text" + "showUnfilled": true }, - { - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "pluginVersion": "8.4.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": false, + "expr": "rcmgr_trace_metrics_peer_streams_bucket{dir=\"inbound\"}-rcmgr_trace_metrics_peer_streams_negative_bucket{dir=\"inbound\"}", + "format": "heatmap", + "hide": false, + "interval": "", + "legendFormat": "{{le}}", + "refId": "A" + } + ], + "title": "Current inbound streams per peer histogram", + "type": "bargauge" + }, + { + "description": "How many peers have N-0.1 streams open", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] + ] + } }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 64 + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 46 + }, + "id": 47, + "options": { + "displayMode": "gradient", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false }, - "id": 33, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" + "showUnfilled": true + }, + "pluginVersion": "8.4.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "exemplar": true, - "expr": "rcmgr_trace_metrics_connections{scope=\"system\"}", - "interval": "", - "legendFormat": "{{dir}}", - "refId": "A" - } - ], - "title": "System Connections", - "type": "timeseries" + "exemplar": false, + "expr": "rcmgr_trace_metrics_peer_streams_bucket{dir=\"outbound\"}-rcmgr_trace_metrics_peer_streams_negative_bucket{dir=\"outbound\"}", + "format": "heatmap", + "hide": false, + "interval": "", + "legendFormat": "{{le}}", + "refId": "A" + } + ], + "title": "Current outbound streams per peer histogram", + "type": "bargauge" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 54 }, - { - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "id": 29, + "panels": [], + "title": "Connections", + "type": "row" + }, + { + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 55 + }, + "id": 31, + "options": { + "content": "# Libp2p Connections\n\nBroken down by [Resource Scope](https://github.com/libp2p/go-libp2p-resource-manager#resource-scopes). \nScopes represent what is imposing limits on this resource. For connections, we have three main scopes:\n\n1. System. The total number of connections owned by the process. Includes both application usable connections + the number of transient connections.\n2. Transient. The total number of connections that are being upgraded into usable connections in the process.\n3. Peer. The total number of connections associated with this peer. When a connection has this scope it is usable by the application.\n\nAn example of a System connection is a connection you can open a libp2p stream on and send data.\nA transient connection is not yet usable for application data since it may be negotiating \na security handshake or a multiplexer.\n\nConnections start in the transient scope and move over to the System and Peer scopes once they are ready to be used.\n\nIt would be unusual to see a lot of transient connections. It would also be unusal to see a peer with a lot of connections.", + "mode": "markdown" + }, + "pluginVersion": "8.4.5", + "title": "libp2p Connections", + "type": "text" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 64 - }, - "id": 36, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single", - "sort": "none" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "exemplar": true, - "expr": "rcmgr_trace_metrics_connections{scope=\"transient\"}", - "interval": "", - "legendFormat": "{{dir}}", - "refId": "A" - } - ], - "title": "Transient Connections", - "type": "timeseries" + "overrides": [] }, - { - "gridPos": { - "h": 3, - "w": 24, - "x": 0, - "y": 72 - }, - "id": 38, - "options": { - "content": "These are aggregated stats. They are grouped by buckets. Each bucket represents how many peers have N number of connections.\n\nDue to a quirk in [opencensus](https://github.com/census-instrumentation/opencensus-go/blob/v0.23.0/stats/view/aggregation_data.go#L195) the bucket values have to be a bit bigger than the integer values.\nSo subtract 0.1 from the number to get the true number of connections. e.g. If a peer has 3 connections, it'll be put in the 3.1 bucket. \n", - "mode": "markdown" + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 64 + }, + "id": 33, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" }, - "pluginVersion": "8.4.5", - "title": "Connections per Peer", - "type": "text" + "tooltip": { + "mode": "single", + "sort": "none" + } }, - { - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "rcmgr_trace_metrics_connections{scope=\"system\"}", + "interval": "", + "legendFormat": "{{dir}}", + "refId": "A" + } + ], + "title": "System Connections", + "type": "timeseries" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } }, - "gridPos": { - "h": 10, - "w": 24, - "x": 0, - "y": 75 + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 64 + }, + "id": 36, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" }, - "id": 45, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "exemplar": true, - "expr": "histogram_quantile(0.50, (rcmgr_trace_metrics_peer_connections_bucket - rcmgr_trace_metrics_peer_connections_negative_bucket)) - 0.1", - "interval": "", - "legendFormat": "p50 {{dir}} connections per peer", - "refId": "A" + "exemplar": true, + "expr": "rcmgr_trace_metrics_connections{scope=\"transient\"}", + "interval": "", + "legendFormat": "{{dir}}", + "refId": "A" + } + ], + "title": "Transient Connections", + "type": "timeseries" + }, + { + "gridPos": { + "h": 3, + "w": 24, + "x": 0, + "y": 72 + }, + "id": 38, + "options": { + "content": "These are aggregated stats. They are grouped by buckets. Each bucket represents how many peers have N number of connections.\n\nDue to a quirk in [opencensus](https://github.com/census-instrumentation/opencensus-go/blob/v0.23.0/stats/view/aggregation_data.go#L195) the bucket values have to be a bit bigger than the integer values.\nSo subtract 0.1 from the number to get the true number of connections. e.g. If a peer has 3 connections, it'll be put in the 3.1 bucket. \n", + "mode": "markdown" + }, + "pluginVersion": "8.4.5", + "title": "Connections per Peer", + "type": "text" + }, + { + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "exemplar": true, - "expr": "histogram_quantile(0.90, (rcmgr_trace_metrics_peer_connections_bucket - rcmgr_trace_metrics_peer_connections_negative_bucket)) - 0.1", - "hide": false, - "interval": "", - "legendFormat": "p90 {{dir}} connections per peer", - "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "exemplar": true, - "expr": "histogram_quantile(1, (rcmgr_trace_metrics_peer_connections_bucket - rcmgr_trace_metrics_peer_connections_negative_bucket)) - 0.1", - "hide": false, - "interval": "", - "legendFormat": "max {{dir}} connections per peer", - "refId": "C" - } - ], - "title": "Connections per peer, aggregated", - "type": "timeseries" - }, - { - "description": "How many peers have N-0.1 connections open", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 85 + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } }, - "id": 39, - "options": { - "displayMode": "gradient", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "showUnfilled": true + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 75 + }, + "id": 45, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" }, - "pluginVersion": "8.4.5", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "exemplar": false, - "expr": "rcmgr_trace_metrics_peer_connections_bucket{dir=\"inbound\"}-rcmgr_trace_metrics_peer_connections_negative_bucket{dir=\"inbound\"}", - "format": "heatmap", - "hide": false, - "interval": "", - "legendFormat": "{{le}}", - "refId": "A" - } - ], - "title": "Current inbound connections per peer histogram", - "type": "bargauge" + "tooltip": { + "mode": "single", + "sort": "none" + } }, - { - "description": "How many peers have N-0.1 connections open", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" }, - "overrides": [] + "exemplar": true, + "expr": "histogram_quantile(0.50, (rcmgr_trace_metrics_peer_connections_bucket - rcmgr_trace_metrics_peer_connections_negative_bucket)) - 0.1", + "interval": "", + "legendFormat": "p50 {{dir}} connections per peer", + "refId": "A" }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 85 - }, - "id": 40, - "options": { - "displayMode": "gradient", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" }, - "showUnfilled": true + "exemplar": true, + "expr": "histogram_quantile(0.90, (rcmgr_trace_metrics_peer_connections_bucket - rcmgr_trace_metrics_peer_connections_negative_bucket)) - 0.1", + "hide": false, + "interval": "", + "legendFormat": "p90 {{dir}} connections per peer", + "refId": "B" }, - "pluginVersion": "8.4.5", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "exemplar": false, - "expr": "rcmgr_trace_metrics_peer_connections_bucket{dir=\"outbound\"}-rcmgr_trace_metrics_peer_connections_negative_bucket{dir=\"outbound\"} ", - "format": "heatmap", - "hide": false, - "interval": "", - "legendFormat": "{{le}}", - "refId": "A" + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "histogram_quantile(1, (rcmgr_trace_metrics_peer_connections_bucket - rcmgr_trace_metrics_peer_connections_negative_bucket)) - 0.1", + "hide": false, + "interval": "", + "legendFormat": "max {{dir}} connections per peer", + "refId": "C" + } + ], + "title": "Connections per peer, aggregated", + "type": "timeseries" + }, + { + "description": "How many peers have N-0.1 connections open", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } - ], - "title": "Curent outbound connections per peer histogram", - "type": "bargauge" + }, + "overrides": [] }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 93 + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 85 + }, + "id": 39, + "options": { + "displayMode": "gradient", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false }, - "id": 54, - "panels": [], - "title": "Memory", - "type": "row" + "showUnfilled": true }, - { - "description": "As reported to Resource Manager", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "pluginVersion": "8.4.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": false, + "expr": "rcmgr_trace_metrics_peer_connections_bucket{dir=\"inbound\"}-rcmgr_trace_metrics_peer_connections_negative_bucket{dir=\"inbound\"}", + "format": "heatmap", + "hide": false, + "interval": "", + "legendFormat": "{{le}}", + "refId": "A" + } + ], + "title": "Current inbound connections per peer histogram", + "type": "bargauge" + }, + { + "description": "How many peers have N-0.1 connections open", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "decbytes" - }, - "overrides": [] + ] + } }, - "gridPos": { - "h": 9, - "w": 24, - "x": 0, - "y": 94 + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 85 + }, + "id": 40, + "options": { + "displayMode": "gradient", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false }, - "id": 56, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" + "showUnfilled": true + }, + "pluginVersion": "8.4.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "exemplar": true, - "expr": "rcmgr_trace_metrics_memory{scope=\"system\"}", - "interval": "", - "legendFormat": "Bytes Reserved", - "refId": "A" - } - ], - "title": "System memory reservations", - "type": "timeseries" + "exemplar": false, + "expr": "rcmgr_trace_metrics_peer_connections_bucket{dir=\"outbound\"}-rcmgr_trace_metrics_peer_connections_negative_bucket{dir=\"outbound\"} ", + "format": "heatmap", + "hide": false, + "interval": "", + "legendFormat": "{{le}}", + "refId": "A" + } + ], + "title": "Curent outbound connections per peer histogram", + "type": "bargauge" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 93 }, - { - "description": "As reported to Resource Manager", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "id": 54, + "panels": [], + "title": "Memory", + "type": "row" + }, + { + "description": "As reported to Resource Manager", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "unit": "decbytes" + "thresholdsStyle": { + "mode": "off" + } }, - "overrides": [] + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 103 + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 94 + }, + "id": 56, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" }, - "id": 57, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "exemplar": true, + "expr": "rcmgr_trace_metrics_memory{scope=\"system\"}", + "interval": "", + "legendFormat": "Bytes Reserved", + "refId": "A" + } + ], + "title": "System memory reservations", + "type": "timeseries" + }, + { + "description": "As reported to Resource Manager", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "exemplar": true, - "expr": "rcmgr_trace_metrics_memory{scope=\"protocol\"}", - "interval": "", - "legendFormat": "{{protocol}}", - "refId": "A" - } - ], - "title": "Memory reservations by protocol", - "type": "timeseries" - }, - { - "description": "As reported to Resource Manager", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "decbytes" + ] }, - "overrides": [] + "unit": "decbytes" }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 103 - }, - "id": 58, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 103 + }, + "id": 57, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "exemplar": true, - "expr": "rcmgr_trace_metrics_memory{scope=\"service\"}", - "interval": "", - "legendFormat": "{{service}}", - "refId": "A" - } - ], - "title": "Memory reservations by service", - "type": "timeseries" + "tooltip": { + "mode": "single", + "sort": "none" + } }, - { - "description": "As reported to the resource manager", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "rcmgr_trace_metrics_memory{scope=\"protocol\"}", + "interval": "", + "legendFormat": "{{protocol}}", + "refId": "A" + } + ], + "title": "Memory reservations by protocol", + "type": "timeseries" + }, + { + "description": "As reported to Resource Manager", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "unit": "decbytes" + "thresholdsStyle": { + "mode": "off" + } }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Number of peers aggregated" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "dark-green", - "mode": "fixed" - } - }, - { - "id": "custom.axisPlacement", - "value": "right" - }, - { - "id": "unit" - } - ] - } - ] + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" }, - "gridPos": { - "h": 10, - "w": 24, - "x": 0, - "y": 111 + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 103 + }, + "id": 58, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" }, - "id": 59, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "exemplar": true, - "expr": "histogram_quantile(0.50, (rcmgr_trace_metrics_peer_memory_bucket - rcmgr_trace_metrics_peer_memory_negative_bucket)) - 0.1", - "interval": "", - "legendFormat": "p50 {{dir}} connections per peer", - "refId": "A" + "exemplar": true, + "expr": "rcmgr_trace_metrics_memory{scope=\"service\"}", + "interval": "", + "legendFormat": "{{service}}", + "refId": "A" + } + ], + "title": "Memory reservations by service", + "type": "timeseries" + }, + { + "description": "As reported to the resource manager", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "exemplar": true, - "expr": "histogram_quantile(0.90, (rcmgr_trace_metrics_peer_memory_bucket - rcmgr_trace_metrics_peer_memory_negative_bucket)) - 0.1", - "hide": false, - "interval": "", - "legendFormat": "p90 {{dir}} connections per peer", - "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "exemplar": true, - "expr": "histogram_quantile(1, (rcmgr_trace_metrics_peer_memory_bucket - rcmgr_trace_metrics_peer_memory_negative_bucket)) - 0.1", - "hide": false, - "interval": "", - "legendFormat": "max {{dir}} connections per peer", - "refId": "C" + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] }, + "unit": "decbytes" + }, + "overrides": [ { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "matcher": { + "id": "byName", + "options": "Number of peers aggregated" }, - "exemplar": true, - "expr": "rcmgr_trace_metrics_peer_memory_count-rcmgr_trace_metrics_peer_memory_negative_count", - "hide": false, - "interval": "", - "legendFormat": "Number of peers aggregated", - "refId": "D" + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-purple", + "mode": "fixed" + } + }, + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit" + } + ] } - ], - "title": "Memory reservations per peer, aggregated", - "type": "timeseries" + ] }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 121 + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 111 + }, + "id": 59, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" }, - "id": 62, - "panels": [], - "title": "File Descriptors", - "type": "row" + "tooltip": { + "mode": "single", + "sort": "none" + } }, - { - "description": "As reported to the resource manager", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" }, - "overrides": [] + "exemplar": true, + "expr": "histogram_quantile(0.50, (rcmgr_trace_metrics_peer_memory_bucket - rcmgr_trace_metrics_peer_memory_negative_bucket)) - 0.1", + "interval": "", + "legendFormat": "p50 memory usage per peer", + "refId": "A" }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 122 + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "histogram_quantile(0.90, (rcmgr_trace_metrics_peer_memory_bucket - rcmgr_trace_metrics_peer_memory_negative_bucket)) - 0.1", + "hide": false, + "interval": "", + "legendFormat": "p90 memory usage per peer", + "refId": "B" }, - "id": 60, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "exemplar": true, + "expr": "histogram_quantile(1, (rcmgr_trace_metrics_peer_memory_bucket - rcmgr_trace_metrics_peer_memory_negative_bucket)) - 0.1", + "hide": false, + "interval": "", + "legendFormat": "max memory usage per peer", + "refId": "C" }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "exemplar": true, - "expr": "rcmgr_trace_metrics_fds", - "interval": "", - "legendFormat": "{{scope}}", - "refId": "A" - } - ], - "title": "FDs in use", - "type": "timeseries" - } - ], - "refresh": false, - "schemaVersion": 35, - "style": "dark", - "tags": [], - "templating": { - "list": [] + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "rcmgr_trace_metrics_peer_memory_count-rcmgr_trace_metrics_peer_memory_negative_count", + "hide": false, + "interval": "", + "legendFormat": "Number of peers aggregated", + "refId": "D" + } + ], + "title": "Memory reservations per peer, aggregated", + "type": "timeseries" }, - "time": { - "from": "now-5m", - "to": "now" + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 121 + }, + "id": 62, + "panels": [], + "title": "File Descriptors", + "type": "row" }, - "timepicker": {}, - "timezone": "", - "title": "Resource Manager", - "uid": "MgmGIjjnk", - "version": 13, - "weekStart": "" - } \ No newline at end of file + { + "description": "As reported to the resource manager", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 122 + }, + "id": 60, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "exemplar": true, + "expr": "rcmgr_trace_metrics_fds", + "interval": "", + "legendFormat": "{{scope}}", + "refId": "A" + } + ], + "title": "FDs in use", + "type": "timeseries" + } + ], + "refresh": false, + "schemaVersion": 35, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Resource Manager", + "uid": "MgmGIjjnk", + "version": 14, + "weekStart": "" +} \ No newline at end of file From 04e10885af3aaf6081c10228eaf539bd62a6accc Mon Sep 17 00:00:00 2001 From: Marco Munizaga Date: Thu, 23 Jun 2022 15:29:53 -0700 Subject: [PATCH 12/21] Update dashboard blocked resources --- obs/grafana-dashboards/resource-manager.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/obs/grafana-dashboards/resource-manager.json b/obs/grafana-dashboards/resource-manager.json index e9cae6d..66d390a 100644 --- a/obs/grafana-dashboards/resource-manager.json +++ b/obs/grafana-dashboards/resource-manager.json @@ -102,7 +102,7 @@ "uid": "PBFA97CFB590B2093" }, "exemplar": true, - "expr": "count(rcmgr_trace_metrics_blocked_resources) by (resource,scope)", + "expr": "rate(rcmgr_trace_metrics_blocked_resources[$__rate_interval])", "interval": "", "legendFormat": "", "refId": "A" From d34b9cba642f5587d58e217506d879c9c520e0fa Mon Sep 17 00:00:00 2001 From: Marco Munizaga Date: Thu, 23 Jun 2022 15:30:13 -0700 Subject: [PATCH 13/21] Update bucket distribution and blocked resources --- obs/stats.go | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/obs/stats.go b/obs/stats.go index 7838a66..89dc5b5 100644 --- a/obs/stats.go +++ b/obs/stats.go @@ -54,24 +54,24 @@ var ( ConnView = &view.View{Measure: conns, Aggregation: view.Sum(), TagKeys: []tag.Key{Direction, Scope}} - fibLikeDistribution = []float64{ - 1.1, 2.1, 3.1, 5.1, 8.1, 13.1, 21.1, 34.1, 55.1, 100.1, 200.1, + oneTenThenExpDistribution = []float64{ + 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1, 8.1, 9.1, 10.1, 16.1, 32.1, 64.1, 128.1, 256.1, } PeerConnsView = &view.View{ Measure: peerConns, - Aggregation: view.Distribution(fibLikeDistribution...), + Aggregation: view.Distribution(oneTenThenExpDistribution...), TagKeys: []tag.Key{Direction}, } PeerConnsNegativeView = &view.View{ Measure: peerConnsNegative, - Aggregation: view.Distribution(fibLikeDistribution...), + Aggregation: view.Distribution(oneTenThenExpDistribution...), TagKeys: []tag.Key{Direction}, } StreamView = &view.View{Measure: streams, Aggregation: view.Sum(), TagKeys: []tag.Key{Direction, Scope, Service, Protocol}} - PeerStreamsView = &view.View{Measure: peerStreams, Aggregation: view.Distribution(fibLikeDistribution...), TagKeys: []tag.Key{Direction}} - PeerStreamNegativeView = &view.View{Measure: peerStreamsNegative, Aggregation: view.Distribution(fibLikeDistribution...), TagKeys: []tag.Key{Direction}} + PeerStreamsView = &view.View{Measure: peerStreams, Aggregation: view.Distribution(oneTenThenExpDistribution...), TagKeys: []tag.Key{Direction}} + PeerStreamNegativeView = &view.View{Measure: peerStreamsNegative, Aggregation: view.Distribution(oneTenThenExpDistribution...), TagKeys: []tag.Key{Direction}} MemoryView = &view.View{Measure: memory, Aggregation: view.Sum(), TagKeys: []tag.Key{Scope, Service, Protocol}} @@ -136,6 +136,7 @@ var DefaultViews []*view.View = []*view.View{ type StatsTraceReporter struct{} func NewStatsTraceReporter() (StatsTraceReporter, error) { + // TODO tell prometheus the system limits return StatsTraceReporter{}, nil } @@ -328,16 +329,19 @@ func (r StatsTraceReporter) ConsumeEvent(evt rcmgr.TraceEvt) { tags := []tag.Mutator{tag.Upsert(Scope, scope), tag.Upsert(Resource, resource)} + // TODO fix the graph in the dashboard for blocked resources. + // TODO change bytes to iec + if evt.DeltaIn != 0 { - stats.RecordWithTags(ctx, tags, blockedResources.M(int64(evt.DeltaIn))) + stats.RecordWithTags(ctx, tags, blockedResources.M(int64(1))) } if evt.DeltaOut != 0 { - stats.RecordWithTags(ctx, tags, blockedResources.M(int64(evt.DeltaOut))) + stats.RecordWithTags(ctx, tags, blockedResources.M(int64(1))) } if evt.Delta != 0 { - stats.RecordWithTags(ctx, tags, blockedResources.M(evt.Delta)) + stats.RecordWithTags(ctx, tags, blockedResources.M(1)) } } } From 6d5898aa99d6282f6bfb9bef2bbf8ac5dbd28ebd Mon Sep 17 00:00:00 2001 From: Marco Munizaga Date: Thu, 23 Jun 2022 15:33:12 -0700 Subject: [PATCH 14/21] Use bytes rather than decbytes (power of 2) --- obs/grafana-dashboards/resource-manager.json | 8 ++++---- obs/stats.go | 3 --- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/obs/grafana-dashboards/resource-manager.json b/obs/grafana-dashboards/resource-manager.json index 66d390a..f2fabfe 100644 --- a/obs/grafana-dashboards/resource-manager.json +++ b/obs/grafana-dashboards/resource-manager.json @@ -1224,7 +1224,7 @@ } ] }, - "unit": "decbytes" + "unit": "bytes" }, "overrides": [] }, @@ -1311,7 +1311,7 @@ } ] }, - "unit": "decbytes" + "unit": "bytes" }, "overrides": [] }, @@ -1398,7 +1398,7 @@ } ] }, - "unit": "decbytes" + "unit": "bytes" }, "overrides": [] }, @@ -1485,7 +1485,7 @@ } ] }, - "unit": "decbytes" + "unit": "bytes" }, "overrides": [ { diff --git a/obs/stats.go b/obs/stats.go index 89dc5b5..262028b 100644 --- a/obs/stats.go +++ b/obs/stats.go @@ -329,9 +329,6 @@ func (r StatsTraceReporter) ConsumeEvent(evt rcmgr.TraceEvt) { tags := []tag.Mutator{tag.Upsert(Scope, scope), tag.Upsert(Resource, resource)} - // TODO fix the graph in the dashboard for blocked resources. - // TODO change bytes to iec - if evt.DeltaIn != 0 { stats.RecordWithTags(ctx, tags, blockedResources.M(int64(1))) } From e70da09acb61da6c67910f70ae393eb99139f741 Mon Sep 17 00:00:00 2001 From: Marco Munizaga Date: Mon, 27 Jun 2022 15:58:42 -0700 Subject: [PATCH 15/21] Unexport tag names, remove unused var --- obs/stats.go | 81 ++++++++++++++++++++++++---------------------------- 1 file changed, 37 insertions(+), 44 deletions(-) diff --git a/obs/stats.go b/obs/stats.go index 262028b..8fc7ec1 100644 --- a/obs/stats.go +++ b/obs/stats.go @@ -14,9 +14,6 @@ import ( var log = logging.Logger("rcmgrObs") var ( - systemOutboundConns = stats.Int64("system/outbound/conn", "Number of outbound Connections", stats.UnitDimensionless) - systemInboundConns = stats.Int64("system/inbound/conn", "Number of inbound Connections", stats.UnitDimensionless) - conns = stats.Int64("connections", "Number of Connections", stats.UnitDimensionless) peerConns = stats.Int64("peer/connections", "Number of connections this peer has", stats.UnitDimensionless) @@ -40,19 +37,15 @@ var ( ) var ( - LessThanEq, _ = tag.NewKey("le") - Direction, _ = tag.NewKey("dir") - Scope, _ = tag.NewKey("scope") - Service, _ = tag.NewKey("service") - Protocol, _ = tag.NewKey("protocol") - Resource, _ = tag.NewKey("resource") + directionTag, _ = tag.NewKey("dir") + scopeTag, _ = tag.NewKey("scope") + serviceTag, _ = tag.NewKey("service") + protocolTag, _ = tag.NewKey("protocol") + resourceTag, _ = tag.NewKey("resource") ) var ( - SystemOutboundConnsView = &view.View{Measure: systemOutboundConns, Aggregation: view.Sum()} - SystemInboundConnsView = &view.View{Measure: systemInboundConns, Aggregation: view.Sum()} - - ConnView = &view.View{Measure: conns, Aggregation: view.Sum(), TagKeys: []tag.Key{Direction, Scope}} + ConnView = &view.View{Measure: conns, Aggregation: view.Sum(), TagKeys: []tag.Key{directionTag, scopeTag}} oneTenThenExpDistribution = []float64{ 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1, 8.1, 9.1, 10.1, 16.1, 32.1, 64.1, 128.1, 256.1, @@ -61,19 +54,19 @@ var ( PeerConnsView = &view.View{ Measure: peerConns, Aggregation: view.Distribution(oneTenThenExpDistribution...), - TagKeys: []tag.Key{Direction}, + TagKeys: []tag.Key{directionTag}, } PeerConnsNegativeView = &view.View{ Measure: peerConnsNegative, Aggregation: view.Distribution(oneTenThenExpDistribution...), - TagKeys: []tag.Key{Direction}, + TagKeys: []tag.Key{directionTag}, } - StreamView = &view.View{Measure: streams, Aggregation: view.Sum(), TagKeys: []tag.Key{Direction, Scope, Service, Protocol}} - PeerStreamsView = &view.View{Measure: peerStreams, Aggregation: view.Distribution(oneTenThenExpDistribution...), TagKeys: []tag.Key{Direction}} - PeerStreamNegativeView = &view.View{Measure: peerStreamsNegative, Aggregation: view.Distribution(oneTenThenExpDistribution...), TagKeys: []tag.Key{Direction}} + StreamView = &view.View{Measure: streams, Aggregation: view.Sum(), TagKeys: []tag.Key{directionTag, scopeTag, serviceTag, protocolTag}} + PeerStreamsView = &view.View{Measure: peerStreams, Aggregation: view.Distribution(oneTenThenExpDistribution...), TagKeys: []tag.Key{directionTag}} + PeerStreamNegativeView = &view.View{Measure: peerStreamsNegative, Aggregation: view.Distribution(oneTenThenExpDistribution...), TagKeys: []tag.Key{directionTag}} - MemoryView = &view.View{Measure: memory, Aggregation: view.Sum(), TagKeys: []tag.Key{Scope, Service, Protocol}} + MemoryView = &view.View{Measure: memory, Aggregation: view.Sum(), TagKeys: []tag.Key{scopeTag, serviceTag, protocolTag}} memDistribution = []float64{ 1 << 10, // 1KB @@ -106,12 +99,12 @@ var ( Aggregation: view.Distribution(memDistribution...), } - FDsView = &view.View{Measure: fds, Aggregation: view.Sum(), TagKeys: []tag.Key{Scope}} + FDsView = &view.View{Measure: fds, Aggregation: view.Sum(), TagKeys: []tag.Key{scopeTag}} BlockedResourcesView = &view.View{ Measure: blockedResources, Aggregation: view.Sum(), - TagKeys: []tag.Key{Scope, Resource}, + TagKeys: []tag.Key{scopeTag, resourceTag}, } ) @@ -156,10 +149,10 @@ func (r StatsTraceReporter) ConsumeEvent(evt rcmgr.TraceEvt) { peerStreamsOut := int64(evt.StreamsOut) if oldStreamsOut != peerStreamsOut { if oldStreamsOut != 0 { - stats.RecordWithTags(ctx, []tag.Mutator{tag.Upsert(Direction, "outbound")}, peerStreamsNegative.M(oldStreamsOut)) + stats.RecordWithTags(ctx, []tag.Mutator{tag.Upsert(directionTag, "outbound")}, peerStreamsNegative.M(oldStreamsOut)) } if peerStreamsOut != 0 { - stats.RecordWithTags(ctx, []tag.Mutator{tag.Upsert(Direction, "outbound")}, peerStreams.M(peerStreamsOut)) + stats.RecordWithTags(ctx, []tag.Mutator{tag.Upsert(directionTag, "outbound")}, peerStreams.M(peerStreamsOut)) } } @@ -167,20 +160,20 @@ func (r StatsTraceReporter) ConsumeEvent(evt rcmgr.TraceEvt) { peerStreamsIn := int64(evt.StreamsIn) if oldStreamsIn != peerStreamsIn { if oldStreamsIn != 0 { - stats.RecordWithTags(ctx, []tag.Mutator{tag.Upsert(Direction, "inbound")}, peerStreamsNegative.M(oldStreamsIn)) + stats.RecordWithTags(ctx, []tag.Mutator{tag.Upsert(directionTag, "inbound")}, peerStreamsNegative.M(oldStreamsIn)) } if peerStreamsIn != 0 { - stats.RecordWithTags(ctx, []tag.Mutator{tag.Upsert(Direction, "inbound")}, peerStreams.M(peerStreamsIn)) + stats.RecordWithTags(ctx, []tag.Mutator{tag.Upsert(directionTag, "inbound")}, peerStreams.M(peerStreamsIn)) } } } else { var tags []tag.Mutator if rcmgr.IsSystemScope(evt.Name) || rcmgr.IsTransientScope(evt.Name) { - tags = append(tags, tag.Upsert(Scope, evt.Name)) + tags = append(tags, tag.Upsert(scopeTag, evt.Name)) } else if svc := rcmgr.ParseServiceScopeName(evt.Name); svc != "" { - tags = append(tags, tag.Upsert(Scope, "service"), tag.Upsert(Service, svc)) + tags = append(tags, tag.Upsert(scopeTag, "service"), tag.Upsert(serviceTag, svc)) } else if proto := rcmgr.ParseProtocolScopeName(evt.Name); proto != "" { - tags = append(tags, tag.Upsert(Scope, "protocol"), tag.Upsert(Protocol, proto)) + tags = append(tags, tag.Upsert(scopeTag, "protocol"), tag.Upsert(protocolTag, proto)) } else { // Not measuring connscope, servicepeer and protocolpeer. Lots of data, and // you can use aggregated peer stats + service stats to infer @@ -191,7 +184,7 @@ func (r StatsTraceReporter) ConsumeEvent(evt rcmgr.TraceEvt) { if evt.DeltaOut != 0 { stats.RecordWithTags( ctx, - append([]tag.Mutator{tag.Upsert(Direction, "outbound")}, tags...), + append([]tag.Mutator{tag.Upsert(directionTag, "outbound")}, tags...), streams.M(int64(evt.DeltaOut)), ) } @@ -199,7 +192,7 @@ func (r StatsTraceReporter) ConsumeEvent(evt rcmgr.TraceEvt) { if evt.DeltaIn != 0 { stats.RecordWithTags( ctx, - append([]tag.Mutator{tag.Upsert(Direction, "inbound")}, tags...), + append([]tag.Mutator{tag.Upsert(directionTag, "inbound")}, tags...), streams.M(int64(evt.DeltaIn)), ) } @@ -217,10 +210,10 @@ func (r StatsTraceReporter) ConsumeEvent(evt rcmgr.TraceEvt) { connsOut := int64(evt.ConnsOut) if oldConnsOut != connsOut { if oldConnsOut != 0 { - stats.RecordWithTags(ctx, []tag.Mutator{tag.Upsert(Direction, "outbound")}, peerConnsNegative.M(oldConnsOut)) + stats.RecordWithTags(ctx, []tag.Mutator{tag.Upsert(directionTag, "outbound")}, peerConnsNegative.M(oldConnsOut)) } if connsOut != 0 { - stats.RecordWithTags(ctx, []tag.Mutator{tag.Upsert(Direction, "outbound")}, peerConns.M(connsOut)) + stats.RecordWithTags(ctx, []tag.Mutator{tag.Upsert(directionTag, "outbound")}, peerConns.M(connsOut)) } } @@ -228,16 +221,16 @@ func (r StatsTraceReporter) ConsumeEvent(evt rcmgr.TraceEvt) { connsIn := int64(evt.ConnsIn) if oldConnsIn != connsIn { if oldConnsIn != 0 { - stats.RecordWithTags(ctx, []tag.Mutator{tag.Upsert(Direction, "inbound")}, peerConnsNegative.M(oldConnsIn)) + stats.RecordWithTags(ctx, []tag.Mutator{tag.Upsert(directionTag, "inbound")}, peerConnsNegative.M(oldConnsIn)) } if connsIn != 0 { - stats.RecordWithTags(ctx, []tag.Mutator{tag.Upsert(Direction, "inbound")}, peerConns.M(connsIn)) + stats.RecordWithTags(ctx, []tag.Mutator{tag.Upsert(directionTag, "inbound")}, peerConns.M(connsIn)) } } } else { var tags []tag.Mutator if rcmgr.IsSystemScope(evt.Name) || rcmgr.IsTransientScope(evt.Name) { - tags = append(tags, tag.Upsert(Scope, evt.Name)) + tags = append(tags, tag.Upsert(scopeTag, evt.Name)) } else if rcmgr.IsConnScope(evt.Name) { // Not measuring this. I don't think it's useful. break @@ -249,7 +242,7 @@ func (r StatsTraceReporter) ConsumeEvent(evt rcmgr.TraceEvt) { if evt.DeltaOut != 0 { stats.RecordWithTags( ctx, - append([]tag.Mutator{tag.Upsert(Direction, "outbound")}, tags...), + append([]tag.Mutator{tag.Upsert(directionTag, "outbound")}, tags...), conns.M(int64(evt.DeltaOut)), ) } @@ -257,7 +250,7 @@ func (r StatsTraceReporter) ConsumeEvent(evt rcmgr.TraceEvt) { if evt.DeltaIn != 0 { stats.RecordWithTags( ctx, - append([]tag.Mutator{tag.Upsert(Direction, "inbound")}, tags...), + append([]tag.Mutator{tag.Upsert(directionTag, "inbound")}, tags...), conns.M(int64(evt.DeltaIn)), ) } @@ -295,11 +288,11 @@ func (r StatsTraceReporter) ConsumeEvent(evt rcmgr.TraceEvt) { } else { var tags []tag.Mutator if rcmgr.IsSystemScope(evt.Name) || rcmgr.IsTransientScope(evt.Name) { - tags = append(tags, tag.Upsert(Scope, evt.Name)) + tags = append(tags, tag.Upsert(scopeTag, evt.Name)) } else if svc := rcmgr.ParseServiceScopeName(evt.Name); svc != "" { - tags = append(tags, tag.Upsert(Scope, "service"), tag.Upsert(Service, svc)) + tags = append(tags, tag.Upsert(scopeTag, "service"), tag.Upsert(serviceTag, svc)) } else if proto := rcmgr.ParseProtocolScopeName(evt.Name); proto != "" { - tags = append(tags, tag.Upsert(Scope, "protocol"), tag.Upsert(Protocol, proto)) + tags = append(tags, tag.Upsert(scopeTag, "protocol"), tag.Upsert(protocolTag, proto)) } else { // Not measuring connscope, servicepeer and protocolpeer. Lots of data, and // you can use aggregated peer stats + service stats to infer @@ -322,12 +315,12 @@ func (r StatsTraceReporter) ConsumeEvent(evt rcmgr.TraceEvt) { resource = "memory" } - // Only the top scope. We don't want to get the peerid here. - scope := strings.SplitN(evt.Name, ":", 2)[0] + // Only the top scopeName. We don't want to get the peerid here. + scopeName := strings.SplitN(evt.Name, ":", 2)[0] // Drop the connection or stream id - scope = strings.SplitN(scope, "-", 2)[0] + scopeName = strings.SplitN(scopeName, "-", 2)[0] - tags := []tag.Mutator{tag.Upsert(Scope, scope), tag.Upsert(Resource, resource)} + tags := []tag.Mutator{tag.Upsert(scopeTag, scopeName), tag.Upsert(resourceTag, resource)} if evt.DeltaIn != 0 { stats.RecordWithTags(ctx, tags, blockedResources.M(int64(1))) From 5abae10710dac485368a4bedd43e9b7e7b53cbf4 Mon Sep 17 00:00:00 2001 From: Marco Munizaga Date: Wed, 29 Jun 2022 21:03:22 -0700 Subject: [PATCH 16/21] Filter out spans --- obs/stats.go | 4 ++-- rcmgr.go | 21 +++++++++++---------- scope.go | 7 +++++++ 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/obs/stats.go b/obs/stats.go index 8fc7ec1..725b588 100644 --- a/obs/stats.go +++ b/obs/stats.go @@ -235,8 +235,8 @@ func (r StatsTraceReporter) ConsumeEvent(evt rcmgr.TraceEvt) { // Not measuring this. I don't think it's useful. break } else { - // There shouldn't be anything here. But we keep going so the metrics will tell us if we're wrong (scope="") - log.Debugf("unexpected event in stats: %s", evt.Name) + // This could be a span + break } if evt.DeltaOut != 0 { diff --git a/rcmgr.go b/rcmgr.go index 5d64f7d..99c339c 100644 --- a/rcmgr.go +++ b/rcmgr.go @@ -478,11 +478,12 @@ func newStreamScope(dir network.Direction, limit Limit, peer *peerScope, rcmgr * } func IsSystemScope(name string) bool { - return strings.HasPrefix(name, "system") + return name == "system" } func IsTransientScope(name string) bool { return strings.HasPrefix(name, "transient") + return name == "transient" } func streamScopeName(streamId int64) string { @@ -490,7 +491,7 @@ func streamScopeName(streamId int64) string { } func IsStreamScope(name string) bool { - return strings.HasPrefix(name, "stream-") + return strings.HasPrefix(name, "stream-") && !IsSpan(name) } func connScopeName(streamId int64) string { @@ -498,7 +499,7 @@ func connScopeName(streamId int64) string { } func IsConnScope(name string) bool { - return strings.HasPrefix(name, "conn-") + return strings.HasPrefix(name, "conn-") && !IsSpan(name) } func peerScopeName(p peer.ID) string { @@ -507,16 +508,16 @@ func peerScopeName(p peer.ID) string { // ParsePeerScopeName returns "" if name is not a peerScopeName func ParsePeerScopeName(name string) peer.ID { - if !strings.HasPrefix(name, "peer:") { - return peer.ID("") + if !strings.HasPrefix(name, "peer:") || IsSpan(name) { + return "" } parts := strings.SplitN(name, "peer:", 2) if len(parts) != 2 { - return peer.ID("") + return "" } p, err := peer.Decode(parts[1]) if err != nil { - return peer.ID("") + return "" } return p } @@ -524,14 +525,14 @@ func ParsePeerScopeName(name string) peer.ID { // ParseServiceScopeName returns the service name if name is a serviceScopeName. // Otherwise returns "" func ParseServiceScopeName(name string) string { - if strings.HasPrefix(name, "service:") { + if strings.HasPrefix(name, "service:") && !IsSpan(name) { if strings.Contains(name, "peer:") { // This is a service peer scope return "" } parts := strings.SplitN(name, ":", 2) if len(parts) != 2 { - return ("") + return "" } return parts[1] @@ -542,7 +543,7 @@ func ParseServiceScopeName(name string) string { // ParseProtocolScopeName returns the service name if name is a serviceScopeName. // Otherwise returns "" func ParseProtocolScopeName(name string) string { - if strings.HasPrefix(name, "protocol:") { + if strings.HasPrefix(name, "protocol:") && !IsSpan(name) { if strings.Contains(name, "peer:") { // This is a protocol peer scope return "" diff --git a/scope.go b/scope.go index 2dd9707..7205b2e 100644 --- a/scope.go +++ b/scope.go @@ -2,6 +2,7 @@ package rcmgr import ( "fmt" + "strings" "sync" "github.com/libp2p/go-libp2p-core/network" @@ -73,6 +74,12 @@ func newResourceScopeSpan(owner *resourceScope, id int) *resourceScope { return r } +// IsSpan will return true if this name was created by newResourceScopeSpan +func IsSpan(name string) bool { + return strings.Contains(name, ".span-") + return name == "transient" +} + // Resources implementation func (rc *resources) checkMemory(rsvp int64, prio uint8) error { // overflow check; this also has the side effect that we cannot reserve negative memory. From ac716c1bf306ffc088d39e15b48f8b6edf12f54e Mon Sep 17 00:00:00 2001 From: Marco Munizaga Date: Wed, 29 Jun 2022 21:06:43 -0700 Subject: [PATCH 17/21] Rewrite constants to be more obvious --- obs/stats.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/obs/stats.go b/obs/stats.go index 725b588..a11f0fc 100644 --- a/obs/stats.go +++ b/obs/stats.go @@ -69,16 +69,16 @@ var ( MemoryView = &view.View{Measure: memory, Aggregation: view.Sum(), TagKeys: []tag.Key{scopeTag, serviceTag, protocolTag}} memDistribution = []float64{ - 1 << 10, // 1KB - 1 << 12, // 4KB - 1 << 15, // 32KB - 1 << 20, // 1MB - 1 << 25, // 32MB - 1 << 28, // 256MB - 1 << 29, // 512MB - 1 << 30, // 1GB - 1 << 31, // 2GB - 1 << 32, // 4GB + 1 << 10, // 1KB + 4 << 10, // 4KB + 32 << 10, // 32KB + 1 << 20, // 1MB + 32 << 20, // 32MB + 256 << 20, // 256MB + 512 << 20, // 512MB + 1 << 30, // 1GB + 2 << 30, // 2GB + 4 << 30, // 4GB } PeerMemoryView = &view.View{ Measure: peerMemory, From 662c09420118fe63e298d85b7d38852208b93c68 Mon Sep 17 00:00:00 2001 From: Marco Munizaga Date: Wed, 29 Jun 2022 21:09:48 -0700 Subject: [PATCH 18/21] Don't add reporter twice --- trace.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trace.go b/trace.go index 464e8cf..886f89f 100644 --- a/trace.go +++ b/trace.go @@ -47,7 +47,7 @@ func WithTrace(path string) Option { func WithTraceReporter(reporter TraceReporter) Option { return func(r *resourceManager) error { if r.trace == nil { - r.trace = &trace{reporters: []TraceReporter{reporter}} + r.trace = &trace{} } r.trace.reporters = append(r.trace.reporters, reporter) return nil From ccb0c4dd90aeafd8957edebabfcf174f657ab3b2 Mon Sep 17 00:00:00 2001 From: Marco Munizaga Date: Fri, 1 Jul 2022 08:36:18 -0700 Subject: [PATCH 19/21] Update help text --- obs/stats.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/obs/stats.go b/obs/stats.go index a11f0fc..5030d06 100644 --- a/obs/stats.go +++ b/obs/stats.go @@ -17,19 +17,19 @@ var ( conns = stats.Int64("connections", "Number of Connections", stats.UnitDimensionless) peerConns = stats.Int64("peer/connections", "Number of connections this peer has", stats.UnitDimensionless) - peerConnsNegative = stats.Int64("peer/connections_negative", "Number of connections this peer had", stats.UnitDimensionless) + peerConnsNegative = stats.Int64("peer/connections_negative", "Number of connections this peer had. This is used to get the current connection number per peer histogram by subtracting this from the peer/connections histogram", stats.UnitDimensionless) streams = stats.Int64("streams", "Number of Streams", stats.UnitDimensionless) peerStreams = stats.Int64("peer/streams", "Number of streams this peer has", stats.UnitDimensionless) - peerStreamsNegative = stats.Int64("peer/streams_negative", "Number of streams this peer had", stats.UnitDimensionless) + peerStreamsNegative = stats.Int64("peer/streams_negative", "Number of streams this peer had. This is used to get the current streams number per peer histogram by subtracting this from the peer/streams histogram", stats.UnitDimensionless) memory = stats.Int64("memory", "Amount of memory reserved as reported to the Resource Manager", stats.UnitDimensionless) peerMemory = stats.Int64("peer/memory", "Amount of memory currently reseved for peer", stats.UnitDimensionless) - peerMemoryNegative = stats.Int64("peer/memory_negative", "Amount of memory previously reseved for peer", stats.UnitDimensionless) + peerMemoryNegative = stats.Int64("peer/memory_negative", "Amount of memory previously reseved for peer. This is used to get the current memory per peer histogram by subtracting this from the peer/memory histogram", stats.UnitDimensionless) connMemory = stats.Int64("conn/memory", "Amount of memory currently reseved for the connection", stats.UnitDimensionless) - connMemoryNegative = stats.Int64("conn/memory_negative", "Amount of memory previously reseved for the connection", stats.UnitDimensionless) + connMemoryNegative = stats.Int64("conn/memory_negative", "Amount of memory previously reseved for the connection. This is used to get the current memory per connection histogram by subtracting this from the conn/memory histogram", stats.UnitDimensionless) fds = stats.Int64("fds", "Number of fds as reported to the Resource Manager", stats.UnitDimensionless) From 0f3804229e36e66e459e35ea9bb69d14b017a692 Mon Sep 17 00:00:00 2001 From: Marco Munizaga Date: Fri, 1 Jul 2022 08:53:38 -0700 Subject: [PATCH 20/21] Remove unused var --- obs/stats.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/obs/stats.go b/obs/stats.go index 5030d06..4ba2bf4 100644 --- a/obs/stats.go +++ b/obs/stats.go @@ -4,15 +4,12 @@ import ( "context" "strings" - logging "github.com/ipfs/go-log/v2" rcmgr "github.com/libp2p/go-libp2p-resource-manager" "go.opencensus.io/stats" "go.opencensus.io/stats/view" "go.opencensus.io/tag" ) -var log = logging.Logger("rcmgrObs") - var ( conns = stats.Int64("connections", "Number of Connections", stats.UnitDimensionless) From 836d53a98d66a9c33a9c66df0e89ceb274684c91 Mon Sep 17 00:00:00 2001 From: Marco Munizaga Date: Fri, 1 Jul 2022 09:04:31 -0700 Subject: [PATCH 21/21] Fix go vet issue --- rcmgr.go | 1 - scope.go | 1 - 2 files changed, 2 deletions(-) diff --git a/rcmgr.go b/rcmgr.go index 99c339c..a9fbdae 100644 --- a/rcmgr.go +++ b/rcmgr.go @@ -482,7 +482,6 @@ func IsSystemScope(name string) bool { } func IsTransientScope(name string) bool { - return strings.HasPrefix(name, "transient") return name == "transient" } diff --git a/scope.go b/scope.go index 7205b2e..2190527 100644 --- a/scope.go +++ b/scope.go @@ -77,7 +77,6 @@ func newResourceScopeSpan(owner *resourceScope, id int) *resourceScope { // IsSpan will return true if this name was created by newResourceScopeSpan func IsSpan(name string) bool { return strings.Contains(name, ".span-") - return name == "transient" } // Resources implementation