From d9004d21b45bcb72461150686358fb1afd6c66f4 Mon Sep 17 00:00:00 2001 From: Sukun Date: Sun, 19 Feb 2023 04:41:18 +0530 Subject: [PATCH] swarm: add ip_version to metrics (#2114) * add ip_version to swarm metrics * use "unknown" as a default for the IP version --------- Co-authored-by: Marten Seemann --- dashboards/swarm/swarm.json | 209 ++++++++++++++++++++++++++-- p2p/net/swarm/swarm_conn.go | 2 +- p2p/net/swarm/swarm_dial.go | 4 +- p2p/net/swarm/swarm_listen.go | 2 +- p2p/net/swarm/swarm_metrics.go | 45 ++++-- p2p/net/swarm/swarm_metrics_test.go | 18 ++- 6 files changed, 245 insertions(+), 35 deletions(-) diff --git a/dashboards/swarm/swarm.json b/dashboards/swarm/swarm.json index f0e6804575..09eee4ced7 100644 --- a/dashboards/swarm/swarm.json +++ b/dashboards/swarm/swarm.json @@ -464,7 +464,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "libp2p_swarm_connections_opened_total{dir=\"inbound\"} - libp2p_swarm_connections_closed_total{dir=\"inbound\"}", + "expr": "sum by (transport, security, muxer) (libp2p_swarm_connections_opened_total{dir=\"inbound\"}) - sum by (transport, security, muxer) (libp2p_swarm_connections_closed_total{dir=\"inbound\"})", "legendFormat": "{{transport}} {{security}} {{muxer}}", "range": true, "refId": "A" @@ -691,7 +691,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "libp2p_swarm_connections_opened_total{dir=\"outbound\"} - libp2p_swarm_connections_closed_total{dir=\"outbound\"}", + "expr": "sum by (transport, security, muxer)(libp2p_swarm_connections_opened_total{dir=\"outbound\"}) - sum by (transport, security, muxer) (libp2p_swarm_connections_closed_total{dir=\"outbound\"})", "legendFormat": "{{transport}} {{security}} {{muxer}}", "range": true, "refId": "A" @@ -1289,7 +1289,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "rate(libp2p_swarm_connections_opened_total{dir=\"inbound\"}[$__rate_interval])", + "expr": "sum (rate(libp2p_swarm_connections_opened_total{dir=\"inbound\"}[$__rate_interval])) by (transport, security, muxer)", "legendFormat": "{{transport}} {{security}} {{muxer}}", "range": true, "refId": "A" @@ -1515,7 +1515,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "rate(libp2p_swarm_connections_opened_total{dir=\"outbound\"}[$__rate_interval])", + "expr": "sum (rate(libp2p_swarm_connections_opened_total{dir=\"outbound\"}[$__rate_interval])) by (transport, security, muxer)", "legendFormat": "{{transport}} {{security}} {{muxer}}", "range": true, "refId": "A" @@ -1713,7 +1713,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "increase(libp2p_swarm_connections_opened_total{dir=\"inbound\"}[$__range])", + "expr": "sum (increase(libp2p_swarm_connections_opened_total{dir=\"inbound\"}[$__range])) by (transport, security, muxer)", "legendFormat": "{{transport}} {{security}} {{muxer}}", "range": true, "refId": "A" @@ -1913,7 +1913,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "increase(libp2p_swarm_connections_opened_total{dir=\"outbound\"}[$__range])", + "expr": "sum (increase(libp2p_swarm_connections_opened_total{dir=\"outbound\"}[$__range])) by (transport, security, muxer)", "legendFormat": "{{transport}} {{security}} {{muxer}}", "range": true, "refId": "A" @@ -1941,7 +1941,38 @@ }, "mappings": [] }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "ip4" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "ip6" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-purple", + "mode": "fixed" + } + } + ] + } + ] }, "gridPos": { "h": 8, @@ -1949,6 +1980,164 @@ "x": 0, "y": 51 }, + "id": 32, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (ip_version) (libp2p_swarm_connections_opened_total{dir=\"inbound\"}) - sum by (ip_version) (libp2p_swarm_connections_closed_total{dir=\"inbound\"})", + "legendFormat": "{{ip_version}}", + "range": true, + "refId": "A" + } + ], + "title": "New Inbound Connections: IP Version", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "ip6" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "ip4" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-blue", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 51 + }, + "id": 34, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (ip_version) (libp2p_swarm_connections_opened_total{dir=\"outbound\"}) - sum by (ip_version) (libp2p_swarm_connections_closed_total{dir=\"outbound\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "New Outbound Connections: IP Version", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 59 + }, "id": 15, "options": { "legend": { @@ -2137,7 +2326,7 @@ "h": 8, "w": 12, "x": 12, - "y": 51 + "y": 59 }, "id": 17, "options": { @@ -2220,7 +2409,7 @@ "h": 8, "w": 12, "x": 0, - "y": 59 + "y": 67 }, "id": 25, "options": { @@ -2296,6 +2485,6 @@ "timezone": "", "title": "libp2p Swarm", "uid": "a15PyhO4z", - "version": 68, + "version": 12, "weekStart": "" } \ No newline at end of file diff --git a/p2p/net/swarm/swarm_conn.go b/p2p/net/swarm/swarm_conn.go index 146305beca..d9202fafa4 100644 --- a/p2p/net/swarm/swarm_conn.go +++ b/p2p/net/swarm/swarm_conn.go @@ -61,7 +61,7 @@ func (c *Conn) Close() error { func (c *Conn) doClose() { if c.swarm.metricsTracer != nil { - c.swarm.metricsTracer.ClosedConnection(c.stat.Direction, time.Since(c.stat.Stats.Opened), c.ConnState()) + c.swarm.metricsTracer.ClosedConnection(c.stat.Direction, time.Since(c.stat.Stats.Opened), c.ConnState(), c.LocalMultiaddr()) } c.swarm.removeConn(c) diff --git a/p2p/net/swarm/swarm_dial.go b/p2p/net/swarm/swarm_dial.go index 4c64e37b61..4f58a9b0e3 100644 --- a/p2p/net/swarm/swarm_dial.go +++ b/p2p/net/swarm/swarm_dial.go @@ -500,8 +500,8 @@ func (s *Swarm) dialAddr(ctx context.Context, p peer.ID, addr ma.Multiaddr) (tra canonicallog.LogPeerStatus(100, connC.RemotePeer(), connC.RemoteMultiaddr(), "connection_status", "established", "dir", "outbound") if s.metricsTracer != nil { connState := connC.ConnState() - s.metricsTracer.OpenedConnection(network.DirOutbound, connC.RemotePublicKey(), connState) - s.metricsTracer.CompletedHandshake(time.Since(start), connState) + s.metricsTracer.OpenedConnection(network.DirOutbound, connC.RemotePublicKey(), connState, connC.LocalMultiaddr()) + s.metricsTracer.CompletedHandshake(time.Since(start), connState, connC.LocalMultiaddr()) } // Trust the transport? Yeah... right. diff --git a/p2p/net/swarm/swarm_listen.go b/p2p/net/swarm/swarm_listen.go index 334abb4ea3..5ad2b08c3e 100644 --- a/p2p/net/swarm/swarm_listen.go +++ b/p2p/net/swarm/swarm_listen.go @@ -131,7 +131,7 @@ func (s *Swarm) AddListenAddr(a ma.Multiaddr) error { } canonicallog.LogPeerStatus(100, c.RemotePeer(), c.RemoteMultiaddr(), "connection_status", "established", "dir", "inbound") if s.metricsTracer != nil { - s.metricsTracer.OpenedConnection(network.DirInbound, c.RemotePublicKey(), c.ConnState()) + s.metricsTracer.OpenedConnection(network.DirInbound, c.RemotePublicKey(), c.ConnState(), c.LocalMultiaddr()) } log.Debugf("swarm listener accepted connection: %s <-> %s", c.LocalMultiaddr(), c.RemoteMultiaddr()) diff --git a/p2p/net/swarm/swarm_metrics.go b/p2p/net/swarm/swarm_metrics.go index d36d0acb83..6af2cf919b 100644 --- a/p2p/net/swarm/swarm_metrics.go +++ b/p2p/net/swarm/swarm_metrics.go @@ -26,7 +26,7 @@ var ( Name: "connections_opened_total", Help: "Connections Opened", }, - []string{"dir", "transport", "security", "muxer"}, + []string{"dir", "transport", "security", "muxer", "ip_version"}, ) keyTypes = prometheus.NewCounterVec( prometheus.CounterOpts{ @@ -42,7 +42,7 @@ var ( Name: "connections_closed_total", Help: "Connections Closed", }, - []string{"dir", "transport", "security", "muxer"}, + []string{"dir", "transport", "security", "muxer", "ip_version"}, ) dialError = prometheus.NewCounterVec( prometheus.CounterOpts{ @@ -50,7 +50,7 @@ var ( Name: "dial_errors_total", Help: "Dial Error", }, - []string{"transport", "error"}, + []string{"transport", "error", "ip_version"}, ) connDuration = prometheus.NewHistogramVec( prometheus.HistogramOpts{ @@ -59,7 +59,7 @@ var ( Help: "Duration of a Connection", Buckets: prometheus.ExponentialBuckets(1.0/16, 2, 25), // up to 24 days }, - []string{"dir", "transport", "security", "muxer"}, + []string{"dir", "transport", "security", "muxer", "ip_version"}, ) connHandshakeLatency = prometheus.NewHistogramVec( prometheus.HistogramOpts{ @@ -68,7 +68,7 @@ var ( Help: "Duration of the libp2p Handshake", Buckets: prometheus.ExponentialBuckets(0.001, 1.3, 35), }, - []string{"transport", "security", "muxer"}, + []string{"transport", "security", "muxer", "ip_version"}, ) ) @@ -79,9 +79,9 @@ func initMetrics() { } type MetricsTracer interface { - OpenedConnection(network.Direction, crypto.PubKey, network.ConnectionState) - ClosedConnection(network.Direction, time.Duration, network.ConnectionState) - CompletedHandshake(time.Duration, network.ConnectionState) + OpenedConnection(network.Direction, crypto.PubKey, network.ConnectionState, ma.Multiaddr) + ClosedConnection(network.Direction, time.Duration, network.ConnectionState, ma.Multiaddr) + CompletedHandshake(time.Duration, network.ConnectionState, ma.Multiaddr) FailedDialing(ma.Multiaddr, error) } @@ -108,12 +108,28 @@ func appendConnectionState(tags []string, cs network.ConnectionState) []string { return tags } -func (m *metricsTracer) OpenedConnection(dir network.Direction, p crypto.PubKey, cs network.ConnectionState) { +func getIPVersion(addr ma.Multiaddr) string { + version := "unknown" + ma.ForEach(addr, func(c ma.Component) bool { + if c.Protocol().Code == ma.P_IP4 { + version = "ip4" + return false + } else if c.Protocol().Code == ma.P_IP6 { + version = "ip6" + return false + } + return true + }) + return version +} + +func (m *metricsTracer) OpenedConnection(dir network.Direction, p crypto.PubKey, cs network.ConnectionState, laddr ma.Multiaddr) { tags := metricshelper.GetStringSlice() defer metricshelper.PutStringSlice(tags) *tags = append(*tags, metricshelper.GetDirection(dir)) *tags = appendConnectionState(*tags, cs) + *tags = append(*tags, getIPVersion(laddr)) connsOpened.WithLabelValues(*tags...).Inc() *tags = (*tags)[:0] @@ -122,25 +138,23 @@ func (m *metricsTracer) OpenedConnection(dir network.Direction, p crypto.PubKey, keyTypes.WithLabelValues(*tags...).Inc() } -func (m *metricsTracer) ClosedConnection(dir network.Direction, duration time.Duration, cs network.ConnectionState) { +func (m *metricsTracer) ClosedConnection(dir network.Direction, duration time.Duration, cs network.ConnectionState, laddr ma.Multiaddr) { tags := metricshelper.GetStringSlice() defer metricshelper.PutStringSlice(tags) *tags = append(*tags, metricshelper.GetDirection(dir)) *tags = appendConnectionState(*tags, cs) + *tags = append(*tags, getIPVersion(laddr)) connsClosed.WithLabelValues(*tags...).Inc() - - *tags = (*tags)[:0] - *tags = append(*tags, metricshelper.GetDirection(dir)) - *tags = appendConnectionState(*tags, cs) connDuration.WithLabelValues(*tags...).Observe(duration.Seconds()) } -func (m *metricsTracer) CompletedHandshake(t time.Duration, cs network.ConnectionState) { +func (m *metricsTracer) CompletedHandshake(t time.Duration, cs network.ConnectionState, laddr ma.Multiaddr) { tags := metricshelper.GetStringSlice() defer metricshelper.PutStringSlice(tags) *tags = appendConnectionState(*tags, cs) + *tags = append(*tags, getIPVersion(laddr)) connHandshakeLatency.WithLabelValues(*tags...).Observe(t.Seconds()) } @@ -171,5 +185,6 @@ func (m *metricsTracer) FailedDialing(addr ma.Multiaddr, err error) { defer metricshelper.PutStringSlice(tags) *tags = append(*tags, transport, e) + *tags = append(*tags, getIPVersion(addr)) dialError.WithLabelValues(*tags...).Inc() } diff --git a/p2p/net/swarm/swarm_metrics_test.go b/p2p/net/swarm/swarm_metrics_test.go index 24af849304..de159217d8 100644 --- a/p2p/net/swarm/swarm_metrics_test.go +++ b/p2p/net/swarm/swarm_metrics_test.go @@ -29,13 +29,15 @@ func BenchmarkMetricsConnOpen(b *testing.B) { } _, pub, err := crypto.GenerateEd25519Key(rand.Reader) require.NoError(b, err) + quicAddr := ma.StringCast("/ip4/1.2.3.4/udp/1/quic") + tcpAddr := ma.StringCast("/ip4/1.2.3.4/tcp/1/") tr := NewMetricsTracer() for i := 0; i < b.N; i++ { switch i % 2 { case 0: - tr.OpenedConnection(network.DirInbound, pub, quicConnState) + tr.OpenedConnection(network.DirInbound, pub, quicConnState, quicAddr) case 1: - tr.OpenedConnection(network.DirInbound, pub, tcpConnState) + tr.OpenedConnection(network.DirInbound, pub, tcpConnState, tcpAddr) } } } @@ -77,12 +79,16 @@ func TestMetricsNoAllocNoCover(t *testing.T) { } tests := map[string]func(){ - "OpenedConnection": func() { mt.OpenedConnection(randItem(directions), randItem(keys), randItem(connections)) }, + "OpenedConnection": func() { + mt.OpenedConnection(randItem(directions), randItem(keys), randItem(connections), randItem(addrs)) + }, "ClosedConnection": func() { - mt.ClosedConnection(randItem(directions), time.Duration(mrand.Intn(100))*time.Second, randItem(connections)) + mt.ClosedConnection(randItem(directions), time.Duration(mrand.Intn(100))*time.Second, randItem(connections), randItem(addrs)) + }, + "CompletedHandshake": func() { + mt.CompletedHandshake(time.Duration(mrand.Intn(100))*time.Second, randItem(connections), randItem(addrs)) }, - "CompletedHandshake": func() { mt.CompletedHandshake(time.Duration(mrand.Intn(100))*time.Second, randItem(connections)) }, - "FailedDialing": func() { mt.FailedDialing(randItem(addrs), randItem(errors)) }, + "FailedDialing": func() { mt.FailedDialing(randItem(addrs), randItem(errors)) }, } for method, f := range tests {