Skip to content

Commit

Permalink
multi: add htlc resolution time to routing dashboard
Browse files Browse the repository at this point in the history
  • Loading branch information
carlaKC committed Nov 17, 2020
1 parent 6c0ee9f commit 7bda61d
Show file tree
Hide file tree
Showing 2 changed files with 288 additions and 10 deletions.
51 changes: 41 additions & 10 deletions collectors/htlcs_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"
"strconv"
"sync"
"time"

"github.com/lightninglabs/lndclient"
"github.com/lightningnetwork/lnd/channeldb"
Expand Down Expand Up @@ -44,8 +45,13 @@ type htlcMonitor struct {
// htlcs.
resolvedCounter *prometheus.CounterVec

// activeHtlcs holds a map of our currently active htlcs.
activeHtlcs map[htlcswitch.HtlcKey]struct{}
// activeHtlcs holds a map of our currently active htlcs to their
// original forward time.
activeHtlcs map[htlcswitch.HtlcKey]time.Time

// resolutionTimeHistogram tracks the time it takes our htlcs to
// resolve.
resolutionTimeHistogram *prometheus.HistogramVec

// quit is closed to signal that we need to shutdown.
quit chan struct{}
Expand All @@ -62,13 +68,32 @@ func newHtlcMonitor(router lndclient.RouterClient,

return &htlcMonitor{
router: router,
activeHtlcs: make(map[htlcswitch.HtlcKey]struct{}),
activeHtlcs: make(map[htlcswitch.HtlcKey]time.Time),
resolvedCounter: prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "lnd",
Subsystem: "htlcs",
Name: "resolved_htlcs",
Help: "count of resolved htlcs",
}, htlcLabels),
resolutionTimeHistogram: prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: "lnd",
Subsystem: "htlcs",
Name: "resolution_time",
Help: "the time (in seconds) taken to " +
"resolve a htlc",
// Buckets are broken up into 1s/10s/1m/2m/5m/
// 10m and 1h/5h/1d/7d with the logic that if a
// payment doesn't resolve quickly, it will
// likely take very long.
Buckets: []float64{
1, 10, 60, 60 * 2, 60 * 5, 60 * 10,
60 * 60, 60 * 60 * 5, 60 * 60 * 24,
60 * 60 * 24 * 7,
},
},
htlcLabels,
),
quit: make(chan struct{}),
errChan: errChan,
}
Expand All @@ -93,7 +118,7 @@ func (h *htlcMonitor) stop() {
// collectors returns all of the collectors that the htlc monitor uses.
func (h *htlcMonitor) collectors() []prometheus.Collector {
return []prometheus.Collector{
h.resolvedCounter,
h.resolvedCounter, h.resolutionTimeHistogram,
}
}

Expand Down Expand Up @@ -165,6 +190,8 @@ func (h *htlcMonitor) processHtlcEvent(event *routerrpc.HtlcEvent) error {
},
}

ts := time.Unix(0, int64(event.TimestampNs))

switch event.Event.(type) {
// If we have received a forwarding event, we add it to our map if it
// is not already present. We are ok with duplicate events, because
Expand All @@ -177,22 +204,22 @@ func (h *htlcMonitor) processHtlcEvent(event *routerrpc.HtlcEvent) error {
}

// Add to our set of known active htlcs.
h.activeHtlcs[key] = struct{}{}
h.activeHtlcs[key] = ts

case *routerrpc.HtlcEvent_SettleEvent:
err := h.recordResolution(key, event.EventType, true)
err := h.recordResolution(key, event.EventType, ts, true)
if err != nil {
return err
}

case *routerrpc.HtlcEvent_ForwardFailEvent:
err := h.recordResolution(key, event.EventType, false)
err := h.recordResolution(key, event.EventType, ts, false)
if err != nil {
return err
}

case *routerrpc.HtlcEvent_LinkFailEvent:
err := h.recordResolution(key, event.EventType, false)
err := h.recordResolution(key, event.EventType, ts, false)
if err != nil {
return err
}
Expand All @@ -207,7 +234,8 @@ func (h *htlcMonitor) processHtlcEvent(event *routerrpc.HtlcEvent) error {
// recordResolution records the outcome of a htlc resolution (settle/fail) in
// our metrics.
func (h *htlcMonitor) recordResolution(key htlcswitch.HtlcKey,
eventType routerrpc.HtlcEvent_EventType, success bool) error {
eventType routerrpc.HtlcEvent_EventType, ts time.Time,
success bool) error {

// Create the set of labels we want to track this resolution.
labels := map[string]string{
Expand Down Expand Up @@ -249,14 +277,17 @@ func (h *htlcMonitor) recordResolution(key htlcswitch.HtlcKey,
// worry if we can't find it because htlcs are only tracked in memory
// (we might have restarted after we forwarded it, so would not have it
// tracked).
_, ok := h.activeHtlcs[key]
fwdTs, ok := h.activeHtlcs[key]
if !ok {
htlcLogger.Infof("resolved htlc: %v original forward "+
"not found", key)

return nil
}

// Add the amount of time the htlc took to resolve to our histogram.
h.resolutionTimeHistogram.With(labels).Observe(ts.Sub(fwdTs).Seconds())

// Delete the htlc from our set of active htlcs.
delete(h.activeHtlcs, key)
return nil
Expand Down
247 changes: 247 additions & 0 deletions grafana/provisioning/dashboards/routing.json
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,253 @@
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": true,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"description": "A frequency distribution of the time ${HtlcType}s have taken to resolve. Note that this value is not tracked for receives.",
"fill": 1,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 24
},
"id": 11,
"interval": "",
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": false,
"total": false,
"values": false
},
"lines": false,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {},
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(lnd_htlcs_resolution_time_bucket{le=\"1\",type=\"$HtlcType\"})",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "0-1 seconds",
"refId": "A"
},
{
"expr": "sum(lnd_htlcs_resolution_time_bucket{le=\"10\",type=\"$HtlcType\"}) - ignoring(le) sum(lnd_htlcs_resolution_time_bucket{le=\"1\",type=\"$HtlcType\"})",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "1-10 seconds",
"refId": "B"
},
{
"expr": "sum(lnd_htlcs_resolution_time_bucket{le=\"60\",type=\"$HtlcType\"}) - ignoring(le) sum(lnd_htlcs_resolution_time_bucket{le=\"10\",type=\"$HtlcType\"})",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "10-60 seconds",
"refId": "C"
},
{
"expr": "sum(lnd_htlcs_resolution_time_bucket{le=\"120\",type=\"$HtlcType\"}) - ignoring(le) sum(lnd_htlcs_resolution_time_bucket{le=\"60\",type=\"$HtlcType\"})",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "1-2 minutes",
"refId": "D"
},
{
"expr": "sum(lnd_htlcs_resolution_time_bucket{le=\"300\",type=\"$HtlcType\"}) - ignoring(le) sum(lnd_htlcs_resolution_time_bucket{le=\"120\",type=\"$HtlcType\"})",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "2-5 minutes",
"refId": "E"
},
{
"expr": "sum(lnd_htlcs_resolution_time_bucket{le=\"600\",type=\"$HtlcType\"}) - ignoring(le) sum(lnd_htlcs_resolution_time_bucket{le=\"300\",type=\"$HtlcType\"})",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "5-10 minutes",
"refId": "F"
},
{
"expr": "sum(lnd_htlcs_resolution_time_bucket{le=\"3600\",type=\"$HtlcType\"}) - ignoring(le) sum(lnd_htlcs_resolution_time_bucket{le=\"600\",type=\"$HtlcType\"})",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "10-60 minutes",
"refId": "G"
},
{
"expr": "sum(lnd_htlcs_resolution_time_bucket{le=\"18000\",type=\"$HtlcType\"}) - ignoring(le) sum(lnd_htlcs_resolution_time_bucket{le=\"3600\",type=\"$HtlcType\"})",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "1-5 hours",
"refId": "H"
},
{
"expr": "sum(lnd_htlcs_resolution_time_bucket{le=\"86400\",type=\"$HtlcType\"}) - ignoring(le) sum(lnd_htlcs_resolution_time_bucket{le=\"18000\",type=\"$HtlcType\"})",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "5-24 hours",
"refId": "I"
},
{
"expr": "sum(lnd_htlcs_resolution_time_bucket{le=\"604800\",type=\"$HtlcType\"}) - ignoring(le) sum(lnd_htlcs_resolution_time_bucket{le=\"86400\",type=\"$HtlcType\"})",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "1-7 days",
"refId": "J"
},
{
"expr": "sum(lnd_htlcs_resolution_time_bucket{le=\"+Inf\",type=\"$HtlcType\"}) - ignoring(le) sum(lnd_htlcs_resolution_time_bucket{le=\"604800\",type=\"$HtlcType\"})",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "> 7 days",
"refId": "K"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Htlc Resolution Time",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "series",
"name": null,
"show": true,
"values": [
"max"
]
},
"yaxes": [
{
"format": "short",
"label": "HTLC Count",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"description": "The average time taken to settle or fail HTLCs for ${HtlcType}s. Note that this value is not tracked for receives.",
"fill": 1,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 24
},
"id": 13,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {},
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(lnd_htlcs_resolution_time_sum{type=\"$HtlcType\"})/sum(lnd_htlcs_resolution_time_count{type=\"$HtlcType\"})",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "average",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Average HTLC Resolution Time",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
}
],
"refresh": false,
Expand Down

0 comments on commit 7bda61d

Please sign in to comment.