From f1786f9454e15f3c82c01a3ee39c3a6f45e087a1 Mon Sep 17 00:00:00 2001 From: Conner Fromknecht Date: Wed, 17 Feb 2021 18:13:29 -0800 Subject: [PATCH 1/3] routing: dial back max concurrent block fetches This commit reduces the number of concurrent validation operations the router will perform when fully validating the channel graph. Reports from several users indicate that GetInfo would hang for several minutes, which is believed to be caused by attempting to validate massive amounts of channels in parallel. This commit returns the limit back to its original state before adding the batched gossip improvements. We keep the 1000 concurrent validation request limit for AssumeChannelValid, since we don't fetch blocks in that case. This allows us to still keep the performance benefits on mobile/low-resource devices. --- routing/router.go | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/routing/router.go b/routing/router.go index b308338a66..0cb0ced8e4 100644 --- a/routing/router.go +++ b/routing/router.go @@ -3,6 +3,7 @@ package routing import ( "bytes" "fmt" + "runtime" "sync" "sync/atomic" "time" @@ -914,7 +915,28 @@ func (r *ChannelRouter) networkHandler() { // We'll use this validation barrier to ensure that we process all jobs // in the proper order during parallel validation. - validationBarrier := NewValidationBarrier(1000, r.quit) + // + // NOTE: For AssumeChannelValid, we bump up the maximum number of + // concurrent validation requests since there are no blocks being + // fetched. This significantly increases the performance of IGD for + // neutrino nodes. + // + // However, we dial back to use multiple of the number of cores when + // fully validating, to avoid fetching up to 1000 blocks from the + // backend. On bitcoind, this will empirically cause massive latency + // spikes when executing this many concurrent RPC calls. Critical + // subsystems or basic rpc calls that rely on calls such as GetBestBlock + // will hang due to excessive load. + // + // See https://github.com/lightningnetwork/lnd/issues/4892. + var validationBarrier *ValidationBarrier + if r.cfg.AssumeChannelValid { + validationBarrier = NewValidationBarrier(1000, r.quit) + } else { + validationBarrier = NewValidationBarrier( + 4*runtime.NumCPU(), r.quit, + ) + } for { From 5fd74659bcbda760771700751683f68e66ebd730 Mon Sep 17 00:00:00 2001 From: Conner Fromknecht Date: Wed, 17 Feb 2021 18:55:56 -0800 Subject: [PATCH 2/3] routing: avoid modifying AssumeChannelValid in unit tests This produces a race condition when reading AssumeChannelValid from a different goroutine. Instead we isolate the test cases and initial AssumeChannelValid properly. --- routing/router_test.go | 57 +++++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/routing/router_test.go b/routing/router_test.go index 5db80dc97f..6c9299fe4b 100644 --- a/routing/router_test.go +++ b/routing/router_test.go @@ -72,6 +72,14 @@ func (c *testCtx) RestartRouter() error { func createTestCtxFromGraphInstance(startingHeight uint32, graphInstance *testGraphInstance) ( *testCtx, func(), error) { + return createTestCtxFromGraphInstanceAssumeValid( + startingHeight, graphInstance, false, + ) +} + +func createTestCtxFromGraphInstanceAssumeValid(startingHeight uint32, + graphInstance *testGraphInstance, assumeValid bool) (*testCtx, func(), error) { + // We'll initialize an instance of the channel router with mock // versions of the chain and channel notifier. As we don't need to test // any p2p functionality, the peer send and switch send messages won't @@ -124,8 +132,9 @@ func createTestCtxFromGraphInstance(startingHeight uint32, graphInstance *testGr next := atomic.AddUint64(&uniquePaymentID, 1) return next, nil }, - PathFindingConfig: pathFindingConfig, - Clock: clock.NewTestClock(time.Unix(1, 0)), + PathFindingConfig: pathFindingConfig, + Clock: clock.NewTestClock(time.Unix(1, 0)), + AssumeChannelValid: assumeValid, }) if err != nil { return nil, nil, fmt.Errorf("unable to create router %v", err) @@ -2032,6 +2041,15 @@ func TestPruneChannelGraphStaleEdges(t *testing.T) { func TestPruneChannelGraphDoubleDisabled(t *testing.T) { t.Parallel() + t.Run("no_assumechannelvalid", func(t *testing.T) { + testPruneChannelGraphDoubleDisabled(t, false) + }) + t.Run("assumechannelvalid", func(t *testing.T) { + testPruneChannelGraphDoubleDisabled(t, true) + }) +} + +func testPruneChannelGraphDoubleDisabled(t *testing.T, assumeValid bool) { // We'll create the following test graph so that only the last channel // is pruned. We'll use a fresh timestamp to ensure they're not pruned // according to that heuristic. @@ -2123,34 +2141,37 @@ func TestPruneChannelGraphDoubleDisabled(t *testing.T) { defer testGraph.cleanUp() const startingHeight = 100 - ctx, cleanUp, err := createTestCtxFromGraphInstance( - startingHeight, testGraph, + ctx, cleanUp, err := createTestCtxFromGraphInstanceAssumeValid( + startingHeight, testGraph, assumeValid, ) if err != nil { t.Fatalf("unable to create test context: %v", err) } defer cleanUp() - // All the channels should exist within the graph before pruning them. - assertChannelsPruned(t, ctx.graph, testChannels) - - // If we attempt to prune them without AssumeChannelValid being set, - // none should be pruned. - if err := ctx.router.pruneZombieChans(); err != nil { - t.Fatalf("unable to prune zombie channels: %v", err) + // All the channels should exist within the graph before pruning them + // when not using AssumeChannelValid, otherwise we should have pruned + // the last channel on startup. + if !assumeValid { + assertChannelsPruned(t, ctx.graph, testChannels) + } else { + prunedChannel := testChannels[len(testChannels)-1].ChannelID + assertChannelsPruned(t, ctx.graph, testChannels, prunedChannel) } - assertChannelsPruned(t, ctx.graph, testChannels) - - // Now that AssumeChannelValid is set, we'll prune the graph again and - // the last channel should be the only one pruned. - ctx.router.cfg.AssumeChannelValid = true if err := ctx.router.pruneZombieChans(); err != nil { t.Fatalf("unable to prune zombie channels: %v", err) } - prunedChannel := testChannels[len(testChannels)-1].ChannelID - assertChannelsPruned(t, ctx.graph, testChannels, prunedChannel) + // If we attempted to prune them without AssumeChannelValid being set, + // none should be pruned. Otherwise the last channel should still be + // pruned. + if !assumeValid { + assertChannelsPruned(t, ctx.graph, testChannels) + } else { + prunedChannel := testChannels[len(testChannels)-1].ChannelID + assertChannelsPruned(t, ctx.graph, testChannels, prunedChannel) + } } // TestFindPathFeeWeighting tests that the findPath method will properly prefer From 7bfec3f8c21a7dd45e1c948303886107c613945a Mon Sep 17 00:00:00 2001 From: Conner Fromknecht Date: Thu, 18 Feb 2021 08:52:54 -0800 Subject: [PATCH 3/3] build/version: bump to v0.12.1-beta.rc6 --- build/version.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/version.go b/build/version.go index b8b2fec525..faf3fe3db1 100644 --- a/build/version.go +++ b/build/version.go @@ -48,7 +48,7 @@ const ( // AppPreRelease MUST only contain characters from semanticAlphabet // per the semantic versioning spec. - AppPreRelease = "beta.rc5" + AppPreRelease = "beta.rc6" ) func init() {