-
Notifications
You must be signed in to change notification settings - Fork 226
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(bootstrap): take autobootstrap to completion #403
Changes from all commits
e2842f0
7cce5bd
da6edaf
632f3c5
98cf914
2fdad28
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,8 @@ import ( | |
"sync" | ||
"time" | ||
|
||
process "github.com/jbenet/goprocess" | ||
processctx "github.com/jbenet/goprocess/context" | ||
"github.com/libp2p/go-libp2p-core/routing" | ||
"github.com/multiformats/go-multiaddr" | ||
_ "github.com/multiformats/go-multiaddr-dns" | ||
|
@@ -41,52 +43,57 @@ func init() { | |
} | ||
} | ||
|
||
// BootstrapConfig runs cfg.Queries bootstrap queries every cfg.BucketPeriod. | ||
func (dht *IpfsDHT) Bootstrap(ctx context.Context) error { | ||
triggerBootstrapFnc := func() { | ||
logger.Infof("triggerBootstrapFnc: RT only has %d peers which is less than the min threshold of %d, triggering self & bucket bootstrap", | ||
dht.routingTable.Size(), minRTBootstrapThreshold) | ||
|
||
if err := dht.selfWalk(ctx); err != nil { | ||
logger.Warningf("triggerBootstrapFnc: self walk: error: %s", err) | ||
} | ||
// Start the bootstrap worker. | ||
func (dht *IpfsDHT) startBootstrapping() error { | ||
// scan the RT table periodically & do a random walk on k-buckets that haven't been queried since the given bucket period | ||
dht.proc.Go(func(proc process.Process) { | ||
ctx := processctx.OnClosingContext(proc) | ||
|
||
if err := dht.bootstrapBuckets(ctx); err != nil { | ||
logger.Warningf("triggerBootstrapFnc: bootstrap buckets: error bootstrapping: %s", err) | ||
} | ||
} | ||
scanInterval := time.NewTicker(dht.bootstrapCfg.BucketPeriod) | ||
defer scanInterval.Stop() | ||
|
||
// we should query for self periodically so we can discover closer peers | ||
go func() { | ||
for { | ||
err := dht.selfWalk(ctx) | ||
if err != nil { | ||
logger.Warningf("self walk: error: %s", err) | ||
} | ||
select { | ||
case <-time.After(dht.bootstrapCfg.SelfQueryInterval): | ||
case <-ctx.Done(): | ||
return | ||
// run bootstrap if option is set | ||
if dht.triggerAutoBootstrap { | ||
if err := dht.doBootstrap(ctx, true); err != nil { | ||
logger.Warningf("bootstrap error: %s", err) | ||
} | ||
} else { | ||
// disable the "auto-bootstrap" ticker so that no more ticks are sent to this channel | ||
scanInterval.Stop() | ||
} | ||
}() | ||
|
||
// scan the RT table periodically & do a random walk on k-buckets that haven't been queried since the given bucket period | ||
go func() { | ||
for { | ||
err := dht.bootstrapBuckets(ctx) | ||
if err != nil { | ||
logger.Warningf("bootstrap buckets: error bootstrapping: %s", err) | ||
} | ||
select { | ||
case <-time.After(dht.bootstrapCfg.RoutingTableScanInterval): | ||
case now := <-scanInterval.C: | ||
walkSelf := now.After(dht.latestSelfWalk.Add(dht.bootstrapCfg.SelfQueryInterval)) | ||
if err := dht.doBootstrap(ctx, walkSelf); err != nil { | ||
logger.Warning("bootstrap error: %s", err) | ||
} | ||
case <-dht.triggerBootstrap: | ||
triggerBootstrapFnc() | ||
logger.Infof("triggering a bootstrap: RT has %d peers", dht.routingTable.Size()) | ||
if err := dht.doBootstrap(ctx, true); err != nil { | ||
logger.Warning("bootstrap error: %s", err) | ||
} | ||
case <-ctx.Done(): | ||
return | ||
} | ||
} | ||
}() | ||
}) | ||
|
||
return nil | ||
} | ||
|
||
func (dht *IpfsDHT) doBootstrap(ctx context.Context, walkSelf bool) error { | ||
if walkSelf { | ||
if err := dht.selfWalk(ctx); err != nil { | ||
return fmt.Errorf("self walk: error: %s", err) | ||
} | ||
dht.latestSelfWalk = time.Now() | ||
} | ||
|
||
if err := dht.bootstrapBuckets(ctx); err != nil { | ||
return fmt.Errorf("bootstrap buckets: error bootstrapping: %s", err) | ||
} | ||
|
||
return nil | ||
} | ||
|
@@ -166,11 +173,13 @@ func (dht *IpfsDHT) selfWalk(ctx context.Context) error { | |
return err | ||
} | ||
|
||
// synchronous bootstrap. | ||
func (dht *IpfsDHT) bootstrapOnce(ctx context.Context) error { | ||
if err := dht.selfWalk(ctx); err != nil { | ||
return errors.Wrap(err, "failed bootstrap while searching for self") | ||
} else { | ||
return dht.bootstrapBuckets(ctx) | ||
// Bootstrap tells the DHT to get into a bootstrapped state. | ||
// | ||
// Note: the context is ignored. | ||
func (dht *IpfsDHT) Bootstrap(_ context.Context) error { | ||
select { | ||
case dht.triggerBootstrap <- struct{}{}: | ||
default: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is going to behave inconsistently. If we're currently bootstrapping, we'll return immediately. If we're going to block, a new bootstrap call should probably try to "join" an existing one. However, that will add some complexity. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @hsanjuan what is cluster's specific need:
Note: queries should still work even if the DHT isn't bootstrapped, as long as you have some peers. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I need a peer to get start getting "well-connected" after the first contact to other peer(s) [before that the peer would have had no connections). I don't need to wait until the end of the bootstrap round (it can just happen in background) You may say "well, call Bootstrap()" then and not before, but things are a bit more tricky (the dht may have already been bootstrapped externally and what not). Being able to trigger a bootstrap round on demand (independent from the recurring configured one) is handy here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
This should now be automatic.
SGTM. I agree bootstrap should actually trigger a bootstrap. @aarshkshah1992 given this, I wouldn't wait. I'd just trigger the bootstrap and move on. |
||
} | ||
return nil | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,7 +34,7 @@ func (nn *netNotifiee) Connected(n network.Network, v network.Conn) { | |
if dht.host.Network().Connectedness(p) == network.Connected { | ||
bootstrap := dht.routingTable.Size() <= minRTBootstrapThreshold | ||
dht.Update(dht.Context(), p) | ||
if bootstrap { | ||
if bootstrap && dht.triggerAutoBootstrap { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Some tests rely on a specific number of peers in the RT/ specific number of results in a query after a peer has finished connecting to other peers. However, this code causes the peer to connect to more peers/adds more peers to the RT than it intended to in the test & that causes the test to fail. So, we need to disable this feature for tests. |
||
select { | ||
case dht.triggerBootstrap <- struct{}{}: | ||
default: | ||
|
@@ -80,7 +80,7 @@ func (nn *netNotifiee) testConnection(v network.Conn) { | |
if dht.host.Network().Connectedness(p) == network.Connected { | ||
bootstrap := dht.routingTable.Size() <= minRTBootstrapThreshold | ||
dht.Update(dht.Context(), p) | ||
if bootstrap { | ||
if bootstrap && dht.triggerAutoBootstrap { | ||
select { | ||
case dht.triggerBootstrap <- struct{}{}: | ||
default: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Bootstrap is actually supposed to be async (at the moment). We can change this, but we'll need to propagate the change up the interfaces.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As discussed below, this will now be async.