Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

swarm/network: Saturation check for healthy networks #19071

Merged
merged 9 commits into from
Feb 14, 2019
74 changes: 64 additions & 10 deletions swarm/network/kademlia.go
Original file line number Diff line number Diff line change
Expand Up @@ -625,7 +625,8 @@ func (k *Kademlia) string() string {
// used for testing only
// TODO move to separate testing tools file
type PeerPot struct {
NNSet [][]byte
NNSet [][]byte
PeersPerBin []int
}

// NewPeerPotMap creates a map of pot record of *BzzAddr with keys
Expand All @@ -651,6 +652,7 @@ func NewPeerPotMap(neighbourhoodSize int, addrs [][]byte) map[string]*PeerPot {

// all nn-peers
var nns [][]byte
peersPerBin := make([]int, depth)

// iterate through the neighbours, going from the deepest to the shallowest
np.EachNeighbour(a, Pof, func(val pot.Val, po int) bool {
Expand All @@ -664,14 +666,18 @@ func NewPeerPotMap(neighbourhoodSize int, addrs [][]byte) map[string]*PeerPot {
// a neighbor is any peer in or deeper than the depth
if po >= depth {
nns = append(nns, addr)
return true
} else {
// for peers < depth, we just count the number in each bin
// the bin is the index of the slice
peersPerBin[po]++
}
return false
return true
})

log.Trace(fmt.Sprintf("%x PeerPotMap NNS: %s", addrs[i][:4], LogAddrs(nns)))
log.Trace(fmt.Sprintf("%x PeerPotMap NNS: %s, peersPerBin", addrs[i][:4], LogAddrs(nns)))
ppmap[common.Bytes2Hex(a)] = &PeerPot{
NNSet: nns,
NNSet: nns,
PeersPerBin: peersPerBin,
}
}
return ppmap
Expand All @@ -695,6 +701,39 @@ func (k *Kademlia) saturation() int {
return prev
}

// isSaturated reports whether the kademlia counts as saturated when compared
// against an all-knowing view of the network.
//
// peersPerBin[po] is the number of peers available in bin po according to the
// PeerPot; depth is the depth the caller has already computed (GetHealthInfo
// has it at hand, so it is required as a parameter instead of being derived
// from k again).
//
// A bin shallower than depth is unsaturated when it holds fewer than
// k.MinBinSize connections while the PeerPot lists more peers in that bin than
// are connected. A bin with no further available peers is never counted
// against the node: no additional connection could ever be established there.
//
// It returns true only if no unsaturated bin is found, and always returns
// false when depth differs from len(peersPerBin).
func (k *Kademlia) isSaturated(peersPerBin []int, depth int) bool {
	// the per-bin counts must cover exactly the bins below depth
	if len(peersPerBin) != depth {
		return false
	}

	unsaturatedBins := []int{}
	inspectBin := func(po, size int, _ func(func(val pot.Val) bool) bool) bool {
		// only bins shallower than depth are checked; returning false is
		// presumably what ends the EachBin walk - confirm against the pot package
		if po >= depth {
			return false
		}

		available := peersPerBin[po]
		log.Trace("peers per bin", "peersPerBin[po]", available, "po", po)

		// enough connections, or nobody left to connect to: bin is fine
		if size >= k.MinBinSize || size >= available {
			return true
		}

		// there are peers in the PeerPot that could still fill this bin
		log.Trace("connections for po", "po", po, "size", size)
		unsaturatedBins = append(unsaturatedBins, po)
		return true
	}
	k.conns.EachBin(k.base, Pof, 0, inspectBin)

	log.Trace("list of unsaturated bins", "unsaturatedBins", unsaturatedBins)
	return len(unsaturatedBins) == 0
}

// knowNeighbours tests if all neighbours in the peerpot
// are found among the peers known to the kademlia
// It is used in Healthy function for testing only
Expand Down Expand Up @@ -777,19 +816,21 @@ type Health struct {
ConnectNN bool // whether node is connected to all its neighbours
CountConnectNN int // amount of neighbours connected to
MissingConnectNN [][]byte // which neighbours we should have been connected to but we're not
Saturated bool // whether we are connected to all the peers we would have liked to
Hive string
// Saturated: true if every bin < depth either has at least MinBinSize connections or,
// when it has fewer, is connected to all the peers available in that bin
Saturated bool
Hive string
}

// Healthy reports the health state of the kademlia connectivity
// GetHealthInfo reports the health state of the kademlia connectivity
//
// The PeerPot argument provides an all-knowing view of the network
// The resulting Health object is a result of comparisons between
// what is the actual composition of the kademlia in question (the receiver), and
// what SHOULD it have been when we take all we know about the network into consideration.
//
// used for testing only
func (k *Kademlia) Healthy(pp *PeerPot) *Health {
func (k *Kademlia) GetHealthInfo(pp *PeerPot) *Health {
k.lock.RLock()
defer k.lock.RUnlock()
if len(pp.NNSet) < k.NeighbourhoodSize {
Expand All @@ -798,7 +839,10 @@ func (k *Kademlia) Healthy(pp *PeerPot) *Health {
gotnn, countgotnn, culpritsgotnn := k.connectedNeighbours(pp.NNSet)
knownn, countknownn, culpritsknownn := k.knowNeighbours(pp.NNSet)
depth := depthForPot(k.conns, k.NeighbourhoodSize, k.base)
saturated := k.saturation() < depth

// check saturation
saturated := k.isSaturated(pp.PeersPerBin, depth)

log.Trace(fmt.Sprintf("%08x: healthy: knowNNs: %v, gotNNs: %v, saturated: %v\n", k.base, knownn, gotnn, saturated))
return &Health{
KnowNN: knownn,
Expand All @@ -811,3 +855,13 @@ func (k *Kademlia) Healthy(pp *PeerPot) *Health {
Hive: k.string(),
}
}

// Healthy returns the strict interpretation of health for a Health struct.
// Strict health requires all of the following conditions to be true:
// - we know at least one peer (CountKnowNN > 0)
// - we know all neighbors (KnowNN)
// - we are connected to all known neighbors (ConnectNN)
// - it is saturated (Saturated)
func (h *Health) Healthy() bool {
	switch {
	case !h.KnowNN, !h.ConnectNN:
		// a neighbour is unknown or not connected
		return false
	case h.CountKnowNN <= 0:
		// no peer known at all
		return false
	default:
		return h.Saturated
	}
}
126 changes: 111 additions & 15 deletions swarm/network/kademlia_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,46 @@ func TestNeighbourhoodDepth(t *testing.T) {
testNum++
}

// TestHighMinBinSize verifies that the saturation check also holds when
// MinBinSize is greater than 2: as long as a bin has fewer than MinBinSize
// connections and more peers are available than connected, the kademlia
// must be reported as unhealthy.
func TestHighMinBinSize(t *testing.T) {
	// peer in bin 1 that starts out registered but not connected
	first := "10000000"

	// binOnePeer builds the address of the i-th peer to connect in bin 1
	binOnePeer := func(i int) string {
		return fmt.Sprintf("1000%b", 8|i)
	}

	// run the same scenario for MinBinSizes of 3 to 5
	for _, minBinSize := range []int{3, 4, 5} {
		// fresh test kademlia with the desired MinBinSize
		tk := newTestKademlia(t, "11111111")
		tk.KadParams.MinBinSize = minBinSize

		// connect a couple of peers (so we have NN and depth)
		tk.On("00000000") // bin 0
		tk.On("11100000") // bin 3
		tk.On("11110000") // bin 4

		// one known-but-unconnected peer at bin 1
		tk.Register(first)

		// connect one more peer per step; the kademlia is expected to stay
		// unhealthy for every step before the last one
		last := minBinSize - 1
		for i := 1; i < last; i++ {
			tk.On(binOnePeer(i))
			tk.checkHealth(false)
		}

		// final step: connect the last peer and the registered one as well,
		// after which the kademlia is expected to be healthy
		tk.On(binOnePeer(last))
		tk.On(first)
		tk.checkHealth(true)
	}
}

// TestHealthStrict tests the simplest definition of health
// Which means whether we are connected to all neighbors we know of
func TestHealthStrict(t *testing.T) {
Expand All @@ -176,60 +216,116 @@ func TestHealthStrict(t *testing.T) {
// no peers
// unhealthy (and lonely)
tk := newTestKademlia(t, "11111111")
tk.checkHealth(false, false)
tk.checkHealth(false)

// know one peer but not connected
// unhealthy
tk.Register("11100000")
tk.checkHealth(false, false)
tk.checkHealth(false)

// know one peer and connected
// healthy
// unhealthy: not saturated
tk.On("11100000")
tk.checkHealth(true, false)
tk.checkHealth(true)

// know two peers, only one connected
// unhealthy
tk.Register("11111100")
tk.checkHealth(false, false)
tk.checkHealth(false)

// know two peers and connected to both
// healthy
tk.On("11111100")
tk.checkHealth(true, false)
tk.checkHealth(true)

// know three peers, connected to the two deepest
// healthy
tk.Register("00000000")
tk.checkHealth(true, false)
tk.checkHealth(false)

// know three peers, connected to all three
// healthy
tk.On("00000000")
tk.checkHealth(true, false)
tk.checkHealth(true)

// add fourth peer deeper than current depth
// unhealthy
tk.Register("11110000")
tk.checkHealth(false, false)
tk.checkHealth(false)

// connected to three deepest peers
// healthy
tk.On("11110000")
tk.checkHealth(true, false)
tk.checkHealth(true)

// add additional peer in same bin as deepest peer
// unhealthy
tk.Register("11111101")
tk.checkHealth(false, false)
tk.checkHealth(false)

// four deepest of five peers connected
// healthy
tk.On("11111101")
tk.checkHealth(true, false)
tk.checkHealth(true)

// add additional peer in bin 0
// unhealthy: unsaturated bin 0, 2 known but 1 connected
tk.Register("00000001")
tk.checkHealth(false)

// Connect second in bin 0
// healthy
tk.On("00000001")
tk.checkHealth(true)

// add peer in bin 1
// unhealthy, as it is known but not connected
tk.Register("10000000")
tk.checkHealth(false)

// connect peer in bin 1
// depth change, is now 1
// healthy, 1 peer in bin 1 known and connected
tk.On("10000000")
tk.checkHealth(true)

// add second peer in bin 1
// unhealthy, as it is known but not connected
tk.Register("10000001")
tk.checkHealth(false)

// connect second peer in bin 1
// healthy,
tk.On("10000001")
tk.checkHealth(true)

// connect third peer in bin 1
// healthy,
tk.On("10000011")
tk.checkHealth(true)

// add peer in bin 2
// unhealthy, no depth change
tk.Register("11000000")
tk.checkHealth(false)

// connect peer in bin 2
// depth change - as we already have peers in bin 3 and 4,
// we have contiguous bins, no bin < po 5 is empty -> depth 5
// healthy, every bin < depth has the max available peers,
// even if they are < MinBinSize
tk.On("11000000")
tk.checkHealth(true)

// add peer in bin 2
// unhealthy, peer bin is below depth 5 but
// has more available peers (2) than connected ones (1)
// --> unsaturated
tk.Register("11000011")
tk.checkHealth(false)
}

func (tk *testKademlia) checkHealth(expectHealthy bool, expectSaturation bool) {
func (tk *testKademlia) checkHealth(expectHealthy bool) {
tk.t.Helper()
kid := common.Bytes2Hex(tk.BaseAddr())
addrs := [][]byte{tk.BaseAddr()}
Expand All @@ -239,13 +335,13 @@ func (tk *testKademlia) checkHealth(expectHealthy bool, expectSaturation bool) {
})

pp := NewPeerPotMap(tk.NeighbourhoodSize, addrs)
healthParams := tk.Healthy(pp[kid])
healthParams := tk.GetHealthInfo(pp[kid])

// definition of health, all conditions must be true:
// - we at least know one peer
// - we know all neighbors
// - we are connected to all known neighbors
health := healthParams.KnowNN && healthParams.ConnectNN && healthParams.CountKnowNN > 0
health := healthParams.Healthy()
if expectHealthy != health {
tk.t.Fatalf("expected kademlia health %v, is %v\n%v", expectHealthy, health, tk.String())
}
Expand Down
2 changes: 1 addition & 1 deletion swarm/network/simulation/kademlia.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ func (s *Simulation) WaitTillHealthy(ctx context.Context) (ill map[enode.ID]*net
addr := common.Bytes2Hex(k.BaseAddr())
pp := ppmap[addr]
//call Healthy RPC
h := k.Healthy(pp)
h := k.GetHealthInfo(pp)
//print info
log.Debug(k.String())
log.Debug("kademlia", "connectNN", h.ConnectNN, "knowNN", h.KnowNN)
Expand Down
6 changes: 3 additions & 3 deletions swarm/network/simulations/discovery/discovery_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ func discoverySimulation(nodes, conns int, adapter adapters.NodeAdapter) (*simul
}

healthy := &network.Health{}
if err := client.Call(&healthy, "hive_healthy", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
return false, fmt.Errorf("error getting node health: %s", err)
}
log.Debug(fmt.Sprintf("node %4s healthy: connected nearest neighbours: %v, know nearest neighbours: %v,\n\n%v", id, healthy.ConnectNN, healthy.KnowNN, healthy.Hive))
Expand Down Expand Up @@ -352,7 +352,7 @@ func discoveryPersistenceSimulation(nodes, conns int, adapter adapters.NodeAdapt
healthy := &network.Health{}
addr := id.String()
ppmap := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs)
if err := client.Call(&healthy, "hive_healthy", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
return fmt.Errorf("error getting node health: %s", err)
}

Expand Down Expand Up @@ -422,7 +422,7 @@ func discoveryPersistenceSimulation(nodes, conns int, adapter adapters.NodeAdapt
healthy := &network.Health{}
ppmap := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs)

if err := client.Call(&healthy, "hive_healthy", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
return false, fmt.Errorf("error getting node health: %s", err)
}
log.Info(fmt.Sprintf("node %4s healthy: got nearest neighbours: %v, know nearest neighbours: %v", id, healthy.ConnectNN, healthy.KnowNN))
Expand Down