From fad10928546420c8628b8aa217795432e7a6f15b Mon Sep 17 00:00:00 2001 From: Yuri van Oers Date: Wed, 11 Nov 2020 22:30:11 +0100 Subject: [PATCH] Fix uneven balancing between nodes Rename nameIndex var to names --- dkron/agent.go | 12 ++++++------ dkron/agent_test.go | 46 ++++++++++++++++++++++++++++++++++----------- 2 files changed, 41 insertions(+), 17 deletions(-) diff --git a/dkron/agent.go b/dkron/agent.go index d95339c17..a439c949c 100644 --- a/dkron/agent.go +++ b/dkron/agent.go @@ -705,17 +705,17 @@ func (a *Agent) processFilteredNodes(job *Job) (map[string]string, map[string]st } // Create an array of node names to aid in computing resulting set based on cardinality - var nameIndex []string + var names []string for name := range execNodes { - nameIndex = append(nameIndex, name) + names = append(names, name) } nodes := make(map[string]string) rand.Seed(time.Now().UnixNano()) for ; cardinality > 0; cardinality-- { // Pick a node, any node - randomIndex := rand.Intn(cardinality) - m := execNodes[nameIndex[randomIndex]] + randomIndex := rand.Intn(len(names)) + m := execNodes[names[randomIndex]] // Store name and address if addr, ok := m.Tags["rpc_addr"]; ok { @@ -725,8 +725,8 @@ func (a *Agent) processFilteredNodes(job *Job) (map[string]string, map[string]st } // Swap picked node with the first one and shorten array, so node can't get picked again - nameIndex[randomIndex], nameIndex[0] = nameIndex[0], nameIndex[randomIndex] - nameIndex = nameIndex[1:] + names[randomIndex], names[0] = names[0], names[randomIndex] + names = names[1:] } return nodes, tags, nil diff --git a/dkron/agent_test.go b/dkron/agent_test.go index 48413b6b8..b454c8e0e 100644 --- a/dkron/agent_test.go +++ b/dkron/agent_test.go @@ -1,6 +1,7 @@ package dkron import ( + "fmt" "io/ioutil" "os" "testing" @@ -283,24 +284,47 @@ func Test_processFilteredNodes(t *testing.T) { // Test two tags matching same 3 servers and cardinality of 1 should always return 1 server - // Do this 10 times: an old bug caused this to sometimes succeed and sometimes fail due to the use of math.rand - // Statistically, with 10 tries about 3 should succeed and the rest should fail, if the code is buggy. - for i := 0; i < 10; i++ { - job8 := &Job{ - Name: "test_job_7", - Tags: map[string]string{ - "additional": "value:1", - "additional2": "value2:1", - }, - } - + // Do this multiple times: an old bug caused this to sometimes succeed and + // sometimes fail (=return no nodes at all) due to the use of math.rand + // Statistically, about 33% should succeed and the rest should fail if + // the code is buggy. + // Another bug caused one node to be favored over the others. With a + // large enough number of attempts, each node should be chosen about 1/3 + // of the time. + job8 := &Job{ + Name: "test_job_8", + Tags: map[string]string{ + "additional": "value:1", + "additional2": "value2:1", + }, + } + distrib := make(map[string]int) + var sampleSize = 1000 + for i := 0; i < sampleSize; i++ { nodes, tags, err = a1.processFilteredNodes(job8) require.NoError(t, err) assert.Len(t, nodes, 1) assert.Equal(t, tags["additional"], "value") assert.Equal(t, tags["additional2"], "value2") + for name := range nodes { + distrib[name] = distrib[name] + 1 + } + } + + // Each node must have been chosen between 30% and 36% of the time, + // for the distribution to be considered equal. + // Note: This test should almost never, but still can, fail even if the + // code is fine. To fix this, the randomizer ought to be mocked. + for name, count := range distrib { + fmt.Println(name, float64(count)/float64(sampleSize)*100.0, "%") } + assert.Greater(t, float64(distrib["test1"])/float64(sampleSize), 0.3) + assert.Less(t, float64(distrib["test1"])/float64(sampleSize), 0.36) + assert.Greater(t, float64(distrib["test2"])/float64(sampleSize), 0.3) + assert.Less(t, float64(distrib["test2"])/float64(sampleSize), 0.36) + assert.Greater(t, float64(distrib["test3"])/float64(sampleSize), 0.3) + assert.Less(t, float64(distrib["test3"])/float64(sampleSize), 0.36) // Clean up a1.Stop()