forked from 3XX0/pooly
-
Notifications
You must be signed in to change notification settings - Fork 0
/
bandit.go
130 lines (117 loc) · 3.17 KB
/
bandit.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
package pooly
import (
"math"
"math/rand"
"sync"
)
// SoftMax strategy varies host selection probabilities as a graded function of their estimated scores.
// The temperature parameter is used to tweak the algorithm behavior:
// high temperature (+inf) means that all hosts will have nearly the same probability of being selected (equiprobable)
// low temperature (+0) favors a greedy selection and will tend to select hosts having the highest scores
type SoftMax struct {
temperature float32
}
// NewSoftMax creates a new SoftMax bandit strategy.
func NewSoftMax(temperature float32) *SoftMax {
return &SoftMax{temperature}
}
// Select implements the Selecter interface.
func (s *SoftMax) Select(hosts map[string]*Host) *Host {
var sum, prob float64
exp := make(map[*Host]float64, len(hosts))
for _, h := range hosts {
score := h.Score()
if score < 0 { // no score recorded
exp[h] = 0
continue
}
exp[h] = math.Exp(score / float64(s.temperature))
sum += exp[h]
}
p := rand.Float64()
for _, h := range hosts {
if sum == 0 {
return h
}
prob += exp[h] / sum // cumulative probability
if prob > p {
return h
}
}
return nil
}
// EpsilonGreedy strategy selects generally the host having the highest score (greedy) but every once in a while
// it will randomly explore for other alternatives.
// The epsilon parameter (0-1) defines the proportion that the exploration phase occupies (e.g 1 for 100%).
type EpsilonGreedy struct {
epsilon float32
}
// NewEpsilonGreedy creates a new EpsilonGreedy bandit strategy.
func NewEpsilonGreedy(epsilon float32) *EpsilonGreedy {
return &EpsilonGreedy{epsilon}
}
// Select implements the Selecter interface.
func (e *EpsilonGreedy) Select(hosts map[string]*Host) (host *Host) {
if rand.Float32() > e.epsilon { // exploit
var max float64 = -1
for _, h := range hosts {
score := h.Score()
if max = math.Max(max, score); max == score {
host = h
}
}
} else { // explore
i, n := 0, rand.Intn(len(hosts))
for _, h := range hosts {
if i == n {
host = h
break
}
i++
}
}
return
}
// RoundRobin strategy selects hosts in circular manner with every request returning the next host in line.
type RoundRobin struct {
sync.Mutex
nextSchedule int64
nextAvailSlot int64
}
// NewRoundRobin creates a new RoundRobin bandit strategy.
func NewRoundRobin() *RoundRobin {
return new(RoundRobin)
}
// Select implements the Selecter interface.
func (r *RoundRobin) Select(hosts map[string]*Host) (host *Host) {
var offset int64
var found bool
// XXX score is not used, use it to attribute round robin scheduling instead
// we don't need proper synchronization since score memoization isn't running here
r.Lock()
for _, h := range hosts {
if h.score < 0 { // no score recorded
h.score = float64(r.nextAvailSlot)
r.nextAvailSlot++
}
if int64(h.score) == r.nextSchedule {
offset = 1
host = h
found = true
}
if !found {
// Find the next best schedule
o := int64(h.score) - r.nextSchedule
if o < 0 {
o = r.nextAvailSlot + o
}
if offset == 0 || o < offset {
offset = o + 1
host = h
}
}
}
r.nextSchedule = (r.nextSchedule + offset) % r.nextAvailSlot
r.Unlock()
return
}