Adam.py
import numpy as np


def adam(w, dw, config=None):
    """
    Perform one Adam update on the parameters w, given the gradient dw.

    config format:
    - learning_rate: Scalar learning rate.
    - beta1: Decay rate for the moving average of the first moment of the gradient.
    - beta2: Decay rate for the moving average of the second moment of the gradient.
    - epsilon: Small scalar used for smoothing to avoid dividing by zero.
    - m: Moving average of the gradient.
    - v: Moving average of the squared gradient.
    - t: Iteration number.
    """
    if config is None:
        config = {}
    config.setdefault('learning_rate', 1e-3)
    config.setdefault('beta1', 0.9)
    config.setdefault('beta2', 0.999)
    config.setdefault('epsilon', 1e-8)
    config.setdefault('m', np.zeros_like(w))
    config.setdefault('v', np.zeros_like(w))
    config.setdefault('t', 0)

    m = config['m']
    v = config['v']
    t = config['t'] + 1
    beta1 = config['beta1']
    beta2 = config['beta2']
    epsilon = config['epsilon']
    learning_rate = config['learning_rate']

    # Exponential moving averages of the gradient and the squared gradient.
    m = beta1 * m + (1 - beta1) * dw
    v = beta2 * v + (1 - beta2) * (dw ** 2)
    # Bias correction: m and v start at zero, so early estimates are rescaled.
    mb = m / (1 - beta1 ** t)
    vb = v / (1 - beta2 ** t)
    # Parameter step.
    next_w = w - learning_rate * mb / (np.sqrt(vb) + epsilon)

    config['m'] = m
    config['v'] = v
    config['t'] = t
    return next_w, config
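

# Sanity-check sketch (illustrative helper, not part of the optimizer): with
# bias correction, the first Adam step has mb = dw and vb = dw ** 2, so the
# update is lr * dw / (|dw| + eps), a step of magnitude ~learning_rate in
# each coordinate regardless of the gradient's scale.
def check_first_step():
    w = np.zeros(3)
    for scale in (1e-4, 1.0, 1e4):
        dw = scale * np.array([1.0, -1.0, 1.0])
        next_w, _ = adam(w, dw, None)
        # Each coordinate moves by ~1e-3 (the default learning rate)
        # against the sign of its gradient.
        assert np.allclose(next_w, -1e-3 * np.sign(dw), rtol=1e-3)
    print("first-step check passed")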


def test_adam():
    """Minimize f(x) = sum(x ** 2) (gradient 2 * x), starting from x = 6."""
    epochs = 100000
    x = np.full((4, 4), 6.0)
    cf = None
    for epoch in range(epochs):
        x_last = x
        x, cf = adam(x, 2 * x, cf)
        # Stop once successive iterates are numerically indistinguishable.
        if np.linalg.norm(x - x_last) <= 1e-16:
            break
    print(x)
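

# Usage sketch under assumed data (the problem and helper name are made up
# for illustration): fit the slope of y = 3 * x by mean squared error,
# calling adam() once per step with the current parameter and its gradient.
def fit_least_squares():
    rng = np.random.default_rng(0)
    x = rng.standard_normal(100)
    y = 3.0 * x
    a = np.zeros(())                 # scalar parameter as a 0-d array
    cf = {'learning_rate': 1e-1}     # larger step size for this toy problem
    for _ in range(2000):
        grad = 2 * np.mean((a * x - y) * x)   # d/da of mean((a * x - y) ** 2)
        a, cf = adam(a, grad, cf)
    print("estimated slope:", float(a))       # expected to land near 3.0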


if __name__ == "__main__":
    check_first_step()
    fit_least_squares()
    test_adam()