-
Notifications
You must be signed in to change notification settings - Fork 0
/
train_hacker_test.py
110 lines (81 loc) · 2.51 KB
/
train_hacker_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import gym
from time import time
from keras.models import Sequential, model_from_json
from keras.optimizers import RMSprop
from keras.layers import *
from keras import backend as K
from keras.utils import to_categorical
from keras.callbacks import TensorBoard
from collections import deque
from itertools import islice
import random
import numpy as np
from time import sleep
# Create the Atari Fishing Derby environment that exposes the raw 128-byte
# console RAM as the observation (hence the manual RAM-offset feature
# extraction in phi below).
env = gym.make('FishingDerby-ram-v4')
# Fix the environment's RNG seed so episodes are reproducible.
env.seed(42)
def phi(x):
    """Map a raw Atari RAM observation to a compact 3-feature state vector.

    Reads fixed RAM offsets (presumably line position, one fish's x, and a
    hook flag — TODO confirm against a RAM map) and returns
    [horizontal offset, vertical offset from a fixed depth, hooked flag].
    """
    rod_x = int(x[32])
    rod_y = int(x[67])
    fish_x = int(x[70])
    fish_y = 245  # fixed reference depth for the tracked fish
    dx = rod_x - fish_x
    dy = fish_y - rod_y
    hooked = 1 if x[113] != 0 else 0
    return np.array([dx, dy, hooked])
# Derive the state dimensionality from one example observation, and restrict
# the agent to a reduced action set (indices into the full Atari action space).
observation = env.reset()
state_size = phi(observation).shape[0]
actions = [0, 2, 3, 4, 5]
n_actions = len(actions)  # 5; intentionally smaller than env.action_space.n
print(env.unwrapped.get_action_meanings())
print(f'State size: {state_size}')
test = True        # NOTE(review): flag is never read in the visible code
load_model = True  # when True, restore a previously trained network from disk
hist_size = 1      # number of stacked history frames fed to the model

# Initialize the Q-value network: rebuild the architecture from its JSON
# description, then restore the trained weights.
model = None
if load_model:
    # Fix: use a context manager instead of manual open/read/close so the
    # file handle is released even if reading raises.
    with open('model.json', 'r') as json_file:
        loaded_model_json = json_file.read()
    model = model_from_json(loaded_model_json)
    model.load_weights("model.h5")
# Note: pass in_keras=False to use this function with raw numbers or numpy
# arrays for testing.
def huber_loss(a, b, in_keras=True):
    """Huber loss with delta = 1 between predictions `a` and targets `b`.

    Quadratic for |a - b| <= 1 and linear beyond that, so large TD errors
    do not dominate the gradient.
    """
    error = a - b
    squared = 0.5 * error * error
    linear = abs(error) - 0.5
    is_large = abs(error) > 1.0
    if in_keras:
        # Keras won't multiply floats by booleans, so cast the mask to float.
        is_large = K.cast(is_large, 'float32')
    # Select the linear branch where the mask is set, quadratic elsewhere.
    return is_large * linear + (1 - is_large) * squared
# Compile the restored model with RMSprop and the Huber loss defined above.
# lr=0.00025 — presumably copied from the training script (matches the
# classic DQN learning rate); confirm against the trainer.
opt = RMSprop(lr=0.00025)
model.compile(loss=huber_loss, optimizer=opt)
# e: epsilon for the epsilon-greedy policy (5% random actions).
e = 0.05
# gamma: discount factor — not used anywhere in the visible evaluation loop.
gamma = 0.99
# counter: global step count gating model usage; episode: index for logging.
counter = 0
episode = 0
# Evaluation loop: play episodes forever with an epsilon-greedy policy over
# the reduced action set, logging per-episode reward totals.
while True:
    observation = env.reset()
    total_catch_value = 0  # sum of positive rewards this episode
    total_value = 0        # net reward this episode
    done = False
    while not done:
        env.render()
        state = phi(observation)
        # Take a random action fraction e (epsilon) of the time, and always
        # until `counter` reaches hist_size so the model has enough history.
        if np.random.rand() < e or counter < hist_size:
            action = np.random.choice(range(n_actions), p=[0.05, 0.24, 0.22, 0.22, 0.27])
        else:
            q_values = model.predict(state.reshape(1, state_size, hist_size))
            action = q_values[0].argsort()[-1]  # greedy: index of the max Q-value
        # Take the chosen action, mapping the index into the full action space.
        observation_, reward, done, info = env.step(actions[action])
        # Fix: `counter` was never incremented, so `counter < hist_size`
        # stayed true forever and the loaded model was never queried.
        counter += 1
        # Fix: `reward` was discarded, so the printed totals were always 0.
        total_value += reward
        if reward > 0:
            total_catch_value += reward
        observation = observation_
    print('Finished episode', episode, total_catch_value, total_value, counter, e)
    episode += 1