-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
66 lines (50 loc) · 1.96 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
"""
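Alternately train a Q-function learner on a 4x4 snake game and step through
a game in the terminal using the learner's chosen actions.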
"""
import numpy as np
import snake.snake_game as snake
import deep_rf as rf
# Board dimensions, shared by the game and by the Q-graph input.
frame_height, frame_width = 4, 4
# Number of consecutive frames that make up one state.
num_frames = 2

# Game instance handed to the learner further down.
my_game = snake.SnakeGame(board_width=frame_width, board_height=frame_height)

# Q-graph sized to the board, the frame history and the game's action list.
my_q_graph = rf.QGraph(
    name='snake_default',
    num_actions=len(my_game.action_list),
    num_frames=num_frames,
    frame_width=frame_width,
    frame_height=frame_height,
)
def my_reward(params):
    """Compute the reward for one step from a dict of step information.

    The reward is the change in score, minus 1.0 if the game is over,
    minus a constant 0.001.

    Args:
        params: mapping with the keys 'new_score', 'last_score' and
            'is_game_over'.

    Returns:
        The numeric reward for the step.
    """
    score_gain = params['new_score'] - params['last_score']
    if params['is_game_over']:
        game_over_penalty = -1.0
    else:
        game_over_penalty = 0.0
    # The constant .001 is subtracted on every call, game over or not.
    return score_gain + game_over_penalty - .001
# Learner built from the game, the Q-graph and the reward function defined
# above; it is trained and queried for actions in the rest of this script.
my_rf = rf.DeepRFLearner(my_game, my_q_graph, my_reward)
def play_one_game(deep_rf_learner):
    """Step through snake games in the terminal using the learner's actions.

    A fresh game is created, its board and the learner's Q-values are
    printed, and the learner's chosen action is applied each time the user
    submits a line of input.  Entering 'r', or anything other than 'q'
    once the game is over, starts a new game; entering 'q' returns to the
    caller.

    Restarts are handled by an outer loop instead of the function calling
    itself, so a long session with many resets does not grow the call
    stack.

    Reads the module-level ``frame_height``, ``frame_width`` and
    ``num_frames`` settings.

    Args:
        deep_rf_learner: object providing ``evaluate_q_function(state=...)``
            and ``choose_action(state=...)``.

    Returns:
        None.
    """
    # ``raw_input`` only exists on Python 2; fall back to ``input`` so the
    # prompt also works on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    def show_and_choose(game, state):
        """Print the board and Q-values for ``state``; return the action."""
        # Twenty blank lines push the previously printed board up the screen.
        # Single-argument print(...) produces the same output on Python 2
        # and Python 3.
        print("\n" * 20 + str(game))
        q_values = np.round(
            deep_rf_learner.evaluate_q_function(state=state), 3)
        print("\nQ-val with actions: " +
              str(dict(zip(game.action_list, q_values))))
        action = deep_rf_learner.choose_action(state=state)
        # str() keeps the message printable even if actions are not strings.
        print("Next Action: " + str(action))
        return action

    while True:  # one pass per game
        # Keyword arguments match the SnakeGame call used at module level.
        game = snake.SnakeGame(board_height=frame_height,
                               board_width=frame_width)
        first_frame = game.get_frame()
        # A state carries num_frames frames; the missing history is filled
        # with all-zero frames of the same shape as the first real frame.
        state_padding = tuple(np.zeros(first_frame.shape)
                              for _ in range(num_frames - 1))
        current_state = rf.State(frames_tuple=state_padding + (first_frame,))
        action = show_and_choose(game, current_state)

        while True:
            r = read_line("Press q to quit or Press r to reset: ")
            if r == 'q':
                return
            if r == 'r' or game.is_game_over():
                break  # abandon this game and start a fresh one
            game.do_action(action)
            current_state = current_state.new_state_from_old(game.get_frame())
            action = show_and_choose(game, current_state)
# Alternate forever between a round of training and an interactive replay.
# Entering 'q' during the replay returns here for the next training round;
# the loop itself has no exit condition.
while True:
    my_rf.learn_q_function(
        num_training_steps=100, batch_size=100, num_iterations=50)
    play_one_game(my_rf)