evaluator.py
# header_import is expected to provide numpy (np), matplotlib.pyplot (plt), sys,
# random, and the Grid_World_Enviroment_with_Wind_Obstacle environment class.
from header_import import *

class grid_evaluate(Grid_World_Enviroment_with_Wind_Obstacle):

    def __init__(self, grid_world_size, graph_data_name, grid_play_task="False"):
        super().__init__(grid_world_size)
        self.grid_world_size = grid_world_size
        self.grid_play_task = grid_play_task
        self.path = "graphs_charts/"
        self.chart_path = self.path + "charts/"
        self.q_value_path = "q_values/"
        self.size_of_world_path = str(grid_world_size) + "/"
        self.graph_data_name = graph_data_name

    def action_path(self, q_value, starting_position):
        # Follow the greedy policy from starting_position for at most 100 steps,
        # or until the goal state is reached.
        x, y = starting_position
        path = [starting_position]
        reward_list = 0  # accumulated reward along the path (a scalar, despite the name)
        for _ in range(100):
            # Greedy action; assumes self.action_space is the integer range 0..n-1,
            # so the argmax index is itself a valid action.
            best_action = np.argmax([q_value[(x, y), a] for a in self.action_space])
            x, y, reward = self.transition(x, y, best_action)
            path.append((x, y))
            reward_list += reward
            if x == self.goal[0] and y == self.goal[1]:
                break
        return path, reward_list

    def play_optimal_path(self, starting_position):
        # Load the stored Q-values; the file alternates key lines and value lines
        # (see the example layout below this method).
        q_value_file = self.q_value_path + self.size_of_world_path + str(self.graph_data_name) + ".txt"
        state = []
        value = []
        with open(q_value_file, "r") as q_value:
            for count, line in enumerate(q_value, start=1):
                if count % 2:
                    state.append(eval(line.rstrip()))
                else:
                    value.append(float(line.strip()))
        q_value_dict = dict(zip(state, value))
        path, reward_list = self.action_path(q_value_dict, starting_position)
        return path, reward_list
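
    # A minimal sketch of the Q-value file layout assumed by play_optimal_path:
    # keys and values alternate line by line, one ((x, y), action) tuple per key
    # line and one float per value line. The concrete numbers are illustrative only.
    #
    #   ((0, 0), 0)
    #   -1.25
    #   ((0, 0), 1)
    #   -0.75
    #   ((0, 1), 0)
    #   -1.10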

    def plot_episode_time_step(self, data, type_graph="cumulative_reward"):
        # Plot the total reward collected from each test starting position.
        fig = plt.figure()
        axis = fig.add_subplot(111)
        if self.grid_world_size == 20:
            color_graph = "blue"
        else:
            color_graph = "red"
        if type_graph == "cumulative_reward":
            axis.plot(data, color=color_graph)
            plt.axhline(y=max(data), color="red", linestyle="-")  # mark the best run
            axis.set_title(str(self.grid_world_size) + " Number of Tests vs Total Reward Value")
            axis.set_xlabel("Number of Tests")
            axis.set_ylabel("Total Reward Values")
            plt.savefig(str(self.chart_path) + str(self.grid_world_size) + "_" + self.graph_data_name + "_cumulative_reward.png", dpi=500)
            plt.close(fig)

    def calculate_std_error(self, data):
        # Summary statistics of the test rewards, measured relative to the best run.
        std = np.std(data)
        min_data = min(data)
        max_data = max(data)
        unique_values = set(data)
        unique_values.remove(max(unique_values))
        second_max_data = max(unique_values) if unique_values else max_data
        min_error = second_max_data - max_data   # deviation of the second-best run from the best
        max_error = min_data - max_data          # deviation of the worst run from the best
        # Mean squared error of all runs against the best run.
        actual, pred = np.array(data), np.array(max_data)
        mse = np.square(np.subtract(actual, pred)).mean()
        print("Standard Deviation:", std)
        print("Minimum error:", min_error)
        print("Maximum error:", max_error)
        print("MSE of the data:", mse)

if __name__ == "__main__":
    grid_size = int(sys.argv[1])
    algorithm = sys.argv[2]
    cumulative_reward = []
    if algorithm == "sarsa":
        data_name = "Sarsa_alpha_0.8"
    elif algorithm == "q_learning":
        data_name = "Q_Learning_alpha_0.8"
    elif algorithm == "double_q_learning":
        data_name = "Double_Q_Learning_alpha_0.8"
    else:
        sys.exit("Unknown algorithm: " + algorithm + " (expected sarsa, q_learning, or double_q_learning)")
    Grid_world_Eval = grid_evaluate(grid_world_size=grid_size, graph_data_name=data_name)
    # Evaluate the greedy policy from 100 random starting positions.
    for i in range(100):
        x = random.randint(0, grid_size - 1)
        y = random.randint(0, grid_size - 1)
        _, reward_list = Grid_world_Eval.play_optimal_path(starting_position=(x, y))
        cumulative_reward.append(reward_list)
    Grid_world_Eval.plot_episode_time_step(cumulative_reward, type_graph="cumulative_reward")
    Grid_world_Eval.calculate_std_error(cumulative_reward)
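
# Example invocation (a sketch; assumes the Q-value files produced by training
# already exist under q_values/<grid_size>/, e.g. q_values/20/Sarsa_alpha_0.8.txt):
#
#   python evaluator.py 20 sarsa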