-
Notifications
You must be signed in to change notification settings - Fork 0
/
snake_smrtbot.py
127 lines (109 loc) · 5.02 KB
/
snake_smrtbot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/env python3
import torch as t
from torch import tensor as T
from numpy import unravel_index as unravel
import matplotlib.pyplot as plt
from time import sleep
import pygame as pg
# This is a simplified version of the classic Snake game, reworked to play
# itself without a neural network: just an old-fashioned bot using depth-first search!
# Highest 10x10 score I've seen it score thus far: 63
# Built using MiniSnakes - https://github.com/eliasffyksen/MiniSnakes
# Side length of the square game board, in cells (the board is game_size x game_size).
game_size = 10
# Constants used by the pygame renderer
WIDTH = 500 # Width and height passed to pg.display.set_mode for the window
CELL_SIZE = WIDTH // game_size # Side length of one board cell when drawn in the window
def do(snake: t.Tensor, action: int):
    """Advance the game by one move, mutating ``snake`` in place.

    Board encoding: positive cells are snake segments whose value increases
    towards the head, ``-1`` marks the food and ``0`` is an empty cell. The
    board wraps around at its edges.

    Args:
        snake: 2-D integer board tensor. It is modified in place unless the
            move is fatal.
        action: 0, 1 or 2. The current heading is rotated by ``3 + action``
            quarter turns, so 1 keeps going straight and 0 / 2 turn to
            either side.

    Returns:
        ``-10`` if the move runs into the snake's body (the board is left
        untouched); ``10 + segs - 4`` when food was eaten, where ``segs`` is
        the new snake length; otherwise a value >= 1 when the head ended up
        closer to the food than before and a value <= -1 when it did not.
    """
    prevsegs = snake.max().item()
    distb4 = getdists(snake)

    # The two largest cell values are the head and the segment right behind it.
    positions = snake.flatten().topk(2)[1]
    pos_cur, pos_prev = (T(unravel(x, snake.shape)) for x in positions)

    # (pos_cur - pos_prev) is the current heading; rotate it and wrap around.
    rotation = T([[0, -1], [1, 0]]).matrix_power(3 + action)
    pos_next = (pos_cur + (pos_cur - pos_prev) @ rotation) % T(snake.shape)
    target = tuple(pos_next)

    if (snake[target] > 0).any():
        return -10

    if snake[target] == -1:
        # Food eaten: the snake grows, so the tail is not shortened.
        free_cells = (snake == 0).flatten()
        # On a completely filled board there is nowhere left to put food;
        # multinomial would raise on an all-zero weight vector, so skip it.
        if free_cells.any():
            pos_food = free_cells.to(t.float).multinomial(1)[0]
            snake[unravel(pos_food, snake.shape)] = -1
    else:
        # Ordinary move: age every segment by one, which frees the tail cell.
        snake[snake > 0] -= 1

    snake[target] = snake[tuple(pos_cur)] + 1

    segs = snake.max().item()
    distaf = getdists(snake)
    if segs > prevsegs:
        return 10 + segs - 4
    if distaf < distb4:
        return max(int(10 - distaf), 1)
    return min(int(-(10 - distaf)), -1)
def getdists(snake):
    """Return the straight-line distance between the head and the food.

    The head is taken to be the cell holding the largest value and the food
    the cell holding the smallest value. The distance is the Euclidean
    distance between their (row, column) coordinates; board wrap-around is
    not taken into account.
    """
    n_cols = snake.shape[1]

    def as_point(flat_index):
        # Convert a flat index into a float (row, column) tensor.
        return t.tensor(divmod(flat_index.item(), n_cols), dtype=t.float)

    head_point = as_point(t.argmax(snake))
    food_point = as_point(t.argmin(snake))
    return t.dist(head_point, food_point).item()
def print_state(snake):
    """Print the board to the terminal, one coloured text line per board row.

    Positive cells (snake) are printed in green, ``-1`` (food) in red and
    everything else in white. Each value is right-aligned to width 2 and every
    line ends with an ANSI colour reset.
    """
    palette = {'red': '\x1b[31m', 'green': '\x1b[32m', 'white': '\x1b[37m'}

    def paint(value):
        # Prefix one cell value with the escape code for its colour.
        if value > 0:
            colour = palette['green']
        elif value == -1:
            colour = palette['red']
        else:
            colour = palette['white']
        return f"{colour}{value:2}"

    for row in snake:
        line = ''.join(map(paint, row.tolist()))
        print(line, end='\x1b[0m\n')
def plot_state(snake):
    """Show the board as a matplotlib image titled with the current score.

    The score is the largest cell value minus 3. The figure is cleared again
    after a brief pause so the next call starts from an empty figure.
    """
    score = snake.max().item() - 3  # largest cell value minus 3

    plt.imshow(snake)
    plt.title(f"Score: {score}")
    # Hide the tick marks on both axes.
    for set_ticks in (plt.xticks, plt.yticks):
        set_ticks([])
    plt.draw()
    plt.pause(0.001)  # short pause so the window refreshes; adjust as needed
    plt.clf()
class PygameWindow:
    """Pygame window that renders the snake board as a grid of coloured cells."""

    def __init__(self):
        """Initialise pygame and open a WIDTH x WIDTH window."""
        pg.init()
        self.window = pg.display.set_mode((WIDTH, WIDTH))

    def draw_state(self, snake):
        """Handle pending window events, then draw ``snake`` to the window.

        Closing the window, or pressing Escape or Q, shuts pygame down and
        raises ``SystemExit``.

        Args:
            snake: 2-D board tensor; positive cells are snake segments,
                ``-1`` is the food and anything else is drawn as empty.

        Raises:
            SystemExit: when the user asked to quit.
        """
        for e in pg.event.get():
            wants_quit = e.type == pg.QUIT or (
                e.type == pg.KEYDOWN and e.key in (pg.K_ESCAPE, pg.K_q)
            )
            if wants_quit:
                pg.quit()
                # The display no longer exists after pg.quit(); drawing to it
                # would raise pygame.error, so stop the program cleanly here.
                raise SystemExit

        background = (200, 200, 200)
        self.window.fill(background)
        max_val = snake.max().item() + 1
        for y, row in enumerate(snake.tolist()):
            for x, value in enumerate(row):
                if value > 0:  # Snake body: the larger the value, the purer the green
                    fade = 200 * (max_val - value) // max_val
                    color = (fade, 255, fade)
                elif value == -1:  # Food
                    color = (222, 0, 0)
                else:  # Empty space
                    color = background
                cell = pg.Rect(x * CELL_SIZE, y * CELL_SIZE, CELL_SIZE, CELL_SIZE)
                pg.draw.rect(self.window, color, cell)
        pg.display.update()
def explore_path(snake, depth=0, max_depth=game_size**2//2):
    """Pick a move by greedily following the best-scoring move into the future.

    All three actions are simulated on copies of ``snake``; the highest
    scoring one is then followed recursively until food is reached (score of
    at least 10), the depth limit is hit, or every move is fatal.

    Args:
        snake: current board tensor; it is never modified, only clones are.
        depth: current recursion depth; callers start at 0.
        max_depth: maximum number of moves to look ahead.

    Returns:
        At ``depth == 0`` always an action index (0, 1 or 2). At deeper levels
        the action index found at the end of the followed path, or ``None``
        when that path ends with every move being fatal.
    """
    futures = [snake.clone() for _ in range(3)]
    scores = [do(future, action) for action, future in enumerate(futures)]
    best_score = max(scores)
    best_action = scores.index(best_score)

    if best_score == -10:
        # Every move is fatal here. Report a trap to the caller even at the
        # depth limit; at the root there is nothing better to return than
        # the first action.
        return best_action if depth == 0 else None

    # NOTE: extending the search horizon after reaching food was tried and
    # did not help much, so the search stops as soon as food is reached.
    if depth >= max_depth or best_score >= 10:
        return best_action

    result = explore_path(futures[best_action], depth + 1, max_depth)
    if depth != 0:
        return result

    if result is None:
        # The best-looking path leads into a trap: fall back to the highest
        # scoring of the remaining non-fatal moves (not merely the first move
        # whose score differs from the best one).
        alternatives = [
            action
            for action, score in enumerate(scores)
            if action != best_action and score != -10
        ]
        if alternatives:
            best_action = max(alternatives, key=scores.__getitem__)
    return best_action
def single_bot_game():
    """Play one complete bot-driven game and return its final score.

    The board is printed to the terminal after every move, with a 0.1 s
    pause between moves. The score is the largest cell value on the final
    board minus 3.
    """
    board = t.zeros((game_size, game_size), dtype=t.int)
    board[0, :3] = T([1, 2,-1])
    # The snake needs to grab the first food so that a random food spawns.
    outcome = do(board, 1)
    print_state(board)
    # A graphical renderer (e.g. plot_state(board)) can be called next to
    # print_state here and inside the loop.

    while outcome != -10:
        sleep(0.1)
        # Look ahead at most as many moves as there are cells not yet
        # accounted for by the largest cell value.
        lookahead = game_size**2 - board.max().item()
        move = explore_path(board, max_depth=lookahead)
        if move is None:
            outcome = -10
        else:
            outcome = do(board, move)
        print()
        print_state(board)

    return board.max().item() - 3
if __name__ == '__main__':
    # Keep playing games until one of them reaches game_size**2 - 3 points.
    highscore = 0
    endscores = []
    target_score = game_size**2 - 3
    while highscore < target_score:
        latest = single_bot_game()
        endscores.append(latest)
        if latest > highscore:
            highscore = latest
        average = sum(endscores) / len(endscores)
        # Last score, average score and high score on a single line.
        print(f"Score:{latest:>3} Average:{average:>5.1f} Highest:{highscore:>3}")
        sleep(1)