-
Notifications
You must be signed in to change notification settings - Fork 0
/
tetrisgym.py
executable file
·103 lines (76 loc) · 2.9 KB
/
tetrisgym.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/env python
import numpy as np
import torch
import torch.nn.functional as F
import gymnasium as gym
from matris.game import Game
from matris.game import create_screen
from state import bumpiness_score
from state import find_empty_blocks
from matris.tetrominoes import tetrominoes
# Maps each tetromino colour name to the integer id used for that piece in
# the observation vector; TetrisGym._piece_vector looks a piece up by its
# ``.color`` attribute. Ids follow the order of the names listed here.
tetromino_indices = dict(zip(
    ("blue", "yellow", "pink", "green", "red", "cyan", "orange"),
    range(7),
))
class TetrisGym(gym.Env):
    """Gym-style environment wrapping the MaTris ``Game``.

    Each step feeds a one-hot encoded action to the game and returns an
    observation made of a piece-description vector concatenated with the
    bump vector from ``bumpiness_score``. The reward is the change in a
    heuristic board score (see ``_score``) since the previous step.

    NOTE: ``step`` returns a 4-tuple ``(observation, reward, terminated,
    truncated)`` without the trailing ``info`` dict that the gymnasium API
    specifies, and ``action_space`` is a plain tensor rather than a
    ``gym.spaces`` object. Both are kept as-is so existing callers that
    unpack four values or call ``action_space.size()`` continue to work.
    """

    def __init__(self, device="mps"):
        """Create the screen and a fresh game.

        Args:
            device: torch device that returned observations are moved to.
        """
        self.screen = create_screen()
        # left, right, rotate, drop
        self.action_space = torch.arange(4)
        self.game = Game(self.screen)
        self.device = device
        # Heuristic score of the previous step; rewards are deltas against it.
        self.score = 0

    def step(self, action):
        """Apply ``action`` to the game and report the outcome.

        Args:
            action: integer index into ``action_space``.

        Returns:
            Tuple ``(observation, reward, terminated, truncated)``.
            ``terminated`` is True when ``Game.step`` returns a falsy value;
            ``truncated`` is always False. ``reward`` is the change in
            ``_score`` since the previous step, or 0 on the terminating step.
        """
        action_count = self.action_space.size()[0]
        # Without num_classes, one_hot sizes its output as ``action + 1``, so
        # the vector length would vary with the chosen action. Pin it to the
        # number of available actions.
        input_vector = F.one_hot(torch.tensor(action), num_classes=action_count)
        terminated = not self.game.step(input_vector)
        truncated = False

        state = self.game.tensor_state()
        observation = self._build_observation(state)

        new_score = self._score(state)
        calculated_reward = new_score - self.score
        self.score = new_score
        reward = 0 if terminated else calculated_reward
        return (observation, reward, terminated, truncated)

    def reset(self):
        """Start a new game and return its first observation.

        Returns:
            Tuple ``(observation, info)`` where ``info`` is an empty dict.
        """
        self.game = Game(self.screen)
        # Restart the reward baseline as well (same value ``__init__`` uses);
        # otherwise the first reward of the new episode would be computed
        # against the previous episode's final score.
        self.score = 0
        state = self.game.tensor_state()
        observation = self._build_observation(state)
        info = dict()
        return (observation, info)

    def _build_observation(self, state):
        """Build the observation tensor for ``state`` on ``self.device``.

        Concatenates the piece vector with the bump vector returned by
        ``bumpiness_score`` and adds a leading dimension of size 1.
        """
        _, bump_vector = bumpiness_score(state)
        features = torch.cat([self._piece_vector(), bump_vector]).unsqueeze(0)
        # ``torch.cat`` already produced a fresh tensor, so detach + move is
        # enough; re-wrapping an existing tensor in ``torch.tensor`` is
        # discouraged by PyTorch and emits a warning.
        return features.detach().to(self.device)

    def _piece_vector(self):
        """Describe the current tetromino as a 10-element tensor.

        Layout: ``[piece id, rotation, x position, y position, 0 * 6]``.
        """
        matris = self.game.matris
        current = tetromino_indices[matris.current_tetromino.color]
        rotation = matris.tetromino_rotation
        x_position, y_position = matris.tetromino_position
        # Pad with 6 zeros so the vector has 10 entries.
        # NOTE(review): the original also looked up ``next_tetromino`` but
        # discarded the value; the next piece is not part of the observation.
        # Confirm whether one padding slot was meant to carry it.
        return torch.tensor([
            current,
            rotation,
            x_position,
            y_position,
            0, 0, 0, 0, 0, 0,
        ])

    def _score(self, state):
        """Compute the heuristic board score that rewards are derived from.

        Combines cleared lines with penalties for the summed bump vector,
        the empty blocks found by ``find_empty_blocks`` and the bumpiness
        value from ``bumpiness_score``.
        """
        line_score = self.game.lines()
        # NOTE(review): ``point_score`` reads ``lines()`` exactly like
        # ``line_score``; a separate points accessor may have been intended.
        # Behaviour kept as-is -- confirm against ``Game``.
        point_score = self.game.lines()
        bumpiness, bump_vector = bumpiness_score(state)
        # Presumably per-column heights, so the sum is total height -- verify.
        height = torch.sum(bump_vector)
        empty_blocks = find_empty_blocks(state).sum()
        new_aggregated_score = (point_score - 0.51 * height + line_score ** 2
                                - 10 * empty_blocks - 0.18 * bumpiness)
        return new_aggregated_score
if __name__ == '__main__':
    # Smoke test: drive the environment with uniformly random actions until
    # the game reports termination.
    tgym = TetrisGym()
    action_count = tgym.action_space.size()[0]
    terminated = False
    while not terminated:
        # The original loop assigned the flag to ``terminate`` (typo), so the
        # ``terminated`` loop condition was never updated.
        _, _, terminated, _ = tgym.step(np.random.randint(action_count))