Q-learning implementation with deep off-policy exploration for the procgen environments.
python -m pip install torch torchvision
python -m pip install gym procgen
python -m pip install matplotlib
python -m pip install pynput
from agent import Agent
from environments import Environments
from collector import Collector
# Set up the three collaborating objects used by the loop below.
agent = Agent()
# `envs` is a list of 20 "maze" entries -- presumably one Procgen maze
# instance per entry, stepped together; verify against Environments.
env = Environments(render=True, envs=["maze"] * 20, agent=agent)
collector = Collector()

# Endless interaction/learning loop; stop it by interrupting the process.
# NOTE(review): the loop body's indentation was reconstructed -- confirm that
# env.start() is meant to be called on every iteration.
while True:
    # Fetch the current observations together with hn/cn
    # (presumably the recurrent hidden and cell states -- TODO confirm).
    obs, hn, cn = env.start()

    # Pick actions for all environments. Besides the actions this returns the
    # observations the choice was based on, the states before (h0, c0) and
    # after (hn, cn) the call, and bt/at, which are only forwarded to
    # rememberMulti below.
    act, obs_old, h0, c0, hn, cn, bt, at = agent.chooseMulti(obs, hn, cn)

    # Advance the environments; hn/cn are handed over alongside the actions.
    obs, rew, done, info = env.step(act, hn, cn)

    # Pass rewards, done flags and actions to the collector.
    collector.collect(rew, done, act)

    # Store the transition, then run one learning call on the agent.
    agent.rememberMulti(obs_old, act, obs, rew, h0, c0, hn, cn, done, bt, at)
    agent.learn()