Skip to content

Commit

Permalink
Speed benchmark on OpenSpiel and PettingZoo (#394)
Browse files Browse the repository at this point in the history
  • Loading branch information
nissymori authored Feb 23, 2023
1 parent 8912d9b commit 10b040d
Show file tree
Hide file tree
Showing 5 changed files with 225 additions and 3 deletions.
49 changes: 49 additions & 0 deletions workspace/compariosn/open_spiel_forloop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from vector_env import SyncVectorEnv
import argparse
import time
import numpy as np
import collections


def make_single_env(env_name: str, seed: int):
    """Build one OpenSpiel RL environment for the named game.

    Args:
        env_name: Game name passed to ``pyspiel.load_game``.
        seed: Seed given to the environment's ``ChanceEventSampler``.

    Returns:
        A constructed ``Environment`` instance (not a factory).
    """
    # Function-local imports: OpenSpiel is only imported when this is called.
    import pyspiel
    from open_spiel.python.rl_environment import ChanceEventSampler, Environment

    loaded_game = pyspiel.load_game(env_name)
    sampler = ChanceEventSampler(seed=seed)
    return Environment(loaded_game, chance_event_sampler=sampler)


def make_env(env_name: str, n_envs: int, seed: int) -> SyncVectorEnv:
    """Build ``n_envs`` environments and wrap them in a ``SyncVectorEnv``.

    Every sub-environment is created with the same ``seed``.
    """
    single_envs = [make_single_env(env_name, seed) for _ in range(n_envs)]
    return SyncVectorEnv(single_envs)


def random_play(env: "SyncVectorEnv", n_steps_lim: int, batch_size: int):
    """Drive every sub-environment with uniformly random legal actions.

    Args:
        env: Vectorised OpenSpiel environment exposing ``envs``, ``reset()``
            and ``step(step_outputs, reset_if_done=...)``.
        n_steps_lim: Stop once at least this many steps have been counted.
        batch_size: Steps credited per vectorised ``step`` call.

    Returns:
        The number of steps counted (a multiple of ``batch_size``).
    """
    StepOutput = collections.namedtuple("step_output", ["action"])
    rng = np.random.default_rng()
    time_steps = env.reset()
    step_num = 0
    while step_num < n_steps_lim:
        # Kept as a plain list of lists: the number of legal moves can differ
        # between sub-environments, and np.array() rejects such ragged input
        # on NumPy >= 1.24.
        legal_actions = [
            ts.observations["legal_actions"][ts.observations["current_player"]]
            for ts in time_steps
        ]
        assert len(env.envs) == len(legal_actions)  # ensure parallelization
        step_outputs = [
            StepOutput(action=rng.choice(moves)) for moves in legal_actions
        ]
        # reset_if_done=True: finished sub-environments are restarted by env.
        time_steps, _rewards, _dones, _unreset_time_steps = env.step(
            step_outputs, reset_if_done=True
        )
        step_num += batch_size
    return step_num


def parse_args(argv=None):
    """Parse and validate the benchmark's command-line arguments.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        Namespace with ``env_name``, ``batch_size``, ``n_steps_lim``, ``seed``.

    Exits with an argparse usage error when ``batch_size`` is not positive or
    ``n_steps_lim`` is not a multiple of ``batch_size``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("env_name")
    parser.add_argument("batch_size", type=int)
    parser.add_argument("n_steps_lim", type=int)
    # type=int, not bool: bool("0") is True, so every supplied seed used to
    # collapse to True.
    parser.add_argument("--seed", default=100, type=int)
    args = parser.parse_args(argv)
    if args.batch_size <= 0:
        parser.error("batch_size must be a positive integer")
    if args.n_steps_lim % args.batch_size != 0:
        parser.error("n_steps_lim must be a multiple of batch_size")
    return args


if __name__ == "__main__":
    args = parse_args()
    env = make_env(args.env_name, args.batch_size, args.seed)
    # perf_counter is monotonic, so the measurement cannot be skewed by
    # system clock adjustments.
    time_sta = time.perf_counter()
    step_num = random_play(env, args.n_steps_lim, args.batch_size)
    elapsed = time.perf_counter() - time_sta
    # Output: steps per second, then elapsed seconds.
    print(step_num / elapsed, elapsed)
45 changes: 45 additions & 0 deletions workspace/compariosn/open_spiel_subproc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from tianshou_env.pettingzoo_env import OpenSpielEnv
from tianshou_env.venvs import SubprocVectorEnv
import numpy as np
import time
import argparse


def make_single_env(env_name: str, seed: int):
    """Create a single OpenSpiel ``Environment`` for ``env_name``.

    Args:
        env_name: Game name passed to ``pyspiel.load_game``.
        seed: Seed for the environment's ``ChanceEventSampler``.

    Returns:
        The constructed ``Environment`` instance.
    """
    # Function-local imports: OpenSpiel is only imported when this is called.
    import pyspiel
    from open_spiel.python.rl_environment import ChanceEventSampler, Environment

    spiel_game = pyspiel.load_game(env_name)
    chance_sampler = ChanceEventSampler(seed=seed)
    return Environment(spiel_game, chance_event_sampler=chance_sampler)

def make_env(env_name: str, n_envs: int, seed: int):
    """Return a ``SubprocVectorEnv`` made from ``n_envs`` environment factories.

    Each factory builds an OpenSpiel environment with the same ``seed`` and
    wraps it in ``OpenSpielEnv``. The factories are passed uncalled, so this
    function itself constructs no environments.
    """

    def build_wrapped_env():
        return OpenSpielEnv(make_single_env(env_name, seed))

    return SubprocVectorEnv([build_wrapped_env for _ in range(n_envs)])


def random_play(env: "SubprocVectorEnv", n_steps_lim: int, batch_size: int):
    """Step the vectorised environment with randomly sampled actions.

    Args:
        env: Vector environment whose ``reset()`` returns ``(observation,
            info)`` and whose ``step(action)`` returns a 5-tuple starting
            with the next observation.
        n_steps_lim: Stop once at least this many steps have been counted.
        batch_size: Steps credited per vectorised ``step`` call.

    Returns:
        The number of steps counted (a multiple of ``batch_size``).
    """
    rng = np.random.default_rng()
    observation, _info = env.reset()
    step_num = 0
    while step_num < n_steps_lim:
        # NOTE(review): this samples an element of obs["mask"] directly, which
        # is only a legal action if "mask" holds legal action ids rather than
        # a 0/1 mask - confirm against OpenSpielEnv.
        action = [rng.choice(obs["mask"]) for obs in observation]
        observation, _reward, _terminated, _truncated, _info = env.step(action)
        step_num += batch_size
    return step_num


def parse_args(argv=None):
    """Parse and validate the benchmark's command-line arguments.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        Namespace with ``env_name``, ``batch_size``, ``n_steps_lim``, ``seed``.

    Exits with an argparse usage error when ``batch_size`` is not positive or
    ``n_steps_lim`` is not a multiple of ``batch_size``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("env_name")
    parser.add_argument("batch_size", type=int)
    parser.add_argument("n_steps_lim", type=int)
    # type=int, not bool: bool("0") is True, so every supplied seed used to
    # collapse to True.
    parser.add_argument("--seed", default=100, type=int)
    args = parser.parse_args(argv)
    if args.batch_size <= 0:
        parser.error("batch_size must be a positive integer")
    if args.n_steps_lim % args.batch_size != 0:
        parser.error("n_steps_lim must be a multiple of batch_size")
    return args


if __name__ == "__main__":
    args = parse_args()
    env = make_env(args.env_name, args.batch_size, args.seed)
    # perf_counter is monotonic, so the measurement cannot be skewed by
    # system clock adjustments.
    time_sta = time.perf_counter()
    step_num = random_play(env, args.n_steps_lim, args.batch_size)
    elapsed = time.perf_counter() - time_sta
    # Output: steps per second.
    print(step_num / elapsed)
64 changes: 64 additions & 0 deletions workspace/compariosn/petting_zoo_forloop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import argparse
import time
import numpy as np
import collections
from tianshou.env import DummyVectorEnv
from tianshou.env.pettingzoo_env import PettingZooEnv
from pettingzoo.classic.tictactoe import tictactoe

class AutoResetPettingZooEnv(PettingZooEnv):
    """``PettingZooEnv`` that resets itself as soon as an episode terminates."""

    def __init__(self, env):
        super().__init__(env)

    def step(self, action):
        """Step the wrapped env, swapping in a reset observation on termination.

        Reward, termination flag, truncation flag and info are always those of
        the step just taken; only the observation is replaced after a reset.
        Truncation alone does not trigger a reset.
        """
        observation, rewards, terminated, truncated, info = super().step(action)
        if not terminated:
            return observation, rewards, terminated, truncated, info
        return super().reset(), rewards, terminated, truncated, info


def make_env(env_name, n_envs):
    """Return a ``DummyVectorEnv`` of ``n_envs`` auto-resetting PettingZoo envs.

    Args:
        env_name: ``"go"`` or ``"tictactoe"``.
        n_envs: Number of environment factories handed to the vector env.

    Raises:
        ValueError: For ``"chess"`` (not supported yet) or any unknown name.
    """
    # Function-local import: the go module is only imported on first call.
    from pettingzoo.classic.go import go

    def wrapped_go():
        return AutoResetPettingZooEnv(go.env())

    def wrapped_tictactoe():
        return AutoResetPettingZooEnv(tictactoe.env())

    # TODO: add chess (pettingzoo.classic.chess_v5) once it is supported.
    if env_name == "chess":
        raise ValueError("Chess will be added later")
    if env_name == "go":
        factory = wrapped_go
    elif env_name == "tictactoe":
        factory = wrapped_tictactoe
    else:
        raise ValueError("no such environment in petting zoo")
    return DummyVectorEnv([factory] * n_envs)


def random_play(env: "DummyVectorEnv", n_steps_lim: int, batch_size: int) -> int:
    """Run a random agent in every sub-environment until the step limit.

    Args:
        env: Vector environment whose ``reset()`` returns one observation per
            sub-environment and whose ``step(action)`` returns a 5-tuple
            starting with the next observations. Each observation carries a
            ``"mask"`` entry flagging the legal actions.
        n_steps_lim: Stop once at least this many steps have been counted.
        batch_size: Steps credited per vectorised ``step`` call.

    Returns:
        The number of steps counted (a multiple of ``batch_size``).
    """
    rng = np.random.default_rng()
    observation = env.reset()
    step_num = 0
    while step_num < n_steps_lim:
        assert len(env._env_fns) == len(observation)  # ensure parallelization
        # Choose uniformly among the indices whose mask entry equals 1.
        action = [
            rng.choice(np.where(np.asarray(obs["mask"]) == 1)[0])
            for obs in observation
        ]
        observation, _reward, _terminated, _truncated, _info = env.step(action)
        step_num += batch_size
    return step_num


def parse_args(argv=None):
    """Parse and validate the benchmark's command-line arguments.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        Namespace with ``env_name``, ``batch_size`` and ``n_steps_lim``.

    Exits with an argparse usage error when ``batch_size`` is not positive or
    ``n_steps_lim`` is not a multiple of ``batch_size``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("env_name")
    parser.add_argument("batch_size", type=int)
    parser.add_argument("n_steps_lim", type=int)
    args = parser.parse_args(argv)
    # Explicit checks instead of assert: asserts vanish under ``python -O``
    # and a zero batch size would otherwise raise ZeroDivisionError.
    if args.batch_size <= 0:
        parser.error("batch_size must be a positive integer")
    if args.n_steps_lim % args.batch_size != 0:
        parser.error("n_steps_lim must be a multiple of batch_size")
    return args


if __name__ == "__main__":
    args = parse_args()
    env = make_env(args.env_name, args.batch_size)
    # perf_counter is monotonic, so the measurement cannot be skewed by
    # system clock adjustments.
    time_sta = time.perf_counter()
    step_num = random_play(env, args.n_steps_lim, args.batch_size)
    elapsed = time.perf_counter() - time_sta
    # Output: steps per second.
    print(step_num / elapsed)
64 changes: 64 additions & 0 deletions workspace/compariosn/petting_zoo_subproc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
from tianshou.env import SubprocVectorEnv
from pettingzoo.classic.go import go
from pettingzoo.classic.tictactoe import tictactoe
from tianshou.env.pettingzoo_env import PettingZooEnv
import argparse
import numpy as np
import time

# Importing petting_zoo functions/classes globally slows down open_spiel.
class AutoResetPettingZooEnv(PettingZooEnv):
    """``PettingZooEnv`` that resets itself as soon as an episode terminates."""

    def __init__(self, env):
        super().__init__(env)

    def step(self, action):
        """Step the wrapped env, swapping in a reset observation on termination.

        Reward, termination flag, truncation flag and info are always those of
        the step just taken; only the observation is replaced after a reset.
        Truncation alone does not trigger a reset.
        """
        observation, rewards, terminated, truncated, info = super().step(action)
        if not terminated:
            return observation, rewards, terminated, truncated, info
        return super().reset(), rewards, terminated, truncated, info


def make_env(env_name, n_envs):
    """Return a ``SubprocVectorEnv`` of ``n_envs`` auto-resetting PettingZoo envs.

    Args:
        env_name: ``"go"`` or ``"tictactoe"``.
        n_envs: Number of environment factories handed to the vector env.

    Raises:
        ValueError: For ``"chess"`` (not supported yet) or any unknown name.
    """

    def wrapped_go():
        return AutoResetPettingZooEnv(go.env())

    def wrapped_tictactoe():
        return AutoResetPettingZooEnv(tictactoe.env())

    # TODO: add chess (pettingzoo.classic.chess_v5) once it is supported.
    if env_name == "chess":
        raise ValueError("Chess will be added later")
    if env_name == "go":
        factory = wrapped_go
    elif env_name == "tictactoe":
        factory = wrapped_tictactoe
    else:
        raise ValueError("no such environment in petting zoo")
    return SubprocVectorEnv([factory] * n_envs)


def random_play(env, n_steps_lim: int, batch_size: int) -> int:
    """Run a random agent in every sub-environment until the step limit.

    Args:
        env: Vector environment whose ``reset()`` returns one observation per
            sub-environment and whose ``step(action)`` returns a 5-tuple
            starting with the next observations. Each observation carries a
            ``"mask"`` entry flagging the legal actions.
        n_steps_lim: Stop once at least this many steps have been counted.
        batch_size: Steps credited per vectorised ``step`` call.

    Returns:
        The number of steps counted (a multiple of ``batch_size``).
    """
    rng = np.random.default_rng()
    observation = env.reset()
    step_num = 0
    while step_num < n_steps_lim:
        assert len(env._env_fns) == len(observation)  # ensure parallelization
        # Choose uniformly among the indices whose mask entry equals 1.
        action = [
            rng.choice(np.where(np.asarray(obs["mask"]) == 1)[0])
            for obs in observation
        ]
        observation, _reward, _terminated, _truncated, _info = env.step(action)
        step_num += batch_size
    return step_num


def parse_args(argv=None):
    """Parse and validate the benchmark's command-line arguments.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        Namespace with ``env_name``, ``batch_size`` and ``n_steps_lim``.

    Exits with an argparse usage error when ``batch_size`` is not positive or
    ``n_steps_lim`` is not a multiple of ``batch_size``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("env_name")
    parser.add_argument("batch_size", type=int)
    parser.add_argument("n_steps_lim", type=int)
    args = parser.parse_args(argv)
    # Explicit checks instead of assert: asserts vanish under ``python -O``
    # and a zero batch size would otherwise raise ZeroDivisionError.
    if args.batch_size <= 0:
        parser.error("batch_size must be a positive integer")
    if args.n_steps_lim % args.batch_size != 0:
        parser.error("n_steps_lim must be a multiple of batch_size")
    return args


if __name__ == "__main__":
    args = parse_args()
    env = make_env(args.env_name, args.batch_size)
    # perf_counter is monotonic, so the measurement cannot be skewed by
    # system clock adjustments.
    time_sta = time.perf_counter()
    step_num = random_play(env, args.n_steps_lim, args.batch_size)
    elapsed = time.perf_counter() - time_sta
    # Output: steps per second.
    print(step_num / elapsed)
6 changes: 3 additions & 3 deletions workspace/compariosn/tianshou_env/venvs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
import numpy as np
import packaging

from pettingzoo_env import OpenSpielEnv
from utils import ENV_TYPE, gym_new_venv_step_type
from worker import (
from .pettingzoo_env import OpenSpielEnv
from .utils import ENV_TYPE, gym_new_venv_step_type
from .worker import (
DummyEnvWorker,
EnvWorker,
RayEnvWorker,
Expand Down

0 comments on commit 10b040d

Please sign in to comment.