diff --git a/workspace/compariosn/open_spiel_forloop.py b/workspace/compariosn/open_spiel_forloop.py
new file mode 100644
index 000000000..143f1b9e2
--- /dev/null
+++ b/workspace/compariosn/open_spiel_forloop.py
@@ -0,0 +1,49 @@
+from vector_env import SyncVectorEnv
+import argparse
+import time
+import numpy as np
+import collections
+
+
+def make_single_env(env_name: str, seed: int):
+    import pyspiel
+    from open_spiel.python.rl_environment import Environment, ChanceEventSampler
+    def gen_env():
+        game = pyspiel.load_game(env_name)
+        return Environment(game, chance_event_sampler=ChanceEventSampler(seed=seed))
+    return gen_env()
+
+
+def make_env(env_name: str, n_envs: int, seed: int) -> SyncVectorEnv:
+    return SyncVectorEnv([make_single_env(env_name, seed) for i in range(n_envs)])
+
+
+def random_play(env: SyncVectorEnv, n_steps_lim: int, batch_size: int):
+    # Random play for open_spiel.
+    StepOutput = collections.namedtuple("step_output", ["action"])
+    time_step = env.reset()
+    rng = np.random.default_rng()
+    step_num = 0
+    while step_num < n_steps_lim:
+        legal_actions = np.array([ts.observations["legal_actions"][ts.observations["current_player"]] for ts in time_step])
+        assert len(env.envs) == len(legal_actions)  # ensure parallelization
+        action = [rng.choice(legal_actions[i]) for i in range(len(legal_actions))]
+        step_outputs = [StepOutput(action=a) for a in action]
+        time_step, reward, done, unreset_time_steps = env.step(step_outputs, reset_if_done=True)
+        step_num += batch_size
+    return step_num
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("env_name")
+    parser.add_argument("batch_size", type=int)
+    parser.add_argument("n_steps_lim", type=int)
+    parser.add_argument("--seed", default=100, type=int)
+    args = parser.parse_args()
+    assert args.n_steps_lim % args.batch_size == 0
+    env = make_env(args.env_name, args.batch_size, args.seed)
+    time_sta = time.time()
+    step_num = random_play(env, args.n_steps_lim, args.batch_size)
+    time_end = time.time()
+    print((step_num) / (time_end - time_sta), time_end - time_sta)
diff --git a/workspace/compariosn/open_spiel_subproc.py b/workspace/compariosn/open_spiel_subproc.py
new file mode 100644
index 000000000..668f1cbfc
--- /dev/null
+++ b/workspace/compariosn/open_spiel_subproc.py
@@ -0,0 +1,45 @@
+from tianshou_env.pettingzoo_env import OpenSpielEnv
+from tianshou_env.venvs import SubprocVectorEnv
+import numpy as np
+import time
+import argparse
+
+
+def make_single_env(env_name: str, seed: int):
+    import pyspiel
+    from open_spiel.python.rl_environment import Environment, ChanceEventSampler
+    def gen_env():
+        game = pyspiel.load_game(env_name)
+        return Environment(game, chance_event_sampler=ChanceEventSampler(seed=seed))
+    return gen_env()
+
+
+def make_env(env_name: str, n_envs: int, seed: int):
+    return SubprocVectorEnv([lambda: OpenSpielEnv(make_single_env(env_name, seed)) for _ in range(n_envs)])
+
+
+def random_play(env: SubprocVectorEnv, n_steps_lim: int, batch_size: int):
+    step_num = 0
+    rng = np.random.default_rng()
+    observation, info = env.reset()
+    terminated = np.zeros(len(env._env_fns))
+    while step_num < n_steps_lim:
+        legal_action_mask = [observation[i]["mask"] for i in range(len(observation))]
+        action = [rng.choice(legal_action_mask[i]) for i in range(len(legal_action_mask))]  # choose an action randomly (assumes "mask" holds the legal action ids)
+        observation, reward, terminated, _, info = env.step(action)
+        step_num += batch_size
+    return step_num
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
parser.add_argument("env_name") + parser.add_argument("batch_size", type=int) + parser.add_argument("n_steps_lim", type=int) + parser.add_argument("--seed", default=100, type=bool) + args = parser.parse_args() + assert args.n_steps_lim % args.batch_size == 0 + env = make_env(args.env_name, args.batch_size, args.seed) + time_sta = time.time() + step_num = random_play(env, args.n_steps_lim, args.batch_size) + time_end = time.time() + print((step_num)/(time_end-time_sta)) diff --git a/workspace/compariosn/petting_zoo_forloop.py b/workspace/compariosn/petting_zoo_forloop.py new file mode 100644 index 000000000..6b97b4a26 --- /dev/null +++ b/workspace/compariosn/petting_zoo_forloop.py @@ -0,0 +1,64 @@ +import argparse +import time +import numpy as np +import collections +from tianshou.env import DummyVectorEnv +from tianshou.env.pettingzoo_env import PettingZooEnv +from pettingzoo.classic.tictactoe import tictactoe + +class AutoResetPettingZooEnv(PettingZooEnv): + def __init__(self, env): + super().__init__(env) + + def step(self, action): + obs, reward, term, trunc, info = super().step(action) + if term: + obs = super().reset() + return obs, reward, term, trunc, info + + +def make_env(env_name, n_envs): + from pettingzoo.classic.go import go + #from pettingzoo.classic import chess_v5 + def get_go_env(): + return AutoResetPettingZooEnv(go.env()) + def get_tictactoe_env(): + return AutoResetPettingZooEnv(tictactoe.env()) + if env_name == "go": + return DummyVectorEnv([get_go_env for _ in range(n_envs)]) + elif env_name == "tictactoe": + return DummyVectorEnv([get_tictactoe_env for _ in range(n_envs)]) + elif env_name == "chess": + #return chess_v5.env() + raise ValueError("Chess will be added later") + else: + raise ValueError("no such environment in petting zoo") + + +def random_play(env: DummyVectorEnv, n_steps_lim: int, batch_size: int) -> int: + # petting zooのgo環境でrandom gaentを終局まで動かす. + step_num = 0 + rng = np.random.default_rng() + observation = env.reset() + terminated = np.zeros(len(env._env_fns)) + while step_num < n_steps_lim: + assert len(env._env_fns) == len(observation) # ensure parallerization + legal_action_mask = np.array([observation[i]["mask"] for i in range(len(observation))]) + action = [rng.choice(np.where(legal_action_mask[i]==1)[0]) for i in range(len(legal_action_mask))] # chose action randomly + observation, reward, terminated, _, _ = env.step(action) + step_num += batch_size + return step_num + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("env_name") + parser.add_argument("batch_size", type=int) + parser.add_argument("n_steps_lim", type=int) + args = parser.parse_args() + assert args.n_steps_lim % args.batch_size == 0 + env = make_env(args.env_name, args.batch_size) + time_sta = time.time() + step_num = random_play(env, args.n_steps_lim, args.batch_size) + time_end = time.time() + print((step_num)/(time_end-time_sta)) \ No newline at end of file diff --git a/workspace/compariosn/petting_zoo_subproc.py b/workspace/compariosn/petting_zoo_subproc.py new file mode 100644 index 000000000..ed34cfdd5 --- /dev/null +++ b/workspace/compariosn/petting_zoo_subproc.py @@ -0,0 +1,64 @@ +from tianshou.env import SubprocVectorEnv +from pettingzoo.classic.go import go +from pettingzoo.classic.tictactoe import tictactoe +from tianshou.env.pettingzoo_env import PettingZooEnv +import argparse +import numpy as np +import time + +class AutoResetPettingZooEnv(PettingZooEnv): # 全体でpetting_zooの関数, classをimportするとopen_spielの速度が落ちる. 
+    def __init__(self, env):
+        super().__init__(env)
+
+    def step(self, action):
+        obs, reward, term, trunc, info = super().step(action)
+        if term:
+            obs = super().reset()
+        return obs, reward, term, trunc, info
+
+
+def make_env(env_name, n_envs):
+
+    #from pettingzoo.classic import chess_v5
+    def get_go_env():
+        return AutoResetPettingZooEnv(go.env())
+    def get_tictactoe_env():
+        return AutoResetPettingZooEnv(tictactoe.env())
+    if env_name == "go":
+        return SubprocVectorEnv([get_go_env for _ in range(n_envs)])
+    elif env_name == "tictactoe":
+        return SubprocVectorEnv([get_tictactoe_env for _ in range(n_envs)])
+    elif env_name == "chess":
+        #return chess_v5.env()
+        raise ValueError("Chess will be added later")
+    else:
+        raise ValueError("no such environment in petting zoo")
+
+
+def random_play(env, n_steps_lim: int, batch_size: int) -> int:
+    # Run random agents in the PettingZoo environments until the step limit.
+    step_num = 0
+    rng = np.random.default_rng()
+    observation = env.reset()
+    terminated = np.zeros(len(env._env_fns))
+    while step_num < n_steps_lim:
+        assert len(env._env_fns) == len(observation)  # ensure parallelization
+        legal_action_mask = np.array([observation[i]["mask"] for i in range(len(observation))])
+        action = [rng.choice(np.where(legal_action_mask[i] == 1)[0]) for i in range(len(legal_action_mask))]  # choose an action randomly
+        observation, reward, terminated, _, _ = env.step(action)
+        step_num += batch_size
+    return step_num
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("env_name")
+    parser.add_argument("batch_size", type=int)
+    parser.add_argument("n_steps_lim", type=int)
+    args = parser.parse_args()
+    assert args.n_steps_lim % args.batch_size == 0
+    env = make_env(args.env_name, args.batch_size)
+    time_sta = time.time()
+    step_num = random_play(env, args.n_steps_lim, args.batch_size)
+    time_end = time.time()
+    print((step_num) / (time_end - time_sta))
\ No newline at end of file
diff --git a/workspace/compariosn/tianshou_env/venvs.py b/workspace/compariosn/tianshou_env/venvs.py
index 1658e3824..09379e2a8 100644
--- a/workspace/compariosn/tianshou_env/venvs.py
+++ b/workspace/compariosn/tianshou_env/venvs.py
@@ -5,9 +5,9 @@
 import numpy as np
 import packaging
 
-from pettingzoo_env import OpenSpielEnv
-from utils import ENV_TYPE, gym_new_venv_step_type
-from worker import (
+from .pettingzoo_env import OpenSpielEnv
+from .utils import ENV_TYPE, gym_new_venv_step_type
+from .worker import (
     DummyEnvWorker,
     EnvWorker,
     RayEnvWorker,