diff --git a/chainerrl/agents/ddpg.py b/chainerrl/agents/ddpg.py
index 295b195d8..9a8472fb8 100644
--- a/chainerrl/agents/ddpg.py
+++ b/chainerrl/agents/ddpg.py
@@ -48,8 +48,8 @@ class DDPG(AttributeSavingMixin, Agent):
         replay_start_size (int): if the replay buffer's size is less than
             replay_start_size, skip update
         minibatch_size (int): Minibatch size
-        update_frequency (int): Model update frequency in step
-        target_update_frequency (int): Target model update frequency in step
+        update_interval (int): Model update interval in step
+        target_update_interval (int): Target model update interval in step
         phi (callable): Feature extractor applied to observations
         target_update_method (str): 'hard' or 'soft'.
         soft_update_tau (float): Tau of soft target update.
@@ -75,8 +75,8 @@ class DDPG(AttributeSavingMixin, Agent):

     def __init__(self, model, actor_optimizer, critic_optimizer, replay_buffer,
                  gamma, explorer, gpu=None, replay_start_size=50000,
-                 minibatch_size=32, update_frequency=1,
-                 target_update_frequency=10000,
+                 minibatch_size=32, update_interval=1,
+                 target_update_interval=10000,
                  phi=lambda x: x,
                  target_update_method='hard',
                  soft_update_tau=1e-2,
@@ -98,7 +98,7 @@ def __init__(self, model, actor_optimizer, critic_optimizer, replay_buffer,
         self.gamma = gamma
         self.explorer = explorer
         self.gpu = gpu
-        self.target_update_frequency = target_update_frequency
+        self.target_update_interval = target_update_interval
         self.phi = phi
         self.target_update_method = target_update_method
         self.soft_update_tau = soft_update_tau
@@ -119,7 +119,7 @@ def __init__(self, model, actor_optimizer, critic_optimizer, replay_buffer,
             episodic_update_len=episodic_update_len,
             n_times_update=n_times_update,
             replay_start_size=replay_start_size,
-            update_frequency=update_frequency,
+            update_interval=update_interval,
         )
         self.batch_states = batch_states
@@ -294,7 +294,7 @@ def act_and_train(self, state, reward):
         self.t += 1

         # Update the target network
-        if self.t % self.target_update_frequency == 0:
+        if self.t % self.target_update_interval == 0:
             self.sync_target_network()

         if self.last_state is not None:
diff --git a/chainerrl/agents/dqn.py b/chainerrl/agents/dqn.py
index d75175fb7..d048c4b15 100644
--- a/chainerrl/agents/dqn.py
+++ b/chainerrl/agents/dqn.py
@@ -104,8 +104,8 @@ class DQN(agent.AttributeSavingMixin, agent.Agent):
         replay_start_size (int): if the replay buffer's size is less than
             replay_start_size, skip update
         minibatch_size (int): Minibatch size
-        update_frequency (int): Model update frequency in step
-        target_update_frequency (int): Target model update frequency in step
+        update_interval (int): Model update interval in step
+        target_update_interval (int): Target model update interval in step
         clip_delta (bool): Clip delta if set True
         phi (callable): Feature extractor applied to observations
         target_update_method (str): 'hard' or 'soft'.
@@ -128,8 +128,8 @@ class DQN(agent.AttributeSavingMixin, agent.Agent):

     def __init__(self, q_function, optimizer, replay_buffer, gamma,
                  explorer, gpu=None, replay_start_size=50000,
-                 minibatch_size=32, update_frequency=1,
-                 target_update_frequency=10000, clip_delta=True,
+                 minibatch_size=32, update_interval=1,
+                 target_update_interval=10000, clip_delta=True,
                  phi=lambda x: x,
                  target_update_method='hard',
                  soft_update_tau=1e-2,
@@ -152,7 +152,7 @@ def __init__(self, q_function, optimizer, replay_buffer, gamma,
         self.gamma = gamma
         self.explorer = explorer
         self.gpu = gpu
-        self.target_update_frequency = target_update_frequency
+        self.target_update_interval = target_update_interval
         self.clip_delta = clip_delta
         self.phi = phi
         self.target_update_method = target_update_method
@@ -173,7 +173,7 @@ def __init__(self, q_function, optimizer, replay_buffer, gamma,
             episodic_update_len=episodic_update_len,
             n_times_update=n_times_update,
             replay_start_size=replay_start_size,
-            update_frequency=update_frequency,
+            update_interval=update_interval,
         )
         self.t = 0
@@ -405,7 +405,7 @@ def act_and_train(self, state, reward):
         self.t += 1

         # Update the target network
-        if self.t % self.target_update_frequency == 0:
+        if self.t % self.target_update_interval == 0:
             self.sync_target_network()

         if self.last_state is not None:
diff --git a/chainerrl/agents/pgt.py b/chainerrl/agents/pgt.py
index 000cbe279..584fa6ba1 100644
--- a/chainerrl/agents/pgt.py
+++ b/chainerrl/agents/pgt.py
@@ -40,8 +40,8 @@ class PGT(AttributeSavingMixin, Agent):
         replay_start_size (int): if the replay buffer's size is less than
             replay_start_size, skip update
         minibatch_size (int): Minibatch size
-        update_frequency (int): Model update frequency in step
-        target_update_frequency (int): Target model update frequency in step
+        update_interval (int): Model update interval in step
+        target_update_interval (int): Target model update interval in step
         phi (callable): Feature extractor applied to observations
         target_update_method (str): 'hard' or 'soft'.
         soft_update_tau (float): Tau of soft target update.
@@ -67,8 +67,8 @@ class PGT(AttributeSavingMixin, Agent):
     def __init__(self, model, actor_optimizer, critic_optimizer, replay_buffer,
                  gamma, explorer, beta=1e-2, act_deterministically=False,
                  gpu=-1, replay_start_size=50000,
-                 minibatch_size=32, update_frequency=1,
-                 target_update_frequency=10000,
+                 minibatch_size=32, update_interval=1,
+                 target_update_interval=10000,
                  phi=lambda x: x,
                  target_update_method='hard',
                  soft_update_tau=1e-2,
@@ -88,7 +88,7 @@ def __init__(self, model, actor_optimizer, critic_optimizer, replay_buffer,
         self.gamma = gamma
         self.explorer = explorer
         self.gpu = gpu
-        self.target_update_frequency = target_update_frequency
+        self.target_update_interval = target_update_interval
         self.phi = phi
         self.target_update_method = target_update_method
         self.soft_update_tau = soft_update_tau
@@ -106,7 +106,7 @@ def __init__(self, model, actor_optimizer, critic_optimizer, replay_buffer,
             episodic_update=False,
             n_times_update=n_times_update,
             replay_start_size=replay_start_size,
-            update_frequency=update_frequency,
+            update_interval=update_interval,
         )
         self.batch_states = batch_states
@@ -211,7 +211,7 @@ def act_and_train(self, state, reward):
         self.t += 1

         # Update the target network
-        if self.t % self.target_update_frequency == 0:
+        if self.t % self.target_update_interval == 0:
             self.sync_target_network()

         if self.last_state is not None:
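Note (not part of the patch): a minimal sketch of how the renamed constructor keywords are used after this change. The dimensions, network sizes, and optimizer settings below are illustrative assumptions, not values taken from the diff.

import numpy as np
import chainer
from chainerrl import agents, explorers, q_functions, replay_buffer

obs_size, n_actions = 4, 2  # assumed toy dimensions

q_func = q_functions.FCStateQFunctionWithDiscreteAction(
    obs_size, n_actions, n_hidden_channels=50, n_hidden_layers=2)
opt = chainer.optimizers.Adam(eps=1e-2)
opt.setup(q_func)
rbuf = replay_buffer.ReplayBuffer(10 ** 5)
explorer = explorers.ConstantEpsilonGreedy(
    0.3, lambda: np.random.randint(n_actions))

agent = agents.DQN(
    q_func, opt, rbuf, gamma=0.99, explorer=explorer,
    replay_start_size=500,
    update_interval=1,            # was update_frequency
    target_update_interval=100,   # was target_update_frequency
    phi=lambda x: x.astype(np.float32, copy=False))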
diff --git a/chainerrl/experiments/evaluator.py b/chainerrl/experiments/evaluator.py
index 65a85f84e..8b104ef93 100644
--- a/chainerrl/experiments/evaluator.py
+++ b/chainerrl/experiments/evaluator.py
@@ -68,7 +68,7 @@ def update_best_model(agent, outdir, t, old_max_score, new_max_score, logger):

 class Evaluator(object):

-    def __init__(self, agent, env, n_runs, eval_frequency,
+    def __init__(self, agent, env, n_runs, eval_interval,
                  outdir, max_episode_len=None, explorer=None,
                  step_offset=0, logger=None):
         self.agent = agent
@@ -76,13 +76,13 @@ def __init__(self, agent, env, n_runs, eval_frequency,
         self.max_score = np.finfo(np.float32).min
         self.start_time = time.time()
         self.n_runs = n_runs
-        self.eval_frequency = eval_frequency
+        self.eval_interval = eval_interval
         self.outdir = outdir
         self.max_episode_len = max_episode_len
         self.explorer = explorer
         self.step_offset = step_offset
         self.prev_eval_t = (self.step_offset -
-                            self.step_offset % self.eval_frequency)
+                            self.step_offset % self.eval_interval)
         self.logger = logger or logging.getLogger(__name__)

         # Write a header line first
@@ -108,22 +108,22 @@ def evaluate_and_update_max_score(self, t):
         return mean

     def evaluate_if_necessary(self, t):
-        if t >= self.prev_eval_t + self.eval_frequency:
+        if t >= self.prev_eval_t + self.eval_interval:
             score = self.evaluate_and_update_max_score(t)
-            self.prev_eval_t = t - t % self.eval_frequency
+            self.prev_eval_t = t - t % self.eval_interval
             return score
         return None


 class AsyncEvaluator(object):

-    def __init__(self, n_runs, eval_frequency,
+    def __init__(self, n_runs, eval_interval,
                  outdir, max_episode_len=None, explorer=None,
                  step_offset=0, logger=None):
         self.start_time = time.time()
         self.n_runs = n_runs
-        self.eval_frequency = eval_frequency
+        self.eval_interval = eval_interval
         self.outdir = outdir
         self.max_episode_len = max_episode_len
         self.explorer = explorer
@@ -132,7 +132,7 @@ def __init__(self, n_runs, eval_frequency,

         # Values below are shared among processes
         self.prev_eval_t = mp.Value(
-            'l', self.step_offset - self.step_offset % self.eval_frequency)
+            'l', self.step_offset - self.step_offset % self.eval_interval)
         self._max_score = mp.Value('f',
                                    np.finfo(np.float32).min)
         self.wrote_header = mp.Value('b', False)
@@ -173,9 +173,9 @@ def write_header(self, agent):
     def evaluate_if_necessary(self, t, env, agent):
         necessary = False
         with self.prev_eval_t.get_lock():
-            if t >= self.prev_eval_t.value + self.eval_frequency:
+            if t >= self.prev_eval_t.value + self.eval_interval:
                 necessary = True
-                self.prev_eval_t.value += self.eval_frequency
+                self.prev_eval_t.value += self.eval_interval
         if necessary:
             with self.wrote_header.get_lock():
                 if not self.wrote_header.value:
diff --git a/chainerrl/experiments/train_agent.py b/chainerrl/experiments/train_agent.py
index 15461ae99..433c426d1 100644
--- a/chainerrl/experiments/train_agent.py
+++ b/chainerrl/experiments/train_agent.py
@@ -86,7 +86,7 @@ def train_agent(agent, env, steps, outdir, max_episode_len=None,


 def train_agent_with_evaluation(
-        agent, env, steps, eval_n_runs, eval_frequency,
+        agent, env, steps, eval_n_runs, eval_interval,
         outdir, max_episode_len=None, step_offset=0, eval_explorer=None,
         eval_max_episode_len=None, eval_env=None, successful_score=None,
         render=False, logger=None):
@@ -97,7 +97,7 @@ def train_agent_with_evaluation(
         env: Environment.
         steps (int): Number of total time steps for training.
         eval_n_runs (int): Number of runs for each time of evaluation.
-        eval_frequency (int): Interval of evaluation.
+        eval_interval (int): Interval of evaluation.
         outdir (str): Path to the directory to output things.
         max_episode_len (int): Maximum episode length.
         step_offset (int): Time step from which training starts.
@@ -119,7 +119,7 @@ def train_agent_with_evaluation(

     evaluator = Evaluator(agent=agent,
                           n_runs=eval_n_runs,
-                          eval_frequency=eval_frequency, outdir=outdir,
+                          eval_interval=eval_interval, outdir=outdir,
                           max_episode_len=eval_max_episode_len,
                           explorer=eval_explorer,
                           env=eval_env,
diff --git a/chainerrl/experiments/train_agent_async.py b/chainerrl/experiments/train_agent_async.py
index 3c220667f..830ac32de 100644
--- a/chainerrl/experiments/train_agent_async.py
+++ b/chainerrl/experiments/train_agent_async.py
@@ -118,7 +118,7 @@ def set_shared_objects(agent, shared_objects):


 def train_agent_async(outdir, processes, make_env,
-                      profile=False, steps=8 * 10 ** 7, eval_frequency=10 ** 6,
+                      profile=False, steps=8 * 10 ** 7, eval_interval=10 ** 6,
                       eval_n_runs=10, gamma=0.99, max_episode_len=None,
                       step_offset=0, successful_score=None,
                       eval_explorer=None,
@@ -155,7 +155,7 @@ def train_agent_async(outdir, processes, make_env,

     evaluator = AsyncEvaluator(
         n_runs=eval_n_runs,
-        eval_frequency=eval_frequency, outdir=outdir,
+        eval_interval=eval_interval, outdir=outdir,
         max_episode_len=max_episode_len,
         step_offset=step_offset,
         explorer=eval_explorer,
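Note (not part of the patch): the renamed eval_interval drives the same bookkeeping as before. Condensed outside the Evaluator class, that logic behaves as in this illustrative sketch.

class IntervalGate(object):
    """Mirrors the eval_interval bookkeeping in the Evaluator hunks above."""

    def __init__(self, eval_interval, step_offset=0):
        self.eval_interval = eval_interval
        self.prev_eval_t = step_offset - step_offset % eval_interval

    def evaluate_if_necessary(self, t):
        if t >= self.prev_eval_t + self.eval_interval:
            self.prev_eval_t = t - t % self.eval_interval
            return True
        return False

gate = IntervalGate(eval_interval=1000)
fired = [t for t in (999, 1000, 1500, 2300) if gate.evaluate_if_necessary(t)]
assert fired == [1000, 2300]  # at most one evaluation per eval_interval window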
diff --git a/chainerrl/replay_buffer.py b/chainerrl/replay_buffer.py
index ad952d25a..dd141e1ef 100644
--- a/chainerrl/replay_buffer.py
+++ b/chainerrl/replay_buffer.py
@@ -290,7 +290,7 @@ class ReplayUpdater(object):
         replay_start_size (int): if the replay buffer's size is less than
             replay_start_size, skip update
         batchsize (int): Minibatch size
-        update_frequency (int): Model update frequency in step
+        update_interval (int): Model update interval in step
         n_times_update (int): Number of repetition of update
         episodic_update (bool): Use full episodes for update if set True
         episodic_update_len (int or None): Subsequences of this length are used
@@ -298,7 +298,7 @@ class ReplayUpdater(object):
     """

     def __init__(self, replay_buffer, update_func, batchsize, episodic_update,
-                 n_times_update, replay_start_size, update_frequency,
+                 n_times_update, replay_start_size, update_interval,
                  episodic_update_len=None):

         assert batchsize <= replay_start_size
@@ -309,12 +309,12 @@ def __init__(self, replay_buffer, update_func, batchsize, episodic_update,
         self.episodic_update_len = episodic_update_len
         self.n_times_update = n_times_update
         self.replay_start_size = replay_start_size
-        self.update_frequency = update_frequency
+        self.update_interval = update_interval

     def update_if_necessary(self, iteration):
         if len(self.replay_buffer) < self.replay_start_size:
             return
-        if iteration % self.update_frequency != 0:
+        if iteration % self.update_interval != 0:
             return

         for _ in range(self.n_times_update):
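Note (not part of the patch): a standalone sketch of the gating ReplayUpdater.update_if_necessary performs with the renamed update_interval; buffer_size and do_update stand in for the real replay buffer and update function.

def maybe_update(iteration, buffer_size, do_update,
                 replay_start_size=100, update_interval=4, n_times_update=1):
    if buffer_size < replay_start_size:
        return  # not enough experience collected yet
    if iteration % update_interval != 0:
        return  # only update every `update_interval` iterations
    for _ in range(n_times_update):
        do_update()

updated_at = []
for step in range(1, 13):
    maybe_update(step, buffer_size=10 ** 3,
                 do_update=lambda: updated_at.append(step))
assert updated_at == [4, 8, 12]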
diff --git a/examples/ale/train_a3c_ale.py b/examples/ale/train_a3c_ale.py
index c00eba871..abaabd828 100644
--- a/examples/ale/train_a3c_ale.py
+++ b/examples/ale/train_a3c_ale.py
@@ -76,7 +76,7 @@ def main():
     parser.add_argument('--profile', action='store_true')
     parser.add_argument('--steps', type=int, default=8 * 10 ** 7)
     parser.add_argument('--lr', type=float, default=7e-4)
-    parser.add_argument('--eval-frequency', type=int, default=10 ** 6)
+    parser.add_argument('--eval-interval', type=int, default=10 ** 6)
     parser.add_argument('--eval-n-runs', type=int, default=10)
     parser.add_argument('--weight-decay', type=float, default=0.0)
     parser.add_argument('--use-lstm', action='store_true')
@@ -133,7 +133,7 @@ def make_env(process_idx, test):
         profile=args.profile,
         steps=args.steps,
         eval_n_runs=args.eval_n_runs,
-        eval_frequency=args.eval_frequency,
+        eval_interval=args.eval_interval,
         max_episode_len=args.max_episode_len)
diff --git a/examples/ale/train_acer_ale.py b/examples/ale/train_acer_ale.py
index 4aa9e0f62..c771044dc 100644
--- a/examples/ale/train_acer_ale.py
+++ b/examples/ale/train_acer_ale.py
@@ -47,7 +47,7 @@ def main():
     parser.add_argument('--profile', action='store_true')
     parser.add_argument('--steps', type=int, default=8 * 10 ** 7)
     parser.add_argument('--lr', type=float, default=7e-4)
-    parser.add_argument('--eval-frequency', type=int, default=10 ** 6)
+    parser.add_argument('--eval-interval', type=int, default=10 ** 6)
     parser.add_argument('--eval-n-runs', type=int, default=10)
     parser.add_argument('--weight-decay', type=float, default=0.0)
     parser.add_argument('--use-lstm', action='store_true')
@@ -128,7 +128,7 @@ def make_env(process_idx, test):
         profile=args.profile,
         steps=args.steps,
         eval_n_runs=args.eval_n_runs,
-        eval_frequency=args.eval_frequency,
+        eval_interval=args.eval_interval,
         max_episode_len=args.max_episode_len)
diff --git a/examples/ale/train_dqn_ale.py b/examples/ale/train_dqn_ale.py
index 254fc041e..b6c889c92 100644
--- a/examples/ale/train_dqn_ale.py
+++ b/examples/ale/train_dqn_ale.py
@@ -79,10 +79,10 @@ def main():
                         choices=['nature', 'nips', 'dueling'])
     parser.add_argument('--steps', type=int, default=10 ** 7)
     parser.add_argument('--replay-start-size', type=int, default=5 * 10 ** 4)
-    parser.add_argument('--target-update-frequency',
+    parser.add_argument('--target-update-interval',
                         type=int, default=10 ** 4)
-    parser.add_argument('--eval-frequency', type=int, default=10 ** 5)
-    parser.add_argument('--update-frequency', type=int, default=4)
+    parser.add_argument('--eval-interval', type=int, default=10 ** 5)
+    parser.add_argument('--update-interval', type=int, default=4)
     parser.add_argument('--activation', type=str, default='relu')
     parser.add_argument('--eval-n-runs', type=int, default=10)
     parser.add_argument('--no-clip-delta',
@@ -124,9 +124,9 @@ def main():
     Agent = parse_agent(args.agent)
     agent = Agent(q_func, opt, rbuf, gpu=args.gpu, gamma=0.99,
                   explorer=explorer, replay_start_size=args.replay_start_size,
-                  target_update_frequency=args.target_update_frequency,
+                  target_update_interval=args.target_update_interval,
                   clip_delta=args.clip_delta,
-                  update_frequency=args.update_frequency,
+                  update_interval=args.update_interval,
                   batch_accumulator='sum', phi=dqn_phi)

     if args.load:
@@ -145,7 +145,7 @@ def main():
             5e-2, lambda: np.random.randint(n_actions))
         experiments.train_agent_with_evaluation(
             agent=agent, env=env, steps=args.steps,
-            eval_n_runs=args.eval_n_runs, eval_frequency=args.eval_frequency,
+            eval_n_runs=args.eval_n_runs, eval_interval=args.eval_interval,
             outdir=args.outdir, eval_explorer=eval_explorer,
             eval_env=eval_env)
diff --git a/examples/ale/train_nsq_ale.py b/examples/ale/train_nsq_ale.py
index 5d9deb07b..313947dca 100644
--- a/examples/ale/train_nsq_ale.py
+++ b/examples/ale/train_nsq_ale.py
@@ -43,7 +43,7 @@ def main():
                         type=int, default=4 * 10 ** 6)
     parser.add_argument('--outdir', type=str, default='nsq_output')
     parser.add_argument('--profile', action='store_true')
-    parser.add_argument('--eval-frequency', type=int, default=10 ** 6)
+    parser.add_argument('--eval-interval', type=int, default=10 ** 6)
     parser.add_argument('--eval-n-runs', type=int, default=10)
     parser.add_argument('--demo', action='store_true', default=False)
     parser.add_argument('--load', type=str, default=None)
@@ -113,7 +113,7 @@ def make_agent(process_idx):
         profile=args.profile,
         steps=args.steps,
         eval_n_runs=args.eval_n_runs,
-        eval_frequency=args.eval_frequency,
+        eval_interval=args.eval_interval,
         eval_explorer=explorer)

 if __name__ == '__main__':
diff --git a/examples/gym/train_a3c_gym.py b/examples/gym/train_a3c_gym.py
index 191d85232..6de70941c 100644
--- a/examples/gym/train_a3c_gym.py
+++ b/examples/gym/train_a3c_gym.py
@@ -93,7 +93,7 @@ def main():
     parser.add_argument('--beta', type=float, default=1e-2)
     parser.add_argument('--profile', action='store_true')
     parser.add_argument('--steps', type=int, default=8 * 10 ** 7)
-    parser.add_argument('--eval-frequency', type=int, default=10 ** 5)
+    parser.add_argument('--eval-interval', type=int, default=10 ** 5)
     parser.add_argument('--eval-n-runs', type=int, default=10)
     parser.add_argument('--reward-scale-factor', type=float, default=1e-2)
     parser.add_argument('--rmsprop-epsilon', type=float, default=1e-1)
@@ -169,7 +169,7 @@ def make_env(process_idx, test):
         profile=args.profile,
         steps=args.steps,
         eval_n_runs=args.eval_n_runs,
-        eval_frequency=args.eval_frequency,
+        eval_interval=args.eval_interval,
         max_episode_len=timestep_limit)
diff --git a/examples/gym/train_acer_gym.py b/examples/gym/train_acer_gym.py
index 2feb6c6ff..16ec28bbb 100644
--- a/examples/gym/train_acer_gym.py
+++ b/examples/gym/train_acer_gym.py
@@ -50,7 +50,7 @@ def main():
     parser.add_argument('--beta', type=float, default=1e-2)
     parser.add_argument('--profile', action='store_true')
     parser.add_argument('--steps', type=int, default=8 * 10 ** 7)
-    parser.add_argument('--eval-frequency', type=int, default=10 ** 5)
+    parser.add_argument('--eval-interval', type=int, default=10 ** 5)
     parser.add_argument('--eval-n-runs', type=int, default=10)
     parser.add_argument('--reward-scale-factor', type=float, default=1e-2)
     parser.add_argument('--rmsprop-epsilon', type=float, default=1e-2)
@@ -157,7 +157,7 @@ def make_env(process_idx, test):
         profile=args.profile,
         steps=args.steps,
         eval_n_runs=args.eval_n_runs,
-        eval_frequency=args.eval_frequency,
+        eval_interval=args.eval_interval,
         max_episode_len=timestep_limit)
diff --git a/examples/gym/train_ddpg_gym.py b/examples/gym/train_ddpg_gym.py
index 00309dd67..8baa3c759 100644
--- a/examples/gym/train_ddpg_gym.py
+++ b/examples/gym/train_ddpg_gym.py
@@ -44,14 +44,14 @@ def main():
     parser.add_argument('--n-hidden-layers', type=int, default=3)
     parser.add_argument('--replay-start-size', type=int, default=5000)
     parser.add_argument('--n-update-times', type=int, default=1)
-    parser.add_argument('--target-update-frequency',
+    parser.add_argument('--target-update-interval',
                         type=int, default=1)
     parser.add_argument('--target-update-method',
                         type=str, default='soft', choices=['hard', 'soft'])
     parser.add_argument('--soft-update-tau', type=float, default=1e-2)
-    parser.add_argument('--update-frequency', type=int, default=4)
+    parser.add_argument('--update-interval', type=int, default=4)
     parser.add_argument('--eval-n-runs', type=int, default=100)
-    parser.add_argument('--eval-frequency', type=int, default=10 ** 5)
+    parser.add_argument('--eval-interval', type=int, default=10 ** 5)
     parser.add_argument('--gamma', type=float, default=0.995)
     parser.add_argument('--minibatch-size', type=int, default=200)
     parser.add_argument('--render', action='store_true')
@@ -144,8 +144,8 @@ def random_action():
     agent = DDPG(model, opt_a, opt_c, rbuf, gamma=args.gamma,
                  explorer=explorer, replay_start_size=args.replay_start_size,
                  target_update_method=args.target_update_method,
-                 target_update_frequency=args.target_update_frequency,
-                 update_frequency=args.update_frequency,
+                 target_update_interval=args.target_update_interval,
+                 update_interval=args.update_interval,
                  soft_update_tau=args.soft_update_tau,
                  n_times_update=args.n_update_times,
                  phi=phi, gpu=args.gpu, minibatch_size=args.minibatch_size)
@@ -164,7 +164,7 @@ def random_action():
     else:
         experiments.train_agent_with_evaluation(
             agent=agent, env=env, steps=args.steps,
-            eval_n_runs=args.eval_n_runs, eval_frequency=args.eval_frequency,
+            eval_n_runs=args.eval_n_runs, eval_interval=args.eval_interval,
             outdir=args.outdir,
             max_episode_len=timestep_limit)
diff --git a/examples/gym/train_dqn_gym.py b/examples/gym/train_dqn_gym.py
index f8e3ec40f..f31a7028a 100644
--- a/examples/gym/train_dqn_gym.py
+++ b/examples/gym/train_dqn_gym.py
@@ -43,12 +43,12 @@ def main():
     parser.add_argument('--prioritized-replay', action='store_true')
     parser.add_argument('--episodic-replay', action='store_true')
     parser.add_argument('--replay-start-size', type=int, default=None)
-    parser.add_argument('--target-update-frequency', type=int, default=10 ** 2)
+    parser.add_argument('--target-update-interval', type=int, default=10 ** 2)
     parser.add_argument('--target-update-method', type=str, default='hard')
     parser.add_argument('--soft-update-tau', type=float, default=1e-2)
-    parser.add_argument('--update-frequency', type=int, default=1)
+    parser.add_argument('--update-interval', type=int, default=1)
     parser.add_argument('--eval-n-runs', type=int, default=100)
-    parser.add_argument('--eval-frequency', type=int, default=10 ** 4)
+    parser.add_argument('--eval-interval', type=int, default=10 ** 4)
     parser.add_argument('--n-hidden-channels', type=int, default=100)
     parser.add_argument('--n-hidden-layers', type=int, default=2)
     parser.add_argument('--gamma', type=float, default=0.99)
@@ -123,7 +123,7 @@ def make_env(for_eval):
         if args.prioritized_replay:
             betasteps = \
                 (args.steps - timestep_limit * args.replay_start_size) \
-                // args.update_frequency
+                // args.update_interval
             rbuf = replay_buffer.PrioritizedEpisodicReplayBuffer(
                 rbuf_capacity, betasteps=betasteps)
         else:
@@ -135,7 +135,7 @@ def make_env(for_eval):
             args.replay_start_size = 1000
         if args.prioritized_replay:
             betasteps = (args.steps - args.replay_start_size) \
-                // args.update_frequency
+                // args.update_interval
             rbuf = replay_buffer.PrioritizedReplayBuffer(
                 rbuf_capacity, betasteps=betasteps)
         else:
@@ -146,8 +146,8 @@ def phi(obs):
     agent = DQN(q_func, opt, rbuf, gpu=args.gpu, gamma=args.gamma,
                 explorer=explorer, replay_start_size=args.replay_start_size,
-                target_update_frequency=args.target_update_frequency,
-                update_frequency=args.update_frequency,
+                target_update_interval=args.target_update_interval,
+                update_interval=args.update_interval,
                 phi=phi, minibatch_size=args.minibatch_size,
                 target_update_method=args.target_update_method,
                 soft_update_tau=args.soft_update_tau,
@@ -169,7 +169,7 @@ def phi(obs):
     else:
         experiments.train_agent_with_evaluation(
             agent=agent, env=env, steps=args.steps,
-            eval_n_runs=args.eval_n_runs, eval_frequency=args.eval_frequency,
+            eval_n_runs=args.eval_n_runs, eval_interval=args.eval_interval,
             outdir=args.outdir, eval_env=eval_env,
             max_episode_len=timestep_limit)
diff --git a/examples/gym/train_pcl_gym.py b/examples/gym/train_pcl_gym.py
index 122ca7edc..0541e019c 100644
--- a/examples/gym/train_pcl_gym.py
+++ b/examples/gym/train_pcl_gym.py
@@ -53,7 +53,7 @@ def main():
     parser.add_argument('--tau', type=float, default=1e-2)
     parser.add_argument('--profile', action='store_true')
     parser.add_argument('--steps', type=int, default=8 * 10 ** 7)
-    parser.add_argument('--eval-frequency', type=int, default=10 ** 5)
+    parser.add_argument('--eval-interval', type=int, default=10 ** 5)
     parser.add_argument('--eval-n-runs', type=int, default=10)
     parser.add_argument('--reward-scale-factor', type=float, default=1e-2)
     parser.add_argument('--render', action='store_true', default=False)
@@ -189,7 +189,7 @@ def make_env(process_idx, test):
             profile=args.profile,
             steps=args.steps,
             eval_n_runs=args.eval_n_runs,
-            eval_frequency=args.eval_frequency,
+            eval_interval=args.eval_interval,
             max_episode_len=timestep_limit)
     else:
         experiments.train_agent_with_evaluation(
@@ -199,7 +199,7 @@ def make_env(process_idx, test):
             outdir=args.outdir,
             steps=args.steps,
             eval_n_runs=args.eval_n_runs,
-            eval_frequency=args.eval_frequency,
+            eval_interval=args.eval_interval,
             max_episode_len=timestep_limit)
diff --git a/examples/quickstart/quickstart.ipynb b/examples/quickstart/quickstart.ipynb
index 021101c22..5e10be112 100644
--- a/examples/quickstart/quickstart.ipynb
+++ b/examples/quickstart/quickstart.ipynb
@@ -231,8 +231,8 @@
    "# Now create an agent that will interact with the environment.\n",
    "agent = chainerrl.agents.DoubleDQN(\n",
    "    q_func, optimizer, replay_buffer, gamma, explorer,\n",
-    "    replay_start_size=500, update_frequency=1,\n",
-    "    target_update_frequency=100, phi=phi)"
+    "    replay_start_size=500, update_interval=1,\n",
+    "    target_update_interval=100, phi=phi)"
   ]
  },
 {
@@ -473,7 +473,7 @@
    "    steps=2000,          # Train the agent for 2000 steps\n",
    "    eval_n_runs=10,      # 10 episodes are sampled for each evaluation\n",
    "    max_episode_len=200, # Maximum length of each episodes\n",
-    "    eval_frequency=1000, # Evaluate the agent after every 1000 steps\n",
+    "    eval_interval=1000,  # Evaluate the agent after every 1000 steps\n",
    "    outdir='result')     # Save everything to 'result' directory"
   ]
  },
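Note (not part of the patch): the example scripts above expose the renamed settings as command-line flags. A condensed, hypothetical sketch of how those flags feed the renamed keywords (defaults follow examples/ale/train_dqn_ale.py):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--update-interval', type=int, default=4)
parser.add_argument('--target-update-interval', type=int, default=10 ** 4)
parser.add_argument('--eval-interval', type=int, default=10 ** 5)
args = parser.parse_args([])  # use defaults; a real script parses sys.argv

# Keywords consumed by the agent constructors (DQN/DDPG/PGT):
agent_kwargs = dict(update_interval=args.update_interval,
                    target_update_interval=args.target_update_interval)
# Keyword consumed by train_agent_with_evaluation / train_agent_async:
print(agent_kwargs, dict(eval_interval=args.eval_interval))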
diff --git a/tests/agents_tests/test_a3c.py b/tests/agents_tests/test_a3c.py
index fe85ebfb6..33b32a3ec 100644
--- a/tests/agents_tests/test_a3c.py
+++ b/tests/agents_tests/test_a3c.py
@@ -138,7 +138,7 @@ def phi(x):
             outdir=self.outdir, processes=nproc,
             make_env=make_env, agent=agent, steps=steps,
             max_episode_len=max_episode_len,
-            eval_frequency=500,
+            eval_interval=500,
             eval_n_runs=5,
             successful_score=1)
diff --git a/tests/agents_tests/test_acer.py b/tests/agents_tests/test_acer.py
index 08b719faa..3d476fa29 100644
--- a/tests/agents_tests/test_acer.py
+++ b/tests/agents_tests/test_acer.py
@@ -467,7 +467,7 @@ def phi(x):
             outdir=self.outdir, processes=nproc,
             make_env=make_env, agent=agent, steps=steps,
             max_episode_len=max_episode_len,
-            eval_frequency=500,
+            eval_interval=500,
             eval_n_runs=5,
             successful_score=1)
diff --git a/tests/agents_tests/test_al.py b/tests/agents_tests/test_al.py
index 50e0ab1c9..e773d387a 100644
--- a/tests/agents_tests/test_al.py
+++ b/tests/agents_tests/test_al.py
@@ -17,7 +17,7 @@ class TestALOnDiscreteABC(_TestDQNOnDiscreteABC):
     def make_dqn_agent(self, env, q_func, opt, explorer, rbuf, gpu):
         return AL(
             q_func, opt, rbuf, gpu=gpu, gamma=0.9, explorer=explorer,
-            replay_start_size=100, target_update_frequency=100)
+            replay_start_size=100, target_update_interval=100)


 class TestALOnContinuousABC(_TestDQNOnContinuousABC):
@@ -25,7 +25,7 @@ class TestALOnContinuousABC(_TestDQNOnContinuousABC):
     def make_dqn_agent(self, env, q_func, opt, explorer, rbuf, gpu):
         return AL(
             q_func, opt, rbuf, gpu=gpu, gamma=0.9, explorer=explorer,
-            replay_start_size=100, target_update_frequency=100)
+            replay_start_size=100, target_update_interval=100)


 class TestALOnDiscretePOABC(_TestDQNOnDiscretePOABC):
@@ -33,5 +33,5 @@ class TestALOnDiscretePOABC(_TestDQNOnDiscretePOABC):
     def make_dqn_agent(self, env, q_func, opt, explorer, rbuf, gpu):
         return AL(
             q_func, opt, rbuf, gpu=gpu, gamma=0.9, explorer=explorer,
-            replay_start_size=100, target_update_frequency=100,
+            replay_start_size=100, target_update_interval=100,
             episodic_update=True)
diff --git a/tests/agents_tests/test_ddpg.py b/tests/agents_tests/test_ddpg.py
index 4dc5f9df7..9bb284430 100644
--- a/tests/agents_tests/test_ddpg.py
+++ b/tests/agents_tests/test_ddpg.py
@@ -118,8 +118,8 @@ def make_ddpg_agent(self, env, model, actor_opt, critic_opt, explorer,
                         rbuf, gpu):
         return DDPG(model, actor_opt, critic_opt, rbuf, gpu=gpu, gamma=0.9,
                     explorer=explorer, replay_start_size=100,
-                    target_update_method='soft', target_update_frequency=1,
-                    episodic_update=True, update_frequency=1)
+                    target_update_method='soft', target_update_interval=1,
+                    episodic_update=True, update_interval=1)


 class TestDDPGOnContinuousABC(_TestDDPGOnContinuousABC):
@@ -128,5 +128,5 @@ def make_ddpg_agent(self, env, model, actor_opt, critic_opt, explorer,
                         rbuf, gpu):
         return DDPG(model, actor_opt, critic_opt, rbuf, gpu=gpu, gamma=0.9,
                     explorer=explorer, replay_start_size=100,
-                    target_update_method='soft', target_update_frequency=1,
+                    target_update_method='soft', target_update_interval=1,
                     episodic_update=False)
diff --git a/tests/agents_tests/test_double_dqn.py b/tests/agents_tests/test_double_dqn.py
index b41b4846c..8304a4a86 100644
--- a/tests/agents_tests/test_double_dqn.py
+++ b/tests/agents_tests/test_double_dqn.py
@@ -17,7 +17,7 @@ class TestDoubleDQNOnDiscreteABC(_TestDQNOnDiscreteABC):
     def make_dqn_agent(self, env, q_func, opt, explorer, rbuf, gpu):
         return DoubleDQN(
             q_func, opt, rbuf, gpu=gpu, gamma=0.9, explorer=explorer,
-            replay_start_size=100, target_update_frequency=100)
+            replay_start_size=100, target_update_interval=100)


 class TestDoubleDQNOnContinuousABC(_TestDQNOnContinuousABC):
@@ -25,7 +25,7 @@ class TestDoubleDQNOnContinuousABC(_TestDQNOnContinuousABC):
     def make_dqn_agent(self, env, q_func, opt, explorer, rbuf, gpu):
         return DoubleDQN(
             q_func, opt, rbuf, gpu=gpu, gamma=0.9, explorer=explorer,
-            replay_start_size=100, target_update_frequency=100)
+            replay_start_size=100, target_update_interval=100)


 class TestDoubleDQNOnDiscretePOABC(_TestDQNOnDiscretePOABC):
@@ -33,5 +33,5 @@ class TestDoubleDQNOnDiscretePOABC(_TestDQNOnDiscretePOABC):
     def make_dqn_agent(self, env, q_func, opt, explorer, rbuf, gpu):
         return DoubleDQN(
             q_func, opt, rbuf, gpu=gpu, gamma=0.9, explorer=explorer,
-            replay_start_size=100, target_update_frequency=100,
+            replay_start_size=100, target_update_interval=100,
             episodic_update=True)
diff --git a/tests/agents_tests/test_double_pal.py b/tests/agents_tests/test_double_pal.py
index c39d32d6f..a33bd47e9 100644
--- a/tests/agents_tests/test_double_pal.py
+++ b/tests/agents_tests/test_double_pal.py
@@ -17,7 +17,7 @@ class TestDoublePALOnDiscreteABC(_TestDQNOnDiscreteABC):
     def make_dqn_agent(self, env, q_func, opt, explorer, rbuf, gpu):
         return DoublePAL(
             q_func, opt, rbuf, gpu=gpu, gamma=0.9, explorer=explorer,
-            replay_start_size=100, target_update_frequency=100)
+            replay_start_size=100, target_update_interval=100)


 class TestDoublePALOnContinuousABC(_TestDQNOnContinuousABC):
@@ -25,7 +25,7 @@ class TestDoublePALOnContinuousABC(_TestDQNOnContinuousABC):
     def make_dqn_agent(self, env, q_func, opt, explorer, rbuf, gpu):
         return DoublePAL(
             q_func, opt, rbuf, gpu=gpu, gamma=0.9, explorer=explorer,
-            replay_start_size=100, target_update_frequency=100)
+            replay_start_size=100, target_update_interval=100)


 class TestDoublePALOnDiscretePOABC(_TestDQNOnDiscretePOABC):
@@ -33,5 +33,5 @@ class TestDoublePALOnDiscretePOABC(_TestDQNOnDiscretePOABC):
     def make_dqn_agent(self, env, q_func, opt, explorer, rbuf, gpu):
         return DoublePAL(
             q_func, opt, rbuf, gpu=gpu, gamma=0.9, explorer=explorer,
-            replay_start_size=100, target_update_frequency=100,
+            replay_start_size=100, target_update_interval=100,
             episodic_update=True)
diff --git a/tests/agents_tests/test_dpp.py b/tests/agents_tests/test_dpp.py
index c05cb6b88..4d6c85242 100644
--- a/tests/agents_tests/test_dpp.py
+++ b/tests/agents_tests/test_dpp.py
@@ -33,7 +33,7 @@ def make_dqn_agent(self, env, q_func, opt, explorer, rbuf, gpu):
         agent_class = parse_dpp_agent(self.dpp_type)
         return agent_class(
             q_func, opt, rbuf, gpu=gpu, gamma=0.9, explorer=explorer,
-            replay_start_size=100, target_update_frequency=100)
+            replay_start_size=100, target_update_interval=100)


 # DPP and DPPL don't support continuous action spaces
@@ -48,7 +48,7 @@ def make_dqn_agent(self, env, q_func, opt, explorer, rbuf, gpu):
         agent_class = parse_dpp_agent(self.dpp_type)
         return agent_class(
             q_func, opt, rbuf, gpu=gpu, gamma=0.9, explorer=explorer,
-            replay_start_size=100, target_update_frequency=100)
+            replay_start_size=100, target_update_interval=100)


 # Currently DPP doesn't work with recurrent models
@@ -65,5 +65,5 @@ def make_dqn_agent(self, env, q_func, opt, explorer, rbuf, gpu):
 #         agent_class = parse_dpp_agent(self.dpp_type)
 #         return agent_class(
 #             q_func, opt, rbuf, gpu=gpu, gamma=0.9, explorer=explorer,
-#             replay_start_size=100, target_update_frequency=100,
+#             replay_start_size=100, target_update_interval=100,
 #             episodic_update=True)
diff --git a/tests/agents_tests/test_dqn.py b/tests/agents_tests/test_dqn.py
index 825e45ba3..74257672a 100644
--- a/tests/agents_tests/test_dqn.py
+++ b/tests/agents_tests/test_dqn.py
@@ -17,7 +17,7 @@ class TestDQNOnDiscreteABC(_TestDQNOnDiscreteABC):
     def make_dqn_agent(self, env, q_func, opt, explorer, rbuf, gpu):
         return DQN(q_func, opt, rbuf, gpu=gpu, gamma=0.9, explorer=explorer,
-                   replay_start_size=100, target_update_frequency=100)
+                   replay_start_size=100, target_update_interval=100)


 class TestDQNOnDiscreteABCBoltzmann(_TestDQNOnDiscreteABC):
@@ -25,19 +25,19 @@ class TestDQNOnDiscreteABCBoltzmann(_TestDQNOnDiscreteABC):
     def make_dqn_agent(self, env, q_func, opt, explorer, rbuf, gpu):
         explorer = chainerrl.explorers.Boltzmann()
         return DQN(q_func, opt, rbuf, gpu=gpu, gamma=0.9, explorer=explorer,
-                   replay_start_size=100, target_update_frequency=100)
+                   replay_start_size=100, target_update_interval=100)


 class TestDQNOnContinuousABC(_TestDQNOnContinuousABC):

     def make_dqn_agent(self, env, q_func, opt, explorer, rbuf, gpu):
         return DQN(q_func, opt, rbuf, gpu=gpu, gamma=0.9, explorer=explorer,
-                   replay_start_size=100, target_update_frequency=100)
+                   replay_start_size=100, target_update_interval=100)


 class TestDQNOnDiscretePOABC(_TestDQNOnDiscretePOABC):

     def make_dqn_agent(self, env, q_func, opt, explorer, rbuf, gpu):
         return DQN(q_func, opt, rbuf, gpu=gpu, gamma=0.9, explorer=explorer,
-                   replay_start_size=100, target_update_frequency=100,
+                   replay_start_size=100, target_update_interval=100,
                    episodic_update=True)
diff --git a/tests/agents_tests/test_nsq.py b/tests/agents_tests/test_nsq.py
index d7b27714d..1fe1ce32d 100644
--- a/tests/agents_tests/test_nsq.py
+++ b/tests/agents_tests/test_nsq.py
@@ -99,7 +99,7 @@ def make_agent(process_idx):
             outdir=self.outdir, processes=nproc,
             make_env=make_env, make_agent=make_agent, steps=100000,
             max_episode_len=5,
-            eval_frequency=500,
+            eval_interval=500,
             eval_n_runs=5,
             successful_score=1,
         )
diff --git a/tests/agents_tests/test_pal.py b/tests/agents_tests/test_pal.py
index 768a06c51..43c67a35b 100644
--- a/tests/agents_tests/test_pal.py
+++ b/tests/agents_tests/test_pal.py
@@ -17,7 +17,7 @@ class TestPALOnDiscreteABC(_TestDQNOnDiscreteABC):
     def make_dqn_agent(self, env, q_func, opt, explorer, rbuf, gpu):
         return PAL(
             q_func, opt, rbuf, gpu=gpu, gamma=0.9, explorer=explorer,
-            replay_start_size=100, target_update_frequency=100)
+            replay_start_size=100, target_update_interval=100)


 class TestPALOnContinuousABC(_TestDQNOnContinuousABC):
@@ -25,7 +25,7 @@ class TestPALOnContinuousABC(_TestDQNOnContinuousABC):
     def make_dqn_agent(self, env, q_func, opt, explorer, rbuf, gpu):
         return PAL(
             q_func, opt, rbuf, gpu=gpu, gamma=0.9, explorer=explorer,
-            replay_start_size=100, target_update_frequency=100)
+            replay_start_size=100, target_update_interval=100)


 class TestPALOnDiscretePOABC(_TestDQNOnDiscretePOABC):
@@ -33,5 +33,5 @@ class TestPALOnDiscretePOABC(_TestDQNOnDiscretePOABC):
     def make_dqn_agent(self, env, q_func, opt, explorer, rbuf, gpu):
         return PAL(
             q_func, opt, rbuf, gpu=gpu, gamma=0.9, explorer=explorer,
-            replay_start_size=100, target_update_frequency=100,
+            replay_start_size=100, target_update_interval=100,
             episodic_update=True)
diff --git a/tests/agents_tests/test_pcl.py b/tests/agents_tests/test_pcl.py
index fee74b589..ef62e3ef8 100644
--- a/tests/agents_tests/test_pcl.py
+++ b/tests/agents_tests/test_pcl.py
@@ -182,7 +182,7 @@ def phi(x):
             outdir=self.outdir, processes=nproc,
             make_env=make_env, agent=agent, steps=steps,
             max_episode_len=2,
-            eval_frequency=200,
+            eval_interval=200,
             eval_n_runs=5,
             successful_score=1)
         # The agent returned by train_agent_async is not guaranteed to be
@@ -199,7 +199,7 @@ def phi(x):
             outdir=self.outdir,
             steps=steps,
             max_episode_len=2,
-            eval_frequency=500,
+            eval_interval=500,
             eval_n_runs=5,
             successful_score=1)
diff --git a/tests/agents_tests/test_pgt.py b/tests/agents_tests/test_pgt.py
index 691a121c9..e5df76693 100644
--- a/tests/agents_tests/test_pgt.py
+++ b/tests/agents_tests/test_pgt.py
@@ -128,8 +128,8 @@ def make_env_and_successful_return(self, test):
 #                     rbuf, gpu):
 #         return PGT(model, actor_opt, critic_opt, rbuf, gpu=gpu, gamma=0.9,
 #                    explorer=explorer, replay_start_size=100,
-#                    target_update_method='soft', target_update_frequency=1,
-#                    episodic_update=True, update_frequency=1,
+#                    target_update_method='soft', target_update_interval=1,
+#                    episodic_update=True, update_interval=1,
 #                    act_deterministically=True)


@@ -139,5 +139,5 @@ def make_pgt_agent(self, env, model, actor_opt, critic_opt, explorer,
                        rbuf, gpu):
         return PGT(model, actor_opt, critic_opt, rbuf, gpu=gpu, gamma=0.9,
                    explorer=explorer, replay_start_size=100,
-                   target_update_method='soft', target_update_frequency=1,
+                   target_update_method='soft', target_update_interval=1,
                    act_deterministically=True)
diff --git a/tests/agents_tests/test_residual_dqn.py b/tests/agents_tests/test_residual_dqn.py
index 15cdad170..0ea0dd67a 100644
--- a/tests/agents_tests/test_residual_dqn.py
+++ b/tests/agents_tests/test_residual_dqn.py
@@ -16,7 +16,7 @@ class TestResidualDQNOnDiscreteABC(_TestDQNOnDiscreteABC):
     def make_dqn_agent(self, env, q_func, opt, explorer, rbuf, gpu):
         return ResidualDQN(
             q_func, opt, rbuf, gpu=gpu, gamma=0.9, explorer=explorer,
-            replay_start_size=100, target_update_frequency=100,
+            replay_start_size=100, target_update_interval=100,
             grad_scale=1e-1)


@@ -25,7 +25,7 @@ class TestResidualDQNOnContinuousABC(_TestDQNOnContinuousABC):
     def make_dqn_agent(self, env, q_func, opt, explorer, rbuf, gpu):
         return ResidualDQN(
             q_func, opt, rbuf, gpu=gpu, gamma=0.9, explorer=explorer,
-            replay_start_size=100, target_update_frequency=100,
+            replay_start_size=100, target_update_interval=100,
             grad_scale=1e-1)


@@ -34,6 +34,6 @@ class TestResidualDQNOnDiscretePOABC(_TestDQNOnDiscretePOABC):
     def make_dqn_agent(self, env, q_func, opt, explorer, rbuf, gpu):
         return ResidualDQN(
             q_func, opt, rbuf, gpu=gpu, gamma=0.9, explorer=explorer,
-            replay_start_size=100, target_update_frequency=100,
+            replay_start_size=100, target_update_interval=100,
             episodic_update=True,
             grad_scale=1e-1)
diff --git a/tests/agents_tests/test_sarsa.py b/tests/agents_tests/test_sarsa.py
index fa7d9f890..64bac609a 100644
--- a/tests/agents_tests/test_sarsa.py
+++ b/tests/agents_tests/test_sarsa.py
@@ -17,7 +17,7 @@ class TestSARSAOnDiscreteABC(_TestDQNOnDiscreteABC):
     def make_dqn_agent(self, env, q_func, opt, explorer, rbuf, gpu):
         return SARSA(
             q_func, opt, rbuf, gpu=gpu, gamma=0.9, explorer=explorer,
-            replay_start_size=100, target_update_frequency=100)
+            replay_start_size=100, target_update_interval=100)


 class TestSARSAOnContinuousABC(_TestDQNOnContinuousABC):
@@ -25,7 +25,7 @@ class TestSARSAOnContinuousABC(_TestDQNOnContinuousABC):
     def make_dqn_agent(self, env, q_func, opt, explorer, rbuf, gpu):
         return SARSA(
             q_func, opt, rbuf, gpu=gpu, gamma=0.9, explorer=explorer,
-            replay_start_size=100, target_update_frequency=100)
+            replay_start_size=100, target_update_interval=100)


 class TestSARSAOnDiscretePOABC(_TestDQNOnDiscretePOABC):
@@ -33,5 +33,5 @@ class TestSARSAOnDiscretePOABC(_TestDQNOnDiscretePOABC):
     def make_dqn_agent(self, env, q_func, opt, explorer, rbuf, gpu):
         return SARSA(
             q_func, opt, rbuf, gpu=gpu, gamma=0.9, explorer=explorer,
-            replay_start_size=100, target_update_frequency=100,
+            replay_start_size=100, target_update_interval=100,
             episodic_update=True)
diff --git a/tests/agents_tests/test_training.py b/tests/agents_tests/test_training.py
index 0f869b127..4c58fbcd1 100644
--- a/tests/agents_tests/test_training.py
+++ b/tests/agents_tests/test_training.py
@@ -47,7 +47,7 @@ def _test_training(self, gpu, steps=5000, load_model=False):
         # Train
         train_agent_with_evaluation(
             agent=agent, env=env, steps=steps, outdir=self.tmpdir,
-            eval_frequency=200, eval_n_runs=5, successful_score=1,
+            eval_interval=200, eval_n_runs=5, successful_score=1,
             eval_env=test_env)

         agent.stop_episode()
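Note (not part of the patch): a quick, self-contained check of the sync condition the agents use after the rename; the target network is synchronized whenever t % target_update_interval == 0, matching the act_and_train hunks above and the intervals used in the tests.

def sync_steps(n_steps, target_update_interval):
    return [t for t in range(1, n_steps + 1) if t % target_update_interval == 0]

assert sync_steps(350, target_update_interval=100) == [100, 200, 300]
assert sync_steps(5, target_update_interval=1) == [1, 2, 3, 4, 5]  # e.g. soft updates every step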