Fix usage of "frequency"
muupan committed Apr 21, 2017
1 parent 011d2c5 commit 3831e3a
Showing 32 changed files with 113 additions and 113 deletions.
14 changes: 7 additions & 7 deletions chainerrl/agents/ddpg.py
@@ -48,8 +48,8 @@ class DDPG(AttributeSavingMixin, Agent):
replay_start_size (int): if the replay buffer's size is less than
replay_start_size, skip update
minibatch_size (int): Minibatch size
update_frequency (int): Model update frequency in step
target_update_frequency (int): Target model update frequency in step
update_interval (int): Model update interval in step
target_update_interval (int): Target model update interval in step
phi (callable): Feature extractor applied to observations
target_update_method (str): 'hard' or 'soft'.
soft_update_tau (float): Tau of soft target update.
@@ -75,8 +75,8 @@ class DDPG(AttributeSavingMixin, Agent):
def __init__(self, model, actor_optimizer, critic_optimizer, replay_buffer,
gamma, explorer,
gpu=None, replay_start_size=50000,
minibatch_size=32, update_frequency=1,
target_update_frequency=10000,
minibatch_size=32, update_interval=1,
target_update_interval=10000,
phi=lambda x: x,
target_update_method='hard',
soft_update_tau=1e-2,
@@ -98,7 +98,7 @@ def __init__(self, model, actor_optimizer, critic_optimizer, replay_buffer,
self.gamma = gamma
self.explorer = explorer
self.gpu = gpu
self.target_update_frequency = target_update_frequency
self.target_update_interval = target_update_interval
self.phi = phi
self.target_update_method = target_update_method
self.soft_update_tau = soft_update_tau
@@ -119,7 +119,7 @@ def __init__(self, model, actor_optimizer, critic_optimizer, replay_buffer,
episodic_update_len=episodic_update_len,
n_times_update=n_times_update,
replay_start_size=replay_start_size,
update_frequency=update_frequency,
update_interval=update_interval,
)
self.batch_states = batch_states

@@ -294,7 +294,7 @@ def act_and_train(self, state, reward):
self.t += 1

# Update the target network
if self.t % self.target_update_frequency == 0:
if self.t % self.target_update_interval == 0:
self.sync_target_network()

if self.last_state is not None:
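Downstream code only needs the keyword names updated. As a rough sketch (not from this commit; model, actor_opt, critic_opt, replay_buffer, and explorer are placeholders assumed to be built elsewhere):

from chainerrl.agents.ddpg import DDPG

def make_ddpg_agent(model, actor_opt, critic_opt, replay_buffer, explorer):
    # Hypothetical helper showing the renamed keyword arguments only.
    return DDPG(model, actor_opt, critic_opt, replay_buffer,
                gamma=0.99, explorer=explorer,
                replay_start_size=50000, minibatch_size=32,
                update_interval=1,              # was update_frequency
                target_update_interval=10000)   # was target_update_frequency
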
14 changes: 7 additions & 7 deletions chainerrl/agents/dqn.py
@@ -104,8 +104,8 @@ class DQN(agent.AttributeSavingMixin, agent.Agent):
replay_start_size (int): if the replay buffer's size is less than
replay_start_size, skip update
minibatch_size (int): Minibatch size
update_frequency (int): Model update frequency in step
target_update_frequency (int): Target model update frequency in step
update_interval (int): Model update interval in step
target_update_interval (int): Target model update interval in step
clip_delta (bool): Clip delta if set True
phi (callable): Feature extractor applied to observations
target_update_method (str): 'hard' or 'soft'.
@@ -128,8 +128,8 @@ class DQN(agent.AttributeSavingMixin, agent.Agent):

def __init__(self, q_function, optimizer, replay_buffer, gamma,
explorer, gpu=None, replay_start_size=50000,
minibatch_size=32, update_frequency=1,
target_update_frequency=10000, clip_delta=True,
minibatch_size=32, update_interval=1,
target_update_interval=10000, clip_delta=True,
phi=lambda x: x,
target_update_method='hard',
soft_update_tau=1e-2,
@@ -152,7 +152,7 @@ def __init__(self, q_function, optimizer, replay_buffer, gamma,
self.gamma = gamma
self.explorer = explorer
self.gpu = gpu
self.target_update_frequency = target_update_frequency
self.target_update_interval = target_update_interval
self.clip_delta = clip_delta
self.phi = phi
self.target_update_method = target_update_method
@@ -173,7 +173,7 @@ def __init__(self, q_function, optimizer, replay_buffer, gamma,
episodic_update_len=episodic_update_len,
n_times_update=n_times_update,
replay_start_size=replay_start_size,
update_frequency=update_frequency,
update_interval=update_interval,
)

self.t = 0
@@ -405,7 +405,7 @@ def act_and_train(self, state, reward):
self.t += 1

# Update the target network
if self.t % self.target_update_frequency == 0:
if self.t % self.target_update_interval == 0:
self.sync_target_network()

if self.last_state is not None:
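The new name matches the code's semantics: self.t % self.target_update_interval == 0 fires once every target_update_interval steps, i.e. at a frequency of 1 / target_update_interval. A standalone illustration in plain Python (not repository code):

target_update_interval = 10000  # steps between hard target syncs

# Steps at which the target network would be synced during 50,000 steps.
sync_steps = [t for t in range(1, 50001)
              if t % target_update_interval == 0]
print(sync_steps)  # [10000, 20000, 30000, 40000, 50000]
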
14 changes: 7 additions & 7 deletions chainerrl/agents/pgt.py
@@ -40,8 +40,8 @@ class PGT(AttributeSavingMixin, Agent):
replay_start_size (int): if the replay buffer's size is less than
replay_start_size, skip update
minibatch_size (int): Minibatch size
update_frequency (int): Model update frequency in step
target_update_frequency (int): Target model update frequency in step
update_interval (int): Model update interval in step
target_update_interval (int): Target model update interval in step
phi (callable): Feature extractor applied to observations
target_update_method (str): 'hard' or 'soft'.
soft_update_tau (float): Tau of soft target update.
@@ -67,8 +67,8 @@ class PGT(AttributeSavingMixin, Agent):
def __init__(self, model, actor_optimizer, critic_optimizer, replay_buffer,
gamma, explorer, beta=1e-2, act_deterministically=False,
gpu=-1, replay_start_size=50000,
minibatch_size=32, update_frequency=1,
target_update_frequency=10000,
minibatch_size=32, update_interval=1,
target_update_interval=10000,
phi=lambda x: x,
target_update_method='hard',
soft_update_tau=1e-2,
@@ -88,7 +88,7 @@ def __init__(self, model, actor_optimizer, critic_optimizer, replay_buffer,
self.gamma = gamma
self.explorer = explorer
self.gpu = gpu
self.target_update_frequency = target_update_frequency
self.target_update_interval = target_update_interval
self.phi = phi
self.target_update_method = target_update_method
self.soft_update_tau = soft_update_tau
@@ -106,7 +106,7 @@ def __init__(self, model, actor_optimizer, critic_optimizer, replay_buffer,
episodic_update=False,
n_times_update=n_times_update,
replay_start_size=replay_start_size,
update_frequency=update_frequency,
update_interval=update_interval,
)
self.batch_states = batch_states

@@ -211,7 +211,7 @@ def act_and_train(self, state, reward):
self.t += 1

# Update the target network
if self.t % self.target_update_frequency == 0:
if self.t % self.target_update_interval == 0:
self.sync_target_network()

if self.last_state is not None:
20 changes: 10 additions & 10 deletions chainerrl/experiments/evaluator.py
@@ -68,21 +68,21 @@ def update_best_model(agent, outdir, t, old_max_score, new_max_score, logger):

class Evaluator(object):

def __init__(self, agent, env, n_runs, eval_frequency,
def __init__(self, agent, env, n_runs, eval_interval,
outdir, max_episode_len=None, explorer=None,
step_offset=0, logger=None):
self.agent = agent
self.env = env
self.max_score = np.finfo(np.float32).min
self.start_time = time.time()
self.n_runs = n_runs
self.eval_frequency = eval_frequency
self.eval_interval = eval_interval
self.outdir = outdir
self.max_episode_len = max_episode_len
self.explorer = explorer
self.step_offset = step_offset
self.prev_eval_t = (self.step_offset -
self.step_offset % self.eval_frequency)
self.step_offset % self.eval_interval)
self.logger = logger or logging.getLogger(__name__)

# Write a header line first
@@ -108,22 +108,22 @@ def evaluate_and_update_max_score(self, t):
return mean

def evaluate_if_necessary(self, t):
if t >= self.prev_eval_t + self.eval_frequency:
if t >= self.prev_eval_t + self.eval_interval:
score = self.evaluate_and_update_max_score(t)
self.prev_eval_t = t - t % self.eval_frequency
self.prev_eval_t = t - t % self.eval_interval
return score
return None


class AsyncEvaluator(object):

def __init__(self, n_runs, eval_frequency,
def __init__(self, n_runs, eval_interval,
outdir, max_episode_len=None, explorer=None,
step_offset=0, logger=None):

self.start_time = time.time()
self.n_runs = n_runs
self.eval_frequency = eval_frequency
self.eval_interval = eval_interval
self.outdir = outdir
self.max_episode_len = max_episode_len
self.explorer = explorer
@@ -132,7 +132,7 @@ def __init__(self, n_runs, eval_frequency,

# Values below are shared among processes
self.prev_eval_t = mp.Value(
'l', self.step_offset - self.step_offset % self.eval_frequency)
'l', self.step_offset - self.step_offset % self.eval_interval)
self._max_score = mp.Value('f', np.finfo(np.float32).min)
self.wrote_header = mp.Value('b', False)

@@ -173,9 +173,9 @@ def write_header(self, agent):
def evaluate_if_necessary(self, t, env, agent):
necessary = False
with self.prev_eval_t.get_lock():
if t >= self.prev_eval_t.value + self.eval_frequency:
if t >= self.prev_eval_t.value + self.eval_interval:
necessary = True
self.prev_eval_t.value += self.eval_frequency
self.prev_eval_t.value += self.eval_interval
if necessary:
with self.wrote_header.get_lock():
if not self.wrote_header.value:
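Evaluator keeps prev_eval_t snapped to a multiple of eval_interval, so at most one evaluation runs per interval even when t jumps by several steps between calls. A simplified single-process sketch of that bookkeeping (illustration only, not the class itself):

def should_evaluate(t, prev_eval_t, eval_interval):
    # Mirrors Evaluator.evaluate_if_necessary: evaluate once the step
    # counter has passed the next multiple of eval_interval.
    if t >= prev_eval_t + eval_interval:
        return True, t - t % eval_interval  # new prev_eval_t
    return False, prev_eval_t

prev = 0
for t in (3, 7, 12, 25):
    do_eval, prev = should_evaluate(t, prev, eval_interval=10)
    print(t, do_eval)   # evaluates at t=12 and t=25
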
6 changes: 3 additions & 3 deletions chainerrl/experiments/train_agent.py
@@ -86,7 +86,7 @@ def train_agent(agent, env, steps, outdir, max_episode_len=None,


def train_agent_with_evaluation(
agent, env, steps, eval_n_runs, eval_frequency,
agent, env, steps, eval_n_runs, eval_interval,
outdir, max_episode_len=None, step_offset=0, eval_explorer=None,
eval_max_episode_len=None, eval_env=None, successful_score=None,
render=False, logger=None):
@@ -97,7 +97,7 @@ def train_agent_with_evaluation(
env: Environment.
steps (int): Number of total time steps for training.
eval_n_runs (int): Number of runs for each time of evaluation.
eval_frequency (int): Interval of evaluation.
eval_interval (int): Interval of evaluation.
outdir (str): Path to the directory to output things.
max_episode_len (int): Maximum episode length.
step_offset (int): Time step from which training starts.
@@ -119,7 +119,7 @@ def train_agent_with_evaluation(

evaluator = Evaluator(agent=agent,
n_runs=eval_n_runs,
eval_frequency=eval_frequency, outdir=outdir,
eval_interval=eval_interval, outdir=outdir,
max_episode_len=eval_max_episode_len,
explorer=eval_explorer,
env=eval_env,
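Call sites only rename the keyword. A hedged sketch, assuming agent, env, and outdir are prepared elsewhere (for example as in the ALE scripts further down):

from chainerrl import experiments

def run_training(agent, env, outdir):
    # Sketch only: evaluate 10 episodes every 10^5 steps over 10^6 training steps.
    experiments.train_agent_with_evaluation(
        agent=agent, env=env, steps=10 ** 6,
        eval_n_runs=10, eval_interval=10 ** 5,  # was eval_frequency
        outdir=outdir)
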
4 changes: 2 additions & 2 deletions chainerrl/experiments/train_agent_async.py
@@ -118,7 +118,7 @@ def set_shared_objects(agent, shared_objects):


def train_agent_async(outdir, processes, make_env,
profile=False, steps=8 * 10 ** 7, eval_frequency=10 ** 6,
profile=False, steps=8 * 10 ** 7, eval_interval=10 ** 6,
eval_n_runs=10, gamma=0.99, max_episode_len=None,
step_offset=0, successful_score=None,
eval_explorer=None,
@@ -155,7 +155,7 @@ def train_agent_async(outdir, processes, make_env,

evaluator = AsyncEvaluator(
n_runs=eval_n_runs,
eval_frequency=eval_frequency, outdir=outdir,
eval_interval=eval_interval, outdir=outdir,
max_episode_len=max_episode_len,
step_offset=step_offset,
explorer=eval_explorer,
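train_agent_async forwards eval_interval to an AsyncEvaluator, which shares prev_eval_t across worker processes through a locked mp.Value (see evaluator.py above) so that only one worker evaluates per interval. A rough standalone sketch of that pattern, not taken from the repository:

import multiprocessing as mp

eval_interval = 10 ** 6
prev_eval_t = mp.Value('l', 0)  # shared across workers, as in AsyncEvaluator

def evaluation_due(t):
    # Only one caller wins the right to evaluate for a given interval.
    with prev_eval_t.get_lock():
        if t >= prev_eval_t.value + eval_interval:
            prev_eval_t.value += eval_interval
            return True
    return False

print(evaluation_due(999999))   # False
print(evaluation_due(1000000))  # True
print(evaluation_due(1000001))  # False: already claimed for this interval
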
8 changes: 4 additions & 4 deletions chainerrl/replay_buffer.py
@@ -290,15 +290,15 @@ class ReplayUpdater(object):
replay_start_size (int): if the replay buffer's size is less than
replay_start_size, skip update
batchsize (int): Minibatch size
update_frequency (int): Model update frequency in step
update_interval (int): Model update interval in step
n_times_update (int): Number of repetition of update
episodic_update (bool): Use full episodes for update if set True
episodic_update_len (int or None): Subsequences of this length are used
for update if set int and episodic_update=True
"""

def __init__(self, replay_buffer, update_func, batchsize, episodic_update,
n_times_update, replay_start_size, update_frequency,
n_times_update, replay_start_size, update_interval,
episodic_update_len=None):

assert batchsize <= replay_start_size
@@ -309,12 +309,12 @@ def __init__(self, replay_buffer, update_func, batchsize, episodic_update,
self.episodic_update_len = episodic_update_len
self.n_times_update = n_times_update
self.replay_start_size = replay_start_size
self.update_frequency = update_frequency
self.update_interval = update_interval

def update_if_necessary(self, iteration):
if len(self.replay_buffer) < self.replay_start_size:
return
if iteration % self.update_frequency != 0:
if iteration % self.update_interval != 0:
return

for _ in range(self.n_times_update):
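ReplayUpdater.update_if_necessary combines two gates: wait until the buffer holds replay_start_size transitions, then run n_times_update updates on every update_interval-th call. A standalone approximation of that schedule (illustration only; the numbers are chosen for the example, not taken from the commit):

def updates_at(iteration, buffer_len, replay_start_size=50000,
               update_interval=4, n_times_update=1):
    # How many gradient updates happen at this iteration, following the
    # same gating as ReplayUpdater.update_if_necessary.
    if buffer_len < replay_start_size:
        return 0
    if iteration % update_interval != 0:
        return 0
    return n_times_update

print(updates_at(100, buffer_len=10))        # 0: buffer still warming up
print(updates_at(101, buffer_len=60000))     # 0: not an update step
print(updates_at(104, buffer_len=60000))     # 1: every 4th step
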
4 changes: 2 additions & 2 deletions examples/ale/train_a3c_ale.py
@@ -76,7 +76,7 @@ def main():
parser.add_argument('--profile', action='store_true')
parser.add_argument('--steps', type=int, default=8 * 10 ** 7)
parser.add_argument('--lr', type=float, default=7e-4)
parser.add_argument('--eval-frequency', type=int, default=10 ** 6)
parser.add_argument('--eval-interval', type=int, default=10 ** 6)
parser.add_argument('--eval-n-runs', type=int, default=10)
parser.add_argument('--weight-decay', type=float, default=0.0)
parser.add_argument('--use-lstm', action='store_true')
@@ -133,7 +133,7 @@ def make_env(process_idx, test):
profile=args.profile,
steps=args.steps,
eval_n_runs=args.eval_n_runs,
eval_frequency=args.eval_frequency,
eval_interval=args.eval_interval,
max_episode_len=args.max_episode_len)


4 changes: 2 additions & 2 deletions examples/ale/train_acer_ale.py
@@ -47,7 +47,7 @@ def main():
parser.add_argument('--profile', action='store_true')
parser.add_argument('--steps', type=int, default=8 * 10 ** 7)
parser.add_argument('--lr', type=float, default=7e-4)
parser.add_argument('--eval-frequency', type=int, default=10 ** 6)
parser.add_argument('--eval-interval', type=int, default=10 ** 6)
parser.add_argument('--eval-n-runs', type=int, default=10)
parser.add_argument('--weight-decay', type=float, default=0.0)
parser.add_argument('--use-lstm', action='store_true')
@@ -128,7 +128,7 @@ def make_env(process_idx, test):
profile=args.profile,
steps=args.steps,
eval_n_runs=args.eval_n_runs,
eval_frequency=args.eval_frequency,
eval_interval=args.eval_interval,
max_episode_len=args.max_episode_len)


12 changes: 6 additions & 6 deletions examples/ale/train_dqn_ale.py
@@ -79,10 +79,10 @@ def main():
choices=['nature', 'nips', 'dueling'])
parser.add_argument('--steps', type=int, default=10 ** 7)
parser.add_argument('--replay-start-size', type=int, default=5 * 10 ** 4)
parser.add_argument('--target-update-frequency',
parser.add_argument('--target-update-interval',
type=int, default=10 ** 4)
parser.add_argument('--eval-frequency', type=int, default=10 ** 5)
parser.add_argument('--update-frequency', type=int, default=4)
parser.add_argument('--eval-interval', type=int, default=10 ** 5)
parser.add_argument('--update-interval', type=int, default=4)
parser.add_argument('--activation', type=str, default='relu')
parser.add_argument('--eval-n-runs', type=int, default=10)
parser.add_argument('--no-clip-delta',
@@ -124,9 +124,9 @@ def main():
Agent = parse_agent(args.agent)
agent = Agent(q_func, opt, rbuf, gpu=args.gpu, gamma=0.99,
explorer=explorer, replay_start_size=args.replay_start_size,
target_update_frequency=args.target_update_frequency,
target_update_interval=args.target_update_interval,
clip_delta=args.clip_delta,
update_frequency=args.update_frequency,
update_interval=args.update_interval,
batch_accumulator='sum', phi=dqn_phi)

if args.load:
@@ -145,7 +145,7 @@ def main():
5e-2, lambda: np.random.randint(n_actions))
experiments.train_agent_with_evaluation(
agent=agent, env=env, steps=args.steps,
eval_n_runs=args.eval_n_runs, eval_frequency=args.eval_frequency,
eval_n_runs=args.eval_n_runs, eval_interval=args.eval_interval,
outdir=args.outdir, eval_explorer=eval_explorer,
eval_env=eval_env)

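Because argparse converts dashes to underscores, the renamed flags show up as args.target_update_interval, args.eval_interval, and args.update_interval, which is what the agent constructor above expects. A quick standalone check using the same defaults:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--target-update-interval', type=int, default=10 ** 4)
parser.add_argument('--eval-interval', type=int, default=10 ** 5)
parser.add_argument('--update-interval', type=int, default=4)

args = parser.parse_args([])  # fall back to the defaults
print(args.target_update_interval, args.eval_interval, args.update_interval)
# 10000 100000 4
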
4 changes: 2 additions & 2 deletions examples/ale/train_nsq_ale.py
@@ -43,7 +43,7 @@ def main():
type=int, default=4 * 10 ** 6)
parser.add_argument('--outdir', type=str, default='nsq_output')
parser.add_argument('--profile', action='store_true')
parser.add_argument('--eval-frequency', type=int, default=10 ** 6)
parser.add_argument('--eval-interval', type=int, default=10 ** 6)
parser.add_argument('--eval-n-runs', type=int, default=10)
parser.add_argument('--demo', action='store_true', default=False)
parser.add_argument('--load', type=str, default=None)
@@ -113,7 +113,7 @@ def make_agent(process_idx):
profile=args.profile,
steps=args.steps,
eval_n_runs=args.eval_n_runs,
eval_frequency=args.eval_frequency,
eval_interval=args.eval_interval,
eval_explorer=explorer)

if __name__ == '__main__':
