From 6c300ff361a77db0a0b76789a5b73e146ea5071b Mon Sep 17 00:00:00 2001
From: muupan
Date: Fri, 31 Aug 2018 15:53:10 +0900
Subject: [PATCH 1/8] Remove --use-sdl since it is no longer used

---
 examples/ale/train_dqn_ale.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/ale/train_dqn_ale.py b/examples/ale/train_dqn_ale.py
index 1bbb2e390..3dae192c0 100644
--- a/examples/ale/train_dqn_ale.py
+++ b/examples/ale/train_dqn_ale.py
@@ -74,7 +74,6 @@ def main():
     parser.add_argument('--gpu', type=int, default=0)
     parser.add_argument('--demo', action='store_true', default=False)
     parser.add_argument('--load', type=str, default=None)
-    parser.add_argument('--use-sdl', action='store_true', default=False)
     parser.add_argument('--final-exploration-frames',
                         type=int, default=10 ** 6)
     parser.add_argument('--final-epsilon', type=float, default=0.1)

From 54159cbb1f4fe6ded5755bdfb72bbca7edddacba Mon Sep 17 00:00:00 2001
From: muupan
Date: Fri, 31 Aug 2018 15:58:50 +0900
Subject: [PATCH 2/8] Remove --activation since relu is used almost always

---
 examples/ale/train_dqn_ale.py | 20 ++++----------------
 1 file changed, 4 insertions(+), 16 deletions(-)

diff --git a/examples/ale/train_dqn_ale.py b/examples/ale/train_dqn_ale.py
index 3dae192c0..c3999a66e 100644
--- a/examples/ale/train_dqn_ale.py
+++ b/examples/ale/train_dqn_ale.py
@@ -28,27 +28,17 @@
 import atari_wrappers
 
 
-def parse_activation(activation_str):
-    if activation_str == 'relu':
-        return F.relu
-    elif activation_str == 'elu':
-        return F.elu
-    elif activation_str == 'lrelu':
-        return F.leaky_relu
-    else:
-        raise RuntimeError(
-            'Not supported activation: {}'.format(activation_str))
 
 
-def parse_arch(arch, n_actions, activation):
+def parse_arch(arch, n_actions):
     if arch == 'nature':
         return links.Sequence(
-            links.NatureDQNHead(activation=activation),
+            links.NatureDQNHead(),
             L.Linear(512, n_actions),
             DiscreteActionValue)
     elif arch == 'nips':
         return links.Sequence(
-            links.NIPSDQNHead(activation=activation),
+            links.NIPSDQNHead(),
             L.Linear(256, n_actions),
             DiscreteActionValue)
     elif arch == 'dueling':
@@ -90,7 +80,6 @@ def main():
                         type=int, default=10 ** 4)
     parser.add_argument('--eval-interval', type=int, default=10 ** 5)
     parser.add_argument('--update-interval', type=int, default=4)
-    parser.add_argument('--activation', type=str, default='relu')
     parser.add_argument('--eval-n-runs', type=int, default=10)
     parser.add_argument('--no-clip-delta', dest='clip_delta',
                         action='store_false')
@@ -139,8 +128,7 @@ def make_env(test):
     eval_env = make_env(test=True)
 
     n_actions = env.action_space.n
-    activation = parse_activation(args.activation)
-    q_func = parse_arch(args.arch, n_actions, activation)
+    q_func = parse_arch(args.arch, n_actions)
 
     if args.noisy_net_sigma is not None:
         links.to_factorized_noisy(q_func)

From c2acd6e46a57740eff2546ffb399314765a3b2bd Mon Sep 17 00:00:00 2001
From: muupan
Date: Fri, 31 Aug 2018 16:00:53 +0900
Subject: [PATCH 3/8] Add doubledqn arch

---
 examples/ale/train_dqn_ale.py | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/examples/ale/train_dqn_ale.py b/examples/ale/train_dqn_ale.py
index c3999a66e..23041d51d 100644
--- a/examples/ale/train_dqn_ale.py
+++ b/examples/ale/train_dqn_ale.py
@@ -10,6 +10,7 @@
 import gym
 gym.undo_logger_setup()  # NOQA
 
+import chainer
 from chainer import functions as F
 from chainer import links as L
 from chainer import optimizers
@@ -28,6 +29,22 @@
 import atari_wrappers
 
 
+class SingleSharedBias(chainer.Chain):
+    """Single shared bias used in the Double DQN paper.
+
+    You can add this link after a Linear layer with nobias=True implement a
+    Linear layer with a single shared bias parameter.
+
+    See http://arxiv.org/abs/1509.06461.
+    """
+
+    def __init__(self):
+        super().__init__()
+        with self.init_scope():
+            self.bias = chainer.Parameter(0, shape=1)
+
+    def __call__(self, x):
+        return x + F.broadcast_to(self.bias, x.shape)
 
 
 def parse_arch(arch, n_actions):
@@ -36,6 +53,12 @@ def parse_arch(arch, n_actions):
         links.NatureDQNHead(),
         L.Linear(512, n_actions),
         DiscreteActionValue)
+    elif arch == 'doubledqn':
+        return links.Sequence(
+            links.NatureDQNHead(),
+            L.Linear(512, n_actions, nobias=True),
+            SingleSharedBias(),
+            DiscreteActionValue)
     elif arch == 'nips':
         return links.Sequence(
             links.NIPSDQNHead(),
@@ -70,7 +93,7 @@ def main():
     parser.add_argument('--eval-epsilon', type=float, default=0.05)
     parser.add_argument('--noisy-net-sigma', type=float, default=None)
     parser.add_argument('--arch', type=str, default='nature',
-                        choices=['nature', 'nips', 'dueling'])
+                        choices=['nature', 'nips', 'dueling', 'doubledqn'])
     parser.add_argument('--steps', type=int, default=10 ** 7)
     parser.add_argument('--max-episode-len', type=int,
                         default=5 * 60 * 60 // 4,  # 5 minutes with 60/4 fps

From e9a0e8bc6f31c4be0dac68a81f8507c388c23ff2 Mon Sep 17 00:00:00 2001
From: muupan
Date: Fri, 31 Aug 2018 16:10:38 +0900
Subject: [PATCH 4/8] Add --prioritized to use prioritized experience replay

---
 examples/ale/train_dqn_ale.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/examples/ale/train_dqn_ale.py b/examples/ale/train_dqn_ale.py
index 23041d51d..c2dae0949 100644
--- a/examples/ale/train_dqn_ale.py
+++ b/examples/ale/train_dqn_ale.py
@@ -116,6 +116,8 @@ def main():
     parser.add_argument('--monitor', action='store_true', default=False,
                         help='Monitor env. Videos and additional information'
                              ' are saved as output files.')
+    parser.add_argument('--prioritized', action='store_true', default=False,
+                        help='Use prioritized experience replay.')
     args = parser.parse_args()
 
     import logging
@@ -169,7 +171,14 @@ def make_env(test):
 
     opt.setup(q_func)
 
-    rbuf = replay_buffer.ReplayBuffer(10 ** 6)
+    # Select a replay buffer to use
+    if args.prioritized:
+        # Anneal beta from beta0 to 1 throughout training
+        betasteps = args.steps / args.update_interval
+        rbuf = replay_buffer.PrioritizedReplayBuffer(
+            10 ** 6, alpha=0.6, beta0=0.4, betasteps=betasteps)
+    else:
+        rbuf = replay_buffer.ReplayBuffer(10 ** 6)
 
     explorer = explorers.LinearDecayEpsilonGreedy(
         1.0, args.final_epsilon,

From 864ef7175199f186355d25958077fbe126741710 Mon Sep 17 00:00:00 2001
From: muupan
Date: Fri, 31 Aug 2018 16:13:10 +0900
Subject: [PATCH 5/8] Add --lr

---
 examples/ale/train_dqn_ale.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/examples/ale/train_dqn_ale.py b/examples/ale/train_dqn_ale.py
index c2dae0949..570469d46 100644
--- a/examples/ale/train_dqn_ale.py
+++ b/examples/ale/train_dqn_ale.py
@@ -116,6 +116,8 @@ def main():
     parser.add_argument('--monitor', action='store_true', default=False,
                         help='Monitor env. Videos and additional information'
                              ' are saved as output files.')
+    parser.add_argument('--lr', type=float, default=2.5e-4,
+                        help='Learning rate')
     parser.add_argument('--prioritized', action='store_true', default=False,
                         help='Use prioritized experience replay.')
     args = parser.parse_args()
@@ -167,7 +169,7 @@ def make_env(test):
 
     # Use the same hyper parameters as the Nature paper's
     opt = optimizers.RMSpropGraves(
-        lr=2.5e-4, alpha=0.95, momentum=0.0, eps=1e-2)
+        lr=args.lr, alpha=0.95, momentum=0.0, eps=1e-2)
 
     opt.setup(q_func)

From 79a873f6dd3021ad603b3a7fa3bb0cf10cec63c5 Mon Sep 17 00:00:00 2001
From: muupan
Date: Fri, 31 Aug 2018 16:16:39 +0900
Subject: [PATCH 6/8] Use settings for tuned DoubleDQN as default

---
 examples/ale/train_dqn_ale.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/examples/ale/train_dqn_ale.py b/examples/ale/train_dqn_ale.py
index 570469d46..8ff1319c6 100644
--- a/examples/ale/train_dqn_ale.py
+++ b/examples/ale/train_dqn_ale.py
@@ -89,14 +89,14 @@ def main():
     parser.add_argument('--load', type=str, default=None)
     parser.add_argument('--final-exploration-frames',
                         type=int, default=10 ** 6)
-    parser.add_argument('--final-epsilon', type=float, default=0.1)
-    parser.add_argument('--eval-epsilon', type=float, default=0.05)
+    parser.add_argument('--final-epsilon', type=float, default=0.01)
+    parser.add_argument('--eval-epsilon', type=float, default=0.001)
     parser.add_argument('--noisy-net-sigma', type=float, default=None)
-    parser.add_argument('--arch', type=str, default='nature',
+    parser.add_argument('--arch', type=str, default='doubledqn',
                         choices=['nature', 'nips', 'dueling', 'doubledqn'])
-    parser.add_argument('--steps', type=int, default=10 ** 7)
+    parser.add_argument('--steps', type=int, default=5 * 10 ** 7)
     parser.add_argument('--max-episode-len', type=int,
-                        default=5 * 60 * 60 // 4,  # 5 minutes with 60/4 fps
+                        default=30 * 60 * 60 // 4,  # 30 minutes with 60/4 fps
                         help='Maximum number of steps for each episode.')
     parser.add_argument('--replay-start-size', type=int, default=5 * 10 ** 4)
     parser.add_argument('--target-update-interval',
@@ -107,7 +107,7 @@ def main():
     parser.add_argument('--no-clip-delta', dest='clip_delta',
                         action='store_false')
     parser.set_defaults(clip_delta=True)
-    parser.add_argument('--agent', type=str, default='DQN',
+    parser.add_argument('--agent', type=str, default='DoubleDQN',
                         choices=['DQN', 'DoubleDQN', 'PAL'])
     parser.add_argument('--logging-level', type=int, default=20,
                         help='Logging level. 10:DEBUG, 20:INFO etc.')

From 9bf522ce17887b242a300e1df78879bd1bf13285 Mon Sep 17 00:00:00 2001
From: muupan
Date: Fri, 31 Aug 2018 16:23:21 +0900
Subject: [PATCH 7/8] Use --target-update-interval 30000 following tuned
 DoubleDQN

---
 examples/ale/train_dqn_ale.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/ale/train_dqn_ale.py b/examples/ale/train_dqn_ale.py
index 8ff1319c6..9c05ab267 100644
--- a/examples/ale/train_dqn_ale.py
+++ b/examples/ale/train_dqn_ale.py
@@ -100,7 +100,7 @@ def main():
                         help='Maximum number of steps for each episode.')
     parser.add_argument('--replay-start-size', type=int, default=5 * 10 ** 4)
     parser.add_argument('--target-update-interval',
-                        type=int, default=10 ** 4)
+                        type=int, default=3 * 10 ** 4)
     parser.add_argument('--eval-interval', type=int, default=10 ** 5)
     parser.add_argument('--update-interval', type=int, default=4)
     parser.add_argument('--eval-n-runs', type=int, default=10)

From 5d30efcb5354d3d8d5b98e0ec3d7531d14accb0f Mon Sep 17 00:00:00 2001
From: muupan
Date: Fri, 31 Aug 2018 16:42:30 +0900
Subject: [PATCH 8/8] Fix a grammatical error

---
 examples/ale/train_dqn_ale.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/ale/train_dqn_ale.py b/examples/ale/train_dqn_ale.py
index 9c05ab267..a264542fb 100644
--- a/examples/ale/train_dqn_ale.py
+++ b/examples/ale/train_dqn_ale.py
@@ -32,7 +32,7 @@ class SingleSharedBias(chainer.Chain):
     """Single shared bias used in the Double DQN paper.
 
-    You can add this link after a Linear layer with nobias=True implement a
+    You can add this link after a Linear layer with nobias=True to implement a
     Linear layer with a single shared bias parameter.
 
     See http://arxiv.org/abs/1509.06461.
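
For reference, a minimal usage sketch of the SingleSharedBias link introduced in PATCH 3/8. It is not part of the patches themselves; it assumes only that chainer and numpy are available, and the class body is copied verbatim from the patch. Composing the link after a Linear layer built with nobias=True yields a layer whose outputs all share one scalar bias, as used in the Double DQN paper (http://arxiv.org/abs/1509.06461):

    import chainer
    from chainer import functions as F
    from chainer import links as L
    import numpy as np


    class SingleSharedBias(chainer.Chain):
        """Adds one scalar bias, broadcast across all output units."""

        def __init__(self):
            super().__init__()
            with self.init_scope():
                # A single learnable parameter of shape (1,), initialized to 0
                self.bias = chainer.Parameter(0, shape=1)

        def __call__(self, x):
            # Broadcast the scalar bias to the shape of x and add it
            return x + F.broadcast_to(self.bias, x.shape)


    # A Linear layer with one shared bias instead of one bias per output unit
    linear = L.Linear(4, 3, nobias=True)
    shared_bias = SingleSharedBias()

    x = np.ones((2, 4), dtype=np.float32)
    y = shared_bias(linear(x))
    print(y.shape)  # (2, 3); all six outputs are offset by the same scalar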
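Similarly, a sketch of the replay buffer selection added in PATCH 4/8, plugged together with the step budget from PATCH 6/8 (this assumes the chainerrl package; the constructor call mirrors the patch). Beta is annealed once per parameter update rather than per environment step, which is why betasteps divides --steps by --update-interval:

    from chainerrl import replay_buffer

    # Defaults after PATCH 6/8: 5e7 env steps, one update every 4 steps
    steps = 5 * 10 ** 7
    update_interval = 4

    # One annealing step per parameter update, so beta reaches 1.0 at the
    # end of training: 5e7 / 4 = 1.25e7 annealing steps
    betasteps = steps / update_interval

    rbuf = replay_buffer.PrioritizedReplayBuffer(
        10 ** 6, alpha=0.6, beta0=0.4, betasteps=betasteps)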