diff --git a/examples/ale/train_dqn_ale.py b/examples/ale/train_dqn_ale.py
index a264542fb..56d30efcc 100644
--- a/examples/ale/train_dqn_ale.py
+++ b/examples/ale/train_dqn_ale.py
@@ -78,31 +78,45 @@ def parse_agent(agent):
 
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument('--env', type=str, default='BreakoutNoFrameskip-v4')
+    parser.add_argument('--env', type=str, default='BreakoutNoFrameskip-v4',
+                        help='OpenAI Atari domain to perform algorithm on.')
     parser.add_argument('--outdir', type=str, default='results',
                         help='Directory path to save output files.'
                              ' If it does not exist, it will be created.')
     parser.add_argument('--seed', type=int, default=0,
                         help='Random seed [0, 2 ** 31)')
-    parser.add_argument('--gpu', type=int, default=0)
+    parser.add_argument('--gpu', type=int, default=0,
+                        help='GPU to use, set to -1 if no GPU.')
     parser.add_argument('--demo', action='store_true', default=False)
     parser.add_argument('--load', type=str, default=None)
     parser.add_argument('--final-exploration-frames',
-                        type=int, default=10 ** 6)
-    parser.add_argument('--final-epsilon', type=float, default=0.01)
-    parser.add_argument('--eval-epsilon', type=float, default=0.001)
+                        type=int, default=10 ** 6,
+                        help='Timesteps after which we stop ' +
+                             'annealing exploration rate')
+    parser.add_argument('--final-epsilon', type=float, default=0.01,
+                        help='Final value of epsilon during training.')
+    parser.add_argument('--eval-epsilon', type=float, default=0.001,
+                        help='Exploration epsilon used during eval episodes.')
     parser.add_argument('--noisy-net-sigma', type=float, default=None)
     parser.add_argument('--arch', type=str, default='doubledqn',
-                        choices=['nature', 'nips', 'dueling', 'doubledqn'])
-    parser.add_argument('--steps', type=int, default=5 * 10 ** 7)
+                        choices=['nature', 'nips', 'dueling', 'doubledqn'],
+                        help='Network architecture to use.')
+    parser.add_argument('--steps', type=int, default=5 * 10 ** 7,
+                        help='Total number of timesteps to train the agent.')
     parser.add_argument('--max-episode-len', type=int,
                         default=30 * 60 * 60 // 4,  # 30 minutes with 60/4 fps
-                        help='Maximum number of steps for each episode.')
-    parser.add_argument('--replay-start-size', type=int, default=5 * 10 ** 4)
+                        help='Maximum number of timesteps for each episode.')
+    parser.add_argument('--replay-start-size', type=int, default=5 * 10 ** 4,
+                        help='Minimum replay buffer size before ' +
+                             'performing gradient updates.')
     parser.add_argument('--target-update-interval',
-                        type=int, default=3 * 10 ** 4)
-    parser.add_argument('--eval-interval', type=int, default=10 ** 5)
-    parser.add_argument('--update-interval', type=int, default=4)
+                        type=int, default=3 * 10 ** 4,
+                        help='Frequency (in timesteps) at which ' +
+                             'the target network is updated.')
+    parser.add_argument('--eval-interval', type=int, default=10 ** 5,
+                        help='Frequency (in timesteps) of evaluation phase.')
+    parser.add_argument('--update-interval', type=int, default=4,
+                        help='Frequency (in timesteps) of network updates.')
     parser.add_argument('--eval-n-runs', type=int, default=10)
     parser.add_argument('--no-clip-delta',
                         dest='clip_delta', action='store_false')
@@ -117,8 +131,8 @@ def main():
     parser.add_argument('--monitor', action='store_true', default=False,
                         help='Monitor env. Videos and additional information'
                              ' are saved as output files.')
     parser.add_argument('--lr', type=float, default=2.5e-4,
-                        help='Learning rate')
+                        help='Learning rate.')
     parser.add_argument('--prioritized', action='store_true', default=False,
                         help='Use prioritized experience replay.')
     args = parser.parse_args()