Skip to content

Commit

Permalink
Improve default and MuJoCo configs.
Browse files (browse the repository at this point in the history)
  • Loading branch information
danijar committed Nov 13, 2017
1 parent e897ab1 commit bbd7199
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 18 deletions.
39 changes: 22 additions & 17 deletions agents/scripts/configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,24 +28,24 @@ def default():
"""Default configuration for PPO."""
# General
algorithm = ppo.PPOAlgorithm
num_agents = 10
eval_episodes = 25
num_agents = 30
eval_episodes = 30
use_gpu = False
# Network
network = networks.feed_forward_gaussian
weight_summaries = dict(
all=r'.*', policy=r'.*/policy/.*', value=r'.*/value/.*')
policy_layers = 200, 100
value_layers = 200, 100
init_mean_factor = 0.05
init_mean_factor = 0.1
init_logstd = -1
# Optimization
update_every = 30
update_epochs = 25
optimizer = 'AdamOptimizer'
learning_rate = 1e-4
# Losses
discount = 0.985
discount = 0.995
kl_target = 1e-2
kl_cutoff_factor = 2
kl_cutoff_coef = 1000
Expand All @@ -59,35 +59,38 @@ def pendulum():
# Environment
env = 'Pendulum-v0'
max_length = 200
steps = 1e6 # 1M
steps = 2e6 # 2M
return locals()


def cheetah():
"""Configuration for MuJoCo's half cheetah task."""
def reacher():
"""Configuration for MuJoCo's reacher task."""
locals().update(default())
# Environment
env = 'HalfCheetah-v1'
env = 'Reacher-v1'
max_length = 1000
steps = 1e7 # 10M
steps = 5e6 # 5M
discount = 0.985
update_every = 60
return locals()


def walker():
"""Configuration for MuJoCo's walker task."""
def cheetah():
"""Configuration for MuJoCo's half cheetah task."""
locals().update(default())
# Environment
env = 'Walker2d-v1'
env = 'HalfCheetah-v1'
max_length = 1000
steps = 1e7 # 10M
discount = 0.99
return locals()


def reacher():
"""Configuration for MuJoCo's reacher task."""
def walker():
"""Configuration for MuJoCo's walker task."""
locals().update(default())
# Environment
env = 'Reacher-v1'
env = 'Walker2d-v1'
max_length = 1000
steps = 1e7 # 10M
return locals()
Expand All @@ -99,7 +102,8 @@ def hopper():
# Environment
env = 'Hopper-v1'
max_length = 1000
steps = 2e7 # 20M
steps = 1e7 # 10M
update_every = 60
return locals()


Expand All @@ -109,7 +113,7 @@ def ant():
# Environment
env = 'Ant-v1'
max_length = 1000
steps = 5e7 # 50M
steps = 2e7 # 20M
return locals()


Expand All @@ -120,4 +124,5 @@ def humanoid():
env = 'Humanoid-v1'
max_length = 1000
steps = 5e7 # 50M
update_every = 60
return locals()
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

setuptools.setup(
name='agents',
version='1.1.0',
version='1.2.0',
description=(
'Efficient TensorFlow implementation of reinforcement learning '
'algorithms.'),
Expand Down

0 comments on commit bbd7199

Please sign in to comment.