Use logger instead of print in experiments #60

Merged
3 commits, merged Mar 28, 2017
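This PR routes the progress output of the experiment utilities through the standard logging module instead of print, and threads an optional logger argument (defaulting to logging.getLogger(__name__)) through the experiment entry points. A minimal sketch of how a training script could surface that output after this change, mirroring the setup added to the quickstart notebook below (the level, stream, and format are up to the caller):

    import logging
    import sys

    import gym

    gym.undo_logger_setup()  # keep gym from installing its own log handlers
    logging.basicConfig(level=logging.INFO, stream=sys.stdout)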
36 changes: 22 additions & 14 deletions chainerrl/experiments/evaluator.py
@@ -6,6 +6,7 @@
from future import standard_library
standard_library.install_aliases()

import logging
import multiprocessing as mp
import os
import statistics
@@ -15,7 +16,8 @@


def eval_performance(env, agent, n_runs, max_episode_len=None,
explorer=None):
explorer=None, logger=None):
logger = logger or logging.getLogger(__name__)
scores = []
for i in range(n_runs):
obs = env.reset()
@@ -36,7 +38,7 @@ def greedy_action_func():
# As mixing float and numpy float causes errors in statistics
# functions, here every score is cast to float.
scores.append(float(test_r))
print('test episode:', i, 'R:', test_r)
logger.info('test episode: %s R: %s', i, test_r)
mean = statistics.mean(scores)
median = statistics.median(scores)
if n_runs >= 2:
@@ -51,24 +53,24 @@ def record_stats(outdir, values):
print('\t'.join(str(x) for x in values), file=f)


def save_agent(agent, t, outdir, suffix=''):
def save_agent(agent, t, outdir, logger, suffix=''):
dirname = os.path.join(outdir, '{}{}'.format(t, suffix))
agent.save(dirname)
print('Saved the agent to {}'.format(dirname))
logger.info('Saved the agent to %s', dirname)


def update_best_model(agent, outdir, t, old_max_score, new_max_score):
def update_best_model(agent, outdir, t, old_max_score, new_max_score, logger):
# Save the best model so far
print('The best score is updated {} -> {}'.format(
old_max_score, new_max_score))
save_agent(agent, t, outdir)
logger.info('The best score is updated %s -> %s',
old_max_score, new_max_score)
save_agent(agent, t, outdir, logger)


class Evaluator(object):

def __init__(self, agent, env, n_runs, eval_frequency,
outdir, max_episode_len=None, explorer=None,
step_offset=0):
step_offset=0, logger=None):
self.agent = agent
self.env = env
self.max_score = np.finfo(np.float32).min
@@ -81,6 +83,7 @@ def __init__(self, agent, env, n_runs, eval_frequency,
self.step_offset = step_offset
self.prev_eval_t = (self.step_offset -
self.step_offset % self.eval_frequency)
self.logger = logger or logging.getLogger(__name__)

# Write a header line first
with open(os.path.join(self.outdir, 'scores.txt'), 'w') as f:
@@ -92,13 +95,15 @@ def __init__(self, agent, env, n_runs, eval_frequency,
def evaluate_and_update_max_score(self, t):
mean, median, stdev = eval_performance(
self.env, self.agent, self.n_runs,
max_episode_len=self.max_episode_len, explorer=self.explorer)
max_episode_len=self.max_episode_len, explorer=self.explorer,
logger=self.logger)
elapsed = time.time() - self.start_time
custom_values = tuple(tup[1] for tup in self.agent.get_statistics())
values = (t, elapsed, mean, median, stdev) + custom_values
record_stats(self.outdir, values)
if mean > self.max_score:
update_best_model(self.agent, self.outdir, t, self.max_score, mean)
update_best_model(self.agent, self.outdir, t, self.max_score, mean,
logger=self.logger)
self.max_score = mean
return mean

@@ -114,7 +119,7 @@ class AsyncEvaluator(object):

def __init__(self, n_runs, eval_frequency,
outdir, max_episode_len=None, explorer=None,
step_offset=0):
step_offset=0, logger=None):

self.start_time = time.time()
self.n_runs = n_runs
@@ -123,6 +128,7 @@ def __init__(self, n_runs, eval_frequency,
self.max_episode_len = max_episode_len
self.explorer = explorer
self.step_offset = step_offset
self.logger = logger or logging.getLogger(__name__)

# Values below are shared among processes
self.prev_eval_t = mp.Value(
@@ -143,15 +149,17 @@ def max_score(self):
def evaluate_and_update_max_score(self, t, env, agent):
mean, median, stdev = eval_performance(
env, agent, self.n_runs,
max_episode_len=self.max_episode_len, explorer=self.explorer)
max_episode_len=self.max_episode_len, explorer=self.explorer,
logger=self.logger)
elapsed = time.time() - self.start_time
custom_values = tuple(tup[1] for tup in agent.get_statistics())
values = (t, elapsed, mean, median, stdev) + custom_values
record_stats(self.outdir, values)
with self._max_score.get_lock():
if mean > self._max_score.value:
update_best_model(
agent, self.outdir, t, self._max_score.value, mean)
agent, self.outdir, t, self._max_score.value, mean,
logger=self.logger)
self._max_score.value = mean
return mean

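With the changes above, eval_performance, Evaluator, and AsyncEvaluator all take an optional logger and fall back to logging.getLogger(__name__) when none is given. A hypothetical usage sketch; env, agent, the logger name, and all argument values are placeholders, not part of this diff:

    import logging

    from chainerrl.experiments.evaluator import Evaluator, eval_performance

    eval_logger = logging.getLogger('experiment.eval')  # hypothetical name

    # Stand-alone evaluation with an explicit logger
    mean, median, stdev = eval_performance(
        env, agent, n_runs=10, max_episode_len=200, logger=eval_logger)

    # Evaluator forwards the same logger to eval_performance and save_agent
    evaluator = Evaluator(agent, env, n_runs=10, eval_frequency=10000,
                          outdir='results', logger=eval_logger)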
30 changes: 19 additions & 11 deletions chainerrl/experiments/train_agent.py
@@ -5,6 +5,7 @@
from future import standard_library
standard_library.install_aliases()

import logging
import os

from chainerrl.experiments.evaluator import Evaluator
@@ -13,10 +14,11 @@
from chainerrl.misc.makedirs import makedirs


def save_agent_replay_buffer(agent, t, outdir, suffix=''):
def save_agent_replay_buffer(agent, t, outdir, suffix='', logger=None):
logger = logger or logging.getLogger(__name__)
filename = os.path.join(outdir, '{}{}.replay.pkl'.format(t, suffix))
agent.replay_buffer.save(filename)
print('Saved the current replay buffer to {}'.format(filename))
logger.info('Saved the current replay buffer to %s', filename)


def ask_and_save_agent_replay_buffer(agent, t, outdir, suffix=''):
@@ -26,7 +28,10 @@ def ask_and_save_agent_replay_buffer(agent, t, outdir, suffix=''):


def train_agent(agent, env, steps, outdir, max_episode_len=None,
step_offset=0, evaluator=None, successful_score=None):
step_offset=0, evaluator=None, successful_score=None,
logger=None):

logger = logger or logging.getLogger(__name__)

episode_r = 0
episode_idx = 0
@@ -53,9 +58,9 @@ def train_agent(agent, env, steps, outdir, max_episode_len=None,

if done or episode_len == max_episode_len or t == steps:
agent.stop_episode_and_train(obs, r, done=done)
print('outdir:{} step:{} episode:{} R:{}'.format(
outdir, t, episode_idx, episode_r))
print('statistics:{}'.format(agent.get_statistics()))
logger.info('outdir:%s step:%s episode:%s R:%s',
outdir, t, episode_idx, episode_r)
logger.info('statistics:%s', agent.get_statistics())
if evaluator is not None:
evaluator.evaluate_if_necessary(t)
if (successful_score is not None and
@@ -73,18 +78,18 @@ def train_agent(agent, env, steps, outdir, max_episode_len=None,

except Exception:
# Save the current model before being killed
save_agent(agent, t, outdir, suffix='_except')
save_agent(agent, t, outdir, logger, suffix='_except')
raise

# Save the final model
save_agent(agent, t, outdir, suffix='_finish')
save_agent(agent, t, outdir, logger, suffix='_finish')


def train_agent_with_evaluation(
agent, env, steps, eval_n_runs, eval_frequency,
outdir, max_episode_len=None, step_offset=0, eval_explorer=None,
eval_max_episode_len=None, eval_env=None, successful_score=None,
render=False):
render=False, logger=None):
"""Run a DQN-like agent.

Args:
@@ -102,6 +107,8 @@ def train_agent_with_evaluation(
or equal to this value if not None
"""

logger = logger or logging.getLogger(__name__)

makedirs(outdir, exist_ok=True)

if eval_env is None:
@@ -116,9 +123,10 @@ def train_agent_with_evaluation(
max_episode_len=eval_max_episode_len,
explorer=eval_explorer,
env=eval_env,
step_offset=step_offset)
step_offset=step_offset,
logger=logger)

train_agent(
agent, env, steps, outdir, max_episode_len=max_episode_len,
step_offset=step_offset, evaluator=evaluator,
successful_score=successful_score)
successful_score=successful_score, logger=logger)
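For the synchronous path, the new logger keyword on train_agent_with_evaluation is forwarded to the Evaluator and to train_agent (and from there to save_agent). A hypothetical call site with placeholder values:

    import logging

    import chainerrl

    logger = logging.getLogger('experiment')  # hypothetical name

    chainerrl.experiments.train_agent_with_evaluation(
        agent, env,
        steps=100000,
        eval_n_runs=10,
        eval_frequency=10000,
        outdir='results',
        logger=logger)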
31 changes: 19 additions & 12 deletions chainerrl/experiments/train_agent_async.py
@@ -6,9 +6,9 @@
from future import standard_library
standard_library.install_aliases()

import logging
import multiprocessing as mp
import os
import sys

from chainerrl.experiments.evaluator import AsyncEvaluator
from chainerrl.misc import async
@@ -17,7 +17,9 @@

def train_loop(process_idx, env, agent, steps, outdir, counter, training_done,
max_episode_len=None, evaluator=None, eval_env=None,
successful_score=None):
successful_score=None, logger=None):

logger = logger or logging.getLogger(__name__)

if eval_env is None:
eval_env = env
@@ -43,10 +45,11 @@ def train_loop(process_idx, env, agent, steps, outdir, counter, training_done,
if done or episode_len == max_episode_len:
agent.stop_episode_and_train(obs, r, done)
if process_idx == 0:
print('outdir:{} global_step:{} local_step:{} lr:{} R:{}'.format( # NOQA
logger.info(
'outdir:%s global_step:%s local_step:%s lr:%s R:%s',
outdir, global_t, local_t, agent.optimizer.lr,
episode_r))
print('statistics:{}'.format(agent.get_statistics()))
episode_r)
logger.info('statistics:%s', agent.get_statistics())
if evaluator is not None:
eval_score = evaluator.evaluate_if_necessary(
global_t, env=eval_env, agent=agent)
@@ -86,21 +89,20 @@ def train_loop(process_idx, env, agent, steps, outdir, counter, training_done,
# Save the current model before being killed
dirname = os.path.join(outdir, '{}_except'.format(global_t))
agent.save(dirname)
print('Saved the current model to {}'.format(dirname),
file=sys.stderr)
logger.warning('Saved the current model to %s', dirname)
raise

if global_t == steps + 1:
# Save the final model
dirname = os.path.join(outdir, '{}_finish'.format(steps))
agent.save(dirname)
print('Saved the final agent to {}'.format(dirname))
logger.info('Saved the final agent to %s', dirname)

if successful:
# Save the successful model
dirname = os.path.join(outdir, 'successful')
agent.save(dirname)
print('Saved the successful agent to {}'.format(dirname))
logger.info('Saved the successful agent to %s', dirname)


def extract_shared_objects_from_agent(agent):
@@ -120,7 +122,8 @@ def train_agent_async(outdir, processes, make_env,
eval_n_runs=10, gamma=0.99, max_episode_len=None,
step_offset=0, successful_score=None,
eval_explorer=None,
agent=None, make_agent=None):
agent=None, make_agent=None,
logger=None):
"""Train agent asynchronously.

One of agent and make_agent must be specified.
@@ -135,6 +138,8 @@ def train_agent_async(outdir, processes, make_env,
steps (int): Number of global time steps for training
"""

logger = logger or logging.getLogger(__name__)

# Prevent numpy from using multiple threads
os.environ['OMP_NUM_THREADS'] = '1'

@@ -153,7 +158,8 @@ def train_agent_async(outdir, processes, make_env,
eval_frequency=eval_frequency, outdir=outdir,
max_episode_len=max_episode_len,
step_offset=step_offset,
explorer=eval_explorer)
explorer=eval_explorer,
logger=logger)

def run_func(process_idx):
random_seed.set_random_seed(process_idx)
@@ -179,7 +185,8 @@ def f():
evaluator=evaluator,
successful_score=successful_score,
training_done=training_done,
eval_env=eval_env)
eval_env=eval_env,
logger=logger)

if profile:
import cProfile
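The asynchronous trainer threads the same logger through the AsyncEvaluator and into each worker's train_loop. A hypothetical call showing only the arguments visible in this diff; parameters whose defaults fall outside the displayed hunks (for example steps and eval_frequency) are left out here on purpose:

    import logging

    from chainerrl.experiments.train_agent_async import train_agent_async

    logger = logging.getLogger('experiment.async')  # hypothetical name

    train_agent_async(
        outdir='results',
        processes=4,
        make_env=make_env,      # placeholder: callable that builds an environment
        make_agent=make_agent,  # placeholder: callable that builds an agent (or pass agent=...)
        logger=logger)          # shared by the AsyncEvaluator and every train_loop process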
2 changes: 1 addition & 1 deletion examples/gym/train_ddpg_gym.py
@@ -10,6 +10,7 @@
import chainer
from chainer import optimizers
import gym
gym.undo_logger_setup()
from gym import spaces
import gym.wrappers
import numpy as np
@@ -148,7 +149,6 @@ def random_action():
soft_update_tau=args.soft_update_tau,
n_times_update=args.n_update_times,
phi=phi, gpu=args.gpu, minibatch_size=args.minibatch_size)
agent.logger.setLevel(logging.DEBUG)

if len(args.load) > 0:
agent.load(args.load)
1 change: 0 additions & 1 deletion examples/gym/train_dqn_gym.py
@@ -152,7 +152,6 @@ def phi(obs):
target_update_method=args.target_update_method,
soft_update_tau=args.soft_update_tau,
episodic_update=args.episodic_replay, episodic_update_len=16)
agent.logger.setLevel(logging.DEBUG)

if args.load:
agent.load(args.load)
3 changes: 2 additions & 1 deletion examples/gym/train_pcl_gym.py
@@ -20,6 +20,7 @@

import chainer
import gym
gym.undo_logger_setup()
import gym.wrappers
import numpy as np

@@ -72,7 +73,7 @@ def main():
dest='backprop_future_values')
args = parser.parse_args()

logging.getLogger().setLevel(args.logger_level)
logging.basicConfig(level=args.logger_level)

if args.seed is not None:
misc.set_random_seed(args.seed)
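A note on the examples' switch from logging.getLogger().setLevel(args.logger_level) to logging.basicConfig(level=args.logger_level): setLevel alone changes the root logger's level but attaches no handler, so the INFO records now emitted by the experiment code would still not reach the console; basicConfig sets the level and installs a stream handler in one call. A short sketch of the difference (this rationale is assumed, not stated in the PR):

    import logging

    # Before: the level changes, but with no handler attached INFO records are not printed.
    # logging.getLogger().setLevel(logging.INFO)

    # After: attaches a StreamHandler to the root logger and sets its level,
    # so logger.info(...) calls from the experiment code become visible.
    logging.basicConfig(level=logging.INFO)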
6 changes: 6 additions & 0 deletions examples/quickstart/quickstart.ipynb
@@ -462,6 +462,12 @@
}
],
"source": [
"# Set up the logger to print info messages for understandability.\n",
"import logging\n",
"import sys\n",
"gym.undo_logger_setup() # Turn off gym's default logger settings\n",
"logging.basicConfig(level=logging.INFO, stream=sys.stdout, format='')\n",
"\n",
"chainerrl.experiments.train_agent_with_evaluation(\n",
" agent, env,\n",
" steps=2000, # Train the agent for 2000 steps\n",