From 3e4bf66ac874c5006a1d695f0512c9a0b828f679 Mon Sep 17 00:00:00 2001 From: muupan Date: Tue, 28 Mar 2017 19:00:48 +0900 Subject: [PATCH 1/3] Use logger in experiments --- chainerrl/experiments/evaluator.py | 36 +++++++++++++--------- chainerrl/experiments/train_agent.py | 30 +++++++++++------- chainerrl/experiments/train_agent_async.py | 31 +++++++++++-------- 3 files changed, 60 insertions(+), 37 deletions(-) diff --git a/chainerrl/experiments/evaluator.py b/chainerrl/experiments/evaluator.py index a063c92ba..65a85f84e 100644 --- a/chainerrl/experiments/evaluator.py +++ b/chainerrl/experiments/evaluator.py @@ -6,6 +6,7 @@ from future import standard_library standard_library.install_aliases() +import logging import multiprocessing as mp import os import statistics @@ -15,7 +16,8 @@ def eval_performance(env, agent, n_runs, max_episode_len=None, - explorer=None): + explorer=None, logger=None): + logger = logger or logging.getLogger(__name__) scores = [] for i in range(n_runs): obs = env.reset() @@ -36,7 +38,7 @@ def greedy_action_func(): # As mixing float and numpy float causes errors in statistics # functions, here every score is cast to float. scores.append(float(test_r)) - print('test episode:', i, 'R:', test_r) + logger.info('test episode: %s R: %s', i, test_r) mean = statistics.mean(scores) median = statistics.median(scores) if n_runs >= 2: @@ -51,24 +53,24 @@ def record_stats(outdir, values): print('\t'.join(str(x) for x in values), file=f) -def save_agent(agent, t, outdir, suffix=''): +def save_agent(agent, t, outdir, logger, suffix=''): dirname = os.path.join(outdir, '{}{}'.format(t, suffix)) agent.save(dirname) - print('Saved the agent to {}'.format(dirname)) + logger.info('Saved the agent to %s', dirname) -def update_best_model(agent, outdir, t, old_max_score, new_max_score): +def update_best_model(agent, outdir, t, old_max_score, new_max_score, logger): # Save the best model so far - print('The best score is updated {} -> {}'.format( - old_max_score, new_max_score)) - save_agent(agent, t, outdir) + logger.info('The best score is updated %s -> %s', + old_max_score, new_max_score) + save_agent(agent, t, outdir, logger) class Evaluator(object): def __init__(self, agent, env, n_runs, eval_frequency, outdir, max_episode_len=None, explorer=None, - step_offset=0): + step_offset=0, logger=None): self.agent = agent self.env = env self.max_score = np.finfo(np.float32).min @@ -81,6 +83,7 @@ def __init__(self, agent, env, n_runs, eval_frequency, self.step_offset = step_offset self.prev_eval_t = (self.step_offset - self.step_offset % self.eval_frequency) + self.logger = logger or logging.getLogger(__name__) # Write a header line first with open(os.path.join(self.outdir, 'scores.txt'), 'w') as f: @@ -92,13 +95,15 @@ def __init__(self, agent, env, n_runs, eval_frequency, def evaluate_and_update_max_score(self, t): mean, median, stdev = eval_performance( self.env, self.agent, self.n_runs, - max_episode_len=self.max_episode_len, explorer=self.explorer) + max_episode_len=self.max_episode_len, explorer=self.explorer, + logger=self.logger) elapsed = time.time() - self.start_time custom_values = tuple(tup[1] for tup in self.agent.get_statistics()) values = (t, elapsed, mean, median, stdev) + custom_values record_stats(self.outdir, values) if mean > self.max_score: - update_best_model(self.agent, self.outdir, t, self.max_score, mean) + update_best_model(self.agent, self.outdir, t, self.max_score, mean, + logger=self.logger) self.max_score = mean return mean @@ -114,7 +119,7 @@ class 
AsyncEvaluator(object): def __init__(self, n_runs, eval_frequency, outdir, max_episode_len=None, explorer=None, - step_offset=0): + step_offset=0, logger=None): self.start_time = time.time() self.n_runs = n_runs @@ -123,6 +128,7 @@ def __init__(self, n_runs, eval_frequency, self.max_episode_len = max_episode_len self.explorer = explorer self.step_offset = step_offset + self.logger = logger or logging.getLogger(__name__) # Values below are shared among processes self.prev_eval_t = mp.Value( @@ -143,7 +149,8 @@ def max_score(self): def evaluate_and_update_max_score(self, t, env, agent): mean, median, stdev = eval_performance( env, agent, self.n_runs, - max_episode_len=self.max_episode_len, explorer=self.explorer) + max_episode_len=self.max_episode_len, explorer=self.explorer, + logger=self.logger) elapsed = time.time() - self.start_time custom_values = tuple(tup[1] for tup in agent.get_statistics()) values = (t, elapsed, mean, median, stdev) + custom_values @@ -151,7 +158,8 @@ def evaluate_and_update_max_score(self, t, env, agent): with self._max_score.get_lock(): if mean > self._max_score.value: update_best_model( - agent, self.outdir, t, self._max_score.value, mean) + agent, self.outdir, t, self._max_score.value, mean, + logger=self.logger) self._max_score.value = mean return mean diff --git a/chainerrl/experiments/train_agent.py b/chainerrl/experiments/train_agent.py index 0dc2bf16d..15461ae99 100644 --- a/chainerrl/experiments/train_agent.py +++ b/chainerrl/experiments/train_agent.py @@ -5,6 +5,7 @@ from future import standard_library standard_library.install_aliases() +import logging import os from chainerrl.experiments.evaluator import Evaluator @@ -13,10 +14,11 @@ from chainerrl.misc.makedirs import makedirs -def save_agent_replay_buffer(agent, t, outdir, suffix=''): +def save_agent_replay_buffer(agent, t, outdir, suffix='', logger=None): + logger = logger or logging.getLogger(__name__) filename = os.path.join(outdir, '{}{}.replay.pkl'.format(t, suffix)) agent.replay_buffer.save(filename) - print('Saved the current replay buffer to {}'.format(filename)) + logger.info('Saved the current replay buffer to %s', filename) def ask_and_save_agent_replay_buffer(agent, t, outdir, suffix=''): @@ -26,7 +28,10 @@ def ask_and_save_agent_replay_buffer(agent, t, outdir, suffix=''): def train_agent(agent, env, steps, outdir, max_episode_len=None, - step_offset=0, evaluator=None, successful_score=None): + step_offset=0, evaluator=None, successful_score=None, + logger=None): + + logger = logger or logging.getLogger(__name__) episode_r = 0 episode_idx = 0 @@ -53,9 +58,9 @@ def train_agent(agent, env, steps, outdir, max_episode_len=None, if done or episode_len == max_episode_len or t == steps: agent.stop_episode_and_train(obs, r, done=done) - print('outdir:{} step:{} episode:{} R:{}'.format( - outdir, t, episode_idx, episode_r)) - print('statistics:{}'.format(agent.get_statistics())) + logger.info('outdir:%s step:%s episode:%s R:%s', + outdir, t, episode_idx, episode_r) + logger.info('statistics:%s', agent.get_statistics()) if evaluator is not None: evaluator.evaluate_if_necessary(t) if (successful_score is not None and @@ -73,18 +78,18 @@ def train_agent(agent, env, steps, outdir, max_episode_len=None, except Exception: # Save the current model before being killed - save_agent(agent, t, outdir, suffix='_except') + save_agent(agent, t, outdir, logger, suffix='_except') raise # Save the final model - save_agent(agent, t, outdir, suffix='_finish') + save_agent(agent, t, outdir, logger, suffix='_finish') 
def train_agent_with_evaluation( agent, env, steps, eval_n_runs, eval_frequency, outdir, max_episode_len=None, step_offset=0, eval_explorer=None, eval_max_episode_len=None, eval_env=None, successful_score=None, - render=False): + render=False, logger=None): """Run a DQN-like agent. Args: @@ -102,6 +107,8 @@ def train_agent_with_evaluation( or equal to this value if not None """ + logger = logger or logging.getLogger(__name__) + makedirs(outdir, exist_ok=True) if eval_env is None: @@ -116,9 +123,10 @@ def train_agent_with_evaluation( max_episode_len=eval_max_episode_len, explorer=eval_explorer, env=eval_env, - step_offset=step_offset) + step_offset=step_offset, + logger=logger) train_agent( agent, env, steps, outdir, max_episode_len=max_episode_len, step_offset=step_offset, evaluator=evaluator, - successful_score=successful_score) + successful_score=successful_score, logger=logger) diff --git a/chainerrl/experiments/train_agent_async.py b/chainerrl/experiments/train_agent_async.py index de631aa87..3c220667f 100644 --- a/chainerrl/experiments/train_agent_async.py +++ b/chainerrl/experiments/train_agent_async.py @@ -6,9 +6,9 @@ from future import standard_library standard_library.install_aliases() +import logging import multiprocessing as mp import os -import sys from chainerrl.experiments.evaluator import AsyncEvaluator from chainerrl.misc import async @@ -17,7 +17,9 @@ def train_loop(process_idx, env, agent, steps, outdir, counter, training_done, max_episode_len=None, evaluator=None, eval_env=None, - successful_score=None): + successful_score=None, logger=None): + + logger = logger or logging.getLogger(__name__) if eval_env is None: eval_env = env @@ -43,10 +45,11 @@ def train_loop(process_idx, env, agent, steps, outdir, counter, training_done, if done or episode_len == max_episode_len: agent.stop_episode_and_train(obs, r, done) if process_idx == 0: - print('outdir:{} global_step:{} local_step:{} lr:{} R:{}'.format( # NOQA + logger.info( + 'outdir:%s global_step:%s local_step:%s lr:%s R:%s', outdir, global_t, local_t, agent.optimizer.lr, - episode_r)) - print('statistics:{}'.format(agent.get_statistics())) + episode_r) + logger.info('statistics:%s', agent.get_statistics()) if evaluator is not None: eval_score = evaluator.evaluate_if_necessary( global_t, env=eval_env, agent=agent) @@ -86,21 +89,20 @@ def train_loop(process_idx, env, agent, steps, outdir, counter, training_done, # Save the current model before being killed dirname = os.path.join(outdir, '{}_except'.format(global_t)) agent.save(dirname) - print('Saved the current model to {}'.format(dirname), - file=sys.stderr) + logger.warning('Saved the current model to %s', dirname) raise if global_t == steps + 1: # Save the final model dirname = os.path.join(outdir, '{}_finish'.format(steps)) agent.save(dirname) - print('Saved the final agent to {}'.format(dirname)) + logger.info('Saved the final agent to %s', dirname) if successful: # Save the successful model dirname = os.path.join(outdir, 'successful') agent.save(dirname) - print('Saved the successful agent to {}'.format(dirname)) + logger.info('Saved the successful agent to %s', dirname) def extract_shared_objects_from_agent(agent): @@ -120,7 +122,8 @@ def train_agent_async(outdir, processes, make_env, eval_n_runs=10, gamma=0.99, max_episode_len=None, step_offset=0, successful_score=None, eval_explorer=None, - agent=None, make_agent=None): + agent=None, make_agent=None, + logger=None): """Train agent asynchronously. One of agent and make_agent must be specified. 
@@ -135,6 +138,8 @@ def train_agent_async(outdir, processes, make_env, steps (int): Number of global time steps for training """ + logger = logger or logging.getLogger(__name__) + # Prevent numpy from using multiple threads os.environ['OMP_NUM_THREADS'] = '1' @@ -153,7 +158,8 @@ def train_agent_async(outdir, processes, make_env, eval_frequency=eval_frequency, outdir=outdir, max_episode_len=max_episode_len, step_offset=step_offset, - explorer=eval_explorer) + explorer=eval_explorer, + logger=logger) def run_func(process_idx): random_seed.set_random_seed(process_idx) @@ -179,7 +185,8 @@ def f(): evaluator=evaluator, successful_score=successful_score, training_done=training_done, - eval_env=eval_env) + eval_env=eval_env, + logger=logger) if profile: import cProfile From 4affb25d907703c469d0dfb8cb81a1d0ae22d59d Mon Sep 17 00:00:00 2001 From: muupan Date: Tue, 28 Mar 2017 19:26:57 +0900 Subject: [PATCH 2/3] Print info messages into stdout in quickstart --- examples/quickstart/quickstart.ipynb | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/examples/quickstart/quickstart.ipynb b/examples/quickstart/quickstart.ipynb index a87dc8d79..021101c22 100644 --- a/examples/quickstart/quickstart.ipynb +++ b/examples/quickstart/quickstart.ipynb @@ -462,6 +462,12 @@ } ], "source": [ + "# Set up the logger to print info messages for understandability.\n", + "import logging\n", + "import sys\n", + "gym.undo_logger_setup() # Turn off gym's default logger settings\n", + "logging.basicConfig(level=logging.INFO, stream=sys.stdout, format='')\n", + "\n", "chainerrl.experiments.train_agent_with_evaluation(\n", " agent, env,\n", " steps=2000, # Train the agent for 2000 steps\n", From da41b6bc23c8e654ccb664a477b2face56695d58 Mon Sep 17 00:00:00 2001 From: muupan Date: Tue, 28 Mar 2017 19:28:34 +0900 Subject: [PATCH 3/3] Add undo_logger_setup and clean logger settings --- examples/gym/train_ddpg_gym.py | 2 +- examples/gym/train_dqn_gym.py | 1 - examples/gym/train_pcl_gym.py | 3 ++- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/gym/train_ddpg_gym.py b/examples/gym/train_ddpg_gym.py index d01318063..00309dd67 100644 --- a/examples/gym/train_ddpg_gym.py +++ b/examples/gym/train_ddpg_gym.py @@ -10,6 +10,7 @@ import chainer from chainer import optimizers import gym +gym.undo_logger_setup() from gym import spaces import gym.wrappers import numpy as np @@ -148,7 +149,6 @@ def random_action(): soft_update_tau=args.soft_update_tau, n_times_update=args.n_update_times, phi=phi, gpu=args.gpu, minibatch_size=args.minibatch_size) - agent.logger.setLevel(logging.DEBUG) if len(args.load) > 0: agent.load(args.load) diff --git a/examples/gym/train_dqn_gym.py b/examples/gym/train_dqn_gym.py index 419dd7119..f8e3ec40f 100644 --- a/examples/gym/train_dqn_gym.py +++ b/examples/gym/train_dqn_gym.py @@ -152,7 +152,6 @@ def phi(obs): target_update_method=args.target_update_method, soft_update_tau=args.soft_update_tau, episodic_update=args.episodic_replay, episodic_update_len=16) - agent.logger.setLevel(logging.DEBUG) if args.load: agent.load(args.load) diff --git a/examples/gym/train_pcl_gym.py b/examples/gym/train_pcl_gym.py index be8d94a36..122ca7edc 100644 --- a/examples/gym/train_pcl_gym.py +++ b/examples/gym/train_pcl_gym.py @@ -20,6 +20,7 @@ import chainer import gym +gym.undo_logger_setup() import gym.wrappers import numpy as np @@ -72,7 +73,7 @@ def main(): dest='backprop_future_values') args = parser.parse_args() - logging.getLogger().setLevel(args.logger_level) + 
logging.basicConfig(level=args.logger_level) if args.seed is not None: misc.set_random_seed(args.seed)
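
For reference, a minimal sketch of how the logger keyword introduced in PATCH 1/3 can be combined with the stdout logging setup from PATCH 2/3. The agent and env objects, the logger name, and the keyword values other than steps=2000 are illustrative placeholders rather than values taken from the patches:

    import logging
    import sys

    import gym
    import chainerrl

    gym.undo_logger_setup()  # turn off gym's default logger settings
    logging.basicConfig(level=logging.INFO, stream=sys.stdout, format='')

    # An explicit logger can also be handed to the experiment utilities.
    logger = logging.getLogger('my_experiment')  # illustrative name

    chainerrl.experiments.train_agent_with_evaluation(
        agent, env,            # assumed to be constructed beforehand
        steps=2000,            # train the agent for 2000 steps
        eval_n_runs=10,        # number of episodes per evaluation
        eval_frequency=1000,   # evaluate every 1000 training steps
        max_episode_len=200,   # cap episode length
        outdir='result',       # scores.txt and saved agents go here
        logger=logger)         # progress messages go through this logger

Because each experiment function falls back to logging.getLogger(__name__) when logger is None, calling logging.basicConfig alone is enough to surface the progress messages; passing a logger explicitly is only needed to route them somewhere else.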