From 4ae2b03dbba21d4628f0ead795b69c99c92df95b Mon Sep 17 00:00:00 2001
From: lugimzzz <63761690+lugimzzz@users.noreply.github.com>
Date: Tue, 28 Feb 2023 21:02:44 +0800
Subject: [PATCH] [AutoNLP]optimize log (#5021)

* optimize log

* fix test

* fix

* fix
---
 .../experimental/autonlp/auto_trainer_base.py | 35 ++++++++++++++++---
 .../autonlp/text_classification.py            | 28 +--------------
 paddlenlp/trainer/integrations.py             | 19 ----------
 .../autonlp/test_text_classification.py       | 15 ++++----
 4 files changed, 38 insertions(+), 59 deletions(-)

diff --git a/paddlenlp/experimental/autonlp/auto_trainer_base.py b/paddlenlp/experimental/autonlp/auto_trainer_base.py
index e01f7865ce0c..dbfd13e6420a 100644
--- a/paddlenlp/experimental/autonlp/auto_trainer_base.py
+++ b/paddlenlp/experimental/autonlp/auto_trainer_base.py
@@ -13,7 +13,10 @@
 # limitations under the License.
 import copy
 import datetime
+import logging
 import os
+import shutil
+import sys
 from abc import ABCMeta, abstractmethod
 from typing import Any, Callable, Dict, List, Optional, Union
 
@@ -122,12 +125,36 @@ def _data_checks_and_inference(self, train_dataset: Dataset, eval_dataset: Datas
         Performs different data checks and inferences on the training and eval datasets
         """
 
-    @abstractmethod
-    def _construct_trainable(self, train_dataset: Dataset, eval_dataset: Dataset) -> Callable:
+    def _construct_trainable(self) -> Callable:
         """
         Returns the Trainable functions that contains the main preprocessing and training logic
         """
 
+        def trainable(model_config):
+            # import is required for proper pickling
+            from paddlenlp.utils.log import logger
+
+            stdout_handler = logging.StreamHandler(sys.stdout)
+            stdout_handler.setFormatter(logger.format)
+            logger.logger.addHandler(stdout_handler)
+
+            # construct trainer
+            model_config = model_config["candidates"]
+            trainer = self._construct_trainer(model_config)
+            # train
+            trainer.train()
+            # evaluate
+            eval_metrics = trainer.evaluate()
+            # save dygraph model
+            trainer.save_model(self.save_path)
+
+            if os.path.exists(self.training_path):
+                logger.info("Removing training checkpoints to conserve disk space")
+                shutil.rmtree(self.training_path)
+            return eval_metrics
+
+        return trainable
+
     @abstractmethod
     def _compute_metrics(self, eval_preds: EvalPrediction) -> Dict[str, float]:
         """
@@ -325,9 +352,9 @@ def train(
             tune_config=tune_config,
             run_config=RunConfig(
                 name=experiment_name,
-                log_to_file=True,
+                log_to_file="train.log",
                 local_dir=self.output_dir if self.output_dir else None,
-                callbacks=[tune.logger.CSVLoggerCallback(), tune.logger.JsonLoggerCallback()],
+                callbacks=[tune.logger.CSVLoggerCallback()],
             ),
         )
         self.training_results = self.tuner.fit()
diff --git a/paddlenlp/experimental/autonlp/text_classification.py b/paddlenlp/experimental/autonlp/text_classification.py
index 24e869ad57b8..855353fb7d18 100644
--- a/paddlenlp/experimental/autonlp/text_classification.py
+++ b/paddlenlp/experimental/autonlp/text_classification.py
@@ -16,7 +16,7 @@
 import json
 import os
 import shutil
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Dict, List, Optional
 
 import numpy as np
 import paddle
@@ -372,32 +372,6 @@ def _construct_trainer(self, model_config) -> Trainer:
             raise NotImplementedError("'trainer_type' can only be one of ['Trainer', 'PromptTrainer']")
         return trainer
 
-    def _construct_trainable(self) -> Callable:
-        """
-        Returns the Trainable functions that contains the main preprocessing and training logic
-        """
-
-        def trainable(model_config):
-            # import is required for proper pickling
-            from paddlenlp.utils.log import logger
-
-            # construct trainer
-            model_config = model_config["candidates"]
-            trainer = self._construct_trainer(model_config)
-            # train
-            trainer.train()
-            # evaluate
-            eval_metrics = trainer.evaluate()
-            # save dygraph model
-            trainer.save_model(self.save_path)
-
-            if os.path.exists(self.training_path):
-                logger.info("Removing training checkpoints to conserve disk space")
-                shutil.rmtree(self.training_path)
-            return eval_metrics
-
-        return trainable
-
     def evaluate(self, eval_dataset: Optional[Dataset] = None, trial_id: Optional[str] = None):
         """
         Run evaluation and returns metrics from a certain `trial_id` on the given dataset.
diff --git a/paddlenlp/trainer/integrations.py b/paddlenlp/trainer/integrations.py
index 19f229f6c3a0..e092977eb95f 100644
--- a/paddlenlp/trainer/integrations.py
+++ b/paddlenlp/trainer/integrations.py
@@ -156,25 +156,6 @@ def on_evaluate(self, args, state, control, **kwargs):
         if self.tune.is_session_enabled() and metrics is not None and isinstance(metrics, dict):
             self.session.report(metrics)
 
-    # report session metrics to Ray to track trial progress
-    def on_epoch_end(self, args, state, control, **kwargs):
-        if not state.is_world_process_zero:
-            return
-
-        metrics = kwargs.get("metrics", None)
-        if self.tune.is_session_enabled() and metrics is not None and isinstance(metrics, dict):
-            self.session.report(metrics)
-
-    # forward trainer logs
-    def on_log(self, args, state, control, logs=None, **kwargs):
-        if not state.is_world_process_zero:
-            return
-
-        if logs is not None:
-            # In AutoNLP's Ray setup, we pipe stdout to a stdout file for logging purposes
-            # TODO: find a better way for this
-            print(logs)
-
 
 INTEGRATION_TO_CALLBACK = {
     "visualdl": VisualDLCallback,
diff --git a/tests/experimental/autonlp/test_text_classification.py b/tests/experimental/autonlp/test_text_classification.py
index 947ea18b103a..1be15d2cf982 100644
--- a/tests/experimental/autonlp/test_text_classification.py
+++ b/tests/experimental/autonlp/test_text_classification.py
@@ -135,13 +135,12 @@ def test_multiclass(self, custom_model_candidate, hp_overrides):
         self.assertEqual(len(results_df), num_models)
 
         # test hp override
+        model_result = auto_trainer._get_model_result()
         if hp_overrides is not None:
             for hp_key, hp_value in hp_overrides.items():
-                result_hp_key = f"config/candidates/{hp_key}"
-                self.assertEqual(results_df[result_hp_key][0], hp_value)
+                self.assertEqual(model_result.metrics["config"]["candidates"][hp_key], hp_value)
 
         # test save
-        model_result = auto_trainer._get_model_result()
         trainer_type = model_result.metrics["config"]["candidates"]["trainer_type"]
         save_path = os.path.join(model_result.log_dir, auto_trainer.save_path)
         self.assertTrue(os.path.exists(os.path.join(save_path, "model_state.pdparams")))
@@ -247,13 +246,12 @@ def test_multilabel(self, custom_model_candidate, hp_overrides):
         self.assertEqual(len(results_df), num_models)
 
         # test hp override
+        model_result = auto_trainer._get_model_result()
         if hp_overrides is not None:
             for hp_key, hp_value in hp_overrides.items():
-                result_hp_key = f"config/candidates/{hp_key}"
-                self.assertEqual(results_df[result_hp_key][0], hp_value)
+                self.assertEqual(model_result.metrics["config"]["candidates"][hp_key], hp_value)
 
         # test save
-        model_result = auto_trainer._get_model_result()
         trainer_type = model_result.metrics["config"]["candidates"]["trainer_type"]
         save_path = os.path.join(model_result.log_dir, auto_trainer.save_path)
         self.assertTrue(os.path.exists(os.path.join(save_path, "model_state.pdparams")))
@@ -358,13 +356,12 @@ def test_default_model_candidate(self, language, hp_overrides):
         self.assertEqual(len(results_df), num_models)
 
         # test hp override
+        model_result = auto_trainer._get_model_result()
         if hp_overrides is not None:
             for hp_key, hp_value in hp_overrides.items():
-                result_hp_key = f"config/candidates/{hp_key}"
-                self.assertEqual(results_df[result_hp_key][0], hp_value)
+                self.assertEqual(model_result.metrics["config"]["candidates"][hp_key], hp_value)
 
         # test save
-        model_result = auto_trainer._get_model_result()
         trainer_type = model_result.metrics["config"]["candidates"]["trainer_type"]
         save_path = os.path.join(model_result.log_dir, auto_trainer.save_path)
         self.assertTrue(os.path.exists(os.path.join(save_path, "model_state.pdparams")))
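
Note: a minimal sketch of the logging pattern the new trainable relies on, assuming only that paddlenlp is importable; the handler wiring mirrors the lines added to auto_trainer_base.py. Attaching a stdout StreamHandler to PaddleNLP's logger makes each trial's log records visible on stdout, which Ray captures per trial once RunConfig is given log_to_file="train.log".

    import logging
    import sys

    from paddlenlp.utils.log import logger

    # Mirror paddlenlp log records to stdout so that Ray's per-trial
    # log_to_file="train.log" redirection picks them up.
    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setFormatter(logger.format)  # reuse paddlenlp's formatter
    logger.logger.addHandler(stdout_handler)

    logger.info("this record now also reaches stdout and the trial's train.log")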
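Note: a hedged illustration of the revised hp-override check, assuming an `auto_trainer` that has already been fit and an `hp_overrides` dict as in test_text_classification.py. Override values are now read from the chosen trial's nested Ray result metrics rather than from the flattened "config/candidates/<key>" columns of the results dataframe.

    # Sketch only; `auto_trainer` and `hp_overrides` are assumed to be set up
    # exactly as in tests/experimental/autonlp/test_text_classification.py.
    model_result = auto_trainer._get_model_result()
    candidate_config = model_result.metrics["config"]["candidates"]
    for hp_key, hp_value in hp_overrides.items():
        assert candidate_config[hp_key] == hp_value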