diff --git a/changelog/5422.feature.rst b/changelog/5422.feature.rst new file mode 100644 index 000000000000..5f612027d351 --- /dev/null +++ b/changelog/5422.feature.rst @@ -0,0 +1,12 @@ +Add options ``tensorboard_log_directory`` and ``tensorboard_log_level`` to ``EmbeddingIntentClassifier``, +``DIETClassifier``, ``ResponseSelector``, ``EmbeddingPolicy`` and ``TEDPolicy``. + +By default ``tensorboard_log_directory`` is ``None``. If a valid directory is provided, +metrics are written during training. After the model is trained you can take a look +at the training metrics in tensorboard. Execute ``tensorboard --logdir <path-to-given-directory>``. + +Metrics can either be written after every epoch (default) or for every training step. +You can specify when to write metrics using the variable ``tensorboard_log_level``. +Valid values are 'epoch' and 'minibatch'. + +We also write down a model summary, i.e. layers with inputs and types, to the given directory. diff --git a/docs/core/policies.rst b/docs/core/policies.rst index 1f176b80fc00..2926359dde8b 100644 --- a/docs/core/policies.rst +++ b/docs/core/policies.rst @@ -425,6 +425,15 @@ It is recommended to use ``state_featurizer=LabelTokenizerSingleStateFeaturizer( # How many examples to use for hold out validation set # Large values may hurt performance, e.g. model accuracy. "evaluate_on_number_of_examples": 0 + # If you want to use tensorboard to visualize training metrics, + # set this option to a valid output directory. + # You can view the training metrics after training in tensorboard via + # ``tensorboard --logdir <path-to-given-directory>`` + "tensorboard_log_directory": None + # Define when training metrics for tensorboard should be logged. + # Either after every epoch or for every training step. + # Valid values: 'epoch' and 'minibatch' + "tensorboard_log_level": "epoch" .. 
note:: diff --git a/docs/nlu/components.rst b/docs/nlu/components.rst index b0b8ef8f4793..4fd62ea5e6bd 100644 --- a/docs/nlu/components.rst +++ b/docs/nlu/components.rst @@ -919,6 +919,15 @@ EmbeddingIntentClassifier # How many examples to use for hold out validation set # Large values may hurt performance, e.g. model accuracy. "evaluate_on_number_of_examples": 0 + # If you want to use tensorboard to visualize training metrics, + # set this option to a valid output directory. + # You can view the training metrics after training in tensorboard via + # ``tensorboard --logdir <path-to-given-directory>`` + "tensorboard_log_directory": None + # Define when training metrics for tensorboard should be logged. + # Either after every epoch or for every training step. + # Valid values: 'epoch' and 'minibatch' + "tensorboard_log_level": "epoch" .. _keyword_intent_classifier: @@ -1099,7 +1108,16 @@ ResponseSelector # should predict those tokens. "use_masked_language_model": False # Name of the intent for which this response selector is to be trained - "retrieval_intent: None + "retrieval_intent": None + # If you want to use tensorboard to visualize training metrics, + # set this option to a valid output directory. + # You can view the training metrics after training in tensorboard via + # ``tensorboard --logdir <path-to-given-directory>`` + "tensorboard_log_directory": None + # Define when training metrics for tensorboard should be logged. + # Either after every epoch or for every training step. + # Valid values: 'epoch' and 'minibatch' + "tensorboard_log_level": "epoch" Entity Extractors @@ -1659,3 +1677,12 @@ DIETClassifier # examples per entity are required. # Rule of thumb: you should have more than 100 examples per entity. "BILOU_flag": True + # If you want to use tensorboard to visualize training metrics, + # set this option to a valid output directory. 
+ # You can view the training metrics after training in tensorboard via + # ``tensorboard --logdir <path-to-given-directory>`` + "tensorboard_log_directory": None + # Define when training metrics for tensorboard should be logged. + # Either after every epoch or for every training step. + # Valid values: 'epoch' and 'minibatch' + "tensorboard_log_level": "epoch" diff --git a/rasa/core/policies/embedding_policy.py b/rasa/core/policies/embedding_policy.py index b3ad427db0ca..25eb5f036ad0 100644 --- a/rasa/core/policies/embedding_policy.py +++ b/rasa/core/policies/embedding_policy.py @@ -38,6 +38,8 @@ SOFTMAX, AUTO, BALANCED, + TENSORBOARD_LOG_DIR, + TENSORBOARD_LOG_LEVEL, ) from rasa.utils.tensorflow.models import RasaModel import rasa.utils.common as common_utils @@ -140,6 +142,13 @@ class EmbeddingPolicy(TEDPolicy): # How many examples to use for hold out validation set # Large values may hurt performance, e.g. model accuracy. EVAL_NUM_EXAMPLES: 0, + # If you want to use tensorboard to visualize training and validation metrics, + # set this option to a valid output directory. + TENSORBOARD_LOG_DIR: None, + # Define when training metrics for tensorboard should be logged. + # Either after every epoch or for every training step. + # Valid values: 'epoch' and 'minibatch' + TENSORBOARD_LOG_LEVEL: "epoch", } def __init__( diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index bd7b8625899b..a1790d52d2e7 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -58,6 +58,8 @@ SOFTMAX, AUTO, BALANCED, + TENSORBOARD_LOG_DIR, + TENSORBOARD_LOG_LEVEL, ) @@ -169,6 +171,13 @@ class TEDPolicy(Policy): # How many examples to use for hold out validation set # Large values may hurt performance, e.g. model accuracy. EVAL_NUM_EXAMPLES: 0, + # If you want to use tensorboard to visualize training and validation metrics, + # set this option to a valid output directory. 
+ TENSORBOARD_LOG_DIR: None, + # Define when training metrics for tensorboard should be logged. + # Either after every epoch or for every training step. + # Valid values: 'epoch' and 'minibatch' + TENSORBOARD_LOG_LEVEL: "epoch", } @staticmethod @@ -447,7 +456,12 @@ def __init__( max_history_tracker_featurizer_used: bool, label_data: RasaModelData, ) -> None: - super().__init__(name="TED", random_seed=config[RANDOM_SEED]) + super().__init__( + name="TED", + random_seed=config[RANDOM_SEED], + tensorboard_log_dir=config[TENSORBOARD_LOG_DIR], + tensorboard_log_level=config[TENSORBOARD_LOG_LEVEL], + ) self.config = config self.max_history_tracker_featurizer_used = max_history_tracker_featurizer_used diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index 778ac9f4b015..73025797d058 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -56,6 +56,7 @@ SPARSE_INPUT_DROPOUT, MASKED_LM, ENTITY_RECOGNITION, + TENSORBOARD_LOG_DIR, INTENT_CLASSIFICATION, EVAL_NUM_EXAMPLES, EVAL_NUM_EPOCHS, @@ -77,6 +78,7 @@ SOFTMAX, AUTO, BALANCED, + TENSORBOARD_LOG_LEVEL, ) @@ -207,6 +209,13 @@ def required_components(cls) -> List[Type[Component]]: # examples per entity are required. # Rule of thumb: you should have more than 100 examples per entity. BILOU_FLAG: True, + # If you want to use tensorboard to visualize training and validation metrics, + # set this option to a valid output directory. + TENSORBOARD_LOG_DIR: None, + # Define when training metrics for tensorboard should be logged. + # Either after every epoch or for every training step. 
+ # Valid values: 'epoch' and 'minibatch' + TENSORBOARD_LOG_LEVEL: "epoch", } # init helpers @@ -937,7 +946,12 @@ def __init__( index_tag_id_mapping: Optional[Dict[int, Text]], config: Dict[Text, Any], ) -> None: - super().__init__(name="DIET", random_seed=config[RANDOM_SEED]) + super().__init__( + name="DIET", + random_seed=config[RANDOM_SEED], + tensorboard_log_dir=config[TENSORBOARD_LOG_DIR], + tensorboard_log_level=config[TENSORBOARD_LOG_LEVEL], + ) self.config = config diff --git a/rasa/nlu/classifiers/embedding_intent_classifier.py b/rasa/nlu/classifiers/embedding_intent_classifier.py index 43b485df5e3c..7e91e8e6d594 100644 --- a/rasa/nlu/classifiers/embedding_intent_classifier.py +++ b/rasa/nlu/classifiers/embedding_intent_classifier.py @@ -40,6 +40,8 @@ SOFTMAX, AUTO, BALANCED, + TENSORBOARD_LOG_DIR, + TENSORBOARD_LOG_LEVEL, ) import rasa.utils.common as common_utils from rasa.utils.tensorflow.models import RasaModel @@ -132,6 +134,13 @@ def required_components(cls) -> List[Type[Component]]: # How many examples to use for hold out validation set # Large values may hurt performance, e.g. model accuracy. EVAL_NUM_EXAMPLES: 0, + # If you want to use tensorboard to visualize training and validation metrics, + # set this option to a valid output directory. + TENSORBOARD_LOG_DIR: None, + # Define when training metrics for tensorboard should be logged. + # Either after every epoch or for every training step. 
+ # Valid values: 'epoch' and 'minibatch' + TENSORBOARD_LOG_LEVEL: "epoch", } def __init__( diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py index 52f595ea6827..a7be78a303f3 100644 --- a/rasa/nlu/selectors/response_selector.py +++ b/rasa/nlu/selectors/response_selector.py @@ -60,6 +60,8 @@ SOFTMAX, AUTO, BALANCED, + TENSORBOARD_LOG_DIR, + TENSORBOARD_LOG_LEVEL, ) from rasa.nlu.constants import ( RESPONSE, @@ -186,6 +188,13 @@ def required_components(cls) -> List[Type[Component]]: MASKED_LM: False, # Name of the intent for which this response selector is to be trained RETRIEVAL_INTENT: None, + # If you want to use tensorboard to visualize training and validation metrics, + # set this option to a valid output directory. + TENSORBOARD_LOG_DIR: None, + # Define when training metrics for tensorboard should be logged. + # Either after every epoch or for every training step. + # Valid values: 'epoch' and 'minibatch' + TENSORBOARD_LOG_LEVEL: "epoch", } def __init__( diff --git a/rasa/utils/tensorflow/constants.py b/rasa/utils/tensorflow/constants.py index 3e13221041d0..8563ff8d7c19 100644 --- a/rasa/utils/tensorflow/constants.py +++ b/rasa/utils/tensorflow/constants.py @@ -65,3 +65,6 @@ POOLING = "pooling" MAX_POOLING = "max" MEAN_POOLING = "mean" + +TENSORBOARD_LOG_DIR = "tensorboard_log_directory" +TENSORBOARD_LOG_LEVEL = "tensorboard_log_level" diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index 48dafd731f21..d4fa4f63e880 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -1,16 +1,23 @@ +import datetime + import tensorflow as tf import numpy as np import logging from collections import defaultdict from typing import List, Text, Dict, Tuple, Union, Optional, Callable + +from tensorflow_core.python.ops.summary_ops_v2 import ResourceSummaryWriter from tqdm import tqdm from rasa.utils.common import is_logging_disabled from rasa.utils.tensorflow.model_data import 
RasaModelData, FeatureSignature -from rasa.utils.tensorflow.constants import SEQUENCE +from rasa.utils.tensorflow.constants import SEQUENCE, TENSORBOARD_LOG_LEVEL logger = logging.getLogger(__name__) +TENSORBOARD_LOG_LEVELS = ["epoch", "minibatch"] + + # noinspection PyMethodOverriding class RasaModel(tf.keras.models.Model): """Completely override all public methods of keras Model. @@ -18,7 +25,13 @@ class RasaModel(tf.keras.models.Model): Cannot be used as tf.keras.Model """ - def __init__(self, random_seed: Optional[int] = None, **kwargs) -> None: + def __init__( + self, + random_seed: Optional[int] = None, + tensorboard_log_dir: Optional[Text] = None, + tensorboard_log_level: Optional[Text] = "epoch", + **kwargs, + ) -> None: """Initialize the RasaModel. Args: @@ -35,6 +48,38 @@ def __init__(self, random_seed: Optional[int] = None, **kwargs) -> None: self.random_seed = random_seed + self.train_summary_writer = None + self.test_summary_writer = None + self.model_summary_file = None + self.tensorboard_log_on_epochs = True + + self._set_up_tensorboard_writer(tensorboard_log_level, tensorboard_log_dir) + + def _set_up_tensorboard_writer( + self, tensorboard_log_level: Text, tensorboard_log_dir: Optional[Text] = None + ) -> None: + if tensorboard_log_dir is not None: + if tensorboard_log_level not in TENSORBOARD_LOG_LEVELS: + raise ValueError( + f"Provided '{TENSORBOARD_LOG_LEVEL}' ('{tensorboard_log_level}') " + f"is invalid! 
Valid values are: {TENSORBOARD_LOG_LEVELS}" + ) + + self.tensorboard_log_on_epochs = tensorboard_log_level == "epoch" + + current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + class_name = self.__class__.__name__ + + train_log_dir = f"{tensorboard_log_dir}/{class_name}/{current_time}/train" + test_log_dir = f"{tensorboard_log_dir}/{class_name}/{current_time}/test" + + self.train_summary_writer = tf.summary.create_file_writer(train_log_dir) + self.test_summary_writer = tf.summary.create_file_writer(test_log_dir) + + self.model_summary_file = ( + f"{tensorboard_log_dir}/{class_name}/{current_time}/model_summary.txt" + ) + def batch_loss( self, batch_in: Union[Tuple[tf.Tensor], Tuple[np.ndarray]] ) -> tf.Tensor: @@ -87,18 +132,25 @@ def fit( val_results = {} # validation is not performed every epoch progress_bar = tqdm(range(epochs), desc="Epochs", disable=disable) + training_steps = 0 + for epoch in progress_bar: epoch_batch_size = self.linearly_increasing_batch_size( epoch, batch_size, epochs ) - self._batch_loop( + training_steps = self._batch_loop( train_dataset_function, tf_train_on_batch_function, epoch_batch_size, True, + training_steps, + self.train_summary_writer, ) + if self.tensorboard_log_on_epochs: + self._log_metrics_for_tensorboard(epoch, self.train_summary_writer) + postfix_dict = self._get_metric_results() if evaluate_on_num_examples > 0: @@ -108,13 +160,24 @@ def fit( tf_evaluation_on_batch_function, epoch_batch_size, False, + training_steps, + self.test_summary_writer, ) + + if self.tensorboard_log_on_epochs: + self._log_metrics_for_tensorboard( + epoch, self.test_summary_writer + ) + val_results = self._get_metric_results(prefix="val_") postfix_dict.update(val_results) progress_bar.set_postfix(postfix_dict) + if self.model_summary_file is not None: + self._write_model_summary() + self._training = None # training phase should be defined when building a graph if not disable: logger.info("Finished training.") @@ -194,14 +257,26 @@ def 
_batch_loop( call_model_function: Callable, batch_size: int, training: bool, - ) -> None: + offset: int, + writer: Optional[ResourceSummaryWriter] = None, + ) -> int: """Run on batches""" self.reset_metrics() + + step = offset + self._training = training # needed for eager mode for batch_in in dataset_function(batch_size): call_model_function(batch_in) + if not self.tensorboard_log_on_epochs: + self._log_metrics_for_tensorboard(step, writer) + + step += 1 + + return step + @staticmethod def _get_tf_call_model_function( dataset_function: Callable, @@ -265,7 +340,6 @@ def evaluation_dataset_function(_batch_size: int) -> tf.data.Dataset: def _get_metric_results(self, prefix: Optional[Text] = None) -> Dict[Text, Text]: """Get the metrics results""" - prefix = prefix or "" return { @@ -274,6 +348,15 @@ def _get_metric_results(self, prefix: Optional[Text] = None) -> Dict[Text, Text] if metric.name in self.metrics_to_log } + def _log_metrics_for_tensorboard( + self, step: int, writer: Optional[ResourceSummaryWriter] = None + ) -> None: + if writer is not None: + with writer.as_default(): + for metric in self.metrics: + if metric.name in self.metrics_to_log: + tf.summary.scalar(metric.name, metric.result(), step=step) + @staticmethod def _should_evaluate( evaluate_every_num_epochs: int, epochs: int, current_epoch: int @@ -292,9 +375,9 @@ def batch_to_model_data_format( """Convert input batch tensors into batch data format. Batch contains any number of batch data. The order is equal to the - key-value pairs in session data. As sparse data were converted into indices, data, - shape before, this methods converts them into sparse tensors. Dense data is - kept. + key-value pairs in session data. As sparse data were converted into indices, + data, shape before, this methods converts them into sparse tensors. Dense data + is kept. 
""" batch_data = defaultdict(list) @@ -342,6 +425,25 @@ def linearly_increasing_batch_size( else: return int(batch_size[0]) + def _write_model_summary(self): + total_number_of_variables = np.sum( + [np.prod(v.shape) for v in self.trainable_variables] + ) + layers = [ + f"{layer.name} ({layer.dtype.name}) " + f"[{'x'.join(str(s) for s in layer.shape)}]" + for layer in self.trainable_variables + ] + layers.reverse() + + with open(self.model_summary_file, "w") as file: + file.write("Variables: name (type) [shape]\n\n") + for layer in layers: + file.write(layer) + file.write("\n") + file.write("\n") + file.write(f"Total size of variables: {total_number_of_variables}") + def compile(self, *args, **kwargs) -> None: raise Exception( "This method should neither be called nor implemented in our code." diff --git a/tests/nlu/classifiers/test_diet_classifier.py b/tests/nlu/classifiers/test_diet_classifier.py index 4c38733607c2..587f8de6a054 100644 --- a/tests/nlu/classifiers/test_diet_classifier.py +++ b/tests/nlu/classifiers/test_diet_classifier.py @@ -13,6 +13,10 @@ RANKING_LENGTH, EPOCHS, MASKED_LM, + TENSORBOARD_LOG_LEVEL, + TENSORBOARD_LOG_DIR, + EVAL_NUM_EPOCHS, + EVAL_NUM_EXAMPLES, ) from rasa.nlu.classifiers.diet_classifier import DIETClassifier from rasa.nlu.model import Interpreter @@ -300,3 +304,41 @@ async def test_set_random_seed(component_builder, tmpdir): result_b = loaded_b.parse("hello")["intent"]["confidence"] assert result_a == result_b + + +async def test_train_tensorboard_logging(component_builder, tmpdir): + from pathlib import Path + + tensorboard_log_dir = Path(tmpdir.strpath) / "tensorboard" + + assert not tensorboard_log_dir.exists() + + _config = RasaNLUModelConfig( + { + "pipeline": [ + {"name": "WhitespaceTokenizer"}, + {"name": "CountVectorsFeaturizer"}, + { + "name": "DIETClassifier", + EPOCHS: 3, + TENSORBOARD_LOG_LEVEL: "epoch", + TENSORBOARD_LOG_DIR: str(tensorboard_log_dir), + EVAL_NUM_EXAMPLES: 15, + EVAL_NUM_EPOCHS: 1, + }, + ], + 
"language": "en", + } + ) + + await train( + _config, + path=tmpdir.strpath, + data="data/examples/rasa/demo-rasa-multi-intent.md", + component_builder=component_builder, + ) + + assert tensorboard_log_dir.exists() + + all_files = list(tensorboard_log_dir.rglob("*.*")) + assert len(all_files) == 3