diff --git a/changelog/4902.feature.rst b/changelog/4902.feature.rst new file mode 100644 index 000000000000..fc9d6c362535 --- /dev/null +++ b/changelog/4902.feature.rst @@ -0,0 +1,2 @@ +Added a new configuration parameter, ``ranking_length`` to the ``EmbeddingPolicy``, ``EmbeddingIntentClassifier``, +and ``ResponseSelector`` classes. diff --git a/changelog/4902.improvement.rst b/changelog/4902.improvement.rst new file mode 100644 index 000000000000..8ac1670b7f8a --- /dev/null +++ b/changelog/4902.improvement.rst @@ -0,0 +1,2 @@ +The ``EmbeddingPolicy``, ``EmbeddingIntentClassifier``, and ``ResponseSelector`` now by default normalize confidence +levels over the top 10 results. See :ref:`migration-to-rasa-1.7` for more details. diff --git a/data/test/many_intents.md b/data/test/many_intents.md new file mode 100644 index 000000000000..aa4dcf46fab1 --- /dev/null +++ b/data/test/many_intents.md @@ -0,0 +1,59 @@ +## intent:handleinsult +- you are an idiot +- You lack understanding. + +## intent:thank +- Thanks +- Thank you + +## intent:telljoke +- Tell me something that you think will make me laugh. +- Entertain me with a joke. + +## intent:signup_newsletter +- I wanna sign up for the newsletter. +- I want to sign up for the newsletter. + +## intent:react_positive +- you are funny +- thats funny + +## intent:react_negative +- i am sad +- bad + +## intent:how_to_get_started +- how do I get started with rasa +- how do I use rasa + +## intent:technical_question +- what is duckling +- where to train intents in rasa? + +## intent:source_code +- how it works? +- where can i find this code + +## intent:pipeline_recommendation +- what pipeline should I start with? +- what is the right pipeline to choose? + +## intent:rasa_cost +- is rasa free +- are you really free + +## intent:nicetomeeyou +- It’s great connecting with you. +- Hi, nice to meet you! + +## intent:nlu_generation_tool_recommendation +- which tools can I use to create nlu data +- how can I get nlu data + +## intent:install_rasa +- I want to install Rasa Stack +- How to install Rasa? + +## intent:ask_which_events +- Which community events do you have +- Where can I meet Rasas \ No newline at end of file diff --git a/docs/core/policies.rst b/docs/core/policies.rst index 746ce52b7e31..f4d1ea0b5c92 100644 --- a/docs/core/policies.rst +++ b/docs/core/policies.rst @@ -271,6 +271,9 @@ It is recommended to use ``inner`` for ``softmax``, ``cosine`` for ``margin``; - ``loss_type`` sets the type of the loss function, it should be either ``softmax`` or ``margin``; + - ``ranking_length`` defines the number of top confidences over + which to normalize ranking results if ``loss_type: "softmax"``; + to turn off normalization set it to 0 - ``mu_pos`` controls how similar the algorithm should try to make embedding vectors for correct intent labels, used only if ``loss_type`` is set to ``margin``; diff --git a/docs/migration-guide.rst b/docs/migration-guide.rst index a8776048f36c..509a61d77eff 100644 --- a/docs/migration-guide.rst +++ b/docs/migration-guide.rst @@ -11,6 +11,18 @@ Migration Guide This page contains information about changes between major versions and how you can migrate from one version to another. +.. _migration-to-rasa-1.7: + +Rasa 1.6 to Rasa 1.7 +-------------------- + +General +~~~~~~~ +- By default, the ``EmbeddingIntentClassifier``, ``EmbeddingPolicy``, and ``ResponseSelector`` will + now normalize the top 10 confidence results if the ``loss_type`` is ``"softmax"`` (which has been + default since 1.3, see :ref:`migration-to-rasa-1.3`). This is configurable via the ``ranking_length`` + configuration parameter; to turn off normalization to match the previous behavior, set ``ranking_length: 0``. + .. _migration-to-rasa-1.3: Rasa 1.2 to Rasa 1.3 @@ -82,7 +94,7 @@ General - If you were previously importing the ``Button`` or ``Element`` classes from ``rasa_core.dispatcher``, these are now to be imported from ``rasa_sdk.utils``. -- Rasa NLU and Core previously used `separate configuration files +- Rasa NLU and Core previously used `separate configuration files `_. These two files should be merged into a single file either named ``config.yml``, or passed via the ``--config`` parameter. diff --git a/docs/nlu/components.rst b/docs/nlu/components.rst index ce7996861f7c..e48fc7c6dd54 100644 --- a/docs/nlu/components.rst +++ b/docs/nlu/components.rst @@ -464,6 +464,9 @@ EmbeddingIntentClassifier ``inner`` for ``softmax``, ``cosine`` for ``margin``; - ``loss_type`` sets the type of the loss function, it should be either ``softmax`` or ``margin``; + - ``ranking_length`` defines the number of top confidences over + which to normalize ranking results if ``loss_type: "softmax"``; + to turn off normalization set it to 0 - ``mu_pos`` controls how similar the algorithm should try to make embedding vectors for correct intent labels, used only if ``loss_type`` is set to ``margin``; diff --git a/rasa/core/policies/embedding_policy.py b/rasa/core/policies/embedding_policy.py index e817f1010171..7dd727b0f60e 100644 --- a/rasa/core/policies/embedding_policy.py +++ b/rasa/core/policies/embedding_policy.py @@ -74,6 +74,9 @@ class EmbeddingPolicy(Policy): "similarity_type": "auto", # string 'auto' or 'cosine' or 'inner' # the type of the loss function "loss_type": "softmax", # string 'softmax' or 'margin' + # number of top actions to normalize scores for softmax loss_type + # set to 0 to turn off normalization + "ranking_length": 10, # how similar the algorithm should try # to make embedding vectors for correct labels "mu_pos": 0.8, # should be 0.0 < ... < 1.0 for 'cosine' @@ -192,6 +195,7 @@ def _load_embedding_params(self, config: Dict[Text, Any]) -> None: self.similarity_type = "inner" elif self.loss_type == "margin": self.similarity_type = "cosine" + self.ranking_length = config["ranking_length"] self.mu_pos = config["mu_pos"] self.mu_neg = config["mu_neg"] @@ -567,8 +571,12 @@ def predict_action_probabilities( tf_feed_dict = self.tf_feed_dict_for_prediction(tracker, domain) confidence = self.session.run(self.pred_confidence, feed_dict=tf_feed_dict) + confidence = confidence[0, -1, :] - return confidence[0, -1, :].tolist() + if self.loss_type == "softmax" and self.ranking_length > 0: + confidence = train_utils.normalize(confidence, self.ranking_length) + + return confidence.tolist() def persist(self, path: Text) -> None: """Persists the policy to a storage.""" @@ -583,7 +591,11 @@ def persist(self, path: Text) -> None: self.featurizer.persist(path) - meta = {"priority": self.priority} + meta = { + "priority": self.priority, + "loss_type": self.loss_type, + "ranking_length": self.ranking_length, + } meta_file = os.path.join(path, "embedding_policy.json") rasa.utils.io.dump_obj_as_json_to_file(meta_file, meta) @@ -665,7 +677,7 @@ def load(cls, path: Text) -> "EmbeddingPolicy": return cls( featurizer=featurizer, - priority=meta["priority"], + priority=meta.pop("priority"), graph=graph, session=session, user_placeholder=a_in, @@ -677,4 +689,5 @@ def load(cls, path: Text) -> "EmbeddingPolicy": bot_embed=bot_embed, all_bot_embed=all_bot_embed, attention_weights=attention_weights, + **meta, ) diff --git a/rasa/nlu/classifiers/embedding_intent_classifier.py b/rasa/nlu/classifiers/embedding_intent_classifier.py index 28fffbb0cd64..19821768610e 100644 --- a/rasa/nlu/classifiers/embedding_intent_classifier.py +++ b/rasa/nlu/classifiers/embedding_intent_classifier.py @@ -93,6 +93,9 @@ class EmbeddingIntentClassifier(Component): "similarity_type": "auto", # string 'auto' or 'cosine' or 'inner' # the type of the loss function "loss_type": "softmax", # string 'softmax' or 'margin' + # number of top intents to normalize scores for softmax loss_type + # set to 0 to turn off normalization + "ranking_length": 10, # how similar the algorithm should try # to make embedding vectors for correct labels "mu_pos": 0.8, # should be 0.0 < ... < 1.0 for 'cosine' @@ -174,6 +177,7 @@ def _load_embedding_params(self, config: Dict[Text, Any]) -> None: elif self.loss_type == "margin": self.similarity_type = "cosine" + self.ranking_length = config["ranking_length"] self.mu_pos = config["mu_pos"] self.mu_neg = config["mu_neg"] self.use_max_sim_neg = config["use_max_sim_neg"] @@ -801,6 +805,10 @@ def _calculate_message_sim( message_sim = message_sim.flatten() # sim is a matrix label_ids = message_sim.argsort()[::-1] + + if self.loss_type == "softmax" and self.ranking_length > 0: + message_sim = train_utils.normalize(message_sim, self.ranking_length) + message_sim[::-1].sort() # transform sim to python list for JSON serializing @@ -838,8 +846,13 @@ def predict_label( "confidence": message_sim[0], } + if self.ranking_length and 0 < self.ranking_length < LABEL_RANKING_LENGTH: + output_length = self.ranking_length + else: + output_length = LABEL_RANKING_LENGTH + ranking = list(zip(list(label_ids), message_sim)) - ranking = ranking[:LABEL_RANKING_LENGTH] + ranking = ranking[:output_length] label_ranking = [ {"name": self.inverted_label_dict[label_idx], "confidence": score} for label_idx, score in ranking diff --git a/rasa/nlu/selectors/embedding_response_selector.py b/rasa/nlu/selectors/embedding_response_selector.py index 5c34b697e44d..3dcac57b5a41 100644 --- a/rasa/nlu/selectors/embedding_response_selector.py +++ b/rasa/nlu/selectors/embedding_response_selector.py @@ -87,6 +87,9 @@ class ResponseSelector(EmbeddingIntentClassifier): "similarity_type": "auto", # string 'auto' or 'cosine' or 'inner' # the type of the loss function "loss_type": "softmax", # string 'softmax' or 'margin' + # number of top responses to normalize scores for softmax loss_type + # set to 0 to turn off normalization + "ranking_length": 10, # how similar the algorithm should try # to make embedding vectors for correct intent labels "mu_pos": 0.8, # should be 0.0 < ... < 1.0 for 'cosine' diff --git a/rasa/utils/train_utils.py b/rasa/utils/train_utils.py index b61c6ad6a2e3..650d07203ae7 100644 --- a/rasa/utils/train_utils.py +++ b/rasa/utils/train_utils.py @@ -1275,3 +1275,20 @@ def load_tensor(name: Text) -> Optional[Union["tf.Tensor", List["tf.Tensor"]]]: return tensor_list[0] return tensor_list + + +def normalize(values: "np.ndarray", ranking_length: Optional[int] = 0) -> "np.ndarray": + """Normalizes an array of positive numbers over the top `ranking_length` values. + + Other values will be set to 0. + """ + + new_values = values.copy() # prevent mutation of the input + if 0 < ranking_length < len(new_values): + ranked = sorted(new_values, reverse=True) + new_values[new_values < ranked[ranking_length - 1]] = 0 + + if np.sum(new_values) > 0: + new_values = new_values / np.sum(new_values) + + return new_values diff --git a/tests/core/test_policies.py b/tests/core/test_policies.py index 75b5bf9daea2..30c4c2e940bb 100644 --- a/tests/core/test_policies.py +++ b/tests/core/test_policies.py @@ -1,9 +1,8 @@ -from unittest.mock import patch +from unittest.mock import Mock, patch import numpy as np import pytest -from rasa.utils import train_utils from rasa.core import training from rasa.core.actions.action import ( ACTION_DEFAULT_ASK_AFFIRMATION_NAME, @@ -33,6 +32,7 @@ from rasa.core.policies.memoization import AugmentedMemoizationPolicy, MemoizationPolicy from rasa.core.policies.sklearn_policy import SklearnPolicy from rasa.core.trackers import DialogueStateTracker +from rasa.utils import train_utils from tests.core.conftest import ( DEFAULT_DOMAIN_PATH_WITH_MAPPING, DEFAULT_DOMAIN_PATH_WITH_SLOTS, @@ -104,6 +104,14 @@ def featurizer(self): def priority(self): return 1 + @pytest.fixture(scope="module") + def default_domain(self): + return Domain.load(DEFAULT_DOMAIN_PATH_WITH_SLOTS) + + @pytest.fixture(scope="module") + def tracker(self, default_domain): + return DialogueStateTracker(UserMessage.DEFAULT_SENDER_ID, default_domain.slots) + @pytest.fixture(scope="module") async def trained_policy(self, featurizer, priority): default_domain = Domain.load(DEFAULT_DOMAIN_PATH_WITH_SLOTS) @@ -219,14 +227,6 @@ def mock_search(self): gs.return_value = gs # for __init__ yield gs - @pytest.fixture(scope="module") - def default_domain(self): - return Domain.load(DEFAULT_DOMAIN_PATH_WITH_SLOTS) - - @pytest.fixture - def tracker(self, default_domain): - return DialogueStateTracker(UserMessage.DEFAULT_SENDER_ID, default_domain.slots) - @pytest.fixture(scope="module") async def trackers(self, default_domain): return await train_trackers(default_domain, augmentation_factor=20) @@ -337,6 +337,29 @@ def create_policy(self, featurizer, priority): def test_similarity_type(self, trained_policy): assert trained_policy.similarity_type == "inner" + def test_ranking_length(self, trained_policy): + assert trained_policy.ranking_length == 10 + + def test_normalization(self, trained_policy, tracker, default_domain, monkeypatch): + # first check the output is what we expect + predicted_probabilities = trained_policy.predict_action_probabilities( + tracker, default_domain + ) + # count number of non-zero confidences + assert ( + sum([confidence > 0 for confidence in predicted_probabilities]) + == trained_policy.ranking_length + ) + # check that the norm is still 1 + assert sum(predicted_probabilities) == pytest.approx(1) + + # also check our function is called + mock = Mock() + monkeypatch.setattr(train_utils, "normalize", mock.normalize) + trained_policy.predict_action_probabilities(tracker, default_domain) + + mock.normalize.assert_called_once() + async def test_gen_batch(self, trained_policy, default_domain): training_trackers = await train_trackers(default_domain, augmentation_factor=0) training_data = trained_policy.featurize_for_training( @@ -382,6 +405,15 @@ def create_policy(self, featurizer, priority): def test_similarity_type(self, trained_policy): assert trained_policy.similarity_type == "cosine" + def test_normalization(self, trained_policy, tracker, default_domain, monkeypatch): + # Mock actual normalization method + mock = Mock() + monkeypatch.setattr(train_utils, "normalize", mock.normalize) + trained_policy.predict_action_probabilities(tracker, default_domain) + + # function should not get called for margin loss_type + mock.normalize.assert_not_called() + class TestEmbeddingPolicyWithEval(TestEmbeddingPolicy): def create_policy(self, featurizer, priority): @@ -393,6 +425,54 @@ def create_policy(self, featurizer, priority): return p +class TestEmbeddingPolicyNoNormalization(TestEmbeddingPolicy): + def create_policy(self, featurizer, priority): + p = EmbeddingPolicy( + featurizer=featurizer, priority=priority, **{"ranking_length": 0} + ) + return p + + def test_ranking_length(self, trained_policy): + assert trained_policy.ranking_length == 0 + + def test_normalization(self, trained_policy, tracker, default_domain, monkeypatch): + # first check the output is what we expect + predicted_probabilities = trained_policy.predict_action_probabilities( + tracker, default_domain + ) + # there should be no normalization + assert all([confidence > 0 for confidence in predicted_probabilities]) + + # also check our function is not called + mock = Mock() + monkeypatch.setattr(train_utils, "normalize", mock.normalize) + trained_policy.predict_action_probabilities(tracker, default_domain) + + mock.normalize.assert_not_called() + + +class TestEmbeddingPolicyLowRankingLength(TestEmbeddingPolicy): + def create_policy(self, featurizer, priority): + p = EmbeddingPolicy( + featurizer=featurizer, priority=priority, **{"ranking_length": 3} + ) + return p + + def test_ranking_length(self, trained_policy): + assert trained_policy.ranking_length == 3 + + +class TestEmbeddingPolicyHighRankingLength(TestEmbeddingPolicy): + def create_policy(self, featurizer, priority): + p = EmbeddingPolicy( + featurizer=featurizer, priority=priority, **{"ranking_length": 11} + ) + return p + + def test_ranking_length(self, trained_policy): + assert trained_policy.ranking_length == 11 + + class TestEmbeddingPolicyWithFullDialogue(TestEmbeddingPolicy): def create_policy(self, featurizer, priority): # use standard featurizer from EmbeddingPolicy, diff --git a/tests/nlu/classifiers/test_embedding_intent_classifier.py b/tests/nlu/classifiers/test_embedding_intent_classifier.py index 0b3d6ffdd730..e45dbfed1f51 100644 --- a/tests/nlu/classifiers/test_embedding_intent_classifier.py +++ b/tests/nlu/classifiers/test_embedding_intent_classifier.py @@ -2,8 +2,10 @@ import pytest import scipy.sparse +from unittest.mock import Mock + from rasa.nlu import train -from rasa.nlu.model import Interpreter +from rasa.nlu.classifiers import LABEL_RANKING_LENGTH from rasa.nlu.config import RasaNLUModelConfig from rasa.nlu.constants import ( TEXT_ATTRIBUTE, @@ -12,7 +14,9 @@ INTENT_ATTRIBUTE, ) from rasa.nlu.classifiers.embedding_intent_classifier import EmbeddingIntentClassifier +from rasa.nlu.model import Interpreter from rasa.nlu.training_data import Message +from rasa.utils import train_utils from tests.nlu.conftest import DEFAULT_DATA_PATH @@ -160,3 +164,113 @@ async def test_raise_error_on_incorrect_pipeline(component_builder, tmpdir): "Failed to validate component 'EmbeddingIntentClassifier'. Missing one of " "the following properties: " in str(e.value) ) + + +def as_pipeline(*components): + return [{"name": c} for c in components] + + +@pytest.mark.parametrize( + "classifier_params, data_path, output_length, output_should_sum_to_1", + [ + ({"random_seed": 42}, "data/test/many_intents.md", 10, True), # default config + ( + {"random_seed": 42, "ranking_length": 0}, + "data/test/many_intents.md", + LABEL_RANKING_LENGTH, + False, + ), # no normalization + ( + {"random_seed": 42, "ranking_length": 3}, + "data/test/many_intents.md", + 3, + True, + ), # lower than default ranking_length + ( + {"random_seed": 42, "ranking_length": 12}, + "data/test/many_intents.md", + LABEL_RANKING_LENGTH, + False, + ), # higher than default ranking_length + ( + {"random_seed": 42}, + "examples/moodbot/data/nlu.md", + 7, + True, + ), # less intents than default ranking_length + ], +) +async def test_softmax_normalization( + component_builder, + tmpdir, + classifier_params, + data_path, + output_length, + output_should_sum_to_1, +): + pipeline = as_pipeline( + "WhitespaceTokenizer", "CountVectorsFeaturizer", "EmbeddingIntentClassifier" + ) + assert pipeline[2]["name"] == "EmbeddingIntentClassifier" + pipeline[2].update(classifier_params) + + _config = RasaNLUModelConfig({"pipeline": pipeline}) + (trained_model, _, persisted_path) = await train( + _config, + path=tmpdir.strpath, + data=data_path, + component_builder=component_builder, + ) + loaded = Interpreter.load(persisted_path, component_builder) + + parse_data = loaded.parse("hello") + intent_ranking = parse_data.get("intent_ranking") + # check that the output was correctly truncated after normalization + assert len(intent_ranking) == output_length + + # check whether normalization had the expected effect + output_sums_to_1 = sum( + [intent.get("confidence") for intent in intent_ranking] + ) == pytest.approx(1) + assert output_sums_to_1 == output_should_sum_to_1 + + # check whether the normalization of rankings is reflected in intent prediction + assert parse_data.get("intent") == intent_ranking[0] + + +@pytest.mark.parametrize( + "classifier_params, output_length", + [({"loss_type": "margin", "random_seed": 42}, LABEL_RANKING_LENGTH)], +) +async def test_margin_loss_is_not_normalized( + monkeypatch, component_builder, tmpdir, classifier_params, output_length, +): + pipeline = as_pipeline( + "WhitespaceTokenizer", "CountVectorsFeaturizer", "EmbeddingIntentClassifier" + ) + assert pipeline[2]["name"] == "EmbeddingIntentClassifier" + pipeline[2].update(classifier_params) + + mock = Mock() + monkeypatch.setattr(train_utils, "normalize", mock.normalize) + + _config = RasaNLUModelConfig({"pipeline": pipeline}) + (trained_model, _, persisted_path) = await train( + _config, + path=tmpdir.strpath, + data="data/test/many_intents.md", + component_builder=component_builder, + ) + loaded = Interpreter.load(persisted_path, component_builder) + + parse_data = loaded.parse("hello") + intent_ranking = parse_data.get("intent_ranking") + + # check that the output was not normalized + mock.normalize.assert_not_called() + + # check that the output was correctly truncated + assert len(intent_ranking) == output_length + + # make sure top ranking is reflected in intent prediction + assert parse_data.get("intent") == intent_ranking[0]