add normalisation to confidence scores #4902

Merged
merged 41 commits into master from add_softmax_normalisation on Jan 23, 2020
Changes from 6 commits are shown in the file diff below.

Commits (41)
9f05641
add normalisation to confidence scores
akelad Nov 28, 2019
d0a50b7
add configuration parameter for normalization
akelad Dec 4, 2019
5d7df4d
fix label confidence not being updated
erohmensing Dec 12, 2019
4466052
make number to normalize configurable, report that num
erohmensing Dec 12, 2019
606e578
add normalization for embedding policy
erohmensing Dec 13, 2019
792eeb1
remove debug logging
erohmensing Dec 13, 2019
13cb22a
Merge branch 'master' into add_softmax_normalisation
erohmensing Dec 17, 2019
4996ca2
persist policy loss type
erohmensing Dec 17, 2019
efa399f
add response selector, update default, rename parameter
erohmensing Dec 17, 2019
93ac058
clean up code
erohmensing Dec 17, 2019
0d13ae4
changelog and docs
erohmensing Dec 17, 2019
0965009
fix policy tests
erohmensing Dec 17, 2019
4885ae5
clean up numpy
erohmensing Dec 17, 2019
71e6a68
migration/changelog
erohmensing Dec 17, 2019
a3fa955
Merge branch 'master' into add_softmax_normalisation
erohmensing Dec 17, 2019
c98b7b9
Merge branch 'master' into add_softmax_normalisation
erohmensing Dec 18, 2019
73f932e
tests for embedding policy normalization
erohmensing Dec 18, 2019
292ee54
truncate output as decided
erohmensing Dec 18, 2019
7a9879c
tests for embeddingintentclassifier normalization
erohmensing Dec 18, 2019
f55eaa1
use random seed for stable test results
erohmensing Dec 18, 2019
0f8dbc0
move normalization method to utils
erohmensing Dec 18, 2019
2087fc8
truncate labels
erohmensing Dec 18, 2019
3528a1c
Merge branch 'master' into add_softmax_normalisation
erohmensing Dec 18, 2019
36e6857
Merge branch 'master' into add_softmax_normalisation
erohmensing Jan 15, 2020
1c35df9
move migration content
erohmensing Jan 15, 2020
5da58cb
move normalization to calculate_message_sim
erohmensing Jan 15, 2020
7d1911a
make normalization method more general
erohmensing Jan 15, 2020
d6395cd
add test for margin loss type not undergoing normalization
erohmensing Jan 15, 2020
1ac4bea
update changelog
erohmensing Jan 15, 2020
90d6192
use hardcoded default ranking length in test
erohmensing Jan 15, 2020
4271286
use old method for truncating
erohmensing Jan 15, 2020
a0fc186
handle some edge cases
erohmensing Jan 15, 2020
fecb858
explicitly do not call normalization if ranking length <1
erohmensing Jan 15, 2020
bb9cd0c
use correct attribute
erohmensing Jan 15, 2020
1103fb0
Merge branch 'master' into add_softmax_normalisation
Ghostvv Jan 23, 2020
7d933ee
Update rasa/nlu/classifiers/embedding_intent_classifier.py
Ghostvv Jan 23, 2020
ad2beab
Update rasa/core/policies/embedding_policy.py
Ghostvv Jan 23, 2020
d76438f
Update rasa/utils/train_utils.py
Ghostvv Jan 23, 2020
1741307
black
Ghostvv Jan 23, 2020
f57624a
don't mutate an argument
Ghostvv Jan 23, 2020
97662f1
add norm test
Ghostvv Jan 23, 2020
23 changes: 20 additions & 3 deletions rasa/core/policies/embedding_policy.py
@@ -74,6 +74,9 @@ class EmbeddingPolicy(Policy):
"similarity_type": "auto", # string 'auto' or 'cosine' or 'inner'
# the type of the loss function
"loss_type": "softmax", # string 'softmax' or 'margin'
# number of top actions to normalize scores for softmax loss_type
# set to 0 to turn off normalization
"normalize_top_num_actions": 0,
# how similar the algorithm should try
# to make embedding vectors for correct labels
"mu_pos": 0.8, # should be 0.0 < ... < 1.0 for 'cosine'
@@ -192,6 +195,7 @@ def _load_embedding_params(self, config: Dict[Text, Any]) -> None:
self.similarity_type = "inner"
elif self.loss_type == "margin":
self.similarity_type = "cosine"
self.normalize_top_num_actions = config["normalize_top_num_actions"]

self.mu_pos = config["mu_pos"]
self.mu_neg = config["mu_neg"]
@@ -556,8 +560,17 @@ def predict_action_probabilities(
tf_feed_dict = self.tf_feed_dict_for_prediction(tracker, domain)

confidence = self.session.run(self.pred_confidence, feed_dict=tf_feed_dict)
confidence = confidence[0, -1, :].tolist()

return confidence[0, -1, :].tolist()
# normalise scores if turned on
if self.loss_type == "softmax" and self.normalize_top_num_actions > 0:
ranked = sorted(confidence, reverse=True)
for i, value in enumerate(confidence):
if value < ranked[self.normalize_top_num_actions - 1]:
confidence[i] = 0.0
confidence = confidence / np.sum(confidence)

return confidence

def persist(self, path: Text) -> None:
"""Persists the policy to a storage."""
@@ -572,7 +585,10 @@ def persist(self, path: Text) -> None:

self.featurizer.persist(path)

meta = {"priority": self.priority}
meta = {
"priority": self.priority,
"normalize_top_num_actions": self.normalize_top_num_actions,
}

meta_file = os.path.join(path, "embedding_policy.json")
rasa.utils.io.dump_obj_as_json_to_file(meta_file, meta)
@@ -654,7 +670,7 @@ def load(cls, path: Text) -> "EmbeddingPolicy":

return cls(
featurizer=featurizer,
priority=meta["priority"],
priority=meta.pop("priority"),
graph=graph,
session=session,
user_placeholder=a_in,
@@ -666,4 +682,5 @@ def load(cls, path: Text) -> "EmbeddingPolicy":
bot_embed=bot_embed,
all_bot_embed=all_bot_embed,
attention_weights=attention_weights,
**meta,
)
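The hunk in predict_action_probabilities above zeroes every confidence below the normalize_top_num_actions-th highest and rescales the survivors to sum to 1; the new parameter is also written into embedding_policy.json by persist() so that load() can pass it back in through **meta. A minimal standalone sketch of the masking logic, assuming numpy input and 0 < k <= number of actions (the helper name normalize_top_k and the sample scores are illustrative, not from the PR):

    import numpy as np

    def normalize_top_k(confidence: np.ndarray, k: int) -> np.ndarray:
        """Zero every score below the k-th highest, then rescale the
        remaining scores so they sum to 1 (mirrors the softmax branch)."""
        ranked = np.sort(confidence)[::-1]  # scores sorted highest first
        confidence = np.where(confidence < ranked[k - 1], 0.0, confidence)
        return confidence / np.sum(confidence)

    scores = np.array([0.4, 0.3, 0.2, 0.1])
    print(normalize_top_k(scores, 2))  # [0.5714... 0.4285... 0. 0.]

Ties at the cut-off survive the mask, matching the strict value < ranked[k - 1] comparison in the diff.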
2 changes: 1 addition & 1 deletion rasa/nlu/classifiers/__init__.py
@@ -1,3 +1,3 @@
# How many labels are at max put into the output
# ranking, everything else will be cut off
LABEL_RANKING_LENGTH = 10
DEFAULT_LABEL_RANKING_LENGTH = 10
26 changes: 19 additions & 7 deletions rasa/nlu/classifiers/embedding_intent_classifier.py
@@ -6,7 +6,7 @@
from typing import Any, Dict, List, Optional, Text, Tuple
import warnings

from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
from rasa.nlu.classifiers import DEFAULT_LABEL_RANKING_LENGTH
from rasa.nlu.components import Component
from rasa.utils import train_utils
from rasa.nlu.constants import (
@@ -82,6 +82,9 @@ class EmbeddingIntentClassifier(Component):
"similarity_type": "auto", # string 'auto' or 'cosine' or 'inner'
# the type of the loss function
"loss_type": "softmax", # string 'softmax' or 'margin'
# number of top intents to normalize scores for softmax loss_type
# set to 0 to turn off normalization
"normalize_top_num_intents": 0,
# how similar the algorithm should try
# to make embedding vectors for correct labels
"mu_pos": 0.8, # should be 0.0 < ... < 1.0 for 'cosine'
@@ -153,7 +156,8 @@ def __init__(
self._is_training = None

# config migration warning
def _check_old_config_variables(self, config: Dict[Text, Any]) -> None:
@staticmethod
def _check_old_config_variables(config: Dict[Text, Any]) -> None:

removed_tokenization_params = [
"intent_tokenization_flag",
@@ -206,6 +210,7 @@ def _load_embedding_params(self, config: Dict[Text, Any]) -> None:
elif self.loss_type == "margin":
self.similarity_type = "cosine"

self.normalize_top_num_intents = config["normalize_top_num_intents"]
self.mu_pos = config["mu_pos"]
self.mu_neg = config["mu_neg"]
self.use_max_sim_neg = config["use_max_sim_neg"]
@@ -617,17 +622,24 @@ def predict_label(self, message):

# if X contains all zeros do not predict some label
if X.any() and label_ids.size > 0:
label = {
"name": self.inverted_label_dict[label_ids[0]],
"confidence": message_sim[0],
}

# normalise scores if turned on
if self.loss_type == "softmax" and self.normalize_top_num_intents > 0:
label_ids = label_ids[: self.normalize_top_num_intents]
message_sim = message_sim[: self.normalize_top_num_intents]
message_sim = message_sim / np.sum(message_sim)
else:
label_ids = label_ids[:DEFAULT_LABEL_RANKING_LENGTH]
message_sim = message_sim[:DEFAULT_LABEL_RANKING_LENGTH]
ranking = list(zip(list(label_ids), message_sim))
ranking = ranking[:LABEL_RANKING_LENGTH]
label_ranking = [
{"name": self.inverted_label_dict[label_idx], "confidence": score}
for label_idx, score in ranking
]
label = {
"name": self.inverted_label_dict[label_ids[0]],
"confidence": message_sim[0],
}
return label, label_ranking

def process(self, message: "Message", **kwargs: Any) -> None:
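The classifier applies the same idea by truncation rather than masking: label_ids and message_sim arrive sorted by descending similarity (as the use of label_ids[0] for the top label implies), so keeping the first normalize_top_num_intents entries and rescaling is equivalent to zeroing the tail. A short sketch of that branch with made-up inputs (the sample arrays are hypothetical; the indices stand in for keys of inverted_label_dict):

    import numpy as np

    # Hypothetical model output, already sorted by descending similarity.
    label_ids = np.array([3, 0, 5, 1, 2])
    message_sim = np.array([0.45, 0.25, 0.15, 0.10, 0.05])
    normalize_top_num_intents = 3

    # Keep the top N intents and rescale their confidences to sum to 1.
    label_ids = label_ids[:normalize_top_num_intents]
    message_sim = message_sim[:normalize_top_num_intents]
    message_sim = message_sim / np.sum(message_sim)

    print(list(zip(label_ids.tolist(), message_sim.round(3).tolist())))
    # [(3, 0.529), (0, 0.294), (5, 0.176)]

With normalization off, the else branch truncates at DEFAULT_LABEL_RANKING_LENGTH (10) without rescaling, preserving the previous behaviour.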
4 changes: 2 additions & 2 deletions rasa/nlu/classifiers/sklearn_intent_classifier.py
@@ -6,7 +6,7 @@
from typing import Any, Dict, List, Optional, Text, Tuple

from rasa.nlu import utils
from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
from rasa.nlu.classifiers import DEFAULT_LABEL_RANKING_LENGTH
from rasa.nlu.components import Component
from rasa.nlu.config import RasaNLUModelConfig
from rasa.nlu.model import Metadata
@@ -155,7 +155,7 @@ def process(self, message: Message, **kwargs: Any) -> None:

if intents.size > 0 and probabilities.size > 0:
ranking = list(zip(list(intents), list(probabilities)))[
:LABEL_RANKING_LENGTH
:DEFAULT_LABEL_RANKING_LENGTH
]

intent = {"name": intents[0], "confidence": probabilities[0]}