From 9f05641ae6705ae32b6e0dbb0c542b33674d8e1c Mon Sep 17 00:00:00 2001
From: akelad <akela@rasa.com>
Date: Thu, 28 Nov 2019 13:39:44 +0100
Subject: [PATCH 01/35] add normalisation to confidence scores

---
 rasa/nlu/classifiers/embedding_intent_classifier.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/rasa/nlu/classifiers/embedding_intent_classifier.py b/rasa/nlu/classifiers/embedding_intent_classifier.py
index 50767d9b5a0a..a023d975d19c 100644
--- a/rasa/nlu/classifiers/embedding_intent_classifier.py
+++ b/rasa/nlu/classifiers/embedding_intent_classifier.py
@@ -621,9 +621,12 @@ def predict_label(self, message):
                     "name": self.inverted_label_dict[label_ids[0]],
                     "confidence": message_sim[0],
                 }
-
+                label_ids = label_ids[:LABEL_RANKING_LENGTH]
+                message_sim = message_sim[:LABEL_RANKING_LENGTH]
+                print(message_sim)
+                message_sim = message_sim/np.sum(message_sim)
+                print(message_sim)
                 ranking = list(zip(list(label_ids), message_sim))
-                ranking = ranking[:LABEL_RANKING_LENGTH]
                 label_ranking = [
                     {"name": self.inverted_label_dict[label_idx], "confidence": score}
                     for label_idx, score in ranking

From d0a50b7edc6d05a0a04cb5c693cb57bf94710a1f Mon Sep 17 00:00:00 2001
From: akelad <akela@rasa.com>
Date: Wed, 4 Dec 2019 15:43:16 +0100
Subject: [PATCH 02/35] add configuration parameter for normalization

---
 rasa/nlu/classifiers/embedding_intent_classifier.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/rasa/nlu/classifiers/embedding_intent_classifier.py b/rasa/nlu/classifiers/embedding_intent_classifier.py
index a023d975d19c..cd465b325b99 100644
--- a/rasa/nlu/classifiers/embedding_intent_classifier.py
+++ b/rasa/nlu/classifiers/embedding_intent_classifier.py
@@ -104,6 +104,8 @@ class EmbeddingIntentClassifier(Component):
         "evaluate_every_num_epochs": 20,  # small values may hurt performance
         # how many examples to use for calculation of training accuracy
         "evaluate_on_num_examples": 0,  # large values may hurt performance
+        # whether to normalize scores or not, softmax loss_type only
+        "normalize": False,
     }
     # end default properties (DOC MARKER - don't remove)
 
@@ -206,6 +208,7 @@ def _load_embedding_params(self, config: Dict[Text, Any]) -> None:
             elif self.loss_type == "margin":
                 self.similarity_type = "cosine"
 
+        self.normalize = config["normalize"]
         self.mu_pos = config["mu_pos"]
         self.mu_neg = config["mu_neg"]
         self.use_max_sim_neg = config["use_max_sim_neg"]
@@ -623,9 +626,9 @@ def predict_label(self, message):
                 }
                 label_ids = label_ids[:LABEL_RANKING_LENGTH]
                 message_sim = message_sim[:LABEL_RANKING_LENGTH]
-                print(message_sim)
-                message_sim = message_sim/np.sum(message_sim)
-                print(message_sim)
+                # normalise scores if turned on
+                if self.loss_type == "softmax" and self.normalize == True:
+                    message_sim = message_sim/np.sum(message_sim)
                 ranking = list(zip(list(label_ids), message_sim))
                 label_ranking = [
                     {"name": self.inverted_label_dict[label_idx], "confidence": score}

From 5d7df4df3d44b4a30b948ab4f0f6bd11733e1bfb Mon Sep 17 00:00:00 2001
From: Ella <erohmensing@gmail.com>
Date: Thu, 12 Dec 2019 22:57:15 +0100
Subject: [PATCH 03/35] fix label confidence not being updated

---
 .../embedding_intent_classifier.py            | 21 ++++++++++---------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/rasa/nlu/classifiers/embedding_intent_classifier.py b/rasa/nlu/classifiers/embedding_intent_classifier.py
index cd465b325b99..8b7bef24347e 100644
--- a/rasa/nlu/classifiers/embedding_intent_classifier.py
+++ b/rasa/nlu/classifiers/embedding_intent_classifier.py
@@ -104,8 +104,8 @@ class EmbeddingIntentClassifier(Component):
         "evaluate_every_num_epochs": 20,  # small values may hurt performance
         # how many examples to use for calculation of training accuracy
         "evaluate_on_num_examples": 0,  # large values may hurt performance
-        # whether to normalize scores or not, softmax loss_type only
-        "normalize": False,
+        # whether to normalize scores for softmax loss_type
+        "normalize_softmax": False,
     }
     # end default properties (DOC MARKER - don't remove)
 
@@ -155,7 +155,8 @@ def __init__(
         self._is_training = None
 
     # config migration warning
-    def _check_old_config_variables(self, config: Dict[Text, Any]) -> None:
+    @staticmethod
+    def _check_old_config_variables(config: Dict[Text, Any]) -> None:
 
         removed_tokenization_params = [
             "intent_tokenization_flag",
@@ -208,7 +209,7 @@ def _load_embedding_params(self, config: Dict[Text, Any]) -> None:
             elif self.loss_type == "margin":
                 self.similarity_type = "cosine"
 
-        self.normalize = config["normalize"]
+        self.normalize_softmax = config["normalize_softmax"]
         self.mu_pos = config["mu_pos"]
         self.mu_neg = config["mu_neg"]
         self.use_max_sim_neg = config["use_max_sim_neg"]
@@ -620,20 +621,20 @@ def predict_label(self, message):
 
             # if X contains all zeros do not predict some label
             if X.any() and label_ids.size > 0:
-                label = {
-                    "name": self.inverted_label_dict[label_ids[0]],
-                    "confidence": message_sim[0],
-                }
                 label_ids = label_ids[:LABEL_RANKING_LENGTH]
                 message_sim = message_sim[:LABEL_RANKING_LENGTH]
                 # normalise scores if turned on
-                if self.loss_type == "softmax" and self.normalize == True:
-                    message_sim = message_sim/np.sum(message_sim)
+                if self.loss_type == "softmax" and self.normalize_softmax:
+                    message_sim = message_sim / np.sum(message_sim)
                 ranking = list(zip(list(label_ids), message_sim))
                 label_ranking = [
                     {"name": self.inverted_label_dict[label_idx], "confidence": score}
                     for label_idx, score in ranking
                 ]
+                label = {
+                    "name": self.inverted_label_dict[label_ids[0]],
+                    "confidence": message_sim[0],
+                }
         return label, label_ranking
 
     def process(self, message: "Message", **kwargs: Any) -> None:

From 44660528e4ba77298324b93a5f75a96b8b94057c Mon Sep 17 00:00:00 2001
From: Ella <erohmensing@gmail.com>
Date: Thu, 12 Dec 2019 23:08:47 +0100
Subject: [PATCH 04/35] make number to normalize configurable, report that num

---
 rasa/nlu/classifiers/__init__.py              |  2 +-
 .../embedding_intent_classifier.py            | 19 ++++++++++++-------
 .../classifiers/sklearn_intent_classifier.py  |  4 ++--
 3 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/rasa/nlu/classifiers/__init__.py b/rasa/nlu/classifiers/__init__.py
index ae7b52d8840a..d9cf7ed72a65 100644
--- a/rasa/nlu/classifiers/__init__.py
+++ b/rasa/nlu/classifiers/__init__.py
@@ -1,3 +1,3 @@
 # How many labels are at max put into the output
 # ranking, everything else will be cut off
-LABEL_RANKING_LENGTH = 10
+DEFAULT_LABEL_RANKING_LENGTH = 10
diff --git a/rasa/nlu/classifiers/embedding_intent_classifier.py b/rasa/nlu/classifiers/embedding_intent_classifier.py
index 8b7bef24347e..0a2994fdb727 100644
--- a/rasa/nlu/classifiers/embedding_intent_classifier.py
+++ b/rasa/nlu/classifiers/embedding_intent_classifier.py
@@ -6,7 +6,7 @@
 from typing import Any, Dict, List, Optional, Text, Tuple
 import warnings
 
-from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
+from rasa.nlu.classifiers import DEFAULT_LABEL_RANKING_LENGTH
 from rasa.nlu.components import Component
 from rasa.utils import train_utils
 from rasa.nlu.constants import (
@@ -82,6 +82,9 @@ class EmbeddingIntentClassifier(Component):
         "similarity_type": "auto",  # string 'auto' or 'cosine' or 'inner'
         # the type of the loss function
         "loss_type": "softmax",  # string 'softmax' or 'margin'
+        # number of top intents to normalize scores for softmax loss_type
+        # set to 0 to turn off normalization
+        "normalize_top_num_intents": 0,
         # how similar the algorithm should try
         # to make embedding vectors for correct labels
         "mu_pos": 0.8,  # should be 0.0 < ... < 1.0 for 'cosine'
@@ -104,8 +107,6 @@ class EmbeddingIntentClassifier(Component):
         "evaluate_every_num_epochs": 20,  # small values may hurt performance
         # how many examples to use for calculation of training accuracy
         "evaluate_on_num_examples": 0,  # large values may hurt performance
-        # whether to normalize scores for softmax loss_type
-        "normalize_softmax": False,
     }
     # end default properties (DOC MARKER - don't remove)
 
@@ -209,7 +210,7 @@ def _load_embedding_params(self, config: Dict[Text, Any]) -> None:
             elif self.loss_type == "margin":
                 self.similarity_type = "cosine"
 
-        self.normalize_softmax = config["normalize_softmax"]
+        self.normalize_top_num_intents = config["normalize_top_num_intents"]
         self.mu_pos = config["mu_pos"]
         self.mu_neg = config["mu_neg"]
         self.use_max_sim_neg = config["use_max_sim_neg"]
@@ -621,11 +622,15 @@ def predict_label(self, message):
 
             # if X contains all zeros do not predict some label
             if X.any() and label_ids.size > 0:
-                label_ids = label_ids[:LABEL_RANKING_LENGTH]
-                message_sim = message_sim[:LABEL_RANKING_LENGTH]
+
                 # normalise scores if turned on
-                if self.loss_type == "softmax" and self.normalize_softmax:
+                if self.loss_type == "softmax" and self.normalize_top_num_intents > 0:
+                    label_ids = label_ids[: self.normalize_top_num_intents]
+                    message_sim = message_sim[: self.normalize_top_num_intents]
                     message_sim = message_sim / np.sum(message_sim)
+                else:
+                    label_ids = label_ids[:DEFAULT_LABEL_RANKING_LENGTH]
+                    message_sim = message_sim[:DEFAULT_LABEL_RANKING_LENGTH]
                 ranking = list(zip(list(label_ids), message_sim))
                 label_ranking = [
                     {"name": self.inverted_label_dict[label_idx], "confidence": score}
diff --git a/rasa/nlu/classifiers/sklearn_intent_classifier.py b/rasa/nlu/classifiers/sklearn_intent_classifier.py
index 377c9c95224d..4b58ca25eb13 100644
--- a/rasa/nlu/classifiers/sklearn_intent_classifier.py
+++ b/rasa/nlu/classifiers/sklearn_intent_classifier.py
@@ -6,7 +6,7 @@
 from typing import Any, Dict, List, Optional, Text, Tuple
 
 from rasa.nlu import utils
-from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
+from rasa.nlu.classifiers import DEFAULT_LABEL_RANKING_LENGTH
 from rasa.nlu.components import Component
 from rasa.nlu.config import RasaNLUModelConfig
 from rasa.nlu.model import Metadata
@@ -155,7 +155,7 @@ def process(self, message: Message, **kwargs: Any) -> None:
 
             if intents.size > 0 and probabilities.size > 0:
                 ranking = list(zip(list(intents), list(probabilities)))[
-                    :LABEL_RANKING_LENGTH
+                    :DEFAULT_LABEL_RANKING_LENGTH
                 ]
 
                 intent = {"name": intents[0], "confidence": probabilities[0]}

From 606e57826f1dd65f9da8fa251f99d414ab1124cd Mon Sep 17 00:00:00 2001
From: Ella <erohmensing@gmail.com>
Date: Thu, 12 Dec 2019 22:24:29 -0500
Subject: [PATCH 05/35] add normalization for embedding policy

---
 rasa/core/policies/embedding_policy.py | 23 ++++++++++++++++++++---
 rasa/core/policies/ensemble.py         |  2 ++
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/rasa/core/policies/embedding_policy.py b/rasa/core/policies/embedding_policy.py
index d599959e77c4..5cd62ec10230 100644
--- a/rasa/core/policies/embedding_policy.py
+++ b/rasa/core/policies/embedding_policy.py
@@ -74,6 +74,9 @@ class EmbeddingPolicy(Policy):
         "similarity_type": "auto",  # string 'auto' or 'cosine' or 'inner'
         # the type of the loss function
         "loss_type": "softmax",  # string 'softmax' or 'margin'
+        # number of top actions to normalize scores for softmax loss_type
+        # set to 0 to turn off normalization
+        "normalize_top_num_actions": 0,
         # how similar the algorithm should try
         # to make embedding vectors for correct labels
         "mu_pos": 0.8,  # should be 0.0 < ... < 1.0 for 'cosine'
@@ -192,6 +195,7 @@ def _load_embedding_params(self, config: Dict[Text, Any]) -> None:
                 self.similarity_type = "inner"
             elif self.loss_type == "margin":
                 self.similarity_type = "cosine"
+        self.normalize_top_num_actions = config["normalize_top_num_actions"]
 
         self.mu_pos = config["mu_pos"]
         self.mu_neg = config["mu_neg"]
@@ -556,8 +560,17 @@ def predict_action_probabilities(
         tf_feed_dict = self.tf_feed_dict_for_prediction(tracker, domain)
 
         confidence = self.session.run(self.pred_confidence, feed_dict=tf_feed_dict)
+        confidence = confidence[0, -1, :].tolist()
 
-        return confidence[0, -1, :].tolist()
+        # normalise scores if turned on
+        if self.loss_type == "softmax" and self.normalize_top_num_actions > 0:
+            ranked = sorted(confidence, reverse=True)
+            for i, value in enumerate(confidence):
+                if value < ranked[self.normalize_top_num_actions - 1]:
+                    confidence[i] = 0.0
+            confidence = confidence / np.sum(confidence)
+
+        return confidence
 
     def persist(self, path: Text) -> None:
         """Persists the policy to a storage."""
@@ -572,7 +585,10 @@ def persist(self, path: Text) -> None:
 
         self.featurizer.persist(path)
 
-        meta = {"priority": self.priority}
+        meta = {
+            "priority": self.priority,
+            "normalize_top_num_actions": self.normalize_top_num_actions,
+        }
 
         meta_file = os.path.join(path, "embedding_policy.json")
         rasa.utils.io.dump_obj_as_json_to_file(meta_file, meta)
@@ -654,7 +670,7 @@ def load(cls, path: Text) -> "EmbeddingPolicy":
 
         return cls(
             featurizer=featurizer,
-            priority=meta["priority"],
+            priority=meta.pop("priority"),
             graph=graph,
             session=session,
             user_placeholder=a_in,
@@ -666,4 +682,5 @@ def load(cls, path: Text) -> "EmbeddingPolicy":
             bot_embed=bot_embed,
             all_bot_embed=all_bot_embed,
             attention_weights=attention_weights,
+            **meta,
         )
diff --git a/rasa/core/policies/ensemble.py b/rasa/core/policies/ensemble.py
index 46b7210238cb..9e59cedd0366 100644
--- a/rasa/core/policies/ensemble.py
+++ b/rasa/core/policies/ensemble.py
@@ -387,6 +387,8 @@ def probabilities_using_best_policy(
 
         for i, p in enumerate(self.policies):
             probabilities = p.predict_action_probabilities(tracker, domain)
+            if type(p).__name__ == "EmbeddingPolicy":
+                logger.warning(probabilities)
 
             if len(tracker.events) > 0 and isinstance(
                 tracker.events[-1], ActionExecutionRejected

From 792eeb1a5fa983adb0f95bff2a2a1d414943a8dd Mon Sep 17 00:00:00 2001
From: Ella <erohmensing@gmail.com>
Date: Thu, 12 Dec 2019 22:39:35 -0500
Subject: [PATCH 06/35] remove debug logging

---
 rasa/core/policies/ensemble.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/rasa/core/policies/ensemble.py b/rasa/core/policies/ensemble.py
index 9e59cedd0366..46b7210238cb 100644
--- a/rasa/core/policies/ensemble.py
+++ b/rasa/core/policies/ensemble.py
@@ -387,8 +387,6 @@ def probabilities_using_best_policy(
 
         for i, p in enumerate(self.policies):
             probabilities = p.predict_action_probabilities(tracker, domain)
-            if type(p).__name__ == "EmbeddingPolicy":
-                logger.warning(probabilities)
 
             if len(tracker.events) > 0 and isinstance(
                 tracker.events[-1], ActionExecutionRejected

From 4996ca26cc0ab0905276998582a03a191da92979 Mon Sep 17 00:00:00 2001
From: Ella <erohmensing@gmail.com>
Date: Mon, 16 Dec 2019 21:18:03 -0500
Subject: [PATCH 07/35] persist policy loss type

---
 rasa/core/policies/embedding_policy.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/rasa/core/policies/embedding_policy.py b/rasa/core/policies/embedding_policy.py
index 5cd62ec10230..2d00d2addd07 100644
--- a/rasa/core/policies/embedding_policy.py
+++ b/rasa/core/policies/embedding_policy.py
@@ -587,6 +587,7 @@ def persist(self, path: Text) -> None:
 
         meta = {
             "priority": self.priority,
+            "loss_type": self.loss_type,
             "normalize_top_num_actions": self.normalize_top_num_actions,
         }
 

From efa399fd7ef27414b2b079368d7ee6f37f6ae3c4 Mon Sep 17 00:00:00 2001
From: Ella <erohmensing@gmail.com>
Date: Mon, 16 Dec 2019 21:21:32 -0500
Subject: [PATCH 08/35] add response selector, update default, rename parameter

---
 rasa/core/policies/embedding_policy.py              | 10 +++++-----
 rasa/nlu/classifiers/embedding_intent_classifier.py | 10 +++++-----
 rasa/nlu/selectors/embedding_response_selector.py   |  3 +++
 3 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/rasa/core/policies/embedding_policy.py b/rasa/core/policies/embedding_policy.py
index 2d00d2addd07..77885c5628d7 100644
--- a/rasa/core/policies/embedding_policy.py
+++ b/rasa/core/policies/embedding_policy.py
@@ -76,7 +76,7 @@ class EmbeddingPolicy(Policy):
         "loss_type": "softmax",  # string 'softmax' or 'margin'
         # number of top actions to normalize scores for softmax loss_type
         # set to 0 to turn off normalization
-        "normalize_top_num_actions": 0,
+        "ranking_length": 10,
         # how similar the algorithm should try
         # to make embedding vectors for correct labels
         "mu_pos": 0.8,  # should be 0.0 < ... < 1.0 for 'cosine'
@@ -195,7 +195,7 @@ def _load_embedding_params(self, config: Dict[Text, Any]) -> None:
                 self.similarity_type = "inner"
             elif self.loss_type == "margin":
                 self.similarity_type = "cosine"
-        self.normalize_top_num_actions = config["normalize_top_num_actions"]
+        self.ranking_length = config["ranking_length"]
 
         self.mu_pos = config["mu_pos"]
         self.mu_neg = config["mu_neg"]
@@ -563,10 +563,10 @@ def predict_action_probabilities(
         confidence = confidence[0, -1, :].tolist()
 
         # normalise scores if turned on
-        if self.loss_type == "softmax" and self.normalize_top_num_actions > 0:
+        if self.loss_type == "softmax" and self.ranking_length > 0:
             ranked = sorted(confidence, reverse=True)
             for i, value in enumerate(confidence):
-                if value < ranked[self.normalize_top_num_actions - 1]:
+                if value < ranked[self.ranking_length - 1]:
                     confidence[i] = 0.0
             confidence = confidence / np.sum(confidence)
 
@@ -588,7 +588,7 @@ def persist(self, path: Text) -> None:
         meta = {
             "priority": self.priority,
             "loss_type": self.loss_type,
-            "normalize_top_num_actions": self.normalize_top_num_actions,
+            "ranking_length": self.ranking_length,
         }
 
         meta_file = os.path.join(path, "embedding_policy.json")
diff --git a/rasa/nlu/classifiers/embedding_intent_classifier.py b/rasa/nlu/classifiers/embedding_intent_classifier.py
index 0a2994fdb727..f723f799c44a 100644
--- a/rasa/nlu/classifiers/embedding_intent_classifier.py
+++ b/rasa/nlu/classifiers/embedding_intent_classifier.py
@@ -84,7 +84,7 @@ class EmbeddingIntentClassifier(Component):
         "loss_type": "softmax",  # string 'softmax' or 'margin'
         # number of top intents to normalize scores for softmax loss_type
         # set to 0 to turn off normalization
-        "normalize_top_num_intents": 0,
+        "ranking_length": 10,
         # how similar the algorithm should try
         # to make embedding vectors for correct labels
         "mu_pos": 0.8,  # should be 0.0 < ... < 1.0 for 'cosine'
@@ -210,7 +210,7 @@ def _load_embedding_params(self, config: Dict[Text, Any]) -> None:
             elif self.loss_type == "margin":
                 self.similarity_type = "cosine"
 
-        self.normalize_top_num_intents = config["normalize_top_num_intents"]
+        self.ranking_length = config["ranking_length"]
         self.mu_pos = config["mu_pos"]
         self.mu_neg = config["mu_neg"]
         self.use_max_sim_neg = config["use_max_sim_neg"]
@@ -624,9 +624,9 @@ def predict_label(self, message):
             if X.any() and label_ids.size > 0:
 
                 # normalise scores if turned on
-                if self.loss_type == "softmax" and self.normalize_top_num_intents > 0:
-                    label_ids = label_ids[: self.normalize_top_num_intents]
-                    message_sim = message_sim[: self.normalize_top_num_intents]
+                if self.loss_type == "softmax" and self.ranking_length > 0:
+                    label_ids = label_ids[: self.ranking_length]
+                    message_sim = message_sim[: self.ranking_length]
                     message_sim = message_sim / np.sum(message_sim)
                 else:
                     label_ids = label_ids[:DEFAULT_LABEL_RANKING_LENGTH]
diff --git a/rasa/nlu/selectors/embedding_response_selector.py b/rasa/nlu/selectors/embedding_response_selector.py
index c27c4f00ae2a..b4ea02e2a251 100644
--- a/rasa/nlu/selectors/embedding_response_selector.py
+++ b/rasa/nlu/selectors/embedding_response_selector.py
@@ -82,6 +82,9 @@ class ResponseSelector(EmbeddingIntentClassifier):
         "similarity_type": "auto",  # string 'auto' or 'cosine' or 'inner'
         # the type of the loss function
         "loss_type": "softmax",  # string 'softmax' or 'margin'
+        # number of top responses to normalize scores for softmax loss_type
+        # set to 0 to turn off normalization
+        "ranking_length": 10,
         # how similar the algorithm should try
         # to make embedding vectors for correct intent labels
         "mu_pos": 0.8,  # should be 0.0 < ... < 1.0 for 'cosine'

From 93ac058d5f6402dd0a6e0e3032f8c571edd866f9 Mon Sep 17 00:00:00 2001
From: Ella <erohmensing@gmail.com>
Date: Mon, 16 Dec 2019 21:45:49 -0500
Subject: [PATCH 09/35] clean up code

---
 .../nlu/classifiers/embedding_intent_classifier.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/rasa/nlu/classifiers/embedding_intent_classifier.py b/rasa/nlu/classifiers/embedding_intent_classifier.py
index f723f799c44a..31af79fa769c 100644
--- a/rasa/nlu/classifiers/embedding_intent_classifier.py
+++ b/rasa/nlu/classifiers/embedding_intent_classifier.py
@@ -625,12 +625,16 @@ def predict_label(self, message):
 
                 # normalise scores if turned on
                 if self.loss_type == "softmax" and self.ranking_length > 0:
-                    label_ids = label_ids[: self.ranking_length]
-                    message_sim = message_sim[: self.ranking_length]
-                    message_sim = message_sim / np.sum(message_sim)
+                    ranking_length = self.ranking_length
+                    message_sim = message_sim[:ranking_length] / (
+                        np.sum(message_sim[:ranking_length])
+                    )
                 else:
-                    label_ids = label_ids[:DEFAULT_LABEL_RANKING_LENGTH]
-                    message_sim = message_sim[:DEFAULT_LABEL_RANKING_LENGTH]
+                    ranking_length = DEFAULT_LABEL_RANKING_LENGTH
+
+                label_ids = label_ids[:ranking_length]
+                message_sim = message_sim[:ranking_length]
+
                 ranking = list(zip(list(label_ids), message_sim))
                 label_ranking = [
                     {"name": self.inverted_label_dict[label_idx], "confidence": score}

From 0d13ae42f5c8a14086e222bcd692ec80bac2a62f Mon Sep 17 00:00:00 2001
From: Ella <erohmensing@gmail.com>
Date: Mon, 16 Dec 2019 22:34:16 -0500
Subject: [PATCH 10/35] changelog and docs

---
 4902.feature.rst        | 7 +++++++
 docs/core/policies.rst  | 3 +++
 docs/nlu/components.rst | 3 +++
 3 files changed, 13 insertions(+)
 create mode 100644 4902.feature.rst

diff --git a/4902.feature.rst b/4902.feature.rst
new file mode 100644
index 000000000000..f5a1ee0b572e
--- /dev/null
+++ b/4902.feature.rst
@@ -0,0 +1,7 @@
+Added a new configuration parameter, ``ranking_length`` to the ``EmbeddingPolicy``, ``EmbeddingIntentClassifier``,
+and ``ResponseSelector`` classes. When used in combination with ``softmax`` loss type, confidence levels will
+be normalized over the ``ranking_length`` number of top results.
+
+The ``EmbeddingPolicy``, ``EmbeddingIntentClassifier``, and ``ResponseSelector`` now by default normalize confidence
+levels over the top 10 results, to improve confidence over larger numbers of intents and actions.
+
diff --git a/docs/core/policies.rst b/docs/core/policies.rst
index 1934deb67a66..a9b2ad79ef4e 100644
--- a/docs/core/policies.rst
+++ b/docs/core/policies.rst
@@ -271,6 +271,9 @@ It is recommended to use
               ``inner`` for ``softmax``, ``cosine`` for ``margin``;
             - ``loss_type`` sets the type of the loss function,
               it should be either ``softmax`` or ``margin``;
+            - ``ranking_length`` defines the number of top confidences over
+              which to normalize ranking results if ``loss_type: "softmax"``;
+              to turn off normalization set to 0
             - ``mu_pos`` controls how similar the algorithm should try
               to make embedding vectors for correct intent labels,
               used only if ``loss_type`` is set to ``margin``;
diff --git a/docs/nlu/components.rst b/docs/nlu/components.rst
index 4b6b004300f3..ca7b63f5b4af 100644
--- a/docs/nlu/components.rst
+++ b/docs/nlu/components.rst
@@ -451,6 +451,9 @@ EmbeddingIntentClassifier
               ``inner`` for ``softmax``, ``cosine`` for ``margin``;
             - ``loss_type`` sets the type of the loss function,
               it should be either ``softmax`` or ``margin``;
+            - ``ranking_length`` defines the number of top confidences over
+              which to normalize ranking results if ``loss_type: "softmax"``;
+              to turn off normalization set to 0
             - ``mu_pos`` controls how similar the algorithm should try
               to make embedding vectors for correct intent labels,
               used only if ``loss_type`` is set to ``margin``;

From 09650092a7bad3eb84993c8c1dd3a8dbbc020bed Mon Sep 17 00:00:00 2001
From: Ella <erohmensing@gmail.com>
Date: Mon, 16 Dec 2019 22:34:38 -0500
Subject: [PATCH 11/35] fix policy tests

---
 rasa/core/policies/embedding_policy.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/rasa/core/policies/embedding_policy.py b/rasa/core/policies/embedding_policy.py
index 77885c5628d7..5e5f49bc53db 100644
--- a/rasa/core/policies/embedding_policy.py
+++ b/rasa/core/policies/embedding_policy.py
@@ -560,7 +560,7 @@ def predict_action_probabilities(
         tf_feed_dict = self.tf_feed_dict_for_prediction(tracker, domain)
 
         confidence = self.session.run(self.pred_confidence, feed_dict=tf_feed_dict)
-        confidence = confidence[0, -1, :].tolist()
+        confidence = confidence[0, -1, :]
 
         # normalise scores if turned on
         if self.loss_type == "softmax" and self.ranking_length > 0:
@@ -570,7 +570,7 @@ def predict_action_probabilities(
                     confidence[i] = 0.0
             confidence = confidence / np.sum(confidence)
 
-        return confidence
+        return confidence.tolist()
 
     def persist(self, path: Text) -> None:
         """Persists the policy to a storage."""

From 4885ae5b87c9fc9781c0202c9b93b8692e946683 Mon Sep 17 00:00:00 2001
From: Ella <erohmensing@gmail.com>
Date: Tue, 17 Dec 2019 11:04:46 -0500
Subject: [PATCH 12/35] clean up numpy

---
 docs/core/policies.rst                 | 2 +-
 docs/nlu/components.rst                | 2 +-
 rasa/core/policies/embedding_policy.py | 7 ++++---
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/docs/core/policies.rst b/docs/core/policies.rst
index a9b2ad79ef4e..b91e431e1454 100644
--- a/docs/core/policies.rst
+++ b/docs/core/policies.rst
@@ -273,7 +273,7 @@ It is recommended to use
               it should be either ``softmax`` or ``margin``;
             - ``ranking_length`` defines the number of top confidences over
               which to normalize ranking results if ``loss_type: "softmax"``;
-              to turn off normalization set to 0
+              to turn off normalization set it to 0
             - ``mu_pos`` controls how similar the algorithm should try
               to make embedding vectors for correct intent labels,
               used only if ``loss_type`` is set to ``margin``;
diff --git a/docs/nlu/components.rst b/docs/nlu/components.rst
index ca7b63f5b4af..e893433c9a46 100644
--- a/docs/nlu/components.rst
+++ b/docs/nlu/components.rst
@@ -453,7 +453,7 @@ EmbeddingIntentClassifier
               it should be either ``softmax`` or ``margin``;
             - ``ranking_length`` defines the number of top confidences over
               which to normalize ranking results if ``loss_type: "softmax"``;
-              to turn off normalization set to 0
+              to turn off normalization set it to 0
             - ``mu_pos`` controls how similar the algorithm should try
               to make embedding vectors for correct intent labels,
               used only if ``loss_type`` is set to ``margin``;
diff --git a/rasa/core/policies/embedding_policy.py b/rasa/core/policies/embedding_policy.py
index 5e5f49bc53db..42532c0ca20d 100644
--- a/rasa/core/policies/embedding_policy.py
+++ b/rasa/core/policies/embedding_policy.py
@@ -565,9 +565,10 @@ def predict_action_probabilities(
         # normalise scores if turned on
         if self.loss_type == "softmax" and self.ranking_length > 0:
             ranked = sorted(confidence, reverse=True)
-            for i, value in enumerate(confidence):
-                if value < ranked[self.ranking_length - 1]:
-                    confidence[i] = 0.0
+            confidence[confidence < ranked[self.ranking_length - 1]] = 0
+            # for i, value in enumerate(confidence):
+            #     if value < ranked[self.ranking_length - 1]:
+            #         confidence[i] = 0.0
             confidence = confidence / np.sum(confidence)
 
         return confidence.tolist()

From 71e6a68b4bfbcfe96e7d4ee8cee86e028d7fa773 Mon Sep 17 00:00:00 2001
From: Ella <erohmensing@gmail.com>
Date: Tue, 17 Dec 2019 12:05:39 -0500
Subject: [PATCH 13/35] migration/changelog

---
 4902.feature.rst                       |  7 -------
 changelog/4902.feature.rst             |  6 ++++++
 docs/migration-guide.rst               | 12 ++++++++++++
 rasa/core/policies/embedding_policy.py |  3 ---
 4 files changed, 18 insertions(+), 10 deletions(-)
 delete mode 100644 4902.feature.rst
 create mode 100644 changelog/4902.feature.rst

diff --git a/4902.feature.rst b/4902.feature.rst
deleted file mode 100644
index f5a1ee0b572e..000000000000
--- a/4902.feature.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-Added a new configuration parameter, ``ranking_length`` to the ``EmbeddingPolicy``, ``EmbeddingIntentClassifier``,
-and ``ResponseSelector`` classes. When used in combination with ``softmax`` loss type, confidence levels will
-be normalized over the ``ranking_length`` number of top results.
-
-The ``EmbeddingPolicy``, ``EmbeddingIntentClassifier``, and ``ResponseSelector`` now by default normalize confidence
-levels over the top 10 results, to improve confidence over larger numbers of intents and actions.
-
diff --git a/changelog/4902.feature.rst b/changelog/4902.feature.rst
new file mode 100644
index 000000000000..bff781ed9459
--- /dev/null
+++ b/changelog/4902.feature.rst
@@ -0,0 +1,6 @@
+Added a new configuration parameter, ``ranking_length``, to the ``EmbeddingPolicy``, ``EmbeddingIntentClassifier``,
+and ``ResponseSelector`` classes.
+
+The ``EmbeddingPolicy``, ``EmbeddingIntentClassifier``, and ``ResponseSelector`` now by default normalize confidence
+levels over the top 10 results. See :ref:`migration-to-rasa-1.6` for more details.
+
diff --git a/docs/migration-guide.rst b/docs/migration-guide.rst
index fe933e449a30..167f63725839 100644
--- a/docs/migration-guide.rst
+++ b/docs/migration-guide.rst
@@ -11,6 +11,18 @@ Migration Guide
 This page contains information about changes between major versions and
 how you can migrate from one version to another.
 
+.. _migration-to-rasa-1.6:
+
+Rasa 1.5 to Rasa 1.6
+--------------------
+
+General
+~~~~~~~
+- By default, the ``EmbeddingIntentClassifier``, ``EmbeddingPolicy``, and ``ResponseSelector`` will
+  now normalize the top 10 confidence results if the ``loss_type`` is ``"softmax"`` (which has been
+  the default since 1.3, see :ref:`migration-to-rasa-1.3`). This is configurable via the ``ranking_length``
+  configuration parameter; to turn off normalization to match the previous behavior, set ``ranking_length: 0``.
+
 .. _migration-to-rasa-1.3:
 
 Rasa 1.2 to Rasa 1.3
diff --git a/rasa/core/policies/embedding_policy.py b/rasa/core/policies/embedding_policy.py
index 42532c0ca20d..ed5f400e659b 100644
--- a/rasa/core/policies/embedding_policy.py
+++ b/rasa/core/policies/embedding_policy.py
@@ -566,9 +566,6 @@ def predict_action_probabilities(
         if self.loss_type == "softmax" and self.ranking_length > 0:
             ranked = sorted(confidence, reverse=True)
             confidence[confidence < ranked[self.ranking_length - 1]] = 0
-            # for i, value in enumerate(confidence):
-            #     if value < ranked[self.ranking_length - 1]:
-            #         confidence[i] = 0.0
             confidence = confidence / np.sum(confidence)
 
         return confidence.tolist()

From 73f932e56cb696d0bcd114762264b466506639c8 Mon Sep 17 00:00:00 2001
From: Ella <erohmensing@gmail.com>
Date: Wed, 18 Dec 2019 01:51:22 -0500
Subject: [PATCH 14/35] tests for embedding policy normalization

---
 rasa/core/policies/embedding_policy.py |  10 ++-
 tests/core/test_policies.py            | 102 ++++++++++++++++++++++---
 2 files changed, 100 insertions(+), 12 deletions(-)

diff --git a/rasa/core/policies/embedding_policy.py b/rasa/core/policies/embedding_policy.py
index d143ec71d73a..c01d10fcd6de 100644
--- a/rasa/core/policies/embedding_policy.py
+++ b/rasa/core/policies/embedding_policy.py
@@ -552,6 +552,12 @@ def tf_feed_dict_for_prediction(
 
         return {self.a_in: session_data["dialogue_features"][0]}
 
+    def _normalize_scores(self, confidence):
+        ranked = sorted(confidence, reverse=True)
+        confidence[confidence < ranked[self.ranking_length - 1]] = 0
+        confidence = confidence / np.sum(confidence)
+        return confidence
+
     def predict_action_probabilities(
         self, tracker: "DialogueStateTracker", domain: "Domain"
     ) -> List[float]:
@@ -575,9 +581,7 @@ def predict_action_probabilities(
 
         # normalise scores if turned on
         if self.loss_type == "softmax" and self.ranking_length > 0:
-            ranked = sorted(confidence, reverse=True)
-            confidence[confidence < ranked[self.ranking_length - 1]] = 0
-            confidence = confidence / np.sum(confidence)
+            confidence = self._normalize_scores(confidence)
 
         return confidence.tolist()
 
diff --git a/tests/core/test_policies.py b/tests/core/test_policies.py
index 75b5bf9daea2..af818723f80b 100644
--- a/tests/core/test_policies.py
+++ b/tests/core/test_policies.py
@@ -1,4 +1,4 @@
-from unittest.mock import patch
+from unittest.mock import Mock, patch
 
 import numpy as np
 import pytest
@@ -104,6 +104,14 @@ def featurizer(self):
     def priority(self):
         return 1
 
+    @pytest.fixture(scope="module")
+    def default_domain(self):
+        return Domain.load(DEFAULT_DOMAIN_PATH_WITH_SLOTS)
+
+    @pytest.fixture(scope="module")
+    def tracker(self, default_domain):
+        return DialogueStateTracker(UserMessage.DEFAULT_SENDER_ID, default_domain.slots)
+
     @pytest.fixture(scope="module")
     async def trained_policy(self, featurizer, priority):
         default_domain = Domain.load(DEFAULT_DOMAIN_PATH_WITH_SLOTS)
@@ -219,14 +227,6 @@ def mock_search(self):
             gs.return_value = gs  # for __init__
             yield gs
 
-    @pytest.fixture(scope="module")
-    def default_domain(self):
-        return Domain.load(DEFAULT_DOMAIN_PATH_WITH_SLOTS)
-
-    @pytest.fixture
-    def tracker(self, default_domain):
-        return DialogueStateTracker(UserMessage.DEFAULT_SENDER_ID, default_domain.slots)
-
     @pytest.fixture(scope="module")
     async def trackers(self, default_domain):
         return await train_trackers(default_domain, augmentation_factor=20)
@@ -337,6 +337,29 @@ def create_policy(self, featurizer, priority):
     def test_similarity_type(self, trained_policy):
         assert trained_policy.similarity_type == "inner"
 
+    def test_ranking_length(self, trained_policy):
+        assert trained_policy.ranking_length == 10
+
+    def test_normalization(self, trained_policy, tracker, default_domain, monkeypatch):
+        # first check the output is what we expect
+        predicted_probabilities = trained_policy.predict_action_probabilities(
+            tracker, default_domain
+        )
+        # count number of non-zero confidences
+        assert (
+            sum([confidence > 0 for confidence in predicted_probabilities])
+            == trained_policy.ranking_length
+        )
+
+        # also check our function is called
+        mock = Mock()
+        monkeypatch.setattr(
+            EmbeddingPolicy, "_normalize_scores", mock._normalize_scores,
+        )
+        trained_policy.predict_action_probabilities(tracker, default_domain)
+
+        mock._normalize_scores.assert_called_once()
+
     async def test_gen_batch(self, trained_policy, default_domain):
         training_trackers = await train_trackers(default_domain, augmentation_factor=0)
         training_data = trained_policy.featurize_for_training(
@@ -382,6 +405,17 @@ def create_policy(self, featurizer, priority):
     def test_similarity_type(self, trained_policy):
         assert trained_policy.similarity_type == "cosine"
 
+    def test_normalization(self, trained_policy, tracker, default_domain, monkeypatch):
+        # Mock actual normalization method
+        mock = Mock()
+        monkeypatch.setattr(
+            EmbeddingPolicy, "_normalize_scores", mock._normalize_scores,
+        )
+        trained_policy.predict_action_probabilities(tracker, default_domain)
+
+        # function should not get called for margin loss_type
+        mock._normalize_scores.assert_not_called()
+
 
 class TestEmbeddingPolicyWithEval(TestEmbeddingPolicy):
     def create_policy(self, featurizer, priority):
@@ -393,6 +427,56 @@ def create_policy(self, featurizer, priority):
         return p
 
 
+class TestEmbeddingPolicyNoNormalization(TestEmbeddingPolicy):
+    def create_policy(self, featurizer, priority):
+        p = EmbeddingPolicy(
+            featurizer=featurizer, priority=priority, **{"ranking_length": 0}
+        )
+        return p
+
+    def test_ranking_length(self, trained_policy):
+        assert trained_policy.ranking_length == 0
+
+    def test_normalization(self, trained_policy, tracker, default_domain, monkeypatch):
+        # first check the output is what we expect
+        predicted_probabilities = trained_policy.predict_action_probabilities(
+            tracker, default_domain
+        )
+        # there should be no normalization
+        assert all([confidence > 0 for confidence in predicted_probabilities])
+
+        # also check our function is not called
+        mock = Mock()
+        monkeypatch.setattr(
+            EmbeddingPolicy, "_normalize_scores", mock._normalize_scores,
+        )
+        trained_policy.predict_action_probabilities(tracker, default_domain)
+
+        mock._normalize_scores.assert_not_called()
+
+
+class TestEmbeddingPolicyLowRankingLength(TestEmbeddingPolicy):
+    def create_policy(self, featurizer, priority):
+        p = EmbeddingPolicy(
+            featurizer=featurizer, priority=priority, **{"ranking_length": 3}
+        )
+        return p
+
+    def test_ranking_length(self, trained_policy):
+        assert trained_policy.ranking_length == 3
+
+
+class TestEmbeddingPolicyHighRankingLength(TestEmbeddingPolicy):
+    def create_policy(self, featurizer, priority):
+        p = EmbeddingPolicy(
+            featurizer=featurizer, priority=priority, **{"ranking_length": 11}
+        )
+        return p
+
+    def test_ranking_length(self, trained_policy):
+        assert trained_policy.ranking_length == 11
+
+
 class TestEmbeddingPolicyWithFullDialogue(TestEmbeddingPolicy):
     def create_policy(self, featurizer, priority):
         # use standard featurizer from EmbeddingPolicy,

From 292ee54a313ab5f8521eb85442d00b2e6745afa0 Mon Sep 17 00:00:00 2001
From: Ella <erohmensing@gmail.com>
Date: Wed, 18 Dec 2019 03:42:19 -0500
Subject: [PATCH 15/35] truncate output as decided

---
 rasa/nlu/classifiers/__init__.py                    | 2 +-
 rasa/nlu/classifiers/embedding_intent_classifier.py | 9 +++------
 rasa/nlu/classifiers/sklearn_intent_classifier.py   | 4 ++--
 3 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/rasa/nlu/classifiers/__init__.py b/rasa/nlu/classifiers/__init__.py
index d9cf7ed72a65..ae7b52d8840a 100644
--- a/rasa/nlu/classifiers/__init__.py
+++ b/rasa/nlu/classifiers/__init__.py
@@ -1,3 +1,3 @@
 # How many labels are at max put into the output
 # ranking, everything else will be cut off
-DEFAULT_LABEL_RANKING_LENGTH = 10
+LABEL_RANKING_LENGTH = 10
diff --git a/rasa/nlu/classifiers/embedding_intent_classifier.py b/rasa/nlu/classifiers/embedding_intent_classifier.py
index 8971e8c2bfe7..e27fe9ba8e44 100644
--- a/rasa/nlu/classifiers/embedding_intent_classifier.py
+++ b/rasa/nlu/classifiers/embedding_intent_classifier.py
@@ -8,7 +8,7 @@
 from typing import Any, Dict, List, Optional, Text, Tuple, Union
 import warnings
 
-from rasa.nlu.classifiers import DEFAULT_LABEL_RANKING_LENGTH
+from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
 from rasa.nlu.components import Component
 from rasa.utils import train_utils
 from rasa.utils.train_utils import SessionDataType
@@ -831,15 +831,12 @@ def predict_label(
             # normalise scores if turned on
             if self.loss_type == "softmax" and self.ranking_length > 0:
                 ranking_length = self.ranking_length
+                label_ids = label_ids[:ranking_length]
                 message_sim = message_sim[:ranking_length] / (
                     np.sum(message_sim[:ranking_length])
                 )
-            else:
-                ranking_length = DEFAULT_LABEL_RANKING_LENGTH
-
-            label_ids = label_ids[:ranking_length]
-            message_sim = message_sim[:ranking_length]
 
+            message_sim = message_sim[:LABEL_RANKING_LENGTH]
             ranking = list(zip(list(label_ids), message_sim))
             label_ranking = [
                 {"name": self.inverted_label_dict[label_idx], "confidence": score}
diff --git a/rasa/nlu/classifiers/sklearn_intent_classifier.py b/rasa/nlu/classifiers/sklearn_intent_classifier.py
index 5f438f0766aa..04372bdde903 100644
--- a/rasa/nlu/classifiers/sklearn_intent_classifier.py
+++ b/rasa/nlu/classifiers/sklearn_intent_classifier.py
@@ -7,7 +7,7 @@
 
 from rasa.nlu.featurizers.featurizer import sequence_to_sentence_features
 from rasa.nlu import utils
-from rasa.nlu.classifiers import DEFAULT_LABEL_RANKING_LENGTH
+from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
 from rasa.nlu.components import Component
 from rasa.nlu.config import RasaNLUModelConfig
 from rasa.nlu.model import Metadata
@@ -160,7 +160,7 @@ def process(self, message: Message, **kwargs: Any) -> None:
 
             if intents.size > 0 and probabilities.size > 0:
                 ranking = list(zip(list(intents), list(probabilities)))[
-                    :DEFAULT_LABEL_RANKING_LENGTH
+                    :LABEL_RANKING_LENGTH
                 ]
 
                 intent = {"name": intents[0], "confidence": probabilities[0]}

From 7a9879c458367f45be8cc4145f2072b917a3a71c Mon Sep 17 00:00:00 2001
From: Ella <erohmensing@gmail.com>
Date: Wed, 18 Dec 2019 03:44:52 -0500
Subject: [PATCH 16/35] tests for embeddingintentclassifier normalization

---
 data/test/many_intents.md                     | 59 +++++++++++++++++++
 .../test_embedding_intent_classifier.py       | 48 +++++++++++++++
 2 files changed, 107 insertions(+)
 create mode 100644 data/test/many_intents.md

diff --git a/data/test/many_intents.md b/data/test/many_intents.md
new file mode 100644
index 000000000000..aa4dcf46fab1
--- /dev/null
+++ b/data/test/many_intents.md
@@ -0,0 +1,59 @@
+## intent:handleinsult
+- you are an idiot
+- You lack understanding.
+
+## intent:thank
+- Thanks
+- Thank you
+
+## intent:telljoke
+- Tell me something that you think will make me laugh.
+- Entertain me with a joke.
+
+## intent:signup_newsletter
+- I wanna sign up for the newsletter.
+- I want to sign up for the newsletter.
+
+## intent:react_positive
+- you are funny
+- thats funny
+
+## intent:react_negative
+- i am sad
+- bad
+
+## intent:how_to_get_started
+- how do I get started with rasa
+- how do I use rasa
+
+## intent:technical_question
+- what is duckling
+- where to train intents in rasa?
+
+## intent:source_code
+- how it works?
+- where can i find this code
+
+## intent:pipeline_recommendation
+- what pipeline should I start with?
+- what is the right pipeline to choose?
+
+## intent:rasa_cost
+- is rasa free
+- are you really free
+
+## intent:nicetomeeyou
+- It’s great connecting with you.
+- Hi, nice to meet you!
+
+## intent:nlu_generation_tool_recommendation
+- which tools can I use to create nlu data
+- how can I get nlu data
+
+## intent:install_rasa
+- I want to install Rasa Stack
+- How to install Rasa?
+
+## intent:ask_which_events
+- Which community events do you have
+- Where can I meet Rasas
\ No newline at end of file
diff --git a/tests/nlu/classifiers/test_embedding_intent_classifier.py b/tests/nlu/classifiers/test_embedding_intent_classifier.py
index 1c2c1edc237f..b104048ab26c 100644
--- a/tests/nlu/classifiers/test_embedding_intent_classifier.py
+++ b/tests/nlu/classifiers/test_embedding_intent_classifier.py
@@ -2,6 +2,8 @@
 import pytest
 import scipy.sparse
 
+from rasa.nlu import train
+from rasa.nlu.config import RasaNLUModelConfig
 from rasa.nlu.constants import (
     TEXT_ATTRIBUTE,
     SPARSE_FEATURE_NAMES,
@@ -9,6 +11,8 @@
     INTENT_ATTRIBUTE,
 )
 from rasa.nlu.classifiers.embedding_intent_classifier import EmbeddingIntentClassifier
+from tests.nlu.conftest import DEFAULT_DATA_PATH
+from rasa.nlu.model import Interpreter
 from rasa.nlu.training_data import Message
 
 
@@ -104,3 +108,47 @@ def test_check_labels_features_exist(messages, expected):
         EmbeddingIntentClassifier._check_labels_features_exist(messages, attribute)
         == expected
     )
+
+
+def as_pipeline(*components):
+    return [{"name": c} for c in components]
+
+
+@pytest.mark.parametrize(
+    "classifier_params, output_length, output_should_sum_to_1",
+    [
+        ({}, 10, True),  # default config
+        ({"ranking_length": 0}, 10, False),  # no normalization
+        ({"ranking_length": 3}, 3, True),  # lower than default ranking_length
+        ({"ranking_length": 12}, 10, False),  # higher than default ranking_length
+    ],
+)
+async def test_softmax_normalization(
+    component_builder, tmpdir, classifier_params, output_length, output_should_sum_to_1,
+):
+    pipeline = as_pipeline(
+        "WhitespaceTokenizer", "CountVectorsFeaturizer", "EmbeddingIntentClassifier"
+    )
+    assert pipeline[2]["name"] == "EmbeddingIntentClassifier"
+    pipeline[2].update(classifier_params)
+
+    _config = RasaNLUModelConfig({"pipeline": pipeline})
+    (trained_model, _, persisted_path) = await train(
+        _config,
+        path=tmpdir.strpath,
+        data="data/test/many_intents.md",
+        component_builder=component_builder,
+    )
+    loaded = Interpreter.load(persisted_path, component_builder)
+
+    parse_data = loaded.parse("hello")
+    intent_ranking = parse_data.get("intent_ranking")
+    # check that the output was correctly truncated after normalization
+    assert len(intent_ranking) == output_length
+
+    # check whether normalization had the expected effect
+    output_sums_to_1 = sum([intent.get("confidence") for intent in intent_ranking]) == 1
+    assert output_sums_to_1 == output_should_sum_to_1
+
+    # check whether the normalization of intent_rankings is reflected in intent prediction
+    assert parse_data.get("intent") == intent_ranking[0]

From f55eaa12d7214f2a348619957618051e85dd306e Mon Sep 17 00:00:00 2001
From: Ella <erohmensing@gmail.com>
Date: Wed, 18 Dec 2019 07:10:15 -0500
Subject: [PATCH 17/35] use random seed for stable test results

---
 .../test_embedding_intent_classifier.py          | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/tests/nlu/classifiers/test_embedding_intent_classifier.py b/tests/nlu/classifiers/test_embedding_intent_classifier.py
index b104048ab26c..7cbc844589d6 100644
--- a/tests/nlu/classifiers/test_embedding_intent_classifier.py
+++ b/tests/nlu/classifiers/test_embedding_intent_classifier.py
@@ -117,10 +117,18 @@ def as_pipeline(*components):
 @pytest.mark.parametrize(
     "classifier_params, output_length, output_should_sum_to_1",
     [
-        ({}, 10, True),  # default config
-        ({"ranking_length": 0}, 10, False),  # no normalization
-        ({"ranking_length": 3}, 3, True),  # lower than default ranking_length
-        ({"ranking_length": 12}, 10, False),  # higher than default ranking_length
+        ({"random_seed": 42}, 10, True),  # default config
+        ({"random_seed": 42, "ranking_length": 0}, 10, False),  # no normalization
+        (
+            {"random_seed": 42, "ranking_length": 3},
+            3,
+            True,
+        ),  # lower than default ranking_length
+        (
+            {"random_seed": 42, "ranking_length": 12},
+            10,
+            False,
+        ),  # higher than default ranking_length
     ],
 )
 async def test_softmax_normalization(

From 0f8dbc00ea39ef285c65c82f7260a4058302eeb5 Mon Sep 17 00:00:00 2001
From: Ella <erohmensing@gmail.com>
Date: Wed, 18 Dec 2019 07:36:37 -0500
Subject: [PATCH 18/35] move normalization method to utils

---
 rasa/core/policies/embedding_policy.py | 10 +++-------
 rasa/utils/train_utils.py              |  7 +++++++
 tests/core/test_policies.py            | 13 +++++++------
 3 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/rasa/core/policies/embedding_policy.py b/rasa/core/policies/embedding_policy.py
index c01d10fcd6de..92aa6546053d 100644
--- a/rasa/core/policies/embedding_policy.py
+++ b/rasa/core/policies/embedding_policy.py
@@ -552,12 +552,6 @@ def tf_feed_dict_for_prediction(
 
         return {self.a_in: session_data["dialogue_features"][0]}
 
-    def _normalize_scores(self, confidence):
-        ranked = sorted(confidence, reverse=True)
-        confidence[confidence < ranked[self.ranking_length - 1]] = 0
-        confidence = confidence / np.sum(confidence)
-        return confidence
-
     def predict_action_probabilities(
         self, tracker: "DialogueStateTracker", domain: "Domain"
     ) -> List[float]:
@@ -581,7 +575,9 @@ def predict_action_probabilities(
 
         # normalise scores if turned on
         if self.loss_type == "softmax" and self.ranking_length > 0:
-            confidence = self._normalize_scores(confidence)
+            confidence = train_utils.normalize_confidence(
+                confidence, self.ranking_length
+            )
 
         return confidence.tolist()
 
diff --git a/rasa/utils/train_utils.py b/rasa/utils/train_utils.py
index b61c6ad6a2e3..aae1e272830d 100644
--- a/rasa/utils/train_utils.py
+++ b/rasa/utils/train_utils.py
@@ -1275,3 +1275,10 @@ def load_tensor(name: Text) -> Optional[Union["tf.Tensor", List["tf.Tensor"]]]:
         return tensor_list[0]
 
     return tensor_list
+
+
+def normalize_confidence(confidence: "np.ndarray", ranking_length: int):
+    ranked = sorted(confidence, reverse=True)
+    confidence[confidence < ranked[ranking_length - 1]] = 0
+    confidence = confidence / np.sum(confidence)
+    return confidence
diff --git a/tests/core/test_policies.py b/tests/core/test_policies.py
index af818723f80b..82be7ba7cd84 100644
--- a/tests/core/test_policies.py
+++ b/tests/core/test_policies.py
@@ -33,6 +33,7 @@
 from rasa.core.policies.memoization import AugmentedMemoizationPolicy, MemoizationPolicy
 from rasa.core.policies.sklearn_policy import SklearnPolicy
 from rasa.core.trackers import DialogueStateTracker
+from rasa.utils import train_utils
 from tests.core.conftest import (
     DEFAULT_DOMAIN_PATH_WITH_MAPPING,
     DEFAULT_DOMAIN_PATH_WITH_SLOTS,
@@ -354,11 +355,11 @@ def test_normalization(self, trained_policy, tracker, default_domain, monkeypatc
         # also check our function is called
         mock = Mock()
         monkeypatch.setattr(
-            EmbeddingPolicy, "_normalize_scores", mock._normalize_scores,
+            train_utils, "normalize_confidence", mock.normalize_confidence
         )
         trained_policy.predict_action_probabilities(tracker, default_domain)
 
-        mock._normalize_scores.assert_called_once()
+        mock.normalize_confidence.assert_called_once()
 
     async def test_gen_batch(self, trained_policy, default_domain):
         training_trackers = await train_trackers(default_domain, augmentation_factor=0)
@@ -409,12 +410,12 @@ def test_normalization(self, trained_policy, tracker, default_domain, monkeypatc
         # Mock actual normalization method
         mock = Mock()
         monkeypatch.setattr(
-            EmbeddingPolicy, "_normalize_scores", mock._normalize_scores,
+            train_utils, "normalize_confidence", mock.normalize_confidence
         )
         trained_policy.predict_action_probabilities(tracker, default_domain)
 
         # function should not get called for margin loss_type
-        mock._normalize_scores.assert_not_called()
+        mock.normalize_confidence.assert_not_called()
 
 
 class TestEmbeddingPolicyWithEval(TestEmbeddingPolicy):
@@ -448,11 +449,11 @@ def test_normalization(self, trained_policy, tracker, default_domain, monkeypatc
         # also check our function is not called
         mock = Mock()
         monkeypatch.setattr(
-            EmbeddingPolicy, "_normalize_scores", mock._normalize_scores,
+            train_utils, "normalize_confidence", mock.normalize_confidence
         )
         trained_policy.predict_action_probabilities(tracker, default_domain)
 
-        mock._normalize_scores.assert_not_called()
+        mock.normalize_confidence.assert_not_called()
 
 
 class TestEmbeddingPolicyLowRankingLength(TestEmbeddingPolicy):

From 2087fc8f8135750659ad9358b8e9f5718596cc7a Mon Sep 17 00:00:00 2001
From: Ella <erohmensing@gmail.com>
Date: Wed, 18 Dec 2019 07:44:08 -0500
Subject: [PATCH 19/35] truncate labels

---
 rasa/nlu/classifiers/embedding_intent_classifier.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/rasa/nlu/classifiers/embedding_intent_classifier.py b/rasa/nlu/classifiers/embedding_intent_classifier.py
index e27fe9ba8e44..f49644404bde 100644
--- a/rasa/nlu/classifiers/embedding_intent_classifier.py
+++ b/rasa/nlu/classifiers/embedding_intent_classifier.py
@@ -837,6 +837,7 @@ def predict_label(
                 )
 
             message_sim = message_sim[:LABEL_RANKING_LENGTH]
+            label_ids = label_ids[:LABEL_RANKING_LENGTH]
             ranking = list(zip(list(label_ids), message_sim))
             label_ranking = [
                 {"name": self.inverted_label_dict[label_idx], "confidence": score}

From 1c35df95bf27084988bfe7d82b56d3e38dcf99d4 Mon Sep 17 00:00:00 2001
From: Ella <erohmensing@gmail.com>
Date: Wed, 15 Jan 2020 03:44:52 +0100
Subject: [PATCH 20/35] move migration content

---
 changelog/4902.feature.rst | 3 +--
 docs/migration-guide.rst   | 6 +++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/changelog/4902.feature.rst b/changelog/4902.feature.rst
index bff781ed9459..6068c87dbb77 100644
--- a/changelog/4902.feature.rst
+++ b/changelog/4902.feature.rst
@@ -2,5 +2,4 @@ Added a new configuration parameter, ``ranking_length`` to the ``EmbeddingPolicy
 and ``ResponseSelector`` classes.
 
 The ``EmbeddingPolicy``, ``EmbeddingIntentClassifier``, and ``ResponseSelector`` now by default normalize confidence
-levels over the top 10 results. See :ref:`migration-to-rasa-1.6` for more details.
-
+levels over the top 10 results. See :ref:`migration-to-rasa-1.7` for more details.
diff --git a/docs/migration-guide.rst b/docs/migration-guide.rst
index e64f22bff606..509a61d77eff 100644
--- a/docs/migration-guide.rst
+++ b/docs/migration-guide.rst
@@ -11,9 +11,9 @@ Migration Guide
 This page contains information about changes between major versions and
 how you can migrate from one version to another.
 
-.. _migration-to-rasa-1.6:
+.. _migration-to-rasa-1.7:
 
-Rasa 1.5 to Rasa 1.6
+Rasa 1.6 to Rasa 1.7
 --------------------
 
 General
@@ -94,7 +94,7 @@ General
 - If you were previously importing the ``Button`` or ``Element`` classes from
   ``rasa_core.dispatcher``, these are now to be imported from ``rasa_sdk.utils``.
 
-- Rasa NLU and Core previously used `separate configuration files 
+- Rasa NLU and Core previously used `separate configuration files
   <https://legacy-docs.rasa.com/docs/nlu/0.15.1/migrations/?&_ga=2.218966814.608734414.1560704810-314462423.1543594887#id1>`_.
   These two files should be merged into a single file either named ``config.yml``, or passed via the ``--config`` parameter.
 

From 5da58cbfa86ac3ebf055bce156e739b584e82086 Mon Sep 17 00:00:00 2001
From: Ella <erohmensing@gmail.com>
Date: Wed, 15 Jan 2020 04:37:24 +0100
Subject: [PATCH 21/35] move normalization to calculate_message_sim

---
 rasa/core/policies/embedding_policy.py        |  4 +---
 .../embedding_intent_classifier.py            | 20 ++++++++++---------
 .../test_embedding_intent_classifier.py       |  6 ++++--
 3 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/rasa/core/policies/embedding_policy.py b/rasa/core/policies/embedding_policy.py
index 92aa6546053d..a73ad853475a 100644
--- a/rasa/core/policies/embedding_policy.py
+++ b/rasa/core/policies/embedding_policy.py
@@ -575,9 +575,7 @@ def predict_action_probabilities(
 
         # normalise scores if turned on
         if self.loss_type == "softmax" and self.ranking_length > 0:
-            confidence = train_utils.normalize_confidence(
-                confidence, self.ranking_length
-            )
+            train_utils.normalize_confidence(confidence, self.ranking_length)
 
         return confidence.tolist()
 
diff --git a/rasa/nlu/classifiers/embedding_intent_classifier.py b/rasa/nlu/classifiers/embedding_intent_classifier.py
index 839880dd85c0..da75a0bb4624 100644
--- a/rasa/nlu/classifiers/embedding_intent_classifier.py
+++ b/rasa/nlu/classifiers/embedding_intent_classifier.py
@@ -805,6 +805,11 @@ def _calculate_message_sim(
         message_sim = message_sim.flatten()  # sim is a matrix
 
         label_ids = message_sim.argsort()[::-1]
+
+        # normalise scores if turned on
+        if self.loss_type == "softmax" and self.ranking_length > 0:
+            train_utils.normalize_confidence(message_sim, self.ranking_length)
+
         message_sim[::-1].sort()
 
         # transform sim to python list for JSON serializing
@@ -838,16 +843,13 @@ def predict_label(
         # if X contains all zeros do not predict some label
         if label_ids.size > 0:
 
-            # normalise scores if turned on
-            if self.loss_type == "softmax" and self.ranking_length > 0:
-                ranking_length = self.ranking_length
-                label_ids = label_ids[:ranking_length]
-                message_sim = message_sim[:ranking_length] / (
-                    np.sum(message_sim[:ranking_length])
-                )
+            if self.ranking_length and 0 < self.ranking_length < LABEL_RANKING_LENGTH:
+                output_length = self.ranking_length
+            else:
+                output_length = LABEL_RANKING_LENGTH
 
-            message_sim = message_sim[:LABEL_RANKING_LENGTH]
-            label_ids = label_ids[:LABEL_RANKING_LENGTH]
+            message_sim = message_sim[:output_length]
+            label_ids = label_ids[:output_length]
             ranking = list(zip(list(label_ids), message_sim))
             label_ranking = [
                 {"name": self.inverted_label_dict[label_idx], "confidence": score}
diff --git a/tests/nlu/classifiers/test_embedding_intent_classifier.py b/tests/nlu/classifiers/test_embedding_intent_classifier.py
index e67ff03f1c81..0c4d1ff9ee1a 100644
--- a/tests/nlu/classifiers/test_embedding_intent_classifier.py
+++ b/tests/nlu/classifiers/test_embedding_intent_classifier.py
@@ -209,8 +209,10 @@ async def test_softmax_normalization(
     assert len(intent_ranking) == output_length
 
     # check whether normalization had the expected effect
-    output_sums_to_1 = sum([intent.get("confidence") for intent in intent_ranking]) == 1
+    output_sums_to_1 = sum(
+        [intent.get("confidence") for intent in intent_ranking]
+    ) == pytest.approx(1)
     assert output_sums_to_1 == output_should_sum_to_1
 
-    # check whether the normalization of intent_rankings is reflected in intent prediction
+    # check whether the normalization of rankings is reflected in intent prediction
     assert parse_data.get("intent") == intent_ranking[0]

From 7d1911ac53118a6b7c29d11ff394e1b092c37f70 Mon Sep 17 00:00:00 2001
From: Ella <erohmensing@gmail.com>
Date: Wed, 15 Jan 2020 04:52:40 +0100
Subject: [PATCH 22/35] make normalization method more general

---
 rasa/core/policies/embedding_policy.py        |  2 +-
 .../embedding_intent_classifier.py            |  2 +-
 rasa/utils/train_utils.py                     | 13 ++++++++-----
 tests/core/test_policies.py                   | 19 ++++++-------------
 .../test_embedding_intent_classifier.py       |  2 --
 5 files changed, 16 insertions(+), 22 deletions(-)

diff --git a/rasa/core/policies/embedding_policy.py b/rasa/core/policies/embedding_policy.py
index a73ad853475a..e8d18c36a85c 100644
--- a/rasa/core/policies/embedding_policy.py
+++ b/rasa/core/policies/embedding_policy.py
@@ -575,7 +575,7 @@ def predict_action_probabilities(
 
         # normalise scores if turned on
         if self.loss_type == "softmax" and self.ranking_length > 0:
-            train_utils.normalize_confidence(confidence, self.ranking_length)
+            train_utils.normalize(confidence, self.ranking_length)
 
         return confidence.tolist()
 
diff --git a/rasa/nlu/classifiers/embedding_intent_classifier.py b/rasa/nlu/classifiers/embedding_intent_classifier.py
index da75a0bb4624..8bad702a7fd7 100644
--- a/rasa/nlu/classifiers/embedding_intent_classifier.py
+++ b/rasa/nlu/classifiers/embedding_intent_classifier.py
@@ -808,7 +808,7 @@ def _calculate_message_sim(
 
         # normalise scores if turned on
         if self.loss_type == "softmax" and self.ranking_length > 0:
-            train_utils.normalize_confidence(message_sim, self.ranking_length)
+            train_utils.normalize(message_sim, self.ranking_length)
 
         message_sim[::-1].sort()
 
diff --git a/rasa/utils/train_utils.py b/rasa/utils/train_utils.py
index aae1e272830d..13039a3ccf9b 100644
--- a/rasa/utils/train_utils.py
+++ b/rasa/utils/train_utils.py
@@ -1277,8 +1277,11 @@ def load_tensor(name: Text) -> Optional[Union["tf.Tensor", List["tf.Tensor"]]]:
     return tensor_list
 
 
-def normalize_confidence(confidence: "np.ndarray", ranking_length: int):
-    ranked = sorted(confidence, reverse=True)
-    confidence[confidence < ranked[ranking_length - 1]] = 0
-    confidence = confidence / np.sum(confidence)
-    return confidence
+def normalize(values: "np.ndarray", ranking_length: Optional[int] = None) -> None:
+    """Normalizes an array over the top `ranking_length` values, if provided.
+
+    Other values will be set to 0. Happens in place."""
+    if ranking_length:
+        ranked = sorted(values, reverse=True)
+        values[values < ranked[ranking_length - 1]] = 0
+    values /= np.sum(values)
diff --git a/tests/core/test_policies.py b/tests/core/test_policies.py
index 82be7ba7cd84..ba7b2f5dea00 100644
--- a/tests/core/test_policies.py
+++ b/tests/core/test_policies.py
@@ -3,7 +3,6 @@
 import numpy as np
 import pytest
 
-from rasa.utils import train_utils
 from rasa.core import training
 from rasa.core.actions.action import (
     ACTION_DEFAULT_ASK_AFFIRMATION_NAME,
@@ -354,12 +353,10 @@ def test_normalization(self, trained_policy, tracker, default_domain, monkeypatc
 
         # also check our function is called
         mock = Mock()
-        monkeypatch.setattr(
-            train_utils, "normalize_confidence", mock.normalize_confidence
-        )
+        monkeypatch.setattr(train_utils, "normalize", mock.normalize)
         trained_policy.predict_action_probabilities(tracker, default_domain)
 
-        mock.normalize_confidence.assert_called_once()
+        mock.normalize.assert_called_once()
 
     async def test_gen_batch(self, trained_policy, default_domain):
         training_trackers = await train_trackers(default_domain, augmentation_factor=0)
@@ -409,13 +406,11 @@ def test_similarity_type(self, trained_policy):
     def test_normalization(self, trained_policy, tracker, default_domain, monkeypatch):
         # Mock actual normalization method
         mock = Mock()
-        monkeypatch.setattr(
-            train_utils, "normalize_confidence", mock.normalize_confidence
-        )
+        monkeypatch.setattr(train_utils, "normalize", mock.normalize)
         trained_policy.predict_action_probabilities(tracker, default_domain)
 
         # function should not get called for margin loss_type
-        mock.normalize_confidence.assert_not_called()
+        mock.normalize.assert_not_called()
 
 
 class TestEmbeddingPolicyWithEval(TestEmbeddingPolicy):
@@ -448,12 +443,10 @@ def test_normalization(self, trained_policy, tracker, default_domain, monkeypatc
 
         # also check our function is not called
         mock = Mock()
-        monkeypatch.setattr(
-            train_utils, "normalize_confidence", mock.normalize_confidence
-        )
+        monkeypatch.setattr(train_utils, "normalize", mock.normalize)
         trained_policy.predict_action_probabilities(tracker, default_domain)
 
-        mock.normalize_confidence.assert_not_called()
+        mock.normalize.assert_not_called()
 
 
 class TestEmbeddingPolicyLowRankingLength(TestEmbeddingPolicy):
diff --git a/tests/nlu/classifiers/test_embedding_intent_classifier.py b/tests/nlu/classifiers/test_embedding_intent_classifier.py
index 0c4d1ff9ee1a..08981577a24d 100644
--- a/tests/nlu/classifiers/test_embedding_intent_classifier.py
+++ b/tests/nlu/classifiers/test_embedding_intent_classifier.py
@@ -3,7 +3,6 @@
 import scipy.sparse
 
 from rasa.nlu import train
-from rasa.nlu.model import Interpreter
 from rasa.nlu.config import RasaNLUModelConfig
 from rasa.nlu.constants import (
     TEXT_ATTRIBUTE,
@@ -12,7 +11,6 @@
     INTENT_ATTRIBUTE,
 )
 from rasa.nlu.classifiers.embedding_intent_classifier import EmbeddingIntentClassifier
-from tests.nlu.conftest import DEFAULT_DATA_PATH
 from rasa.nlu.model import Interpreter
 from rasa.nlu.training_data import Message
 from tests.nlu.conftest import DEFAULT_DATA_PATH

From d6395cd9d25abb46a6340b54b362923a3eb9b337 Mon Sep 17 00:00:00 2001
From: Ella <erohmensing@gmail.com>
Date: Wed, 15 Jan 2020 05:03:42 +0100
Subject: [PATCH 23/35] add test for margin loss type not undergoing
 normalization

---
 .../test_embedding_intent_classifier.py       | 52 +++++++++++++++++--
 1 file changed, 49 insertions(+), 3 deletions(-)

diff --git a/tests/nlu/classifiers/test_embedding_intent_classifier.py b/tests/nlu/classifiers/test_embedding_intent_classifier.py
index 08981577a24d..bdcb9c1d0624 100644
--- a/tests/nlu/classifiers/test_embedding_intent_classifier.py
+++ b/tests/nlu/classifiers/test_embedding_intent_classifier.py
@@ -2,7 +2,10 @@
 import pytest
 import scipy.sparse
 
+from unittest.mock import Mock
+
 from rasa.nlu import train
+from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
 from rasa.nlu.config import RasaNLUModelConfig
 from rasa.nlu.constants import (
     TEXT_ATTRIBUTE,
@@ -13,6 +16,7 @@
 from rasa.nlu.classifiers.embedding_intent_classifier import EmbeddingIntentClassifier
 from rasa.nlu.model import Interpreter
 from rasa.nlu.training_data import Message
+from rasa.utils import train_utils
 from tests.nlu.conftest import DEFAULT_DATA_PATH
 
 
@@ -169,8 +173,12 @@ def as_pipeline(*components):
 @pytest.mark.parametrize(
     "classifier_params, output_length, output_should_sum_to_1",
     [
-        ({"random_seed": 42}, 10, True),  # default config
-        ({"random_seed": 42, "ranking_length": 0}, 10, False),  # no normalization
+        ({"random_seed": 42}, LABEL_RANKING_LENGTH, True),  # default config
+        (
+            {"random_seed": 42, "ranking_length": 0},
+            LABEL_RANKING_LENGTH,
+            False,
+        ),  # no normalization
         (
             {"random_seed": 42, "ranking_length": 3},
             3,
@@ -178,7 +186,7 @@ def as_pipeline(*components):
         ),  # lower than default ranking_length
         (
             {"random_seed": 42, "ranking_length": 12},
-            10,
+            LABEL_RANKING_LENGTH,
             False,
         ),  # higher than default ranking_length
     ],
@@ -214,3 +222,41 @@ async def test_softmax_normalization(
 
     # check whether the normalization of rankings is reflected in intent prediction
     assert parse_data.get("intent") == intent_ranking[0]
+
+
+@pytest.mark.parametrize(
+    "classifier_params, output_length",
+    [({"loss_type": "margin", "random_seed": 42}, LABEL_RANKING_LENGTH)],
+)
+async def test_margin_loss_is_not_normalized(
+    monkeypatch, component_builder, tmpdir, classifier_params, output_length,
+):
+    pipeline = as_pipeline(
+        "WhitespaceTokenizer", "CountVectorsFeaturizer", "EmbeddingIntentClassifier"
+    )
+    assert pipeline[2]["name"] == "EmbeddingIntentClassifier"
+    pipeline[2].update(classifier_params)
+
+    mock = Mock()
+    monkeypatch.setattr(train_utils, "normalize", mock.normalize)
+
+    _config = RasaNLUModelConfig({"pipeline": pipeline})
+    (trained_model, _, persisted_path) = await train(
+        _config,
+        path=tmpdir.strpath,
+        data="data/test/many_intents.md",
+        component_builder=component_builder,
+    )
+    loaded = Interpreter.load(persisted_path, component_builder)
+
+    parse_data = loaded.parse("hello")
+    intent_ranking = parse_data.get("intent_ranking")
+
+    # check that the output was not normalized
+    mock.normalize.assert_not_called()
+
+    # check that the output was correctly truncated
+    assert len(intent_ranking) == output_length
+
+    # make sure top ranking is reflected in intent prediction
+    assert parse_data.get("intent") == intent_ranking[0]

From 1ac4bea76cf12552521de191ed75da04941ad618 Mon Sep 17 00:00:00 2001
From: Ella <erohmensing@gmail.com>
Date: Wed, 15 Jan 2020 05:06:56 +0100
Subject: [PATCH 24/35] update changelog

---
 changelog/4902.feature.rst     | 3 ---
 changelog/4902.improvement.rst | 2 ++
 2 files changed, 2 insertions(+), 3 deletions(-)
 create mode 100644 changelog/4902.improvement.rst

diff --git a/changelog/4902.feature.rst b/changelog/4902.feature.rst
index 6068c87dbb77..fc9d6c362535 100644
--- a/changelog/4902.feature.rst
+++ b/changelog/4902.feature.rst
@@ -1,5 +1,2 @@
 Added a new configuration parameter, ``ranking_length`` to the ``EmbeddingPolicy``, ``EmbeddingIntentClassifier``,
 and ``ResponseSelector`` classes.
-
-The ``EmbeddingPolicy``, ``EmbeddingIntentClassifier``, and ``ResponseSelector`` now by default normalize confidence
-levels over the top 10 results. See :ref:`migration-to-rasa-1.7` for more details.
diff --git a/changelog/4902.improvement.rst b/changelog/4902.improvement.rst
new file mode 100644
index 000000000000..8ac1670b7f8a
--- /dev/null
+++ b/changelog/4902.improvement.rst
@@ -0,0 +1,2 @@
+The ``EmbeddingPolicy``, ``EmbeddingIntentClassifier``, and ``ResponseSelector`` now by default normalize confidence
+levels over the top 10 results. See :ref:`migration-to-rasa-1.7` for more details.

From 90d6192b3474a6ee1e4881330370002ce3205eac Mon Sep 17 00:00:00 2001
From: Ella <erohmensing@gmail.com>
Date: Wed, 15 Jan 2020 05:08:52 +0100
Subject: [PATCH 25/35] use hardcoded default ranking length in test

---
 tests/nlu/classifiers/test_embedding_intent_classifier.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/nlu/classifiers/test_embedding_intent_classifier.py b/tests/nlu/classifiers/test_embedding_intent_classifier.py
index bdcb9c1d0624..66acea81cf86 100644
--- a/tests/nlu/classifiers/test_embedding_intent_classifier.py
+++ b/tests/nlu/classifiers/test_embedding_intent_classifier.py
@@ -173,7 +173,7 @@ def as_pipeline(*components):
 @pytest.mark.parametrize(
     "classifier_params, output_length, output_should_sum_to_1",
     [
-        ({"random_seed": 42}, LABEL_RANKING_LENGTH, True),  # default config
+        ({"random_seed": 42}, 10, True),  # default config
         (
             {"random_seed": 42, "ranking_length": 0},
             LABEL_RANKING_LENGTH,

From 4271286e0507d473f1d7b78d467653485a59fac6 Mon Sep 17 00:00:00 2001
From: Ella <erohmensing@gmail.com>
Date: Wed, 15 Jan 2020 05:15:57 +0100
Subject: [PATCH 26/35] use old method for truncating

---
 rasa/nlu/classifiers/embedding_intent_classifier.py | 11 +++++------
 rasa/utils/train_utils.py                           |  1 +
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/rasa/nlu/classifiers/embedding_intent_classifier.py b/rasa/nlu/classifiers/embedding_intent_classifier.py
index 8bad702a7fd7..e4739a2159e9 100644
--- a/rasa/nlu/classifiers/embedding_intent_classifier.py
+++ b/rasa/nlu/classifiers/embedding_intent_classifier.py
@@ -842,23 +842,22 @@ def predict_label(
 
         # if X contains all zeros do not predict some label
         if label_ids.size > 0:
+            label = {
+                "name": self.inverted_label_dict[label_ids[0]],
+                "confidence": message_sim[0],
+            }
 
             if self.ranking_length and 0 < self.ranking_length < LABEL_RANKING_LENGTH:
                 output_length = self.ranking_length
             else:
                 output_length = LABEL_RANKING_LENGTH
 
-            message_sim = message_sim[:output_length]
-            label_ids = label_ids[:output_length]
             ranking = list(zip(list(label_ids), message_sim))
+            ranking = ranking[:output_length]
             label_ranking = [
                 {"name": self.inverted_label_dict[label_idx], "confidence": score}
                 for label_idx, score in ranking
             ]
-            label = {
-                "name": self.inverted_label_dict[label_ids[0]],
-                "confidence": message_sim[0],
-            }
 
         return label, label_ranking
 
diff --git a/rasa/utils/train_utils.py b/rasa/utils/train_utils.py
index 13039a3ccf9b..8f26f6567267 100644
--- a/rasa/utils/train_utils.py
+++ b/rasa/utils/train_utils.py
@@ -1281,6 +1281,7 @@ def normalize(values: "np.ndarray", ranking_length: Optional[int] = None) -> Non
     """Normalizes an array over the top `ranking_length` values, if provided.
 
     Other values will be set to 0. Happens in place."""
+
     if ranking_length:
         ranked = sorted(values, reverse=True)
         values[values < ranked[ranking_length - 1]] = 0

From a0fc1868a572cab00b851e68db0f8719503b9a3b Mon Sep 17 00:00:00 2001
From: Ella <erohmensing@gmail.com>
Date: Wed, 15 Jan 2020 09:15:58 +0100
Subject: [PATCH 27/35] handle some edge cases

---
 rasa/core/policies/embedding_policy.py        |  3 +--
 .../embedding_intent_classifier.py            |  3 +--
 rasa/utils/train_utils.py                     | 10 +++++----
 .../test_embedding_intent_classifier.py       | 22 +++++++++++++++----
 4 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/rasa/core/policies/embedding_policy.py b/rasa/core/policies/embedding_policy.py
index e8d18c36a85c..dcad5bab3014 100644
--- a/rasa/core/policies/embedding_policy.py
+++ b/rasa/core/policies/embedding_policy.py
@@ -573,8 +573,7 @@ def predict_action_probabilities(
         confidence = self.session.run(self.pred_confidence, feed_dict=tf_feed_dict)
         confidence = confidence[0, -1, :]
 
-        # normalise scores if turned on
-        if self.loss_type == "softmax" and self.ranking_length > 0:
+        if self.loss_type == "softmax":
             train_utils.normalize(confidence, self.ranking_length)
 
         return confidence.tolist()
diff --git a/rasa/nlu/classifiers/embedding_intent_classifier.py b/rasa/nlu/classifiers/embedding_intent_classifier.py
index e4739a2159e9..5da38a99d87f 100644
--- a/rasa/nlu/classifiers/embedding_intent_classifier.py
+++ b/rasa/nlu/classifiers/embedding_intent_classifier.py
@@ -806,8 +806,7 @@ def _calculate_message_sim(
 
         label_ids = message_sim.argsort()[::-1]
 
-        # normalise scores if turned on
-        if self.loss_type == "softmax" and self.ranking_length > 0:
+        if self.loss_type == "softmax":
             train_utils.normalize(message_sim, self.ranking_length)
 
         message_sim[::-1].sort()
diff --git a/rasa/utils/train_utils.py b/rasa/utils/train_utils.py
index 8f26f6567267..ac366b372c1a 100644
--- a/rasa/utils/train_utils.py
+++ b/rasa/utils/train_utils.py
@@ -1277,12 +1277,14 @@ def load_tensor(name: Text) -> Optional[Union["tf.Tensor", List["tf.Tensor"]]]:
     return tensor_list
 
 
-def normalize(values: "np.ndarray", ranking_length: Optional[int] = None) -> None:
-    """Normalizes an array over the top `ranking_length` values, if provided.
+def normalize(values: "np.ndarray", ranking_length: Optional[int] = 0) -> None:
+    """Normalizes an array of positive numbers over the top `ranking_length` values, if provided.
 
     Other values will be set to 0. Happens in place."""
 
-    if ranking_length:
+    if 0 < ranking_length < len(values):
         ranked = sorted(values, reverse=True)
         values[values < ranked[ranking_length - 1]] = 0
-    values /= np.sum(values)
+
+    if np.sum(values) > 0:
+        values /= np.sum(values)
diff --git a/tests/nlu/classifiers/test_embedding_intent_classifier.py b/tests/nlu/classifiers/test_embedding_intent_classifier.py
index 66acea81cf86..e45dbfed1f51 100644
--- a/tests/nlu/classifiers/test_embedding_intent_classifier.py
+++ b/tests/nlu/classifiers/test_embedding_intent_classifier.py
@@ -171,28 +171,42 @@ def as_pipeline(*components):
 
 
 @pytest.mark.parametrize(
-    "classifier_params, output_length, output_should_sum_to_1",
+    "classifier_params, data_path, output_length, output_should_sum_to_1",
     [
-        ({"random_seed": 42}, 10, True),  # default config
+        ({"random_seed": 42}, "data/test/many_intents.md", 10, True),  # default config
         (
             {"random_seed": 42, "ranking_length": 0},
+            "data/test/many_intents.md",
             LABEL_RANKING_LENGTH,
             False,
         ),  # no normalization
         (
             {"random_seed": 42, "ranking_length": 3},
+            "data/test/many_intents.md",
             3,
             True,
         ),  # lower than default ranking_length
         (
             {"random_seed": 42, "ranking_length": 12},
+            "data/test/many_intents.md",
             LABEL_RANKING_LENGTH,
             False,
         ),  # higher than default ranking_length
+        (
+            {"random_seed": 42},
+            "examples/moodbot/data/nlu.md",
+            7,
+            True,
+        ),  # fewer intents than default ranking_length
     ],
 )
 async def test_softmax_normalization(
-    component_builder, tmpdir, classifier_params, output_length, output_should_sum_to_1,
+    component_builder,
+    tmpdir,
+    classifier_params,
+    data_path,
+    output_length,
+    output_should_sum_to_1,
 ):
     pipeline = as_pipeline(
         "WhitespaceTokenizer", "CountVectorsFeaturizer", "EmbeddingIntentClassifier"
@@ -204,7 +218,7 @@ async def test_softmax_normalization(
     (trained_model, _, persisted_path) = await train(
         _config,
         path=tmpdir.strpath,
-        data="data/test/many_intents.md",
+        data=data_path,
         component_builder=component_builder,
     )
     loaded = Interpreter.load(persisted_path, component_builder)

From fecb858a6107a9201d235a96b8cd2aa2da4b3b56 Mon Sep 17 00:00:00 2001
From: Ella <erohmensing@gmail.com>
Date: Wed, 15 Jan 2020 10:32:00 +0100
Subject: [PATCH 28/35] explicitly do not call normalization if ranking length
 <1

---
 rasa/core/policies/embedding_policy.py              | 2 +-
 rasa/nlu/classifiers/embedding_intent_classifier.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/rasa/core/policies/embedding_policy.py b/rasa/core/policies/embedding_policy.py
index dcad5bab3014..3c05e8554c87 100644
--- a/rasa/core/policies/embedding_policy.py
+++ b/rasa/core/policies/embedding_policy.py
@@ -573,7 +573,7 @@ def predict_action_probabilities(
         confidence = self.session.run(self.pred_confidence, feed_dict=tf_feed_dict)
         confidence = confidence[0, -1, :]
 
-        if self.loss_type == "softmax":
+        if self.loss_type == "softmax" and self.loss_type > 0:
             train_utils.normalize(confidence, self.ranking_length)
 
         return confidence.tolist()
diff --git a/rasa/nlu/classifiers/embedding_intent_classifier.py b/rasa/nlu/classifiers/embedding_intent_classifier.py
index 5da38a99d87f..3fef9c634100 100644
--- a/rasa/nlu/classifiers/embedding_intent_classifier.py
+++ b/rasa/nlu/classifiers/embedding_intent_classifier.py
@@ -806,7 +806,7 @@ def _calculate_message_sim(
 
         label_ids = message_sim.argsort()[::-1]
 
-        if self.loss_type == "softmax":
+        if self.loss_type == "softmax" and self.loss_type > 0:
             train_utils.normalize(message_sim, self.ranking_length)
 
         message_sim[::-1].sort()

From bb9cd0c4d20b5b78007f9feef179d6fd3cc51ce9 Mon Sep 17 00:00:00 2001
From: Ella <erohmensing@gmail.com>
Date: Wed, 15 Jan 2020 11:47:59 +0100
Subject: [PATCH 29/35] use correct attribute

---
 rasa/core/policies/embedding_policy.py              | 2 +-
 rasa/nlu/classifiers/embedding_intent_classifier.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/rasa/core/policies/embedding_policy.py b/rasa/core/policies/embedding_policy.py
index 3c05e8554c87..59d5611ad586 100644
--- a/rasa/core/policies/embedding_policy.py
+++ b/rasa/core/policies/embedding_policy.py
@@ -573,7 +573,7 @@ def predict_action_probabilities(
         confidence = self.session.run(self.pred_confidence, feed_dict=tf_feed_dict)
         confidence = confidence[0, -1, :]
 
-        if self.loss_type == "softmax" and self.loss_type > 0:
+        if self.loss_type == "softmax" and self.ranking_length > 0:
             train_utils.normalize(confidence, self.ranking_length)
 
         return confidence.tolist()
diff --git a/rasa/nlu/classifiers/embedding_intent_classifier.py b/rasa/nlu/classifiers/embedding_intent_classifier.py
index 3fef9c634100..c00f4714bc1e 100644
--- a/rasa/nlu/classifiers/embedding_intent_classifier.py
+++ b/rasa/nlu/classifiers/embedding_intent_classifier.py
@@ -806,7 +806,7 @@ def _calculate_message_sim(
 
         label_ids = message_sim.argsort()[::-1]
 
-        if self.loss_type == "softmax" and self.loss_type > 0:
+        if self.loss_type == "softmax" and self.ranking_length > 0:
             train_utils.normalize(message_sim, self.ranking_length)
 
         message_sim[::-1].sort()

From 7d933ee599b74d1cf032db863ccff47792283f02 Mon Sep 17 00:00:00 2001
From: Vladimir Vlasov <vladimir@rasa.com>
Date: Thu, 23 Jan 2020 11:01:45 +0100
Subject: [PATCH 30/35] Update
 rasa/nlu/classifiers/embedding_intent_classifier.py

---
 rasa/nlu/classifiers/embedding_intent_classifier.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rasa/nlu/classifiers/embedding_intent_classifier.py b/rasa/nlu/classifiers/embedding_intent_classifier.py
index c00f4714bc1e..19821768610e 100644
--- a/rasa/nlu/classifiers/embedding_intent_classifier.py
+++ b/rasa/nlu/classifiers/embedding_intent_classifier.py
@@ -807,7 +807,7 @@ def _calculate_message_sim(
         label_ids = message_sim.argsort()[::-1]
 
         if self.loss_type == "softmax" and self.ranking_length > 0:
-            train_utils.normalize(message_sim, self.ranking_length)
+            message_sim = train_utils.normalize(message_sim, self.ranking_length)
 
         message_sim[::-1].sort()
 

From ad2beab7677d90f05938a83400c1fe7531f150b5 Mon Sep 17 00:00:00 2001
From: Vladimir Vlasov <vladimir@rasa.com>
Date: Thu, 23 Jan 2020 11:01:57 +0100
Subject: [PATCH 31/35] Update rasa/core/policies/embedding_policy.py

---
 rasa/core/policies/embedding_policy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rasa/core/policies/embedding_policy.py b/rasa/core/policies/embedding_policy.py
index 59d5611ad586..7dd727b0f60e 100644
--- a/rasa/core/policies/embedding_policy.py
+++ b/rasa/core/policies/embedding_policy.py
@@ -574,7 +574,7 @@ def predict_action_probabilities(
         confidence = confidence[0, -1, :]
 
         if self.loss_type == "softmax" and self.ranking_length > 0:
-            train_utils.normalize(confidence, self.ranking_length)
+            confidence = train_utils.normalize(confidence, self.ranking_length)
 
         return confidence.tolist()
 

From d76438f14cb1de3135341eb24812c5d67d2b1c53 Mon Sep 17 00:00:00 2001
From: Vladimir Vlasov <vladimir@rasa.com>
Date: Thu, 23 Jan 2020 11:02:09 +0100
Subject: [PATCH 32/35] Update rasa/utils/train_utils.py

---
 rasa/utils/train_utils.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/rasa/utils/train_utils.py b/rasa/utils/train_utils.py
index ac366b372c1a..143109484ae4 100644
--- a/rasa/utils/train_utils.py
+++ b/rasa/utils/train_utils.py
@@ -1287,4 +1287,6 @@ def normalize(values: "np.ndarray", ranking_length: Optional[int] = 0) -> None:
         values[values < ranked[ranking_length - 1]] = 0
 
     if np.sum(values) > 0:
-        values /= np.sum(values)
+        values = values / np.sum(values)
+        
+    return values

From 1741307ac518e43d61a69b2db6154cd030b6063e Mon Sep 17 00:00:00 2001
From: Vova Vv <mr.voov@gmail.com>
Date: Thu, 23 Jan 2020 11:17:22 +0100
Subject: [PATCH 33/35] black

---
 rasa/utils/train_utils.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/rasa/utils/train_utils.py b/rasa/utils/train_utils.py
index 143109484ae4..6357ff9d913c 100644
--- a/rasa/utils/train_utils.py
+++ b/rasa/utils/train_utils.py
@@ -1277,10 +1277,11 @@ def load_tensor(name: Text) -> Optional[Union["tf.Tensor", List["tf.Tensor"]]]:
     return tensor_list
 
 
-def normalize(values: "np.ndarray", ranking_length: Optional[int] = 0) -> None:
-    """Normalizes an array of positive numbers over the top `ranking_length` values, if provided.
+def normalize(values: "np.ndarray", ranking_length: Optional[int] = 0) -> "np.ndarray":
+    """Normalizes an array of positive numbers over the top `ranking_length` values.
 
-    Other values will be set to 0. Happens in place."""
+    Other values will be set to 0. Happens in place.
+    """
 
     if 0 < ranking_length < len(values):
         ranked = sorted(values, reverse=True)
@@ -1288,5 +1289,5 @@ def normalize(values: "np.ndarray", ranking_length: Optional[int] = 0) -> None:
 
     if np.sum(values) > 0:
         values = values / np.sum(values)
-        
+
     return values

From f57624a17af7ff9c5fff761ba269cfc213c9d46e Mon Sep 17 00:00:00 2001
From: Vova Vv <mr.voov@gmail.com>
Date: Thu, 23 Jan 2020 11:43:38 +0100
Subject: [PATCH 34/35] don't mutate an argument

---
 rasa/utils/train_utils.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/rasa/utils/train_utils.py b/rasa/utils/train_utils.py
index 6357ff9d913c..650d07203ae7 100644
--- a/rasa/utils/train_utils.py
+++ b/rasa/utils/train_utils.py
@@ -1280,14 +1280,15 @@ def load_tensor(name: Text) -> Optional[Union["tf.Tensor", List["tf.Tensor"]]]:
 def normalize(values: "np.ndarray", ranking_length: Optional[int] = 0) -> "np.ndarray":
     """Normalizes an array of positive numbers over the top `ranking_length` values.
 
-    Other values will be set to 0. Happens in place.
+    Other values will be set to 0.
     """
 
-    if 0 < ranking_length < len(values):
-        ranked = sorted(values, reverse=True)
-        values[values < ranked[ranking_length - 1]] = 0
+    new_values = values.copy()  # prevent mutation of the input
+    if 0 < ranking_length < len(new_values):
+        ranked = sorted(new_values, reverse=True)
+        new_values[new_values < ranked[ranking_length - 1]] = 0
 
-    if np.sum(values) > 0:
-        values = values / np.sum(values)
+    if np.sum(new_values) > 0:
+        new_values = new_values / np.sum(new_values)
 
-    return values
+    return new_values

From 97662f1ae9a6f3b943c2853f8d1fa9c16ee48771 Mon Sep 17 00:00:00 2001
From: Vova Vv <mr.voov@gmail.com>
Date: Thu, 23 Jan 2020 11:56:35 +0100
Subject: [PATCH 35/35] add norm test

---
 tests/core/test_policies.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/core/test_policies.py b/tests/core/test_policies.py
index ba7b2f5dea00..30c4c2e940bb 100644
--- a/tests/core/test_policies.py
+++ b/tests/core/test_policies.py
@@ -350,6 +350,8 @@ def test_normalization(self, trained_policy, tracker, default_domain, monkeypatc
             sum([confidence > 0 for confidence in predicted_probabilities])
             == trained_policy.ranking_length
         )
+        # check that the norm is still 1
+        assert sum(predicted_probabilities) == pytest.approx(1)
 
         # also check our function is called
         mock = Mock()