Loss tests refactor #308

Merged
7 commits merged on Jan 26, 2023
88 changes: 39 additions & 49 deletions tensorflow_similarity/losses/multisim_loss.py
@@ -47,43 +47,33 @@ def multisimilarity_loss(

Args:
query_labels: labels associated with the query embed.
query_embeddings: Embedded query examples.
key_labels: labels associated with the key embed.
key_embeddings: Embedded key examples.
distance: Which distance function to use to compute the pairwise distances between embeddings.
remove_diagonal: Bool. If True, will set diagonal to False in the positive pair mask.
alpha: The exponential weight for the positive pairs. Increasing alpha makes the logsumexp softmax closer to the max positive pair distance, while decreasing it makes it closer to max(P) + log(batch_size).
beta: The exponential weight for the negative pairs. Increasing beta makes the logsumexp softmax closer to the max negative pair distance, while decreasing it makes the softmax closer to max(N) + log(batch_size).
epsilon: Used to remove easy positive and negative pairs. We only keep positives that are greater than the (smallest negative pair - epsilon) and we only keep negatives that are less than the (largest positive pair + epsilon).
lmda: Used to weight the distance. Below this distance, negatives are up weighted and positives are down weighted. Similarly, above this distance negatives are down weighted and positives are up weighted.
center: This represents the expected distance value and will be used to center the values in the pairwise distance matrix. This is used when weighting the positive and negative examples, with the hardest examples receiving an up weight and the easiest examples receiving a down weight. This should be 1 for cosine distances, which we expect to be between [0, 2]. The value will depend on the data for L2 and L1 distances.

Returns:
Loss: The loss value for the current batch.
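For context, the objective these parameters feed into is the multi-similarity loss of Wang et al. (2019). Below is a minimal sketch over a pairwise similarity matrix, as in the paper; the implementation in this file works on distances re-centered by `center` and also applies the epsilon-based pair mining, so this function is illustrative rather than the library's exact code.

```python
import tensorflow as tf

def multisim_sketch(sim, pos_mask, neg_mask, alpha=2.0, beta=40.0, lmda=0.5):
    """sim: (B, B) pairwise similarities; pos_mask/neg_mask: (B, B) boolean pair masks."""
    pos_mask = tf.cast(pos_mask, sim.dtype)
    neg_mask = tf.cast(neg_mask, sim.dtype)
    # alpha sharpens the pull on hard (low-similarity) positives...
    pos_term = tf.math.log1p(tf.reduce_sum(tf.exp(-alpha * (sim - lmda)) * pos_mask, axis=1)) / alpha
    # ...while beta sharpens the push on hard (high-similarity) negatives.
    neg_term = tf.math.log1p(tf.reduce_sum(tf.exp(beta * (sim - lmda)) * neg_mask, axis=1)) / beta
    return tf.reduce_mean(pos_term + neg_term)
```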
@@ -186,37 +176,37 @@ def __init__(

Args:
distance: Which distance function to use to compute the pairwise distances between embeddings. Defaults to 'cosine'.
alpha: The exponential weight for the positive pairs. Increasing alpha makes the logsumexp softmax closer to the max positive pair distance, while decreasing it makes it closer to max(P) + log(batch_size).
beta: The exponential weight for the negative pairs. Increasing beta makes the logsumexp softmax closer to the max negative pair distance, while decreasing it makes the softmax closer to max(N) + log(batch_size).
epsilon: Used to remove easy positive and negative pairs. We only keep positives that are greater than the (smallest negative pair - epsilon) and we only keep negatives that are less than the (largest positive pair + epsilon).
lmda: Used to weight the distance. Below this distance, negatives are up weighted and positives are down weighted. Similarly, above this distance negatives are down weighted and positives are up weighted.
center: This represents the expected distance value and will be used to center the values in the pairwise distance matrix. This is used when weighting the positive and negative examples, with the hardest examples receiving an up weight and the easiest examples receiving a down weight. This should be 1 for cosine distances, which we expect to be between [0, 2]. The value will depend on the data for L2 and L1 distances.
name: Loss name. Defaults to 'MultiSimilarityLoss'.
"""

# distance canonicalization
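A quick usage sketch (not part of this diff): constructing the loss and handing it to a model. The hyper-parameter values below are illustrative, not the class defaults.

```python
from tensorflow_similarity.losses import MultiSimilarityLoss

loss = MultiSimilarityLoss(
    distance="cosine",  # pairwise distance used for mining and weighting
    alpha=2.0,          # exponential weight on positive pairs
    beta=40.0,          # exponential weight on negative pairs
    epsilon=0.1,        # drops easy positives/negatives during pair mining
    lmda=0.5,           # threshold around which pairs are re-weighted
)

# Typically passed to a tensorflow_similarity SimilarityModel, e.g.:
# model.compile(optimizer="adam", loss=loss)
```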
34 changes: 17 additions & 17 deletions tensorflow_similarity/losses/pn_loss.py
@@ -40,8 +40,7 @@ def pn_loss(
remove_diagonal: bool = True,
positive_mining_strategy: str = "hard",
negative_mining_strategy: str = "semi-hard",
soft_margin: bool = True,
margin: float = 0.1,
margin: float | None = None,
) -> Any:
"""Positive Negative loss computations.

@@ -60,8 +59,12 @@
embedding from the same class. {'easy', 'hard'}
negative_mining_strategy: What mining strategy to use to select the
embedding from the different class. {'hard', 'semi-hard', 'easy'}
soft_margin: If True, use a soft margin instead of an explicit one.
margin: Use an explicit value for the margin term.
margin: Defines the target margin between positive and negative pairs,
e.g., a margin of 1.0 means that the positive and negative
distances should be 1.0 apart. If None, then a soft margin is used.
A soft margin can be beneficial to pull together samples from the
same class as much as possible. See the paper for more details
https://arxiv.org/pdf/1703.07737.pdf. Defaults to None.

Returns:
Loss: The loss value for the current batch.
@@ -110,7 +113,7 @@ def pn_loss(
neg_distances = tf.math.minimum(pn_distances, neg_distances)

# [PN loss computation]
pn_loss = compute_loss(pos_distances, neg_distances, soft_margin, margin)
pn_loss = compute_loss(pos_distances, neg_distances, margin)

return pn_loss

@@ -147,8 +150,7 @@ def __init__(
distance: Distance | str = "cosine",
positive_mining_strategy: str = "hard",
negative_mining_strategy: str = "semi-hard",
soft_margin: bool = True,
margin: float = 0.1,
margin: float | None = None,
name: str = "PNLoss",
**kwargs,
):
@@ -161,38 +163,36 @@
embedding from the same class. {'easy', 'hard'}
negative_mining_strategy: What mining strategy to use to select the
embedding from the different class. {'hard', 'semi-hard', 'easy'}
soft_margin: If True, use a soft margin instead of an explicit one.
margin: Use an explicit value for the margin term.
name: Loss name.
margin: Defines the target margin between positive and negative pairs,
e.g., a margin of 1.0 means that the positive and negative
distances should be 1.0 apart. If None, then a soft margin is used.
A soft margin can be beneficial to pull together samples from the
same class as much as possible. See the paper for more details
https://arxiv.org/pdf/1703.07737.pdf. Defaults to None.
name: Loss name. Defaults to "PNLoss".

Raises:
ValueError: Invalid positive mining strategy.
ValueError: Invalid negative mining strategy.
ValueError: Margin value is not used when soft_margin is set to True.
"""

# distance canonicalization
distance = distance_canonicalizer(distance)
self.distance = distance
# sanity checks

# sanity checks
if positive_mining_strategy not in ["easy", "hard"]:
raise ValueError("Invalid positive mining strategy.")

if negative_mining_strategy not in ["easy", "hard", "semi-hard"]:
raise ValueError("Invalid negative mining strategy.")

# Ensure users knows its one or the other
if margin != 0.1 and soft_margin:
raise ValueError("Margin value is not used when soft_margin is set to True")

super().__init__(
pn_loss,
name=name,
distance=distance,
positive_mining_strategy=positive_mining_strategy,
negative_mining_strategy=negative_mining_strategy,
soft_margin=soft_margin,
margin=margin,
**kwargs,
)
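A sketch of the API after this change: with soft_margin removed, margin=None selects the soft-margin formulation and a float gives an explicit margin. The margin_term_sketch helper below is a rough illustration of the two formulations from the referenced paper, not the library's compute_loss implementation.

```python
import tensorflow as tf
from tensorflow_similarity.losses import PNLoss

soft = PNLoss(distance="cosine", margin=None)  # soft margin (new default)
hard = PNLoss(distance="cosine", margin=0.1)   # explicit margin

# Roughly, the two margin terms from https://arxiv.org/pdf/1703.07737.pdf
# look like this; the library's compute_loss helper may differ in reductions
# and clipping details.
def margin_term_sketch(pos_d, neg_d, margin=None):
    if margin is None:
        return tf.math.log1p(tf.math.exp(pos_d - neg_d))  # soft margin
    return tf.maximum(pos_d - neg_d + margin, 0.0)         # hard margin
```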
11 changes: 8 additions & 3 deletions tensorflow_similarity/losses/softnn_loss.py
@@ -46,8 +46,7 @@ def soft_nn_loss(
key_labels: labels associated with the key embed.
key_embeddings: Embedded key examples.
distance: Which distance function to use to compute the pairwise.
temperature: Controls relative importance given
to the pair of points.
temperature: Controls relative importance given to the pair of points.
remove_diagonal: Bool. If True, will set diagonal to False in positive pair mask

Returns:
@@ -131,4 +130,10 @@ def __init__(
self.distance = distance
self.temperature = temperature

super().__init__(fn=soft_nn_loss, name=name, distance=distance, temperature=temperature, **kwargs)
super().__init__(
fn=soft_nn_loss,
name=name,
distance=distance,
temperature=temperature,
**kwargs,
)
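To make the temperature parameter concrete, below is a minimal sketch of the usual soft nearest-neighbour formulation over a precomputed pairwise distance matrix. The library's soft_nn_loss additionally handles the query/key split and the remove_diagonal mask, so this is illustrative only.

```python
import tensorflow as tf

def soft_nn_sketch(labels, distances, temperature):
    """labels: (B,) int labels; distances: (B, B) pairwise distances."""
    same_class = tf.cast(tf.equal(labels[:, None], labels[None, :]), tf.float32)
    not_self = 1.0 - tf.eye(tf.shape(labels)[0], dtype=tf.float32)
    # Lower temperature concentrates the weight on the closest pairs;
    # higher temperature spreads it across all pairs.
    weights = tf.exp(-distances / temperature) * not_self
    pos = tf.reduce_sum(weights * same_class, axis=1)
    total = tf.reduce_sum(weights, axis=1)
    return -tf.reduce_mean(tf.math.log(pos / total + 1e-9))
```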
36 changes: 17 additions & 19 deletions tensorflow_similarity/losses/triplet_loss.py
@@ -39,8 +39,7 @@ def triplet_loss(
remove_diagonal: bool = True,
positive_mining_strategy: str = "hard",
negative_mining_strategy: str = "semi-hard",
soft_margin: bool = True,
margin: float = 0.1,
margin: float | None = None,
) -> Any:
"""Triplet loss computations.

@@ -57,9 +56,12 @@
embedding from the same class. {'easy', 'hard'}
negative_mining_strategy: What mining strategy to use to select the
embedding from the different class. {'hard', 'semi-hard', 'easy'}
soft_margin: Defaults to True. Use a soft margin instead of an explicit
one.
margin: Use an explicit value for the margin term.
margin: Defines the target margin between positive and negative pairs,
e.g., a margin of 1.0 means that the positive and negative
distances should be 1.0 apart. If None, then a soft margin is used.
A soft margin can be beneficial to pull together samples from the
same class as much as possible. See the paper for more details
https://arxiv.org/pdf/1703.07737.pdf. Defaults to None.

Returns:
Loss: The loss value for the current batch.
@@ -99,7 +101,7 @@ def triplet_loss(
)

# [Triplet loss computation]
triplet_loss = compute_loss(pos_distances, neg_distances, soft_margin, margin)
triplet_loss = compute_loss(pos_distances, neg_distances, margin)

return triplet_loss

@@ -129,8 +131,7 @@ def __init__(
distance: Distance | str = "cosine",
positive_mining_strategy: str = "hard",
negative_mining_strategy: str = "semi-hard",
soft_margin: bool = True,
margin: float = 0.1,
margin: float | None = None,
name: str = "TripletLoss",
**kwargs,
):
@@ -145,39 +146,36 @@
embedding from the same class. {'easy', 'hard'}
negative_mining_strategy: What mining strategy to use to select the
embedding from the different class. {'hard', 'semi-hard', 'easy'}
soft_margin: Defaults to True. Use a soft margin instead of an
explicit one.
margin: Use an explicit value for the margin term.
name: Loss name. Defaults to TripletLoss.
margin: Defines the target margin between positive and negative pairs,
e.g., a margin of 1.0 means that the positive and negative
distances should be 1.0 apart. If None, then a soft margin is used.
A soft margin can be beneficial to pull together samples from the
same class as much as possible. See the paper for more details
https://arxiv.org/pdf/1703.07737.pdf. Defaults to None.
name: Loss name. Defaults to "TripletLoss".

Raises:
ValueError: Invalid positive mining strategy.
ValueError: Invalid negative mining strategy.
ValueError: Margin value is not used when soft_margin is set to True.
"""

# distance canonicalization
distance = distance_canonicalizer(distance)
self.distance = distance
# sanity checks

# sanity checks
if positive_mining_strategy not in ["easy", "hard"]:
raise ValueError("Invalid positive mining strategy")

if negative_mining_strategy not in ["easy", "hard", "semi-hard"]:
raise ValueError("Invalid negative mining strategy")

# Ensure users knows its one or the other
if margin != 0.1 and soft_margin:
raise ValueError("Margin value is not used when soft_margin is set to True")

super().__init__(
triplet_loss,
name=name,
distance=distance,
positive_mining_strategy=positive_mining_strategy,
negative_mining_strategy=negative_mining_strategy,
soft_margin=soft_margin,
margin=margin,
**kwargs,
)
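The same pattern applies to the refactored TripletLoss; the model/optimizer wiring in the comment is a placeholder, not part of this PR.

```python
from tensorflow_similarity.losses import TripletLoss

loss = TripletLoss(
    distance="cosine",
    positive_mining_strategy="hard",
    negative_mining_strategy="semi-hard",
    margin=None,  # None -> soft margin; pass a float such as 0.1 for an explicit margin
)

# e.g. with a tensorflow_similarity SimilarityModel:
# model.compile(optimizer="adam", loss=loss)
```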