Implementation of calibration error metrics #394
Merged · 51 commits · Aug 3, 2021

Changes from 1 commit

Commits:
a202225  basic ECE functional + class metric working (edwardclem, May 21, 2021)
67300d5  max calibration error and multidim-multiclass (edwardclem, Jun 4, 2021)
2a65d97  comb metrics, working functional l2, class broken (edwardclem, Jun 6, 2021)
0cd7a33  removed debias term, ddp still broken (edwardclem, Jul 23, 2021)
be2cee1  updated docs (edwardclem, Jul 23, 2021)
d6fe8ab  Merge branch 'master' into master (Borda, Jul 26, 2021)
91b0451  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jul 26, 2021)
d9e004c  fixed part of ddp, added changelog (edwardclem, Jul 31, 2021)
cd6a334  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jul 31, 2021)
e316f64  fixed ddp, still need to fix input unit tests (edwardclem, Jul 31, 2021)
ed2430f  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jul 31, 2021)
190ea63  removing sklearn_calibration (edwardclem, Jul 31, 2021)
b2e8ca6  more docstring fixes (edwardclem, Jul 31, 2021)
5c661f0  fixed tests for invalid inputs and added regex (edwardclem, Jul 31, 2021)
41d6bd8  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jul 31, 2021)
8dd2a2d  added test for non-int val bins (edwardclem, Jul 31, 2021)
ab0f0e1  Merge branch 'master' of github.com:edwardclem/metrics (edwardclem, Jul 31, 2021)
9e542fb  removed doctest from calibration_error (edwardclem, Jul 31, 2021)
7a980a7  flake8/typing cleanup (edwardclem, Jul 31, 2021)
8f837ad  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jul 31, 2021)
6d711a2  fix docs (edwardclem, Jul 31, 2021)
9094203  Merge branch 'master' into master (SkafteNicki, Aug 2, 2021)
b50f155  Apply suggestions from code review (Borda, Aug 2, 2021)
5fcac0c  Merge branch 'master' into master (SkafteNicki, Aug 2, 2021)
59c0338  fix order (SkafteNicki, Aug 2, 2021)
9051a1a  flake8 + rendering (SkafteNicki, Aug 2, 2021)
f97be31  fix styling (SkafteNicki, Aug 2, 2021)
98fc849  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Aug 2, 2021)
8682241  Apply suggestions from code review (Borda, Aug 2, 2021)
b11a80b  Update torchmetrics/classification/calibration_error.py (SkafteNicki, Aug 2, 2021)
e6cb17c  Merge branch 'master' into master (SkafteNicki, Aug 2, 2021)
88365ad  Merge branch 'master' into master (Borda, Aug 2, 2021)
a81252b  Merge branch 'master' into master (Borda, Aug 2, 2021)
086886f  Apply suggestions from code review (Borda, Aug 2, 2021)
77da9ce  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Aug 2, 2021)
c11acc9  : (Borda, Aug 2, 2021)
9fa9863  Merge branch 'master' of https://github.com/edwardclem/metrics into e… (Borda, Aug 2, 2021)
53c58b6  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Aug 2, 2021)
c0db244  ... (Borda, Aug 2, 2021)
3bbc9f5  Merge branch 'master' of https://github.com/edwardclem/metrics into e… (Borda, Aug 2, 2021)
f50ec75  Merge branch 'master' of github.com:edwardclem/metrics (edwardclem, Aug 3, 2021)
7fb4508  fixed class variable issue (edwardclem, Aug 3, 2021)
2d71884  added docstrings (edwardclem, Aug 3, 2021)
940fa6c  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Aug 3, 2021)
8d5a4a1  more flake8 fixes (edwardclem, Aug 3, 2021)
939bb75  Merge branch 'master' of github.com:edwardclem/metrics (edwardclem, Aug 3, 2021)
4870b3f  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Aug 3, 2021)
75cfcac  removed duplicate reference (edwardclem, Aug 3, 2021)
3d5e91a  Merge branch 'master' of github.com:edwardclem/metrics (edwardclem, Aug 3, 2021)
7e9cf6d  Apply suggestions from code review (Borda, Aug 3, 2021)
984b879  Merge branch 'master' into master (mergify[bot], Aug 3, 2021)
fixed ddp, still need to fix input unit tests
edwardclem committed Jul 31, 2021

commit e316f645637985141f56ea677b0debf6eb8d7d04 (signed with the committer's verified signature)
tests/classification/test_calibration_error.py (14 additions, 1 deletion)

@@ -97,5 +97,18 @@ def test_invalid_input(preds, targets):
     ]
 )
 def test_invalid_norm(preds, target):
-    with pytest.raises(ValueError):
+    with pytest.raises(ValueError, match="Norm l3 is not supported. Please select from l1, l2, or max. "):
         calibration_error(preds, target, norm="l3")
+
+
+@pytest.mark.parametrize("n_bins", [-10, 0, -1])
+@pytest.mark.parametrize(
+    "preds, target", [
+        (_input_binary_prob.preds, _input_binary_prob.target),
+        (_input_mcls_prob.preds, _input_mcls_prob.target),
+        (_input_mdmc_prob.preds, _input_mdmc_prob.target),
+    ]
+)
+def test_invalid_bins(preds, target, n_bins):
+    with pytest.raises(ValueError, match=f"Expected argument `n_bins` to be an int larger than 0 but got {n_bins}"):
+        calibration_error(preds, target, n_bins=n_bins)
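
For orientation, a minimal sketch of the behaviour these parametrized tests pin down; the import path `torchmetrics.functional.calibration_error` is an assumption about where the metric is exposed once the PR lands:

    import torch
    from torchmetrics.functional import calibration_error  # assumed import path once the PR is merged

    preds = torch.tensor([0.25, 0.75, 0.60, 0.10])  # binary confidence scores
    target = torch.tensor([0, 1, 1, 0])

    # A supported norm returns a scalar calibration error ...
    print(calibration_error(preds, target, n_bins=2, norm="l1"))

    # ... while an unsupported norm raises the ValueError the regex test matches.
    try:
        calibration_error(preds, target, norm="l3")
    except ValueError as err:
        print(err)  # Norm l3 is not supported. Please select from l1, l2, or max.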
torchmetrics/classification/calibration_error.py (8 additions, 9 deletions)

@@ -55,20 +55,17 @@ def __init__(

         Where :math:`p_i` is the top-1 prediction accuracy in bin i and :math:`c_i` is the average confidence of predictions in bin i.

-
-        # NOTE: L2-norm debiasing is not yet supported.
-
+        NOTE: L2-norm debiasing is not yet supported.

         Args:
-            n_bins (int, optional): Number of bins to use when computing t. Defaults to 15.
+            n_bins (int, optional): Number of bins to use when computing probabilities and accuracies. Defaults to 15.
             norm (str, optional): Norm used to compare empirical and expected probability bins.
                 Defaults to "l1", or Expected Calibration Error.
-            debias (bool, optional): Applies debiasing term, only implemented for l2 norm. Defaults to True.
             compute_on_step (bool, optional): Forward only calls ``update()`` and returns None if this is set to False. Defaults to False.
             dist_sync_on_step (bool, optional): Synchronize metric state across processes at each ``forward()``
                 before returning the value at the step. Defaults to False.
             process_group (Optional[Any], optional): Specify the process group on which synchronization is called. Defaults to None (which selects the entire world).
             dist_sync_fn (Callable, optional): Callback that performs the ``allgather`` operation on the metric state. When ``None``, DDP
                 will be used to perform the ``allgather``. Defaults to None.
         """
         super().__init__(
             compute_on_step=compute_on_step,

@@ -80,20 +77,22 @@ def __init__(
         if norm not in ["l1", "l2", "max"]:
             raise ValueError(f"Norm {norm} is not supported. Please select from l1, l2, or max. ")

+        if not isinstance(n_bins, int) or n_bins <= 0:
+            raise ValueError(f"Expected argument `n_bins` to be an int larger than 0 but got {n_bins}")
         self.n_bins = n_bins
         self.register_buffer("bin_boundaries", torch.linspace(0, 1, n_bins + 1))
         self.norm = norm

         self.add_state("confidences", [], dist_reduce_fx="cat")
         self.add_state("accuracies", [], dist_reduce_fx="cat")

-    def update(self, preds: Tensor, target: Tensor):
+    def update(self, preds: Tensor, target: Tensor) -> None:
         """
         Computes top-level confidences and accuracies for the input probabilities and appends them to internal state.

         Args:
-            preds (Tensor): [description]
-            target (Tensor): [description]
+            preds (Tensor): Model output probabilities.
+            target (Tensor): Ground-truth target class labels.
         """
         confidences, accuracies = _ce_update(preds, target)

@@ -105,7 +104,7 @@ def compute(self) -> Tensor:
         Computes calibration error across all confidences and accuracies.

         Returns:
-            Tensor: [description]
+            Tensor: Calibration error across previously collected examples.
         """
         confidences = dim_zero_cat(self.confidences)
         accuracies = dim_zero_cat(self.accuracies)
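
As a hedged usage sketch of the state-based version (assuming the class ends up exported as `torchmetrics.CalibrationError` after the merge): each `update()` appends per-batch confidences and accuracies to the list states, which `dist_reduce_fx="cat"` concatenates across DDP processes before `compute()` bins them once:

    import torch
    from torchmetrics import CalibrationError  # assumed export path once the PR is merged

    metric = CalibrationError(n_bins=15, norm="l1")

    # Two batches of binary probabilities; states accumulate across update() calls.
    metric.update(torch.tensor([0.25, 0.75, 0.60]), torch.tensor([0, 1, 1]))
    metric.update(torch.tensor([0.90, 0.20, 0.50]), torch.tensor([1, 0, 0]))

    # compute() concatenates the accumulated states and reduces them to a scalar.
    print(metric.compute())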
torchmetrics/functional/classification/calibration_error.py (11 additions, 8 deletions)

@@ -14,15 +14,15 @@
 from typing import Optional, Tuple

 import torch
-from torch import Tensor, tensor
+from torch import Tensor, tensor, FloatTensor
 from torch.nn import functional as F

 from torchmetrics.utilities.checks import _input_format_classification
 from torchmetrics.utilities.enums import DataType


 def _ce_compute(
-    confidences: Tensor, accuracies: Tensor, bin_boundaries: Tensor, norm: str = "l1", debias: bool = False
+    confidences: FloatTensor, accuracies: FloatTensor, bin_boundaries: FloatTensor, norm: str = "l1", debias: bool = False
 ) -> Tensor:

     conf_bin = torch.zeros_like(bin_boundaries)

@@ -56,7 +56,7 @@ def _ce_compute(
     return ce


-def _ce_update(preds: Tensor, target: Tensor) -> Tuple[Tensor, Tensor]:
+def _ce_update(preds: Tensor, target: Tensor) -> Tuple[FloatTensor, FloatTensor]:
     _, _, mode = _input_format_classification(preds, target)

     if mode == DataType.BINARY:

@@ -74,8 +74,8 @@ def _ce_update(preds: Tensor, target: Tensor) -> Tuple[Tensor, Tensor]:
         raise ValueError(
             f"Calibration error is not well-defined for data with size {preds.size()} and targets {target.size()}"
         )
-
-    return confidences, accuracies
+    # must be cast to float for ddp allgather to work
+    return confidences.float(), accuracies.float()


 def calibration_error(preds: Tensor, target: Tensor, n_bins: int = 15, norm: str = "l1"):

@@ -106,8 +106,8 @@ def calibration_error(preds: Tensor, target: Tensor, n_bins: int = 15, norm: str

     Args:
-        preds (Tensor): [description]
-        target (Tensor): [description]
+        preds (Tensor): Model output probabilities.
+        target (Tensor): Ground-truth target class labels.
         n_bins (int, optional): Number of bins to use when computing probabilities and accuracies. Defaults to 15.
         norm (str, optional): Norm used to compare empirical and expected probability bins.
             Defaults to "l1", or Expected Calibration Error.

@@ -118,6 +118,9 @@ def calibration_error(preds: Tensor, target: Tensor, n_bins: int = 15, norm: str

     confidences, accuracies = _ce_update(preds, target)

-    bin_boundaries = torch.linspace(0, 1, n_bins + 1).to(preds.device)
+    if not isinstance(n_bins, int) or n_bins <= 0:
+        raise ValueError(f"Expected argument `n_bins` to be an int larger than 0 but got {n_bins}")
+
+    bin_boundaries = torch.linspace(0, 1, n_bins + 1, dtype=torch.float).to(preds.device)

     return _ce_compute(confidences, accuracies, bin_boundaries, norm=norm)
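
To ground the :math:`p_i` / :math:`c_i` notation from the class docstring, here is a self-contained sketch of the L1 ("expected calibration error") reduction that `_ce_compute` performs over the boundaries built by `torch.linspace(0, 1, n_bins + 1)`; `ece_sketch` is a hypothetical helper for illustration, not part of the PR:

    import torch

    def ece_sketch(confidences: torch.Tensor, accuracies: torch.Tensor, n_bins: int = 15) -> torch.Tensor:
        # L1 calibration error: sum over bins of |c_i - p_i| * (fraction of samples in bin i),
        # where p_i is the top-1 accuracy and c_i the mean confidence inside bin i.
        bin_boundaries = torch.linspace(0, 1, n_bins + 1, dtype=torch.float)
        ece = torch.zeros(1)
        for lower, upper in zip(bin_boundaries[:-1], bin_boundaries[1:]):
            in_bin = (confidences > lower) & (confidences <= upper)
            prop_in_bin = in_bin.float().mean()  # weight of this bin
            if prop_in_bin > 0:
                p_i = accuracies[in_bin].float().mean()  # top-1 accuracy in bin i
                c_i = confidences[in_bin].mean()         # average confidence in bin i
                ece += torch.abs(c_i - p_i) * prop_in_bin
        return ece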