Lightning-AI · Borda · Mar 28, 2023 · Mar 16, 2023 · Mar 17, 2023 · Mar 17, 2023
@@ -96,6 +96,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Changed `__iter__` method from raising `NotImplementedError` to `TypeError` by setting to `None` ([#1538](https://github.com/Lightning-AI/metrics/pull/1538))
 
 
+- Allowed FID with `torch.float64` ([#1628](https://github.com/Lightning-AI/metrics/pull/1628))
+
+
 ### Deprecated
 
 -

@@ -19,6 +19,7 @@
 from torch import Tensor
 from torch.autograd import Function
 from torch.nn import Module
+from torch.nn.functional import adaptive_avg_pool2d
 
 from torchmetrics.metric import Metric
 from torchmetrics.utilities import rank_zero_info
@@ -30,11 +31,16 @@
 
 if _TORCH_FIDELITY_AVAILABLE:
     from torch_fidelity.feature_extractor_inceptionv3 import FeatureExtractorInceptionV3 as _FeatureExtractorInceptionV3
+    from torch_fidelity.helpers import vassert
+    from torch_fidelity.interpolate_compat_tensorflow import interpolate_bilinear_2d_like_tensorflow1x
 else:
 
     class _FeatureExtractorInceptionV3(Module):
         pass
 
+    vassert = None
+    interpolate_bilinear_2d_like_tensorflow1x = None
+
     __doctest_skip__ = ["FrechetInceptionDistance", "FrechetInceptionDistance.plot"]
 
 
@@ -59,9 +65,94 @@ def train(self, mode: bool) -> "NoTrainInceptionV3":
         """Force network to always be in evaluation mode."""
         return super().train(False)
 
+    def _torch_fidelity_forward(self, x: Tensor) -> Tensor:
+        """Forward method of inception net.
+
+        Copy of the forward method from this file:
+        https://github.com/toshas/torch-fidelity/blob/master/torch_fidelity/feature_extractor_inceptionv3.py
+        with a single line change regarding the casting of `x` in the beginning.
+
+        Corresponding license file (Apache License, Version 2.0):
+        https://github.com/toshas/torch-fidelity/blob/master/LICENSE.md
+        """
+        vassert(torch.is_tensor(x) and x.dtype == torch.uint8, "Expecting image as torch.Tensor with dtype=torch.uint8")
+        features = {}
+        remaining_features = self.features_list.copy()
+
+        x = x.to(self._dtype) if hasattr(self, "_dtype") else x.to(torch.float)
+        x = interpolate_bilinear_2d_like_tensorflow1x(
+            x,
+            size=(self.INPUT_IMAGE_SIZE, self.INPUT_IMAGE_SIZE),
+            align_corners=False,
+        )
+        x = (x - 128) / 128
+
+        x = self.Conv2d_1a_3x3(x)
+        x = self.Conv2d_2a_3x3(x)
+        x = self.Conv2d_2b_3x3(x)
+        x = self.MaxPool_1(x)
+
+        if "64" in remaining_features:
+            features["64"] = adaptive_avg_pool2d(x, output_size=(1, 1)).squeeze(-1).squeeze(-1)
+            remaining_features.remove("64")
+            if len(remaining_features) == 0:
+                return tuple(features[a] for a in self.features_list)
+
+        x = self.Conv2d_3b_1x1(x)
+        x = self.Conv2d_4a_3x3(x)
+        x = self.MaxPool_2(x)
+
+        if "192" in remaining_features:
+            features["192"] = adaptive_avg_pool2d(x, output_size=(1, 1)).squeeze(-1).squeeze(-1)
+            remaining_features.remove("192")
+            if len(remaining_features) == 0:
+                return tuple(features[a] for a in self.features_list)
+
+        x = self.Mixed_5b(x)
+        x = self.Mixed_5c(x)
+        x = self.Mixed_5d(x)
+        x = self.Mixed_6a(x)
+        x = self.Mixed_6b(x)
+        x = self.Mixed_6c(x)
+        x = self.Mixed_6d(x)
+        x = self.Mixed_6e(x)
+
+        if "768" in remaining_features:
+            features["768"] = adaptive_avg_pool2d(x, output_size=(1, 1)).squeeze(-1).squeeze(-1)
+            remaining_features.remove("768")
+            if len(remaining_features) == 0:
+                return tuple(features[a] for a in self.features_list)
+
+        x = self.Mixed_7a(x)
+        x = self.Mixed_7b(x)
+        x = self.Mixed_7c(x)
+        x = self.AvgPool(x)
+        x = torch.flatten(x, 1)
+
+        if "2048" in remaining_features:
+            features["2048"] = x
+            remaining_features.remove("2048")
+            if len(remaining_features) == 0:
+                return tuple(features[a] for a in self.features_list)
+
+        if "logits_unbiased" in remaining_features:
+            x = x.mm(self.fc.weight.T)
+            # N x 1008 (num_classes)
+            features["logits_unbiased"] = x
+            remaining_features.remove("logits_unbiased")
+            if len(remaining_features) == 0:
+                return tuple(features[a] for a in self.features_list)
+
+            x = x + self.fc.bias.unsqueeze(0)
+        else:
+            x = self.fc(x)
+
+        features["logits"] = x
+        return tuple(features[a] for a in self.features_list)
+
     def forward(self, x: Tensor) -> Tensor:
         """Forward pass of neural network with reshaping of output."""
-        out = super().forward(x)
+        out = self._torch_fidelity_forward(x)
         return out[0].reshape(x.shape[0], -1)
 
 
@@ -151,6 +242,10 @@ class FrechetInceptionDistance(Metric):
     flag ``real`` determines if the images should update the statistics of the real distribution or the
     fake distribution.
 
+    This metric is known to be unstable in its calculations. For the best results, we recommend computing it in
+    `torch.float64` (the default is `torch.float32`), which can be set using the `.set_dtype` method of the
+    metric.
+
     .. note:: using this metrics requires you to have ``scipy`` install. Either install as ``pip install
         torchmetrics[image]`` or ``pip install scipy``
 
@@ -307,6 +402,17 @@ def reset(self) -> None:
         else:
             super().reset()
 
+    def set_dtype(self, dst_type: Union[str, torch.dtype]) -> "Metric":
+        """Transfer all metric state to a specific dtype. Special version of the standard `type` method.
+
+        Arguments:
+            dst_type (type or string): the desired type.
+        """
+        out = super().set_dtype(dst_type)
+        if isinstance(out.inception, NoTrainInceptionV3):
+            out.inception._dtype = dst_type
+        return out
+
     def plot(
         self, val: Optional[Union[Tensor, Sequence[Tensor]]] = None, ax: Optional[_AX_TYPE] = None
     ) -> _PLOT_OUT_TYPE:

@@ -199,3 +199,15 @@ def test_normalize_arg_false():
     metric = FrechetInceptionDistance(normalize=False)
     with pytest.raises(ValueError, match="Expecting image as torch.Tensor with dtype=torch.uint8"):
         metric.update(img, real=True)
+
+
+def test_dtype_transfer_to_submodule():
+    """Test that change in dtype also changes the default inception net."""
+    imgs = torch.randn(1, 3, 256, 256)
+    imgs = ((imgs.clamp(-1, 1) / 2 + 0.5) * 255).to(torch.uint8)
+
+    metric = FrechetInceptionDistance(feature=64)
+    metric.set_dtype(torch.float64)
+
+    out = metric.inception(imgs)
+    assert out.dtype == torch.float64
-Original file line number
+Diff line change
@@ Expand Up @@
     - Changed `__iter__` method from raising `NotImplementedError` to `TypeError` by setting to `None` ([#1538](https://github.com/Lightning-AI/metrics/pull/1538))
+    - Allowed FID with `torch.float64` ([#1628](https://github.com/Lightning-AI/metrics/pull/1628))
     ### Deprecated
     -
@@ Expand Down @@