Commit 7c7f6de

fixing mypy errors
Chris Yuan committed Jul 25, 2022
1 parent 568c09a commit 7c7f6de
Showing 9 changed files with 51 additions and 37 deletions.
42 changes: 22 additions & 20 deletions tests/test_nvfuser.py
@@ -23,6 +23,16 @@
)
from xformers.components.nvfuser.utils import build_nvfused

FUSED_PATTERNS = (
[
NVFusedBiasActivationDropout,
NVFusedBiasDropoutRes,
NVFusedBiasDropoutResLayerNorm,
]
if xformers._is_functorch_available
else []
)

# Testing odd (non-power-of-two for instance) shapes on purpose
SHAPES = [
(384, 512),
@@ -41,32 +51,24 @@
LATENT = 128
DEVICES = [torch.device("cuda")]

ACTIVATIONS = [
Activation.ReLU,
Activation.GeLU,
Activation.LeakyReLU,
Activation.SquaredReLU,
Activation.SmeLU,
]


@pytest.mark.skipif(not _gpu_available, reason="GPU is not available")
@pytest.mark.skipif(
not xformers._is_functorch_available, reason="Functorch is not available"
)
@pytest.mark.parametrize(
"fused_pattern",
[
NVFusedBiasActivationDropout,
NVFusedBiasDropoutRes,
NVFusedBiasDropoutResLayerNorm,
],
)
@pytest.mark.skipif(not _gpu_available, reason="GPU is not available")
@pytest.mark.parametrize("fused_pattern", FUSED_PATTERNS)
@pytest.mark.parametrize("shape", SHAPES)
@pytest.mark.parametrize("amp", [False, True])
@pytest.mark.parametrize("bias", [False, True])
@pytest.mark.parametrize(
"activation",
[
Activation.ReLU,
Activation.GeLU,
Activation.LeakyReLU,
Activation.SquaredReLU,
Activation.SmeLU,
],
)
@pytest.mark.parametrize("activation", ACTIVATIONS)
@pytest.mark.parametrize("p", [0, 0.1, 0.5])
@pytest.mark.parametrize("layer_norm_style", [LayerNormStyle.Pre, LayerNormStyle.Post])
def test_nvfused_pattern_parity(
@@ -118,7 +120,7 @@ def test_nvfused_pattern_parity(
@pytest.mark.skipif(
not xformers._is_functorch_available, reason="Functorch is not available"
)
@pytest.mark.parametrize("activation", [Activation.ReLU, Activation.GeLU])
@pytest.mark.parametrize("activation", ACTIVATIONS)
@pytest.mark.parametrize("device", DEVICES)
@pytest.mark.parametrize("p", [0, 0.1, 0.5])
def test_nvfused_mlp(activation: Activation, device: torch.device, p: float):
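Hoisting the parametrize arguments into module-level constants that collapse to an empty list when functorch is missing is the pattern applied above; on machines without the optional backend pytest then simply collects zero cases. A minimal standalone sketch of the same idea, with a hypothetical feature flag and operator names:

import pytest

# Hypothetical flag standing in for xformers._is_functorch_available.
HAS_OPTIONAL_BACKEND = False

# Built once at import time: with the backend missing the list collapses to [],
# so the parametrized test below collects zero cases instead of failing.
FUSED_OPS = ["op_a", "op_b"] if HAS_OPTIONAL_BACKEND else []


@pytest.mark.skipif(not HAS_OPTIONAL_BACKEND, reason="optional backend missing")
@pytest.mark.parametrize("op", FUSED_OPS)
def test_op_parity(op: str) -> None:
    assert isinstance(op, str)
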
6 changes: 3 additions & 3 deletions xformers/__init__.py
@@ -10,11 +10,11 @@
# Please update the doc version in docs/source/conf.py as well.
__version__ = "0.0.12.dev"

_is_sparse_available = True
_is_triton_available = torch.cuda.is_available()
_is_sparse_available: bool = True
_is_triton_available: bool = torch.cuda.is_available()

# Set to true to utilize functorch
_is_functorch_available = False
_is_functorch_available: bool = False


def _register_extensions():
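The explicit `: bool` annotations above pin the declared type of each module-level flag. A short sketch of why that helps, with a hypothetical helper that flips one of the flags:

import torch

# With the annotation, mypy checks the initial value and any later
# reassignment against bool, rather than relying on what it infers
# from the initializer alone.
_is_sparse_available: bool = True
_is_triton_available: bool = torch.cuda.is_available()
_is_functorch_available: bool = False


def enable_functorch() -> None:
    # Hypothetical helper: the reassignment still type-checks as bool.
    global _is_functorch_available
    _is_functorch_available = True
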
17 changes: 10 additions & 7 deletions xformers/benchmarks/benchmark_nvfuser.py
@@ -93,7 +93,9 @@ def bench_nvfused(
NVFusedBiasActivationDropout: "Bias_Act_Dropout",
NVFusedBiasDropoutRes: "Bias_Dropout_Res",
NVFusedBiasDropoutResLayerNorm: "Bias_Dropout_Res_LayerNorm",
}[fused_pattern]
}[
fused_pattern # type: ignore
]

for dtype in [
torch.float16,
@@ -181,13 +183,13 @@ def step(fn, residual, x):
for testcase in testcases:
torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()
torch.cuda.synchronize()
# torch.cuda.synchronize()

time = triton.testing.do_bench(
lambda: testcase.function(x=a), grad_to_none=[a, b]
)[0]

torch.cuda.synchronize()
# torch.cuda.synchronize()
max_memory = torch.cuda.max_memory_allocated() / 2**20

key = f"B={B}, M={M}, K={K}"
@@ -243,12 +245,13 @@ def step(fn, residual, x):
)


# for activation in [Activation.GeLU, None, Activation.SquaredReLU]:
for pattern in [
PATTERNS = [
NVFusedBiasActivationDropout,
NVFusedBiasDropoutRes,
NVFusedBiasDropoutResLayerNorm,
]:
]

for pattern in PATTERNS:
activations: List[Optional[Activation]] = (
[Activation.ReLU, Activation.GeLU, Activation.SquaredReLU]
if pattern == NVFusedBiasActivationDropout
@@ -263,4 +266,4 @@ def step(fn, residual, x):
else [None]
)
for style in styles:
bench_nvfused(pattern, bias, bw, activation, style)
bench_nvfused(pattern, bias, bw, activation, style) # type: ignore
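The benchmark keeps a `# type: ignore` on the call site after hoisting the loop list into PATTERNS. One alternative, sketched below with hypothetical class names, is to annotate the hoisted list as `List[Type[nn.Module]]` so the loop variable gets a single precise type:

from typing import List, Type

import torch.nn as nn


class FusedA(nn.Module):
    pass


class FusedB(nn.Module):
    pass


# Annotating the hoisted list gives every element, and therefore the loop
# variable, the explicit type Type[nn.Module].
PATTERNS: List[Type[nn.Module]] = [FusedA, FusedB]


def bench(pattern: Type[nn.Module]) -> None:
    # Stand-in for bench_nvfused: just instantiate and report the name.
    module = pattern()
    print(type(module).__name__)


for pattern in PATTERNS:
    bench(pattern)
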
1 change: 1 addition & 0 deletions xformers/components/attention/blocksparse.py
@@ -14,6 +14,7 @@
from xformers.components.attention import Attention, AttentionConfig, register_attention

if _is_triton_available:

from triton.ops.blocksparse import matmul as blocksparse_matmul # type: ignore
from triton.ops.blocksparse import softmax as blocksparse_softmax # type: ignore

2 changes: 1 addition & 1 deletion xformers/components/nvfuser/__init__.py
@@ -6,7 +6,7 @@

from xformers import _is_functorch_available

if _is_functorch_available:
if _is_functorch_available: # noqa
try:
from .bias_act_dropout import NVFusedBiasActivationDropout # noqa
from .bias_dropout_res import NVFusedBiasDropoutRes # noqa
5 changes: 4 additions & 1 deletion xformers/components/nvfuser/bias_act_dropout.py
@@ -14,7 +14,10 @@


def _fn(
x: torch.Tensor, bias: Optional[torch.Tensor], activation: nn.Module, prob: float
x: torch.Tensor,
bias: Optional[torch.nn.parameter.Parameter],
activation: nn.Module,
prob: float,
) -> torch.Tensor:
if bias is not None:
x = torch.add(x, bias)
2 changes: 1 addition & 1 deletion xformers/components/nvfuser/bias_dropout_res.py
@@ -13,7 +13,7 @@

def _fn(
x: torch.Tensor,
bias: Optional[torch.Tensor],
bias: Optional[torch.nn.parameter.Parameter],
prob: float,
residual: torch.Tensor,
) -> torch.Tensor:
2 changes: 1 addition & 1 deletion xformers/components/nvfuser/bias_dropout_res_layernorm.py
@@ -15,7 +15,7 @@

def _fn(
x: torch.Tensor,
bias: Optional[torch.Tensor],
bias: Optional[torch.nn.parameter.Parameter],
prob: float,
layer_norm_style: Optional[LayerNormStyle],
norm: nn.Module,
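The three fused helpers above all narrow the `bias` parameter from `Optional[torch.Tensor]` to `Optional[torch.nn.parameter.Parameter]`, matching the learnable bias the modules actually pass in. A self-contained sketch of the narrowed signature (the function name and dropout call are illustrative, not the exact xformers code):

from typing import Optional

import torch
import torch.nn.functional as F
from torch.nn.parameter import Parameter


def fused_bias_dropout(
    x: torch.Tensor,
    bias: Optional[Parameter],  # a learnable Parameter, not an arbitrary Tensor
    prob: float,
) -> torch.Tensor:
    # Add the configured bias, if any, then apply dropout.
    if bias is not None:
        x = torch.add(x, bias)
    return F.dropout(x, p=prob)
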
11 changes: 8 additions & 3 deletions xformers/components/nvfuser/utils.py
@@ -26,8 +26,13 @@ def build_nvfused(
bias_shape = shape[-1] if bias else None
d_model = shape[-1]
init_args: Dict[nn.Module, List[Any]] = {
NVFusedBiasActivationDropout: [p, activation, bias_shape],
NVFusedBiasDropoutRes: [p, bias_shape],
NVFusedBiasDropoutResLayerNorm: [p, d_model, bias_shape, layer_norm_style],
NVFusedBiasActivationDropout: [p, activation, bias_shape], # type: ignore
NVFusedBiasDropoutRes: [p, bias_shape], # type: ignore
NVFusedBiasDropoutResLayerNorm: [ # type: ignore
p,
d_model,
bias_shape,
layer_norm_style,
],
}
return fused_pattern(*init_args[fused_pattern])
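build_nvfused silences mypy on the dict literal because the keys are classes while the annotation declares `Dict[nn.Module, List[Any]]`. A rough sketch of one alternative, keying the table on `Type[nn.Module]` so the declared key type matches the class-valued keys (the classes below are hypothetical stand-ins, and whether this removes every ignore depends on the surrounding signatures):

from typing import Any, Dict, List, Type

import torch.nn as nn


class BiasDropout(nn.Module):
    def __init__(self, p: float) -> None:
        super().__init__()
        self.p = p


class BiasDropoutRes(nn.Module):
    def __init__(self, p: float, d_model: int) -> None:
        super().__init__()
        self.p = p
        self.d_model = d_model


# Keys are classes, so Type[nn.Module] describes them directly.
INIT_ARGS: Dict[Type[nn.Module], List[Any]] = {
    BiasDropout: [0.1],
    BiasDropoutRes: [0.1, 128],
}


def build(pattern: Type[nn.Module]) -> nn.Module:
    # Look up the constructor arguments for the requested pattern and build it.
    return pattern(*INIT_ARGS[pattern])


print(build(BiasDropout))
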
