diff --git a/test/test_matmul_cuda.py b/test/test_matmul_cuda.py
index 302fe3638f02c0..99527e62e3b165 100644
--- a/test/test_matmul_cuda.py
+++ b/test/test_matmul_cuda.py
@@ -110,7 +110,7 @@ def cublas_addmm(self, size: int, dtype: torch.dtype, reduced_precision: bool =
     @onlyCUDA
     @skipIfRocmVersionLessThan((5, 2))
     # imported 'tol' as 'xtol' to avoid aliasing in code above
-    @toleranceOverride({torch.float16: xtol(atol=1e-1, rtol=1e-1),
+    @toleranceOverride({torch.float16: xtol(atol=4e-1, rtol=1e-1),
                         torch.bfloat16: xtol(atol=1e-1, rtol=1e-1),
                         torch.float32: xtol(atol=1e-1, rtol=1e-1)})
     @dtypes(torch.float16, torch.bfloat16, torch.float32)