Raise if kernel not supported on device

ghstack-source-id: e6bcca5ca4b995751e3f60c40bc59791c23b44f1
Pull Request resolved: #509
facebookresearch · Nov 10, 2022 · 034464a · 034464a
1 parent 3a16b20
commit 034464a
Show file tree

Hide file tree

Showing 2 changed files with 10 additions and 1 deletion.
diff --git a/xformers/components/swiglu/cuda/dual_gemm_silu_identity_mul.cu b/xformers/components/swiglu/cuda/dual_gemm_silu_identity_mul.cu
@@ -67,6 +67,7 @@ std::tuple<at::Tensor, at::Tensor, at::Tensor> dual_gemm_silu_identity_mul_(
   // Optionally, we might not need intermediate GEMM outputs
   constexpr bool kStoreD0 = true;
   constexpr bool kStoreD1 = true;
+  using ArchTag = cutlass::arch::Sm80;
 
   using DualGemm = cutlass::gemm::device::DualGemm<
     scalar_t,
@@ -77,7 +78,7 @@ std::tuple<at::Tensor, at::Tensor, at::Tensor> dual_gemm_silu_identity_mul_(
     cutlass::layout::RowMajor,
     ElementAccumulator,
     cutlass::arch::OpClassTensorOp,
-    cutlass::arch::Sm80,
+    ArchTag,
     ThreadblockShape,
     WarpShape,
     InstructionShape,
@@ -90,6 +91,10 @@ std::tuple<at::Tensor, at::Tensor, at::Tensor> dual_gemm_silu_identity_mul_(
     kStoreD1,
     kSplitKSerial
   >;
+  {
+    cudaDeviceProp* p = at::cuda::getDeviceProperties(x.device().index());
+    TORCH_CHECK(p->major * 10 + p->minor >= ArchTag::kMinComputeCapability, "GPU not supported");
+  }
 
   int split_k_slices = DualGemm::kSplitKSerial ? 2 : 1;
   using RefA = typename cutlass::TensorRef<typename DualGemm::ElementA, typename DualGemm::LayoutA>;

diff --git a/xformers/components/swiglu/cuda/gemm_fused_operand_sum.cu b/xformers/components/swiglu/cuda/gemm_fused_operand_sum.cu
@@ -102,6 +102,10 @@ void gemm_fused_operand_sum_(
   cutlass::ComplexTransform::kNone,
   cutlass::ComplexTransform::kNone
   >;
+  {
+    cudaDeviceProp* p = at::cuda::getDeviceProperties(a.device().index());
+    TORCH_CHECK(p->major * 10 + p->minor >= SmArch::kMinComputeCapability, "GPU not supported");
+  }
 
   // Below is the reduction kernel used in the case of parallel split-k
   using ReduceGemmSplitKShape = cutlass::MatrixShape<4, 64>;