Do not run AccelerateMatmul for pre-Volta GPUs
Himanshu Pathak authored and geekypathak21 committed Apr 21, 2023
1 parent 192f889 commit 31ee8fc
Showing 2 changed files with 12 additions and 0 deletions.
lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp (2 additions & 0 deletions)
@@ -87,6 +87,8 @@ class BlockedToMMA : public mlir::RewritePattern {
  mlir::LogicalResult
  matchAndRewrite(mlir::Operation *op,
                  mlir::PatternRewriter &rewriter) const override {
    if (computeCapability < 70)
      return failure();
    auto dotOp = cast<triton::DotOp>(op);
    // TODO: Check data-types and SM compatibility
    auto oldRetType = dotOp.getResult().getType().cast<RankedTensorType>();
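The guard above makes BlockedToMMA return failure() on devices below compute capability 70 (Volta), so the dot keeps its blocked layout and is lowered through the FMA path instead of a tensor-core MMA layout. As a rough illustration (not part of this commit), the sketch below mirrors the same capability encoding on the host side; the helper name mma_layout_enabled is made up here, and it assumes a CUDA build of PyTorch for querying the device.

import torch

def mma_layout_enabled() -> bool:
    # Mirrors the computeCapability < 70 guard added to BlockedToMMA:
    # (major, minor) is encoded as major * 10 + minor, e.g. (7, 0) -> 70 for Volta.
    if not torch.cuda.is_available():
        return False
    major, minor = torch.cuda.get_device_capability()
    return major * 10 + minor >= 70

print("Eligible for MMA (tensor core) layout:", mma_layout_enabled())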
python/triton/language/semantic.py (10 additions & 0 deletions)
@@ -5,6 +5,8 @@

from . import core as tl
from triton._C.libtriton.triton import ir
import torch
import triton

T = TypeVar('T')

@@ -1180,6 +1182,14 @@ def dot(lhs: tl.tensor,
        allow_tf32: bool,
        out_dtype: tl.dtype,
        builder: ir.builder) -> tl.tensor:
    if torch.version.hip is None:
        device = triton.runtime.jit.get_current_device()
        capability = triton.runtime.jit.get_device_capability(device)
        capability = capability[0] * 10 + capability[1]
        if capability < 70:
            assert (
                not rhs.dtype.is_fp16() and not rhs.dtype.is_fp8()
            ), "Float8 and Float16 types are not supported for compute capability < 70 (use Float32 or above)"
    assert lhs.type.is_block() and rhs.type.is_block()
    assert lhs.dtype == rhs.dtype, "lhs and rhs must have the same dtype!"
    assert len(lhs.shape) == 2 and len(rhs.shape) == 2
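The Python-side change mirrors the C++ guard at the API level: on the CUDA backend (torch.version.hip is None), tl.dot now asserts when it is given Float16 or Float8 operands on a device with compute capability below 70. A kernel can stay runnable on pre-Volta GPUs by widening its operands to Float32 before the dot, as in the hedged sketch below; this example is not from the commit, and the kernel name small_matmul and the 16x16 tile size are arbitrary illustrative choices that assume a CUDA device is available.

import torch
import triton
import triton.language as tl

@triton.jit
def small_matmul(a_ptr, b_ptr, c_ptr, BLOCK: tl.constexpr):
    # One program multiplies a single BLOCK x BLOCK row-major tile.
    rows = tl.arange(0, BLOCK)[:, None]
    cols = tl.arange(0, BLOCK)[None, :]
    a = tl.load(a_ptr + rows * BLOCK + cols)
    b = tl.load(b_ptr + rows * BLOCK + cols)
    # With this change, fp16/fp8 operands to tl.dot assert on capability < 70,
    # so the inputs are widened to fp32 to keep the kernel portable to pre-Volta GPUs.
    c = tl.dot(a.to(tl.float32), b.to(tl.float32))
    tl.store(c_ptr + rows * BLOCK + cols, c)

a = torch.randn((16, 16), device="cuda", dtype=torch.float16)
b = torch.randn((16, 16), device="cuda", dtype=torch.float16)
c = torch.empty((16, 16), device="cuda", dtype=torch.float32)
small_matmul[(1,)](a, b, c, BLOCK=16)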
