Skip to content

Commit e8edea9

Browse files
committed
Fix: triton pytest skip
1 parent c1fa677 commit e8edea9

File tree

4 files changed

+23
-23
lines changed

4 files changed

+23
-23
lines changed

benchmarks/benchmark_blockwise_scaled_linear_triton.py

+16-14
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,24 @@
1-
import pandas as pd
21
import torch
3-
from tqdm import tqdm
42

53
if torch.cuda.is_available():
4+
import pandas as pd
5+
from tqdm import tqdm
66
from triton.testing import do_bench
77

8-
from torchao.float8.float8_utils import compute_error
9-
from torchao.ops import rowwise_scaled_linear_cutlass_s8s4
10-
from torchao.prototype.blockwise_fp8.blockwise_fp8_gemm_triton import blockwise_fp8_gemm
11-
from torchao.prototype.blockwise_fp8.blockwise_quantization import (
12-
fp8_blockwise_act_quant,
13-
fp8_blockwise_weight_quant,
14-
)
15-
from torchao.quantization.quant_api import (
16-
_int4_symm_per_token_quant_cutlass,
17-
_int8_symm_per_token_reduced_range_quant_cutlass,
18-
)
19-
from torchao.utils import is_sm_at_least_89
8+
from torchao.float8.float8_utils import compute_error
9+
from torchao.ops import rowwise_scaled_linear_cutlass_s8s4
10+
from torchao.prototype.blockwise_fp8.blockwise_fp8_gemm_triton import (
11+
blockwise_fp8_gemm,
12+
)
13+
from torchao.prototype.blockwise_fp8.blockwise_quantization import (
14+
fp8_blockwise_act_quant,
15+
fp8_blockwise_weight_quant,
16+
)
17+
from torchao.quantization.quant_api import (
18+
_int4_symm_per_token_quant_cutlass,
19+
_int8_symm_per_token_reduced_range_quant_cutlass,
20+
)
21+
from torchao.utils import is_sm_at_least_89
2022

2123

2224
def benchmark_microseconds(f, *args):

test/prototype/test_blockwise_triton.py

+2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import pytest
22
import torch
33

4+
triton = pytest.importorskip("triton", reason="Triton required to run this test")
5+
46
from torchao.prototype.blockwise_fp8.blockwise_fp8_gemm_triton import blockwise_fp8_gemm
57
from torchao.prototype.blockwise_fp8.blockwise_quantization import (
68
fp8_blockwise_act_quant,

torchao/prototype/blockwise_fp8/blockwise_fp8_gemm_triton.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
import torch
2-
3-
if torch.cuda.is_available():
4-
import triton
5-
import triton.language as tl
6-
from triton import Config
2+
import triton
3+
import triton.language as tl
4+
from triton import Config
75

86
# Original implementation at https://github.com/deepseek-ai/DeepSeek-V3/blob/main/inference/kernel.py
97

torchao/prototype/blockwise_fp8/blockwise_quantization.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
11
from typing import Tuple
22

33
import torch
4-
5-
if torch.cuda.is_available():
6-
import triton
7-
import triton.language as tl
4+
import triton
5+
import triton.language as tl
86

97

108
@triton.jit

0 commit comments

Comments (0)