From 22a9f827b668e1b1b480bc09d620e2d32ba94eb4 Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 31 May 2024 07:25:37 +0000 Subject: [PATCH] comment on todo --- vllm/model_executor/layers/fused_moe/ampere_fp8_fused_moe.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vllm/model_executor/layers/fused_moe/ampere_fp8_fused_moe.py b/vllm/model_executor/layers/fused_moe/ampere_fp8_fused_moe.py index bd38913fa8e15..82f5337d8d1c9 100644 --- a/vllm/model_executor/layers/fused_moe/ampere_fp8_fused_moe.py +++ b/vllm/model_executor/layers/fused_moe/ampere_fp8_fused_moe.py @@ -138,6 +138,8 @@ def fused_moe_kernel( ).to(tl.float16) b = tl.load(b_ptrs, mask=offs_k[:, None] < K - k * BLOCK_SIZE_K, other=0.0) + # TODO(wenxh): there is a bug in Triton 2.2/2.3 where only "=l" works; "=r" + # results in an error in the LLVM check (a low-level bug). b = tl.inline_asm_elementwise( asm = "{ \n" ".reg .b32 a<2>, b<2>; \n" # if input = 0xf1f2f3f4