Direct call on ROCm

ROCm · Jan 28, 2025 · c8b8654 · c8b8654
1 parent a892ecc
commit c8b8654
Showing 1 changed file with 2 additions and 2 deletions.
diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py
@@ -117,11 +117,11 @@ def __init__(
         self.backend = backend_name_to_enum(attn_backend.get_name())
         self.dtype = dtype
 
-        # For cuda and cpu platforms, we control how
+        # For cuda-alike (CUDA and ROCm) and cpu platforms, we control how
         # torch.compile works by registering the attention as one giant
         # opaque custom op. For other platforms, we directly call them
         # and let torch.compile handle them.
-        self.use_direct_call = not current_platform.is_cuda(
+        self.use_direct_call = not current_platform.is_cuda_alike(
         ) and not current_platform.is_cpu()
 
         self.use_output = attn_backend.accept_output_buffer