Skip to content

Commit

Permalink
Direct call on ROCm
Browse files Browse the repository at this point in the history
  • Loading branch information
gshtras committed Jan 28, 2025
1 parent a892ecc commit c8b8654
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions vllm/attention/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,11 +117,11 @@ def __init__(
self.backend = backend_name_to_enum(attn_backend.get_name())
self.dtype = dtype

- # For cuda and cpu platforms, we control how
+ # For cuda-alike (CUDA and ROCM) and cpu platforms, we control how
# torch.compile works by registering the attention as one giant
# opaque custom op. For other platforms, we directly call them
# and let torch.compile handle them.
- self.use_direct_call = not current_platform.is_cuda(
+ self.use_direct_call = not current_platform.is_cuda_alike(
) and not current_platform.is_cpu()

self.use_output = attn_backend.accept_output_buffer
Expand Down

0 comments on commit c8b8654

Please sign in to comment.