Skip to content

Commit

Permalink
Browse files: browse the repository at this point in the history
  • Loading branch information
Your Name committed Aug 28, 2024
1 parent 0dc380c commit a64afdc
Show file tree
Hide file tree
Showing 3 changed files with 3 additions and 0 deletions.
1 change: 1 addition & 0 deletions onnxruntime/contrib_ops/cuda/bert/attention_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,7 @@ Status EfficientAttention(
p.v_head_size = parameters.v_head_size;
p.causal = parameters.is_unidirectional;
p.scale = scale;
p.use_smooth_softmax = false;

if (nullptr == data.mask_index) {
p.seqlen_k_ptr = nullptr;
Expand Down
1 change: 1 addition & 0 deletions onnxruntime/contrib_ops/cuda/bert/packed_attention_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,7 @@ Status FusedScaledDotProductAttentionCutlass(
p.qk_head_size = parameters.head_size;
p.v_head_size = parameters.v_head_size;
p.causal = false;
p.use_smooth_softmax = false;
p.scale = parameters.scale == 0.0f ? 1.f / sqrt(static_cast<float>(qk_head_size))
: parameters.scale;
p.seqlen_k_ptr = nullptr;
Expand Down
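1 change: 1 addition & 0 deletions onnxruntime/contrib_ops/cuda/bert/packed_multihead_attention_impl.cu
Original file line number Diff line number Diff line change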
Expand Up @@ -693,6 +693,7 @@ Status FusedAttentionCutlass(
p.qk_head_size = parameters.head_size;
p.v_head_size = parameters.v_head_size;
p.causal = false;
p.use_smooth_softmax = false;
p.scale = parameters.scale == 0.0f ? 1.f / sqrt(static_cast<float>(qk_head_size))
: parameters.scale;
p.seqlen_k_ptr = nullptr;
Expand Down

0 comments on commit a64afdc

Please sign in to comment.