[SW-209062] Disable default sdpa in Albert (#22)
Transformers v4.45 introduced sdpa as the default attention
implementation in Albert, which caused a performance drop. Add Albert
to the list of models that do not yet have an sdpa implementation on
Gaudi and therefore fall back to eager attention.
ugolowic authored and astachowiczhabana committed Nov 26, 2024
1 parent 9a49200 commit ef54499
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion optimum/habana/transformers/models/modeling_all_models.py
@@ -115,7 +115,7 @@ def gaudi_conv1d_forward(self, x):
 @classmethod
 def gaudi_check_and_enable_sdpa(cls, config, hard_check_only: bool = False) -> PretrainedConfig:
     # This model doesn't support SDPA in Gaudi yet, fallback to original code.
-    MODELS_ATTN_IMPLEMENTATION_EAGER = ["bart", "gpt_bigcode", "mistral", "mixtral", "wav2vec2", "roberta"]
+    MODELS_ATTN_IMPLEMENTATION_EAGER = ["albert", "bart", "gpt_bigcode", "mistral", "mixtral", "wav2vec2", "roberta"]
 
     if config.model_type in MODELS_ATTN_IMPLEMENTATION_EAGER:
         config._attn_implementation = "eager"
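A minimal sketch (not part of the commit) of how the fallback could be verified after this change. It assumes a Gaudi environment with this optimum-habana revision installed; adapt_transformers_to_gaudi is the optimum-habana entry point that applies the patched gaudi_check_and_enable_sdpa, and the albert-base-v2 checkpoint is only an illustrative choice:

# Verification sketch, not part of the commit. Assumes optimum-habana with
# this change installed on a Gaudi machine.
from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi
from transformers import AlbertModel

# Patch the Transformers classes for Gaudi, including the SDPA check above.
adapt_transformers_to_gaudi()

# With "albert" in MODELS_ATTN_IMPLEMENTATION_EAGER, the model config should
# report eager attention instead of the Transformers v4.45 sdpa default.
model = AlbertModel.from_pretrained("albert-base-v2")
print(model.config._attn_implementation)  # expected: "eager"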
