From ef54499c6cc16a4cc601937ebaa548107ee26b36 Mon Sep 17 00:00:00 2001 From: Urszula Golowicz Date: Thu, 21 Nov 2024 10:44:05 +0100 Subject: [PATCH] [SW-209062] Disable default sdpa in Albert (#22) Transformers v4.45 introduced sdpa as the default implementation in Albert. This caused a performance drop. Adding Albert to the list of models which don't yet have an sdpa implementation in Gaudi and thus use eager attention. --- optimum/habana/transformers/models/modeling_all_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum/habana/transformers/models/modeling_all_models.py b/optimum/habana/transformers/models/modeling_all_models.py index 90aa2d5e0f..fafc4bab49 100644 --- a/optimum/habana/transformers/models/modeling_all_models.py +++ b/optimum/habana/transformers/models/modeling_all_models.py @@ -115,7 +115,7 @@ def gaudi_conv1d_forward(self, x): @classmethod def gaudi_check_and_enable_sdpa(cls, config, hard_check_only: bool = False) -> PretrainedConfig: # This model doesn't support SDPA in Gaudi yet, fallback to original code. - MODELS_ATTN_IMPLEMENTATION_EAGER = ["bart", "gpt_bigcode", "mistral", "mixtral", "wav2vec2", "roberta"] + MODELS_ATTN_IMPLEMENTATION_EAGER = ["albert", "bart", "gpt_bigcode", "mistral", "mixtral", "wav2vec2", "roberta"] if config.model_type in MODELS_ATTN_IMPLEMENTATION_EAGER: config._attn_implementation = "eager"