[BugFix] skip loading lm_head for llama if word embeddings are tied #6796

Closed
5 changes: 5 additions & 0 deletions vllm/model_executor/models/llama.py
@@ -496,6 +496,11 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
                break
            else:
                # if word embeddings are tied,
                # lm_head will not be used.
                # To prevent errors, skip loading lm_head.
                if self.config.tie_word_embeddings and "lm_head" in name:
                    continue
                # Skip loading extra bias for GPTQ models.
                if name.endswith(".bias") and name not in params_dict:
                    continue
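For context, here is a minimal standalone sketch of the failure mode this patch addresses (toy parameter names and a toy loader, not vLLM's actual `load_weights`): when `tie_word_embeddings` is set, the model keeps no separate `lm_head.weight` parameter, so a checkpoint that still ships one must be skipped rather than looked up in `params_dict`.

```python
import torch

def load_weights(params_dict, weights, tie_word_embeddings=True):
    """Toy loader: copy checkpoint tensors into existing parameters by name."""
    for name, loaded_weight in weights:
        # With tied embeddings, lm_head reuses the input embedding matrix,
        # so there is no "lm_head.weight" entry in params_dict to load into;
        # skip it instead of raising a KeyError.
        if tie_word_embeddings and "lm_head" in name:
            continue
        params_dict[name].copy_(loaded_weight)

params = {"model.embed_tokens.weight": torch.zeros(32, 8)}
checkpoint = [
    ("model.embed_tokens.weight", torch.randn(32, 8)),
    ("lm_head.weight", torch.randn(32, 8)),  # present in the file, absent from the model
]
load_weights(params, checkpoint)  # completes without a KeyError
```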