From 8c2b7b4ccb319c588f9118a48cb55560335c0287 Mon Sep 17 00:00:00 2001
From: Taemin Lee
Date: Thu, 21 Mar 2024 19:40:39 +0900
Subject: [PATCH] refine comments for lm_head gemma error fix

---
 vllm/model_executor/models/gemma.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/vllm/model_executor/models/gemma.py b/vllm/model_executor/models/gemma.py
index 4e0c7a3b8ad19..fa8ce60e74056 100644
--- a/vllm/model_executor/models/gemma.py
+++ b/vllm/model_executor/models/gemma.py
@@ -340,9 +340,8 @@ def load_weights(self,
                 weight_loader(param, loaded_weight, shard_id)
                 break
             else:
-                # lm_head is not used in vllm as it is tied weight with embed_token.
-                # Sometimes duplicate lm_head layers are added when the structure of the model is newly created by quantization, LORA, etc.
-                # To avoid the error that occurs, skip loading lm_head.weight.
+                # lm_head is not used in vllm as it is tied with embed_token.
+                # To prevent errors, skip loading lm_head.weight.
                 if "lm_head.weight" in name:
                     continue
                 # Skip loading extra bias for GPTQ models.
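
Note (review context, not part of the patch): a minimal, self-contained sketch of the skip pattern the refined comments describe. The tiny `weights` list and `params_dict` below are illustrative assumptions, not vLLM's actual loader state; only the `if "lm_head.weight" in name: continue` guard mirrors the patched code.

    # Hypothetical stand-ins: a checkpoint exported after quantization or a
    # LoRA merge may contain a duplicate lm_head.weight even though Gemma ties
    # lm_head to embed_tokens, so the vLLM model has no lm_head parameter.
    weights = [
        ("model.embed_tokens.weight", "tensor-0"),
        ("lm_head.weight", "tensor-0"),  # duplicate of the tied embedding
    ]
    params_dict = {"model.embed_tokens.weight": None}

    for name, loaded_weight in weights:
        # lm_head is not used in vllm as it is tied with embed_token.
        # To prevent errors, skip loading lm_head.weight.
        if "lm_head.weight" in name:
            continue
        # Without the guard above, this lookup would raise KeyError for
        # the duplicate lm_head.weight entry.
        param = params_dict[name]
        print(f"loaded {name}")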