vllm-project · WoosukKwon · Feb 14, 2024 · Feb 14, 2024 · Feb 14, 2024 · Feb 14, 2024
diff --git a/vllm/model_executor/models/__init__.py b/vllm/model_executor/models/__init__.py
@@ -24,7 +24,7 @@
     "GPTBigCodeForCausalLM": ("gpt_bigcode", "GPTBigCodeForCausalLM"),
     "GPTJForCausalLM": ("gpt_j", "GPTJForCausalLM"),
     "GPTNeoXForCausalLM": ("gpt_neox", "GPTNeoXForCausalLM"),
-    "InternLMForCausalLM": ("internlm", "InternLMForCausalLM"),
+    "InternLMForCausalLM": ("llama", "LlamaForCausalLM"),
     "InternLM2ForCausalLM": ("internlm2", "InternLM2ForCausalLM"),
     "LlamaForCausalLM": ("llama", "LlamaForCausalLM"),
     # For decapoda-research/llama-*

diff --git a/vllm/model_executor/models/internlm.py b/vllm/model_executor/models/internlm.py
diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py
@@ -91,6 +91,7 @@ def __init__(
         rope_scaling: Optional[Dict[str, Any]] = None,
         max_position_embeddings: int = 8192,
         linear_method: Optional[LinearMethodBase] = None,
+        bias: bool = False,
     ) -> None:
         super().__init__()
         self.hidden_size = hidden_size
@@ -120,13 +121,13 @@ def __init__(
             self.head_dim,
             self.total_num_heads,
             self.total_num_kv_heads,
-            bias=False,
+            bias=bias,
             linear_method=linear_method,
         )
         self.o_proj = RowParallelLinear(
             self.total_num_heads * self.head_dim,
             hidden_size,
-            bias=False,
+            bias=bias,
             linear_method=linear_method,
         )
 
@@ -179,6 +180,7 @@ def __init__(
             rope_scaling=rope_scaling,
             max_position_embeddings=max_position_embeddings,
             linear_method=linear_method,
+            bias=getattr(config, "bias", False),
         )
         self.mlp = LlamaMLP(
             hidden_size=self.hidden_size,