Fix quantization: set vllm_config.quant_config before loading the model

sgl-project · Nov 16, 2024 · 8e304aa · 8e304aa
1 parent 9fb5674
commit 8e304aa
Showing 1 changed file with 3 additions and 0 deletions.
diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py
@@ -271,6 +271,9 @@ def load_model(self):
         self.vllm_config.model_config = self.vllm_model_config
         self.vllm_config.load_config = self.load_config
         self.vllm_config.device_config = DeviceConfig(self.device)
+        self.vllm_config.quant_config = VllmConfig._get_quantization_config(
+            self.vllm_config.model_config, self.vllm_config.load_config
+        )
 
         # Load the model
         self.model = get_model(