From 8e304aa1c77598c4e3bc8cba7ce122ffb7d11481 Mon Sep 17 00:00:00 2001 From: zhyncs Date: Sat, 16 Nov 2024 11:24:57 -0800 Subject: [PATCH] fix quant: set vllm_config.quant_config in load_model --- python/sglang/srt/model_executor/model_runner.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py index 8bef7d187b0..5e431d65b0b 100644 --- a/python/sglang/srt/model_executor/model_runner.py +++ b/python/sglang/srt/model_executor/model_runner.py @@ -271,6 +271,9 @@ def load_model(self): self.vllm_config.model_config = self.vllm_model_config self.vllm_config.load_config = self.load_config self.vllm_config.device_config = DeviceConfig(self.device) + self.vllm_config.quant_config = VllmConfig._get_quantization_config( + self.vllm_config.model_config, self.vllm_config.load_config + ) # Load the model self.model = get_model(