diff --git a/vllm/model_executor/model_loader/veturboio.py b/vllm/model_executor/model_loader/veturboio.py index c285cac838c33..741b8c112cbd5 100644 --- a/vllm/model_executor/model_loader/veturboio.py +++ b/vllm/model_executor/model_loader/veturboio.py @@ -86,8 +86,7 @@ def deserialize(self, model): tensors_dict = veturboio.load(model_file, helper=helper, **self.veturboio_args.deserializer_params) - - model.load_state_dict(tensors_dict, strict=False, assign=True) + model.load_weights(iter(tensors_dict.items())) del tensors_dict # gc.collect() # do gc collect immediately torch.cuda.empty_cache()