From 441506a25d1a2a9635f22fbf4dd75c31a28c0fed Mon Sep 17 00:00:00 2001
From: Jeffrey Fong
Date: Sat, 31 Aug 2024 16:25:13 +0000
Subject: [PATCH] fix

---
 server_vllm.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/server_vllm.py b/server_vllm.py
index d97d547..e0cdd41 100644
--- a/server_vllm.py
+++ b/server_vllm.py
@@ -138,13 +138,11 @@ async def create_chat_completion(raw_request: Request):
 
     logger.info(f"args: {args}")
 
-    if args.served_model_name is not None:
-        logger.info(
-            "args.served_model_name is not used in this service and will be ignored. Served model will consist of args.model only."
-        )
-
     served_model = [args.model]
 
+    if args.served_model_name is not None:
+        served_model += args.served_model_name
+
     engine_args = AsyncEngineArgs.from_cli_args(args)
     # A separate tokenizer to map token IDs to strings.
     tokenizer = get_tokenizer(