From 78fe5af734e1f42b9da43defbdaec0ae99b1f014 Mon Sep 17 00:00:00 2001
From: jack
Date: Thu, 11 Apr 2024 15:10:32 -0500
Subject: [PATCH] Fixing max token error message for openai compatible server

---
 vllm/entrypoints/openai/serving_engine.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py
index 8f69388c0251e..da6e68545bd24 100644
--- a/vllm/entrypoints/openai/serving_engine.py
+++ b/vllm/entrypoints/openai/serving_engine.py
@@ -190,6 +190,12 @@ def _validate_prompt_and_tokenize(
         token_num = len(input_ids)
 
         if request.max_tokens is None:
+            if token_num >= self.max_model_len:
+                raise ValueError(
+                    f"This model's maximum context length is "
+                    f"{self.max_model_len} tokens. However, you requested "
+                    f"{token_num} tokens in the messages. "
+                    f"Please reduce the length of the messages.")
             request.max_tokens = self.max_model_len - token_num
 
         if token_num + request.max_tokens > self.max_model_len:
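
For context, a minimal standalone sketch of the validation flow this patch touches. The function name and plain-argument signature below are illustrative simplifications, not vLLM's actual serving API; only the first branch corresponds to the lines this diff adds, and the message of the pre-existing over-length check is elided here because it is not shown in the hunk.

from typing import Optional


def validate_and_fill_max_tokens(token_num: int,
                                 max_tokens: Optional[int],
                                 max_model_len: int) -> int:
    """Mirror the checks around request.max_tokens in serving_engine.py."""
    if max_tokens is None:
        # New in this patch: reject prompts that already fill the context
        # window instead of silently computing a non-positive max_tokens.
        if token_num >= max_model_len:
            raise ValueError(
                f"This model's maximum context length is {max_model_len} "
                f"tokens. However, you requested {token_num} tokens in the "
                f"messages. Please reduce the length of the messages.")
        max_tokens = max_model_len - token_num
    # Pre-existing check; its error message is not part of this diff.
    if token_num + max_tokens > max_model_len:
        raise ValueError("Prompt plus completion exceeds the context length.")
    return max_tokens


# Example: a 5000-token prompt against a 4096-token context with max_tokens
# unset now raises the clearer message instead of setting max_tokens to -904.
if __name__ == "__main__":
    try:
        validate_and_fill_max_tokens(token_num=5000, max_tokens=None,
                                     max_model_len=4096)
    except ValueError as exc:
        print(exc)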