From 68349c2bd12c734f1d59c0a879e6b88764328d4e Mon Sep 17 00:00:00 2001 From: jack Date: Wed, 14 Aug 2024 06:23:57 +0000 Subject: [PATCH] [Bugfix][Frontend] Disable embedding api for chat models --- vllm/entrypoints/openai/serving_embedding.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/vllm/entrypoints/openai/serving_embedding.py b/vllm/entrypoints/openai/serving_embedding.py index 0a787a34da517..0dc3c3bc7d154 100644 --- a/vllm/entrypoints/openai/serving_embedding.py +++ b/vllm/entrypoints/openai/serving_embedding.py @@ -71,7 +71,7 @@ def __init__( lora_modules=None, prompt_adapters=None, request_logger=request_logger) - self._check_embedding_mode(model_config.embedding_mode) + self._enabled = self._check_embedding_mode(model_config.embedding_mode) async def create_embedding( self, @@ -83,6 +83,8 @@ async def create_embedding( See https://platform.openai.com/docs/api-reference/embeddings/create for the API specification. This API mimics the OpenAI Embedding API. """ + if not self._enabled: + return self.create_error_response("Embedding API disabled") error_check_ret = await self._check_model(request) if error_check_ret is not None: return error_check_ret @@ -179,3 +181,4 @@ def _check_embedding_mode(self, embedding_mode: bool): "embedding_mode is False. Embedding API will not work.") else: logger.info("Activating the server engine with embedding enabled.") + return embedding_mode