From ff51c056588e421ce6ea094f77af1aee9013d076 Mon Sep 17 00:00:00 2001
From: Xinyao Wang
Date: Thu, 16 Jan 2025 15:37:34 +0800
Subject: [PATCH] Align OpenAI API for FaqGen, DocSum

Align all the inputs to the OpenAI API format for FaqGen and DocSum;
related to GenAIComps PR
https://github.com/opea-project/GenAIComps/pull/1161

Signed-off-by: Xinyao Wang
---
 .../intel/hpu/gaudi/compose.yaml       |  2 +-
 DocSum/docsum.py                       | 18 ++++++++++++------
 DocSum/tests/test_compose_on_gaudi.sh  |  6 ++++--
 DocSum/tests/test_compose_on_rocm.sh   |  6 +++---
 DocSum/tests/test_compose_on_xeon.sh   |  6 +++---
 .../intel/hpu/gaudi/compose.yaml       |  2 +-
 FaqGen/faqgen.py                       |  2 +-
 FaqGen/tests/test_compose_on_gaudi.sh  |  5 +++--
 FaqGen/tests/test_compose_on_rocm.sh   |  4 ++--
 FaqGen/tests/test_compose_on_xeon.sh   |  4 ++--
 10 files changed, 32 insertions(+), 23 deletions(-)

diff --git a/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml b/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml
index c812b64715..8f36069552 100644
--- a/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -8,7 +8,7 @@ services:
     ports:
       - ${LLM_ENDPOINT_PORT:-8008}:80
     volumes:
-      - "./data:/data"
+      - "${DATA_PATH:-data}:/data"
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
diff --git a/DocSum/docsum.py b/DocSum/docsum.py
index 1d71f24ad1..34e58c1df0 100644
--- a/DocSum/docsum.py
+++ b/DocSum/docsum.py
@@ -15,9 +15,9 @@
     ChatCompletionResponse,
     ChatCompletionResponseChoice,
     ChatMessage,
+    DocSumChatCompletionRequest,
     UsageInfo,
 )
-from comps.cores.proto.docarray import DocSumLLMParams
 from fastapi import File, Request, UploadFile
 from fastapi.responses import StreamingResponse
 
@@ -34,14 +34,20 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k
     if self.services[cur_node].service_type == ServiceType.LLM:
         for key_to_replace in ["text", "asr_result"]:
             if key_to_replace in inputs:
-                inputs["query"] = inputs[key_to_replace]
+                inputs["messages"] = inputs[key_to_replace]
                 del inputs[key_to_replace]
 
         docsum_parameters = kwargs.get("docsum_parameters", None)
         if docsum_parameters:
             docsum_parameters = docsum_parameters.model_dump()
-            del docsum_parameters["query"]
+            del docsum_parameters["messages"]
             inputs.update(docsum_parameters)
+        if "id" in inputs:
+            del inputs["id"]
+        if "max_new_tokens" in inputs:
+            del inputs["max_new_tokens"]
+        if "input" in inputs:
+            del inputs["input"]
     elif self.services[cur_node].service_type == ServiceType.ASR:
         if "video" in inputs:
             audio_base64 = video2audio(inputs["video"])
@@ -217,13 +223,13 @@ async def handle_request(self, request: Request, files: List[UploadFile] = File(
                 initial_inputs_data = {}
                 initial_inputs_data[data_type] = prompt
             else:
-                initial_inputs_data = {"query": prompt}
+                initial_inputs_data = {"messages": prompt}
 
         else:
             raise ValueError(f"Unknown request type: {request.headers.get('content-type')}")
 
-        docsum_parameters = DocSumLLMParams(
-            query="",
+        docsum_parameters = DocSumChatCompletionRequest(
+            messages="",
             max_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024,
             top_k=chat_request.top_k if chat_request.top_k else 10,
             top_p=chat_request.top_p if chat_request.top_p else 0.95,
diff --git a/DocSum/tests/test_compose_on_gaudi.sh b/DocSum/tests/test_compose_on_gaudi.sh
index db0977b040..4683c4acb8 100644
--- a/DocSum/tests/test_compose_on_gaudi.sh
+++ b/DocSum/tests/test_compose_on_gaudi.sh
@@ -28,6 +28,7 @@ export DOCSUM_PORT=9000
 export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
 export DocSum_COMPONENT_NAME="OpeaDocSumTgi"
 export LOGFLAG=True
+export DATA_PATH="/data/cache"
 
 WORKPATH=$(dirname "$PWD")
 LOG_PATH="$WORKPATH/tests"
@@ -51,6 +52,7 @@ function build_docker_images() {
 
 function start_services() {
     cd $WORKPATH/docker_compose/intel/hpu/gaudi
+    sed -i "s|container_name: docsum-gaudi-backend-server|container_name: docsum-gaudi-backend-server\n    volumes:\n      - \"${WORKPATH}\/docker_image_build\/GenAIComps:\/home\/user\/GenAIComps\"|g" compose.yaml
     docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
     sleep 3m
 }
@@ -158,10 +160,10 @@ function validate_microservices() {
     # llm microservice
     validate_services_json \
         "${host_ip}:9000/v1/docsum" \
-        "data: " \
+        "text" \
         "llm-docsum-tgi" \
         "llm-docsum-gaudi-server" \
-        '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
 
     # whisper microservice
     ulimit -s 65536
diff --git a/DocSum/tests/test_compose_on_rocm.sh b/DocSum/tests/test_compose_on_rocm.sh
index 54935f2b78..e4d579f168 100644
--- a/DocSum/tests/test_compose_on_rocm.sh
+++ b/DocSum/tests/test_compose_on_rocm.sh
@@ -50,7 +50,7 @@ function build_docker_images() {
 function start_services() {
     cd "$WORKPATH"/docker_compose/amd/gpu/rocm
     sed -i "s/backend_address/$ip_address/g" "$WORKPATH"/ui/svelte/.env
-
+    sed -i "s|container_name: docsum-backend-server|container_name: docsum-backend-server\n    volumes:\n      - \"${WORKPATH}\/docker_image_build\/GenAIComps:\/home\/user\/GenAIComps\"|g" compose.yaml
     # Start Docker Containers
     docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log
     sleep 3m
@@ -138,10 +138,10 @@ function validate_microservices() {
     # llm microservice
     validate_services \
         "${host_ip}:9000/v1/docsum" \
-        "data: " \
+        "text" \
         "docsum-llm-server" \
         "docsum-llm-server" \
-        '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
 }
diff --git a/DocSum/tests/test_compose_on_xeon.sh b/DocSum/tests/test_compose_on_xeon.sh
index 13036fc0db..aef3b5db13 100644
--- a/DocSum/tests/test_compose_on_xeon.sh
+++ b/DocSum/tests/test_compose_on_xeon.sh
@@ -49,7 +49,7 @@ function build_docker_images() {
 
 function start_services() {
     cd $WORKPATH/docker_compose/intel/cpu/xeon/
-
+    sed -i "s|container_name: docsum-xeon-backend-server|container_name: docsum-xeon-backend-server\n    volumes:\n      - \"${WORKPATH}\/docker_image_build\/GenAIComps:\/home\/user\/GenAIComps\"|g" compose.yaml
     docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
     sleep 3m
 }
@@ -160,10 +160,10 @@ function validate_microservices() {
     # llm microservice
     validate_services_json \
         "${host_ip}:9000/v1/docsum" \
-        "data: " \
+        "text" \
         "llm-docsum-tgi" \
         "llm-docsum-server" \
-        '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
 
     # whisper microservice
     ulimit -s 65536
diff --git a/FaqGen/docker_compose/intel/hpu/gaudi/compose.yaml b/FaqGen/docker_compose/intel/hpu/gaudi/compose.yaml
index 20c2aced1f..4d2c767dfa 100644
--- a/FaqGen/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/FaqGen/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -8,7 +8,7 @@ services:
     ports:
       - ${LLM_ENDPOINT_PORT:-8008}:80
     volumes:
-      - "./data:/data"
+      - "${DATA_PATH:-data}:/data"
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
diff --git a/FaqGen/faqgen.py b/FaqGen/faqgen.py
index f4b0a58031..01d1e4acbf 100644
--- a/FaqGen/faqgen.py
+++ b/FaqGen/faqgen.py
@@ -113,7 +113,7 @@ async def handle_request(self, request: Request, files: List[UploadFile] = File(
             model=chat_request.model if chat_request.model else None,
         )
         result_dict, runtime_graph = await self.megaservice.schedule(
-            initial_inputs={"query": prompt}, llm_parameters=parameters
+            initial_inputs={"messages": prompt}, llm_parameters=parameters
         )
         for node, response in result_dict.items():
             # Here it suppose the last microservice in the megaservice is LLM.
diff --git a/FaqGen/tests/test_compose_on_gaudi.sh b/FaqGen/tests/test_compose_on_gaudi.sh
index 95ed2950a0..48241a5d7f 100644
--- a/FaqGen/tests/test_compose_on_gaudi.sh
+++ b/FaqGen/tests/test_compose_on_gaudi.sh
@@ -13,6 +13,7 @@ export TAG=${IMAGE_TAG}
 WORKPATH=$(dirname "$PWD")
 LOG_PATH="$WORKPATH/tests"
 ip_address=$(hostname -I | awk '{print $1}')
+export DATA_PATH="/data/cache"
 
 function build_docker_images() {
     cd $WORKPATH/docker_image_build
@@ -91,10 +92,10 @@ function validate_microservices() {
     # llm microservice
     validate_services \
         "${ip_address}:9000/v1/faqgen" \
-        "data: " \
+        "text" \
         "llm" \
         "llm-faqgen-server" \
-        '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
 }
 
 function validate_megaservice() {
diff --git a/FaqGen/tests/test_compose_on_rocm.sh b/FaqGen/tests/test_compose_on_rocm.sh
index 726c83461b..5346f41263 100644
--- a/FaqGen/tests/test_compose_on_rocm.sh
+++ b/FaqGen/tests/test_compose_on_rocm.sh
@@ -95,10 +95,10 @@ function validate_microservices() {
     # llm microservice
     validate_services \
         "${ip_address}:9000/v1/faqgen" \
-        "data: " \
+        "text" \
         "llm" \
         "faqgen-llm-server" \
-        '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
 }
 
 function validate_megaservice() {
diff --git a/FaqGen/tests/test_compose_on_xeon.sh b/FaqGen/tests/test_compose_on_xeon.sh
index e4409358d2..eb7e49b240 100755
--- a/FaqGen/tests/test_compose_on_xeon.sh
+++ b/FaqGen/tests/test_compose_on_xeon.sh
@@ -91,10 +91,10 @@ function validate_microservices() {
     # llm microservice
     validate_services \
         "${ip_address}:9000/v1/faqgen" \
-        "data: " \
+        "text" \
         "llm" \
         "llm-faqgen-server" \
-        '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
 }
 
 function validate_megaservice() {
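--
Note: a minimal by-hand check of the realigned request schema, mirroring the payloads in the updated test scripts above. This is a sketch, not part of the patch: it assumes the DocSum LLM microservice is reachable at ${host_ip}:9000 as in the tests, and the document text is shortened here for brevity.

# Old shape, removed by this patch (the "query" key is no longer used):
curl -X POST "http://${host_ip}:9000/v1/docsum" \
    -H "Content-Type: application/json" \
    -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings."}'

# New OpenAI-aligned shape used by the updated tests (the "messages" key):
curl -X POST "http://${host_ip}:9000/v1/docsum" \
    -H "Content-Type: application/json" \
    -d '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings."}'

The validators' expected substring also changes from "data: " to "text", consistent with the microservice now returning a JSON body containing a "text" field by default rather than an SSE stream.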