Align OpenAI API for FaqGen, DocSum #1401

Merged · 1 commit · Jan 17, 2025
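
This change renames the LLM microservice input field from "query" to "messages" across DocSum and FaqGen so the gateway payload matches the OpenAI chat-completion schema, replaces DocSumLLMParams with the new DocSumChatCompletionRequest type, strips gateway-internal keys (id, max_new_tokens, input) before forwarding to the LLM, makes the TGI cache volume configurable via DATA_PATH, and updates the CI test scripts to mount the local GenAIComps checkout and to send the new payload.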
DocSum/docker_compose/intel/hpu/gaudi/compose.yaml (1 addition, 1 deletion)
@@ -8,7 +8,7 @@ services:
     ports:
       - ${LLM_ENDPOINT_PORT:-8008}:80
     volumes:
-      - "./data:/data"
+      - "${DATA_PATH:-data}:/data"
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
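
The host path backing the TGI model cache is now configurable. Note that with ${DATA_PATH:-data}, an unset DATA_PATH falls back to the bare name "data", which Docker Compose treats as a named volume rather than the old relative ./data directory. A minimal usage sketch (the cache path below is illustrative, not mandated by this PR):

    export DATA_PATH=/mnt/opea/model_cache   # example path
    docker compose -f compose.yaml up -d
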
DocSum/docsum.py (12 additions, 6 deletions)
@@ -15,9 +15,9 @@
     ChatCompletionResponse,
     ChatCompletionResponseChoice,
     ChatMessage,
+    DocSumChatCompletionRequest,
     UsageInfo,
 )
-from comps.cores.proto.docarray import DocSumLLMParams
 from fastapi import File, Request, UploadFile
 from fastapi.responses import StreamingResponse

@@ -34,14 +34,20 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
     if self.services[cur_node].service_type == ServiceType.LLM:
         for key_to_replace in ["text", "asr_result"]:
             if key_to_replace in inputs:
-                inputs["query"] = inputs[key_to_replace]
+                inputs["messages"] = inputs[key_to_replace]
                 del inputs[key_to_replace]

         docsum_parameters = kwargs.get("docsum_parameters", None)
         if docsum_parameters:
             docsum_parameters = docsum_parameters.model_dump()
-            del docsum_parameters["query"]
+            del docsum_parameters["messages"]
             inputs.update(docsum_parameters)
+        if "id" in inputs:
+            del inputs["id"]
+        if "max_new_tokens" in inputs:
+            del inputs["max_new_tokens"]
+        if "input" in inputs:
+            del inputs["input"]
     elif self.services[cur_node].service_type == ServiceType.ASR:
         if "video" in inputs:
             audio_base64 = video2audio(inputs["video"])

@@ -217,13 +223,13 @@ async def handle_request(self, request: Request, files: List[UploadFile] = File(
             initial_inputs_data = {}
             initial_inputs_data[data_type] = prompt
         else:
-            initial_inputs_data = {"query": prompt}
+            initial_inputs_data = {"messages": prompt}

     else:
         raise ValueError(f"Unknown request type: {request.headers.get('content-type')}")

-    docsum_parameters = DocSumLLMParams(
-        query="",
+    docsum_parameters = DocSumChatCompletionRequest(
+        messages="",
         max_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024,
         top_k=chat_request.top_k if chat_request.top_k else 10,
         top_p=chat_request.top_p if chat_request.top_p else 0.95,
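
With the schema aligned, the LLM microservice consumes an OpenAI-style "messages" field instead of the previous "query". A rough smoke test, borrowing the host and port used by the test scripts below:

    curl -X POST "http://${host_ip}:9000/v1/docsum" \
      -H "Content-Type: application/json" \
      -d '{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings."}'
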
DocSum/tests/test_compose_on_gaudi.sh (4 additions, 2 deletions)
@@ -28,6 +28,7 @@ export DOCSUM_PORT=9000
 export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
 export DocSum_COMPONENT_NAME="OpeaDocSumTgi"
 export LOGFLAG=True
+export DATA_PATH="/data/cache"

 WORKPATH=$(dirname "$PWD")
 LOG_PATH="$WORKPATH/tests"

@@ -51,6 +52,7 @@ function build_docker_images() {
 function start_services() {
     cd $WORKPATH/docker_compose/intel/hpu/gaudi

+    sed -i "s|container_name: docsum-gaudi-backend-server|container_name: docsum-gaudi-backend-server\n    volumes:\n      - \"${WORKPATH}\/docker_image_build\/GenAIComps:\/home\/user\/GenAIComps\"|g" compose.yaml
     docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
     sleep 3m
 }
@@ -158,10 +160,10 @@ function validate_microservices() {
     # llm microservice
     validate_services_json \
         "${host_ip}:9000/v1/docsum" \
-        "data: " \
+        "text" \
         "llm-docsum-tgi" \
         "llm-docsum-gaudi-server" \
-        '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'

     # whisper microservice
     ulimit -s 65536
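
Note: the expected substring in validate_services_json changes from the SSE prefix "data: " to "text", consistent with the aligned service now returning OpenAI-style completion JSON (where generated content surfaces under a "text"-bearing field) rather than bare SSE data lines.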
DocSum/tests/test_compose_on_rocm.sh (3 additions, 3 deletions)
@@ -50,7 +50,7 @@ function build_docker_images() {
 function start_services() {
     cd "$WORKPATH"/docker_compose/amd/gpu/rocm
     sed -i "s/backend_address/$ip_address/g" "$WORKPATH"/ui/svelte/.env
-
+    sed -i "s|container_name: docsum-backend-server|container_name: docsum-backend-server\n    volumes:\n      - \"${WORKPATH}\/docker_image_build\/GenAIComps:\/home\/user\/GenAIComps\"|g" compose.yaml
     # Start Docker Containers
     docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log
     sleep 3m

@@ -138,10 +138,10 @@ function validate_microservices() {
     # llm microservice
     validate_services \
         "${host_ip}:9000/v1/docsum" \
-        "data: " \
+        "text" \
         "docsum-llm-server" \
         "docsum-llm-server" \
-        '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'

 }
DocSum/tests/test_compose_on_xeon.sh (3 additions, 3 deletions)
@@ -49,7 +49,7 @@ function build_docker_images() {

 function start_services() {
     cd $WORKPATH/docker_compose/intel/cpu/xeon/
-
+    sed -i "s|container_name: docsum-xeon-backend-server|container_name: docsum-xeon-backend-server\n    volumes:\n      - \"${WORKPATH}\/docker_image_build\/GenAIComps:\/home\/user\/GenAIComps\"|g" compose.yaml
     docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
     sleep 3m
 }

@@ -160,10 +160,10 @@ function validate_microservices() {
     # llm microservice
     validate_services_json \
         "${host_ip}:9000/v1/docsum" \
-        "data: " \
+        "text" \
         "llm-docsum-tgi" \
         "llm-docsum-server" \
-        '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'

     # whisper microservice
     ulimit -s 65536
FaqGen/docker_compose/intel/hpu/gaudi/compose.yaml (1 addition, 1 deletion)
@@ -8,7 +8,7 @@ services:
     ports:
       - ${LLM_ENDPOINT_PORT:-8008}:80
     volumes:
-      - "./data:/data"
+      - "${DATA_PATH:-data}:/data"
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
FaqGen/faqgen.py (1 addition, 1 deletion)
@@ -113,7 +113,7 @@ async def handle_request(self, request: Request, files: List[UploadFile] = File(
             model=chat_request.model if chat_request.model else None,
         )
         result_dict, runtime_graph = await self.megaservice.schedule(
-            initial_inputs={"query": prompt}, llm_parameters=parameters
+            initial_inputs={"messages": prompt}, llm_parameters=parameters
         )
         for node, response in result_dict.items():
             # Here it suppose the last microservice in the megaservice is LLM.
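
The same payload shape now applies to FaqGen. Assuming the default test port from the scripts below, a quick check could be:

    curl -X POST "http://${ip_address}:9000/v1/faqgen" \
      -H "Content-Type: application/json" \
      -d '{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings."}'
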
FaqGen/tests/test_compose_on_gaudi.sh (3 additions, 2 deletions)
@@ -13,6 +13,7 @@ export TAG=${IMAGE_TAG}
 WORKPATH=$(dirname "$PWD")
 LOG_PATH="$WORKPATH/tests"
 ip_address=$(hostname -I | awk '{print $1}')
+export DATA_PATH="/data/cache"

 function build_docker_images() {
     cd $WORKPATH/docker_image_build

@@ -91,10 +92,10 @@ function validate_microservices() {
     # llm microservice
     validate_services \
         "${ip_address}:9000/v1/faqgen" \
-        "data: " \
+        "text" \
         "llm" \
         "llm-faqgen-server" \
-        '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
 }

 function validate_megaservice() {
FaqGen/tests/test_compose_on_rocm.sh (2 additions, 2 deletions)
@@ -95,10 +95,10 @@ function validate_microservices() {
     # llm microservice
     validate_services \
         "${ip_address}:9000/v1/faqgen" \
-        "data: " \
+        "text" \
         "llm" \
         "faqgen-llm-server" \
-        '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
 }

 function validate_megaservice() {
FaqGen/tests/test_compose_on_xeon.sh (2 additions, 2 deletions)
@@ -91,10 +91,10 @@ function validate_microservices() {
     # llm microservice
     validate_services \
         "${ip_address}:9000/v1/faqgen" \
-        "data: " \
+        "text" \
         "llm" \
         "llm-faqgen-server" \
-        '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
 }

 function validate_megaservice() {