move examples gateway #992

Merged Dec 6, 2024 (32 commits). The diff below shows changes from 14 of the 32 commits.

Commits:
8349d24  move chatqna gateway. (Oct 20, 2024)
3da7d3c  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Oct 20, 2024)
eea3db3  fix import issue. (Oct 20, 2024)
e51a6d7  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Oct 20, 2024)
a252db3  move codegen gateway. (Oct 20, 2024)
11c1516  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Oct 20, 2024)
139986e  Merge branch 'main' into move_gateway (lkk12014402, Oct 21, 2024)
91bae4d  move code_translation gateway. (Oct 21, 2024)
e11773d  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Oct 21, 2024)
a07a12b  update all examples gateway. (Oct 21, 2024)
ff1b675  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Oct 21, 2024)
324df2a  fix import issue. (Oct 21, 2024)
cdcfbe9  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Oct 21, 2024)
b5c970f  Merge branch 'main' into move_gateway (Spycsh, Oct 29, 2024)
866d72e  update service start entry. (Oct 29, 2024)
e398527  update service start entry. (Oct 29, 2024)
f72347f  remove `MEGA_SERVICE_HOST_IP` which is not actually used. (Oct 30, 2024)
a8e6511  Merge branch 'main' into move_gateway (lkk12014402, Nov 12, 2024)
2e9d6ad  Merge branch 'main' into move_gateway (chensuyue, Nov 13, 2024)
3ac80e2  Merge branch 'main' into move_gateway (lkk12014402, Nov 14, 2024)
04c587d  Merge branch 'main' into move_gateway (lkk12014402, Dec 5, 2024)
337f0dd  Merge branch 'main' into move_gateway (lkk12014402, Dec 5, 2024)
76caa00  update docsum/faqgen. (lkk12014402, Dec 5, 2024)
d13bf2a  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Dec 5, 2024)
b1d5422  update docsum/faqgen. (lkk12014402, Dec 5, 2024)
7f1f52d  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Dec 5, 2024)
0c99b8a  Merge branch 'main' into move_gateway (lkk12014402, Dec 5, 2024)
082caee  move more gates. (lkk12014402, Dec 5, 2024)
3fbefd4  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Dec 5, 2024)
6acdb75  fix 2 example. (lkk12014402, Dec 6, 2024)
52e0d5a  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Dec 6, 2024)
3643e29  revert docsum ut. (lkk12014402, Dec 6, 2024)
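Every file in this PR follows the same refactor: the per-example gateway classes (AudioQnAGateway, ChatQnAGateway, CodeGenGateway, CodeTransGateway) are dropped, and each example's service class now subclasses Gateway from GenAIComps directly, passing the orchestrator, endpoint, and request/response types to super().__init__() and overriding handle_request() itself. A minimal sketch of the pattern, assuming Gateway's constructor behaves as the diffs below imply; the service name here is illustrative and not part of this PR:

from comps import Gateway, MegaServiceEndpoint, ServiceOrchestrator
from comps.cores.proto.api_protocol import ChatCompletionRequest, ChatCompletionResponse
from fastapi import Request


class MyExampleService(Gateway):  # hypothetical name, for illustration only
    def __init__(self, host="0.0.0.0", port=8000):
        self.host = host
        self.port = port
        super().__init__(
            megaservice=ServiceOrchestrator(),
            host=self.host,
            port=self.port,
            endpoint=str(MegaServiceEndpoint.CHAT_QNA),  # each example picks its own endpoint
            input_datatype=ChatCompletionRequest,
            output_datatype=ChatCompletionResponse,
        )

    async def handle_request(self, request: Request):
        # Each example overrides this hook with its own scheduling logic;
        # see the per-example diffs below.
        data = await request.json()
        ...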
41 changes: 37 additions & 4 deletions AudioQnA/audioqna.py
@@ -4,7 +4,10 @@
 import asyncio
 import os
 
-from comps import AudioQnAGateway, MicroService, ServiceOrchestrator, ServiceType
+from comps import Gateway, MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceType
+from comps.cores.proto.api_protocol import AudioChatCompletionRequest, ChatCompletionResponse
+from comps.cores.proto.docarray import LLMParams
+from fastapi import Request
 
 MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
 MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
@@ -16,11 +19,18 @@
 TTS_SERVICE_PORT = int(os.getenv("TTS_SERVICE_PORT", 9088))
 
 
-class AudioQnAService:
+class AudioQnAService(Gateway):
     def __init__(self, host="0.0.0.0", port=8000):
         self.host = host
         self.port = port
-        self.megaservice = ServiceOrchestrator()
+        super().__init__(
+            megaservice=ServiceOrchestrator(),
+            host=self.host,
+            port=self.port,
+            endpoint=str(MegaServiceEndpoint.AUDIO_QNA),
+            input_datatype=AudioChatCompletionRequest,
+            output_datatype=ChatCompletionResponse,
+        )
 
     def add_remote_service(self):
         asr = MicroService(
@@ -50,7 +60,30 @@ def add_remote_service(self):
         self.megaservice.add(asr).add(llm).add(tts)
         self.megaservice.flow_to(asr, llm)
         self.megaservice.flow_to(llm, tts)
-        self.gateway = AudioQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
+
+    async def handle_request(self, request: Request):
+        data = await request.json()
+
+        chat_request = AudioChatCompletionRequest.parse_obj(data)
+        parameters = LLMParams(
+            # relatively lower max_tokens for audio conversation
+            max_tokens=chat_request.max_tokens if chat_request.max_tokens else 128,
+            top_k=chat_request.top_k if chat_request.top_k else 10,
+            top_p=chat_request.top_p if chat_request.top_p else 0.95,
+            temperature=chat_request.temperature if chat_request.temperature else 0.01,
+            frequency_penalty=chat_request.frequency_penalty if chat_request.frequency_penalty else 0.0,
+            presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0,
+            repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
+            streaming=False,  # TODO add streaming LLM output as input to TTS
+        )
+        result_dict, runtime_graph = await self.megaservice.schedule(
+            initial_inputs={"byte_str": chat_request.audio}, llm_parameters=parameters
+        )
+
+        last_node = runtime_graph.all_leaves()[-1]
+        response = result_dict[last_node]["byte_str"]
+
+        return response
 
 
 if __name__ == "__main__":
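With the gateway folded in, AudioQnAService serves the megaservice endpoint itself. A hedged client sketch, assuming the default MEGA_SERVICE_PORT of 8888, that MegaServiceEndpoint.AUDIO_QNA resolves to "/v1/audioqna", and that the response body is the base64 audio string handle_request pulls from the TTS node:

import base64

import requests

# Encode a local recording the way AudioChatCompletionRequest expects (base64 string).
with open("sample.wav", "rb") as f:
    audio_b64 = base64.b64encode(f.read()).decode("utf-8")

resp = requests.post(
    "http://localhost:8888/v1/audioqna",  # path assumed from MegaServiceEndpoint.AUDIO_QNA
    json={"audio": audio_b64, "max_tokens": 64},
)

# handle_request returns the TTS node's "byte_str" field directly,
# so the body should decode back to audio bytes.
with open("answer.wav", "wb") as f:
    f.write(base64.b64decode(resp.json()))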
78 changes: 72 additions & 6 deletions ChatQnA/chatqna.py
@@ -6,7 +6,17 @@
 import os
 import re
 
-from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType
+from comps import Gateway, MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceType
+from comps.cores.proto.api_protocol import (
+    ChatCompletionRequest,
+    ChatCompletionResponse,
+    ChatCompletionResponseChoice,
+    ChatMessage,
+    UsageInfo,
+)
+from comps.cores.proto.docarray import LLMParams, RerankerParms, RetrieverParms
+from fastapi import Request
+from fastapi.responses import StreamingResponse
 from langchain_core.prompts import PromptTemplate
 
 
@@ -173,14 +183,22 @@ def align_generator(self, gen, **kwargs):
     yield "data: [DONE]\n\n"
 
 
-class ChatQnAService:
+class ChatQnAService(Gateway):
     def __init__(self, host="0.0.0.0", port=8000):
         self.host = host
         self.port = port
         ServiceOrchestrator.align_inputs = align_inputs
         ServiceOrchestrator.align_outputs = align_outputs
         ServiceOrchestrator.align_generator = align_generator
         self.megaservice = ServiceOrchestrator()
+
+        super().__init__(
+            megaservice=ServiceOrchestrator(),
+            host=self.host,
+            port=self.port,
+            endpoint=str(MegaServiceEndpoint.CHAT_QNA),
+            input_datatype=ChatCompletionRequest,
+            output_datatype=ChatCompletionResponse,
+        )
 
     def add_remote_service(self):
 
@@ -223,7 +241,6 @@ def add_remote_service(self):
         self.megaservice.flow_to(embedding, retriever)
         self.megaservice.flow_to(retriever, rerank)
         self.megaservice.flow_to(rerank, llm)
-        self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
 
     def add_remote_service_without_rerank(self):
 
@@ -256,7 +273,6 @@ def add_remote_service_without_rerank(self):
         self.megaservice.add(embedding).add(retriever).add(llm)
         self.megaservice.flow_to(embedding, retriever)
         self.megaservice.flow_to(retriever, llm)
-        self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
 
     def add_remote_service_with_guardrails(self):
         guardrail_in = MicroService(
@@ -314,7 +330,55 @@ def add_remote_service_with_guardrails(self):
         self.megaservice.flow_to(retriever, rerank)
         self.megaservice.flow_to(rerank, llm)
         # self.megaservice.flow_to(llm, guardrail_out)
-        self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
+
+    async def handle_request(self, request: Request):
+        data = await request.json()
+        stream_opt = data.get("stream", True)
+        chat_request = ChatCompletionRequest.parse_obj(data)
+        prompt = self._handle_message(chat_request.messages)
+        parameters = LLMParams(
+            max_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024,
+            top_k=chat_request.top_k if chat_request.top_k else 10,
+            top_p=chat_request.top_p if chat_request.top_p else 0.95,
+            temperature=chat_request.temperature if chat_request.temperature else 0.01,
+            frequency_penalty=chat_request.frequency_penalty if chat_request.frequency_penalty else 0.0,
+            presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0,
+            repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
+            streaming=stream_opt,
+            chat_template=chat_request.chat_template if chat_request.chat_template else None,
+        )
+        retriever_parameters = RetrieverParms(
+            search_type=chat_request.search_type if chat_request.search_type else "similarity",
+            k=chat_request.k if chat_request.k else 4,
+            distance_threshold=chat_request.distance_threshold if chat_request.distance_threshold else None,
+            fetch_k=chat_request.fetch_k if chat_request.fetch_k else 20,
+            lambda_mult=chat_request.lambda_mult if chat_request.lambda_mult else 0.5,
+            score_threshold=chat_request.score_threshold if chat_request.score_threshold else 0.2,
+        )
+        reranker_parameters = RerankerParms(
+            top_n=chat_request.top_n if chat_request.top_n else 1,
+        )
+        result_dict, runtime_graph = await self.megaservice.schedule(
+            initial_inputs={"text": prompt},
+            llm_parameters=parameters,
+            retriever_parameters=retriever_parameters,
+            reranker_parameters=reranker_parameters,
+        )
+        for node, response in result_dict.items():
+            if isinstance(response, StreamingResponse):
+                return response
+        last_node = runtime_graph.all_leaves()[-1]
+        response = result_dict[last_node]["text"]
+        choices = []
+        usage = UsageInfo()
+        choices.append(
+            ChatCompletionResponseChoice(
+                index=0,
+                message=ChatMessage(role="assistant", content=response),
+                finish_reason="stop",
+            )
+        )
+        return ChatCompletionResponse(model="chatqna", choices=choices, usage=usage)
 
 
 if __name__ == "__main__":
@@ -331,3 +395,5 @@ def add_remote_service_with_guardrails(self):
         chatqna.add_remote_service_with_guardrails()
     else:
         chatqna.add_remote_service()
+
+    # chatqna.start()
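The new handle_request threads retriever and reranker knobs through the same JSON body as the LLM parameters, so one request can tune the whole pipeline. A sketch of a non-streaming call, assuming the default MEGA_SERVICE_PORT of 8888 and that MegaServiceEndpoint.CHAT_QNA resolves to "/v1/chatqna":

import requests

resp = requests.post(
    "http://localhost:8888/v1/chatqna",  # path assumed from MegaServiceEndpoint.CHAT_QNA
    json={
        "messages": "What is the OPEA project?",
        "stream": False,    # defaults to True; False yields a ChatCompletionResponse
        "max_tokens": 256,  # picked up by LLMParams
        "k": 4,             # picked up by RetrieverParms
        "top_n": 1,         # picked up by RerankerParms
    },
)
print(resp.json()["choices"][0]["message"]["content"])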
63 changes: 59 additions & 4 deletions CodeGen/codegen.py
@@ -4,19 +4,36 @@
 import asyncio
 import os
 
-from comps import CodeGenGateway, MicroService, ServiceOrchestrator, ServiceType
+from comps import Gateway, MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceType
+from comps.cores.proto.api_protocol import (
+    ChatCompletionRequest,
+    ChatCompletionResponse,
+    ChatCompletionResponseChoice,
+    ChatMessage,
+    UsageInfo,
+)
+from comps.cores.proto.docarray import LLMParams
+from fastapi import Request
+from fastapi.responses import StreamingResponse
 
 MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
 MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 7778))
 LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")
 LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000))
 
 
-class CodeGenService:
+class CodeGenService(Gateway):
     def __init__(self, host="0.0.0.0", port=8000):
         self.host = host
         self.port = port
-        self.megaservice = ServiceOrchestrator()
+        super().__init__(
+            megaservice=ServiceOrchestrator(),
+            host=self.host,
+            port=self.port,
+            endpoint=str(MegaServiceEndpoint.CODE_GEN),
+            input_datatype=ChatCompletionRequest,
+            output_datatype=ChatCompletionResponse,
+        )
 
     def add_remote_service(self):
         llm = MicroService(
@@ -28,7 +45,45 @@ def add_remote_service(self):
             service_type=ServiceType.LLM,
         )
         self.megaservice.add(llm)
-        self.gateway = CodeGenGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
+
+    async def handle_request(self, request: Request):
+        data = await request.json()
+        stream_opt = data.get("stream", True)
+        chat_request = ChatCompletionRequest.parse_obj(data)
+        prompt = self._handle_message(chat_request.messages)
+        parameters = LLMParams(
+            max_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024,
+            top_k=chat_request.top_k if chat_request.top_k else 10,
+            top_p=chat_request.top_p if chat_request.top_p else 0.95,
+            temperature=chat_request.temperature if chat_request.temperature else 0.01,
+            frequency_penalty=chat_request.frequency_penalty if chat_request.frequency_penalty else 0.0,
+            presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0,
+            repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
+            streaming=stream_opt,
+        )
+        result_dict, runtime_graph = await self.megaservice.schedule(
+            initial_inputs={"query": prompt}, llm_parameters=parameters
+        )
+        for node, response in result_dict.items():
+            # This assumes the last microservice in the megaservice is the LLM.
+            if (
+                isinstance(response, StreamingResponse)
+                and node == list(self.megaservice.services.keys())[-1]
+                and self.megaservice.services[node].service_type == ServiceType.LLM
+            ):
+                return response
+        last_node = runtime_graph.all_leaves()[-1]
+        response = result_dict[last_node]["text"]
+        choices = []
+        usage = UsageInfo()
+        choices.append(
+            ChatCompletionResponseChoice(
+                index=0,
+                message=ChatMessage(role="assistant", content=response),
+                finish_reason="stop",
+            )
+        )
+        return ChatCompletionResponse(model="codegen", choices=choices, usage=usage)
 
 
 if __name__ == "__main__":
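CodeGen keeps the streaming default: unless the client sends "stream": false, the LLM node's StreamingResponse is returned as-is. A sketch of a non-streaming call, assuming the default MEGA_SERVICE_PORT of 7778 and that MegaServiceEndpoint.CODE_GEN resolves to "/v1/codegen":

import requests

resp = requests.post(
    "http://localhost:7778/v1/codegen",  # path assumed from MegaServiceEndpoint.CODE_GEN
    json={"messages": "Implement binary search in Python.", "stream": False},
)
print(resp.json()["choices"][0]["message"]["content"])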
63 changes: 59 additions & 4 deletions CodeTrans/code_translation.py
@@ -4,19 +4,35 @@
 import asyncio
 import os
 
-from comps import CodeTransGateway, MicroService, ServiceOrchestrator
+from comps import Gateway, MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceType
+from comps.cores.proto.api_protocol import (
+    ChatCompletionRequest,
+    ChatCompletionResponse,
+    ChatCompletionResponseChoice,
+    ChatMessage,
+    UsageInfo,
+)
+from fastapi import Request
+from fastapi.responses import StreamingResponse
 
 MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
 MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 7777))
 LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")
 LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000))
 
 
-class CodeTransService:
+class CodeTransService(Gateway):
    def __init__(self, host="0.0.0.0", port=8000):
         self.host = host
         self.port = port
-        self.megaservice = ServiceOrchestrator()
+        super().__init__(
+            megaservice=ServiceOrchestrator(),
+            host=self.host,
+            port=self.port,
+            endpoint=str(MegaServiceEndpoint.CODE_TRANS),
+            input_datatype=ChatCompletionRequest,
+            output_datatype=ChatCompletionResponse,
+        )
 
     def add_remote_service(self):
         llm = MicroService(
@@ -27,7 +43,46 @@ def add_remote_service(self):
             use_remote_service=True,
         )
         self.megaservice.add(llm)
-        self.gateway = CodeTransGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
+
+    async def handle_request(self, request: Request):
+        data = await request.json()
+        language_from = data["language_from"]
+        language_to = data["language_to"]
+        source_code = data["source_code"]
+        prompt_template = """
+### System: Please translate the following {language_from} codes into {language_to} codes.
+
+### Original codes:
+'''{language_from}
+
+{source_code}
+
+'''
+
+### Translated codes:
+"""
+        prompt = prompt_template.format(language_from=language_from, language_to=language_to, source_code=source_code)
+        result_dict, runtime_graph = await self.megaservice.schedule(initial_inputs={"query": prompt})
+        for node, response in result_dict.items():
+            # This assumes the last microservice in the megaservice is the LLM.
+            if (
+                isinstance(response, StreamingResponse)
+                and node == list(self.megaservice.services.keys())[-1]
+                and self.megaservice.services[node].service_type == ServiceType.LLM
+            ):
+                return response
+        last_node = runtime_graph.all_leaves()[-1]
+        response = result_dict[last_node]["text"]
+        choices = []
+        usage = UsageInfo()
+        choices.append(
+            ChatCompletionResponseChoice(
+                index=0,
+                message=ChatMessage(role="assistant", content=response),
+                finish_reason="stop",
+            )
+        )
+        return ChatCompletionResponse(model="codetrans", choices=choices, usage=usage)
 
 
 if __name__ == "__main__":
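Unlike the other examples, CodeTrans reads raw language_from / language_to / source_code fields from the JSON body and builds the prompt itself rather than parsing a ChatCompletionRequest, and its schedule() call passes no llm_parameters, so whether the reply streams depends on the orchestrator's defaults. A sketch that tolerates either case, assuming the default MEGA_SERVICE_PORT of 7777 and that MegaServiceEndpoint.CODE_TRANS resolves to "/v1/codetrans":

import requests

resp = requests.post(
    "http://localhost:7777/v1/codetrans",  # path assumed from MegaServiceEndpoint.CODE_TRANS
    json={
        "language_from": "Go",
        "language_to": "Python",
        "source_code": "func add(a int, b int) int { return a + b }",
    },
    stream=True,  # handle a streaming (SSE) reply as well as a plain JSON one
)
for chunk in resp.iter_content(chunk_size=None):
    print(chunk.decode("utf-8", errors="replace"), end="")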