-
Notifications
You must be signed in to change notification settings - Fork 2.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add FastAPI v1/completions/ endpoint #12101
base: main
Are you sure you want to change the base?
Changes from 1 commit
b96c9ff
2e10482
66feb29
232037d
21191cf
75feb42
a9395b4
179859f
40a0845
fca66a4
48d422b
436e676
04b80eb
c52f979
d00593f
6e715fe
9e93a89
e128274
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
Signed-off-by: Abhishree <[email protected]>
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -71,6 +71,7 @@ async def check_triton_health(): | |
f"http://{triton_settings.triton_service_ip}:{str(triton_settings.triton_service_port)}/v2/health/ready" | ||
) | ||
logging.info(f"Attempting to connect to Triton server at: {triton_url}") | ||
print("---triton_url---", triton_url) | ||
try: | ||
response = requests.get(triton_url, timeout=5) | ||
if response.status_code == 200: | ||
|
@@ -85,7 +86,7 @@ async def check_triton_health(): | |
def completions_v1(request: CompletionRequest): | ||
try: | ||
print("---hello----") | ||
url = triton_settings.triton_service_ip + ":" + str(triton_settings.triton_service_port) | ||
url = f"http://{triton_settings.triton_service_ip}:{triton_settings.triton_service_port}" | ||
nq = NemoQueryLLMPyTorch(url=url, model_name=request.model) | ||
print("---request----", request) | ||
output = nq.query_llm( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @marta-sd I looked at it, it seems to me that the call stack of it will go to |
||
|
@@ -102,5 +103,5 @@ def completions_v1(request: CompletionRequest): | |
"output": output[0][0], | ||
} | ||
except Exception as error: | ||
logging.error("An exception occurred with the post request to /v1/completions/ endpoint:", error) | ||
logging.error(f"An exception occurred with the post request to /v1/completions/ endpoint: {error}") | ||
return {"error": "An exception occurred"} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: this might be blocking too; it's recommended to use `aiohttp` instead of `requests` inside async functions.