Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SN1-406-improve-api-docs #627

Merged
11 commits merged on Feb 27, 2025
33 changes: 31 additions & 2 deletions validator_api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,42 @@ async def lifespan(app: FastAPI):
pass


app = FastAPI(lifespan=lifespan)
# OpenAPI tag metadata shown in the generated docs; keeps the grouping of
# routes consistent with the tags passed to include_router below.
_OPENAPI_TAGS = [
    {
        "name": "GPT Endpoints",
        "description": "Endpoints for chat completions, web retrieval, and test time inference",
    },
    {
        "name": "API Management",
        "description": "Endpoints for API key management and validation",
    },
]

# Application instance with interactive docs exposed at /docs and /redoc.
app = FastAPI(
    title="Validator API",
    description="API for interacting with the validator network and miners",
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
    openapi_url="/openapi.json",
    openapi_tags=_OPENAPI_TAGS,
    lifespan=lifespan,
)

# Mount the routers under their documented tag groups.
app.include_router(gpt_router, tags=["GPT Endpoints"])
app.include_router(api_management_router, tags=["API Management"])


@app.get("/health")
@app.get(
    "/health",
    summary="Health check endpoint",
    description="Simple endpoint to check if the API is running",
    tags=["Health"],
    response_description="Status of the API",
)
async def health():
    """Report service liveness.

    Returns:
        dict: ``{"status": "ok"}`` whenever the API process is up and able
        to serve requests. No dependencies are checked here.
    """
    status_payload = {"status": "ok"}
    return status_payload


Expand Down
21 changes: 16 additions & 5 deletions validator_api/chat_completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ async def stream_from_first_response(
) -> AsyncGenerator[str, None]:
first_valid_response = None
response_start_time = time.monotonic()

try:
# Keep looping until we find a valid response or run out of tasks
while responses and first_valid_response is None:
Expand Down Expand Up @@ -245,11 +246,20 @@ async def chat_completion(
collected_chunks_list = [[] for _ in uids]
timings_list = [[] for _ in uids]

if not body.get("sampling_parameters"):
raise HTTPException(status_code=422, detail="Sampling parameters are required")
timeout_seconds = max(
30, max(0, math.floor(math.log2(body["sampling_parameters"].get("max_new_tokens", 256) / 256))) * 10 + 30
30,
max(
0,
math.floor(
math.log2(
body.get("sampling_parameters", shared_settings.SAMPLING_PARAMS).get("max_new_tokens", 256) / 256
)
),
)
* 10
+ 30,
)

if STREAM:
# Create tasks for all miners
response_tasks = [
Expand Down Expand Up @@ -297,7 +307,7 @@ async def chat_completion(
raise HTTPException(status_code=502, detail="No valid response received")

asyncio.create_task(
collect_remainin_nonstream_responses(
collect_remaining_nonstream_responses(
pending=pending,
collected_responses=collected_responses,
body=body,
Expand All @@ -308,14 +318,15 @@ async def chat_completion(
return first_valid_response[0] # Return only the response object, not the chunks


async def collect_remainin_nonstream_responses(
async def collect_remaining_nonstream_responses(
pending: set[asyncio.Task],
collected_responses: list,
body: dict,
uids: list,
timings_list: list,
):
"""Wait for all pending miner tasks to complete and append their responses to the scoring queue."""

try:
# Wait for all remaining tasks; allow exceptions to be returned.
remaining_responses = await asyncio.gather(*pending, return_exceptions=True)
Expand Down
Loading