Skip to content

Commit

Permalink
Add models endpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
vmpuri committed Aug 2, 2024
1 parent 6401f55 commit 41ccc69
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 52 deletions.
40 changes: 40 additions & 0 deletions api/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from typing import Any, Dict, List, Optional, Union


from dataclasses import dataclass

from download import is_model_downloaded, load_model_configs
from pwd import getpwuid

import os
import time

@dataclass
class ModelInfo:
    """Information about a model that can be used to generate completions.

    Field set (id / created / object="model") appears to mirror the model
    object of the OpenAI models API — presumably for client compatibility;
    confirm against the endpoint's consumers.
    """

    # Model identifier reported to clients (model_config.name at the call site).
    id: str
    # Creation time as an integer Unix timestamp (directory ctime at the call site).
    created: int
    # Unix user name that owns the model files on disk.
    owner: str
    # Constant object-type tag for a single model entry.
    object: str = "model"


@dataclass
class ModelInfoResponse:
    """A list of models that can be used to generate completions.

    Wrapper returned by get_model_info_list; serialized with
    dataclasses.asdict as the /models endpoint response body.
    """

    # One ModelInfo per locally downloaded model.
    data: List[ModelInfo]
    # Constant object-type tag for list responses.
    object: str = "list"


def get_model_info_list(args) -> ModelInfoResponse:
    """Return the locally downloaded models as a ModelInfoResponse.

    Args:
        args: Parsed CLI arguments; only ``args.model_directory`` (a Path
            to the model download root) is read here.

    Returns:
        ModelInfoResponse whose ``data`` contains one ModelInfo per model
        that is actually present on disk; configured-but-not-downloaded
        models are omitted.
    """
    # The model directory does not vary per model — read it once, not
    # on every loop iteration.
    model_dir = args.model_directory
    data = []
    for model_id, model_config in load_model_configs().items():
        if is_model_downloaded(model_id, model_dir):
            path = model_dir / model_id
            # "created" is the directory's ctime; "owner" is the Unix user
            # owning it. pwd is POSIX-only — assumes the server never runs
            # on Windows; TODO confirm.
            created = int(os.path.getctime(path))
            owner = getpwuid(os.stat(path).st_uid).pw_name
            # NOTE(review): the download check keys on model_id but the
            # reported id is model_config.name — verify these two always
            # identify the same model.
            data.append(ModelInfo(id=model_config.name, created=created, owner=owner))
    return ModelInfoResponse(data=data)
103 changes: 51 additions & 52 deletions server.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,78 +10,78 @@
from typing import Dict, List, Union

from api.api import AssistantMessage, CompletionRequest, OpenAiApiGenerator, UserMessage
from api.models import get_model_info_list, ModelInfoResponse

from build.builder import BuilderArgs, TokenizerArgs
from flask import Flask, request, Response
from generate import GeneratorArgs
from download import load_model_configs, is_model_downloaded


"""
Creates a flask app that can be used to serve the model as a chat API.
"""
app = Flask(__name__)
# Messages and gen are kept global so they can be accessed by the flask app endpoints.
messages: list = []
gen: OpenAiApiGenerator = None
def create_app(args):
"""
Creates a flask app that can be used to serve the model as a chat API.
"""
app = Flask(__name__)

gen: OpenAiApiGenerator = initialize_generator(args)

def _del_none(d: Union[Dict, List]) -> Union[Dict, List]:
"""Recursively delete None values from a dictionary."""
if type(d) is dict:
return {k: _del_none(v) for k, v in d.items() if v}
elif type(d) is list:
return [_del_none(v) for v in d if v]
return d

def _del_none(d: Union[Dict, List]) -> Union[Dict, List]:
"""Recursively delete None values from a dictionary."""
if type(d) is dict:
return {k: _del_none(v) for k, v in d.items() if v}
elif type(d) is list:
return [_del_none(v) for v in d if v]
return d

@app.route("/chat", methods=["POST"])
def chat_endpoint():
"""
Endpoint for the Chat API. This endpoint is used to generate a response to a user prompt.
This endpoint emulates the behavior of the OpenAI Chat API. (https://platform.openai.com/docs/api-reference/chat)

** Warning ** : Not all arguments of the CompletionRequest are consumed.
@app.route("/chat", methods=["POST"])
def chat_endpoint():
"""
Endpoint for the Chat API. This endpoint is used to generate a response to a user prompt.
This endpoint emulates the behavior of the OpenAI Chat API. (https://platform.openai.com/docs/api-reference/chat)
See https://github.com/pytorch/torchchat/issues/973 and the OpenAiApiGenerator class for more details.
** Warning ** : Not all arguments of the CompletionRequest are consumed.
If stream is set to true, the response will be streamed back as a series of CompletionResponseChunk objects. Otherwise,
a single CompletionResponse object will be returned.
"""
See https://github.com/pytorch/torchchat/issues/973 and the OpenAiApiGenerator class for more details.
If stream is set to true, the response will be streamed back as a series of CompletionResponseChunk objects. Otherwise,
a single CompletionResponse object will be returned.
"""

print(" === Completion Request ===")
print(" === Completion Request ===")

# Parse the request in to a CompletionRequest object
data = request.get_json()
req = CompletionRequest(**data)
# Parse the request in to a CompletionRequest object
data = request.get_json()
req = CompletionRequest(**data)

# Add the user message to our internal message history.
messages.append(UserMessage(**req.messages[-1]))
if data.get("stream") == "true":

if data.get("stream") == "true":
def chunk_processor(chunked_completion_generator):
"""Inline function for postprocessing CompletionResponseChunk objects.
def chunk_processor(chunked_completion_generator):
"""Inline function for postprocessing CompletionResponseChunk objects.
Here, we just jsonify the chunk and yield it as a string.
"""
for chunk in chunked_completion_generator:
if (next_tok := chunk.choices[0].delta.content) is None:
next_tok = ""
print(next_tok, end="")
yield json.dumps(_del_none(asdict(chunk)))

Here, we just jsonify the chunk and yield it as a string.
"""
messages.append(AssistantMessage(content=""))
for chunk in chunked_completion_generator:
if (next_tok := chunk.choices[0].delta.content) is None:
next_tok = ""
messages[-1].content += next_tok
print(next_tok, end="")
yield json.dumps(_del_none(asdict(chunk)))
return Response(
chunk_processor(gen.chunked_completion(req)), mimetype="text/event-stream"
)
else:
response = gen.sync_completion(req)

return Response(
chunk_processor(gen.chunked_completion(req)), mimetype="text/event-stream"
)
else:
response = gen.sync_completion(req)
return json.dumps(_del_none(asdict(response)))

messages.append(response.choices[0].message)
print(messages[-1].content)
@app.route("/models", methods=["GET"])
def models_endpoint():
return json.dumps(asdict(get_model_info_list(args)))

return json.dumps(_del_none(asdict(response)))
return app


def initialize_generator(args) -> OpenAiApiGenerator:
Expand All @@ -103,6 +103,5 @@ def initialize_generator(args) -> OpenAiApiGenerator:


def main(args):
global gen
gen = initialize_generator(args)
app = create_app(args)
app.run()

0 comments on commit 41ccc69

Please sign in to comment.