implement api /v1/chat/completion #3

Merged · 10 commits · Jul 31, 2024
2 changes: 1 addition & 1 deletion .dockerignore
@@ -11,4 +11,4 @@ pnpm-lock.yaml
 eslint.config.mjs
 LICENSE
 volumes
-docker-compose,yaml
+docker-compose.yaml
3 changes: 2 additions & 1 deletion .env
@@ -3,8 +3,9 @@ INFERENCE_ENG=llamacpp
 INFERENCE_ENG_PORT=8080
 INFERENCE_ENG_VERSION=server--b1-2321a5e
 NUM_CPU_CORES=8.00
+NUM_CPU_CORES_EMBEDDING=4.00
 EMBEDDING_ENG=embedding_eng
-NUM_CPU_CORES_EMBEDDING=4.00
+EMBEDDING_ENG_PORT=8081
 LANGUAGE_MODEL_NAME=Phi3-mini-4k-instruct-Q4.gguf
 LANGUAGE_MODEL_URL=https://huggingface.co/aisuko/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi3-mini-4k-instruct-Q4.gguf?download=true
 EMBEDDING_MODEL_NAME=all-MiniLM-L6-v2-Q4_K_M-v2.gguf
8 changes: 6 additions & 2 deletions Makefile
@@ -4,13 +4,16 @@ CONTAINER_NAME:=voyager:v0.1.0
 APP_PORT:=8000
 # compose build related
 ENV_FILE:=.env
+
 INFERENCE_ENG:=llamacpp
 INFERENCE_ENG_PORT:=8080
 INFERENCE_ENG_VERSION:=server--b1-2321a5e
 NUM_CPU_CORES:=8.00
+NUM_CPU_CORES_EMBEDDING:=4.00
+
+
 EMBEDDING_ENG:=embedding_eng
 EMBEDDING_ENG_PORT:=8081
-NUM_CPU_CORES_EMBEDDING:=4.00
 LANGUAGE_MODEL_NAME:=Phi3-mini-4k-instruct-Q4.gguf
 LANGUAGE_MODEL_URL:=https://huggingface.co/aisuko/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi3-mini-4k-instruct-Q4.gguf?download=true
 EMBEDDING_MODEL_NAME:=all-MiniLM-L6-v2-Q4_K_M-v2.gguf
@@ -33,8 +36,9 @@ env:
 	@echo "INFERENCE_ENG_PORT=$(INFERENCE_ENG_PORT)">> $(ENV_FILE)
 	@echo "INFERENCE_ENG_VERSION=$(INFERENCE_ENG_VERSION)">> $(ENV_FILE)
 	@echo "NUM_CPU_CORES=$(NUM_CPU_CORES)">> $(ENV_FILE)
+	@echo "NUM_CPU_CORES_EMBEDDING=$(NUM_CPU_CORES_EMBEDDING)">> $(ENV_FILE)
 	@echo "EMBEDDING_ENG=$(EMBEDDING_ENG)">> $(ENV_FILE)
-	@echo "NUM_CPU_CORES_EMBEDDING=$(NUM_CPU_CORES_EMBEDDING)">> $(ENV_FILE)
+	@echo "EMBEDDING_ENG_PORT=$(EMBEDDING_ENG_PORT)">> $(ENV_FILE)
 	@echo "LANGUAGE_MODEL_NAME=$(LANGUAGE_MODEL_NAME)">> $(ENV_FILE)
 	@echo "LANGUAGE_MODEL_URL=$(LANGUAGE_MODEL_URL)">> $(ENV_FILE)
 	@echo "EMBEDDING_MODEL_NAME=$(EMBEDDING_MODEL_NAME)">> $(ENV_FILE)
27 changes: 24 additions & 3 deletions README.md
@@ -1,9 +1,9 @@
 # VOYAGER
 This project is OpenAI-like API set for SkywardAI project.
 
-# BUILD & RUN
+## BUILD & RUN
 
-## Local Machine
+### Local Machine
 * This project developed on Node Version `v20.15.0`.
 * Make sure you installed `Node.js`.
 
@@ -19,4 +19,25 @@ npm install
 
 # RUN
 npm run
-```
+```
+
+### Container
+**Please make sure you have `docker` and `make` installed on your server.**
+```shell
+# to start all the required containers, run
+make up
+# to only build this project into a docker image, run
+make build
+# to start only this project in docker, run
+make start
+# PLEASE NOTE: make start automatically runs make build first
+```
+
+## Lint
+To lint your code, simply run
+```shell
+npm run lint
+```
+
+## Monitor
+This project has monitoring built in with swagger-stats. Once the project is running, go to `<Your Server>:<Your Port>/swagger-stats`.
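
Once the stack is up (`make up`), the endpoint this PR implements can be exercised from Node directly. A minimal sketch, assuming the router mounts the handler at the OpenAI-style path `/v1/chat/completions` on the default `APP_PORT` of 8000, and that any non-empty Bearer token passes the check in `chatCompletion`; the routes file is not part of this diff, so both are assumptions:

```javascript
// Hypothetical client for the new chat completion endpoint (non-streaming).
// Route path, host, and port are assumptions; adjust to routes/index.js.
const resp = await fetch('http://localhost:8000/v1/chat/completions', {
  method: 'POST',
  headers: {
    'Content-Type': 'application/json',
    // the handler only checks that a Bearer token is present
    Authorization: 'Bearer any-non-empty-key'
  },
  body: JSON.stringify({
    messages: [{ role: 'user', content: 'Say hello in one sentence.' }],
    stream: false
  })
});

const completion = await resp.json();
console.log(completion.choices[0].message.content);
```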
70 changes: 70 additions & 0 deletions actions/inference.js
@@ -0,0 +1,70 @@
import { formatOpenAIContext } from "../tools/formatContext.js";
import { generateFingerprint } from "../tools/generator.js";
import { post } from "../tools/request.js";

// Shape the reply like OpenAI's chat completion objects: a single choice,
// carried in `delta` for streaming chunks and in `message` otherwise.
function generateResponseContent(id, object, model, system_fingerprint, stream, content, stopped) {
    const resp = {
        id,
        object,
        created: Math.floor(Date.now() / 1000), // OpenAI's `created` is Unix seconds, not milliseconds
        model,
        system_fingerprint,
        choices: [{
            index: 0,
            [stream ? 'delta' : 'message']: {
                role: 'assistant',
                content
            },
            logprobs: null,
            finish_reason: stopped ? 'stop' : null
        }],
    };
    if(!stream) {
        // token accounting is not wired up yet, so usage is zeroed placeholders
        resp.usage = {
            prompt_tokens: 0,
            completion_tokens: 0,
            total_tokens: 0
        };
    }
    return resp;
}

export async function chatCompletion(req, res) {
    const api_key = (req.headers.authorization || '').split('Bearer ').pop();
    if(!api_key) {
        res.status(401).send('Not Authorized');
        return;
    }

    const system_fingerprint = generateFingerprint();
    const { messages, ...request_body } = req.body;
    request_body.prompt = formatOpenAIContext(messages);
    const model = request_body.model || process.env.LANGUAGE_MODEL_NAME;

    if(request_body.stream) {
        res.setHeader("Content-Type", "text/event-stream");
        res.setHeader("Cache-Control", "no-cache");
        res.setHeader("X-Accel-Buffering", "no");
        res.setHeader("Connection", "Keep-Alive");

        const eng_resp = await post('completion', { body: request_body }, { getJSON: false });
        const reader = eng_resp.body.pipeThrough(new TextDecoderStream()).getReader();
        while(true) {
            const { value, done } = await reader.read();
            if(done) break;
            // a single read may carry several "data: {...}" events; parse each one
            // (assumes every event arrives whole within one read)
            for(const data of value.split("data: ")) {
                if(!data.trim()) continue;
                const { content, stop } = JSON.parse(data);
                res.write(JSON.stringify(generateResponseContent(api_key, 'chat.completion.chunk', model, system_fingerprint, true, content, stop)) + '\n\n');
            }
        }
        res.end();
    } else {
        const eng_resp = await post('completion', { body: request_body });
        // prefer the model name reported by the engine; shadows the outer fallback
        const { model, content } = eng_resp;
        const response_json = generateResponseContent(
            api_key, 'chat.completion', model, system_fingerprint,
            false, content, true
        );
        res.send(response_json);
    }
}
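
For the streaming path, note that the handler writes each chunk as a bare JSON object followed by a blank line; despite the `text/event-stream` header there is no `data:` framing on the way out. A sketch of a consumer, under the same route assumptions as the README example above, and assuming each chunk arrives whole in a single read:

```javascript
// Hypothetical streaming consumer; route, host, and port are assumptions.
const resp = await fetch('http://localhost:8000/v1/chat/completions', {
  method: 'POST',
  headers: {
    'Content-Type': 'application/json',
    Authorization: 'Bearer any-non-empty-key'
  },
  body: JSON.stringify({
    messages: [{ role: 'user', content: 'Tell me a short story.' }],
    stream: true
  })
});

const reader = resp.body.pipeThrough(new TextDecoderStream()).getReader();
while (true) {
  const { value, done } = await reader.read();
  if (done) break;
  // one read may carry several "<json>\n\n" chunks
  for (const piece of value.split('\n\n')) {
    if (!piece.trim()) continue;
    const chunk = JSON.parse(piece);
    process.stdout.write(chunk.choices[0].delta.content ?? '');
  }
}
```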
4 changes: 2 additions & 2 deletions docker-compose.yaml
@@ -12,7 +12,7 @@ services:
     expose:
       - 8080
     ports:
-      - 8080:8080
+      - ${INFERENCE_ENG_PORT}:8080
     command: ["-m", "models/${LANGUAGE_MODEL_NAME}","-c","8192"]
 
   embedding_eng:
@@ -28,7 +28,7 @@
     expose:
       - 8080
     ports:
-      - 8082:8080
+      - ${EMBEDDING_ENG_PORT}:8080
     command: ["-m", "models/${EMBEDDING_MODEL_NAME}","--embeddings","--pooling","mean","-c","512"]
 
   voyager:
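Both host-port mappings now come from `.env` (written by `make env`), so the embedding engine is published on `EMBEDDING_ENG_PORT` (8081 per the `.env` above) instead of the previously hardcoded 8082, keeping compose, the Makefile, and `.env` in step.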
1 change: 1 addition & 0 deletions eslint.config.mjs
@@ -19,6 +19,7 @@ export default [
        }
    },
    {
+        ignores: ["volumes/*"],
        rules: {
            'no-undef': 'off'
        }
5 changes: 5 additions & 0 deletions index.js
@@ -5,11 +5,16 @@ import { configDotenv } from 'dotenv';
 
 import buildRoutes from './routes/index.js'
 
+import swStats from 'swagger-stats';
+
 configDotenv()
 
 const app = express();
 app.use(cors());
 app.use(bodyParser.json());
+app.use(swStats.getMiddleware({
+    name: "Voyager Swagger Monitor"
+}))
 
 buildRoutes(app);
 
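With this middleware registered before the routes, swagger-stats collects per-request metrics for every API call and serves its dashboard under the `/swagger-stats` path referenced in the README's Monitor section.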
4 changes: 3 additions & 1 deletion package.json
@@ -21,6 +21,8 @@
     "dotenv": "^16.4.5",
     "eslint": "^9.8.0",
     "express": "^4.19.2",
-    "globals": "^15.8.0"
+    "globals": "^15.8.0",
+    "prom-client": "12",
+    "swagger-stats": "^0.99.7"
   }
 }
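Note: `prom-client` is pinned to major version 12, presumably because `swagger-stats` 0.99.x predates the breaking API changes in later `prom-client` releases.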