implement api /v1/chat/completion #3

Merged · 10 commits · Jul 31, 2024
2 changes: 1 addition & 1 deletion .dockerignore
@@ -11,4 +11,4 @@ pnpm-lock.yaml
 eslint.config.mjs
 LICENSE
 volumes
-docker-compose,yaml
+docker-compose.yaml
3 changes: 2 additions & 1 deletion .env
@@ -3,8 +3,9 @@ INFERENCE_ENG=llamacpp
 INFERENCE_ENG_PORT=8080
 INFERENCE_ENG_VERSION=server--b1-2321a5e
 NUM_CPU_CORES=8.00
+NUM_CPU_CORES_EMBEDDING=4.00
 EMBEDDING_ENG=embedding_eng
-NUM_CPU_CORES_EMBEDDING=4.00
+EMBEDDING_ENG_PORT=8081
 LANGUAGE_MODEL_NAME=Phi3-mini-4k-instruct-Q4.gguf
 LANGUAGE_MODEL_URL=https://huggingface.co/aisuko/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi3-mini-4k-instruct-Q4.gguf?download=true
 EMBEDDING_MODEL_NAME=all-MiniLM-L6-v2-Q4_K_M-v2.gguf
8 changes: 6 additions & 2 deletions Makefile
@@ -4,13 +4,16 @@ CONTAINER_NAME:=voyager:v0.1.0
 APP_PORT:=8000
 # compose build related
 ENV_FILE:=.env
+
 INFERENCE_ENG:=llamacpp
 INFERENCE_ENG_PORT:=8080
 INFERENCE_ENG_VERSION:=server--b1-2321a5e
 NUM_CPU_CORES:=8.00
+NUM_CPU_CORES_EMBEDDING:=4.00
+
+
 EMBEDDING_ENG:=embedding_eng
 EMBEDDING_ENG_PORT:=8081
-NUM_CPU_CORES_EMBEDDING:=4.00
 LANGUAGE_MODEL_NAME:=Phi3-mini-4k-instruct-Q4.gguf
 LANGUAGE_MODEL_URL:=https://huggingface.co/aisuko/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi3-mini-4k-instruct-Q4.gguf?download=true
 EMBEDDING_MODEL_NAME:=all-MiniLM-L6-v2-Q4_K_M-v2.gguf
@@ -33,8 +36,9 @@ env:
 	@echo "INFERENCE_ENG_PORT=$(INFERENCE_ENG_PORT)">> $(ENV_FILE)
 	@echo "INFERENCE_ENG_VERSION=$(INFERENCE_ENG_VERSION)">> $(ENV_FILE)
 	@echo "NUM_CPU_CORES=$(NUM_CPU_CORES)">> $(ENV_FILE)
+	@echo "NUM_CPU_CORES_EMBEDDING=$(NUM_CPU_CORES_EMBEDDING)">> $(ENV_FILE)
 	@echo "EMBEDDING_ENG=$(EMBEDDING_ENG)">> $(ENV_FILE)
-	@echo "NUM_CPU_CORES_EMBEDDING=$(NUM_CPU_CORES_EMBEDDING)">> $(ENV_FILE)
+	@echo "EMBEDDING_ENG_PORT=$(EMBEDDING_ENG_PORT)">> $(ENV_FILE)
 	@echo "LANGUAGE_MODEL_NAME=$(LANGUAGE_MODEL_NAME)">> $(ENV_FILE)
 	@echo "LANGUAGE_MODEL_URL=$(LANGUAGE_MODEL_URL)">> $(ENV_FILE)
 	@echo "EMBEDDING_MODEL_NAME=$(EMBEDDING_MODEL_NAME)">> $(ENV_FILE)
27 changes: 24 additions & 3 deletions README.md
@@ -1,9 +1,9 @@
 # VOYAGER
 This project is OpenAI-like API set for SkywardAI project.
 
-# BUILD & RUN
+## BUILD & RUN
 
-## Local Machine
+### Local Machine
 * This project developed on Node Version `v20.15.0`.
 * Make sure you installed `Node.js`.
 
@@ -19,4 +19,25 @@ npm install
 
 # RUN
 npm run
-```
+```
+
+### Container
+**Please make sure you have `docker` and `make` installed on your server.**
+```shell
+# to start all the required containers, run
+make up
+# to only build this project into a docker image, run
+make build
+# to start only this project in docker, run
+make start
+# PLEASE NOTE: make start automatically runs make build first
+```
+
+## Lint
+To lint your code, simply run
+```shell
+npm run lint
+```
+
+## Monitor
+This project has monitoring built in with swagger-stats. Once the project is running, go to `<Your Server>:<Your Port>/swagger-stats`.
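
Once the stack is up (`make up`), the endpoint this PR implements can be exercised from Node directly. A minimal sketch, assuming the router mounts the handler at the OpenAI-style path `/v1/chat/completions` on the default `APP_PORT` of 8000, and that any non-empty Bearer token passes the check in `chatCompletion`; the routes file is not part of this diff, so both are assumptions:

```javascript
// Hypothetical client for the new chat completion endpoint (non-streaming).
// Route path, host, and port are assumptions; adjust to routes/index.js.
const resp = await fetch('http://localhost:8000/v1/chat/completions', {
  method: 'POST',
  headers: {
    'Content-Type': 'application/json',
    // the handler only checks that a Bearer token is present
    Authorization: 'Bearer any-non-empty-key'
  },
  body: JSON.stringify({
    messages: [{ role: 'user', content: 'Say hello in one sentence.' }],
    stream: false
  })
});

const completion = await resp.json();
console.log(completion.choices[0].message.content);
```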
70 changes: 70 additions & 0 deletions actions/inference.js
@@ -0,0 +1,70 @@
import { formatOpenAIContext } from "../tools/formatContext.js";
import { generateFingerprint } from "../tools/generator.js";
import { post } from "../tools/request.js";

// Shape the reply like OpenAI's chat completion objects: a single choice,
// carried in `delta` for streaming chunks and in `message` otherwise.
function generateResponseContent(id, object, model, system_fingerprint, stream, content, stopped) {
    const resp = {
        id,
        object,
        created: Math.floor(Date.now() / 1000), // OpenAI's `created` is Unix seconds, not milliseconds
        model,
        system_fingerprint,
        choices: [{
            index: 0,
            [stream ? 'delta' : 'message']: {
                role: 'assistant',
                content
            },
            logprobs: null,
            finish_reason: stopped ? 'stop' : null
        }],
    };
    if(!stream) {
        // token accounting is not wired up yet, so usage is zeroed placeholders
        resp.usage = {
            prompt_tokens: 0,
            completion_tokens: 0,
            total_tokens: 0
        };
    }
    return resp;
}

export async function chatCompletion(req, res) {
    const api_key = (req.headers.authorization || '').split('Bearer ').pop();
    if(!api_key) {
        res.status(401).send('Not Authorized');
        return;
    }

    const system_fingerprint = generateFingerprint();
    const { messages, ...request_body } = req.body;
    request_body.prompt = formatOpenAIContext(messages);
    const model = request_body.model || process.env.LANGUAGE_MODEL_NAME;

    if(request_body.stream) {
        res.setHeader("Content-Type", "text/event-stream");
        res.setHeader("Cache-Control", "no-cache");
        res.setHeader("X-Accel-Buffering", "no");
        res.setHeader("Connection", "Keep-Alive");

        const eng_resp = await post('completion', { body: request_body }, { getJSON: false });
        const reader = eng_resp.body.pipeThrough(new TextDecoderStream()).getReader();
        while(true) {
            const { value, done } = await reader.read();
            if(done) break;
            // a single read may carry several "data: {...}" events; parse each one
            // (assumes every event arrives whole within one read)
            for(const data of value.split("data: ")) {
                if(!data.trim()) continue;
                const { content, stop } = JSON.parse(data);
                res.write(JSON.stringify(generateResponseContent(api_key, 'chat.completion.chunk', model, system_fingerprint, true, content, stop)) + '\n\n');
            }
        }
        res.end();
    } else {
        const eng_resp = await post('completion', { body: request_body });
        // prefer the model name reported by the engine; shadows the outer fallback
        const { model, content } = eng_resp;
        const response_json = generateResponseContent(
            api_key, 'chat.completion', model, system_fingerprint,
            false, content, true
        );
        res.send(response_json);
    }
}
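
For the streaming path, note that the handler writes each chunk as a bare JSON object followed by a blank line; despite the `text/event-stream` header there is no `data:` framing on the way out. A sketch of a consumer, under the same route assumptions as the README example above, and assuming each chunk arrives whole in a single read:

```javascript
// Hypothetical streaming consumer; route, host, and port are assumptions.
const resp = await fetch('http://localhost:8000/v1/chat/completions', {
  method: 'POST',
  headers: {
    'Content-Type': 'application/json',
    Authorization: 'Bearer any-non-empty-key'
  },
  body: JSON.stringify({
    messages: [{ role: 'user', content: 'Tell me a short story.' }],
    stream: true
  })
});

const reader = resp.body.pipeThrough(new TextDecoderStream()).getReader();
while (true) {
  const { value, done } = await reader.read();
  if (done) break;
  // one read may carry several "<json>\n\n" chunks
  for (const piece of value.split('\n\n')) {
    if (!piece.trim()) continue;
    const chunk = JSON.parse(piece);
    process.stdout.write(chunk.choices[0].delta.content ?? '');
  }
}
```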
4 changes: 2 additions & 2 deletions docker-compose.yaml
@@ -12,7 +12,7 @@ services:
     expose:
       - 8080
     ports:
-      - 8080:8080
+      - ${INFERENCE_ENG_PORT}:8080
     command: ["-m", "models/${LANGUAGE_MODEL_NAME}","-c","8192"]
 
   embedding_eng:
@@ -28,7 +28,7 @@
     expose:
       - 8080
     ports:
-      - 8082:8080
+      - ${EMBEDDING_ENG_PORT}:8080
     command: ["-m", "models/${EMBEDDING_MODEL_NAME}","--embeddings","--pooling","mean","-c","512"]
 
   voyager:
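Both host-port mappings now come from `.env` (written by `make env`), so the embedding engine is published on `EMBEDDING_ENG_PORT` (8081 per the `.env` above) instead of the previously hardcoded 8082, keeping compose, the Makefile, and `.env` in step.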
1 change: 1 addition & 0 deletions eslint.config.mjs
@@ -19,6 +19,7 @@ export default [
        }
    },
    {
+        ignores: ["volumes/*"],
        rules: {
            'no-undef': 'off'
        }
5 changes: 5 additions & 0 deletions index.js
@@ -5,11 +5,16 @@ import { configDotenv } from 'dotenv';
 
 import buildRoutes from './routes/index.js'
 
+import swStats from 'swagger-stats';
+
 configDotenv()
 
 const app = express();
 app.use(cors());
 app.use(bodyParser.json());
+app.use(swStats.getMiddleware({
+    name: "Voyager Swagger Monitor"
+}))
 
 buildRoutes(app);
 
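With this middleware registered before the routes, swagger-stats collects per-request metrics for every API call and serves its dashboard under the `/swagger-stats` path referenced in the README's Monitor section.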
4 changes: 3 additions & 1 deletion package.json
@@ -21,6 +21,8 @@
     "dotenv": "^16.4.5",
     "eslint": "^9.8.0",
     "express": "^4.19.2",
-    "globals": "^15.8.0"
+    "globals": "^15.8.0",
+    "prom-client": "12",
+    "swagger-stats": "^0.99.7"
   }
 }
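Note: `prom-client` is pinned to major version 12, presumably because `swagger-stats` 0.99.x predates the breaking API changes in later `prom-client` releases.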