diff --git a/.dockerignore b/.dockerignore index 8ca15e0..b0c8185 100644 --- a/.dockerignore +++ b/.dockerignore @@ -11,4 +11,4 @@ pnpm-lock.yaml eslint.config.mjs LICENSE volumes -docker-compose,yaml \ No newline at end of file +docker-compose.yaml \ No newline at end of file diff --git a/.env b/.env index 9ea8439..59ec500 100644 --- a/.env +++ b/.env @@ -3,8 +3,9 @@ INFERENCE_ENG=llamacpp INFERENCE_ENG_PORT=8080 INFERENCE_ENG_VERSION=server--b1-2321a5e NUM_CPU_CORES=8.00 -NUM_CPU_CORES_EMBEDDING=4.00 EMBEDDING_ENG=embedding_eng +EMBEDDING_ENG_PORT=8081 +NUM_CPU_CORES_EMBEDDING=4.00 LANGUAGE_MODEL_NAME=Phi3-mini-4k-instruct-Q4.gguf LANGUAGE_MODEL_URL=https://huggingface.co/aisuko/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi3-mini-4k-instruct-Q4.gguf?download=true EMBEDDING_MODEL_NAME=all-MiniLM-L6-v2-Q4_K_M-v2.gguf diff --git a/Makefile b/Makefile index e9dece0..d5f57cb 100644 --- a/Makefile +++ b/Makefile @@ -4,13 +4,16 @@ CONTAINER_NAME:=voyager:v0.1.0 APP_PORT:=8000 # compose build related ENV_FILE:=.env + INFERENCE_ENG:=llamacpp INFERENCE_ENG_PORT:=8080 INFERENCE_ENG_VERSION:=server--b1-2321a5e NUM_CPU_CORES:=8.00 -NUM_CPU_CORES_EMBEDDING:=4.00 + EMBEDDING_ENG:=embedding_eng +EMBEDDING_ENG_PORT:=8081 +NUM_CPU_CORES_EMBEDDING:=4.00 LANGUAGE_MODEL_NAME:=Phi3-mini-4k-instruct-Q4.gguf LANGUAGE_MODEL_URL:=https://huggingface.co/aisuko/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi3-mini-4k-instruct-Q4.gguf?download=true EMBEDDING_MODEL_NAME:=all-MiniLM-L6-v2-Q4_K_M-v2.gguf @@ -33,8 +36,9 @@ env: @echo "INFERENCE_ENG_PORT=$(INFERENCE_ENG_PORT)">> $(ENV_FILE) @echo "INFERENCE_ENG_VERSION=$(INFERENCE_ENG_VERSION)">> $(ENV_FILE) @echo "NUM_CPU_CORES=$(NUM_CPU_CORES)">> $(ENV_FILE) - @echo "NUM_CPU_CORES_EMBEDDING=$(NUM_CPU_CORES_EMBEDDING)">> $(ENV_FILE) @echo "EMBEDDING_ENG=$(EMBEDDING_ENG)">> $(ENV_FILE) + @echo "EMBEDDING_ENG_PORT=$(EMBEDDING_ENG_PORT)">> $(ENV_FILE) + @echo "NUM_CPU_CORES_EMBEDDING=$(NUM_CPU_CORES_EMBEDDING)">> $(ENV_FILE) @echo 
"LANGUAGE_MODEL_NAME=$(LANGUAGE_MODEL_NAME)">> $(ENV_FILE) @echo "LANGUAGE_MODEL_URL=$(LANGUAGE_MODEL_URL)">> $(ENV_FILE) @echo "EMBEDDING_MODEL_NAME=$(EMBEDDING_MODEL_NAME)">> $(ENV_FILE) diff --git a/README.md b/README.md index a1f1eaa..d0e1ba4 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ # VOYAGER This project is OpenAI-like API set for SkywardAI project. -# BUILD & RUN +## BUILD & RUN -## Local Machine +### Local Machine * This project developed on Node Version `v20.15.0`. * Make sure you installed `Node.js`. @@ -19,4 +19,25 @@ npm install # RUN npm run -``` \ No newline at end of file +``` + +### Container +**Please make sure you have `docker` and `make` installed in your server** +```shell +# to simply start with all needed containers started, please run +make up +# if you just want to build this project to docker container, please run +make build +# if you want to start only this project in docker, please run +make start +# PLEASE NOTE: make start will automatically run make build first +``` + +## Lint +To start lint your code, simply run +```shell +npm run lint +``` + +## Monitor +This project got monitor build with swagger-stats, when you got this project running, just go to `:/swagger-stats` \ No newline at end of file diff --git a/actions/inference.js b/actions/inference.js new file mode 100644 index 0000000..a4afaf8 --- /dev/null +++ b/actions/inference.js @@ -0,0 +1,70 @@ +import { formatOpenAIContext } from "../tools/formatContext.js"; +import { generateFingerprint } from "../tools/generator.js"; +import { post } from "../tools/request.js"; + +function generateResponseContent(id, object, model, system_fingerprint, stream, content, stopped) { + const resp = { + id, + object, + created: Date.now(), + model, + system_fingerprint, + choices: [{ + index: 0, + [stream ? 'delta':'message']: { + role: 'assistant', + content + }, + logprobs: null, + finish_reason: stopped ? 
'stop' : null + }], + } + if(!stream) { + resp.usage = { + prompt_tokens: 0, + completion_tokens: 0, + total_tokens: 0 + } + } + return resp; +} + +export async function chatCompletion(req, res) { + const api_key = (req.headers.authorization || '').split('Bearer ').pop(); + if(!api_key) { + res.status(401).send('Not Authorized'); + return; + } + + const system_fingerprint = generateFingerprint(); + let {messages, ...request_body} = req.body; + request_body.prompt = formatOpenAIContext(messages); + const model = request_body.model || process.env.LANGUAGE_MODEL_NAME + + if(request_body.stream) { + res.setHeader("Content-Type", "text/event-stream"); + res.setHeader("Cache-Control", "no-cache"); + res.setHeader("X-Accel-Buffering", "no"); + res.setHeader("Connection", "Keep-Alive"); + + const eng_resp = await post('completion', { body: request_body }, { getJSON: false }); + const reader = eng_resp.body.pipeThrough(new TextDecoderStream()).getReader(); + while(true) { + const { value, done } = await reader.read(); + if(done) break; + const data = value.split("data: ").pop() + const json_data = JSON.parse(data) + const { content, stop } = json_data; + res.write(JSON.stringify(generateResponseContent(api_key, 'chat.completion.chunk', model, system_fingerprint, true, content, stop))+'\n\n'); + } + res.end(); + } else { + const eng_resp = await post('completion', { body: request_body }); + const { model, content } = eng_resp; + const response_json = generateResponseContent( + api_key, 'chat.completion', model, system_fingerprint, + false, content, true + ) + res.send(response_json); + } +} \ No newline at end of file diff --git a/docker-compose.yaml b/docker-compose.yaml index f3b52b5..8405989 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -12,7 +12,7 @@ services: expose: - 8080 ports: - - 8080:8080 + - ${INFERENCE_ENG_PORT}:8080 command: ["-m", "models/${LANGUAGE_MODEL_NAME}","-c","8192"] embedding_eng: @@ -28,7 +28,7 @@ services: expose: - 8080 ports: - - 
8082:8080 + - ${EMBEDDING_ENG_PORT}:8080 command: ["-m", "models/${EMBEDDING_MODEL_NAME}","--embeddings","--pooling","mean","-c","512"] voyager: diff --git a/eslint.config.mjs b/eslint.config.mjs index 9704f12..55d0609 100644 --- a/eslint.config.mjs +++ b/eslint.config.mjs @@ -19,6 +19,7 @@ export default [ } }, { + ignores: ["volumes/*"], rules: { 'no-undef': 'off' } diff --git a/index.js b/index.js index 0b96312..a3c1225 100644 --- a/index.js +++ b/index.js @@ -5,11 +5,16 @@ import { configDotenv } from 'dotenv'; import buildRoutes from './routes/index.js' +import swStats from 'swagger-stats'; + configDotenv() const app = express(); app.use(cors()); app.use(bodyParser.json()); +app.use(swStats.getMiddleware({ + name: "Voyager Swagger Monitor" +})) buildRoutes(app); diff --git a/package.json b/package.json index 62c0b7f..870c4f8 100644 --- a/package.json +++ b/package.json @@ -21,6 +21,8 @@ "dotenv": "^16.4.5", "eslint": "^9.8.0", "express": "^4.19.2", - "globals": "^15.8.0" + "globals": "^15.8.0", + "prom-client": "12", + "swagger-stats": "^0.99.7" } } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 8f3fffc..9978f73 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -32,6 +32,12 @@ importers: globals: specifier: ^15.8.0 version: 15.8.0 + prom-client: + specifier: '12' + version: 12.0.0 + swagger-stats: + specifier: ^0.99.7 + version: 0.99.7(prom-client@12.0.0) packages: @@ -219,9 +225,22 @@ packages: array-flatten@1.1.1: resolution: {integrity: sha512-PCVAQswWemu6UdxsDFFX/+gVeYqKAod3D3UVm91jHwynguOwAvYPhx8nNlM++NqRcK6CxxpUafjmhIdKiHibqg==} + asynckit@0.4.0: + resolution: {integrity: sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==} + + axios@1.7.2: + resolution: {integrity: sha512-2A8QhOMrbomlDuiLeK9XibIBzuHeRcqqNOHp0Cyp5EoJ1IFDh+XZH3A6BkXtv0K4gFGCI0Y4BM7B1wOEi0Rmgw==} + balanced-match@1.0.2: resolution: {integrity: sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==} + 
basic-auth@2.0.1: + resolution: {integrity: sha512-NF+epuEdnUYVlGuhaxbbq+dvJttwLnGY+YixlXlME5KpQ5W3CnXA5cVTneY3SPbPDRkcjMbifrwmFYcClgOZeg==} + engines: {node: '>= 0.8'} + + bintrees@1.0.2: + resolution: {integrity: sha512-VOMgTMwjAaUG580SXn3LacVgjurrbMme7ZZNYGSSV7mmtY6QQRh0Eg3pwIcntQ77DErK1L0NxkbetjcoXzVwKw==} + body-parser@1.20.2: resolution: {integrity: sha512-ml9pReCu3M61kGlqoTm2umSXTlRTuGTx0bfYj+uIUKKYycG5NtSbeetV3faSU6R7ajOPw0g/J1PvK4qNy7s5bA==} engines: {node: '>= 0.8', npm: 1.2.8000 || >= 1.4.16} @@ -270,6 +289,10 @@ packages: color-name@1.1.4: resolution: {integrity: sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==} + combined-stream@1.0.8: + resolution: {integrity: sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==} + engines: {node: '>= 0.8'} + concat-map@0.0.1: resolution: {integrity: sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==} @@ -291,6 +314,10 @@ packages: resolution: {integrity: sha512-U71cyTamuh1CRNCfpGY6to28lxvNwPG4Guz/EVjgf3Jmzv0vlDp1atT9eS5dDjMYHucpHbWns6Lwf3BKz6svdw==} engines: {node: '>= 0.6'} + cookies@0.8.0: + resolution: {integrity: sha512-8aPsApQfebXnuI+537McwYsDtjVxGm8gTIzQI3FDW6t5t/DAhERxtnbEPN/8RX+uZthoz4eCOgloXaE5cYyNow==} + engines: {node: '>= 0.8'} + cors@2.8.5: resolution: {integrity: sha512-KIHbLJqu73RGr/hnbrO9uBeixNGuvSQjul/jdFvS/KFSIH1hWVd1ng7zOHx+YrEfInLG7q4n6GHQ9cDtxv/P6g==} engines: {node: '>= 0.10'} @@ -323,6 +350,10 @@ packages: resolution: {integrity: sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==} engines: {node: '>= 0.4'} + delayed-stream@1.0.0: + resolution: {integrity: sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==} + engines: {node: '>=0.4.0'} + depd@2.0.0: resolution: {integrity: sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==} 
engines: {node: '>= 0.8'} @@ -456,6 +487,19 @@ packages: flatted@3.3.1: resolution: {integrity: sha512-X8cqMLLie7KsNUDSdzeN8FYK9rEt4Dt67OsG/DNGnYTSDBG4uFAJFBnUeiV+zCVAvwFy56IjM9sH51jVaEhNxw==} + follow-redirects@1.15.6: + resolution: {integrity: sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==} + engines: {node: '>=4.0'} + peerDependencies: + debug: '*' + peerDependenciesMeta: + debug: + optional: true + + form-data@4.0.0: + resolution: {integrity: sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==} + engines: {node: '>= 6'} + forwarded@0.2.0: resolution: {integrity: sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==} engines: {node: '>= 0.6'} @@ -585,6 +629,10 @@ packages: engines: {node: '>=6'} hasBin: true + keygrip@1.1.0: + resolution: {integrity: sha512-iYSchDJ+liQ8iwbSI2QqsQOvqv58eJCEanyJPJi+Khyu8smkcKSFUCbPwzFcL7YVtZ6eONjqRX/38caJ7QjRAQ==} + engines: {node: '>= 0.6'} + keyv@4.5.4: resolution: {integrity: sha512-oxVHkHR/EJf2CNXnWxRLW6mg7JyCCUcG0DtEGmL2ctUo1PNTin1PUil+r/+4r5MpVgC/fn1kjsx7mjSujKqIpw==} @@ -629,6 +677,9 @@ packages: minimatch@3.1.2: resolution: {integrity: sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==} + moment@2.30.1: + resolution: {integrity: sha512-uEmtNhbDOrWPFS+hdjFCBfy9f2YoyzRpwcl+DqpC6taX21FzsTLQVbMV/W7PzNSX6x/bhC1zA3c2UQ5NzH6how==} + ms@2.0.0: resolution: {integrity: sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==} @@ -691,6 +742,9 @@ packages: path-to-regexp@0.1.7: resolution: {integrity: sha512-5DFkuoqlv1uYQKxy8omFBeJPQcdoE07Kv2sferDCrAq1ohOU+MSDswDIbnx3YAM60qIOnYa53wBhXW0EbMonrQ==} + path-to-regexp@6.2.2: + resolution: {integrity: sha512-GQX3SSMokngb36+whdpRXE+3f9V8UzyAorlYvOGx87ufGHehNTn5lCxrKtLyZ4Yl/wEKnNnr98ZzOwwDZV5ogw==} + picocolors@1.0.1: resolution: {integrity: 
sha512-anP1Z8qwhkbmu7MFP5iTt+wQKXgwzf7zTyGlcdzabySa9vd0Xt392U0rVmz9poOaBj0uHJKyyo9/upk0HrEQew==} @@ -698,10 +752,17 @@ packages: resolution: {integrity: sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g==} engines: {node: '>= 0.8.0'} + prom-client@12.0.0: + resolution: {integrity: sha512-JbzzHnw0VDwCvoqf8y1WDtq4wSBAbthMB1pcVI/0lzdqHGJI3KBJDXle70XK+c7Iv93Gihqo0a5LlOn+g8+DrQ==} + engines: {node: '>=10'} + proxy-addr@2.0.7: resolution: {integrity: sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==} engines: {node: '>= 0.10'} + proxy-from-env@1.1.0: + resolution: {integrity: sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==} + punycode@2.3.1: resolution: {integrity: sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==} engines: {node: '>=6'} @@ -710,6 +771,10 @@ packages: resolution: {integrity: sha512-MvjoMCJwEarSbUYk5O+nmoSzSutSsTwF85zcHPQ9OrlFoZOYIjaqBAJIqIXjptyD5vThxGq52Xu/MaJzRkIk4Q==} engines: {node: '>=0.6'} + qs@6.12.3: + resolution: {integrity: sha512-AWJm14H1vVaO/iNZ4/hO+HyaTehuy9nRqVdkTqlJt0HWvBiBIEXFmb4C0DGeYo3Xes9rrEW+TxHsaigCbN5ICQ==} + engines: {node: '>=0.6'} + queue-microtask@1.2.3: resolution: {integrity: sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==} @@ -732,6 +797,9 @@ packages: run-parallel@1.2.0: resolution: {integrity: sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==} + safe-buffer@5.1.2: + resolution: {integrity: sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==} + safe-buffer@5.2.1: resolution: {integrity: sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==} @@ -789,6 +857,14 @@ packages: resolution: {integrity: 
sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==} engines: {node: '>=8'} + swagger-stats@0.99.7: + resolution: {integrity: sha512-niP70m99Cwpz/Fyfk8ydul1jM0pOKD6UofSaDzW2Op6o6WYFsuAl/BhVbmLkZWOAZ7IloDVvFj6vaU5zA0xydg==} + peerDependencies: + prom-client: '>= 10 <= 14' + + tdigest@0.1.2: + resolution: {integrity: sha512-+G0LLgjjo9BZX2MfdvPfH+MKLCrxlXSYec5DaPYP1fe6Iyhf0/fSmJ0bFiZ1F8BT6cGXl2LpltQptzjXKWEkKA==} + text-table@0.2.0: resolution: {integrity: sha512-N+8UisAXDGk8PFXP4HAzVR9nbfmVJ3zYLAWiTIoqC5v5isinhr+r5uaO8+7r3BMfuNIufIsA7RdpVgacC2cSpw==} @@ -800,6 +876,10 @@ packages: resolution: {integrity: sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==} engines: {node: '>=0.6'} + tsscmp@1.0.6: + resolution: {integrity: sha512-LxhtAkPDTkVCMQjt2h6eBVY28KCjikZqZfMcC15YBeNjkgUpdCfBu5HoiOTDu86v6smE8yOjyEktJ8hlbANHQA==} + engines: {node: '>=0.6.x'} + type-check@0.4.0: resolution: {integrity: sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==} engines: {node: '>= 0.8.0'} @@ -825,6 +905,10 @@ packages: resolution: {integrity: sha512-pMZTvIkT1d+TFGvDOqodOclx0QWkkgi6Tdoa8gC8ffGAAqz9pzPTZWAybbsHHoED/ztMtkv/VoYTYyShUn81hA==} engines: {node: '>= 0.4.0'} + uuid@9.0.1: + resolution: {integrity: sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==} + hasBin: true + vary@1.1.2: resolution: {integrity: sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==} engines: {node: '>= 0.8'} @@ -1074,8 +1158,24 @@ snapshots: array-flatten@1.1.1: {} + asynckit@0.4.0: {} + + axios@1.7.2(debug@4.3.6): + dependencies: + follow-redirects: 1.15.6(debug@4.3.6) + form-data: 4.0.0 + proxy-from-env: 1.1.0 + transitivePeerDependencies: + - debug + balanced-match@1.0.2: {} + basic-auth@2.0.1: + dependencies: + safe-buffer: 5.1.2 + + bintrees@1.0.2: {} + body-parser@1.20.2: dependencies: 
bytes: 3.1.2 @@ -1142,6 +1242,10 @@ snapshots: color-name@1.1.4: {} + combined-stream@1.0.8: + dependencies: + delayed-stream: 1.0.0 + concat-map@0.0.1: {} content-disposition@0.5.4: @@ -1156,6 +1260,11 @@ snapshots: cookie@0.6.0: {} + cookies@0.8.0: + dependencies: + depd: 2.0.0 + keygrip: 1.1.0 + cors@2.8.5: dependencies: object-assign: 4.1.1 @@ -1183,6 +1292,8 @@ snapshots: es-errors: 1.3.0 gopd: 1.0.1 + delayed-stream@1.0.0: {} + depd@2.0.0: {} destroy@1.2.0: {} @@ -1360,6 +1471,16 @@ snapshots: flatted@3.3.1: {} + follow-redirects@1.15.6(debug@4.3.6): + optionalDependencies: + debug: 4.3.6 + + form-data@4.0.0: + dependencies: + asynckit: 0.4.0 + combined-stream: 1.0.8 + mime-types: 2.1.35 + forwarded@0.2.0: {} fresh@0.5.2: {} @@ -1457,6 +1578,10 @@ snapshots: json5@2.2.3: {} + keygrip@1.1.0: + dependencies: + tsscmp: 1.0.6 + keyv@4.5.4: dependencies: json-buffer: 3.0.1 @@ -1494,6 +1619,8 @@ snapshots: dependencies: brace-expansion: 1.1.11 + moment@2.30.1: {} + ms@2.0.0: {} ms@2.1.2: {} @@ -1543,21 +1670,33 @@ snapshots: path-to-regexp@0.1.7: {} + path-to-regexp@6.2.2: {} + picocolors@1.0.1: {} prelude-ls@1.2.1: {} + prom-client@12.0.0: + dependencies: + tdigest: 0.1.2 + proxy-addr@2.0.7: dependencies: forwarded: 0.2.0 ipaddr.js: 1.9.1 + proxy-from-env@1.1.0: {} + punycode@2.3.1: {} qs@6.11.0: dependencies: side-channel: 1.0.6 + qs@6.12.3: + dependencies: + side-channel: 1.0.6 + queue-microtask@1.2.3: {} range-parser@1.2.1: {} @@ -1577,6 +1716,8 @@ snapshots: dependencies: queue-microtask: 1.2.3 + safe-buffer@5.1.2: {} + safe-buffer@5.2.1: {} safer-buffer@2.1.2: {} @@ -1650,12 +1791,33 @@ snapshots: dependencies: has-flag: 4.0.0 + swagger-stats@0.99.7(prom-client@12.0.0): + dependencies: + axios: 1.7.2(debug@4.3.6) + basic-auth: 2.0.1 + cookies: 0.8.0 + debug: 4.3.6 + moment: 2.30.1 + path-to-regexp: 6.2.2 + prom-client: 12.0.0 + qs: 6.12.3 + send: 0.18.0 + uuid: 9.0.1 + transitivePeerDependencies: + - supports-color + + tdigest@0.1.2: + dependencies: + 
bintrees: 1.0.2 + text-table@0.2.0: {} to-fast-properties@2.0.0: {} toidentifier@1.0.1: {} + tsscmp@1.0.6: {} + type-check@0.4.0: dependencies: prelude-ls: 1.2.1 @@ -1679,6 +1841,8 @@ snapshots: utils-merge@1.0.1: {} + uuid@9.0.1: {} + vary@1.1.2: {} which@2.0.2: diff --git a/routes/index.js b/routes/index.js index e6dfa8c..6c5beb6 100644 --- a/routes/index.js +++ b/routes/index.js @@ -21,12 +21,20 @@ function indexRoute() { return router; } +function generateAPIRouters() { + const api_router = Router(); + + api_router.use('/chat', inferenceRoute()); + api_router.use('/token', tokenRoute()); + api_router.use('/tracing', tracingRoute()); + api_router.use('/embedding', embeddingRoute()); + api_router.use('/encoder', encoderRoute()); + api_router.use('/decoder', decoderRoute()); + + return api_router; +} + export default function buildRoutes(app) { app.use('/', indexRoute()); - app.use('/inference', inferenceRoute()); - app.use('/token', tokenRoute()); - app.use('/tracing', tracingRoute()); - app.use('/embedding', embeddingRoute()); - app.use('/encoder', encoderRoute()); - app.use('/decoder', decoderRoute()); + app.use('/v1', generateAPIRouters()); } \ No newline at end of file diff --git a/routes/inference.js b/routes/inference.js index a1a20b3..57f9270 100644 --- a/routes/inference.js +++ b/routes/inference.js @@ -1,6 +1,10 @@ import { Router } from "express"; +import { chatCompletion } from "../actions/inference.js"; export default function inferenceRoute() { const router = Router(); + + router.post('/completions', chatCompletion); + return router; } \ No newline at end of file diff --git a/tools/formatContext.js b/tools/formatContext.js new file mode 100644 index 0000000..97ed984 --- /dev/null +++ b/tools/formatContext.js @@ -0,0 +1,18 @@ +const system_context = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions." 
import { randomBytes } from 'node:crypto';

// ---- tools/formatContext.js ----

/**
 * Build a prompt from previous turns plus a new question.
 *
 * The system context, every history turn, the new question and the open
 * `### Assistant:` cue are each placed on their own line.
 *
 * @param {{role: String, message: String}[]} history previous turns; a `user`
 * role is labelled `Human`, any other role `Assistant`
 * @param {String} question the new user question
 * @returns {String} the prompt text
 */
export function formatInferenceContext(history, question) {
    const turns = (history || []).map(({ role, message }) => {
        const speaker = role === 'user' ? 'Human' : 'Assistant';
        return `### ${speaker}: ${message || ''}`;
    });
    // Joining everything with '\n' keeps the system context from running into
    // the first history turn on the same line.
    return [system_context, ...turns, `### Human: ${question}`, '### Assistant:'].join('\n');
}

/**
 * Build a prompt from OpenAI-style messages.
 *
 * @param {{role: String, content: String}[]} messages chat messages, oldest first
 * @returns {String} one `### <role>: <content>` line per message followed by
 * an open `### assistant:` cue
 */
export function formatOpenAIContext(messages) {
    const turns = (messages || []).map(({ role, content }) => `### ${role}: ${content}`);
    return `${turns.join('\n')}\n### assistant:`;
}

// ---- tools/generator.js ----

/**
 * Generate a random string.
 *
 * Uses the crypto random source because the result is used to build API keys.
 *
 * @returns {String} 16 lowercase hexadecimal characters
 */
export function generateRandomString() {
    return randomBytes(8).toString('hex');
}

/**
 * Generate a system fingerprint.
 *
 * @returns {String} `fp_` followed by a random string
 */
export function generateFingerprint() {
    return `fp_${generateRandomString()}`;
}

/**
 * Generate an API key.
 *
 * @returns {String} `voy-` followed by four concatenated random strings
 */
export function generateAPIKey() {
    const parts = Array.from({ length: 4 }, () => generateRandomString());
    return `voy-${parts.join('')}`;
}

// ---- tools/request.js ----

// Environment variable names and fallback values for each engine.
const ENGINES = {
    chat: { host_env: 'INFERENCE_ENG', port_env: 'INFERENCE_ENG_PORT', host: 'llamacpp', port: 8080 },
    rag: { host_env: 'EMBEDDING_ENG', port_env: 'EMBEDDING_ENG_PORT', host: 'embedding_eng', port: 8081 },
};

/**
 * Resolve the base url of an engine.
 *
 * Resolved on every call rather than once at import time, so values placed in
 * `process.env` after this module is evaluated (e.g. by dotenv) are honored.
 *
 * @param {"rag"|"chat"} eng engine to address
 * @returns {String} `http://<host>:<port>` without a trailing slash
 * @throws {Error} when `eng` is not a known engine
 */
function getBaseURL(eng) {
    if (!Object.hasOwn(ENGINES, eng)) {
        throw new Error(`Unknown engine "${eng}", expected "chat" or "rag"`);
    }
    const engine = ENGINES[eng];
    const host = process.env[engine.host_env] || engine.host;
    const port = process.env[engine.port_env] || engine.port;
    return `http://${host}:${port}`;
}

const default_options = {
    headers: {
        'Content-Type': 'application/json'
    }
};

/**
 * @typedef RequestOptions
 * @property {"rag"|"chat"} eng select between rag engine or chat engine, default value is `chat`
 * @property {Boolean} getJSON
 * * If set to `true`, this function will return the result of `await(await fetch(...)).json();`
 * or `{ http_error: true }` when the response status is not ok.
 * * If set to `false`, this function will return the result of `await fetch(...);`, without error handling
 * * default value is `true`;
 */

/**
 * A wrap of native fetch api helps fill default headers and urls
 * @param {String} url The path to request on the selected engine, with or without a leading `/`
 * @param {RequestInit} options the options to init request; a truthy `body` is JSON-stringified
 * and `headers` are merged over the default headers
 * @param {RequestOptions} request_options extra options to be included
 * @returns {Promise<Response|Object|{http_error: true}>}
 * @throws {Error} when `request_options.eng` is not a known engine
 */
export default async function request(url, options = {}, request_options = {}) {
    const eng = request_options.eng || "chat";
    const getJSON = Object.hasOwn(request_options, 'getJSON') ? request_options.getJSON : true;

    // Add the separator only when the caller did not supply one.
    const path = url.startsWith('/') ? url : `/${url}`;
    const target = `${getBaseURL(eng)}${path}`;

    const init = {
        ...default_options,
        ...options,
        // Merge instead of replace so custom headers keep the default Content-Type.
        headers: { ...default_options.headers, ...options.headers },
    };

    if (init.body) {
        init.body = JSON.stringify(init.body);
    }

    const res = await fetch(target, init);
    if (!getJSON) {
        return res;
    }
    return res.ok ? await res.json() : { http_error: true };
}

/**
 * A quick get {@link request} wrap
 * @param {String} url The path to request on the selected engine
 * @param {RequestInit} options the options to init request
 * @param {RequestOptions} request_options extra options to be included
 * @returns {Promise<Response|Object|{http_error: true}>}
 */
export function get(url, options, request_options) {
    return request(url, { method: 'GET', ...options }, request_options);
}

/**
 * A quick post {@link request} wrap
 * @param {String} url The path to request on the selected engine
 * @param {RequestInit} options the options to init request
 * @param {RequestOptions} request_options extra options to be included
 * @returns {Promise<Response|Object|{http_error: true}>}
 */
export function post(url, options, request_options) {
    return request(url, { method: 'POST', ...options }, request_options);
}