diff --git a/.env b/.env index 59ec500..eab4c4b 100644 --- a/.env +++ b/.env @@ -1,4 +1,6 @@ APP_PORT=8000 +ENG_ACCESS_PORT=8080 +MODEL_SAVE_PATH=volumes/models INFERENCE_ENG=llamacpp INFERENCE_ENG_PORT=8080 INFERENCE_ENG_VERSION=server--b1-2321a5e diff --git a/Makefile b/Makefile index d5f57cb..50a90b2 100644 --- a/Makefile +++ b/Makefile @@ -5,6 +5,9 @@ APP_PORT:=8000 # compose build related ENV_FILE:=.env +ENG_ACCESS_PORT:=8080 +MODEL_SAVE_PATH:=volumes/models + INFERENCE_ENG:=llamacpp INFERENCE_ENG_PORT:=8080 INFERENCE_ENG_VERSION:=server--b1-2321a5e @@ -32,6 +35,8 @@ run: build .PHONY: env env: @echo "APP_PORT=$(APP_PORT)"> $(ENV_FILE) + @echo "ENG_ACCESS_PORT=$(ENG_ACCESS_PORT)">> $(ENV_FILE) + @echo "MODEL_SAVE_PATH=$(MODEL_SAVE_PATH)">> $(ENV_FILE) @echo "INFERENCE_ENG=$(INFERENCE_ENG)">> $(ENV_FILE) @echo "INFERENCE_ENG_PORT=$(INFERENCE_ENG_PORT)">> $(ENV_FILE) @echo "INFERENCE_ENG_VERSION=$(INFERENCE_ENG_VERSION)">> $(ENV_FILE) @@ -44,8 +49,13 @@ env: @echo "EMBEDDING_MODEL_NAME=$(EMBEDDING_MODEL_NAME)">> $(ENV_FILE) @echo "EMBEDDING_MODEL_URL=$(EMBEDDING_MODEL_URL)">> $(ENV_FILE) +.PHONY: model-prepare +model-prepare: + @mkdir -p $(MODEL_SAVE_PATH) && [ -f $(MODEL_SAVE_PATH)/$(LANGUAGE_MODEL_NAME) ] || wget -O $(MODEL_SAVE_PATH)/$(LANGUAGE_MODEL_NAME) $(LANGUAGE_MODEL_URL) + @mkdir -p $(MODEL_SAVE_PATH) && [ -f $(MODEL_SAVE_PATH)/$(EMBEDDING_MODEL_NAME) ] || wget -O $(MODEL_SAVE_PATH)/$(EMBEDDING_MODEL_NAME) $(EMBEDDING_MODEL_URL) + .PHONY: compose-build -compose-build: env +compose-build: env model-prepare @docker compose -f docker-compose.yaml build .PHONY: up diff --git a/README.md b/README.md index d0e1ba4..1bb243f 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,8 @@ This project is OpenAI-like API set for SkywardAI project. ## BUILD & RUN ### Local Machine +* Please make sure you installed `Node.js` on your local machine. * This project developed on Node Version `v20.15.0`. -* Make sure you installed `Node.js`. ```shell # Manage package by pnpm @@ -22,7 +22,8 @@ npm run ``` ### Container -**Please make sure you have `docker` and `make` installed in your server** +* Please make sure you have `docker` and `make` installed in your server. +* Docker version for testing is `27.0.3, build 7d4bcd8`. ```shell # to simply start with all needed containers started, please run make up @@ -40,4 +41,4 @@ npm run lint ``` ## Monitor -This project got monitor build with swagger-stats, when you got this project running, just go to `:/swagger-stats` \ No newline at end of file +This project got monitor build with swagger-stats, when you got this project running, just go to `:/stats` \ No newline at end of file diff --git a/actions/inference.js b/actions/inference.js index a4afaf8..3463936 100644 --- a/actions/inference.js +++ b/actions/inference.js @@ -1,3 +1,18 @@ +// coding=utf-8 + +// Copyright [2024] [SkywardAI] +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + import { formatOpenAIContext } from "../tools/formatContext.js"; import { generateFingerprint } from "../tools/generator.js"; import { post } from "../tools/request.js"; diff --git a/docker-compose.yaml b/docker-compose.yaml index 8405989..72c67d6 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -2,47 +2,47 @@ services: llamacpp: container_name: ${INFERENCE_ENG} image: gclub/llama.cpp:${INFERENCE_ENG_VERSION} - restart: no + restart: always deploy: # https://github.com/compose-spec/compose-spec/blob/master/deploy.md resources: reservations: cpus: "${NUM_CPU_CORES}" volumes: - - "${DOCKER_VOLUME_DIRECTORY:-.}/volumes/models:/models" + - "${DOCKER_VOLUME_DIRECTORY:-.}/${MODEL_SAVE_PATH}:/models" expose: - - 8080 + - ${ENG_ACCESS_PORT} ports: - - ${INFERENCE_ENG_PORT}:8080 + - ${INFERENCE_ENG_PORT}:${ENG_ACCESS_PORT} command: ["-m", "models/${LANGUAGE_MODEL_NAME}","-c","8192"] embedding_eng: container_name: ${EMBEDDING_ENG} image: gclub/llama.cpp:${INFERENCE_ENG_VERSION} - restart: no + restart: always deploy: # https://github.com/compose-spec/compose-spec/blob/master/deploy.md resources: reservations: cpus: "${NUM_CPU_CORES_EMBEDDING}" volumes: - - "${DOCKER_VOLUME_DIRECTORY:-.}/volumes/models:/models" + - "${DOCKER_VOLUME_DIRECTORY:-.}/${MODEL_SAVE_PATH}:/models" expose: - - 8080 + - ${ENG_ACCESS_PORT} ports: - - ${EMBEDDING_ENG_PORT}:8080 + - ${EMBEDDING_ENG_PORT}:${ENG_ACCESS_PORT} command: ["-m", "models/${EMBEDDING_MODEL_NAME}","--embeddings","--pooling","mean","-c","512"] voyager: container_name: voyager - restart: no + restart: always build: dockerfile: Dockerfile context: . volumes: - .:/app expose: - - 8000 + - ${APP_PORT} ports: - - 8000:8000 + - ${APP_PORT}:${APP_PORT} depends_on: - llamacpp - embedding_eng \ No newline at end of file diff --git a/healthy-check.js b/healthy-check.js index 229dc98..d64d027 100644 --- a/healthy-check.js +++ b/healthy-check.js @@ -1,2 +1,17 @@ +// coding=utf-8 + +// Copyright [2024] [SkywardAI] +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + await fetch('http://localhost:8000/healthy'); console.log('Healthy check passed.') \ No newline at end of file diff --git a/index.js b/index.js index a3c1225..7ed2cd9 100644 --- a/index.js +++ b/index.js @@ -1,3 +1,18 @@ +// coding=utf-8 + +// Copyright [2024] [SkywardAI] +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + import express from 'express'; import cors from 'cors'; import bodyParser from 'body-parser'; @@ -13,7 +28,8 @@ const app = express(); app.use(cors()); app.use(bodyParser.json()); app.use(swStats.getMiddleware({ - name: "Voyager Swagger Monitor" + name: "Voyager Swagger Monitor", + uriPath: '/stats' })) buildRoutes(app); diff --git a/routes/decoder.js b/routes/decoder.js index 2b27d96..45df81a 100644 --- a/routes/decoder.js +++ b/routes/decoder.js @@ -1,3 +1,18 @@ +// coding=utf-8 + +// Copyright [2024] [SkywardAI] +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + import { Router } from "express"; export default function decoderRoute() { diff --git a/routes/embedding.js b/routes/embedding.js index 1f8035e..910e2eb 100644 --- a/routes/embedding.js +++ b/routes/embedding.js @@ -1,3 +1,18 @@ +// coding=utf-8 + +// Copyright [2024] [SkywardAI] +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + import { Router } from "express"; export default function embeddingRoute() { diff --git a/routes/encoder.js b/routes/encoder.js index 7e64a43..d9549f3 100644 --- a/routes/encoder.js +++ b/routes/encoder.js @@ -1,3 +1,18 @@ +// coding=utf-8 + +// Copyright [2024] [SkywardAI] +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + import { Router } from "express"; export default function encoderRoute() { diff --git a/routes/index.js b/routes/index.js index 6c5beb6..8239ef1 100644 --- a/routes/index.js +++ b/routes/index.js @@ -1,3 +1,18 @@ +// coding=utf-8 + +// Copyright [2024] [SkywardAI] +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + import { Router } from "express"; import inferenceRoute from "./inference.js"; diff --git a/routes/inference.js b/routes/inference.js index 57f9270..18fa803 100644 --- a/routes/inference.js +++ b/routes/inference.js @@ -1,3 +1,18 @@ +// coding=utf-8 + +// Copyright [2024] [SkywardAI] +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + import { Router } from "express"; import { chatCompletion } from "../actions/inference.js"; diff --git a/routes/token.js b/routes/token.js index 04f128a..5007cf3 100644 --- a/routes/token.js +++ b/routes/token.js @@ -1,3 +1,18 @@ +// coding=utf-8 + +// Copyright [2024] [SkywardAI] +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + import { Router } from "express"; import { generateAPIKey } from "../tools/generator.js"; diff --git a/routes/tracing.js b/routes/tracing.js index fb85ca6..9e92e1e 100644 --- a/routes/tracing.js +++ b/routes/tracing.js @@ -1,3 +1,18 @@ +// coding=utf-8 + +// Copyright [2024] [SkywardAI] +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + import { Router } from "express"; export default function tracingRoute() { diff --git a/tools/formatContext.js b/tools/formatContext.js index 97ed984..512bd73 100644 --- a/tools/formatContext.js +++ b/tools/formatContext.js @@ -1,3 +1,18 @@ +// coding=utf-8 + +// Copyright [2024] [SkywardAI] +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + const system_context = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions." export function formatInferenceContext(history, question) { diff --git a/tools/generator.js b/tools/generator.js index 2a71f75..9614747 100644 --- a/tools/generator.js +++ b/tools/generator.js @@ -1,3 +1,18 @@ +// coding=utf-8 + +// Copyright [2024] [SkywardAI] +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + export function generateRandomString() { return Math.random().toString(32).slice(2) } diff --git a/tools/request.js b/tools/request.js index ea357e1..9b57029 100644 --- a/tools/request.js +++ b/tools/request.js @@ -1,3 +1,18 @@ +// coding=utf-8 + +// Copyright [2024] [SkywardAI] +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + const BASE_URL = { "chat": `http://${process.env.INFERENCE_ENG || 'llamacpp'}:${process.env.INFERENCE_ENG_PORT || 8080}`, "rag": `http://${process.env.EMBEDDING_ENG || 'embedding_eng'}:${process.env.EMBEDDING_ENG_PORT || 8081}`