diff --git a/core/package.json b/core/package.json
index ce0714c277..1244f0ce96 100644
--- a/core/package.json
+++ b/core/package.json
@@ -15,13 +15,6 @@
     "dist"
   ],
   "author": "Jan ",
-  "repository": {
-    "type": "git",
-    "url": ""
-  },
-  "engines": {
-    "node": ">=6.0.0"
-  },
   "exports": {
     ".": "./dist/core.umd.js",
     "./sdk": "./dist/core.umd.js",
@@ -49,53 +42,6 @@
     "build": "tsc --module commonjs && rollup -c rollup.config.ts",
     "start": "rollup -c rollup.config.ts -w"
   },
-  "lint-staged": {
-    "{src,test}/**/*.ts": [
-      "prettier --write",
-      "git add"
-    ]
-  },
-  "config": {
-    "commitizen": {
-      "path": "node_modules/cz-conventional-changelog"
-    }
-  },
-  "jest": {
-    "transform": {
-      ".(ts|tsx)": "ts-jest"
-    },
-    "testEnvironment": "node",
-    "testRegex": "(/__tests__/.*|\\.(test|spec))\\.(ts|tsx|js)$",
-    "moduleFileExtensions": [
-      "ts",
-      "tsx",
-      "js"
-    ],
-    "coveragePathIgnorePatterns": [
-      "/node_modules/",
-      "/test/"
-    ],
-    "coverageThreshold": {
-      "global": {
-        "branches": 90,
-        "functions": 95,
-        "lines": 95,
-        "statements": 95
-      }
-    },
-    "collectCoverageFrom": [
-      "src/*.{js,ts}"
-    ]
-  },
-  "prettier": {
-    "semi": false,
-    "singleQuote": true
-  },
-  "commitlint": {
-    "extends": [
-      "@commitlint/config-conventional"
-    ]
-  },
   "devDependencies": {
     "@types/node": "^12.0.2",
     "rollup": "^2.38.5",
@@ -104,7 +50,6 @@
     "rollup-plugin-node-resolve": "^5.2.0",
     "rollup-plugin-sourcemaps": "^0.6.3",
     "rollup-plugin-typescript2": "^0.36.0",
-    "ts-node": "^7.0.1",
     "tslib": "^2.6.2",
     "typescript": "^5.2.2"
   }
diff --git a/core/src/types/model/modelEntity.ts b/core/src/types/model/modelEntity.ts
index 23d27935ec..80adc9e96e 100644
--- a/core/src/types/model/modelEntity.ts
+++ b/core/src/types/model/modelEntity.ts
@@ -104,6 +104,9 @@ export type ModelSettingParams = {
   n_parallel?: number
   cpu_threads?: number
   prompt_template?: string
+  system_prompt?: string
+  ai_prompt?: string
+  user_prompt?: string
 }
 
 /**
diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json
index 6366ad4b8b..90984acef3 100644
--- a/extensions/inference-nitro-extension/package.json
+++ b/extensions/inference-nitro-extension/package.json
@@ -3,11 +3,11 @@
   "version": "1.0.0",
   "description": "This extension embeds Nitro, a lightweight (3mb) inference engine written in C++. See nitro.jan.ai",
   "main": "dist/index.js",
-  "module": "dist/module.js",
+  "node": "dist/node/index.cjs.js",
   "author": "Jan ",
   "license": "AGPL-3.0",
   "scripts": {
-    "build": "tsc -b . 
&& webpack --config webpack.config.js", + "build": "tsc --module commonjs && rollup -c rollup.config.ts", "downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/nitro", "downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-arm64.tar.gz -e --strip 1 -o ./bin/mac-arm64 && chmod +x ./bin/mac-arm64/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-amd64.tar.gz -e --strip 1 -o ./bin/mac-x64 && chmod +x ./bin/mac-x64/nitro", "downloadnitro:win32": "download.bat", @@ -19,24 +19,33 @@ }, "exports": { ".": "./dist/index.js", - "./main": "./dist/module.js" + "./main": "./dist/node/index.cjs.js" }, "devDependencies": { + "@rollup/plugin-commonjs": "^25.0.7", + "@rollup/plugin-json": "^6.1.0", + "@rollup/plugin-node-resolve": "^15.2.3", + "@types/node": "^20.11.4", + "@types/tcp-port-used": "^1.0.4", "cpx": "^1.5.0", + "download-cli": "^1.1.1", "rimraf": "^3.0.2", + "rollup": "^2.38.5", + "rollup-plugin-define": "^1.0.1", + "rollup-plugin-sourcemaps": "^0.6.3", + "rollup-plugin-typescript2": "^0.36.0", "run-script-os": "^1.1.6", - "webpack": "^5.88.2", - "webpack-cli": "^5.1.4" + "typescript": "^5.3.3" }, "dependencies": { "@janhq/core": "file:../../core", - "download-cli": "^1.1.1", + "@rollup/plugin-replace": "^5.0.5", + "@types/os-utils": "^0.0.4", "fetch-retry": "^5.0.6", "os-utils": "^0.0.14", "path-browserify": "^1.0.1", "rxjs": "^7.8.1", "tcp-port-used": "^1.0.2", - "ts-loader": "^9.5.0", "ulid": "^2.3.0" }, "engines": { diff --git a/extensions/inference-nitro-extension/rollup.config.ts b/extensions/inference-nitro-extension/rollup.config.ts new file mode 100644 index 0000000000..374a054cd5 --- /dev/null +++ b/extensions/inference-nitro-extension/rollup.config.ts @@ -0,0 +1,77 @@ +import resolve from "@rollup/plugin-node-resolve"; +import commonjs from "@rollup/plugin-commonjs"; +import sourceMaps from "rollup-plugin-sourcemaps"; +import typescript from "rollup-plugin-typescript2"; +import json from "@rollup/plugin-json"; +import replace from "@rollup/plugin-replace"; +const packageJson = require("./package.json"); + +const pkg = require("./package.json"); + +export default [ + { + input: `src/index.ts`, + output: [{ file: pkg.main, format: "es", sourcemap: true }], + // Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash') + external: [], + watch: { + include: "src/**", + }, + plugins: [ + replace({ + NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`), + INFERENCE_URL: JSON.stringify( + process.env.INFERENCE_URL || + "http://127.0.0.1:3928/inferences/llamacpp/chat_completion" + ), + TROUBLESHOOTING_URL: JSON.stringify( + "https://jan.ai/guides/troubleshooting" + ), + }), + // Allow json resolution + json(), + // Compile TypeScript files + typescript({ useTsconfigDeclarationDir: 
true }), + // Compile TypeScript files + // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs) + commonjs(), + // Allow node_modules resolution, so you can use 'external' to control + // which external modules to include in the bundle + // https://github.com/rollup/rollup-plugin-node-resolve#usage + resolve({ + extensions: [".js", ".ts", ".svelte"], + }), + + // Resolve source maps to the original source + sourceMaps(), + ], + }, + { + input: `src/node/index.ts`, + output: [ + { file: "dist/node/index.cjs.js", format: "cjs", sourcemap: true }, + ], + // Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash') + external: ["@janhq/core/node"], + watch: { + include: "src/node/**", + }, + plugins: [ + // Allow json resolution + json(), + // Compile TypeScript files + typescript({ useTsconfigDeclarationDir: true }), + // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs) + commonjs(), + // Allow node_modules resolution, so you can use 'external' to control + // which external modules to include in the bundle + // https://github.com/rollup/rollup-plugin-node-resolve#usage + resolve({ + extensions: [".ts", ".js", ".json"], + }), + + // Resolve source maps to the original source + sourceMaps(), + ], + }, +]; diff --git a/extensions/inference-nitro-extension/src/@types/global.d.ts b/extensions/inference-nitro-extension/src/@types/global.d.ts index 6bcdc4adc5..5fb41f0f8a 100644 --- a/extensions/inference-nitro-extension/src/@types/global.d.ts +++ b/extensions/inference-nitro-extension/src/@types/global.d.ts @@ -1,4 +1,4 @@ -declare const MODULE: string; +declare const NODE: string; declare const INFERENCE_URL: string; declare const TROUBLESHOOTING_URL: string; diff --git a/extensions/inference-nitro-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts index b6c63f59a7..fdd352aad2 100644 --- a/extensions/inference-nitro-extension/src/index.ts +++ b/extensions/inference-nitro-extension/src/index.ts @@ -26,7 +26,6 @@ import { } from "@janhq/core"; import { requestInference } from "./helpers/sse"; import { ulid } from "ulid"; -import { join } from "path"; /** * A class that implements the InferenceExtension interface from the @janhq/core package. 
@@ -43,7 +42,7 @@ export default class JanInferenceNitroExtension implements InferenceExtension { */ private static readonly _intervalHealthCheck = 5 * 1000; - private _currentModel: Model; + private _currentModel: Model | undefined; private _engineSettings: EngineSettings = { ctx_len: 2048, @@ -82,7 +81,7 @@ export default class JanInferenceNitroExtension implements InferenceExtension { if (!(await fs.existsSync(JanInferenceNitroExtension._homeDir))) { await fs .mkdirSync(JanInferenceNitroExtension._homeDir) - .catch((err) => console.debug(err)); + .catch((err: Error) => console.debug(err)); } if (!(await fs.existsSync(JanInferenceNitroExtension._settingsDir))) @@ -90,7 +89,9 @@ export default class JanInferenceNitroExtension implements InferenceExtension { this.writeDefaultEngineSettings(); // Events subscription - events.on(EventName.OnMessageSent, (data) => this.onMessageRequest(data)); + events.on(EventName.OnMessageSent, (data: MessageRequest) => + this.onMessageRequest(data) + ); events.on(EventName.OnModelInit, (model: Model) => this.onModelInit(model)); @@ -99,7 +100,7 @@ export default class JanInferenceNitroExtension implements InferenceExtension { events.on(EventName.OnInferenceStopped, () => this.onInferenceStopped()); // Attempt to fetch nvidia info - await executeOnMain(MODULE, "updateNvidiaInfo", {}); + await executeOnMain(NODE, "updateNvidiaInfo", {}); } /** @@ -109,10 +110,10 @@ export default class JanInferenceNitroExtension implements InferenceExtension { private async writeDefaultEngineSettings() { try { - const engineFile = join( + const engineFile = await joinPath([ JanInferenceNitroExtension._homeDir, - JanInferenceNitroExtension._engineMetadataFileName - ); + JanInferenceNitroExtension._engineMetadataFileName, + ]); if (await fs.existsSync(engineFile)) { const engine = await fs.readFileSync(engineFile, "utf-8"); this._engineSettings = @@ -133,12 +134,12 @@ export default class JanInferenceNitroExtension implements InferenceExtension { const modelFullPath = await joinPath(["models", model.id]); - const nitroInitResult = await executeOnMain(MODULE, "initModel", { - modelFullPath: modelFullPath, - model: model, + const nitroInitResult = await executeOnMain(NODE, "runModel", { + modelFullPath, + model, }); - if (nitroInitResult.error === null) { + if (nitroInitResult?.error) { events.emit(EventName.OnModelFail, model); return; } @@ -155,12 +156,11 @@ export default class JanInferenceNitroExtension implements InferenceExtension { private async onModelStop(model: Model) { if (model.engine !== "nitro") return; - await executeOnMain(MODULE, "stopModel"); + await executeOnMain(NODE, "stopModel"); events.emit(EventName.OnModelStopped, {}); // stop the periocally health check if (this.getNitroProcesHealthIntervalId) { - console.debug("Stop calling Nitro process health check"); clearInterval(this.getNitroProcesHealthIntervalId); this.getNitroProcesHealthIntervalId = undefined; } @@ -170,7 +170,7 @@ export default class JanInferenceNitroExtension implements InferenceExtension { * Periodically check for nitro process's health. */ private async periodicallyGetNitroHealth(): Promise { - const health = await executeOnMain(MODULE, "getCurrentNitroProcessInfo"); + const health = await executeOnMain(NODE, "getCurrentNitroProcessInfo"); const isRunning = this.nitroProcessInfo?.isRunning ?? 
false; if (isRunning && health.isRunning === false) { @@ -204,6 +204,8 @@ export default class JanInferenceNitroExtension implements InferenceExtension { }; return new Promise(async (resolve, reject) => { + if (!this._currentModel) return Promise.reject("No model loaded"); + requestInference(data.messages ?? [], this._currentModel).subscribe({ next: (_content) => {}, complete: async () => { @@ -223,7 +225,9 @@ export default class JanInferenceNitroExtension implements InferenceExtension { * @param {MessageRequest} data - The data for the new message request. */ private async onMessageRequest(data: MessageRequest) { - if (data.model.engine !== "nitro") return; + if (data.model?.engine !== InferenceEngine.nitro || !this._currentModel) { + return; + } const timestamp = Date.now(); const message: ThreadMessage = { @@ -242,11 +246,12 @@ export default class JanInferenceNitroExtension implements InferenceExtension { this.isCancelled = false; this.controller = new AbortController(); - requestInference( - data.messages ?? [], - { ...this._currentModel, ...data.model }, - this.controller - ).subscribe({ + // @ts-ignore + const model: Model = { + ...(this._currentModel || {}), + ...(data.model || {}), + }; + requestInference(data.messages ?? [], model, this.controller).subscribe({ next: (content) => { const messageContent: ThreadContent = { type: ContentType.Text, diff --git a/extensions/inference-nitro-extension/src/module.ts b/extensions/inference-nitro-extension/src/module.ts deleted file mode 100644 index 6907f244a2..0000000000 --- a/extensions/inference-nitro-extension/src/module.ts +++ /dev/null @@ -1,514 +0,0 @@ -const fs = require("fs"); -const path = require("path"); -const { exec, spawn } = require("child_process"); -const tcpPortUsed = require("tcp-port-used"); -const fetchRetry = require("fetch-retry")(global.fetch); -const osUtils = require("os-utils"); -const { readFileSync, writeFileSync, existsSync } = require("fs"); -const { log } = require("@janhq/core/node"); - -// The PORT to use for the Nitro subprocess -const PORT = 3928; -const LOCAL_HOST = "127.0.0.1"; -const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}`; -const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel`; -const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus`; -const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`; -const SUPPORTED_MODEL_FORMAT = ".gguf"; -const NVIDIA_INFO_FILE = path.join( - require("os").homedir(), - "jan", - "settings", - "settings.json" -); - -// The subprocess instance for Nitro -let subprocess = undefined; -let currentModelFile: string = undefined; -let currentSettings = undefined; - -let nitroProcessInfo = undefined; - -/** - * Default GPU settings - **/ -const DEFALT_SETTINGS = { - notify: true, - run_mode: "cpu", - nvidia_driver: { - exist: false, - version: "", - }, - cuda: { - exist: false, - version: "", - }, - gpus: [], - gpu_highest_vram: "", -}; - -/** - * Stops a Nitro subprocess. - * @param wrapper - The model wrapper. - * @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate. - */ -function stopModel(): Promise { - return killSubprocess(); -} - -/** - * Initializes a Nitro subprocess to load a machine learning model. - * @param wrapper - The model wrapper. 
- * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load. - * TODO: Should pass absolute of the model file instead of just the name - So we can modurize the module.ts to npm package - * TODO: Should it be startModel instead? - */ -async function initModel(wrapper: any): Promise { - currentModelFile = wrapper.modelFullPath; - const janRoot = path.join(require("os").homedir(), "jan"); - if (!currentModelFile.includes(janRoot)) { - currentModelFile = path.join(janRoot, currentModelFile); - } - const files: string[] = fs.readdirSync(currentModelFile); - - // Look for GGUF model file - const ggufBinFile = files.find( - (file) => - file === path.basename(currentModelFile) || - file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT) - ); - - currentModelFile = path.join(currentModelFile, ggufBinFile); - - if (wrapper.model.engine !== "nitro") { - return Promise.resolve({ error: "Not a nitro model" }); - } else { - const nitroResourceProbe = await getResourcesInfo(); - // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt - if (wrapper.model.settings.prompt_template) { - const promptTemplate = wrapper.model.settings.prompt_template; - const prompt = promptTemplateConverter(promptTemplate); - if (prompt.error) { - return Promise.resolve({ error: prompt.error }); - } - wrapper.model.settings.system_prompt = prompt.system_prompt; - wrapper.model.settings.user_prompt = prompt.user_prompt; - wrapper.model.settings.ai_prompt = prompt.ai_prompt; - } - - currentSettings = { - llama_model_path: currentModelFile, - ...wrapper.model.settings, - // This is critical and requires real system information - cpu_threads: nitroResourceProbe.numCpuPhysicalCore, - }; - return loadModel(nitroResourceProbe); - } -} - -async function loadModel(nitroResourceProbe: any | undefined) { - // Gather system information for CPU physical cores and memory - if (!nitroResourceProbe) nitroResourceProbe = await getResourcesInfo(); - return killSubprocess() - .then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000)) - .then(() => { - /** - * There is a problem with Windows process manager - * Should wait for awhile to make sure the port is free and subprocess is killed - * The tested threshold is 500ms - **/ - if (process.platform === "win32") { - return new Promise((resolve) => setTimeout(resolve, 500)); - } else { - return Promise.resolve(); - } - }) - .then(() => spawnNitroProcess(nitroResourceProbe)) - .then(() => loadLLMModel(currentSettings)) - .then(validateModelStatus) - .catch((err) => { - log(`[NITRO]::Error: ${err}`); - // TODO: Broadcast error so app could display proper error message - return { error: err, currentModelFile }; - }); -} - -function promptTemplateConverter(promptTemplate) { - // Split the string using the markers - const systemMarker = "{system_message}"; - const promptMarker = "{prompt}"; - - if ( - promptTemplate.includes(systemMarker) && - promptTemplate.includes(promptMarker) - ) { - // Find the indices of the markers - const systemIndex = promptTemplate.indexOf(systemMarker); - const promptIndex = promptTemplate.indexOf(promptMarker); - - // Extract the parts of the string - const system_prompt = promptTemplate.substring(0, systemIndex); - const user_prompt = promptTemplate.substring( - systemIndex + systemMarker.length, - promptIndex - ); - const ai_prompt = promptTemplate.substring( - promptIndex + promptMarker.length - ); - - // Return the split parts - return { system_prompt, user_prompt, 
ai_prompt }; - } else if (promptTemplate.includes(promptMarker)) { - // Extract the parts of the string for the case where only promptMarker is present - const promptIndex = promptTemplate.indexOf(promptMarker); - const user_prompt = promptTemplate.substring(0, promptIndex); - const ai_prompt = promptTemplate.substring( - promptIndex + promptMarker.length - ); - const system_prompt = ""; - - // Return the split parts - return { system_prompt, user_prompt, ai_prompt }; - } - - // Return an error if none of the conditions are met - return { error: "Cannot split prompt template" }; -} - -/** - * Loads a LLM model into the Nitro subprocess by sending a HTTP POST request. - * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load. - */ -function loadLLMModel(settings): Promise { - log(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`); - return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify(settings), - retries: 3, - retryDelay: 500, - }).catch((err) => { - log(`[NITRO]::Error: Load model failed with error ${err}`); - }); -} - -/** - * Validates the status of a model. - * @returns {Promise} A promise that resolves to an object. - * If the model is loaded successfully, the object is empty. - * If the model is not loaded successfully, the object contains an error message. - */ -async function validateModelStatus(): Promise { - // Send a GET request to the validation URL. - // Retry the request up to 3 times if it fails, with a delay of 500 milliseconds between retries. - return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, { - method: "GET", - headers: { - "Content-Type": "application/json", - }, - retries: 5, - retryDelay: 500, - }).then(async (res: Response) => { - // If the response is OK, check model_loaded status. - if (res.ok) { - const body = await res.json(); - // If the model is loaded, return an empty object. - // Otherwise, return an object with an error message. - if (body.model_loaded) { - return { error: undefined }; - } - } - return { error: "Model loading failed" }; - }); -} - -/** - * Terminates the Nitro subprocess. - * @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate. - */ -async function killSubprocess(): Promise { - const controller = new AbortController(); - setTimeout(() => controller.abort(), 5000); - log(`[NITRO]::Debug: Request to kill Nitro`); - - return fetch(NITRO_HTTP_KILL_URL, { - method: "DELETE", - signal: controller.signal, - }) - .then(() => { - subprocess?.kill(); - subprocess = undefined; - }) - .catch(() => {}) - .then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000)) - .then(() => log(`[NITRO]::Debug: Nitro process is terminated`)); -} - -/** - * Spawns a Nitro subprocess. - * @param nitroResourceProbe - The Nitro resource probe. - * @returns A promise that resolves when the Nitro subprocess is started. 
- */ -function spawnNitroProcess(nitroResourceProbe: any): Promise { - log(`[NITRO]::Debug: Spawning Nitro subprocess...`); - - return new Promise(async (resolve, reject) => { - let binaryFolder = path.join(__dirname, "bin"); // Current directory by default - let cudaVisibleDevices = ""; - let binaryName; - if (process.platform === "win32") { - let nvidiaInfo = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8")); - if (nvidiaInfo["run_mode"] === "cpu") { - binaryFolder = path.join(binaryFolder, "win-cpu"); - } else { - if (nvidiaInfo["cuda"].version === "12") { - binaryFolder = path.join(binaryFolder, "win-cuda-12-0"); - } else { - binaryFolder = path.join(binaryFolder, "win-cuda-11-7"); - } - cudaVisibleDevices = nvidiaInfo["gpu_highest_vram"]; - } - binaryName = "nitro.exe"; - } else if (process.platform === "darwin") { - if (process.arch === "arm64") { - binaryFolder = path.join(binaryFolder, "mac-arm64"); - } else { - binaryFolder = path.join(binaryFolder, "mac-x64"); - } - binaryName = "nitro"; - } else { - let nvidiaInfo = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8")); - if (nvidiaInfo["run_mode"] === "cpu") { - binaryFolder = path.join(binaryFolder, "linux-cpu"); - } else { - if (nvidiaInfo["cuda"].version === "12") { - binaryFolder = path.join(binaryFolder, "linux-cuda-12-0"); - } else { - binaryFolder = path.join(binaryFolder, "linux-cuda-11-7"); - } - cudaVisibleDevices = nvidiaInfo["gpu_highest_vram"]; - } - binaryName = "nitro"; - } - - const binaryPath = path.join(binaryFolder, binaryName); - // Execute the binary - subprocess = spawn(binaryPath, ["1", LOCAL_HOST, PORT.toString()], { - cwd: binaryFolder, - env: { - ...process.env, - CUDA_VISIBLE_DEVICES: cudaVisibleDevices, - }, - }); - - // Handle subprocess output - subprocess.stdout.on("data", (data) => { - log(`[NITRO]::Debug: ${data}`); - }); - - subprocess.stderr.on("data", (data) => { - log(`[NITRO]::Error: ${data}`); - }); - - subprocess.on("close", (code) => { - log(`[NITRO]::Debug: Nitro exited with code: ${code}`); - subprocess = null; - reject(`child process exited with code ${code}`); - }); - - tcpPortUsed.waitUntilUsed(PORT, 300, 30000).then(() => { - resolve(nitroResourceProbe); - }); - }); -} - -/** - * Get the system resources information - * TODO: Move to Core so that it can be reused - */ -function getResourcesInfo(): Promise { - return new Promise(async (resolve) => { - const cpu = await osUtils.cpuCount(); - log(`[NITRO]::CPU informations - ${cpu}`); - const response: ResourcesInfo = { - numCpuPhysicalCore: cpu, - memAvailable: 0, - }; - resolve(response); - }); -} - -/** - * This will retrive GPU informations and persist settings.json - * Will be called when the extension is loaded to turn on GPU acceleration if supported - */ -async function updateNvidiaInfo() { - if (process.platform !== "darwin") { - await Promise.all([ - updateNvidiaDriverInfo(), - updateCudaExistence(), - updateGpuInfo(), - ]); - } -} - -/** - * Retrieve current nitro process - */ -const getCurrentNitroProcessInfo = (): Promise => { - nitroProcessInfo = { - isRunning: subprocess != null, - }; - return nitroProcessInfo; -}; - -/** - * Every module should have a dispose function - * This will be called when the extension is unloaded and should clean up any resources - * Also called when app is closed - */ -function dispose() { - // clean other registered resources here - killSubprocess(); -} - -/** - * Validate nvidia and cuda for linux and windows - */ -async function updateNvidiaDriverInfo(): Promise { - exec( - "nvidia-smi 
--query-gpu=driver_version --format=csv,noheader", - (error, stdout) => { - let data; - try { - data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8")); - } catch (error) { - data = DEFALT_SETTINGS; - } - - if (!error) { - const firstLine = stdout.split("\n")[0].trim(); - data["nvidia_driver"].exist = true; - data["nvidia_driver"].version = firstLine; - } else { - data["nvidia_driver"].exist = false; - } - - writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2)); - Promise.resolve(); - } - ); -} - -/** - * Check if file exists in paths - */ -function checkFileExistenceInPaths(file: string, paths: string[]): boolean { - return paths.some((p) => existsSync(path.join(p, file))); -} - -/** - * Validate cuda for linux and windows - */ -function updateCudaExistence() { - let filesCuda12: string[]; - let filesCuda11: string[]; - let paths: string[]; - let cudaVersion: string = ""; - - if (process.platform === "win32") { - filesCuda12 = ["cublas64_12.dll", "cudart64_12.dll", "cublasLt64_12.dll"]; - filesCuda11 = ["cublas64_11.dll", "cudart64_11.dll", "cublasLt64_11.dll"]; - paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : []; - } else { - filesCuda12 = ["libcudart.so.12", "libcublas.so.12", "libcublasLt.so.12"]; - filesCuda11 = ["libcudart.so.11.0", "libcublas.so.11", "libcublasLt.so.11"]; - paths = process.env.LD_LIBRARY_PATH - ? process.env.LD_LIBRARY_PATH.split(path.delimiter) - : []; - paths.push("/usr/lib/x86_64-linux-gnu/"); - } - - let cudaExists = filesCuda12.every( - (file) => existsSync(file) || checkFileExistenceInPaths(file, paths) - ); - - if (!cudaExists) { - cudaExists = filesCuda11.every( - (file) => existsSync(file) || checkFileExistenceInPaths(file, paths) - ); - if (cudaExists) { - cudaVersion = "11"; - } - } else { - cudaVersion = "12"; - } - - let data; - try { - data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8")); - } catch (error) { - data = DEFALT_SETTINGS; - } - - data["cuda"].exist = cudaExists; - data["cuda"].version = cudaVersion; - if (cudaExists) { - data.run_mode = "gpu"; - } - writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2)); -} - -/** - * Get GPU information - */ -async function updateGpuInfo(): Promise { - exec( - "nvidia-smi --query-gpu=index,memory.total --format=csv,noheader,nounits", - (error, stdout) => { - let data; - try { - data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8")); - } catch (error) { - data = DEFALT_SETTINGS; - } - - if (!error) { - // Get GPU info and gpu has higher memory first - let highestVram = 0; - let highestVramId = "0"; - let gpus = stdout - .trim() - .split("\n") - .map((line) => { - let [id, vram] = line.split(", "); - vram = vram.replace(/\r/g, ""); - if (parseFloat(vram) > highestVram) { - highestVram = parseFloat(vram); - highestVramId = id; - } - return { id, vram }; - }); - - data["gpus"] = gpus; - data["gpu_highest_vram"] = highestVramId; - } else { - data["gpus"] = []; - } - - writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2)); - Promise.resolve(); - } - ); -} - -module.exports = { - initModel, - stopModel, - killSubprocess, - dispose, - updateNvidiaInfo, - getCurrentNitroProcessInfo, -}; diff --git a/extensions/inference-nitro-extension/src/node/execute.ts b/extensions/inference-nitro-extension/src/node/execute.ts new file mode 100644 index 0000000000..ca266639c6 --- /dev/null +++ b/extensions/inference-nitro-extension/src/node/execute.ts @@ -0,0 +1,65 @@ +import { readFileSync } from "fs"; +import * as path from "path"; +import { NVIDIA_INFO_FILE } from 
"./nvidia"; + +export interface NitroExecutableOptions { + executablePath: string; + cudaVisibleDevices: string; +} +/** + * Find which executable file to run based on the current platform. + * @returns The name of the executable file to run. + */ +export const executableNitroFile = (): NitroExecutableOptions => { + let binaryFolder = path.join(__dirname, "..", "bin"); // Current directory by default + let cudaVisibleDevices = ""; + let binaryName = "nitro"; + /** + * The binary folder is different for each platform. + */ + if (process.platform === "win32") { + /** + * For Windows: win-cpu, win-cuda-11-7, win-cuda-12-0 + */ + let nvidiaInfo = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8")); + if (nvidiaInfo["run_mode"] === "cpu") { + binaryFolder = path.join(binaryFolder, "win-cpu"); + } else { + if (nvidiaInfo["cuda"].version === "12") { + binaryFolder = path.join(binaryFolder, "win-cuda-12-0"); + } else { + binaryFolder = path.join(binaryFolder, "win-cuda-11-7"); + } + cudaVisibleDevices = nvidiaInfo["gpu_highest_vram"]; + } + binaryName = "nitro.exe"; + } else if (process.platform === "darwin") { + /** + * For MacOS: mac-arm64 (Silicon), mac-x64 (InteL) + */ + if (process.arch === "arm64") { + binaryFolder = path.join(binaryFolder, "mac-arm64"); + } else { + binaryFolder = path.join(binaryFolder, "mac-x64"); + } + } else { + /** + * For Linux: linux-cpu, linux-cuda-11-7, linux-cuda-12-0 + */ + let nvidiaInfo = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8")); + if (nvidiaInfo["run_mode"] === "cpu") { + binaryFolder = path.join(binaryFolder, "linux-cpu"); + } else { + if (nvidiaInfo["cuda"].version === "12") { + binaryFolder = path.join(binaryFolder, "linux-cuda-12-0"); + } else { + binaryFolder = path.join(binaryFolder, "linux-cuda-11-7"); + } + cudaVisibleDevices = nvidiaInfo["gpu_highest_vram"]; + } + } + return { + executablePath: path.join(binaryFolder, binaryName), + cudaVisibleDevices, + }; +}; diff --git a/extensions/inference-nitro-extension/src/node/index.ts b/extensions/inference-nitro-extension/src/node/index.ts new file mode 100644 index 0000000000..fc22f62e78 --- /dev/null +++ b/extensions/inference-nitro-extension/src/node/index.ts @@ -0,0 +1,379 @@ +import fs from "fs"; +import path from "path"; +import { ChildProcessWithoutNullStreams, spawn } from "child_process"; +import tcpPortUsed from "tcp-port-used"; +import fetchRT from "fetch-retry"; +import osUtils from "os-utils"; +import { log } from "@janhq/core/node"; +import { getNitroProcessInfo, updateNvidiaInfo } from "./nvidia"; +import { Model, InferenceEngine, ModelSettingParams } from "@janhq/core"; +import { executableNitroFile } from "./execute"; +import { homedir } from "os"; +// Polyfill fetch with retry +const fetchRetry = fetchRT(fetch); + +/** + * The response object for model init operation. + */ +interface ModelInitOptions { + modelFullPath: string; + model: Model; +} + +/** + * The response object of Prompt Template parsing. + */ +interface PromptTemplate { + system_prompt?: string; + ai_prompt?: string; + user_prompt?: string; + error?: string; +} + +/** + * Model setting args for Nitro model load. 
+ */ +interface ModelSettingArgs extends ModelSettingParams { + llama_model_path: string; + cpu_threads: number; +} + +// The PORT to use for the Nitro subprocess +const PORT = 3928; +// The HOST address to use for the Nitro subprocess +const LOCAL_HOST = "127.0.0.1"; +// The URL for the Nitro subprocess +const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}`; +// The URL for the Nitro subprocess to load a model +const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel`; +// The URL for the Nitro subprocess to validate a model +const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus`; +// The URL for the Nitro subprocess to kill itself +const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`; + +// The supported model format +// TODO: Should be an array to support more models +const SUPPORTED_MODEL_FORMAT = ".gguf"; + +// The subprocess instance for Nitro +let subprocess: ChildProcessWithoutNullStreams | undefined = undefined; +// The current model file url +let currentModelFile: string = ""; +// The current model settings +let currentSettings: ModelSettingArgs | undefined = undefined; + +/** + * Stops a Nitro subprocess. + * @param wrapper - The model wrapper. + * @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate. + */ +function stopModel(): Promise { + return killSubprocess(); +} + +/** + * Initializes a Nitro subprocess to load a machine learning model. + * @param wrapper - The model wrapper. + * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load. + * TODO: Should pass absolute of the model file instead of just the name - So we can modurize the module.ts to npm package + */ +async function runModel( + wrapper: ModelInitOptions +): Promise { + if (wrapper.model.engine !== InferenceEngine.nitro) { + // Not a nitro model + return Promise.resolve(); + } + + currentModelFile = wrapper.modelFullPath; + const janRoot = path.join(homedir(), "jan"); + if (!currentModelFile.includes(janRoot)) { + currentModelFile = path.join(janRoot, currentModelFile); + } + const files: string[] = fs.readdirSync(currentModelFile); + + // Look for GGUF model file + const ggufBinFile = files.find( + (file) => + file === path.basename(currentModelFile) || + file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT) + ); + + if (!ggufBinFile) return Promise.reject("No GGUF model file found"); + + currentModelFile = path.join(currentModelFile, ggufBinFile); + + if (wrapper.model.engine !== InferenceEngine.nitro) { + return Promise.reject("Not a nitro model"); + } else { + const nitroResourceProbe = await getResourcesInfo(); + // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt + if (wrapper.model.settings.prompt_template) { + const promptTemplate = wrapper.model.settings.prompt_template; + const prompt = promptTemplateConverter(promptTemplate); + if (prompt?.error) { + return Promise.reject(prompt.error); + } + wrapper.model.settings.system_prompt = prompt.system_prompt; + wrapper.model.settings.user_prompt = prompt.user_prompt; + wrapper.model.settings.ai_prompt = prompt.ai_prompt; + } + + currentSettings = { + llama_model_path: currentModelFile, + ...wrapper.model.settings, + // This is critical and requires real system information + cpu_threads: nitroResourceProbe.numCpuPhysicalCore, + }; + return 
runNitroAndLoadModel(); + } +} + +/** + * 1. Spawn Nitro process + * 2. Load model into Nitro subprocess + * 3. Validate model status + * @returns + */ +async function runNitroAndLoadModel() { + // Gather system information for CPU physical cores and memory + return killSubprocess() + .then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000)) + .then(() => { + /** + * There is a problem with Windows process manager + * Should wait for awhile to make sure the port is free and subprocess is killed + * The tested threshold is 500ms + **/ + if (process.platform === "win32") { + return new Promise((resolve) => setTimeout(resolve, 500)); + } else { + return Promise.resolve(); + } + }) + .then(spawnNitroProcess) + .then(() => loadLLMModel(currentSettings)) + .then(validateModelStatus) + .catch((err) => { + // TODO: Broadcast error so app could display proper error message + log(`[NITRO]::Error: ${err}`); + return { error: err }; + }); +} + +/** + * Parse prompt template into agrs settings + * @param promptTemplate Template as string + * @returns + */ +function promptTemplateConverter(promptTemplate: string): PromptTemplate { + // Split the string using the markers + const systemMarker = "{system_message}"; + const promptMarker = "{prompt}"; + + if ( + promptTemplate.includes(systemMarker) && + promptTemplate.includes(promptMarker) + ) { + // Find the indices of the markers + const systemIndex = promptTemplate.indexOf(systemMarker); + const promptIndex = promptTemplate.indexOf(promptMarker); + + // Extract the parts of the string + const system_prompt = promptTemplate.substring(0, systemIndex); + const user_prompt = promptTemplate.substring( + systemIndex + systemMarker.length, + promptIndex + ); + const ai_prompt = promptTemplate.substring( + promptIndex + promptMarker.length + ); + + // Return the split parts + return { system_prompt, user_prompt, ai_prompt }; + } else if (promptTemplate.includes(promptMarker)) { + // Extract the parts of the string for the case where only promptMarker is present + const promptIndex = promptTemplate.indexOf(promptMarker); + const user_prompt = promptTemplate.substring(0, promptIndex); + const ai_prompt = promptTemplate.substring( + promptIndex + promptMarker.length + ); + + // Return the split parts + return { user_prompt, ai_prompt }; + } + + // Return an error if none of the conditions are met + return { error: "Cannot split prompt template" }; +} + +/** + * Loads a LLM model into the Nitro subprocess by sending a HTTP POST request. + * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load. + */ +function loadLLMModel(settings: any): Promise { + log(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`); + return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify(settings), + retries: 3, + retryDelay: 500, + }) + .then((res) => { + log( + `[NITRO]::Debug: Load model success with response ${JSON.stringify( + res + )}` + ); + return Promise.resolve(res); + }) + .catch((err) => { + log(`[NITRO]::Error: Load model failed with error ${err}`); + return Promise.reject(); + }); +} + +/** + * Validates the status of a model. + * @returns {Promise} A promise that resolves to an object. + * If the model is loaded successfully, the object is empty. + * If the model is not loaded successfully, the object contains an error message. 
+ */ +async function validateModelStatus(): Promise { + // Send a GET request to the validation URL. + // Retry the request up to 3 times if it fails, with a delay of 500 milliseconds between retries. + return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, { + method: "GET", + headers: { + "Content-Type": "application/json", + }, + retries: 5, + retryDelay: 500, + }).then(async (res: Response) => { + log( + `[NITRO]::Debug: Validate model state success with response ${JSON.stringify( + res + )}` + ); + // If the response is OK, check model_loaded status. + if (res.ok) { + const body = await res.json(); + // If the model is loaded, return an empty object. + // Otherwise, return an object with an error message. + if (body.model_loaded) { + return Promise.resolve(); + } + } + return Promise.reject("Validate model status failed"); + }); +} + +/** + * Terminates the Nitro subprocess. + * @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate. + */ +async function killSubprocess(): Promise { + const controller = new AbortController(); + setTimeout(() => controller.abort(), 5000); + log(`[NITRO]::Debug: Request to kill Nitro`); + + return fetch(NITRO_HTTP_KILL_URL, { + method: "DELETE", + signal: controller.signal, + }) + .then(() => { + subprocess?.kill(); + subprocess = undefined; + }) + .catch(() => {}) + .then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000)) + .then(() => log(`[NITRO]::Debug: Nitro process is terminated`)); +} + +/** + * Spawns a Nitro subprocess. + * @returns A promise that resolves when the Nitro subprocess is started. + */ +function spawnNitroProcess(): Promise { + log(`[NITRO]::Debug: Spawning Nitro subprocess...`); + + return new Promise(async (resolve, reject) => { + let binaryFolder = path.join(__dirname, "..", "bin"); // Current directory by default + let executableOptions = executableNitroFile(); + + const args: string[] = ["1", LOCAL_HOST, PORT.toString()]; + // Execute the binary + log( + `[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}` + ); + subprocess = spawn( + executableOptions.executablePath, + ["1", LOCAL_HOST, PORT.toString()], + { + cwd: binaryFolder, + env: { + ...process.env, + CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices, + }, + } + ); + + // Handle subprocess output + subprocess.stdout.on("data", (data: any) => { + log(`[NITRO]::Debug: ${data}`); + }); + + subprocess.stderr.on("data", (data: any) => { + log(`[NITRO]::Error: ${data}`); + }); + + subprocess.on("close", (code: any) => { + log(`[NITRO]::Debug: Nitro exited with code: ${code}`); + subprocess = undefined; + reject(`child process exited with code ${code}`); + }); + + tcpPortUsed.waitUntilUsed(PORT, 300, 30000).then(() => { + log(`[NITRO]::Debug: Nitro is ready`); + resolve(); + }); + }); +} + +/** + * Get the system resources information + * TODO: Move to Core so that it can be reused + */ +function getResourcesInfo(): Promise { + return new Promise(async (resolve) => { + const cpu = await osUtils.cpuCount(); + log(`[NITRO]::CPU informations - ${cpu}`); + const response: ResourcesInfo = { + numCpuPhysicalCore: cpu, + memAvailable: 0, + }; + resolve(response); + }); +} + +/** + * Every module should have a dispose function + * This will be called when the extension is unloaded and should clean up any resources + * Also called when app is closed + */ +function dispose() { + // clean other registered resources here + killSubprocess(); +} + +export 
default { + runModel, + stopModel, + killSubprocess, + dispose, + updateNvidiaInfo, + getCurrentNitroProcessInfo: () => getNitroProcessInfo(subprocess), +}; diff --git a/extensions/inference-nitro-extension/src/node/nvidia.ts b/extensions/inference-nitro-extension/src/node/nvidia.ts new file mode 100644 index 0000000000..ddd5719e17 --- /dev/null +++ b/extensions/inference-nitro-extension/src/node/nvidia.ts @@ -0,0 +1,201 @@ +import { writeFileSync, existsSync, readFileSync } from "fs"; +import { exec } from "child_process"; +import path from "path"; +import { homedir } from "os"; + +/** + * Default GPU settings + **/ +const DEFALT_SETTINGS = { + notify: true, + run_mode: "cpu", + nvidia_driver: { + exist: false, + version: "", + }, + cuda: { + exist: false, + version: "", + }, + gpus: [], + gpu_highest_vram: "", +}; + +/** + * Path to the settings file + **/ +export const NVIDIA_INFO_FILE = path.join( + homedir(), + "jan", + "settings", + "settings.json" +); + +/** + * Current nitro process + */ +let nitroProcessInfo: NitroProcessInfo | undefined = undefined; + +/** + * Nitro process info + */ +export interface NitroProcessInfo { + isRunning: boolean +} + +/** + * This will retrive GPU informations and persist settings.json + * Will be called when the extension is loaded to turn on GPU acceleration if supported + */ +export async function updateNvidiaInfo() { + if (process.platform !== "darwin") { + await Promise.all([ + updateNvidiaDriverInfo(), + updateCudaExistence(), + updateGpuInfo(), + ]); + } +} + +/** + * Retrieve current nitro process + */ +export const getNitroProcessInfo = (subprocess: any): NitroProcessInfo => { + nitroProcessInfo = { + isRunning: subprocess != null, + }; + return nitroProcessInfo; +}; + +/** + * Validate nvidia and cuda for linux and windows + */ +export async function updateNvidiaDriverInfo(): Promise { + exec( + "nvidia-smi --query-gpu=driver_version --format=csv,noheader", + (error, stdout) => { + let data; + try { + data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8")); + } catch (error) { + data = DEFALT_SETTINGS; + } + + if (!error) { + const firstLine = stdout.split("\n")[0].trim(); + data["nvidia_driver"].exist = true; + data["nvidia_driver"].version = firstLine; + } else { + data["nvidia_driver"].exist = false; + } + + writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2)); + Promise.resolve(); + } + ); +} + +/** + * Check if file exists in paths + */ +export function checkFileExistenceInPaths( + file: string, + paths: string[] +): boolean { + return paths.some((p) => existsSync(path.join(p, file))); +} + +/** + * Validate cuda for linux and windows + */ +export function updateCudaExistence() { + let filesCuda12: string[]; + let filesCuda11: string[]; + let paths: string[]; + let cudaVersion: string = ""; + + if (process.platform === "win32") { + filesCuda12 = ["cublas64_12.dll", "cudart64_12.dll", "cublasLt64_12.dll"]; + filesCuda11 = ["cublas64_11.dll", "cudart64_11.dll", "cublasLt64_11.dll"]; + paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : []; + } else { + filesCuda12 = ["libcudart.so.12", "libcublas.so.12", "libcublasLt.so.12"]; + filesCuda11 = ["libcudart.so.11.0", "libcublas.so.11", "libcublasLt.so.11"]; + paths = process.env.LD_LIBRARY_PATH + ? 
process.env.LD_LIBRARY_PATH.split(path.delimiter) + : []; + paths.push("/usr/lib/x86_64-linux-gnu/"); + } + + let cudaExists = filesCuda12.every( + (file) => existsSync(file) || checkFileExistenceInPaths(file, paths) + ); + + if (!cudaExists) { + cudaExists = filesCuda11.every( + (file) => existsSync(file) || checkFileExistenceInPaths(file, paths) + ); + if (cudaExists) { + cudaVersion = "11"; + } + } else { + cudaVersion = "12"; + } + + let data; + try { + data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8")); + } catch (error) { + data = DEFALT_SETTINGS; + } + + data["cuda"].exist = cudaExists; + data["cuda"].version = cudaVersion; + if (cudaExists) { + data.run_mode = "gpu"; + } + writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2)); +} + +/** + * Get GPU information + */ +export async function updateGpuInfo(): Promise { + exec( + "nvidia-smi --query-gpu=index,memory.total --format=csv,noheader,nounits", + (error, stdout) => { + let data; + try { + data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8")); + } catch (error) { + data = DEFALT_SETTINGS; + } + + if (!error) { + // Get GPU info and gpu has higher memory first + let highestVram = 0; + let highestVramId = "0"; + let gpus = stdout + .trim() + .split("\n") + .map((line) => { + let [id, vram] = line.split(", "); + vram = vram.replace(/\r/g, ""); + if (parseFloat(vram) > highestVram) { + highestVram = parseFloat(vram); + highestVramId = id; + } + return { id, vram }; + }); + + data["gpus"] = gpus; + data["gpu_highest_vram"] = highestVramId; + } else { + data["gpus"] = []; + } + + writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2)); + Promise.resolve(); + } + ); +} diff --git a/extensions/inference-nitro-extension/tsconfig.json b/extensions/inference-nitro-extension/tsconfig.json index b48175a169..bada43fc7b 100644 --- a/extensions/inference-nitro-extension/tsconfig.json +++ b/extensions/inference-nitro-extension/tsconfig.json @@ -1,15 +1,19 @@ { "compilerOptions": { - "target": "es2016", - "module": "ES6", "moduleResolution": "node", - - "outDir": "./dist", - "esModuleInterop": true, - "forceConsistentCasingInFileNames": true, - "strict": false, - "skipLibCheck": true, - "rootDir": "./src" + "target": "es5", + "module": "ES2020", + "lib": ["es2015", "es2016", "es2017", "dom"], + "strict": true, + "sourceMap": true, + "declaration": true, + "allowSyntheticDefaultImports": true, + "experimentalDecorators": true, + "emitDecoratorMetadata": true, + "declarationDir": "dist/types", + "outDir": "dist", + "importHelpers": true, + "typeRoots": ["node_modules/@types"] }, - "include": ["./src"] + "include": ["src"] } diff --git a/extensions/inference-nitro-extension/webpack.config.js b/extensions/inference-nitro-extension/webpack.config.js deleted file mode 100644 index 2927affbc7..0000000000 --- a/extensions/inference-nitro-extension/webpack.config.js +++ /dev/null @@ -1,43 +0,0 @@ -const path = require("path"); -const webpack = require("webpack"); -const packageJson = require("./package.json"); - -module.exports = { - experiments: { outputModule: true }, - entry: "./src/index.ts", // Adjust the entry point to match your project's main file - mode: "production", - module: { - rules: [ - { - test: /\.tsx?$/, - use: "ts-loader", - exclude: /node_modules/, - }, - ], - }, - plugins: [ - new webpack.DefinePlugin({ - MODULE: JSON.stringify(`${packageJson.name}/${packageJson.module}`), - INFERENCE_URL: JSON.stringify( - process.env.INFERENCE_URL || - "http://127.0.0.1:3928/inferences/llamacpp/chat_completion" - ), - 
TROUBLESHOOTING_URL: JSON.stringify("https://jan.ai/guides/troubleshooting")
-    }),
-  ],
-  output: {
-    filename: "index.js", // Adjust the output file name as needed
-    path: path.resolve(__dirname, "dist"),
-    library: { type: "module" }, // Specify ESM output format
-  },
-  resolve: {
-    extensions: [".ts", ".js"],
-    fallback: {
-      path: require.resolve("path-browserify"),
-    },
-  },
-  optimization: {
-    minimize: false,
-  },
-  // Add loaders and other configuration as needed for your project
-};
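
Note on the prompt-template handling introduced above: the new `system_prompt`, `ai_prompt`, and `user_prompt` fields on `ModelSettingParams` are populated by `promptTemplateConverter` in `src/node/index.ts` before the settings are POSTed to Nitro's `/inferences/llamacpp/loadmodel` endpoint. A minimal TypeScript sketch of that splitting, using a hypothetical Alpaca-style template string (the template itself is illustrative, not taken from the patch):

```ts
// Hypothetical template (illustrative only) for an Alpaca-style model.
const template =
  "{system_message}\n### Instruction:\n{prompt}\n### Response:\n";

// Mirrors the first branch of promptTemplateConverter(): text before
// {system_message} -> system_prompt, text between the two markers -> user_prompt,
// text after {prompt} -> ai_prompt.
const systemMarker = "{system_message}";
const promptMarker = "{prompt}";

const systemIndex = template.indexOf(systemMarker);
const promptIndex = template.indexOf(promptMarker);

const system_prompt = template.substring(0, systemIndex);
const user_prompt = template.substring(
  systemIndex + systemMarker.length,
  promptIndex
);
const ai_prompt = template.substring(promptIndex + promptMarker.length);

console.log({ system_prompt, user_prompt, ai_prompt });
// -> { system_prompt: '', user_prompt: '\n### Instruction:\n', ai_prompt: '\n### Response:\n' }
```

Templates that contain only `{prompt}` fall through to the converter's second branch, which returns `user_prompt` and `ai_prompt` and leaves `system_prompt` unset; templates with neither marker are rejected with a "Cannot split prompt template" error.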