diff --git a/Dockerfile b/Dockerfile index 913a93a114..48b2d254fb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,9 +14,6 @@ COPY . ./ RUN export NITRO_VERSION=$(cat extensions/inference-nitro-extension/bin/version.txt) && \ jq --arg nitroVersion $NITRO_VERSION '(.scripts."downloadnitro:linux" | gsub("\\${NITRO_VERSION}"; $nitroVersion)) | gsub("\r"; "")' extensions/inference-nitro-extension/package.json > /tmp/newcommand.txt && export NEW_COMMAND=$(sed 's/^"//;s/"$//' /tmp/newcommand.txt) && jq --arg newCommand "$NEW_COMMAND" '.scripts."downloadnitro:linux" = $newCommand' extensions/inference-nitro-extension/package.json > /tmp/package.json && mv /tmp/package.json extensions/inference-nitro-extension/package.json RUN make install-and-build -RUN yarn workspace jan-web install - -RUN export NODE_ENV=production && yarn workspace jan-web build # # 2. Rebuild the source code only when needed FROM base AS runner @@ -42,12 +39,13 @@ COPY --from=builder /app/docs/openapi ./docs/openapi/ COPY --from=builder /app/pre-install ./pre-install/ # Copy the package.json, yarn.lock, and output of web yarn space to leverage Docker cache -COPY --from=builder /app/web/out ./web/out/ -COPY --from=builder /app/web/.next ./web/.next/ -COPY --from=builder /app/web/package.json ./web/package.json -COPY --from=builder /app/web/yarn.lock ./web/yarn.lock +COPY --from=builder /app/uikit ./uikit/ +COPY --from=builder /app/web ./web/ COPY --from=builder /app/models ./models/ +RUN yarn workspace @janhq/uikit install && yarn workspace @janhq/uikit build +RUN yarn workspace jan-web install + RUN npm install -g serve@latest EXPOSE 1337 3000 3928 @@ -55,7 +53,9 @@ EXPOSE 1337 3000 3928 ENV JAN_API_HOST 0.0.0.0 ENV JAN_API_PORT 1337 -CMD ["sh", "-c", "cd server && node build/main.js & cd web && npx serve out"] +ENV API_BASE_URL http://localhost:1337 + +CMD ["sh", "-c", "export NODE_ENV=production && yarn workspace jan-web build && cd web && npx serve out & cd server && node build/main.js"] # docker 
build -t jan . # docker run -p 1337:1337 -p 3000:3000 -p 3928:3928 jan diff --git a/Dockerfile.gpu b/Dockerfile.gpu index d5ea704992..832e2c18c5 100644 --- a/Dockerfile.gpu +++ b/Dockerfile.gpu @@ -28,9 +28,6 @@ COPY . ./ RUN export NITRO_VERSION=$(cat extensions/inference-nitro-extension/bin/version.txt) && \ jq --arg nitroVersion $NITRO_VERSION '(.scripts."downloadnitro:linux" | gsub("\\${NITRO_VERSION}"; $nitroVersion)) | gsub("\r"; "")' extensions/inference-nitro-extension/package.json > /tmp/newcommand.txt && export NEW_COMMAND=$(sed 's/^"//;s/"$//' /tmp/newcommand.txt) && jq --arg newCommand "$NEW_COMMAND" '.scripts."downloadnitro:linux" = $newCommand' extensions/inference-nitro-extension/package.json > /tmp/package.json && mv /tmp/package.json extensions/inference-nitro-extension/package.json RUN make install-and-build -RUN yarn workspace jan-web install - -RUN export NODE_ENV=production && yarn workspace jan-web build # # 2. Rebuild the source code only when needed FROM base AS runner @@ -66,12 +63,13 @@ COPY --from=builder /app/docs/openapi ./docs/openapi/ COPY --from=builder /app/pre-install ./pre-install/ # Copy the package.json, yarn.lock, and output of web yarn space to leverage Docker cache -COPY --from=builder /app/web/out ./web/out/ -COPY --from=builder /app/web/.next ./web/.next/ -COPY --from=builder /app/web/package.json ./web/package.json -COPY --from=builder /app/web/yarn.lock ./web/yarn.lock +COPY --from=builder /app/uikit ./uikit/ +COPY --from=builder /app/web ./web/ COPY --from=builder /app/models ./models/ +RUN yarn workspace @janhq/uikit install && yarn workspace @janhq/uikit build +RUN yarn workspace jan-web install + RUN npm install -g serve@latest EXPOSE 1337 3000 3928 @@ -81,7 +79,9 @@ ENV LD_LIBRARY_PATH=/usr/local/cuda/targets/x86_64-linux/lib:/usr/local/cuda-12. 
ENV JAN_API_HOST 0.0.0.0 ENV JAN_API_PORT 1337 -CMD ["sh", "-c", "cd server && node build/main.js & cd web && npx serve out"] +ENV API_BASE_URL http://localhost:1337 + +CMD ["sh", "-c", "export NODE_ENV=production && yarn workspace jan-web build && cd web && npx serve out & cd server && node build/main.js"] # pre-requisites: nvidia-docker # docker build -t jan-gpu . -f Dockerfile.gpu diff --git a/Makefile b/Makefile index ffb1abee25..a45477b294 100644 --- a/Makefile +++ b/Makefile @@ -52,18 +52,28 @@ build: check-file-counts clean: ifeq ($(OS),Windows_NT) - powershell -Command "Get-ChildItem -Path . -Include node_modules, .next, dist -Recurse -Directory | Remove-Item -Recurse -Force" + powershell -Command "Get-ChildItem -Path . -Include node_modules, .next, dist, build, out -Recurse -Directory | Remove-Item -Recurse -Force" + powershell -Command "Remove-Item -Recurse -Force ./pre-install/*.tgz" + powershell -Command "Remove-Item -Recurse -Force ./electron/pre-install/*.tgz" rmdir /s /q "%USERPROFILE%\jan\extensions" else ifeq ($(shell uname -s),Linux) find . -name "node_modules" -type d -prune -exec rm -rf '{}' + find . -name ".next" -type d -exec rm -rf '{}' + find . -name "dist" -type d -exec rm -rf '{}' + + find . -name "build" -type d -exec rm -rf '{}' + + find . -name "out" -type d -exec rm -rf '{}' + + rm -rf ./pre-install/*.tgz + rm -rf ./electron/pre-install/*.tgz rm -rf "~/jan/extensions" rm -rf "~/.cache/jan*" else find . -name "node_modules" -type d -prune -exec rm -rf '{}' + find . -name ".next" -type d -exec rm -rf '{}' + find . -name "dist" -type d -exec rm -rf '{}' + + find . -name "build" -type d -exec rm -rf '{}' + + find . 
-name "out" -type d -exec rm -rf '{}' + + rm -rf ./pre-install/*.tgz + rm -rf ./electron/pre-install/*.tgz rm -rf ~/jan/extensions rm -rf ~/Library/Caches/jan* endif diff --git a/README.md b/README.md index 3d0eeb3ef8..ed3d189853 100644 --- a/README.md +++ b/README.md @@ -235,61 +235,84 @@ This will build the app MacOS m1/m2 for production (with code signing already do - Run Jan in Docker mode - - **Option 1**: Run Jan in CPU mode +| Docker compose Profile | Description | +| ---------------------- | -------------------------------------------- | +| `cpu-fs` | Run Jan in CPU mode with default file system | +| `cpu-s3fs` | Run Jan in CPU mode with S3 file system | +| `gpu-fs` | Run Jan in GPU mode with default file system | +| `gpu-s3fs` | Run Jan in GPU mode with S3 file system | + +| Environment Variable | Description | +| ----------------------- | ------------------------------------------------------------------------------------------------------- | +| `S3_BUCKET_NAME` | S3 bucket name - leave blank for default file system | +| `AWS_ACCESS_KEY_ID` | AWS access key ID - leave blank for default file system | +| `AWS_SECRET_ACCESS_KEY` | AWS secret access key - leave blank for default file system | +| `AWS_ENDPOINT` | AWS endpoint URL - leave blank for default file system | +| `AWS_REGION` | AWS region - leave blank for default file system | +| `API_BASE_URL` | Jan Server URL - set it to your public IP address or domain name (default: http://localhost:1337) | + +- **Option 1**: Run Jan in CPU mode + + ```bash + # cpu mode with default file system + docker compose --profile cpu-fs up -d + + # cpu mode with S3 file system + docker compose --profile cpu-s3fs up -d + ``` + +- **Option 2**: Run Jan in GPU mode + + - **Step 1**: Check CUDA compatibility with your NVIDIA driver by running `nvidia-smi` and check the CUDA version in the output + + ```bash + nvidia-smi + + # Output + 
+---------------------------------------------------------------------------------------+ + | NVIDIA-SMI 531.18 Driver Version: 531.18 CUDA Version: 12.1 | + |-----------------------------------------+----------------------+----------------------+ + | GPU Name TCC/WDDM | Bus-Id Disp.A | Volatile Uncorr. ECC | + | Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. | + | | | MIG M. | + |=========================================+======================+======================| + | 0 NVIDIA GeForce RTX 4070 Ti WDDM | 00000000:01:00.0 On | N/A | + | 0% 44C P8 16W / 285W| 1481MiB / 12282MiB | 2% Default | + | | | N/A | + +-----------------------------------------+----------------------+----------------------+ + | 1 NVIDIA GeForce GTX 1660 Ti WDDM | 00000000:02:00.0 Off | N/A | + | 0% 49C P8 14W / 120W| 0MiB / 6144MiB | 0% Default | + | | | N/A | + +-----------------------------------------+----------------------+----------------------+ + | 2 NVIDIA GeForce GTX 1660 Ti WDDM | 00000000:05:00.0 Off | N/A | + | 29% 38C P8 11W / 120W| 0MiB / 6144MiB | 0% Default | + | | | N/A | + +-----------------------------------------+----------------------+----------------------+ + + +---------------------------------------------------------------------------------------+ + | Processes: | + | GPU GI CI PID Type Process name GPU Memory | + | ID ID Usage | + |=======================================================================================| + ``` + + - **Step 2**: Visit [NVIDIA NGC Catalog ](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda/tags) and find the smallest minor version of image tag that matches your CUDA version (e.g., 12.1 -> 12.1.0) + + - **Step 3**: Update the `Dockerfile.gpu` line number 5 with the latest minor version of the image tag from step 2 (e.g. 
change `FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 AS base` to `FROM nvidia/cuda:12.1.0-runtime-ubuntu22.04 AS base`) + + - **Step 4**: Run command to start Jan in GPU mode ```bash - docker compose --profile cpu up -d + # GPU mode with default file system + docker compose --profile gpu-fs up -d + + # GPU mode with S3 file system + docker compose --profile gpu-s3fs up -d ``` - - **Option 2**: Run Jan in GPU mode - - - **Step 1**: Check CUDA compatibility with your NVIDIA driver by running `nvidia-smi` and check the CUDA version in the output - - ```bash - nvidia-smi - - # Output - +---------------------------------------------------------------------------------------+ - | NVIDIA-SMI 531.18 Driver Version: 531.18 CUDA Version: 12.1 | - |-----------------------------------------+----------------------+----------------------+ - | GPU Name TCC/WDDM | Bus-Id Disp.A | Volatile Uncorr. ECC | - | Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. | - | | | MIG M. | - |=========================================+======================+======================| - | 0 NVIDIA GeForce RTX 4070 Ti WDDM | 00000000:01:00.0 On | N/A | - | 0% 44C P8 16W / 285W| 1481MiB / 12282MiB | 2% Default | - | | | N/A | - +-----------------------------------------+----------------------+----------------------+ - | 1 NVIDIA GeForce GTX 1660 Ti WDDM | 00000000:02:00.0 Off | N/A | - | 0% 49C P8 14W / 120W| 0MiB / 6144MiB | 0% Default | - | | | N/A | - +-----------------------------------------+----------------------+----------------------+ - | 2 NVIDIA GeForce GTX 1660 Ti WDDM | 00000000:05:00.0 Off | N/A | - | 29% 38C P8 11W / 120W| 0MiB / 6144MiB | 0% Default | - | | | N/A | - +-----------------------------------------+----------------------+----------------------+ - - +---------------------------------------------------------------------------------------+ - | Processes: | - | GPU GI CI PID Type Process name GPU Memory | - | ID ID Usage | - 
|=======================================================================================| - ``` - - - **Step 2**: Visit [NVIDIA NGC Catalog ](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda/tags) and find the smallest minor version of image tag that matches your CUDA version (e.g., 12.1 -> 12.1.0) - - - **Step 3**: Update the `Dockerfile.gpu` line number 5 with the latest minor version of the image tag from step 2 (e.g. change `FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 AS base` to `FROM nvidia/cuda:12.1.0-runtime-ubuntu22.04 AS base`) - - - **Step 4**: Run command to start Jan in GPU mode - - ```bash - # GPU mode - docker compose --profile gpu up -d - ``` - - This will start the web server and you can access Jan at `http://localhost:3000`. - - > Note: Currently, Docker mode is only work for development and localhost, production is not supported yet. RAG feature is not supported in Docker mode yet. +This will start the web server and you can access Jan at `http://localhost:3000`. + +> Note: RAG feature is not supported in Docker mode with s3fs yet. ## Acknowledgements diff --git a/docker-compose.yml b/docker-compose.yml index 4195a32940..1691a841a0 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -42,10 +42,10 @@ services: vpcbr: # app_cpu service for running the CPU version of the application - app_cpu: + app_cpu_s3fs: image: jan:latest volumes: - - app_data:/app/server/build/jan + - app_data_cpu_s3fs:/app/server/build/jan build: context: . 
dockerfile: Dockerfile @@ -56,9 +56,10 @@ services: S3_BUCKET_NAME: mybucket AWS_ENDPOINT: http://10.5.0.2:9000 AWS_REGION: us-east-1 + API_BASE_URL: http://localhost:1337 restart: always profiles: - - cpu + - cpu-s3fs ports: - "3000:3000" - "1337:1337" @@ -68,7 +69,7 @@ services: ipv4_address: 10.5.0.3 # app_gpu service for running the GPU version of the application - app_gpu: + app_gpu_s3fs: deploy: resources: reservations: @@ -78,7 +79,7 @@ services: capabilities: [gpu] image: jan-gpu:latest volumes: - - app_data:/app/server/build/jan + - app_data_gpu_s3fs:/app/server/build/jan build: context: . dockerfile: Dockerfile.gpu @@ -90,8 +91,9 @@ services: S3_BUCKET_NAME: mybucket AWS_ENDPOINT: http://10.5.0.2:9000 AWS_REGION: us-east-1 + API_BASE_URL: http://localhost:1337 profiles: - - gpu + - gpu-s3fs ports: - "3000:3000" - "1337:1337" @@ -100,9 +102,60 @@ services: vpcbr: ipv4_address: 10.5.0.4 + app_cpu_fs: + image: jan:latest + volumes: + - app_data_cpu_fs:/app/server/build/jan + build: + context: . + dockerfile: Dockerfile + environment: + API_BASE_URL: http://localhost:1337 + restart: always + profiles: + - cpu-fs + ports: + - "3000:3000" + - "1337:1337" + - "3928:3928" + networks: + vpcbr: + ipv4_address: 10.5.0.5 + + # app_gpu service for running the GPU version of the application + app_gpu_fs: + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + image: jan-gpu:latest + volumes: + - app_data_gpu_fs:/app/server/build/jan + build: + context: . 
+ dockerfile: Dockerfile.gpu + restart: always + environment: + API_BASE_URL: http://localhost:1337 + profiles: + - gpu-fs + ports: + - "3000:3000" + - "1337:1337" + - "3928:3928" + networks: + vpcbr: + ipv4_address: 10.5.0.6 + volumes: minio_data: - app_data: + app_data_cpu_s3fs: + app_data_gpu_s3fs: + app_data_cpu_fs: + app_data_gpu_fs: networks: vpcbr: @@ -113,5 +166,7 @@ networks: gateway: 10.5.0.1 # Usage: -# - Run 'docker-compose --profile cpu up -d' to start the app_cpu service -# - Run 'docker-compose --profile gpu up -d' to start the app_gpu service +# - Run 'docker compose --profile cpu-s3fs up -d' to start the app_cpu_s3fs service +# - Run 'docker compose --profile gpu-s3fs up -d' to start the app_gpu_s3fs service +# - Run 'docker compose --profile cpu-fs up -d' to start the app_cpu_fs service +# - Run 'docker compose --profile gpu-fs up -d' to start the app_gpu_fs service diff --git a/extensions/inference-nitro-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts index b88501936c..cb9b88bed2 100644 --- a/extensions/inference-nitro-extension/src/index.ts +++ b/extensions/inference-nitro-extension/src/index.ts @@ -88,7 +88,7 @@ export default class JanInferenceNitroExtension extends InferenceExtension { const electronApi = window?.electronAPI this.inferenceUrl = INFERENCE_URL if (!electronApi) { - this.inferenceUrl = JAN_SERVER_INFERENCE_URL + this.inferenceUrl = `${window.core?.api?.baseApiUrl}/v1/chat/completions` } console.debug('Inference url: ', this.inferenceUrl) diff --git a/server/helpers/setup.ts b/server/helpers/setup.ts index e6fab7a25e..7d8f8914a1 100644 --- a/server/helpers/setup.ts +++ b/server/helpers/setup.ts @@ -31,6 +31,32 @@ export async function setup() { 'utf-8' ) + if (!existsSync(join(appDir, 'settings'))) { + console.debug('Writing nvidia config file...') + mkdirSync(join(appDir, 'settings')) + writeFileSync( + join(appDir, 'settings', 'settings.json'), + JSON.stringify( + { + notify: true, + run_mode: 'cpu', + nvidia_driver: 
{ + exist: false, + version: '', + }, + cuda: { + exist: false, + version: '', + }, + gpus: [], + gpu_highest_vram: '', + gpus_in_use: [], + is_initial: true, + }), + 'utf-8' + ) + } + /** * Install extensions */ diff --git a/web/extension/ExtensionManager.ts b/web/extension/ExtensionManager.ts index 3074177bb8..1259021f73 100644 --- a/web/extension/ExtensionManager.ts +++ b/web/extension/ExtensionManager.ts @@ -83,7 +83,10 @@ export class ExtensionManager { // Import class const extensionUrl = window.electronAPI ? extension.url - : extension.url.replace('extension://', `${API_BASE_URL}/extensions/`) + : extension.url.replace( + 'extension://', + `${window.core?.api?.baseApiUrl ?? ''}/extensions/` + ) await import(/* webpackIgnore: true */ extensionUrl).then( (extensionClass) => { // Register class if it has a default export diff --git a/web/next.config.js b/web/next.config.js index 217f696983..a4b3e6d436 100644 --- a/web/next.config.js +++ b/web/next.config.js @@ -6,6 +6,11 @@ const webpack = require('webpack') const packageJson = require('./package.json') const nextConfig = { + eslint: { + // Warning: This allows production builds to successfully complete even if + // your project has ESLint errors. 
+ ignoreDuringBuilds: true, + }, output: 'export', assetPrefix: '.', images: { diff --git a/web/services/restService.ts b/web/services/restService.ts index e84a5b7c30..73348caebe 100644 --- a/web/services/restService.ts +++ b/web/services/restService.ts @@ -18,11 +18,14 @@ export const restAPI = { ...acc, [proxy.route]: (...args: any) => { // For each route, define a function that sends a request to the API - return fetch(`${API_BASE_URL}/v1/${proxy.path}/${proxy.route}`, { - method: 'POST', - body: JSON.stringify(args), - headers: { contentType: 'application/json' }, - }).then(async (res) => { + return fetch( + `${window.core?.api.baseApiUrl}/v1/${proxy.path}/${proxy.route}`, + { + method: 'POST', + body: JSON.stringify(args), + headers: { contentType: 'application/json' }, + } + ).then(async (res) => { try { if (proxy.path === 'fs') { const text = await res.text() @@ -38,6 +41,6 @@ export const restAPI = { }, {}), openExternalUrl, // Jan Server URL - baseApiUrl: API_BASE_URL, + baseApiUrl: process.env.API_BASE_URL ?? API_BASE_URL, pollingInterval: 5000, }