fix: display error message on model load fail #1894

Merged · 1 commit · Feb 1, 2024
2 changes: 1 addition & 1 deletion core/src/node/api/common/startStopModel.ts
@@ -305,7 +305,7 @@ const loadLLMModel = async (settings: NitroModelSettings): Promise<Response> =>
})
.catch((err: any) => {
logServer(`[NITRO]::Error: Load model failed with error ${err}`)
return Promise.reject()
return Promise.reject(err)
})
}

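Note: rejecting with the original err, instead of an empty rejection, is what lets downstream callers surface the real failure reason. A minimal sketch of the caller-side pattern; the function name is hypothetical and not from this PR:

// Hypothetical caller - illustrative only; assumes loadLLMModel now rejects with the original error.
async function loadOrReport(
  loadLLMModel: (settings: unknown) => Promise<Response>,
  settings: unknown
): Promise<Response> {
  try {
    return await loadLLMModel(settings)
  } catch (err) {
    // Before this change the rejection carried no value, so err was undefined here.
    // Now the underlying reason can be logged and forwarded to the UI layer.
    console.error(`Model load failed: ${err}`)
    throw err
  }
}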
5 changes: 4 additions & 1 deletion extensions/inference-nitro-extension/src/index.ts
@@ -134,6 +134,7 @@ export default class JanInferenceNitroExtension extends InferenceExtension {

const modelFullPath = await joinPath(["models", model.id]);

this._currentModel = model;
const nitroInitResult = await executeOnMain(NODE, "runModel", {
modelFullPath,
model,
@@ -144,7 +145,6 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
return;
}

this._currentModel = model;
events.emit(ModelEvent.OnModelReady, model);

this.getNitroProcesHealthIntervalId = setInterval(
@@ -226,6 +226,9 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
*/
private async onMessageRequest(data: MessageRequest) {
if (data.model?.engine !== InferenceEngine.nitro || !this._currentModel) {
console.log(
`Model is not nitro or no model loaded ${data.model?.engine} ${this._currentModel}`
);
return;
}

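Note: assigning this._currentModel before executeOnMain means the failure path (and the new guard in onMessageRequest) still knows which model was being loaded. A hedged sketch of how such an init failure is forwarded to the web app; the helper name, event name, and payload shape are assumptions, not verbatim code from this extension:

// Hedged sketch - helper name, event name and payload shape are assumptions, not code from this PR.
import { events, Model, ModelEvent } from '@janhq/core'

function reportModelInitFailure(model: Model, error: unknown): void {
  // onModelInitFailed in web/containers/Providers/EventHandler.tsx reads res.error and res.modelId.
  events.emit(ModelEvent.OnModelFail, { modelId: model.id, error: `${error}` })
}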
34 changes: 22 additions & 12 deletions extensions/inference-nitro-extension/src/node/index.ts
@@ -67,7 +67,7 @@ function stopModel(): Promise<void> {
* TODO: Should pass absolute of the model file instead of just the name - So we can modurize the module.ts to npm package
*/
async function runModel(
wrapper: ModelInitOptions
wrapper: ModelInitOptions,
): Promise<ModelOperationResponse | void> {
if (wrapper.model.engine !== InferenceEngine.nitro) {
// Not a nitro model
@@ -85,7 +85,7 @@ async function runModel(
const ggufBinFile = files.find(
(file) =>
file === path.basename(currentModelFile) ||
file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT)
file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT),
);

if (!ggufBinFile) return Promise.reject("No GGUF model file found");
@@ -180,10 +180,10 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
const system_prompt = promptTemplate.substring(0, systemIndex);
const user_prompt = promptTemplate.substring(
systemIndex + systemMarker.length,
promptIndex
promptIndex,
);
const ai_prompt = promptTemplate.substring(
promptIndex + promptMarker.length
promptIndex + promptMarker.length,
);

// Return the split parts
@@ -193,7 +193,7 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
const promptIndex = promptTemplate.indexOf(promptMarker);
const user_prompt = promptTemplate.substring(0, promptIndex);
const ai_prompt = promptTemplate.substring(
promptIndex + promptMarker.length
promptIndex + promptMarker.length,
);

// Return the split parts
@@ -225,14 +225,14 @@ function loadLLMModel(settings: any): Promise<Response> {
.then((res) => {
log(
`[NITRO]::Debug: Load model success with response ${JSON.stringify(
res
)}`
res,
)}`,
);
return Promise.resolve(res);
})
.catch((err) => {
log(`[NITRO]::Error: Load model failed with error ${err}`);
return Promise.reject();
return Promise.reject(err);
});
}

@@ -254,8 +254,8 @@ async function validateModelStatus(): Promise<void> {
retryDelay: 500,
}).then(async (res: Response) => {
log(
`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
res
`[NITRO]::Debug: Validate model state with response ${JSON.stringify(
res.status
)}`
);
// If the response is OK, check model_loaded status.
@@ -264,9 +264,19 @@
// If the model is loaded, return an empty object.
// Otherwise, return an object with an error message.
if (body.model_loaded) {
log(
`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
body
)}`
);
return Promise.resolve();
}
}
log(
`[NITRO]::Debug: Validate model state failed with response ${JSON.stringify(
res.statusText
)}`
);
return Promise.reject("Validate model status failed");
});
}
@@ -307,7 +317,7 @@ function spawnNitroProcess(): Promise<any> {
const args: string[] = ["1", LOCAL_HOST, PORT.toString()];
// Execute the binary
log(
`[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
`[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`,
);
subprocess = spawn(
executableOptions.executablePath,
@@ -318,7 +328,7 @@
...process.env,
CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
},
}
},
);

// Handle subprocess output
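Note: the added logging above pins down whether the health check failed at the HTTP level or because model_loaded was false. The control flow of validateModelStatus reduces to the sketch below; the status URL is a placeholder, not the extension's real endpoint:

// Sketch of the validate-then-reject pattern; statusUrl is a placeholder argument.
async function validateModelLoaded(statusUrl: string): Promise<void> {
  const res = await fetch(statusUrl)
  if (res.ok) {
    const body = await res.json()
    // Nitro reports readiness through model_loaded.
    if (body.model_loaded) return
  }
  // Rejecting with a message lets callers (and eventually the UI) show why startup failed.
  throw new Error(`Validate model status failed: ${res.status} ${res.statusText}`)
}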
21 changes: 18 additions & 3 deletions web/containers/Providers/EventHandler.tsx
@@ -13,9 +13,15 @@
} from '@janhq/core'
import { useAtomValue, useSetAtom } from 'jotai'

import { activeModelAtom, stateModelAtom } from '@/hooks/useActiveModel'
import {
activeModelAtom,
loadModelErrorAtom,
stateModelAtom,
} from '@/hooks/useActiveModel'
import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels'

import { queuedMessageAtom } from '@/hooks/useSendChatMessage'

import { toaster } from '../Toast'

import { extensionManager } from '@/extension'
@@ -26,6 +32,7 @@
import {
updateThreadWaitingForResponseAtom,
threadsAtom,
isGeneratingResponseAtom,
} from '@/helpers/atoms/Thread.atom'

export default function EventHandler({ children }: { children: ReactNode }) {
@@ -34,11 +41,14 @@
const { downloadedModels } = useGetDownloadedModels()
const setActiveModel = useSetAtom(activeModelAtom)
const setStateModel = useSetAtom(stateModelAtom)
const setQueuedMessage = useSetAtom(queuedMessageAtom)
const setLoadModelError = useSetAtom(loadModelErrorAtom)

const updateThreadWaiting = useSetAtom(updateThreadWaitingForResponseAtom)
const threads = useAtomValue(threadsAtom)
const modelsRef = useRef(downloadedModels)
const threadsRef = useRef(threads)
const setIsGeneratingResponse = useSetAtom(isGeneratingResponseAtom)

useEffect(() => {
threadsRef.current = threads
@@ -51,8 +61,9 @@
const onNewMessageResponse = useCallback(
(message: ThreadMessage) => {
addNewMessage(message)
setIsGeneratingResponse(false)
},
[addNewMessage]
[addNewMessage, setIsGeneratingResponse]
)

const onModelReady = useCallback(
@@ -83,13 +94,15 @@
(res: any) => {
const errorMessage = `${res.error}`
console.error('Failed to load model: ' + errorMessage)
setLoadModelError(errorMessage)
setStateModel(() => ({
state: 'start',
loading: false,
model: res.modelId,
}))
setQueuedMessage(false)
},
[setStateModel]
[setStateModel, setQueuedMessage, setLoadModelError]
)

const onMessageResponseUpdate = useCallback(
@@ -108,6 +121,8 @@
// Mark the thread as not waiting for response
updateThreadWaiting(message.thread_id, false)

setIsGeneratingResponse(false)

const thread = threadsRef.current?.find((e) => e.id == message.thread_id)
if (thread) {
const messageContent = message.content[0]?.text.value ?? ''
@@ -127,7 +142,7 @@
?.addNewMessage(message)
}
},
[updateMessage, updateThreadWaiting]

Check warning on line 145 in web/containers/Providers/EventHandler.tsx (GitHub Actions: test-on-macos, test-on-ubuntu, test-on-windows (mcafee), test-on-windows (default-windows-security), test-on-windows (bit-defender)): React Hook useCallback has a missing dependency: 'setIsGeneratingResponse'. Either include it or remove the dependency array
)

useEffect(() => {
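Note: storing the message in loadModelErrorAtom is what makes the failure reason displayable anywhere in the UI, which is the point of this PR. An illustrative consumer follows; the component name and markup are assumptions, not code from this change:

// Illustrative consumer of loadModelErrorAtom - not part of this PR.
import { useAtomValue } from 'jotai'

import { loadModelErrorAtom } from '@/hooks/useActiveModel'

const LoadModelErrorBanner = () => {
  const loadModelError = useAtomValue(loadModelErrorAtom)
  if (!loadModelError) return null
  return <p>Failed to load model: {loadModelError}</p>
}

export default LoadModelErrorBanner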
1 change: 1 addition & 0 deletions web/helpers/atoms/SystemBar.atom.ts
@@ -2,5 +2,6 @@ import { atom } from 'jotai'

export const totalRamAtom = atom<number>(0)
export const usedRamAtom = atom<number>(0)
export const availableRamAtom = atom<number>(0)

export const cpuUsageAtom = atom<number>(0)
1 change: 1 addition & 0 deletions web/helpers/atoms/Thread.atom.ts
@@ -23,6 +23,7 @@ export const setActiveThreadIdAtom = atom(

export const waitingToSendMessage = atom<boolean | undefined>(undefined)

export const isGeneratingResponseAtom = atom<boolean | undefined>(undefined)
/**
* Stores all thread states for the current user
*/
5 changes: 4 additions & 1 deletion web/hooks/useActiveModel.ts
@@ -1,5 +1,5 @@
import { events, Model, ModelEvent } from '@janhq/core'
import { atom, useAtom, useAtomValue } from 'jotai'
import { atom, useAtom, useAtomValue, useSetAtom } from 'jotai'

import { toaster } from '@/containers/Toast'

@@ -9,6 +9,7 @@ import { LAST_USED_MODEL_ID } from './useRecommendedModel'
import { activeThreadAtom } from '@/helpers/atoms/Thread.atom'

export const activeModelAtom = atom<Model | undefined>(undefined)
export const loadModelErrorAtom = atom<string | undefined>(undefined)

export const stateModelAtom = atom({
state: 'start',
@@ -21,6 +22,7 @@ export function useActiveModel() {
const activeThread = useAtomValue(activeThreadAtom)
const [stateModel, setStateModel] = useAtom(stateModelAtom)
const { downloadedModels } = useGetDownloadedModels()
const setLoadModelError = useSetAtom(loadModelErrorAtom)

const startModel = async (modelId: string) => {
if (
@@ -31,6 +33,7 @@
return
}
// TODO: incase we have multiple assistants, the configuration will be from assistant
setLoadModelError(undefined)

setActiveModel(undefined)

6 changes: 6 additions & 0 deletions web/hooks/useGetSystemResources.ts
@@ -6,6 +6,7 @@ import { useSetAtom } from 'jotai'

import { extensionManager } from '@/extension/ExtensionManager'
import {
availableRamAtom,
cpuUsageAtom,
totalRamAtom,
usedRamAtom,
@@ -16,6 +17,7 @@ export default function useGetSystemResources() {
const [cpu, setCPU] = useState<number>(0)
const setTotalRam = useSetAtom(totalRamAtom)
const setUsedRam = useSetAtom(usedRamAtom)
const setAvailableRam = useSetAtom(availableRamAtom)
const setCpuUsage = useSetAtom(cpuUsageAtom)

const getSystemResources = async () => {
@@ -40,6 +42,10 @@
setTotalRam(resourceInfor.mem.totalMemory)

setRam(Math.round(ram * 100))
if (resourceInfor.mem.totalMemory && resourceInfor.mem.usedMemory)
setAvailableRam(
resourceInfor.mem.totalMemory - resourceInfor.mem.usedMemory
)
setCPU(Math.round(currentLoadInfor?.cpu?.usage ?? 0))
setCpuUsage(Math.round(currentLoadInfor?.cpu?.usage ?? 0))
}
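Note: availableRamAtom is derived as totalMemory minus usedMemory, presumably so other parts of the UI can compare free memory against a model's size before loading it. A hypothetical check of that kind; the names and headroom factor are illustrative, not from this PR:

// Hypothetical pre-load check - names and the 1.25x headroom factor are illustrative.
function hasEnoughRam(availableRam: number, modelSizeBytes: number): boolean {
  // Leave headroom for the inference runtime itself.
  return availableRam > modelSizeBytes * 1.25
}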
15 changes: 0 additions & 15 deletions web/hooks/useInference.ts

This file was deleted.

40 changes: 21 additions & 19 deletions web/hooks/useSendChatMessage.ts
@@ -25,12 +25,10 @@
import { selectedModelAtom } from '@/containers/DropdownListSidebar'
import { currentPromptAtom, fileUploadAtom } from '@/containers/Providers/Jotai'

import { toaster } from '@/containers/Toast'

import { getBase64 } from '@/utils/base64'
import { toRuntimeParams, toSettingParams } from '@/utils/modelParam'

import { useActiveModel } from './useActiveModel'
import { loadModelErrorAtom, useActiveModel } from './useActiveModel'

import { extensionManager } from '@/extension/ExtensionManager'
import {
@@ -59,9 +57,11 @@
const { activeModel } = useActiveModel()
const selectedModel = useAtomValue(selectedModelAtom)
const { startModel } = useActiveModel()
const setQueuedMessage = useSetAtom(queuedMessageAtom)
const [queuedMessage, setQueuedMessage] = useAtom(queuedMessageAtom)

Check warning on line 60 in web/hooks/useSendChatMessage.ts (same five GitHub Actions jobs): 'queuedMessage' is assigned a value but never used
const loadModelFailed = useAtomValue(loadModelErrorAtom)

const modelRef = useRef<Model | undefined>()
const loadModelFailedRef = useRef<string | undefined>()
const activeModelParams = useAtomValue(getActiveThreadModelParamsAtom)
const engineParamsUpdate = useAtomValue(engineParamsUpdateAtom)

@@ -73,6 +73,10 @@
modelRef.current = activeModel
}, [activeModel])

useEffect(() => {
loadModelFailedRef.current = loadModelFailed
}, [loadModelFailed])

const resendChatMessage = async (currentMessage: ThreadMessage) => {
if (!activeThread) {
console.error('No active thread')
@@ -121,21 +125,6 @@
events.emit(MessageEvent.OnMessageSent, messageRequest)
}

// TODO: Refactor @louis
const waitForModelStarting = async (modelId: string) => {
return new Promise<void>((resolve) => {
setTimeout(async () => {
if (modelRef.current?.id !== modelId) {
console.debug('waiting for model to start')
await waitForModelStarting(modelId)
resolve()
} else {
resolve()
}
}, 200)
})
}

const sendChatMessage = async (message: string) => {
if (!message || message.trim().length === 0) return

@@ -304,6 +293,19 @@
setEngineParamsUpdate(false)
}

const waitForModelStarting = async (modelId: string) => {
return new Promise<void>((resolve) => {
setTimeout(async () => {
if (modelRef.current?.id !== modelId && !loadModelFailedRef.current) {
await waitForModelStarting(modelId)
resolve()
} else {
resolve()
}
}, 200)
})
}

return {
sendChatMessage,
resendChatMessage,
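Note: waitForModelStarting now also stops polling once loadModelFailedRef is set, so a failed load no longer leaves the send path waiting forever. A hedged sketch of the call site; the surrounding names are assumptions:

// Hypothetical call site - illustrative only.
async function ensureModelThenSend(modelId: string): Promise<boolean> {
  startModel(modelId)                    // from useActiveModel
  await waitForModelStarting(modelId)    // resolves on model ready OR on a recorded load error
  // If the load failed, EventHandler has already reset the queued state and stored the error message.
  return !loadModelFailedRef.current
}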