fix: display error message on model load fail (#1894)
louis-menlo authored Feb 1, 2024
1 parent 36cd598 commit 5ce2e42
Showing 15 changed files with 166 additions and 92 deletions.
2 changes: 1 addition & 1 deletion core/src/node/api/common/startStopModel.ts
@@ -305,7 +305,7 @@ const loadLLMModel = async (settings: NitroModelSettings): Promise<Response> =>
})
.catch((err: any) => {
logServer(`[NITRO]::Error: Load model failed with error ${err}`)
return Promise.reject()
return Promise.reject(err)
})
}

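Rejecting with err instead of an empty Promise.reject() is what lets the layers above read the real failure reason. A minimal sketch of a caller, assuming only what this hunk shows (the wrapper function name is illustrative, not part of the commit):

// Illustrative caller, not part of this commit: surfaces the rejection reason from loadLLMModel.
async function loadModelOrReport(settings: NitroModelSettings): Promise<void> {
  try {
    await loadLLMModel(settings)
  } catch (err) {
    // err is now the underlying error object rather than undefined.
    logServer(`[NITRO]::Error: surfaced to caller: ${err}`)
    throw err
  }
}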
5 changes: 4 additions & 1 deletion extensions/inference-nitro-extension/src/index.ts
@@ -134,6 +134,7 @@ export default class JanInferenceNitroExtension extends InferenceExtension {

const modelFullPath = await joinPath(["models", model.id]);

this._currentModel = model;
const nitroInitResult = await executeOnMain(NODE, "runModel", {
modelFullPath,
model,
@@ -144,7 +145,6 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
return;
}

this._currentModel = model;
events.emit(ModelEvent.OnModelReady, model);

this.getNitroProcesHealthIntervalId = setInterval(
@@ -226,6 +226,9 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
*/
private async onMessageRequest(data: MessageRequest) {
if (data.model?.engine !== InferenceEngine.nitro || !this._currentModel) {
console.log(
`Model is not nitro or no model loaded ${data.model?.engine} ${this._currentModel}`
);
return;
}

34 changes: 22 additions & 12 deletions extensions/inference-nitro-extension/src/node/index.ts
@@ -67,7 +67,7 @@ function stopModel(): Promise<void> {
* TODO: Should pass absolute of the model file instead of just the name - So we can modurize the module.ts to npm package
*/
async function runModel(
wrapper: ModelInitOptions
wrapper: ModelInitOptions,
): Promise<ModelOperationResponse | void> {
if (wrapper.model.engine !== InferenceEngine.nitro) {
// Not a nitro model
@@ -85,7 +85,7 @@ async function runModel(
const ggufBinFile = files.find(
(file) =>
file === path.basename(currentModelFile) ||
file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT)
file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT),
);

if (!ggufBinFile) return Promise.reject("No GGUF model file found");
@@ -180,10 +180,10 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
const system_prompt = promptTemplate.substring(0, systemIndex);
const user_prompt = promptTemplate.substring(
systemIndex + systemMarker.length,
promptIndex
promptIndex,
);
const ai_prompt = promptTemplate.substring(
promptIndex + promptMarker.length
promptIndex + promptMarker.length,
);

// Return the split parts
@@ -193,7 +193,7 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
const promptIndex = promptTemplate.indexOf(promptMarker);
const user_prompt = promptTemplate.substring(0, promptIndex);
const ai_prompt = promptTemplate.substring(
promptIndex + promptMarker.length
promptIndex + promptMarker.length,
);

// Return the split parts
@@ -225,14 +225,14 @@ function loadLLMModel(settings: any): Promise<Response> {
.then((res) => {
log(
`[NITRO]::Debug: Load model success with response ${JSON.stringify(
res
)}`
res,
)}`,
);
return Promise.resolve(res);
})
.catch((err) => {
log(`[NITRO]::Error: Load model failed with error ${err}`);
return Promise.reject();
return Promise.reject(err);
});
}

@@ -254,8 +254,8 @@ async function validateModelStatus(): Promise<void> {
retryDelay: 500,
}).then(async (res: Response) => {
log(
`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
res
`[NITRO]::Debug: Validate model state with response ${JSON.stringify(
res.status
)}`
);
// If the response is OK, check model_loaded status.
@@ -264,9 +264,19 @@ async function validateModelStatus(): Promise<void> {
// If the model is loaded, return an empty object.
// Otherwise, return an object with an error message.
if (body.model_loaded) {
log(
`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
body
)}`
);
return Promise.resolve();
}
}
log(
`[NITRO]::Debug: Validate model state failed with response ${JSON.stringify(
res.statusText
)}`
);
return Promise.reject("Validate model status failed");
});
}
@@ -307,7 +317,7 @@ function spawnNitroProcess(): Promise<any> {
const args: string[] = ["1", LOCAL_HOST, PORT.toString()];
// Execute the binary
log(
`[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
`[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`,
);
subprocess = spawn(
executableOptions.executablePath,
@@ -318,7 +328,7 @@ function spawnNitroProcess(): Promise<any> {
...process.env,
CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
},
}
},
);

// Handle subprocess output
21 changes: 18 additions & 3 deletions web/containers/Providers/EventHandler.tsx
@@ -13,9 +13,15 @@ import {
} from '@janhq/core'
import { useAtomValue, useSetAtom } from 'jotai'

import { activeModelAtom, stateModelAtom } from '@/hooks/useActiveModel'
import {
activeModelAtom,
loadModelErrorAtom,
stateModelAtom,
} from '@/hooks/useActiveModel'
import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels'

import { queuedMessageAtom } from '@/hooks/useSendChatMessage'

import { toaster } from '../Toast'

import { extensionManager } from '@/extension'
@@ -26,6 +32,7 @@ import {
import {
updateThreadWaitingForResponseAtom,
threadsAtom,
isGeneratingResponseAtom,
} from '@/helpers/atoms/Thread.atom'

export default function EventHandler({ children }: { children: ReactNode }) {
@@ -34,11 +41,14 @@ export default function EventHandler({ children }: { children: ReactNode }) {
const { downloadedModels } = useGetDownloadedModels()
const setActiveModel = useSetAtom(activeModelAtom)
const setStateModel = useSetAtom(stateModelAtom)
const setQueuedMessage = useSetAtom(queuedMessageAtom)
const setLoadModelError = useSetAtom(loadModelErrorAtom)

const updateThreadWaiting = useSetAtom(updateThreadWaitingForResponseAtom)
const threads = useAtomValue(threadsAtom)
const modelsRef = useRef(downloadedModels)
const threadsRef = useRef(threads)
const setIsGeneratingResponse = useSetAtom(isGeneratingResponseAtom)

useEffect(() => {
threadsRef.current = threads
@@ -51,8 +61,9 @@ export default function EventHandler({ children }: { children: ReactNode }) {
const onNewMessageResponse = useCallback(
(message: ThreadMessage) => {
addNewMessage(message)
setIsGeneratingResponse(false)
},
[addNewMessage]
[addNewMessage, setIsGeneratingResponse]
)

const onModelReady = useCallback(
@@ -83,13 +94,15 @@ export default function EventHandler({ children }: { children: ReactNode }) {
(res: any) => {
const errorMessage = `${res.error}`
console.error('Failed to load model: ' + errorMessage)
setLoadModelError(errorMessage)
setStateModel(() => ({
state: 'start',
loading: false,
model: res.modelId,
}))
setQueuedMessage(false)
},
[setStateModel]
[setStateModel, setQueuedMessage, setLoadModelError]
)

const onMessageResponseUpdate = useCallback(
@@ -108,6 +121,8 @@ export default function EventHandler({ children }: { children: ReactNode }) {
// Mark the thread as not waiting for response
updateThreadWaiting(message.thread_id, false)

setIsGeneratingResponse(false)

const thread = threadsRef.current?.find((e) => e.id == message.thread_id)
if (thread) {
const messageContent = message.content[0]?.text.value ?? ''
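onModelInitFailed now records the error string in loadModelErrorAtom (and clears the queued-message flag) instead of only logging it. A hedged sketch of how a consumer component might read that atom to show the message; the component and its markup are assumptions, since the actual error UI lives in files not expanded above:

// Illustrative consumer of loadModelErrorAtom; the real error display is not part of the hunks shown here.
import { useAtomValue } from 'jotai'
import { loadModelErrorAtom } from '@/hooks/useActiveModel'

const LoadModelError = () => {
  const loadModelError = useAtomValue(loadModelErrorAtom)
  if (!loadModelError) return null
  // Render the captured failure reason instead of failing silently.
  return <p>Failed to load model: {loadModelError}</p>
}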
1 change: 1 addition & 0 deletions web/helpers/atoms/SystemBar.atom.ts
@@ -2,5 +2,6 @@ import { atom } from 'jotai'

export const totalRamAtom = atom<number>(0)
export const usedRamAtom = atom<number>(0)
export const availableRamAtom = atom<number>(0)

export const cpuUsageAtom = atom<number>(0)
1 change: 1 addition & 0 deletions web/helpers/atoms/Thread.atom.ts
@@ -23,6 +23,7 @@ export const setActiveThreadIdAtom = atom(

export const waitingToSendMessage = atom<boolean | undefined>(undefined)

export const isGeneratingResponseAtom = atom<boolean | undefined>(undefined)
/**
* Stores all thread states for the current user
*/
5 changes: 4 additions & 1 deletion web/hooks/useActiveModel.ts
@@ -1,5 +1,5 @@
import { events, Model, ModelEvent } from '@janhq/core'
import { atom, useAtom, useAtomValue } from 'jotai'
import { atom, useAtom, useAtomValue, useSetAtom } from 'jotai'

import { toaster } from '@/containers/Toast'

@@ -9,6 +9,7 @@ import { LAST_USED_MODEL_ID } from './useRecommendedModel'
import { activeThreadAtom } from '@/helpers/atoms/Thread.atom'

export const activeModelAtom = atom<Model | undefined>(undefined)
export const loadModelErrorAtom = atom<string | undefined>(undefined)

export const stateModelAtom = atom({
state: 'start',
@@ -21,6 +22,7 @@ export function useActiveModel() {
const activeThread = useAtomValue(activeThreadAtom)
const [stateModel, setStateModel] = useAtom(stateModelAtom)
const { downloadedModels } = useGetDownloadedModels()
const setLoadModelError = useSetAtom(loadModelErrorAtom)

const startModel = async (modelId: string) => {
if (
@@ -31,6 +33,7 @@
return
}
// TODO: incase we have multiple assistants, the configuration will be from assistant
setLoadModelError(undefined)

setActiveModel(undefined)

6 changes: 6 additions & 0 deletions web/hooks/useGetSystemResources.ts
@@ -6,6 +6,7 @@ import { useSetAtom } from 'jotai'

import { extensionManager } from '@/extension/ExtensionManager'
import {
availableRamAtom,
cpuUsageAtom,
totalRamAtom,
usedRamAtom,
@@ -16,6 +17,7 @@ export default function useGetSystemResources() {
const [cpu, setCPU] = useState<number>(0)
const setTotalRam = useSetAtom(totalRamAtom)
const setUsedRam = useSetAtom(usedRamAtom)
const setAvailableRam = useSetAtom(availableRamAtom)
const setCpuUsage = useSetAtom(cpuUsageAtom)

const getSystemResources = async () => {
@@ -40,6 +42,10 @@ export default function useGetSystemResources() {
setTotalRam(resourceInfor.mem.totalMemory)

setRam(Math.round(ram * 100))
if (resourceInfor.mem.totalMemory && resourceInfor.mem.usedMemory)
setAvailableRam(
resourceInfor.mem.totalMemory - resourceInfor.mem.usedMemory
)
setCPU(Math.round(currentLoadInfor?.cpu?.usage ?? 0))
setCpuUsage(Math.round(currentLoadInfor?.cpu?.usage ?? 0))
}
15 changes: 0 additions & 15 deletions web/hooks/useInference.ts

This file was deleted.

40 changes: 21 additions & 19 deletions web/hooks/useSendChatMessage.ts
@@ -25,12 +25,10 @@ import { ulid } from 'ulid'
import { selectedModelAtom } from '@/containers/DropdownListSidebar'
import { currentPromptAtom, fileUploadAtom } from '@/containers/Providers/Jotai'

import { toaster } from '@/containers/Toast'

import { getBase64 } from '@/utils/base64'
import { toRuntimeParams, toSettingParams } from '@/utils/modelParam'

import { useActiveModel } from './useActiveModel'
import { loadModelErrorAtom, useActiveModel } from './useActiveModel'

import { extensionManager } from '@/extension/ExtensionManager'
import {
@@ -59,9 +57,11 @@ export default function useSendChatMessage() {
const { activeModel } = useActiveModel()
const selectedModel = useAtomValue(selectedModelAtom)
const { startModel } = useActiveModel()
const setQueuedMessage = useSetAtom(queuedMessageAtom)
const [queuedMessage, setQueuedMessage] = useAtom(queuedMessageAtom)

GitHub Actions annotation on line 60 of web/hooks/useSendChatMessage.ts (reported by every test job): 'queuedMessage' is assigned a value but never used
const loadModelFailed = useAtomValue(loadModelErrorAtom)

const modelRef = useRef<Model | undefined>()
const loadModelFailedRef = useRef<string | undefined>()
const activeModelParams = useAtomValue(getActiveThreadModelParamsAtom)
const engineParamsUpdate = useAtomValue(engineParamsUpdateAtom)

@@ -73,6 +73,10 @@
modelRef.current = activeModel
}, [activeModel])

useEffect(() => {
loadModelFailedRef.current = loadModelFailed
}, [loadModelFailed])

const resendChatMessage = async (currentMessage: ThreadMessage) => {
if (!activeThread) {
console.error('No active thread')
@@ -121,21 +125,6 @@
events.emit(MessageEvent.OnMessageSent, messageRequest)
}

// TODO: Refactor @louis
const waitForModelStarting = async (modelId: string) => {
return new Promise<void>((resolve) => {
setTimeout(async () => {
if (modelRef.current?.id !== modelId) {
console.debug('waiting for model to start')
await waitForModelStarting(modelId)
resolve()
} else {
resolve()
}
}, 200)
})
}

const sendChatMessage = async (message: string) => {
if (!message || message.trim().length === 0) return

@@ -304,6 +293,19 @@
setEngineParamsUpdate(false)
}

const waitForModelStarting = async (modelId: string) => {
return new Promise<void>((resolve) => {
setTimeout(async () => {
if (modelRef.current?.id !== modelId && !loadModelFailedRef.current) {
await waitForModelStarting(modelId)
resolve()
} else {
resolve()
}
}, 200)
})
}

return {
sendChatMessage,
resendChatMessage,
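waitForModelStarting, now defined below sendChatMessage, also stops polling when loadModelFailedRef.current is set, so a failed load no longer leaves message sending blocked. A rough sketch of the assumed call site inside sendChatMessage, for illustration only since that part of the function is not expanded above:

// Illustrative call site (assumed): start the model, wait, then bail out if a load error was recorded.
await startModel(selectedModel.id)
await waitForModelStarting(selectedModel.id)
if (loadModelFailedRef.current) {
  // EventHandler has already written the error into loadModelErrorAtom; skip queueing the message.
  setQueuedMessage(false)
  return
}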
(Diffs for the remaining changed files are not shown.)
