fix: display error message on model load fail #1894

Merged · 1 commit · Feb 1, 2024
2 changes: 1 addition & 1 deletion core/src/node/api/common/startStopModel.ts
@@ -305,7 +305,7 @@ const loadLLMModel = async (settings: NitroModelSettings): Promise<Response> =>
})
.catch((err: any) => {
logServer(`[NITRO]::Error: Load model failed with error ${err}`)
return Promise.reject()
return Promise.reject(err)
})
}

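Note: rejecting with the original err, instead of an empty rejection, is what lets downstream callers surface the real failure reason. A minimal sketch of the caller-side pattern; the function name is hypothetical and not from this PR:

// Hypothetical caller - illustrative only; assumes loadLLMModel now rejects with the original error.
async function loadOrReport(
  loadLLMModel: (settings: unknown) => Promise<Response>,
  settings: unknown
): Promise<Response> {
  try {
    return await loadLLMModel(settings)
  } catch (err) {
    // Before this change the rejection carried no value, so err was undefined here.
    // Now the underlying reason can be logged and forwarded to the UI layer.
    console.error(`Model load failed: ${err}`)
    throw err
  }
}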
5 changes: 4 additions & 1 deletion extensions/inference-nitro-extension/src/index.ts
@@ -134,6 +134,7 @@ export default class JanInferenceNitroExtension extends InferenceExtension {

const modelFullPath = await joinPath(["models", model.id]);

this._currentModel = model;
const nitroInitResult = await executeOnMain(NODE, "runModel", {
modelFullPath,
model,
@@ -144,7 +145,6 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
return;
}

this._currentModel = model;
events.emit(ModelEvent.OnModelReady, model);

this.getNitroProcesHealthIntervalId = setInterval(
@@ -226,6 +226,9 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
*/
private async onMessageRequest(data: MessageRequest) {
if (data.model?.engine !== InferenceEngine.nitro || !this._currentModel) {
console.log(
`Model is not nitro or no model loaded ${data.model?.engine} ${this._currentModel}`
);
return;
}

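Note: assigning this._currentModel before executeOnMain means the failure path (and the new guard in onMessageRequest) still knows which model was being loaded. A hedged sketch of how such an init failure is forwarded to the web app; the helper name, event name, and payload shape are assumptions, not verbatim code from this extension:

// Hedged sketch - helper name, event name and payload shape are assumptions, not code from this PR.
import { events, Model, ModelEvent } from '@janhq/core'

function reportModelInitFailure(model: Model, error: unknown): void {
  // onModelInitFailed in web/containers/Providers/EventHandler.tsx reads res.error and res.modelId.
  events.emit(ModelEvent.OnModelFail, { modelId: model.id, error: `${error}` })
}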
34 changes: 22 additions & 12 deletions extensions/inference-nitro-extension/src/node/index.ts
@@ -67,7 +67,7 @@ function stopModel(): Promise<void> {
* TODO: Should pass absolute of the model file instead of just the name - So we can modurize the module.ts to npm package
*/
async function runModel(
wrapper: ModelInitOptions
wrapper: ModelInitOptions,
): Promise<ModelOperationResponse | void> {
if (wrapper.model.engine !== InferenceEngine.nitro) {
// Not a nitro model
@@ -85,7 +85,7 @@ async function runModel(
const ggufBinFile = files.find(
(file) =>
file === path.basename(currentModelFile) ||
file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT)
file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT),
);

if (!ggufBinFile) return Promise.reject("No GGUF model file found");
@@ -180,10 +180,10 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
const system_prompt = promptTemplate.substring(0, systemIndex);
const user_prompt = promptTemplate.substring(
systemIndex + systemMarker.length,
promptIndex
promptIndex,
);
const ai_prompt = promptTemplate.substring(
promptIndex + promptMarker.length
promptIndex + promptMarker.length,
);

// Return the split parts
@@ -193,7 +193,7 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
const promptIndex = promptTemplate.indexOf(promptMarker);
const user_prompt = promptTemplate.substring(0, promptIndex);
const ai_prompt = promptTemplate.substring(
promptIndex + promptMarker.length
promptIndex + promptMarker.length,
);

// Return the split parts
@@ -225,14 +225,14 @@ function loadLLMModel(settings: any): Promise<Response> {
.then((res) => {
log(
`[NITRO]::Debug: Load model success with response ${JSON.stringify(
res
)}`
res,
)}`,
);
return Promise.resolve(res);
})
.catch((err) => {
log(`[NITRO]::Error: Load model failed with error ${err}`);
return Promise.reject();
return Promise.reject(err);
});
}

@@ -254,8 +254,8 @@ async function validateModelStatus(): Promise<void> {
retryDelay: 500,
}).then(async (res: Response) => {
log(
`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
res
`[NITRO]::Debug: Validate model state with response ${JSON.stringify(
res.status
)}`
);
// If the response is OK, check model_loaded status.
@@ -264,9 +264,19 @@
// If the model is loaded, return an empty object.
// Otherwise, return an object with an error message.
if (body.model_loaded) {
log(
`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
body
)}`
);
return Promise.resolve();
}
}
log(
`[NITRO]::Debug: Validate model state failed with response ${JSON.stringify(
res.statusText
)}`
);
return Promise.reject("Validate model status failed");
});
}
@@ -307,7 +317,7 @@ function spawnNitroProcess(): Promise<any> {
const args: string[] = ["1", LOCAL_HOST, PORT.toString()];
// Execute the binary
log(
`[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
`[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`,
);
subprocess = spawn(
executableOptions.executablePath,
@@ -318,7 +328,7 @@
...process.env,
CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
},
}
},
);

// Handle subprocess output
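Note: the added logging above pins down whether the health check failed at the HTTP level or because model_loaded was false. The control flow of validateModelStatus reduces to the sketch below; the status URL is a placeholder, not the extension's real endpoint:

// Sketch of the validate-then-reject pattern; statusUrl is a placeholder argument.
async function validateModelLoaded(statusUrl: string): Promise<void> {
  const res = await fetch(statusUrl)
  if (res.ok) {
    const body = await res.json()
    // Nitro reports readiness through model_loaded.
    if (body.model_loaded) return
  }
  // Rejecting with a message lets callers (and eventually the UI) show why startup failed.
  throw new Error(`Validate model status failed: ${res.status} ${res.statusText}`)
}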
21 changes: 18 additions & 3 deletions web/containers/Providers/EventHandler.tsx
@@ -13,9 +13,15 @@
} from '@janhq/core'
import { useAtomValue, useSetAtom } from 'jotai'

import { activeModelAtom, stateModelAtom } from '@/hooks/useActiveModel'
import {
activeModelAtom,
loadModelErrorAtom,
stateModelAtom,
} from '@/hooks/useActiveModel'
import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels'

import { queuedMessageAtom } from '@/hooks/useSendChatMessage'

import { toaster } from '../Toast'

import { extensionManager } from '@/extension'
@@ -26,6 +32,7 @@
import {
updateThreadWaitingForResponseAtom,
threadsAtom,
isGeneratingResponseAtom,
} from '@/helpers/atoms/Thread.atom'

export default function EventHandler({ children }: { children: ReactNode }) {
@@ -34,11 +41,14 @@
const { downloadedModels } = useGetDownloadedModels()
const setActiveModel = useSetAtom(activeModelAtom)
const setStateModel = useSetAtom(stateModelAtom)
const setQueuedMessage = useSetAtom(queuedMessageAtom)
const setLoadModelError = useSetAtom(loadModelErrorAtom)

const updateThreadWaiting = useSetAtom(updateThreadWaitingForResponseAtom)
const threads = useAtomValue(threadsAtom)
const modelsRef = useRef(downloadedModels)
const threadsRef = useRef(threads)
const setIsGeneratingResponse = useSetAtom(isGeneratingResponseAtom)

useEffect(() => {
threadsRef.current = threads
@@ -51,8 +61,9 @@
const onNewMessageResponse = useCallback(
(message: ThreadMessage) => {
addNewMessage(message)
setIsGeneratingResponse(false)
},
[addNewMessage]
[addNewMessage, setIsGeneratingResponse]
)

const onModelReady = useCallback(
@@ -83,13 +94,15 @@
(res: any) => {
const errorMessage = `${res.error}`
console.error('Failed to load model: ' + errorMessage)
setLoadModelError(errorMessage)
setStateModel(() => ({
state: 'start',
loading: false,
model: res.modelId,
}))
setQueuedMessage(false)
},
[setStateModel]
[setStateModel, setQueuedMessage, setLoadModelError]
)

const onMessageResponseUpdate = useCallback(
@@ -108,6 +121,8 @@
// Mark the thread as not waiting for response
updateThreadWaiting(message.thread_id, false)

setIsGeneratingResponse(false)

const thread = threadsRef.current?.find((e) => e.id == message.thread_id)
if (thread) {
const messageContent = message.content[0]?.text.value ?? ''
@@ -127,7 +142,7 @@
?.addNewMessage(message)
}
},
[updateMessage, updateThreadWaiting]

Check warning on line 145 in web/containers/Providers/EventHandler.tsx (GitHub Actions: test-on-macos, test-on-ubuntu, test-on-windows (mcafee), test-on-windows (default-windows-security), test-on-windows (bit-defender)): React Hook useCallback has a missing dependency: 'setIsGeneratingResponse'. Either include it or remove the dependency array
)

useEffect(() => {
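Note: storing the message in loadModelErrorAtom is what makes the failure reason displayable anywhere in the UI, which is the point of this PR. An illustrative consumer follows; the component name and markup are assumptions, not code from this change:

// Illustrative consumer of loadModelErrorAtom - not part of this PR.
import { useAtomValue } from 'jotai'

import { loadModelErrorAtom } from '@/hooks/useActiveModel'

const LoadModelErrorBanner = () => {
  const loadModelError = useAtomValue(loadModelErrorAtom)
  if (!loadModelError) return null
  return <p>Failed to load model: {loadModelError}</p>
}

export default LoadModelErrorBanner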
1 change: 1 addition & 0 deletions web/helpers/atoms/SystemBar.atom.ts
@@ -2,5 +2,6 @@ import { atom } from 'jotai'

export const totalRamAtom = atom<number>(0)
export const usedRamAtom = atom<number>(0)
export const availableRamAtom = atom<number>(0)

export const cpuUsageAtom = atom<number>(0)
1 change: 1 addition & 0 deletions web/helpers/atoms/Thread.atom.ts
@@ -23,6 +23,7 @@ export const setActiveThreadIdAtom = atom(

export const waitingToSendMessage = atom<boolean | undefined>(undefined)

export const isGeneratingResponseAtom = atom<boolean | undefined>(undefined)
/**
* Stores all thread states for the current user
*/
5 changes: 4 additions & 1 deletion web/hooks/useActiveModel.ts
@@ -1,5 +1,5 @@
import { events, Model, ModelEvent } from '@janhq/core'
import { atom, useAtom, useAtomValue } from 'jotai'
import { atom, useAtom, useAtomValue, useSetAtom } from 'jotai'

import { toaster } from '@/containers/Toast'

@@ -9,6 +9,7 @@ import { LAST_USED_MODEL_ID } from './useRecommendedModel'
import { activeThreadAtom } from '@/helpers/atoms/Thread.atom'

export const activeModelAtom = atom<Model | undefined>(undefined)
export const loadModelErrorAtom = atom<string | undefined>(undefined)

export const stateModelAtom = atom({
state: 'start',
@@ -21,6 +22,7 @@ export function useActiveModel() {
const activeThread = useAtomValue(activeThreadAtom)
const [stateModel, setStateModel] = useAtom(stateModelAtom)
const { downloadedModels } = useGetDownloadedModels()
const setLoadModelError = useSetAtom(loadModelErrorAtom)

const startModel = async (modelId: string) => {
if (
@@ -31,6 +33,7 @@
return
}
// TODO: incase we have multiple assistants, the configuration will be from assistant
setLoadModelError(undefined)

setActiveModel(undefined)

6 changes: 6 additions & 0 deletions web/hooks/useGetSystemResources.ts
@@ -6,6 +6,7 @@ import { useSetAtom } from 'jotai'

import { extensionManager } from '@/extension/ExtensionManager'
import {
availableRamAtom,
cpuUsageAtom,
totalRamAtom,
usedRamAtom,
@@ -16,6 +17,7 @@ export default function useGetSystemResources() {
const [cpu, setCPU] = useState<number>(0)
const setTotalRam = useSetAtom(totalRamAtom)
const setUsedRam = useSetAtom(usedRamAtom)
const setAvailableRam = useSetAtom(availableRamAtom)
const setCpuUsage = useSetAtom(cpuUsageAtom)

const getSystemResources = async () => {
@@ -40,6 +42,10 @@
setTotalRam(resourceInfor.mem.totalMemory)

setRam(Math.round(ram * 100))
if (resourceInfor.mem.totalMemory && resourceInfor.mem.usedMemory)
setAvailableRam(
resourceInfor.mem.totalMemory - resourceInfor.mem.usedMemory
)
setCPU(Math.round(currentLoadInfor?.cpu?.usage ?? 0))
setCpuUsage(Math.round(currentLoadInfor?.cpu?.usage ?? 0))
}
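Note: availableRamAtom is derived as totalMemory minus usedMemory, presumably so other parts of the UI can compare free memory against a model's size before loading it. A hypothetical check of that kind; the names and headroom factor are illustrative, not from this PR:

// Hypothetical pre-load check - names and the 1.25x headroom factor are illustrative.
function hasEnoughRam(availableRam: number, modelSizeBytes: number): boolean {
  // Leave headroom for the inference runtime itself.
  return availableRam > modelSizeBytes * 1.25
}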
15 changes: 0 additions & 15 deletions web/hooks/useInference.ts

This file was deleted.

40 changes: 21 additions & 19 deletions web/hooks/useSendChatMessage.ts
@@ -25,12 +25,10 @@
import { selectedModelAtom } from '@/containers/DropdownListSidebar'
import { currentPromptAtom, fileUploadAtom } from '@/containers/Providers/Jotai'

import { toaster } from '@/containers/Toast'

import { getBase64 } from '@/utils/base64'
import { toRuntimeParams, toSettingParams } from '@/utils/modelParam'

import { useActiveModel } from './useActiveModel'
import { loadModelErrorAtom, useActiveModel } from './useActiveModel'

import { extensionManager } from '@/extension/ExtensionManager'
import {
@@ -59,9 +57,11 @@
const { activeModel } = useActiveModel()
const selectedModel = useAtomValue(selectedModelAtom)
const { startModel } = useActiveModel()
const setQueuedMessage = useSetAtom(queuedMessageAtom)
const [queuedMessage, setQueuedMessage] = useAtom(queuedMessageAtom)

Check warning on line 60 in web/hooks/useSendChatMessage.ts (same five GitHub Actions jobs): 'queuedMessage' is assigned a value but never used
const loadModelFailed = useAtomValue(loadModelErrorAtom)

const modelRef = useRef<Model | undefined>()
const loadModelFailedRef = useRef<string | undefined>()
const activeModelParams = useAtomValue(getActiveThreadModelParamsAtom)
const engineParamsUpdate = useAtomValue(engineParamsUpdateAtom)

@@ -73,6 +73,10 @@
modelRef.current = activeModel
}, [activeModel])

useEffect(() => {
loadModelFailedRef.current = loadModelFailed
}, [loadModelFailed])

const resendChatMessage = async (currentMessage: ThreadMessage) => {
if (!activeThread) {
console.error('No active thread')
@@ -121,21 +125,6 @@
events.emit(MessageEvent.OnMessageSent, messageRequest)
}

// TODO: Refactor @louis
const waitForModelStarting = async (modelId: string) => {
return new Promise<void>((resolve) => {
setTimeout(async () => {
if (modelRef.current?.id !== modelId) {
console.debug('waiting for model to start')
await waitForModelStarting(modelId)
resolve()
} else {
resolve()
}
}, 200)
})
}

const sendChatMessage = async (message: string) => {
if (!message || message.trim().length === 0) return

@@ -304,6 +293,19 @@
setEngineParamsUpdate(false)
}

const waitForModelStarting = async (modelId: string) => {
return new Promise<void>((resolve) => {
setTimeout(async () => {
if (modelRef.current?.id !== modelId && !loadModelFailedRef.current) {
await waitForModelStarting(modelId)
resolve()
} else {
resolve()
}
}, 200)
})
}

return {
sendChatMessage,
resendChatMessage,
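Note: waitForModelStarting now also stops polling once loadModelFailedRef is set, so a failed load no longer leaves the send path waiting forever. A hedged sketch of the call site; the surrounding names are assumptions:

// Hypothetical call site - illustrative only.
async function ensureModelThenSend(modelId: string): Promise<boolean> {
  startModel(modelId)                    // from useActiveModel
  await waitForModelStarting(modelId)    // resolves on model ready OR on a recorded load error
  // If the load failed, EventHandler has already reset the queued state and stored the error message.
  return !loadModelFailedRef.current
}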