diff --git a/core/src/node/api/common/startStopModel.ts b/core/src/node/api/common/startStopModel.ts
index 7665883804..0d4934e1c0 100644
--- a/core/src/node/api/common/startStopModel.ts
+++ b/core/src/node/api/common/startStopModel.ts
@@ -305,7 +305,7 @@ const loadLLMModel = async (settings: NitroModelSettings): Promise =>
     })
     .catch((err: any) => {
       logServer(`[NITRO]::Error: Load model failed with error ${err}`)
-      return Promise.reject()
+      return Promise.reject(err)
     })
 }
diff --git a/extensions/inference-nitro-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts
index aaa230ca34..9f1f002632 100644
--- a/extensions/inference-nitro-extension/src/index.ts
+++ b/extensions/inference-nitro-extension/src/index.ts
@@ -134,6 +134,7 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
 
     const modelFullPath = await joinPath(["models", model.id]);
 
+    this._currentModel = model;
     const nitroInitResult = await executeOnMain(NODE, "runModel", {
       modelFullPath,
       model,
@@ -144,7 +145,6 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
       return;
     }
 
-    this._currentModel = model;
     events.emit(ModelEvent.OnModelReady, model);
 
     this.getNitroProcesHealthIntervalId = setInterval(
@@ -226,6 +226,9 @@
    */
   private async onMessageRequest(data: MessageRequest) {
     if (data.model?.engine !== InferenceEngine.nitro || !this._currentModel) {
+      console.log(
+        `Model is not nitro or no model loaded ${data.model?.engine} ${this._currentModel}`
+      );
       return;
     }
diff --git a/extensions/inference-nitro-extension/src/node/index.ts b/extensions/inference-nitro-extension/src/node/index.ts
index 296433d424..7ba90b556b 100644
--- a/extensions/inference-nitro-extension/src/node/index.ts
+++ b/extensions/inference-nitro-extension/src/node/index.ts
@@ -67,7 +67,7 @@ function stopModel(): Promise {
  * TODO: Should pass absolute of the model file instead of just the name - So we can modurize the module.ts to npm package
  */
 async function runModel(
-  wrapper: ModelInitOptions
+  wrapper: ModelInitOptions,
 ): Promise {
   if (wrapper.model.engine !== InferenceEngine.nitro) {
     // Not a nitro model
@@ -85,7 +85,7 @@ async function runModel(
   const ggufBinFile = files.find(
     (file) =>
       file === path.basename(currentModelFile) ||
-      file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT)
+      file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT),
   );
 
   if (!ggufBinFile) return Promise.reject("No GGUF model file found");
@@ -180,10 +180,10 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
     const system_prompt = promptTemplate.substring(0, systemIndex);
     const user_prompt = promptTemplate.substring(
       systemIndex + systemMarker.length,
-      promptIndex
+      promptIndex,
     );
     const ai_prompt = promptTemplate.substring(
-      promptIndex + promptMarker.length
+      promptIndex + promptMarker.length,
     );
 
     // Return the split parts
@@ -193,7 +193,7 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
     const promptIndex = promptTemplate.indexOf(promptMarker);
     const user_prompt = promptTemplate.substring(0, promptIndex);
     const ai_prompt = promptTemplate.substring(
-      promptIndex + promptMarker.length
+      promptIndex + promptMarker.length,
     );
 
     // Return the split parts
@@ -225,14 +225,14 @@ function loadLLMModel(settings: any): Promise {
     .then((res) => {
       log(
         `[NITRO]::Debug: Load model success with response ${JSON.stringify(
-          res
-        )}`
+          res,
+        )}`,
       );
       return Promise.resolve(res);
     })
     .catch((err) => {
       log(`[NITRO]::Error: Load model failed with error ${err}`);
-      return Promise.reject();
+      return Promise.reject(err);
     });
 }
@@ -254,8 +254,8 @@ async function validateModelStatus(): Promise {
     retryDelay: 500,
   }).then(async (res: Response) => {
     log(
-      `[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
-        res
+      `[NITRO]::Debug: Validate model state with response ${JSON.stringify(
+        res.status
       )}`
     );
     // If the response is OK, check model_loaded status.
@@ -264,9 +264,19 @@
       // If the model is loaded, return an empty object.
       // Otherwise, return an object with an error message.
       if (body.model_loaded) {
+        log(
+          `[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
+            body
+          )}`
+        );
         return Promise.resolve();
       }
     }
+    log(
+      `[NITRO]::Debug: Validate model state failed with response ${JSON.stringify(
+        res.statusText
+      )}`
+    );
     return Promise.reject("Validate model status failed");
   });
 }
@@ -307,7 +317,7 @@
     const args: string[] = ["1", LOCAL_HOST, PORT.toString()];
     // Execute the binary
     log(
-      `[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
+      `[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`,
    );
     subprocess = spawn(
       executableOptions.executablePath,
@@ -318,7 +328,7 @@
         ...process.env,
         CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
       },
-      }
+      },
     );
 
     // Handle subprocess output
diff --git a/web/containers/Providers/EventHandler.tsx b/web/containers/Providers/EventHandler.tsx
index ac793b4ae1..9f29226d08 100644
--- a/web/containers/Providers/EventHandler.tsx
+++ b/web/containers/Providers/EventHandler.tsx
@@ -13,9 +13,15 @@ import {
 } from '@janhq/core'
 import { useAtomValue, useSetAtom } from 'jotai'
 
-import { activeModelAtom, stateModelAtom } from '@/hooks/useActiveModel'
+import {
+  activeModelAtom,
+  loadModelErrorAtom,
+  stateModelAtom,
+} from '@/hooks/useActiveModel'
 import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels'
+import { queuedMessageAtom } from '@/hooks/useSendChatMessage'
+
 import { toaster } from '../Toast'
 
 import { extensionManager } from '@/extension'
@@ -26,6 +32,7 @@
 import {
   updateThreadWaitingForResponseAtom,
   threadsAtom,
+  isGeneratingResponseAtom,
 } from '@/helpers/atoms/Thread.atom'
 
 export default function EventHandler({ children }: { children: ReactNode }) {
@@ -34,11 +41,14 @@ export default function EventHandler({ children }: { children: ReactNode }) {
   const { downloadedModels } = useGetDownloadedModels()
   const setActiveModel = useSetAtom(activeModelAtom)
   const setStateModel = useSetAtom(stateModelAtom)
+  const setQueuedMessage = useSetAtom(queuedMessageAtom)
+  const setLoadModelError = useSetAtom(loadModelErrorAtom)
   const updateThreadWaiting = useSetAtom(updateThreadWaitingForResponseAtom)
   const threads = useAtomValue(threadsAtom)
 
   const modelsRef = useRef(downloadedModels)
   const threadsRef = useRef(threads)
+  const setIsGeneratingResponse = useSetAtom(isGeneratingResponseAtom)
 
   useEffect(() => {
     threadsRef.current = threads
@@ -51,8 +61,9 @@ export default function EventHandler({ children }: { children: ReactNode }) {
   const onNewMessageResponse = useCallback(
     (message: ThreadMessage) => {
       addNewMessage(message)
+      setIsGeneratingResponse(false)
     },
-    [addNewMessage]
+    [addNewMessage, setIsGeneratingResponse]
   )
 
   const onModelReady = useCallback(
@@ -83,13 +94,15 @@ export default function EventHandler({ children }: { children: ReactNode }) {
     (res: any) => {
       const errorMessage = `${res.error}`
       console.error('Failed to load model: ' + errorMessage)
+      setLoadModelError(errorMessage)
       setStateModel(() => ({
         state: 'start',
         loading: false,
         model: res.modelId,
       }))
+      setQueuedMessage(false)
     },
-    [setStateModel]
+    [setStateModel, setQueuedMessage, setLoadModelError]
   )
 
   const onMessageResponseUpdate = useCallback(
@@ -108,6 +121,8 @@ export default function EventHandler({ children }: { children: ReactNode }) {
     // Mark the thread as not waiting for response
     updateThreadWaiting(message.thread_id, false)
 
+    setIsGeneratingResponse(false)
+
     const thread = threadsRef.current?.find((e) => e.id == message.thread_id)
     if (thread) {
       const messageContent = message.content[0]?.text.value ?? ''
diff --git a/web/helpers/atoms/SystemBar.atom.ts b/web/helpers/atoms/SystemBar.atom.ts
index aa5e77d587..42ef7b29f0 100644
--- a/web/helpers/atoms/SystemBar.atom.ts
+++ b/web/helpers/atoms/SystemBar.atom.ts
@@ -2,5 +2,6 @@ import { atom } from 'jotai'
 
 export const totalRamAtom = atom(0)
 export const usedRamAtom = atom(0)
+export const availableRamAtom = atom(0)
 
 export const cpuUsageAtom = atom(0)
diff --git a/web/helpers/atoms/Thread.atom.ts b/web/helpers/atoms/Thread.atom.ts
index fcaa2a4afc..cab286bd16 100644
--- a/web/helpers/atoms/Thread.atom.ts
+++ b/web/helpers/atoms/Thread.atom.ts
@@ -23,6 +23,7 @@ export const setActiveThreadIdAtom = atom(
 
 export const waitingToSendMessage = atom(undefined)
 
+export const isGeneratingResponseAtom = atom(undefined)
 /**
  * Stores all thread states for the current user
  */
diff --git a/web/hooks/useActiveModel.ts b/web/hooks/useActiveModel.ts
index a456d8787d..54a1fdbe06 100644
--- a/web/hooks/useActiveModel.ts
+++ b/web/hooks/useActiveModel.ts
@@ -1,5 +1,5 @@
 import { events, Model, ModelEvent } from '@janhq/core'
-import { atom, useAtom, useAtomValue } from 'jotai'
+import { atom, useAtom, useAtomValue, useSetAtom } from 'jotai'
 
 import { toaster } from '@/containers/Toast'
 
@@ -9,6 +9,7 @@ import { LAST_USED_MODEL_ID } from './useRecommendedModel'
 import { activeThreadAtom } from '@/helpers/atoms/Thread.atom'
 
 export const activeModelAtom = atom(undefined)
+export const loadModelErrorAtom = atom(undefined)
 
 export const stateModelAtom = atom({
   state: 'start',
@@ -21,6 +22,7 @@ export function useActiveModel() {
   const activeThread = useAtomValue(activeThreadAtom)
   const [stateModel, setStateModel] = useAtom(stateModelAtom)
   const { downloadedModels } = useGetDownloadedModels()
+  const setLoadModelError = useSetAtom(loadModelErrorAtom)
 
   const startModel = async (modelId: string) => {
     if (
@@ -31,6 +33,7 @@ export function useActiveModel() {
       return
     }
     // TODO: incase we have multiple assistants, the configuration will be from assistant
+    setLoadModelError(undefined)
 
     setActiveModel(undefined)
diff --git a/web/hooks/useGetSystemResources.ts b/web/hooks/useGetSystemResources.ts
index 8dffa8eb4d..de595ad7b4 100644
--- a/web/hooks/useGetSystemResources.ts
+++ b/web/hooks/useGetSystemResources.ts
@@ -6,6 +6,7 @@ import { useSetAtom } from 'jotai'
 import { extensionManager } from '@/extension/ExtensionManager'
 
 import {
+  availableRamAtom,
   cpuUsageAtom,
   totalRamAtom,
   usedRamAtom,
@@ -16,6 +17,7 @@ export default function useGetSystemResources() {
   const [cpu, setCPU] = useState(0)
   const setTotalRam = useSetAtom(totalRamAtom)
   const setUsedRam = useSetAtom(usedRamAtom)
+  const setAvailableRam = useSetAtom(availableRamAtom)
   const setCpuUsage = useSetAtom(cpuUsageAtom)
 
   const getSystemResources = async () => {
@@ -40,6 +42,10 @@ export default function useGetSystemResources() {
     setTotalRam(resourceInfor.mem.totalMemory)
 
     setRam(Math.round(ram * 100))
+    if (resourceInfor.mem.totalMemory && resourceInfor.mem.usedMemory)
+      setAvailableRam(
+        resourceInfor.mem.totalMemory - resourceInfor.mem.usedMemory
+      )
     setCPU(Math.round(currentLoadInfor?.cpu?.usage ?? 0))
     setCpuUsage(Math.round(currentLoadInfor?.cpu?.usage ?? 0))
   }
diff --git a/web/hooks/useInference.ts b/web/hooks/useInference.ts
deleted file mode 100644
index 8ada18cb77..0000000000
--- a/web/hooks/useInference.ts
+++ /dev/null
@@ -1,15 +0,0 @@
-import { useAtomValue } from 'jotai'
-
-import { threadStatesAtom } from '@/helpers/atoms/Thread.atom'
-
-export default function useInference() {
-  const threadStates = useAtomValue(threadStatesAtom)
-
-  const isGeneratingResponse = Object.values(threadStates).some(
-    (threadState) => threadState.waitingForResponse
-  )
-
-  return {
-    isGeneratingResponse,
-  }
-}
diff --git a/web/hooks/useSendChatMessage.ts b/web/hooks/useSendChatMessage.ts
index 379defa15b..5d1894db83 100644
--- a/web/hooks/useSendChatMessage.ts
+++ b/web/hooks/useSendChatMessage.ts
@@ -25,12 +25,10 @@ import { ulid } from 'ulid'
 import { selectedModelAtom } from '@/containers/DropdownListSidebar'
 import { currentPromptAtom, fileUploadAtom } from '@/containers/Providers/Jotai'
 
-import { toaster } from '@/containers/Toast'
-
 import { getBase64 } from '@/utils/base64'
 import { toRuntimeParams, toSettingParams } from '@/utils/modelParam'
 
-import { useActiveModel } from './useActiveModel'
+import { loadModelErrorAtom, useActiveModel } from './useActiveModel'
 
 import { extensionManager } from '@/extension/ExtensionManager'
 import {
@@ -59,9 +57,11 @@ export default function useSendChatMessage() {
   const { activeModel } = useActiveModel()
   const selectedModel = useAtomValue(selectedModelAtom)
   const { startModel } = useActiveModel()
-  const setQueuedMessage = useSetAtom(queuedMessageAtom)
+  const [queuedMessage, setQueuedMessage] = useAtom(queuedMessageAtom)
+  const loadModelFailed = useAtomValue(loadModelErrorAtom)
 
   const modelRef = useRef()
+  const loadModelFailedRef = useRef()
   const activeModelParams = useAtomValue(getActiveThreadModelParamsAtom)
   const engineParamsUpdate = useAtomValue(engineParamsUpdateAtom)
@@ -73,6 +73,10 @@ export default function useSendChatMessage() {
     modelRef.current = activeModel
   }, [activeModel])
 
+  useEffect(() => {
+    loadModelFailedRef.current = loadModelFailed
+  }, [loadModelFailed])
+
   const resendChatMessage = async (currentMessage: ThreadMessage) => {
     if (!activeThread) {
       console.error('No active thread')
@@ -121,21 +125,6 @@ export default function useSendChatMessage() {
     events.emit(MessageEvent.OnMessageSent, messageRequest)
   }
 
-  // TODO: Refactor @louis
-  const waitForModelStarting = async (modelId: string) => {
-    return new Promise((resolve) => {
-      setTimeout(async () => {
-        if (modelRef.current?.id !== modelId) {
-          console.debug('waiting for model to start')
-          await waitForModelStarting(modelId)
-          resolve()
-        } else {
-          resolve()
-        }
-      }, 200)
-    })
-  }
-
   const sendChatMessage = async (message: string) => {
     if (!message || message.trim().length === 0) return
 
@@ -304,6 +293,19 @@ export default function useSendChatMessage() {
     setEngineParamsUpdate(false)
   }
 
+  const waitForModelStarting = async (modelId: string) => {
+    return new Promise((resolve) => {
+      setTimeout(async () => {
+        if (modelRef.current?.id !== modelId && !loadModelFailedRef.current) {
+          await waitForModelStarting(modelId)
+          resolve()
+        } else {
+          resolve()
+        }
+      }, 200)
+    })
+  }
+
   return {
     sendChatMessage,
     resendChatMessage,
diff --git a/web/hooks/useSetActiveThread.ts b/web/hooks/useSetActiveThread.ts
index 76a744bcd3..3545d0d233 100644
--- a/web/hooks/useSetActiveThread.ts
+++ b/web/hooks/useSetActiveThread.ts
@@ -13,6 +13,7 @@ import { setConvoMessagesAtom } from '@/helpers/atoms/ChatMessage.atom'
 import {
   ModelParams,
   getActiveThreadIdAtom,
+  isGeneratingResponseAtom,
   setActiveThreadIdAtom,
   setThreadModelParamsAtom,
 } from '@/helpers/atoms/Thread.atom'
@@ -22,6 +23,7 @@ export default function useSetActiveThread() {
   const setActiveThreadId = useSetAtom(setActiveThreadIdAtom)
   const setThreadMessage = useSetAtom(setConvoMessagesAtom)
   const setThreadModelParams = useSetAtom(setThreadModelParamsAtom)
+  const setIsGeneratingResponse = useSetAtom(isGeneratingResponseAtom)
 
   const setActiveThread = async (thread: Thread) => {
     if (activeThreadId === thread.id) {
       return
     }
 
+    setIsGeneratingResponse(false)
     events.emit(InferenceEvent.OnInferenceStopped, thread.id)
 
     // load the corresponding messages
diff --git a/web/screens/Chat/ChatBody/index.tsx b/web/screens/Chat/ChatBody/index.tsx
index e0a34a1a11..1ce6b591f6 100644
--- a/web/screens/Chat/ChatBody/index.tsx
+++ b/web/screens/Chat/ChatBody/index.tsx
@@ -8,14 +8,11 @@ import { useAtomValue } from 'jotai'
 
 import LogoMark from '@/containers/Brand/Logo/Mark'
 
-import GenerateResponse from '@/containers/Loader/GenerateResponse'
-
 import { MainViewState } from '@/constants/screens'
 
-import { activeModelAtom } from '@/hooks/useActiveModel'
+import { loadModelErrorAtom } from '@/hooks/useActiveModel'
 import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels'
-import useInference from '@/hooks/useInference'
 import { useMainViewState } from '@/hooks/useMainViewState'
 
 import ChatItem from '../ChatItem'
@@ -26,10 +23,9 @@ import { getCurrentChatMessagesAtom } from '@/helpers/atoms/ChatMessage.atom'
 
 const ChatBody: React.FC = () => {
   const messages = useAtomValue(getCurrentChatMessagesAtom)
-  const activeModel = useAtomValue(activeModelAtom)
   const { downloadedModels } = useGetDownloadedModels()
   const { setMainViewState } = useMainViewState()
-  const { isGeneratingResponse } = useInference()
+  const loadModelError = useAtomValue(loadModelErrorAtom)
 
   if (downloadedModels.length === 0)
     return (
@@ -90,15 +86,14 @@ const ChatBody: React.FC = () => {
                 message.content.length > 0) && (
               )}
-              {(message.status === MessageStatus.Error ||
-                message.status === MessageStatus.Stopped) &&
+              {!loadModelError &&
+                (message.status === MessageStatus.Error ||
+                  message.status === MessageStatus.Stopped) &&
                 index === messages.length - 1 && (
                 )}
             ))}
-
-          {activeModel && isGeneratingResponse && <GenerateResponse />}
         )}
diff --git a/web/screens/Chat/ErrorMessage/index.tsx b/web/screens/Chat/ErrorMessage/index.tsx
index 8879b15be8..84a89cee86 100644
--- a/web/screens/Chat/ErrorMessage/index.tsx
+++ b/web/screens/Chat/ErrorMessage/index.tsx
@@ -17,7 +17,6 @@ import {
   deleteMessageAtom,
   getCurrentChatMessagesAtom,
 } from '@/helpers/atoms/ChatMessage.atom'
-import { totalRamAtom } from '@/helpers/atoms/SystemBar.atom'
 import { activeThreadAtom } from '@/helpers/atoms/Thread.atom'
 
 const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
   const messages = useAtomValue(getCurrentChatMessagesAtom)
   const thread = useAtomValue(activeThreadAtom)
   const deleteMessage = useSetAtom(deleteMessageAtom)
   const { resendChatMessage } = useSendChatMessage()
-  const { activeModel } = useActiveModel()
-  const totalRam = useAtomValue(totalRamAtom)
 
   const regenerateMessage = async () => {
     const lastMessageIndex = messages.length - 1
@@ -70,33 +67,26 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
       {message.status === MessageStatus.Error && (
-            {Number(activeModel?.metadata.size) > totalRam ? (
-              <>
-                Oops! Model size exceeds available RAM. Consider selecting a
-                smaller model or upgrading your RAM for smoother performance.
-
-            ) : (
-              <>
-                Apologies, something's amiss!
-                Jan's in beta. Find troubleshooting guides{' '}
-
-                  here
-                {' '}
-                or reach out to us on{' '}
-
-                  Discord
-                {' '}
-                for assistance.
-
-            )}
+          <>
+            Apologies, something's amiss!
+            Jan's in beta. Find troubleshooting guides{' '}
+
+              here
+            {' '}
+            or reach out to us on{' '}
+
+              Discord
+            {' '}
+            for assistance.
+
       )}
diff --git a/web/screens/Chat/LoadModelErrorMessage/index.tsx b/web/screens/Chat/LoadModelErrorMessage/index.tsx
new file mode 100644
index 0000000000..d3c4a704d2
--- /dev/null
+++ b/web/screens/Chat/LoadModelErrorMessage/index.tsx
@@ -0,0 +1,48 @@
+import { MessageStatus, ThreadMessage } from '@janhq/core'
+import { useAtomValue } from 'jotai'
+
+import { useActiveModel } from '@/hooks/useActiveModel'
+
+import { totalRamAtom } from '@/helpers/atoms/SystemBar.atom'
+
+const LoadModelErrorMessage = () => {
+  const { activeModel } = useActiveModel()
+  const availableRam = useAtomValue(totalRamAtom)
+
+  return (
+    <>
+
+      {Number(activeModel?.metadata.size) > availableRam ? (
+        <>
+          Oops! Model size exceeds available RAM. Consider selecting a
+          smaller model or upgrading your RAM for smoother performance.
+
+      ) : (
+        <>
+          Apologies, something's amiss!
+          Jan's in beta. Find troubleshooting guides{' '}
+
+            here
+          {' '}
+          or reach out to us on{' '}
+
+            Discord
+          {' '}
+          for assistance.
+
+      )}
+
+
+
+  )
+}
+export default LoadModelErrorMessage
diff --git a/web/screens/Chat/index.tsx b/web/screens/Chat/index.tsx
index e7cb827407..1f78966040 100644
--- a/web/screens/Chat/index.tsx
+++ b/web/screens/Chat/index.tsx
@@ -9,6 +9,7 @@ import { UploadCloudIcon } from 'lucide-react'
 import { twMerge } from 'tailwind-merge'
 
+import GenerateResponse from '@/containers/Loader/GenerateResponse'
 import ModelReload from '@/containers/Loader/ModelReload'
 import ModelStart from '@/containers/Loader/ModelStart'
@@ -19,6 +20,7 @@ import { snackbar } from '@/containers/Toast'
 
 import { FeatureToggleContext } from '@/context/FeatureToggle'
 
+import { activeModelAtom, loadModelErrorAtom } from '@/hooks/useActiveModel'
 import { queuedMessageAtom, reloadModelAtom } from '@/hooks/useSendChatMessage'
 
 import ChatBody from '@/screens/Chat/ChatBody'
@@ -26,12 +28,14 @@ import ChatBody from '@/screens/Chat/ChatBody'
 import ThreadList from '@/screens/Chat/ThreadList'
 
 import ChatInput from './ChatInput'
+import LoadModelErrorMessage from './LoadModelErrorMessage'
 import RequestDownloadModel from './RequestDownloadModel'
 import Sidebar from './Sidebar'
 
 import {
   activeThreadAtom,
   engineParamsUpdateAtom,
+  isGeneratingResponseAtom,
 } from '@/helpers/atoms/Thread.atom'
 
 const renderError = (code: string) => {
@@ -63,6 +67,11 @@ const ChatScreen: React.FC = () => {
   const setFileUpload = useSetAtom(fileUploadAtom)
   const { experimentalFeature } = useContext(FeatureToggleContext)
 
+  const activeModel = useAtomValue(activeModelAtom)
+
+  const isGeneratingResponse = useAtomValue(isGeneratingResponseAtom)
+  const loadModelError = useAtomValue(loadModelErrorAtom)
+
   const { getRootProps, isDragReject } = useDropzone({
     noClick: true,
     multiple: false,
@@ -202,6 +211,9 @@
               )}
+
+              {activeModel && isGeneratingResponse && <GenerateResponse />}
+              {loadModelError && <LoadModelErrorMessage />}