diff --git a/core/src/node/api/common/startStopModel.ts b/core/src/node/api/common/startStopModel.ts
index 7665883804..0d4934e1c0 100644
--- a/core/src/node/api/common/startStopModel.ts
+++ b/core/src/node/api/common/startStopModel.ts
@@ -305,7 +305,7 @@ const loadLLMModel = async (settings: NitroModelSettings): Promise =>
     })
     .catch((err: any) => {
       logServer(`[NITRO]::Error: Load model failed with error ${err}`)
-      return Promise.reject()
+      return Promise.reject(err)
     })
 }
diff --git a/extensions/inference-nitro-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts
index aaa230ca34..9f1f002632 100644
--- a/extensions/inference-nitro-extension/src/index.ts
+++ b/extensions/inference-nitro-extension/src/index.ts
@@ -134,6 +134,7 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
 
     const modelFullPath = await joinPath(["models", model.id]);
 
+    this._currentModel = model;
     const nitroInitResult = await executeOnMain(NODE, "runModel", {
       modelFullPath,
       model,
@@ -144,7 +145,6 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
       return;
     }
 
-    this._currentModel = model;
     events.emit(ModelEvent.OnModelReady, model);
 
     this.getNitroProcesHealthIntervalId = setInterval(
@@ -226,6 +226,9 @@
    */
   private async onMessageRequest(data: MessageRequest) {
     if (data.model?.engine !== InferenceEngine.nitro || !this._currentModel) {
+      console.log(
+        `Model is not nitro or no model loaded ${data.model?.engine} ${this._currentModel}`
+      );
       return;
     }
diff --git a/extensions/inference-nitro-extension/src/node/index.ts b/extensions/inference-nitro-extension/src/node/index.ts
index 296433d424..7ba90b556b 100644
--- a/extensions/inference-nitro-extension/src/node/index.ts
+++ b/extensions/inference-nitro-extension/src/node/index.ts
@@ -67,7 +67,7 @@ function stopModel(): Promise {
  * TODO: Should pass absolute of the model file instead of just the name - So we can modurize the module.ts to npm package
  */
 async function runModel(
-  wrapper: ModelInitOptions
+  wrapper: ModelInitOptions,
 ): Promise {
   if (wrapper.model.engine !== InferenceEngine.nitro) {
     // Not a nitro model
@@ -85,7 +85,7 @@ async function runModel(
   const ggufBinFile = files.find(
     (file) =>
       file === path.basename(currentModelFile) ||
-      file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT)
+      file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT),
   );
 
   if (!ggufBinFile) return Promise.reject("No GGUF model file found");
@@ -180,10 +180,10 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
     const system_prompt = promptTemplate.substring(0, systemIndex);
     const user_prompt = promptTemplate.substring(
       systemIndex + systemMarker.length,
-      promptIndex
+      promptIndex,
     );
     const ai_prompt = promptTemplate.substring(
-      promptIndex + promptMarker.length
+      promptIndex + promptMarker.length,
     );
 
     // Return the split parts
@@ -193,7 +193,7 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
     const promptIndex = promptTemplate.indexOf(promptMarker);
     const user_prompt = promptTemplate.substring(0, promptIndex);
     const ai_prompt = promptTemplate.substring(
-      promptIndex + promptMarker.length
+      promptIndex + promptMarker.length,
     );
 
     // Return the split parts
@@ -225,14 +225,14 @@ function loadLLMModel(settings: any): Promise {
     .then((res) => {
       log(
         `[NITRO]::Debug: Load model success with response ${JSON.stringify(
-          res
-        )}`
+          res,
+        )}`,
       );
       return Promise.resolve(res);
     })
     .catch((err) => {
       log(`[NITRO]::Error: Load model failed with error ${err}`);
-      return Promise.reject();
+      return Promise.reject(err);
     });
 }
@@ -254,8 +254,8 @@ async function validateModelStatus(): Promise {
     retryDelay: 500,
   }).then(async (res: Response) => {
     log(
-      `[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
-        res
+      `[NITRO]::Debug: Validate model state with response ${JSON.stringify(
+        res.status
       )}`
     );
     // If the response is OK, check model_loaded status.
@@ -264,9 +264,19 @@
       // If the model is loaded, return an empty object.
       // Otherwise, return an object with an error message.
       if (body.model_loaded) {
+        log(
+          `[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
+            body
+          )}`
+        );
         return Promise.resolve();
       }
     }
+    log(
+      `[NITRO]::Debug: Validate model state failed with response ${JSON.stringify(
+        res.statusText
+      )}`
+    );
     return Promise.reject("Validate model status failed");
   });
 }
@@ -307,7 +317,7 @@
     const args: string[] = ["1", LOCAL_HOST, PORT.toString()];
     // Execute the binary
     log(
-      `[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
+      `[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`,
    );
     subprocess = spawn(
       executableOptions.executablePath,
@@ -318,7 +328,7 @@
         ...process.env,
         CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
       },
-      }
+      },
     );
 
     // Handle subprocess output
diff --git a/web/containers/Providers/EventHandler.tsx b/web/containers/Providers/EventHandler.tsx
index ac793b4ae1..9f29226d08 100644
--- a/web/containers/Providers/EventHandler.tsx
+++ b/web/containers/Providers/EventHandler.tsx
@@ -13,9 +13,15 @@ import {
 } from '@janhq/core'
 import { useAtomValue, useSetAtom } from 'jotai'
 
-import { activeModelAtom, stateModelAtom } from '@/hooks/useActiveModel'
+import {
+  activeModelAtom,
+  loadModelErrorAtom,
+  stateModelAtom,
+} from '@/hooks/useActiveModel'
 import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels'
+import { queuedMessageAtom } from '@/hooks/useSendChatMessage'
+
 import { toaster } from '../Toast'
 
 import { extensionManager } from '@/extension'
@@ -26,6 +32,7 @@
 import {
   updateThreadWaitingForResponseAtom,
   threadsAtom,
+  isGeneratingResponseAtom,
 } from '@/helpers/atoms/Thread.atom'
 
 export default function EventHandler({ children }: { children: ReactNode }) {
@@ -34,11 +41,14 @@ export default function EventHandler({ children }: { children: ReactNode }) {
   const { downloadedModels } = useGetDownloadedModels()
   const setActiveModel = useSetAtom(activeModelAtom)
   const setStateModel = useSetAtom(stateModelAtom)
+  const setQueuedMessage = useSetAtom(queuedMessageAtom)
+  const setLoadModelError = useSetAtom(loadModelErrorAtom)
   const updateThreadWaiting = useSetAtom(updateThreadWaitingForResponseAtom)
   const threads = useAtomValue(threadsAtom)
 
   const modelsRef = useRef(downloadedModels)
   const threadsRef = useRef(threads)
+  const setIsGeneratingResponse = useSetAtom(isGeneratingResponseAtom)
 
   useEffect(() => {
     threadsRef.current = threads
@@ -51,8 +61,9 @@ export default function EventHandler({ children }: { children: ReactNode }) {
   const onNewMessageResponse = useCallback(
     (message: ThreadMessage) => {
       addNewMessage(message)
+      setIsGeneratingResponse(false)
     },
-    [addNewMessage]
+    [addNewMessage, setIsGeneratingResponse]
   )
 
   const onModelReady = useCallback(
@@ -83,13 +94,15 @@ export default function EventHandler({ children }: { children: ReactNode }) {
     (res: any) => {
       const errorMessage = `${res.error}`
       console.error('Failed to load model: ' + errorMessage)
+      setLoadModelError(errorMessage)
       setStateModel(() => ({
         state: 'start',
         loading: false,
         model: res.modelId,
       }))
+      setQueuedMessage(false)
     },
-    [setStateModel]
+    [setStateModel, setQueuedMessage, setLoadModelError]
   )
 
   const onMessageResponseUpdate = useCallback(
@@ -108,6 +121,8 @@ export default function EventHandler({ children }: { children: ReactNode }) {
     // Mark the thread as not waiting for response
     updateThreadWaiting(message.thread_id, false)
 
+    setIsGeneratingResponse(false)
+
     const thread = threadsRef.current?.find((e) => e.id == message.thread_id)
     if (thread) {
       const messageContent = message.content[0]?.text.value ?? ''
diff --git a/web/helpers/atoms/SystemBar.atom.ts b/web/helpers/atoms/SystemBar.atom.ts
index aa5e77d587..42ef7b29f0 100644
--- a/web/helpers/atoms/SystemBar.atom.ts
+++ b/web/helpers/atoms/SystemBar.atom.ts
@@ -2,5 +2,6 @@ import { atom } from 'jotai'
 
 export const totalRamAtom = atom(0)
 export const usedRamAtom = atom(0)
+export const availableRamAtom = atom(0)
 
 export const cpuUsageAtom = atom(0)
diff --git a/web/helpers/atoms/Thread.atom.ts b/web/helpers/atoms/Thread.atom.ts
index fcaa2a4afc..cab286bd16 100644
--- a/web/helpers/atoms/Thread.atom.ts
+++ b/web/helpers/atoms/Thread.atom.ts
@@ -23,6 +23,7 @@ export const setActiveThreadIdAtom = atom(
 
 export const waitingToSendMessage = atom(undefined)
 
+export const isGeneratingResponseAtom = atom(undefined)
 /**
  * Stores all thread states for the current user
  */
diff --git a/web/hooks/useActiveModel.ts b/web/hooks/useActiveModel.ts
index a456d8787d..54a1fdbe06 100644
--- a/web/hooks/useActiveModel.ts
+++ b/web/hooks/useActiveModel.ts
@@ -1,5 +1,5 @@
 import { events, Model, ModelEvent } from '@janhq/core'
-import { atom, useAtom, useAtomValue } from 'jotai'
+import { atom, useAtom, useAtomValue, useSetAtom } from 'jotai'
 
 import { toaster } from '@/containers/Toast'
 
@@ -9,6 +9,7 @@ import { LAST_USED_MODEL_ID } from './useRecommendedModel'
 import { activeThreadAtom } from '@/helpers/atoms/Thread.atom'
 
 export const activeModelAtom = atom(undefined)
+export const loadModelErrorAtom = atom(undefined)
 
 export const stateModelAtom = atom({
   state: 'start',
@@ -21,6 +22,7 @@ export function useActiveModel() {
   const activeThread = useAtomValue(activeThreadAtom)
   const [stateModel, setStateModel] = useAtom(stateModelAtom)
   const { downloadedModels } = useGetDownloadedModels()
+  const setLoadModelError = useSetAtom(loadModelErrorAtom)
 
   const startModel = async (modelId: string) => {
     if (
@@ -31,6 +33,7 @@ export function useActiveModel() {
       return
     }
     // TODO: incase we have multiple assistants, the configuration will be from assistant
+    setLoadModelError(undefined)
 
     setActiveModel(undefined)
diff --git a/web/hooks/useGetSystemResources.ts b/web/hooks/useGetSystemResources.ts
index 8dffa8eb4d..de595ad7b4 100644
--- a/web/hooks/useGetSystemResources.ts
+++ b/web/hooks/useGetSystemResources.ts
@@ -6,6 +6,7 @@ import { useSetAtom } from 'jotai'
 import { extensionManager } from '@/extension/ExtensionManager'
 
 import {
+  availableRamAtom,
   cpuUsageAtom,
   totalRamAtom,
   usedRamAtom,
@@ -16,6 +17,7 @@ export default function useGetSystemResources() {
   const [cpu, setCPU] = useState(0)
   const setTotalRam = useSetAtom(totalRamAtom)
   const setUsedRam = useSetAtom(usedRamAtom)
+  const setAvailableRam = useSetAtom(availableRamAtom)
   const setCpuUsage = useSetAtom(cpuUsageAtom)
 
   const getSystemResources = async () => {
@@ -40,6 +42,10 @@ export default function useGetSystemResources() {
     setTotalRam(resourceInfor.mem.totalMemory)
 
     setRam(Math.round(ram * 100))
+    if (resourceInfor.mem.totalMemory && resourceInfor.mem.usedMemory)
+      setAvailableRam(
+        resourceInfor.mem.totalMemory - resourceInfor.mem.usedMemory
+      )
     setCPU(Math.round(currentLoadInfor?.cpu?.usage ?? 0))
     setCpuUsage(Math.round(currentLoadInfor?.cpu?.usage ?? 0))
   }
diff --git a/web/hooks/useInference.ts b/web/hooks/useInference.ts
deleted file mode 100644
index 8ada18cb77..0000000000
--- a/web/hooks/useInference.ts
+++ /dev/null
@@ -1,15 +0,0 @@
-import { useAtomValue } from 'jotai'
-
-import { threadStatesAtom } from '@/helpers/atoms/Thread.atom'
-
-export default function useInference() {
-  const threadStates = useAtomValue(threadStatesAtom)
-
-  const isGeneratingResponse = Object.values(threadStates).some(
-    (threadState) => threadState.waitingForResponse
-  )
-
-  return {
-    isGeneratingResponse,
-  }
-}
diff --git a/web/hooks/useSendChatMessage.ts b/web/hooks/useSendChatMessage.ts
index 379defa15b..5d1894db83 100644
--- a/web/hooks/useSendChatMessage.ts
+++ b/web/hooks/useSendChatMessage.ts
@@ -25,12 +25,10 @@ import { ulid } from 'ulid'
 import { selectedModelAtom } from '@/containers/DropdownListSidebar'
 import { currentPromptAtom, fileUploadAtom } from '@/containers/Providers/Jotai'
 
-import { toaster } from '@/containers/Toast'
-
 import { getBase64 } from '@/utils/base64'
 import { toRuntimeParams, toSettingParams } from '@/utils/modelParam'
 
-import { useActiveModel } from './useActiveModel'
+import { loadModelErrorAtom, useActiveModel } from './useActiveModel'
 
 import { extensionManager } from '@/extension/ExtensionManager'
 import {
@@ -59,9 +57,11 @@ export default function useSendChatMessage() {
   const { activeModel } = useActiveModel()
   const selectedModel = useAtomValue(selectedModelAtom)
   const { startModel } = useActiveModel()
-  const setQueuedMessage = useSetAtom(queuedMessageAtom)
+  const [queuedMessage, setQueuedMessage] = useAtom(queuedMessageAtom)
+  const loadModelFailed = useAtomValue(loadModelErrorAtom)
 
   const modelRef = useRef()
+  const loadModelFailedRef = useRef()
   const activeModelParams = useAtomValue(getActiveThreadModelParamsAtom)
   const engineParamsUpdate = useAtomValue(engineParamsUpdateAtom)
@@ -73,6 +73,10 @@ export default function useSendChatMessage() {
     modelRef.current = activeModel
   }, [activeModel])
 
+  useEffect(() => {
+    loadModelFailedRef.current = loadModelFailed
+  }, [loadModelFailed])
+
   const resendChatMessage = async (currentMessage: ThreadMessage) => {
     if (!activeThread) {
       console.error('No active thread')
@@ -121,21 +125,6 @@ export default function useSendChatMessage() {
     events.emit(MessageEvent.OnMessageSent, messageRequest)
   }
 
-  // TODO: Refactor @louis
-  const waitForModelStarting = async (modelId: string) => {
-    return new Promise((resolve) => {
-      setTimeout(async () => {
-        if (modelRef.current?.id !== modelId) {
-          console.debug('waiting for model to start')
-          await waitForModelStarting(modelId)
-          resolve()
-        } else {
-          resolve()
-        }
-      }, 200)
-    })
-  }
-
   const sendChatMessage = async (message: string) => {
     if (!message || message.trim().length === 0) return
 
@@ -304,6 +293,19 @@ export default function useSendChatMessage() {
     setEngineParamsUpdate(false)
   }
 
+  const waitForModelStarting = async (modelId: string) => {
+    return new Promise((resolve) => {
+      setTimeout(async () => {
+        if (modelRef.current?.id !== modelId && !loadModelFailedRef.current) {
+          await waitForModelStarting(modelId)
+          resolve()
+        } else {
+          resolve()
+        }
+      }, 200)
+    })
+  }
+
   return {
     sendChatMessage,
     resendChatMessage,
diff --git a/web/hooks/useSetActiveThread.ts b/web/hooks/useSetActiveThread.ts
index 76a744bcd3..3545d0d233 100644
--- a/web/hooks/useSetActiveThread.ts
+++ b/web/hooks/useSetActiveThread.ts
@@ -13,6 +13,7 @@ import { setConvoMessagesAtom } from '@/helpers/atoms/ChatMessage.atom'
 import {
   ModelParams,
   getActiveThreadIdAtom,
+  isGeneratingResponseAtom,
   setActiveThreadIdAtom,
   setThreadModelParamsAtom,
 } from '@/helpers/atoms/Thread.atom'
@@ -22,6 +23,7 @@ export default function useSetActiveThread() {
   const setActiveThreadId = useSetAtom(setActiveThreadIdAtom)
   const setThreadMessage = useSetAtom(setConvoMessagesAtom)
   const setThreadModelParams = useSetAtom(setThreadModelParamsAtom)
+  const setIsGeneratingResponse = useSetAtom(isGeneratingResponseAtom)
 
   const setActiveThread = async (thread: Thread) => {
     if (activeThreadId === thread.id) {
       return
     }
 
+    setIsGeneratingResponse(false)
     events.emit(InferenceEvent.OnInferenceStopped, thread.id)
 
     // load the corresponding messages
diff --git a/web/screens/Chat/ChatBody/index.tsx b/web/screens/Chat/ChatBody/index.tsx
index e0a34a1a11..1ce6b591f6 100644
--- a/web/screens/Chat/ChatBody/index.tsx
+++ b/web/screens/Chat/ChatBody/index.tsx
@@ -8,14 +8,11 @@ import { useAtomValue } from 'jotai'
 
 import LogoMark from '@/containers/Brand/Logo/Mark'
 
-import GenerateResponse from '@/containers/Loader/GenerateResponse'
-
 import { MainViewState } from '@/constants/screens'
 
-import { activeModelAtom } from '@/hooks/useActiveModel'
+import { loadModelErrorAtom } from '@/hooks/useActiveModel'
 import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels'
-import useInference from '@/hooks/useInference'
 import { useMainViewState } from '@/hooks/useMainViewState'
 
 import ChatItem from '../ChatItem'
@@ -26,10 +23,9 @@ import { getCurrentChatMessagesAtom } from '@/helpers/atoms/ChatMessage.atom'
 
 const ChatBody: React.FC = () => {
   const messages = useAtomValue(getCurrentChatMessagesAtom)
-  const activeModel = useAtomValue(activeModelAtom)
   const { downloadedModels } = useGetDownloadedModels()
   const { setMainViewState } = useMainViewState()
-  const { isGeneratingResponse } = useInference()
+  const loadModelError = useAtomValue(loadModelErrorAtom)
 
   if (downloadedModels.length === 0)
     return (
@@ -90,15 +86,14 @@ const ChatBody: React.FC = () => {
                 message.content.length > 0) && (
               )}
-              {(message.status === MessageStatus.Error ||
-                message.status === MessageStatus.Stopped) &&
+              {!loadModelError &&
+                (message.status === MessageStatus.Error ||
+                  message.status === MessageStatus.Stopped) &&
                 index === messages.length - 1 && (
                 )}
             ))}
-
-          {activeModel && isGeneratingResponse && <GenerateResponse />}
         )}
diff --git a/web/screens/Chat/ErrorMessage/index.tsx b/web/screens/Chat/ErrorMessage/index.tsx
index 8879b15be8..84a89cee86 100644
--- a/web/screens/Chat/ErrorMessage/index.tsx
+++ b/web/screens/Chat/ErrorMessage/index.tsx
@@ -17,7 +17,6 @@ import {
   deleteMessageAtom,
   getCurrentChatMessagesAtom,
 } from '@/helpers/atoms/ChatMessage.atom'
-import { totalRamAtom } from '@/helpers/atoms/SystemBar.atom'
 import { activeThreadAtom } from '@/helpers/atoms/Thread.atom'
 
 const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
   const messages = useAtomValue(getCurrentChatMessagesAtom)
   const thread = useAtomValue(activeThreadAtom)
   const deleteMessage = useSetAtom(deleteMessageAtom)
   const { resendChatMessage } = useSendChatMessage()
-  const { activeModel } = useActiveModel()
-  const totalRam = useAtomValue(totalRamAtom)
 
   const regenerateMessage = async () => {
     const lastMessageIndex = messages.length - 1
@@ -70,33 +67,26 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
       {message.status === MessageStatus.Error && (
-            {Number(activeModel?.metadata.size) > totalRam ? (
-              <>
-                Oops! Model size exceeds available RAM. Consider selecting a
-                smaller model or upgrading your RAM for smoother performance.
-
-            ) : (
-              <>
-                Apologies, something's amiss!
-                Jan's in beta. Find troubleshooting guides{' '}
-
-                  here
-                {' '}
-                or reach out to us on{' '}
-
-                  Discord
-                {' '}
-                for assistance.
-
-            )}
+          <>
+            Apologies, something's amiss!
+            Jan's in beta. Find troubleshooting guides{' '}
+
+              here
+            {' '}
+            or reach out to us on{' '}
+
+              Discord
+            {' '}
+            for assistance.
+
       )}
diff --git a/web/screens/Chat/LoadModelErrorMessage/index.tsx b/web/screens/Chat/LoadModelErrorMessage/index.tsx
new file mode 100644
index 0000000000..d3c4a704d2
--- /dev/null
+++ b/web/screens/Chat/LoadModelErrorMessage/index.tsx
@@ -0,0 +1,48 @@
+import { MessageStatus, ThreadMessage } from '@janhq/core'
+import { useAtomValue } from 'jotai'
+
+import { useActiveModel } from '@/hooks/useActiveModel'
+
+import { totalRamAtom } from '@/helpers/atoms/SystemBar.atom'
+
+const LoadModelErrorMessage = () => {
+  const { activeModel } = useActiveModel()
+  const availableRam = useAtomValue(totalRamAtom)
+
+  return (
+    <>
+
+      {Number(activeModel?.metadata.size) > availableRam ? (
+        <>
+          Oops! Model size exceeds available RAM. Consider selecting a
+          smaller model or upgrading your RAM for smoother performance.
+
+      ) : (
+        <>
+          Apologies, something's amiss!
+          Jan's in beta. Find troubleshooting guides{' '}
+
+            here
+          {' '}
+          or reach out to us on{' '}
+
+            Discord
+          {' '}
+          for assistance.
+
+      )}
+
+
+
+  )
+}
+export default LoadModelErrorMessage
diff --git a/web/screens/Chat/index.tsx b/web/screens/Chat/index.tsx
index e7cb827407..1f78966040 100644
--- a/web/screens/Chat/index.tsx
+++ b/web/screens/Chat/index.tsx
@@ -9,6 +9,7 @@ import { UploadCloudIcon } from 'lucide-react'
 import { twMerge } from 'tailwind-merge'
 
+import GenerateResponse from '@/containers/Loader/GenerateResponse'
 import ModelReload from '@/containers/Loader/ModelReload'
 import ModelStart from '@/containers/Loader/ModelStart'
@@ -19,6 +20,7 @@ import { snackbar } from '@/containers/Toast'
 
 import { FeatureToggleContext } from '@/context/FeatureToggle'
 
+import { activeModelAtom, loadModelErrorAtom } from '@/hooks/useActiveModel'
 import { queuedMessageAtom, reloadModelAtom } from '@/hooks/useSendChatMessage'
 
 import ChatBody from '@/screens/Chat/ChatBody'
@@ -26,12 +28,14 @@ import ChatBody from '@/screens/Chat/ChatBody'
 import ThreadList from '@/screens/Chat/ThreadList'
 
 import ChatInput from './ChatInput'
+import LoadModelErrorMessage from './LoadModelErrorMessage'
 import RequestDownloadModel from './RequestDownloadModel'
 import Sidebar from './Sidebar'
 
 import {
   activeThreadAtom,
   engineParamsUpdateAtom,
+  isGeneratingResponseAtom,
 } from '@/helpers/atoms/Thread.atom'
 
 const renderError = (code: string) => {
@@ -63,6 +67,11 @@ const ChatScreen: React.FC = () => {
   const setFileUpload = useSetAtom(fileUploadAtom)
   const { experimentalFeature } = useContext(FeatureToggleContext)
 
+  const activeModel = useAtomValue(activeModelAtom)
+
+  const isGeneratingResponse = useAtomValue(isGeneratingResponseAtom)
+  const loadModelError = useAtomValue(loadModelErrorAtom)
+
   const { getRootProps, isDragReject } = useDropzone({
     noClick: true,
     multiple: false,
@@ -202,6 +211,9 @@
               )}
+
+              {activeModel && isGeneratingResponse && <GenerateResponse />}
+              {loadModelError && <LoadModelErrorMessage />}