fix: display error message on model load fail (#1894)

Louis authored on 2024-02-01 23:28:15 +07:00 (committed by GitHub)
parent 36cd5988d4
commit 5ce2e422f8
15 changed files with 166 additions and 92 deletions

View File

@@ -305,7 +305,7 @@ const loadLLMModel = async (settings: NitroModelSettings): Promise<Response> =>
     })
     .catch((err: any) => {
       logServer(`[NITRO]::Error: Load model failed with error ${err}`)
-      return Promise.reject()
+      return Promise.reject(err)
     })
 }
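Note on the change above: rejecting with the original err (instead of an empty Promise.reject()) lets whatever awaits loadLLMModel see the real failure reason rather than undefined. A minimal sketch of the effect, with an illustrative caller that is not part of this commit:

// Illustrative caller, not from the repo: the rejection value now carries
// the underlying error, so it can be logged or shown to the user.
async function tryLoad(load: () => Promise<Response>): Promise<Response | undefined> {
  try {
    return await load()
  } catch (err) {
    // Before this commit `err` here was undefined; now it holds the original error.
    console.error(`Model load failed: ${err}`)
    return undefined
  }
}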

View File

@@ -134,6 +134,7 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
     const modelFullPath = await joinPath(["models", model.id]);
+    this._currentModel = model;
     const nitroInitResult = await executeOnMain(NODE, "runModel", {
       modelFullPath,
       model,
@@ -144,7 +145,6 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
       return;
     }
-    this._currentModel = model;
     events.emit(ModelEvent.OnModelReady, model);
     this.getNitroProcesHealthIntervalId = setInterval(
@@ -226,6 +226,9 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
    */
   private async onMessageRequest(data: MessageRequest) {
     if (data.model?.engine !== InferenceEngine.nitro || !this._currentModel) {
+      console.log(
+        `Model is not nitro or no model loaded ${data.model?.engine} ${this._currentModel}`
+      );
       return;
     }

View File

@@ -67,7 +67,7 @@ function stopModel(): Promise<void> {
  * TODO: Should pass absolute of the model file instead of just the name - So we can modurize the module.ts to npm package
  */
 async function runModel(
-  wrapper: ModelInitOptions
+  wrapper: ModelInitOptions,
 ): Promise<ModelOperationResponse | void> {
   if (wrapper.model.engine !== InferenceEngine.nitro) {
     // Not a nitro model
@@ -85,7 +85,7 @@ async function runModel(
   const ggufBinFile = files.find(
     (file) =>
       file === path.basename(currentModelFile) ||
-      file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT)
+      file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT),
   );
   if (!ggufBinFile) return Promise.reject("No GGUF model file found");
@@ -180,10 +180,10 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
     const system_prompt = promptTemplate.substring(0, systemIndex);
     const user_prompt = promptTemplate.substring(
       systemIndex + systemMarker.length,
-      promptIndex
+      promptIndex,
     );
     const ai_prompt = promptTemplate.substring(
-      promptIndex + promptMarker.length
+      promptIndex + promptMarker.length,
     );
     // Return the split parts
@@ -193,7 +193,7 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
     const promptIndex = promptTemplate.indexOf(promptMarker);
     const user_prompt = promptTemplate.substring(0, promptIndex);
     const ai_prompt = promptTemplate.substring(
-      promptIndex + promptMarker.length
+      promptIndex + promptMarker.length,
     );
     // Return the split parts
@@ -225,14 +225,14 @@ function loadLLMModel(settings: any): Promise<Response> {
     .then((res) => {
       log(
         `[NITRO]::Debug: Load model success with response ${JSON.stringify(
-          res
-        )}`
+          res,
+        )}`,
       );
       return Promise.resolve(res);
     })
     .catch((err) => {
       log(`[NITRO]::Error: Load model failed with error ${err}`);
-      return Promise.reject();
+      return Promise.reject(err);
     });
 }
@@ -254,8 +254,8 @@ async function validateModelStatus(): Promise<void> {
     retryDelay: 500,
   }).then(async (res: Response) => {
     log(
-      `[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
-        res
+      `[NITRO]::Debug: Validate model state with response ${JSON.stringify(
+        res.status
       )}`
     );
     // If the response is OK, check model_loaded status.
@@ -264,9 +264,19 @@ async function validateModelStatus(): Promise<void> {
       // If the model is loaded, return an empty object.
       // Otherwise, return an object with an error message.
       if (body.model_loaded) {
+        log(
+          `[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
+            body
+          )}`
+        );
         return Promise.resolve();
       }
     }
+    log(
+      `[NITRO]::Debug: Validate model state failed with response ${JSON.stringify(
+        res.statusText
+      )}`
+    );
     return Promise.reject("Validate model status failed");
   });
 }
@@ -307,7 +317,7 @@ function spawnNitroProcess(): Promise<any> {
   const args: string[] = ["1", LOCAL_HOST, PORT.toString()];
   // Execute the binary
   log(
-    `[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
+    `[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`,
   );
   subprocess = spawn(
     executableOptions.executablePath,
@@ -318,7 +328,7 @@ function spawnNitroProcess(): Promise<any> {
         ...process.env,
         CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
       },
-    }
+    },
   );
   // Handle subprocess output

View File

@@ -13,9 +13,15 @@ import {
 } from '@janhq/core'
 import { useAtomValue, useSetAtom } from 'jotai'
-import { activeModelAtom, stateModelAtom } from '@/hooks/useActiveModel'
+import {
+  activeModelAtom,
+  loadModelErrorAtom,
+  stateModelAtom,
+} from '@/hooks/useActiveModel'
 import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels'
+import { queuedMessageAtom } from '@/hooks/useSendChatMessage'
 import { toaster } from '../Toast'
 import { extensionManager } from '@/extension'
@@ -26,6 +32,7 @@ import {
 import {
   updateThreadWaitingForResponseAtom,
   threadsAtom,
+  isGeneratingResponseAtom,
 } from '@/helpers/atoms/Thread.atom'
 export default function EventHandler({ children }: { children: ReactNode }) {
@@ -34,11 +41,14 @@ export default function EventHandler({ children }: { children: ReactNode }) {
   const { downloadedModels } = useGetDownloadedModels()
   const setActiveModel = useSetAtom(activeModelAtom)
   const setStateModel = useSetAtom(stateModelAtom)
+  const setQueuedMessage = useSetAtom(queuedMessageAtom)
+  const setLoadModelError = useSetAtom(loadModelErrorAtom)
   const updateThreadWaiting = useSetAtom(updateThreadWaitingForResponseAtom)
   const threads = useAtomValue(threadsAtom)
   const modelsRef = useRef(downloadedModels)
   const threadsRef = useRef(threads)
+  const setIsGeneratingResponse = useSetAtom(isGeneratingResponseAtom)
   useEffect(() => {
     threadsRef.current = threads
@@ -51,8 +61,9 @@ export default function EventHandler({ children }: { children: ReactNode }) {
   const onNewMessageResponse = useCallback(
     (message: ThreadMessage) => {
       addNewMessage(message)
+      setIsGeneratingResponse(false)
     },
-    [addNewMessage]
+    [addNewMessage, setIsGeneratingResponse]
   )
   const onModelReady = useCallback(
@@ -83,13 +94,15 @@ export default function EventHandler({ children }: { children: ReactNode }) {
     (res: any) => {
       const errorMessage = `${res.error}`
       console.error('Failed to load model: ' + errorMessage)
+      setLoadModelError(errorMessage)
       setStateModel(() => ({
         state: 'start',
         loading: false,
         model: res.modelId,
       }))
+      setQueuedMessage(false)
     },
-    [setStateModel]
+    [setStateModel, setQueuedMessage, setLoadModelError]
   )
   const onMessageResponseUpdate = useCallback(
@@ -108,6 +121,8 @@ export default function EventHandler({ children }: { children: ReactNode }) {
     // Mark the thread as not waiting for response
     updateThreadWaiting(message.thread_id, false)
+    setIsGeneratingResponse(false)
     const thread = threadsRef.current?.find((e) => e.id == message.thread_id)
     if (thread) {
       const messageContent = message.content[0]?.text.value ?? ''
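The EventHandler changes above route a failed load into shared state: onModelInitFailed stores the message in loadModelErrorAtom and clears the queued-message flag, while the response handlers flip isGeneratingResponseAtom off. A hedged sketch of how any component can consume that error atom (the actual consumer is the LoadModelErrorMessage component added later in this commit; the banner below is illustrative only):

// Sketch only: reads the atom that EventHandler populates on load failure.
import { useAtomValue } from 'jotai'
import { loadModelErrorAtom } from '@/hooks/useActiveModel'

const LoadErrorBanner = () => {
  const loadModelError = useAtomValue(loadModelErrorAtom)
  if (!loadModelError) return null
  return <p>Failed to load model: {loadModelError}</p>
}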

View File

@@ -2,5 +2,6 @@ import { atom } from 'jotai'
 export const totalRamAtom = atom<number>(0)
 export const usedRamAtom = atom<number>(0)
+export const availableRamAtom = atom<number>(0)
 export const cpuUsageAtom = atom<number>(0)

View File

@@ -23,6 +23,7 @@ export const setActiveThreadIdAtom = atom(
 export const waitingToSendMessage = atom<boolean | undefined>(undefined)
+export const isGeneratingResponseAtom = atom<boolean | undefined>(undefined)
 /**
  * Stores all thread states for the current user
  */

View File

@@ -1,5 +1,5 @@
 import { events, Model, ModelEvent } from '@janhq/core'
-import { atom, useAtom, useAtomValue } from 'jotai'
+import { atom, useAtom, useAtomValue, useSetAtom } from 'jotai'
 import { toaster } from '@/containers/Toast'
@@ -9,6 +9,7 @@ import { LAST_USED_MODEL_ID } from './useRecommendedModel'
 import { activeThreadAtom } from '@/helpers/atoms/Thread.atom'
 export const activeModelAtom = atom<Model | undefined>(undefined)
+export const loadModelErrorAtom = atom<string | undefined>(undefined)
 export const stateModelAtom = atom({
   state: 'start',
@@ -21,6 +22,7 @@ export function useActiveModel() {
   const activeThread = useAtomValue(activeThreadAtom)
   const [stateModel, setStateModel] = useAtom(stateModelAtom)
   const { downloadedModels } = useGetDownloadedModels()
+  const setLoadModelError = useSetAtom(loadModelErrorAtom)
   const startModel = async (modelId: string) => {
     if (
@@ -31,6 +33,7 @@ export function useActiveModel() {
       return
     }
     // TODO: incase we have multiple assistants, the configuration will be from assistant
+    setLoadModelError(undefined)
     setActiveModel(undefined)
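useActiveModel now owns loadModelErrorAtom and resets it at the top of startModel, so an error from a previous attempt does not linger once a new load begins. A small sketch of that reset-before-retry pattern, with illustrative names only (the real hook reports errors via the event handler rather than a try/catch):

// Illustrative helper, not the hook's actual API: clear shared error state,
// then attempt the operation and record any new failure.
async function attemptWithFreshError(
  run: () => Promise<void>,
  setError: (e: string | undefined) => void
): Promise<void> {
  setError(undefined)
  try {
    await run()
  } catch (err) {
    setError(`${err}`)
  }
}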

View File

@@ -6,6 +6,7 @@ import { useSetAtom } from 'jotai'
 import { extensionManager } from '@/extension/ExtensionManager'
 import {
+  availableRamAtom,
   cpuUsageAtom,
   totalRamAtom,
   usedRamAtom,
@@ -16,6 +17,7 @@ export default function useGetSystemResources() {
   const [cpu, setCPU] = useState<number>(0)
   const setTotalRam = useSetAtom(totalRamAtom)
   const setUsedRam = useSetAtom(usedRamAtom)
+  const setAvailableRam = useSetAtom(availableRamAtom)
   const setCpuUsage = useSetAtom(cpuUsageAtom)
   const getSystemResources = async () => {
@@ -40,6 +42,10 @@ export default function useGetSystemResources() {
     setTotalRam(resourceInfor.mem.totalMemory)
     setRam(Math.round(ram * 100))
+    if (resourceInfor.mem.totalMemory && resourceInfor.mem.usedMemory)
+      setAvailableRam(
+        resourceInfor.mem.totalMemory - resourceInfor.mem.usedMemory
+      )
     setCPU(Math.round(currentLoadInfor?.cpu?.usage ?? 0))
     setCpuUsage(Math.round(currentLoadInfor?.cpu?.usage ?? 0))
   }
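The resource hook publishes free memory to the new availableRamAtom as total minus used, guarded so it only runs when both figures are present. A trivial sketch of the derivation, assuming the same mem shape referenced above (values are illustrative):

// e.g. { totalMemory: 16_000, usedMemory: 10_000 } -> availableRamAtom gets 6_000.
const availableRam = (mem: { totalMemory: number; usedMemory: number }): number =>
  mem.totalMemory - mem.usedMemory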

View File

@@ -1,15 +0,0 @@
-import { useAtomValue } from 'jotai'
-import { threadStatesAtom } from '@/helpers/atoms/Thread.atom'
-export default function useInference() {
-  const threadStates = useAtomValue(threadStatesAtom)
-  const isGeneratingResponse = Object.values(threadStates).some(
-    (threadState) => threadState.waitingForResponse
-  )
-  return {
-    isGeneratingResponse,
-  }
-}

View File

@@ -25,12 +25,10 @@ import { ulid } from 'ulid'
 import { selectedModelAtom } from '@/containers/DropdownListSidebar'
 import { currentPromptAtom, fileUploadAtom } from '@/containers/Providers/Jotai'
-import { toaster } from '@/containers/Toast'
 import { getBase64 } from '@/utils/base64'
 import { toRuntimeParams, toSettingParams } from '@/utils/modelParam'
-import { useActiveModel } from './useActiveModel'
+import { loadModelErrorAtom, useActiveModel } from './useActiveModel'
 import { extensionManager } from '@/extension/ExtensionManager'
 import {
@@ -59,9 +57,11 @@ export default function useSendChatMessage() {
   const { activeModel } = useActiveModel()
   const selectedModel = useAtomValue(selectedModelAtom)
   const { startModel } = useActiveModel()
-  const setQueuedMessage = useSetAtom(queuedMessageAtom)
+  const [queuedMessage, setQueuedMessage] = useAtom(queuedMessageAtom)
+  const loadModelFailed = useAtomValue(loadModelErrorAtom)
   const modelRef = useRef<Model | undefined>()
+  const loadModelFailedRef = useRef<string | undefined>()
   const activeModelParams = useAtomValue(getActiveThreadModelParamsAtom)
   const engineParamsUpdate = useAtomValue(engineParamsUpdateAtom)
@@ -73,6 +73,10 @@ export default function useSendChatMessage() {
     modelRef.current = activeModel
   }, [activeModel])
+  useEffect(() => {
+    loadModelFailedRef.current = loadModelFailed
+  }, [loadModelFailed])
   const resendChatMessage = async (currentMessage: ThreadMessage) => {
     if (!activeThread) {
       console.error('No active thread')
@@ -121,21 +125,6 @@ export default function useSendChatMessage() {
     events.emit(MessageEvent.OnMessageSent, messageRequest)
   }
-  // TODO: Refactor @louis
-  const waitForModelStarting = async (modelId: string) => {
-    return new Promise<void>((resolve) => {
-      setTimeout(async () => {
-        if (modelRef.current?.id !== modelId) {
-          console.debug('waiting for model to start')
-          await waitForModelStarting(modelId)
-          resolve()
-        } else {
-          resolve()
-        }
-      }, 200)
-    })
-  }
   const sendChatMessage = async (message: string) => {
     if (!message || message.trim().length === 0) return
@@ -304,6 +293,19 @@ export default function useSendChatMessage() {
     setEngineParamsUpdate(false)
   }
+  const waitForModelStarting = async (modelId: string) => {
+    return new Promise<void>((resolve) => {
+      setTimeout(async () => {
+        if (modelRef.current?.id !== modelId && !loadModelFailedRef.current) {
+          await waitForModelStarting(modelId)
+          resolve()
+        } else {
+          resolve()
+        }
+      }, 200)
+    })
+  }
   return {
     sendChatMessage,
     resendChatMessage,
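waitForModelStarting moved below sendChatMessage and gained an escape hatch: the 200 ms polling loop now also stops when loadModelFailedRef is set, so a failed load no longer leaves the send path waiting forever. The ref mirrors the atom via useEffect because the setTimeout closure would otherwise capture a stale value. A generic sketch of the same pattern, independent of this hook (names are illustrative):

// Generic poll-until-done-or-failed sketch; `done` and `failed` are whatever
// predicates the caller supplies (here they stand in for the model-id check
// and the load-error ref).
const pollUntil = (
  done: () => boolean,
  failed: () => boolean,
  intervalMs = 200
): Promise<void> =>
  new Promise<void>((resolve) => {
    const tick = () => {
      if (done() || failed()) resolve()
      else setTimeout(tick, intervalMs)
    }
    setTimeout(tick, intervalMs)
  })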

View File

@@ -13,6 +13,7 @@ import { setConvoMessagesAtom } from '@/helpers/atoms/ChatMessage.atom'
 import {
   ModelParams,
   getActiveThreadIdAtom,
+  isGeneratingResponseAtom,
   setActiveThreadIdAtom,
   setThreadModelParamsAtom,
 } from '@/helpers/atoms/Thread.atom'
@@ -22,6 +23,7 @@ export default function useSetActiveThread() {
   const setActiveThreadId = useSetAtom(setActiveThreadIdAtom)
   const setThreadMessage = useSetAtom(setConvoMessagesAtom)
   const setThreadModelParams = useSetAtom(setThreadModelParamsAtom)
+  const setIsGeneratingResponse = useSetAtom(isGeneratingResponseAtom)
   const setActiveThread = async (thread: Thread) => {
     if (activeThreadId === thread.id) {
@@ -29,6 +31,7 @@ export default function useSetActiveThread() {
       return
     }
+    setIsGeneratingResponse(false)
     events.emit(InferenceEvent.OnInferenceStopped, thread.id)
     // load the corresponding messages

View File

@@ -8,14 +8,11 @@ import { useAtomValue } from 'jotai'
 import LogoMark from '@/containers/Brand/Logo/Mark'
-import GenerateResponse from '@/containers/Loader/GenerateResponse'
 import { MainViewState } from '@/constants/screens'
-import { activeModelAtom } from '@/hooks/useActiveModel'
+import { loadModelErrorAtom } from '@/hooks/useActiveModel'
 import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels'
-import useInference from '@/hooks/useInference'
 import { useMainViewState } from '@/hooks/useMainViewState'
 import ChatItem from '../ChatItem'
@@ -26,10 +23,9 @@ import { getCurrentChatMessagesAtom } from '@/helpers/atoms/ChatMessage.atom'
 const ChatBody: React.FC = () => {
   const messages = useAtomValue(getCurrentChatMessagesAtom)
-  const activeModel = useAtomValue(activeModelAtom)
   const { downloadedModels } = useGetDownloadedModels()
   const { setMainViewState } = useMainViewState()
-  const { isGeneratingResponse } = useInference()
+  const loadModelError = useAtomValue(loadModelErrorAtom)
   if (downloadedModels.length === 0)
     return (
@@ -90,15 +86,14 @@ const ChatBody: React.FC = () => {
                 message.content.length > 0) && (
                 <ChatItem {...message} key={message.id} />
               )}
-              {(message.status === MessageStatus.Error ||
+              {!loadModelError &&
+                (message.status === MessageStatus.Error ||
                 message.status === MessageStatus.Stopped) &&
                 index === messages.length - 1 && (
                   <ErrorMessage message={message} />
                 )}
             </div>
           ))}
-          {activeModel && isGeneratingResponse && <GenerateResponse />}
         </ScrollToBottom>
       )}
     </Fragment>

View File

@@ -17,7 +17,6 @@ import {
   deleteMessageAtom,
   getCurrentChatMessagesAtom,
 } from '@/helpers/atoms/ChatMessage.atom'
-import { totalRamAtom } from '@/helpers/atoms/SystemBar.atom'
 import { activeThreadAtom } from '@/helpers/atoms/Thread.atom'
 const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
@@ -25,8 +24,6 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
   const thread = useAtomValue(activeThreadAtom)
   const deleteMessage = useSetAtom(deleteMessageAtom)
   const { resendChatMessage } = useSendChatMessage()
-  const { activeModel } = useActiveModel()
-  const totalRam = useAtomValue(totalRamAtom)
   const regenerateMessage = async () => {
     const lastMessageIndex = messages.length - 1
@@ -70,12 +67,6 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
       {message.status === MessageStatus.Error && (
         <div key={message.id} className="mt-10 flex flex-col items-center">
           <span className="mb-3 text-center text-sm font-medium text-gray-500">
-            {Number(activeModel?.metadata.size) > totalRam ? (
-              <>
-                Oops! Model size exceeds available RAM. Consider selecting a
-                smaller model or upgrading your RAM for smoother performance.
-              </>
-            ) : (
             <>
               <p>Apologies, something&apos;s amiss!</p>
               Jan&apos;s in beta. Find troubleshooting guides{' '}
@@ -96,7 +87,6 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
               </a>{' '}
               for assistance.
             </>
-            )}
           </span>
         </div>
       )}

View File

@@ -0,0 +1,48 @@
+import { MessageStatus, ThreadMessage } from '@janhq/core'
+import { useAtomValue } from 'jotai'
+import { useActiveModel } from '@/hooks/useActiveModel'
+import { totalRamAtom } from '@/helpers/atoms/SystemBar.atom'
+const LoadModelErrorMessage = () => {
+  const { activeModel } = useActiveModel()
+  const availableRam = useAtomValue(totalRamAtom)
+  return (
+    <>
+      <div className="mt-10 flex flex-col items-center">
+        <span className="mb-3 text-center text-sm font-medium text-gray-500">
+          {Number(activeModel?.metadata.size) > availableRam ? (
+            <>
+              Oops! Model size exceeds available RAM. Consider selecting a
+              smaller model or upgrading your RAM for smoother performance.
+            </>
+          ) : (
+            <>
+              <p>Apologies, something&apos;s amiss!</p>
+              Jan&apos;s in beta. Find troubleshooting guides{' '}
+              <a
+                href="https://jan.ai/guides/troubleshooting"
+                target="_blank"
+                className="text-blue-600 hover:underline dark:text-blue-300"
+              >
+                here
+              </a>{' '}
+              or reach out to us on{' '}
+              <a
+                href="https://discord.gg/AsJ8krTT3N"
+                target="_blank"
+                className="text-blue-600 hover:underline dark:text-blue-300"
+              >
+                Discord
+              </a>{' '}
+              for assistance.
+            </>
+          )}
+        </span>
+      </div>
+    </>
+  )
+}
+export default LoadModelErrorMessage

View File

@@ -9,6 +9,7 @@ import { UploadCloudIcon } from 'lucide-react'
 import { twMerge } from 'tailwind-merge'
+import GenerateResponse from '@/containers/Loader/GenerateResponse'
 import ModelReload from '@/containers/Loader/ModelReload'
 import ModelStart from '@/containers/Loader/ModelStart'
@@ -19,6 +20,7 @@ import { snackbar } from '@/containers/Toast'
 import { FeatureToggleContext } from '@/context/FeatureToggle'
+import { activeModelAtom, loadModelErrorAtom } from '@/hooks/useActiveModel'
 import { queuedMessageAtom, reloadModelAtom } from '@/hooks/useSendChatMessage'
 import ChatBody from '@/screens/Chat/ChatBody'
@@ -26,12 +28,14 @@ import ChatBody from '@/screens/Chat/ChatBody'
 import ThreadList from '@/screens/Chat/ThreadList'
 import ChatInput from './ChatInput'
+import LoadModelErrorMessage from './LoadModelErrorMessage'
 import RequestDownloadModel from './RequestDownloadModel'
 import Sidebar from './Sidebar'
 import {
   activeThreadAtom,
   engineParamsUpdateAtom,
+  isGeneratingResponseAtom,
 } from '@/helpers/atoms/Thread.atom'
 const renderError = (code: string) => {
@@ -63,6 +67,11 @@ const ChatScreen: React.FC = () => {
   const setFileUpload = useSetAtom(fileUploadAtom)
   const { experimentalFeature } = useContext(FeatureToggleContext)
+  const activeModel = useAtomValue(activeModelAtom)
+  const isGeneratingResponse = useAtomValue(isGeneratingResponseAtom)
+  const loadModelError = useAtomValue(loadModelErrorAtom)
   const { getRootProps, isDragReject } = useDropzone({
     noClick: true,
     multiple: false,
@@ -202,6 +211,9 @@ const ChatScreen: React.FC = () => {
             </span>
           </div>
         )}
+        {activeModel && isGeneratingResponse && <GenerateResponse />}
+        {loadModelError && <LoadModelErrorMessage />}
        <ChatInput />
      </div>
    </div>