fix: display error message on model load fail (#1894)

Louis 2024-02-01 23:28:15 +07:00 committed by GitHub
parent 36cd5988d4
commit 5ce2e422f8
15 changed files with 166 additions and 92 deletions

View File

@@ -305,7 +305,7 @@ const loadLLMModel = async (settings: NitroModelSettings): Promise<Response> =>
})
.catch((err: any) => {
logServer(`[NITRO]::Error: Load model failed with error ${err}`)
return Promise.reject()
return Promise.reject(err)
})
}

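The hunk above changes `loadLLMModel` to reject with the caught error instead of rejecting with nothing, so the caller receives the actual failure reason rather than `undefined`. A minimal sketch of the difference, with an illustrative caller; the URL and the standalone function shape here are assumptions, not the project's real values:

```ts
// Sketch: rejecting with the error keeps the message available to whoever awaits the load.
async function loadLLMModel(url: string, body: unknown): Promise<Response> {
  return fetch(url, { method: 'POST', body: JSON.stringify(body) }).catch((err: any) => {
    console.error(`[NITRO]::Error: Load model failed with error ${err}`)
    // Before this commit: Promise.reject() - callers could only report "undefined".
    return Promise.reject(err)
  })
}

// Illustrative caller: the rejection reason can now be surfaced to the user.
loadLLMModel('http://127.0.0.1:3928/inferences/llamacpp/loadmodel', {}).catch((err) => {
  console.log(`display to user: ${err}`)
})
```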
View File

@@ -134,6 +134,7 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
const modelFullPath = await joinPath(["models", model.id]);
this._currentModel = model;
const nitroInitResult = await executeOnMain(NODE, "runModel", {
modelFullPath,
model,
@@ -144,7 +145,6 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
return;
}
this._currentModel = model;
events.emit(ModelEvent.OnModelReady, model);
this.getNitroProcesHealthIntervalId = setInterval(
@@ -226,6 +226,9 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
*/
private async onMessageRequest(data: MessageRequest) {
if (data.model?.engine !== InferenceEngine.nitro || !this._currentModel) {
console.log(
`Model is not nitro or no model loaded ${data.model?.engine} ${this._currentModel}`
);
return;
}

View File

@@ -67,7 +67,7 @@ function stopModel(): Promise<void> {
* TODO: Should pass absolute of the model file instead of just the name - So we can modurize the module.ts to npm package
*/
async function runModel(
wrapper: ModelInitOptions
wrapper: ModelInitOptions,
): Promise<ModelOperationResponse | void> {
if (wrapper.model.engine !== InferenceEngine.nitro) {
// Not a nitro model
@@ -85,7 +85,7 @@ async function runModel(
const ggufBinFile = files.find(
(file) =>
file === path.basename(currentModelFile) ||
file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT)
file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT),
);
if (!ggufBinFile) return Promise.reject("No GGUF model file found");
@@ -180,10 +180,10 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
const system_prompt = promptTemplate.substring(0, systemIndex);
const user_prompt = promptTemplate.substring(
systemIndex + systemMarker.length,
promptIndex
promptIndex,
);
const ai_prompt = promptTemplate.substring(
promptIndex + promptMarker.length
promptIndex + promptMarker.length,
);
// Return the split parts
@@ -193,7 +193,7 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
const promptIndex = promptTemplate.indexOf(promptMarker);
const user_prompt = promptTemplate.substring(0, promptIndex);
const ai_prompt = promptTemplate.substring(
promptIndex + promptMarker.length
promptIndex + promptMarker.length,
);
// Return the split parts
@@ -225,14 +225,14 @@ function loadLLMModel(settings: any): Promise<Response> {
.then((res) => {
log(
`[NITRO]::Debug: Load model success with response ${JSON.stringify(
res
)}`
res,
)}`,
);
return Promise.resolve(res);
})
.catch((err) => {
log(`[NITRO]::Error: Load model failed with error ${err}`);
return Promise.reject();
return Promise.reject(err);
});
}
@@ -254,8 +254,8 @@ async function validateModelStatus(): Promise<void> {
retryDelay: 500,
}).then(async (res: Response) => {
log(
`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
res
`[NITRO]::Debug: Validate model state with response ${JSON.stringify(
res.status
)}`
);
// If the response is OK, check model_loaded status.
@@ -264,9 +264,19 @@ async function validateModelStatus(): Promise<void> {
// If the model is loaded, return an empty object.
// Otherwise, return an object with an error message.
if (body.model_loaded) {
log(
`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
body
)}`
);
return Promise.resolve();
}
}
log(
`[NITRO]::Debug: Validate model state failed with response ${JSON.stringify(
res.statusText
)}`
);
return Promise.reject("Validate model status failed");
});
}
@@ -307,7 +317,7 @@ function spawnNitroProcess(): Promise<any> {
const args: string[] = ["1", LOCAL_HOST, PORT.toString()];
// Execute the binary
log(
`[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
`[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`,
);
subprocess = spawn(
executableOptions.executablePath,
@@ -318,7 +328,7 @@ function spawnNitroProcess(): Promise<any> {
...process.env,
CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
},
}
},
);
// Handle subprocess output

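Further up in this file, `validateModelStatus` now logs only `res.status` on entry, logs the parsed body when the model is confirmed loaded, and logs `res.statusText` before rejecting, which makes failed health checks visible in the server log. A self-contained sketch of that check; the endpoint path and port are assumptions, and the real code also retries the request (retries: 3, retryDelay: 500):

```ts
// Sketch of the health check; the endpoint and port are assumptions, and the real
// implementation wraps fetch with retry behaviour (retries: 3, retryDelay: 500).
async function validateModelStatus(): Promise<void> {
  const res = await fetch('http://127.0.0.1:3928/inferences/llamacpp/modelstatus')
  console.log(`[NITRO]::Debug: Validate model state with response ${res.status}`)

  if (res.ok) {
    const body = await res.json()
    // Only a body reporting model_loaded counts as success.
    if (body.model_loaded) {
      console.log(`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(body)}`)
      return
    }
  }
  console.log(`[NITRO]::Debug: Validate model state failed with response ${res.statusText}`)
  return Promise.reject('Validate model status failed')
}
```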
View File

@@ -13,9 +13,15 @@ import {
} from '@janhq/core'
import { useAtomValue, useSetAtom } from 'jotai'
import { activeModelAtom, stateModelAtom } from '@/hooks/useActiveModel'
import {
activeModelAtom,
loadModelErrorAtom,
stateModelAtom,
} from '@/hooks/useActiveModel'
import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels'
import { queuedMessageAtom } from '@/hooks/useSendChatMessage'
import { toaster } from '../Toast'
import { extensionManager } from '@/extension'
@@ -26,6 +32,7 @@ import {
import {
updateThreadWaitingForResponseAtom,
threadsAtom,
isGeneratingResponseAtom,
} from '@/helpers/atoms/Thread.atom'
export default function EventHandler({ children }: { children: ReactNode }) {
@@ -34,11 +41,14 @@ export default function EventHandler({ children }: { children: ReactNode }) {
const { downloadedModels } = useGetDownloadedModels()
const setActiveModel = useSetAtom(activeModelAtom)
const setStateModel = useSetAtom(stateModelAtom)
const setQueuedMessage = useSetAtom(queuedMessageAtom)
const setLoadModelError = useSetAtom(loadModelErrorAtom)
const updateThreadWaiting = useSetAtom(updateThreadWaitingForResponseAtom)
const threads = useAtomValue(threadsAtom)
const modelsRef = useRef(downloadedModels)
const threadsRef = useRef(threads)
const setIsGeneratingResponse = useSetAtom(isGeneratingResponseAtom)
useEffect(() => {
threadsRef.current = threads
@@ -51,8 +61,9 @@ export default function EventHandler({ children }: { children: ReactNode }) {
const onNewMessageResponse = useCallback(
(message: ThreadMessage) => {
addNewMessage(message)
setIsGeneratingResponse(false)
},
[addNewMessage]
[addNewMessage, setIsGeneratingResponse]
)
const onModelReady = useCallback(
@@ -83,13 +94,15 @@
(res: any) => {
const errorMessage = `${res.error}`
console.error('Failed to load model: ' + errorMessage)
setLoadModelError(errorMessage)
setStateModel(() => ({
state: 'start',
loading: false,
model: res.modelId,
}))
setQueuedMessage(false)
},
[setStateModel]
[setStateModel, setQueuedMessage, setLoadModelError]
)
const onMessageResponseUpdate = useCallback(
@@ -108,6 +121,8 @@ export default function EventHandler({ children }: { children: ReactNode }) {
// Mark the thread as not waiting for response
updateThreadWaiting(message.thread_id, false)
setIsGeneratingResponse(false)
const thread = threadsRef.current?.find((e) => e.id == message.thread_id)
if (thread) {
const messageContent = message.content[0]?.text.value ?? ''

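The EventHandler hunks above make the failure callback store the error message in the new `loadModelErrorAtom` (and clear the queued-message flag) instead of only logging it. Below is a minimal, hypothetical producer/consumer sketch of that atom wiring; the hook and component names are illustrative, only the atom shape matches the real code:

```tsx
import { atom, useAtomValue, useSetAtom } from 'jotai'

// Same shape as the real atom: undefined means "no load error".
export const loadModelErrorAtom = atom<string | undefined>(undefined)

// Producer side (what the model-init-failed callback does): record the message.
export function useReportLoadModelError() {
  const setLoadModelError = useSetAtom(loadModelErrorAtom)
  return (error: unknown) => setLoadModelError(`${error}`)
}

// Consumer side (what the chat screen does): re-render whenever the error changes.
export function LoadErrorBanner() {
  const loadModelError = useAtomValue(loadModelErrorAtom)
  if (!loadModelError) return null
  return <p>Failed to load model: {loadModelError}</p>
}
```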
View File

@@ -2,5 +2,6 @@ import { atom } from 'jotai'
export const totalRamAtom = atom<number>(0)
export const usedRamAtom = atom<number>(0)
export const availableRamAtom = atom<number>(0)
export const cpuUsageAtom = atom<number>(0)

View File

@@ -23,6 +23,7 @@ export const setActiveThreadIdAtom = atom(
export const waitingToSendMessage = atom<boolean | undefined>(undefined)
export const isGeneratingResponseAtom = atom<boolean | undefined>(undefined)
/**
* Stores all thread states for the current user
*/

View File

@@ -1,5 +1,5 @@
import { events, Model, ModelEvent } from '@janhq/core'
import { atom, useAtom, useAtomValue } from 'jotai'
import { atom, useAtom, useAtomValue, useSetAtom } from 'jotai'
import { toaster } from '@/containers/Toast'
@@ -9,6 +9,7 @@ import { LAST_USED_MODEL_ID } from './useRecommendedModel'
import { activeThreadAtom } from '@/helpers/atoms/Thread.atom'
export const activeModelAtom = atom<Model | undefined>(undefined)
export const loadModelErrorAtom = atom<string | undefined>(undefined)
export const stateModelAtom = atom({
state: 'start',
@@ -21,6 +22,7 @@ export function useActiveModel() {
const activeThread = useAtomValue(activeThreadAtom)
const [stateModel, setStateModel] = useAtom(stateModelAtom)
const { downloadedModels } = useGetDownloadedModels()
const setLoadModelError = useSetAtom(loadModelErrorAtom)
const startModel = async (modelId: string) => {
if (
@@ -31,6 +33,7 @@
return
}
// TODO: incase we have multiple assistants, the configuration will be from assistant
setLoadModelError(undefined)
setActiveModel(undefined)

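`useActiveModel` declares the atom and resets it at the top of `startModel`, so a retry does not keep showing the previous failure. A short sketch of that reset-before-start pattern; the body of the real `startModel` is abridged, and the call that actually starts the model sits outside this hunk:

```ts
import { atom, useSetAtom } from 'jotai'

export const loadModelErrorAtom = atom<string | undefined>(undefined)

// Abridged sketch of the hook's start flow.
export function useStartModelSketch() {
  const setLoadModelError = useSetAtom(loadModelErrorAtom)

  const startModel = async (modelId: string) => {
    // Clear any stale failure before attempting a new load,
    // otherwise the error banner from the last attempt would persist.
    setLoadModelError(undefined)
    // ...request the actual model start here (outside this sketch).
    console.debug(`requesting start of model ${modelId}`)
  }

  return { startModel }
}
```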
View File

@@ -6,6 +6,7 @@ import { useSetAtom } from 'jotai'
import { extensionManager } from '@/extension/ExtensionManager'
import {
availableRamAtom,
cpuUsageAtom,
totalRamAtom,
usedRamAtom,
@@ -16,6 +17,7 @@ export default function useGetSystemResources() {
const [cpu, setCPU] = useState<number>(0)
const setTotalRam = useSetAtom(totalRamAtom)
const setUsedRam = useSetAtom(usedRamAtom)
const setAvailableRam = useSetAtom(availableRamAtom)
const setCpuUsage = useSetAtom(cpuUsageAtom)
const getSystemResources = async () => {
@@ -40,6 +42,10 @@
setTotalRam(resourceInfor.mem.totalMemory)
setRam(Math.round(ram * 100))
if (resourceInfor.mem.totalMemory && resourceInfor.mem.usedMemory)
setAvailableRam(
resourceInfor.mem.totalMemory - resourceInfor.mem.usedMemory
)
setCPU(Math.round(currentLoadInfor?.cpu?.usage ?? 0))
setCpuUsage(Math.round(currentLoadInfor?.cpu?.usage ?? 0))
}

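The resource hook now also publishes CPU usage through `cpuUsageAtom` and derives available RAM as total minus used. The arithmetic is simple but worth pinning down; a sketch with the payload shaped as the hunk implies (the unit, presumably bytes, is an assumption):

```ts
// Sketch: derive available RAM from the monitoring payload used in the hunk.
// The byte unit is an assumption; only the total-minus-used arithmetic is shown here.
interface MemInfo {
  totalMemory: number
  usedMemory: number
}

function availableRam(mem: MemInfo): number {
  // Guarded the same way as the hunk: only compute when both figures are present.
  return mem.totalMemory && mem.usedMemory ? mem.totalMemory - mem.usedMemory : 0
}
```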
View File

@@ -1,15 +0,0 @@
import { useAtomValue } from 'jotai'
import { threadStatesAtom } from '@/helpers/atoms/Thread.atom'
export default function useInference() {
const threadStates = useAtomValue(threadStatesAtom)
const isGeneratingResponse = Object.values(threadStates).some(
(threadState) => threadState.waitingForResponse
)
return {
isGeneratingResponse,
}
}

View File

@@ -25,12 +25,10 @@ import { ulid } from 'ulid'
import { selectedModelAtom } from '@/containers/DropdownListSidebar'
import { currentPromptAtom, fileUploadAtom } from '@/containers/Providers/Jotai'
import { toaster } from '@/containers/Toast'
import { getBase64 } from '@/utils/base64'
import { toRuntimeParams, toSettingParams } from '@/utils/modelParam'
import { useActiveModel } from './useActiveModel'
import { loadModelErrorAtom, useActiveModel } from './useActiveModel'
import { extensionManager } from '@/extension/ExtensionManager'
import {
@@ -59,9 +57,11 @@ export default function useSendChatMessage() {
const { activeModel } = useActiveModel()
const selectedModel = useAtomValue(selectedModelAtom)
const { startModel } = useActiveModel()
const setQueuedMessage = useSetAtom(queuedMessageAtom)
const [queuedMessage, setQueuedMessage] = useAtom(queuedMessageAtom)
const loadModelFailed = useAtomValue(loadModelErrorAtom)
const modelRef = useRef<Model | undefined>()
const loadModelFailedRef = useRef<string | undefined>()
const activeModelParams = useAtomValue(getActiveThreadModelParamsAtom)
const engineParamsUpdate = useAtomValue(engineParamsUpdateAtom)
@@ -73,6 +73,10 @@
modelRef.current = activeModel
}, [activeModel])
useEffect(() => {
loadModelFailedRef.current = loadModelFailed
}, [loadModelFailed])
const resendChatMessage = async (currentMessage: ThreadMessage) => {
if (!activeThread) {
console.error('No active thread')
@@ -121,21 +125,6 @@
events.emit(MessageEvent.OnMessageSent, messageRequest)
}
// TODO: Refactor @louis
const waitForModelStarting = async (modelId: string) => {
return new Promise<void>((resolve) => {
setTimeout(async () => {
if (modelRef.current?.id !== modelId) {
console.debug('waiting for model to start')
await waitForModelStarting(modelId)
resolve()
} else {
resolve()
}
}, 200)
})
}
const sendChatMessage = async (message: string) => {
if (!message || message.trim().length === 0) return
@@ -304,6 +293,19 @@
setEngineParamsUpdate(false)
}
const waitForModelStarting = async (modelId: string) => {
return new Promise<void>((resolve) => {
setTimeout(async () => {
if (modelRef.current?.id !== modelId && !loadModelFailedRef.current) {
await waitForModelStarting(modelId)
resolve()
} else {
resolve()
}
}, 200)
})
}
return {
sendChatMessage,
resendChatMessage,

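The relocated `waitForModelStarting` above is the piece that actually consumes the failure state while sending: it polls every 200 ms until the requested model is active, and now also stops as soon as `loadModelFailedRef` holds an error, so a failed load no longer blocks the send flow indefinitely. A standalone sketch of that poll-with-escape pattern, where plain objects stand in for the hook's React refs:

```ts
// Plain objects stand in for the hook's useRef values in this sketch.
const modelRef = { current: undefined as { id: string } | undefined }
const loadModelFailedRef = { current: undefined as string | undefined }

// Poll every 200 ms until the requested model is active or a load error is recorded.
async function waitForModelStarting(modelId: string): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(async () => {
      if (modelRef.current?.id !== modelId && !loadModelFailedRef.current) {
        await waitForModelStarting(modelId) // not ready yet and no error: keep waiting
        resolve()
      } else {
        resolve() // either the model is up or the load failed; stop waiting either way
      }
    }, 200)
  })
}
```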
View File

@@ -13,6 +13,7 @@ import { setConvoMessagesAtom } from '@/helpers/atoms/ChatMessage.atom'
import {
ModelParams,
getActiveThreadIdAtom,
isGeneratingResponseAtom,
setActiveThreadIdAtom,
setThreadModelParamsAtom,
} from '@/helpers/atoms/Thread.atom'
@@ -22,6 +23,7 @@ export default function useSetActiveThread() {
const setActiveThreadId = useSetAtom(setActiveThreadIdAtom)
const setThreadMessage = useSetAtom(setConvoMessagesAtom)
const setThreadModelParams = useSetAtom(setThreadModelParamsAtom)
const setIsGeneratingResponse = useSetAtom(isGeneratingResponseAtom)
const setActiveThread = async (thread: Thread) => {
if (activeThreadId === thread.id) {
@@ -29,6 +31,7 @@
return
}
setIsGeneratingResponse(false)
events.emit(InferenceEvent.OnInferenceStopped, thread.id)
// load the corresponding messages

View File

@@ -8,14 +8,11 @@ import { useAtomValue } from 'jotai'
import LogoMark from '@/containers/Brand/Logo/Mark'
import GenerateResponse from '@/containers/Loader/GenerateResponse'
import { MainViewState } from '@/constants/screens'
import { activeModelAtom } from '@/hooks/useActiveModel'
import { loadModelErrorAtom } from '@/hooks/useActiveModel'
import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels'
import useInference from '@/hooks/useInference'
import { useMainViewState } from '@/hooks/useMainViewState'
import ChatItem from '../ChatItem'
@@ -26,10 +23,9 @@ import { getCurrentChatMessagesAtom } from '@/helpers/atoms/ChatMessage.atom'
const ChatBody: React.FC = () => {
const messages = useAtomValue(getCurrentChatMessagesAtom)
const activeModel = useAtomValue(activeModelAtom)
const { downloadedModels } = useGetDownloadedModels()
const { setMainViewState } = useMainViewState()
const { isGeneratingResponse } = useInference()
const loadModelError = useAtomValue(loadModelErrorAtom)
if (downloadedModels.length === 0)
return (
@@ -90,15 +86,14 @@
message.content.length > 0) && (
<ChatItem {...message} key={message.id} />
)}
{(message.status === MessageStatus.Error ||
message.status === MessageStatus.Stopped) &&
{!loadModelError &&
(message.status === MessageStatus.Error ||
message.status === MessageStatus.Stopped) &&
index === messages.length - 1 && (
<ErrorMessage message={message} />
)}
</div>
))}
{activeModel && isGeneratingResponse && <GenerateResponse />}
</ScrollToBottom>
)}
</Fragment>

View File

@@ -17,7 +17,6 @@ import {
deleteMessageAtom,
getCurrentChatMessagesAtom,
} from '@/helpers/atoms/ChatMessage.atom'
import { totalRamAtom } from '@/helpers/atoms/SystemBar.atom'
import { activeThreadAtom } from '@/helpers/atoms/Thread.atom'
const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
@@ -25,8 +24,6 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
const thread = useAtomValue(activeThreadAtom)
const deleteMessage = useSetAtom(deleteMessageAtom)
const { resendChatMessage } = useSendChatMessage()
const { activeModel } = useActiveModel()
const totalRam = useAtomValue(totalRamAtom)
const regenerateMessage = async () => {
const lastMessageIndex = messages.length - 1
@@ -70,33 +67,26 @@
{message.status === MessageStatus.Error && (
<div key={message.id} className="mt-10 flex flex-col items-center">
<span className="mb-3 text-center text-sm font-medium text-gray-500">
{Number(activeModel?.metadata.size) > totalRam ? (
<>
Oops! Model size exceeds available RAM. Consider selecting a
smaller model or upgrading your RAM for smoother performance.
</>
) : (
<>
<p>Apologies, something&apos;s amiss!</p>
Jan&apos;s in beta. Find troubleshooting guides{' '}
<a
href="https://jan.ai/guides/troubleshooting"
target="_blank"
className="text-blue-600 hover:underline dark:text-blue-300"
>
here
</a>{' '}
or reach out to us on{' '}
<a
href="https://discord.gg/AsJ8krTT3N"
target="_blank"
className="text-blue-600 hover:underline dark:text-blue-300"
>
Discord
</a>{' '}
for assistance.
</>
)}
<>
<p>Apologies, something&apos;s amiss!</p>
Jan&apos;s in beta. Find troubleshooting guides{' '}
<a
href="https://jan.ai/guides/troubleshooting"
target="_blank"
className="text-blue-600 hover:underline dark:text-blue-300"
>
here
</a>{' '}
or reach out to us on{' '}
<a
href="https://discord.gg/AsJ8krTT3N"
target="_blank"
className="text-blue-600 hover:underline dark:text-blue-300"
>
Discord
</a>{' '}
for assistance.
</>
</span>
</div>
)}

View File

@@ -0,0 +1,48 @@
import { MessageStatus, ThreadMessage } from '@janhq/core'
import { useAtomValue } from 'jotai'
import { useActiveModel } from '@/hooks/useActiveModel'
import { totalRamAtom } from '@/helpers/atoms/SystemBar.atom'
const LoadModelErrorMessage = () => {
const { activeModel } = useActiveModel()
const availableRam = useAtomValue(totalRamAtom)
return (
<>
<div className="mt-10 flex flex-col items-center">
<span className="mb-3 text-center text-sm font-medium text-gray-500">
{Number(activeModel?.metadata.size) > availableRam ? (
<>
Oops! Model size exceeds available RAM. Consider selecting a
smaller model or upgrading your RAM for smoother performance.
</>
) : (
<>
<p>Apologies, something&apos;s amiss!</p>
Jan&apos;s in beta. Find troubleshooting guides{' '}
<a
href="https://jan.ai/guides/troubleshooting"
target="_blank"
className="text-blue-600 hover:underline dark:text-blue-300"
>
here
</a>{' '}
or reach out to us on{' '}
<a
href="https://discord.gg/AsJ8krTT3N"
target="_blank"
className="text-blue-600 hover:underline dark:text-blue-300"
>
Discord
</a>{' '}
for assistance.
</>
)}
</span>
</div>
</>
)
}
export default LoadModelErrorMessage

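The new `LoadModelErrorMessage` component picks its copy with a single comparison: if the active model's reported size exceeds the RAM figure read from `totalRamAtom`, it shows the out-of-memory hint, otherwise the generic troubleshooting text. A minimal sketch of that decision, assuming `metadata.size` and the atom value share a unit, which the diff does not state:

```ts
// Sketch: choose which error copy to render. Matching units between the model's
// metadata.size and the RAM atom is an assumption not stated in the diff.
function pickLoadErrorCopy(modelSize: unknown, ram: number): 'model-too-large' | 'generic' {
  return Number(modelSize) > ram ? 'model-too-large' : 'generic'
}

// e.g. pickLoadErrorCopy(activeModel?.metadata.size, availableRam) === 'model-too-large'
// renders the "Model size exceeds available RAM" message.
```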
View File

@@ -9,6 +9,7 @@ import { UploadCloudIcon } from 'lucide-react'
import { twMerge } from 'tailwind-merge'
import GenerateResponse from '@/containers/Loader/GenerateResponse'
import ModelReload from '@/containers/Loader/ModelReload'
import ModelStart from '@/containers/Loader/ModelStart'
@@ -19,6 +20,7 @@ import { snackbar } from '@/containers/Toast'
import { FeatureToggleContext } from '@/context/FeatureToggle'
import { activeModelAtom, loadModelErrorAtom } from '@/hooks/useActiveModel'
import { queuedMessageAtom, reloadModelAtom } from '@/hooks/useSendChatMessage'
import ChatBody from '@/screens/Chat/ChatBody'
@@ -26,12 +28,14 @@ import ChatBody from '@/screens/Chat/ChatBody'
import ThreadList from '@/screens/Chat/ThreadList'
import ChatInput from './ChatInput'
import LoadModelErrorMessage from './LoadModelErrorMessage'
import RequestDownloadModel from './RequestDownloadModel'
import Sidebar from './Sidebar'
import {
activeThreadAtom,
engineParamsUpdateAtom,
isGeneratingResponseAtom,
} from '@/helpers/atoms/Thread.atom'
const renderError = (code: string) => {
@@ -63,6 +67,11 @@ const ChatScreen: React.FC = () => {
const setFileUpload = useSetAtom(fileUploadAtom)
const { experimentalFeature } = useContext(FeatureToggleContext)
const activeModel = useAtomValue(activeModelAtom)
const isGeneratingResponse = useAtomValue(isGeneratingResponseAtom)
const loadModelError = useAtomValue(loadModelErrorAtom)
const { getRootProps, isDragReject } = useDropzone({
noClick: true,
multiple: false,
@@ -202,6 +211,9 @@
</span>
</div>
)}
{activeModel && isGeneratingResponse && <GenerateResponse />}
{loadModelError && <LoadModelErrorMessage />}
<ChatInput />
</div>
</div>