fix: unloading a model while it is loading causes an unknown error (#2649)
* fix: unloading a model while it is loading causes an unknown error * chore: mask placeholder
This commit is contained in:
parent
07e1a2295c
commit
9479beb7d1
@ -38,6 +38,8 @@ const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llama
|
||||
// The URL for the Nitro subprocess to kill itself
|
||||
const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
|
||||
|
||||
// Polling interval passed as the second argument to tcpPortUsed.waitUntilFree /
// tcpPortUsed.waitUntilUsed when checking PORT.
// NOTE(review): presumably milliseconds — confirm against the tcp-port-used API.
const NITRO_PORT_FREE_CHECK_INTERVAL = 100
|
||||
|
||||
// The supported model format
|
||||
// TODO: Should be an array to support more models
|
||||
const SUPPORTED_MODEL_FORMAT = '.gguf'
|
||||
@ -150,19 +152,9 @@ async function loadModel(
|
||||
async function runNitroAndLoadModel(systemInfo?: SystemInformation) {
|
||||
// Gather system information for CPU physical cores and memory
|
||||
return killSubprocess()
|
||||
.then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000))
|
||||
.then(() => {
|
||||
/**
|
||||
* There is a problem with the Windows process manager
|
||||
* We should wait a while to make sure the port is free and the subprocess is killed
|
||||
* The tested threshold is 500ms
|
||||
**/
|
||||
if (process.platform === 'win32') {
|
||||
return new Promise((resolve) => setTimeout(resolve, 500))
|
||||
} else {
|
||||
return Promise.resolve()
|
||||
}
|
||||
})
|
||||
.then(() =>
|
||||
tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
|
||||
)
|
||||
.then(() => spawnNitroProcess(systemInfo))
|
||||
.then(() => loadLLMModel(currentSettings))
|
||||
.then(validateModelStatus)
|
||||
@ -235,7 +227,7 @@ function loadLLMModel(settings: any): Promise<Response> {
|
||||
},
|
||||
body: JSON.stringify(settings),
|
||||
retries: 3,
|
||||
retryDelay: 500,
|
||||
retryDelay: 300,
|
||||
})
|
||||
.then((res) => {
|
||||
log(
|
||||
@ -266,7 +258,7 @@ async function validateModelStatus(): Promise<void> {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
retries: 5,
|
||||
retryDelay: 500,
|
||||
retryDelay: 300,
|
||||
}).then(async (res: Response) => {
|
||||
log(
|
||||
`[NITRO]::Debug: Validate model state with response ${JSON.stringify(
|
||||
@ -311,7 +303,9 @@ async function killSubprocess(): Promise<void> {
|
||||
signal: controller.signal,
|
||||
})
|
||||
.catch(() => {}) // Do nothing with this attempt
|
||||
.then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000))
|
||||
.then(() =>
|
||||
tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
|
||||
)
|
||||
.then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
|
||||
.catch((err) => {
|
||||
log(
|
||||
@ -330,7 +324,7 @@ async function killSubprocess(): Promise<void> {
|
||||
return killRequest()
|
||||
} else {
|
||||
return tcpPortUsed
|
||||
.waitUntilFree(PORT, 300, 5000)
|
||||
.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
|
||||
.then(() => resolve())
|
||||
.then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
|
||||
.catch(() => {
|
||||
@ -391,10 +385,12 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
|
||||
reject(`child process exited with code ${code}`)
|
||||
})
|
||||
|
||||
tcpPortUsed.waitUntilUsed(PORT, 300, 30000).then(() => {
|
||||
log(`[NITRO]::Debug: Nitro is ready`)
|
||||
resolve()
|
||||
})
|
||||
tcpPortUsed
|
||||
.waitUntilUsed(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 30000)
|
||||
.then(() => {
|
||||
log(`[NITRO]::Debug: Nitro is ready`)
|
||||
resolve()
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@ -15,7 +15,7 @@
|
||||
"description": "The Triton LLM API uses API keys for authentication.",
|
||||
"controllerType": "input",
|
||||
"controllerProps": {
|
||||
"placeholder": "API Key",
|
||||
"placeholder": "xxxxxxxxxxxxxxxxxxxx",
|
||||
"value": "",
|
||||
"type": "password"
|
||||
}
|
||||
|
||||
@ -25,6 +25,8 @@ export const stateModelAtom = atom<ModelState>({
|
||||
model: undefined,
|
||||
})
|
||||
|
||||
export let loadModelController: AbortController | undefined
|
||||
|
||||
export function useActiveModel() {
|
||||
const [activeModel, setActiveModel] = useAtom(activeModelAtom)
|
||||
const activeThread = useAtomValue(activeThreadAtom)
|
||||
@ -46,6 +48,7 @@ export function useActiveModel() {
|
||||
console.debug(`Model ${modelId} is already initialized. Ignore..`)
|
||||
return Promise.resolve()
|
||||
}
|
||||
loadModelController = new AbortController()
|
||||
|
||||
let model = downloadedModelsRef?.current.find((e) => e.id === modelId)
|
||||
|
||||
@ -104,6 +107,9 @@ export function useActiveModel() {
|
||||
})
|
||||
})
|
||||
.catch((error) => {
|
||||
if (loadModelController?.signal.aborted)
|
||||
return Promise.reject(new Error('aborted'))
|
||||
|
||||
setStateModel(() => ({
|
||||
state: 'start',
|
||||
loading: false,
|
||||
@ -131,12 +137,13 @@ export function useActiveModel() {
|
||||
|
||||
setStateModel({ state: 'stop', loading: true, model: stoppingModel })
|
||||
const engine = EngineManager.instance().get(stoppingModel.engine)
|
||||
await engine
|
||||
return engine
|
||||
?.unloadModel(stoppingModel)
|
||||
.catch()
|
||||
.then(() => {
|
||||
setActiveModel(undefined)
|
||||
setStateModel({ state: 'start', loading: false, model: undefined })
|
||||
loadModelController?.abort()
|
||||
})
|
||||
},
|
||||
[activeModel, setActiveModel, setStateModel, stateModel]
|
||||
|
||||
@ -164,39 +164,33 @@ const Sidebar: React.FC = () => {
|
||||
</CardSidebar>
|
||||
|
||||
<CardSidebar title="Model" isShow={true}>
|
||||
<div className="px-2 pt-4">
|
||||
<div className="flex flex-col gap-4 px-2 py-4">
|
||||
<DropdownListSidebar />
|
||||
|
||||
{modelSettings.length > 0 && (
|
||||
<div className="mt-6">
|
||||
<CardSidebar title="Inference Parameters" asChild>
|
||||
<div className="px-2 py-4">
|
||||
<ModelSetting componentProps={modelSettings} />
|
||||
</div>
|
||||
</CardSidebar>
|
||||
</div>
|
||||
<CardSidebar title="Inference Parameters" asChild>
|
||||
<div className="px-2 py-4">
|
||||
<ModelSetting componentProps={modelSettings} />
|
||||
</div>
|
||||
</CardSidebar>
|
||||
)}
|
||||
|
||||
{promptTemplateSettings.length > 0 && (
|
||||
<div className="mt-4">
|
||||
<CardSidebar title="Model Parameters" asChild>
|
||||
<div className="px-2 py-4">
|
||||
<PromptTemplateSetting
|
||||
componentData={promptTemplateSettings}
|
||||
/>
|
||||
</div>
|
||||
</CardSidebar>
|
||||
</div>
|
||||
<CardSidebar title="Model Parameters" asChild>
|
||||
<div className="px-2 py-4">
|
||||
<PromptTemplateSetting
|
||||
componentData={promptTemplateSettings}
|
||||
/>
|
||||
</div>
|
||||
</CardSidebar>
|
||||
)}
|
||||
|
||||
{engineSettings.length > 0 && (
|
||||
<div className="my-4">
|
||||
<CardSidebar title="Engine Parameters" asChild>
|
||||
<div className="px-2 py-4">
|
||||
<EngineSetting componentData={engineSettings} />
|
||||
</div>
|
||||
</CardSidebar>
|
||||
</div>
|
||||
<CardSidebar title="Engine Parameters" asChild>
|
||||
<div className="px-2 py-4">
|
||||
<EngineSetting componentData={engineSettings} />
|
||||
</div>
|
||||
</CardSidebar>
|
||||
)}
|
||||
</div>
|
||||
</CardSidebar>
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user