fix: unloading a model while it is loading causes an unknown error (#2649)
* fix: unloading a model while it is loading causes an unknown error
* chore: mask placeholder
This commit is contained in:
parent
07e1a2295c
commit
9479beb7d1
@ -38,6 +38,8 @@ const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llama
|
|||||||
// The URL for the Nitro subprocess to kill itself
|
// The URL for the Nitro subprocess to kill itself
|
||||||
const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
|
const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
|
||||||
|
|
||||||
|
const NITRO_PORT_FREE_CHECK_INTERVAL = 100
|
||||||
|
|
||||||
// The supported model format
|
// The supported model format
|
||||||
// TODO: Should be an array to support more models
|
// TODO: Should be an array to support more models
|
||||||
const SUPPORTED_MODEL_FORMAT = '.gguf'
|
const SUPPORTED_MODEL_FORMAT = '.gguf'
|
||||||
@ -150,19 +152,9 @@ async function loadModel(
|
|||||||
async function runNitroAndLoadModel(systemInfo?: SystemInformation) {
|
async function runNitroAndLoadModel(systemInfo?: SystemInformation) {
|
||||||
// Gather system information for CPU physical cores and memory
|
// Gather system information for CPU physical cores and memory
|
||||||
return killSubprocess()
|
return killSubprocess()
|
||||||
.then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000))
|
.then(() =>
|
||||||
.then(() => {
|
tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
|
||||||
/**
|
)
|
||||||
* There is a problem with Windows process manager
|
|
||||||
* Should wait for awhile to make sure the port is free and subprocess is killed
|
|
||||||
* The tested threshold is 500ms
|
|
||||||
**/
|
|
||||||
if (process.platform === 'win32') {
|
|
||||||
return new Promise((resolve) => setTimeout(resolve, 500))
|
|
||||||
} else {
|
|
||||||
return Promise.resolve()
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.then(() => spawnNitroProcess(systemInfo))
|
.then(() => spawnNitroProcess(systemInfo))
|
||||||
.then(() => loadLLMModel(currentSettings))
|
.then(() => loadLLMModel(currentSettings))
|
||||||
.then(validateModelStatus)
|
.then(validateModelStatus)
|
||||||
@ -235,7 +227,7 @@ function loadLLMModel(settings: any): Promise<Response> {
|
|||||||
},
|
},
|
||||||
body: JSON.stringify(settings),
|
body: JSON.stringify(settings),
|
||||||
retries: 3,
|
retries: 3,
|
||||||
retryDelay: 500,
|
retryDelay: 300,
|
||||||
})
|
})
|
||||||
.then((res) => {
|
.then((res) => {
|
||||||
log(
|
log(
|
||||||
@ -266,7 +258,7 @@ async function validateModelStatus(): Promise<void> {
|
|||||||
'Content-Type': 'application/json',
|
'Content-Type': 'application/json',
|
||||||
},
|
},
|
||||||
retries: 5,
|
retries: 5,
|
||||||
retryDelay: 500,
|
retryDelay: 300,
|
||||||
}).then(async (res: Response) => {
|
}).then(async (res: Response) => {
|
||||||
log(
|
log(
|
||||||
`[NITRO]::Debug: Validate model state with response ${JSON.stringify(
|
`[NITRO]::Debug: Validate model state with response ${JSON.stringify(
|
||||||
@ -311,7 +303,9 @@ async function killSubprocess(): Promise<void> {
|
|||||||
signal: controller.signal,
|
signal: controller.signal,
|
||||||
})
|
})
|
||||||
.catch(() => {}) // Do nothing with this attempt
|
.catch(() => {}) // Do nothing with this attempt
|
||||||
.then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000))
|
.then(() =>
|
||||||
|
tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
|
||||||
|
)
|
||||||
.then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
|
.then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
|
||||||
.catch((err) => {
|
.catch((err) => {
|
||||||
log(
|
log(
|
||||||
@ -330,7 +324,7 @@ async function killSubprocess(): Promise<void> {
|
|||||||
return killRequest()
|
return killRequest()
|
||||||
} else {
|
} else {
|
||||||
return tcpPortUsed
|
return tcpPortUsed
|
||||||
.waitUntilFree(PORT, 300, 5000)
|
.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
|
||||||
.then(() => resolve())
|
.then(() => resolve())
|
||||||
.then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
|
.then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
|
||||||
.catch(() => {
|
.catch(() => {
|
||||||
@ -391,7 +385,9 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
|
|||||||
reject(`child process exited with code ${code}`)
|
reject(`child process exited with code ${code}`)
|
||||||
})
|
})
|
||||||
|
|
||||||
tcpPortUsed.waitUntilUsed(PORT, 300, 30000).then(() => {
|
tcpPortUsed
|
||||||
|
.waitUntilUsed(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 30000)
|
||||||
|
.then(() => {
|
||||||
log(`[NITRO]::Debug: Nitro is ready`)
|
log(`[NITRO]::Debug: Nitro is ready`)
|
||||||
resolve()
|
resolve()
|
||||||
})
|
})
|
||||||
|
|||||||
@ -15,7 +15,7 @@
|
|||||||
"description": "The Triton LLM API uses API keys for authentication.",
|
"description": "The Triton LLM API uses API keys for authentication.",
|
||||||
"controllerType": "input",
|
"controllerType": "input",
|
||||||
"controllerProps": {
|
"controllerProps": {
|
||||||
"placeholder": "API Key",
|
"placeholder": "xxxxxxxxxxxxxxxxxxxx",
|
||||||
"value": "",
|
"value": "",
|
||||||
"type": "password"
|
"type": "password"
|
||||||
}
|
}
|
||||||
|
|||||||
@ -25,6 +25,8 @@ export const stateModelAtom = atom<ModelState>({
|
|||||||
model: undefined,
|
model: undefined,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
export let loadModelController: AbortController | undefined
|
||||||
|
|
||||||
export function useActiveModel() {
|
export function useActiveModel() {
|
||||||
const [activeModel, setActiveModel] = useAtom(activeModelAtom)
|
const [activeModel, setActiveModel] = useAtom(activeModelAtom)
|
||||||
const activeThread = useAtomValue(activeThreadAtom)
|
const activeThread = useAtomValue(activeThreadAtom)
|
||||||
@ -46,6 +48,7 @@ export function useActiveModel() {
|
|||||||
console.debug(`Model ${modelId} is already initialized. Ignore..`)
|
console.debug(`Model ${modelId} is already initialized. Ignore..`)
|
||||||
return Promise.resolve()
|
return Promise.resolve()
|
||||||
}
|
}
|
||||||
|
loadModelController = new AbortController()
|
||||||
|
|
||||||
let model = downloadedModelsRef?.current.find((e) => e.id === modelId)
|
let model = downloadedModelsRef?.current.find((e) => e.id === modelId)
|
||||||
|
|
||||||
@ -104,6 +107,9 @@ export function useActiveModel() {
|
|||||||
})
|
})
|
||||||
})
|
})
|
||||||
.catch((error) => {
|
.catch((error) => {
|
||||||
|
if (loadModelController?.signal.aborted)
|
||||||
|
return Promise.reject(new Error('aborted'))
|
||||||
|
|
||||||
setStateModel(() => ({
|
setStateModel(() => ({
|
||||||
state: 'start',
|
state: 'start',
|
||||||
loading: false,
|
loading: false,
|
||||||
@ -131,12 +137,13 @@ export function useActiveModel() {
|
|||||||
|
|
||||||
setStateModel({ state: 'stop', loading: true, model: stoppingModel })
|
setStateModel({ state: 'stop', loading: true, model: stoppingModel })
|
||||||
const engine = EngineManager.instance().get(stoppingModel.engine)
|
const engine = EngineManager.instance().get(stoppingModel.engine)
|
||||||
await engine
|
return engine
|
||||||
?.unloadModel(stoppingModel)
|
?.unloadModel(stoppingModel)
|
||||||
.catch()
|
.catch()
|
||||||
.then(() => {
|
.then(() => {
|
||||||
setActiveModel(undefined)
|
setActiveModel(undefined)
|
||||||
setStateModel({ state: 'start', loading: false, model: undefined })
|
setStateModel({ state: 'start', loading: false, model: undefined })
|
||||||
|
loadModelController?.abort()
|
||||||
})
|
})
|
||||||
},
|
},
|
||||||
[activeModel, setActiveModel, setStateModel, stateModel]
|
[activeModel, setActiveModel, setStateModel, stateModel]
|
||||||
|
|||||||
@ -164,21 +164,18 @@ const Sidebar: React.FC = () => {
|
|||||||
</CardSidebar>
|
</CardSidebar>
|
||||||
|
|
||||||
<CardSidebar title="Model" isShow={true}>
|
<CardSidebar title="Model" isShow={true}>
|
||||||
<div className="px-2 pt-4">
|
<div className="flex flex-col gap-4 px-2 py-4">
|
||||||
<DropdownListSidebar />
|
<DropdownListSidebar />
|
||||||
|
|
||||||
{modelSettings.length > 0 && (
|
{modelSettings.length > 0 && (
|
||||||
<div className="mt-6">
|
|
||||||
<CardSidebar title="Inference Parameters" asChild>
|
<CardSidebar title="Inference Parameters" asChild>
|
||||||
<div className="px-2 py-4">
|
<div className="px-2 py-4">
|
||||||
<ModelSetting componentProps={modelSettings} />
|
<ModelSetting componentProps={modelSettings} />
|
||||||
</div>
|
</div>
|
||||||
</CardSidebar>
|
</CardSidebar>
|
||||||
</div>
|
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{promptTemplateSettings.length > 0 && (
|
{promptTemplateSettings.length > 0 && (
|
||||||
<div className="mt-4">
|
|
||||||
<CardSidebar title="Model Parameters" asChild>
|
<CardSidebar title="Model Parameters" asChild>
|
||||||
<div className="px-2 py-4">
|
<div className="px-2 py-4">
|
||||||
<PromptTemplateSetting
|
<PromptTemplateSetting
|
||||||
@ -186,17 +183,14 @@ const Sidebar: React.FC = () => {
|
|||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
</CardSidebar>
|
</CardSidebar>
|
||||||
</div>
|
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{engineSettings.length > 0 && (
|
{engineSettings.length > 0 && (
|
||||||
<div className="my-4">
|
|
||||||
<CardSidebar title="Engine Parameters" asChild>
|
<CardSidebar title="Engine Parameters" asChild>
|
||||||
<div className="px-2 py-4">
|
<div className="px-2 py-4">
|
||||||
<EngineSetting componentData={engineSettings} />
|
<EngineSetting componentData={engineSettings} />
|
||||||
</div>
|
</div>
|
||||||
</CardSidebar>
|
</CardSidebar>
|
||||||
</div>
|
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
</CardSidebar>
|
</CardSidebar>
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user