fix: unload model while loading cause unknown error (#2649)

* fix: unload model while loading cause unknown error

* chore: mask placeholder
Authored by Louis on 2024-04-09 11:31:42 +07:00, committed by GitHub
parent 07e1a2295c
commit 9479beb7d1
4 changed files with 44 additions and 47 deletions
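
The core of the change, condensed from the hook diff below into a minimal standalone sketch (the load/unload callbacks here are hypothetical placeholders, not the real engine API): a module-level AbortController marks an in-flight load as deliberately cancelled, so a failure triggered by unloading mid-load surfaces as an intentional abort instead of an unknown error.

export let loadModelController: AbortController | undefined

export async function startModel(load: () => Promise<void>): Promise<void> {
  loadModelController = new AbortController()
  try {
    await load()
  } catch (error) {
    // A rejection caused by a deliberate unload is reported as 'aborted',
    // not as an unknown load error.
    if (loadModelController?.signal.aborted) throw new Error('aborted')
    throw error
  }
}

export function stopModel(unload: () => Promise<void>): Promise<void> {
  // Abort only after the unload settles, mirroring the hook change below.
  return unload().then(() => loadModelController?.abort())
}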

View File

@@ -38,6 +38,8 @@ const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llama
 // The URL for the Nitro subprocess to kill itself
 const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
+const NITRO_PORT_FREE_CHECK_INTERVAL = 100
+
 // The supported model format
 // TODO: Should be an array to support more models
 const SUPPORTED_MODEL_FORMAT = '.gguf'
@@ -150,19 +152,9 @@ async function loadModel(
 async function runNitroAndLoadModel(systemInfo?: SystemInformation) {
   // Gather system information for CPU physical cores and memory
   return killSubprocess()
-    .then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000))
-    .then(() => {
-      /**
-       * There is a problem with Windows process manager
-       * Should wait for awhile to make sure the port is free and subprocess is killed
-       * The tested threshold is 500ms
-       **/
-      if (process.platform === 'win32') {
-        return new Promise((resolve) => setTimeout(resolve, 500))
-      } else {
-        return Promise.resolve()
-      }
-    })
+    .then(() =>
+      tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
+    )
     .then(() => spawnNitroProcess(systemInfo))
     .then(() => loadLLMModel(currentSettings))
     .then(validateModelStatus)
@@ -235,7 +227,7 @@ function loadLLMModel(settings: any): Promise<Response> {
     },
     body: JSON.stringify(settings),
     retries: 3,
-    retryDelay: 500,
+    retryDelay: 300,
   })
     .then((res) => {
       log(
@@ -266,7 +258,7 @@ async function validateModelStatus(): Promise<void> {
       'Content-Type': 'application/json',
     },
     retries: 5,
-    retryDelay: 500,
+    retryDelay: 300,
   }).then(async (res: Response) => {
     log(
       `[NITRO]::Debug: Validate model state with response ${JSON.stringify(
@@ -311,7 +303,9 @@ async function killSubprocess(): Promise<void> {
         signal: controller.signal,
       })
         .catch(() => {}) // Do nothing with this attempt
-        .then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000))
+        .then(() =>
+          tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
+        )
        .then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
        .catch((err) => {
          log(
@@ -330,7 +324,7 @@ async function killSubprocess(): Promise<void> {
        return killRequest()
      } else {
        return tcpPortUsed
-         .waitUntilFree(PORT, 300, 5000)
+         .waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
          .then(() => resolve())
          .then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
          .catch(() => {
@@ -391,7 +385,9 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
        reject(`child process exited with code ${code}`)
      })
 
-      tcpPortUsed.waitUntilUsed(PORT, 300, 30000).then(() => {
+      tcpPortUsed
+        .waitUntilUsed(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 30000)
+        .then(() => {
        log(`[NITRO]::Debug: Nitro is ready`)
        resolve()
      })
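
For context on the interval change: tcp-port-used polls the port every retryTimeMs milliseconds until it reaches the desired state or timeOutMs expires, so the smaller NITRO_PORT_FREE_CHECK_INTERVAL notices the free/used transition sooner, and the hard-coded 500 ms Windows wait above is dropped in favor of the same polling. A rough standalone illustration (the port value is an assumed placeholder, not necessarily the extension's PORT constant):

import tcpPortUsed from 'tcp-port-used'

const PORT = 3928 // placeholder value; the extension defines its own PORT constant
const NITRO_PORT_FREE_CHECK_INTERVAL = 100 // poll every 100 ms instead of 300 ms

async function cycleNitroPort(): Promise<void> {
  // After killing the old subprocess, wait until the port is released...
  await tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
  // ...and after the new one is spawned, wait until it is listening again.
  await tcpPortUsed.waitUntilUsed(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 30000)
}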

View File

@ -15,7 +15,7 @@
"description": "The Triton LLM API uses API keys for authentication.", "description": "The Triton LLM API uses API keys for authentication.",
"controllerType": "input", "controllerType": "input",
"controllerProps": { "controllerProps": {
"placeholder": "API Key", "placeholder": "xxxxxxxxxxxxxxxxxxxx",
"value": "", "value": "",
"type": "password" "type": "password"
} }

View File

@@ -25,6 +25,8 @@ export const stateModelAtom = atom<ModelState>({
   model: undefined,
 })
 
+export let loadModelController: AbortController | undefined
+
 export function useActiveModel() {
   const [activeModel, setActiveModel] = useAtom(activeModelAtom)
   const activeThread = useAtomValue(activeThreadAtom)
@@ -46,6 +48,7 @@ export function useActiveModel() {
       console.debug(`Model ${modelId} is already initialized. Ignore..`)
       return Promise.resolve()
     }
+    loadModelController = new AbortController()
 
     let model = downloadedModelsRef?.current.find((e) => e.id === modelId)
@@ -104,6 +107,9 @@ export function useActiveModel() {
         })
       })
       .catch((error) => {
+        if (loadModelController?.signal.aborted)
+          return Promise.reject(new Error('aborted'))
+
         setStateModel(() => ({
           state: 'start',
           loading: false,
@@ -131,12 +137,13 @@ export function useActiveModel() {
       setStateModel({ state: 'stop', loading: true, model: stoppingModel })
       const engine = EngineManager.instance().get(stoppingModel.engine)
-      await engine
+      return engine
         ?.unloadModel(stoppingModel)
         .catch()
         .then(() => {
           setActiveModel(undefined)
           setStateModel({ state: 'start', loading: false, model: undefined })
+          loadModelController?.abort()
         })
     },
     [activeModel, setActiveModel, setStateModel, stateModel]
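
With the abort wired into stopModel, a consumer can unload the current model and start another without the in-flight load surfacing an unknown error: the guarded catch above rejects with 'aborted' instead. A hypothetical usage sketch (the import path is illustrative, and it assumes the hook exposes startModel/stopModel as in the rest of this codebase):

import { useActiveModel } from '@/hooks/useActiveModel' // illustrative path

const ModelSwitcher = ({ nextModelId }: { nextModelId: string }) => {
  const { startModel, stopModel } = useActiveModel()

  const switchModel = async () => {
    // stopModel() resolves after the unload chain settles and abort() has fired,
    // so any pending load is already flagged as an intentional cancellation.
    await stopModel()
    await startModel(nextModelId)
  }

  return <button onClick={switchModel}>Switch model</button>
}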

View File

@@ -164,21 +164,18 @@ const Sidebar: React.FC = () => {
         </CardSidebar>
         <CardSidebar title="Model" isShow={true}>
-          <div className="px-2 pt-4">
+          <div className="flex flex-col gap-4 px-2 py-4">
             <DropdownListSidebar />
             {modelSettings.length > 0 && (
-              <div className="mt-6">
               <CardSidebar title="Inference Parameters" asChild>
                 <div className="px-2 py-4">
                   <ModelSetting componentProps={modelSettings} />
                 </div>
               </CardSidebar>
-              </div>
             )}
             {promptTemplateSettings.length > 0 && (
-              <div className="mt-4">
               <CardSidebar title="Model Parameters" asChild>
                 <div className="px-2 py-4">
                   <PromptTemplateSetting
@@ -186,17 +183,14 @@ const Sidebar: React.FC = () => {
                   />
                 </div>
               </CardSidebar>
-              </div>
             )}
             {engineSettings.length > 0 && (
-              <div className="my-4">
               <CardSidebar title="Engine Parameters" asChild>
                 <div className="px-2 py-4">
                   <EngineSetting componentData={engineSettings} />
                 </div>
               </CardSidebar>
-              </div>
             )}
           </div>
         </CardSidebar>