From 9479beb7d13ee8b7a9c88563d91d0acb6e6af8ae Mon Sep 17 00:00:00 2001 From: Louis Date: Tue, 9 Apr 2024 11:31:42 +0700 Subject: [PATCH] fix: unload model while loading cause unknown error (#2649) * fix: unload model while loading cause unknown error * chore: mask placeholder --- .../src/node/index.ts | 38 ++++++++--------- .../resources/settings.json | 2 +- web/hooks/useActiveModel.ts | 9 +++- web/screens/Chat/Sidebar/index.tsx | 42 ++++++++----------- 4 files changed, 44 insertions(+), 47 deletions(-) diff --git a/extensions/inference-nitro-extension/src/node/index.ts b/extensions/inference-nitro-extension/src/node/index.ts index 638d4c5eb..7cde94f79 100644 --- a/extensions/inference-nitro-extension/src/node/index.ts +++ b/extensions/inference-nitro-extension/src/node/index.ts @@ -38,6 +38,8 @@ const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llama // The URL for the Nitro subprocess to kill itself const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy` +const NITRO_PORT_FREE_CHECK_INTERVAL = 100 + // The supported model format // TODO: Should be an array to support more models const SUPPORTED_MODEL_FORMAT = '.gguf' @@ -150,19 +152,9 @@ async function loadModel( async function runNitroAndLoadModel(systemInfo?: SystemInformation) { // Gather system information for CPU physical cores and memory return killSubprocess() - .then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000)) - .then(() => { - /** - * There is a problem with Windows process manager - * Should wait for awhile to make sure the port is free and subprocess is killed - * The tested threshold is 500ms - **/ - if (process.platform === 'win32') { - return new Promise((resolve) => setTimeout(resolve, 500)) - } else { - return Promise.resolve() - } - }) + .then(() => + tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000) + ) .then(() => spawnNitroProcess(systemInfo)) .then(() => loadLLMModel(currentSettings)) .then(validateModelStatus) @@ -235,7 +227,7 @@ function loadLLMModel(settings: any): Promise { }, body: JSON.stringify(settings), retries: 3, - retryDelay: 500, + retryDelay: 300, }) .then((res) => { log( @@ -266,7 +258,7 @@ async function validateModelStatus(): Promise { 'Content-Type': 'application/json', }, retries: 5, - retryDelay: 500, + retryDelay: 300, }).then(async (res: Response) => { log( `[NITRO]::Debug: Validate model state with response ${JSON.stringify( @@ -311,7 +303,9 @@ async function killSubprocess(): Promise { signal: controller.signal, }) .catch(() => {}) // Do nothing with this attempt - .then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000)) + .then(() => + tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000) + ) .then(() => log(`[NITRO]::Debug: Nitro process is terminated`)) .catch((err) => { log( @@ -330,7 +324,7 @@ async function killSubprocess(): Promise { return killRequest() } else { return tcpPortUsed - .waitUntilFree(PORT, 300, 5000) + .waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000) .then(() => resolve()) .then(() => log(`[NITRO]::Debug: Nitro process is terminated`)) .catch(() => { @@ -391,10 +385,12 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise { reject(`child process exited with code ${code}`) }) - tcpPortUsed.waitUntilUsed(PORT, 300, 30000).then(() => { - log(`[NITRO]::Debug: Nitro is ready`) - resolve() - }) + tcpPortUsed + .waitUntilUsed(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 30000) + .then(() => { + log(`[NITRO]::Debug: Nitro is ready`) + resolve() + }) }) } diff --git a/extensions/inference-triton-trtllm-extension/resources/settings.json b/extensions/inference-triton-trtllm-extension/resources/settings.json index bba69805e..6c90e917d 100644 --- a/extensions/inference-triton-trtllm-extension/resources/settings.json +++ b/extensions/inference-triton-trtllm-extension/resources/settings.json @@ -15,7 +15,7 @@ "description": "The Triton LLM API uses API keys for authentication.", "controllerType": "input", "controllerProps": { - "placeholder": "API Key", + "placeholder": "xxxxxxxxxxxxxxxxxxxx", "value": "", "type": "password" } diff --git a/web/hooks/useActiveModel.ts b/web/hooks/useActiveModel.ts index e2cba75b7..34ffd1af7 100644 --- a/web/hooks/useActiveModel.ts +++ b/web/hooks/useActiveModel.ts @@ -25,6 +25,8 @@ export const stateModelAtom = atom({ model: undefined, }) +export let loadModelController: AbortController | undefined + export function useActiveModel() { const [activeModel, setActiveModel] = useAtom(activeModelAtom) const activeThread = useAtomValue(activeThreadAtom) @@ -46,6 +48,7 @@ export function useActiveModel() { console.debug(`Model ${modelId} is already initialized. Ignore..`) return Promise.resolve() } + loadModelController = new AbortController() let model = downloadedModelsRef?.current.find((e) => e.id === modelId) @@ -104,6 +107,9 @@ export function useActiveModel() { }) }) .catch((error) => { + if (loadModelController?.signal.aborted) + return Promise.reject(new Error('aborted')) + setStateModel(() => ({ state: 'start', loading: false, @@ -131,12 +137,13 @@ export function useActiveModel() { setStateModel({ state: 'stop', loading: true, model: stoppingModel }) const engine = EngineManager.instance().get(stoppingModel.engine) - await engine + return engine ?.unloadModel(stoppingModel) .catch() .then(() => { setActiveModel(undefined) setStateModel({ state: 'start', loading: false, model: undefined }) + loadModelController?.abort() }) }, [activeModel, setActiveModel, setStateModel, stateModel] diff --git a/web/screens/Chat/Sidebar/index.tsx b/web/screens/Chat/Sidebar/index.tsx index bee0fa280..78249c242 100644 --- a/web/screens/Chat/Sidebar/index.tsx +++ b/web/screens/Chat/Sidebar/index.tsx @@ -164,39 +164,33 @@ const Sidebar: React.FC = () => { -
+
{modelSettings.length > 0 && ( -
- -
- -
-
-
+ +
+ +
+
)} {promptTemplateSettings.length > 0 && ( -
- -
- -
-
-
+ +
+ +
+
)} {engineSettings.length > 0 && ( -
- -
- -
-
-
+ +
+ +
+
)}