fix: unloading a model while it is loading causes an unknown error (#2649)

* fix: unloading a model while it is loading causes an unknown error * chore: mask placeholder
2024-04-09 11:31:42 +07:00 · 2024-04-09 11:31:42 +07:00 · 9479beb7d1
commit 9479beb7d1
parent 07e1a2295c
4 changed files with 44 additions and 47 deletions
--- a/extensions/inference-nitro-extension/src/node/index.ts
+++ b/extensions/inference-nitro-extension/src/node/index.ts
@ -38,6 +38,8 @@ const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llama
 // The URL for the Nitro subprocess to kill itself
 const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`

+const NITRO_PORT_FREE_CHECK_INTERVAL = 100
+
 // The supported model format
 // TODO: Should be an array to support more models
 const SUPPORTED_MODEL_FORMAT = '.gguf'
@ -150,19 +152,9 @@ async function loadModel(
 async function runNitroAndLoadModel(systemInfo?: SystemInformation) {
  // Gather system information for CPU physical cores and memory
  return killSubprocess()
-    .then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000))
-    .then(() => {
-      /**
-       * There is a problem with Windows process manager
-       * Should wait for awhile to make sure the port is free and subprocess is killed
-       * The tested threshold is 500ms
-       **/
-      if (process.platform === 'win32') {
-        return new Promise((resolve) => setTimeout(resolve, 500))
-      } else {
-        return Promise.resolve()
-      }
-    })
+    .then(() =>
+      tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
+    )
    .then(() => spawnNitroProcess(systemInfo))
    .then(() => loadLLMModel(currentSettings))
    .then(validateModelStatus)
@ -235,7 +227,7 @@ function loadLLMModel(settings: any): Promise<Response> {
    },
    body: JSON.stringify(settings),
    retries: 3,
-    retryDelay: 500,
+    retryDelay: 300,
  })
    .then((res) => {
      log(
@ -266,7 +258,7 @@ async function validateModelStatus(): Promise<void> {
      'Content-Type': 'application/json',
    },
    retries: 5,
-    retryDelay: 500,
+    retryDelay: 300,
  }).then(async (res: Response) => {
    log(
      `[NITRO]::Debug: Validate model state with response ${JSON.stringify(
@ -311,7 +303,9 @@ async function killSubprocess(): Promise<void> {
      signal: controller.signal,
    })
      .catch(() => {}) // Do nothing with this attempt
-      .then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000))
+      .then(() =>
+        tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
+      )
      .then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
      .catch((err) => {
        log(
@ -330,7 +324,7 @@ async function killSubprocess(): Promise<void> {
          return killRequest()
        } else {
          return tcpPortUsed
-            .waitUntilFree(PORT, 300, 5000)
+            .waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
            .then(() => resolve())
            .then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
            .catch(() => {
@ -391,10 +385,12 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
      reject(`child process exited with code ${code}`)
    })

-    tcpPortUsed.waitUntilUsed(PORT, 300, 30000).then(() => {
-      log(`[NITRO]::Debug: Nitro is ready`)
-      resolve()
-    })
+    tcpPortUsed
+      .waitUntilUsed(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 30000)
+      .then(() => {
+        log(`[NITRO]::Debug: Nitro is ready`)
+        resolve()
+      })
  })
 }

--- a/extensions/inference-triton-trtllm-extension/resources/settings.json
+++ b/extensions/inference-triton-trtllm-extension/resources/settings.json
@ -15,7 +15,7 @@
    "description": "The Triton LLM API uses API keys for authentication.",
    "controllerType": "input",
    "controllerProps": {
-      "placeholder": "API Key",
+      "placeholder": "xxxxxxxxxxxxxxxxxxxx",
      "value": "",
      "type": "password"
    }
--- a/web/hooks/useActiveModel.ts
+++ b/web/hooks/useActiveModel.ts
@ -25,6 +25,8 @@ export const stateModelAtom = atom<ModelState>({
  model: undefined,
 })

+export let loadModelController: AbortController | undefined
+
 export function useActiveModel() {
  const [activeModel, setActiveModel] = useAtom(activeModelAtom)
  const activeThread = useAtomValue(activeThreadAtom)
@ -46,6 +48,7 @@ export function useActiveModel() {
      console.debug(`Model ${modelId} is already initialized. Ignore..`)
      return Promise.resolve()
    }
+    loadModelController = new AbortController()

    let model = downloadedModelsRef?.current.find((e) => e.id === modelId)

@ -104,6 +107,9 @@ export function useActiveModel() {
        })
      })
      .catch((error) => {
+        if (loadModelController?.signal.aborted)
+          return Promise.reject(new Error('aborted'))
+
        setStateModel(() => ({
          state: 'start',
          loading: false,
@ -131,12 +137,13 @@ export function useActiveModel() {

      setStateModel({ state: 'stop', loading: true, model: stoppingModel })
      const engine = EngineManager.instance().get(stoppingModel.engine)
-      await engine
+      return engine
        ?.unloadModel(stoppingModel)
        .catch()
        .then(() => {
          setActiveModel(undefined)
          setStateModel({ state: 'start', loading: false, model: undefined })
+          loadModelController?.abort()
        })
    },
    [activeModel, setActiveModel, setStateModel, stateModel]
--- a/web/screens/Chat/Sidebar/index.tsx
+++ b/web/screens/Chat/Sidebar/index.tsx
@ -164,39 +164,33 @@ const Sidebar: React.FC = () => {
        </CardSidebar>

        <CardSidebar title="Model" isShow={true}>
-          <div className="px-2 pt-4">
+          <div className="flex flex-col gap-4 px-2 py-4">
            <DropdownListSidebar />

            {modelSettings.length > 0 && (
-              <div className="mt-6">
-                <CardSidebar title="Inference Parameters" asChild>
-                  <div className="px-2 py-4">
-                    <ModelSetting componentProps={modelSettings} />
-                  </div>
-                </CardSidebar>
-              </div>
+              <CardSidebar title="Inference Parameters" asChild>
+                <div className="px-2 py-4">
+                  <ModelSetting componentProps={modelSettings} />
+                </div>
+              </CardSidebar>
            )}

            {promptTemplateSettings.length > 0 && (
-              <div className="mt-4">
-                <CardSidebar title="Model Parameters" asChild>
-                  <div className="px-2 py-4">
-                    <PromptTemplateSetting
-                      componentData={promptTemplateSettings}
-                    />
-                  </div>
-                </CardSidebar>
-              </div>
+              <CardSidebar title="Model Parameters" asChild>
+                <div className="px-2 py-4">
+                  <PromptTemplateSetting
+                    componentData={promptTemplateSettings}
+                  />
+                </div>
+              </CardSidebar>
            )}

            {engineSettings.length > 0 && (
-              <div className="my-4">
-                <CardSidebar title="Engine Parameters" asChild>
-                  <div className="px-2 py-4">
-                    <EngineSetting componentData={engineSettings} />
-                  </div>
-                </CardSidebar>
-              </div>
+              <CardSidebar title="Engine Parameters" asChild>
+                <div className="px-2 py-4">
+                  <EngineSetting componentData={engineSettings} />
+                </div>
+              </CardSidebar>
            )}
          </div>
        </CardSidebar>