fix: unloading a model while it is loading causes an unknown error (#2649)

* fix: unloading a model while it is loading causes an unknown error
* chore: mask placeholder
2024-04-09 11:31:42 +07:00 · 2024-04-09 11:31:42 +07:00 · 9479beb7d1
commit 9479beb7d1
parent 07e1a2295c
4 changed files with 44 additions and 47 deletions
--- a/extensions/inference-nitro-extension/src/node/index.ts
+++ b/extensions/inference-nitro-extension/src/node/index.ts
@ -38,6 +38,8 @@ const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llama
 // The URL for the Nitro subprocess to kill itself
 const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
 const NITRO_PORT_FREE_CHECK_INTERVAL = 100
 // The supported model format
 // TODO: Should be an array to support more models
 const SUPPORTED_MODEL_FORMAT = '.gguf'
@ -150,19 +152,9 @@ async function loadModel(
 async function runNitroAndLoadModel(systemInfo?: SystemInformation) {
  // Gather system information for CPU physical cores and memory
  return killSubprocess()
-    .then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000))
+    .then(() =>
-    .then(() => {
+      tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
-      /**
+    )
       * There is a problem with Windows process manager
       * Should wait for awhile to make sure the port is free and subprocess is killed
       * The tested threshold is 500ms
       **/
      if (process.platform === 'win32') {
        return new Promise((resolve) => setTimeout(resolve, 500))
      } else {
        return Promise.resolve()
      }
    })
    .then(() => spawnNitroProcess(systemInfo))
    .then(() => loadLLMModel(currentSettings))
    .then(validateModelStatus)
@ -235,7 +227,7 @@ function loadLLMModel(settings: any): Promise<Response> {
    },
    body: JSON.stringify(settings),
    retries: 3,
-    retryDelay: 500,
+    retryDelay: 300,
  })
    .then((res) => {
      log(
@ -266,7 +258,7 @@ async function validateModelStatus(): Promise<void> {
      'Content-Type': 'application/json',
    },
    retries: 5,
-    retryDelay: 500,
+    retryDelay: 300,
  }).then(async (res: Response) => {
    log(
      `[NITRO]::Debug: Validate model state with response ${JSON.stringify(
@ -311,7 +303,9 @@ async function killSubprocess(): Promise<void> {
      signal: controller.signal,
    })
      .catch(() => {}) // Do nothing with this attempt
-      .then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000))
+      .then(() =>
        tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
      )
      .then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
      .catch((err) => {
        log(
@ -330,7 +324,7 @@ async function killSubprocess(): Promise<void> {
          return killRequest()
        } else {
          return tcpPortUsed
-            .waitUntilFree(PORT, 300, 5000)
+            .waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
            .then(() => resolve())
            .then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
            .catch(() => {
@ -391,7 +385,9 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
      reject(`child process exited with code ${code}`)
    })
-    tcpPortUsed.waitUntilUsed(PORT, 300, 30000).then(() => {
+    tcpPortUsed
      .waitUntilUsed(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 30000)
      .then(() => {
        log(`[NITRO]::Debug: Nitro is ready`)
        resolve()
      })
--- a/extensions/inference-triton-trtllm-extension/resources/settings.json
+++ b/extensions/inference-triton-trtllm-extension/resources/settings.json
@ -15,7 +15,7 @@
    "description": "The Triton LLM API uses API keys for authentication.",
    "controllerType": "input",
    "controllerProps": {
-      "placeholder": "API Key",
+      "placeholder": "xxxxxxxxxxxxxxxxxxxx",
      "value": "",
      "type": "password"
    }
--- a/web/hooks/useActiveModel.ts
+++ b/web/hooks/useActiveModel.ts
@ -25,6 +25,8 @@ export const stateModelAtom = atom<ModelState>({
  model: undefined,
 })
 export let loadModelController: AbortController | undefined
 export function useActiveModel() {
  const [activeModel, setActiveModel] = useAtom(activeModelAtom)
  const activeThread = useAtomValue(activeThreadAtom)
@ -46,6 +48,7 @@ export function useActiveModel() {
      console.debug(`Model ${modelId} is already initialized. Ignore..`)
      return Promise.resolve()
    }
    loadModelController = new AbortController()
    let model = downloadedModelsRef?.current.find((e) => e.id === modelId)
@ -104,6 +107,9 @@ export function useActiveModel() {
        })
      })
      .catch((error) => {
        if (loadModelController?.signal.aborted)
          return Promise.reject(new Error('aborted'))
        setStateModel(() => ({
          state: 'start',
          loading: false,
@ -131,12 +137,13 @@ export function useActiveModel() {
      setStateModel({ state: 'stop', loading: true, model: stoppingModel })
      const engine = EngineManager.instance().get(stoppingModel.engine)
-      await engine
+      return engine
        ?.unloadModel(stoppingModel)
        .catch()
        .then(() => {
          setActiveModel(undefined)
          setStateModel({ state: 'start', loading: false, model: undefined })
          loadModelController?.abort()
        })
    },
    [activeModel, setActiveModel, setStateModel, stateModel]
--- a/web/screens/Chat/Sidebar/index.tsx
+++ b/web/screens/Chat/Sidebar/index.tsx
@ -164,21 +164,18 @@ const Sidebar: React.FC = () => {
        </CardSidebar>
        <CardSidebar title="Model" isShow={true}>
-          <div className="px-2 pt-4">
+          <div className="flex flex-col gap-4 px-2 py-4">
            <DropdownListSidebar />
            {modelSettings.length > 0 && (
              <div className="mt-6">
              <CardSidebar title="Inference Parameters" asChild>
                <div className="px-2 py-4">
                  <ModelSetting componentProps={modelSettings} />
                </div>
              </CardSidebar>
              </div>
            )}
            {promptTemplateSettings.length > 0 && (
              <div className="mt-4">
              <CardSidebar title="Model Parameters" asChild>
                <div className="px-2 py-4">
                  <PromptTemplateSetting
@ -186,17 +183,14 @@ const Sidebar: React.FC = () => {
                  />
                </div>
              </CardSidebar>
              </div>
            )}
            {engineSettings.length > 0 && (
              <div className="my-4">
              <CardSidebar title="Engine Parameters" asChild>
                <div className="px-2 py-4">
                  <EngineSetting componentData={engineSettings} />
                </div>
              </CardSidebar>
              </div>
            )}
          </div>
        </CardSidebar>