fix: cancel loading model with stop action (#2607)

2024-04-04 10:57:54 +07:00 · 2024-04-04 10:57:54 +07:00 · 1eaf13b13e
commit 1eaf13b13e
parent 7f92a5aef0
11 changed files with 96 additions and 45 deletions
--- a/extensions/inference-nitro-extension/package.json
+++ b/extensions/inference-nitro-extension/package.json
@ -51,6 +51,7 @@
    "path-browserify": "^1.0.1",
    "rxjs": "^7.8.1",
    "tcp-port-used": "^1.0.2",
+    "terminate": "^2.6.1",
    "ulidx": "^2.3.0"
  },
  "engines": {
--- a/extensions/inference-nitro-extension/src/node/index.ts
+++ b/extensions/inference-nitro-extension/src/node/index.ts
@ -13,6 +13,7 @@ import {
  SystemInformation,
 } from '@janhq/core/node'
 import { executableNitroFile } from './execute'
+import terminate from 'terminate'

 // Polyfill fetch with retry
 const fetchRetry = fetchRT(fetch)
@ -304,23 +305,43 @@ async function killSubprocess(): Promise<void> {
  setTimeout(() => controller.abort(), 5000)
  log(`[NITRO]::Debug: Request to kill Nitro`)

-  return fetch(NITRO_HTTP_KILL_URL, {
-    method: 'DELETE',
-    signal: controller.signal,
-  })
-    .then(() => {
-      subprocess?.kill()
-      subprocess = undefined
+  const killRequest = () => {
+    return fetch(NITRO_HTTP_KILL_URL, {
+      method: 'DELETE',
+      signal: controller.signal,
    })
-    .catch(() => {}) // Do nothing with this attempt
-    .then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000))
-    .then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
-    .catch((err) => {
-      log(
-        `[NITRO]::Debug: Could not kill running process on port ${PORT}. Might be another process running on the same port? ${err}`
-      )
-      throw 'PORT_NOT_AVAILABLE'
+      .catch(() => {}) // Do nothing with this attempt
+      .then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000))
+      .then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
+      .catch((err) => {
+        log(
+          `[NITRO]::Debug: Could not kill running process on port ${PORT}. Might be another process running on the same port? ${err}`
+        )
+        throw 'PORT_NOT_AVAILABLE'
+      })
+  }
+
+  if (subprocess?.pid) {
+    log(`[NITRO]::Debug: Killing PID ${subprocess.pid}`)
+    const pid = subprocess.pid
+    return new Promise((resolve, reject) => {
+      // Every path below must settle the outer promise: the HTTP kill fallback
+      // is chained into resolve/reject so awaiting callers cannot hang.
+      terminate(pid, (err) => {
+        if (err) {
+          // Terminating by PID failed; fall back to the HTTP kill request.
+          killRequest().then(resolve, reject)
+          return
+        }
+        tcpPortUsed
+          .waitUntilFree(PORT, 300, 5000)
+          .then(() => {
+            log(`[NITRO]::Debug: Nitro process is terminated`)
+            resolve()
+          })
+          // Port is still in use after terminate(); retry via the kill request
+          // and surface its outcome (including 'PORT_NOT_AVAILABLE') to callers.
+          .catch(() => killRequest().then(resolve, reject))
+      })
    })
+  } else {
+    return killRequest()
+  }
 }

 /**
--- a/web/containers/Loader/ModelReload.tsx
+++ b/web/containers/Loader/ModelReload.tsx
@ -41,7 +41,7 @@ export default function ModelReload() {
          style={{ width: `${loader}%` }}
        />
        <span className="relative z-10">
-          Reloading model {stateModel.model}
+          Reloading model {stateModel.model?.id}
        </span>
      </div>
    </div>
--- a/web/containers/Loader/ModelStart.tsx
+++ b/web/containers/Loader/ModelStart.tsx
@ -44,7 +44,7 @@ export default function ModelStart() {
        <span className="relative z-10">
          {stateModel.state === 'start' ? 'Starting' : 'Stopping'}
          &nbsp;model&nbsp;
-          {stateModel.model}
+          {stateModel.model?.id}
        </span>
      </div>
    </div>
--- a/web/containers/Providers/EventHandler.tsx
+++ b/web/containers/Providers/EventHandler.tsx
@ -79,7 +79,7 @@ export default function EventHandler({ children }: { children: ReactNode }) {

  const onModelStopped = useCallback(() => {
    setActiveModel(undefined)
-    setStateModel({ state: 'start', loading: false, model: '' })
+    setStateModel({ state: 'start', loading: false, model: undefined })
  }, [setActiveModel, setStateModel])

  const updateThreadTitle = useCallback(
--- a/web/hooks/useActiveModel.ts
+++ b/web/hooks/useActiveModel.ts
@ -13,10 +13,16 @@ import { activeThreadAtom } from '@/helpers/atoms/Thread.atom'
 export const activeModelAtom = atom<Model | undefined>(undefined)
 export const loadModelErrorAtom = atom<string | undefined>(undefined)

-export const stateModelAtom = atom({
+type ModelState = { // value type held by stateModelAtom
+  state: string // 'start' and 'stop' are the values written in this hook
+  loading: boolean // true while a start or stop is in progress
+  model?: Model // model being started or stopped; undefined when idle
+}
+
+export const stateModelAtom = atom<ModelState>({
  state: 'start',
  loading: false,
-  model: '',
+  model: undefined,
 })

 export function useActiveModel() {
@ -35,7 +41,7 @@ export function useActiveModel() {
  const startModel = async (modelId: string) => {
    if (
      (activeModel && activeModel.id === modelId) ||
-      (stateModel.model === modelId && stateModel.loading)
+      (stateModel.model?.id === modelId && stateModel.loading)
    ) {
      console.debug(`Model ${modelId} is already initialized. Ignore..`)
      return Promise.resolve()
@ -52,7 +58,7 @@ export function useActiveModel() {

    setActiveModel(undefined)

-    setStateModel({ state: 'start', loading: true, model: modelId })
+    setStateModel({ state: 'start', loading: true, model })

    if (!model) {
      toaster({
@ -63,7 +69,7 @@ export function useActiveModel() {
      setStateModel(() => ({
        state: 'start',
        loading: false,
-        model: '',
+        model: undefined,
      }))

      return Promise.reject(`Model ${modelId} not found!`)
@ -89,7 +95,7 @@ export function useActiveModel() {
        setStateModel(() => ({
          state: 'stop',
          loading: false,
-          model: model.id,
+          model,
        }))
        toaster({
          title: 'Success!',
@ -101,7 +107,7 @@ export function useActiveModel() {
        setStateModel(() => ({
          state: 'start',
          loading: false,
-          model: model.id,
+          model,
        }))

        toaster({
@ -114,20 +120,39 @@ export function useActiveModel() {
      })
  }

-  const stopModel = useCallback(async () => {
-    if (!activeModel || (stateModel.state === 'stop' && stateModel.loading))
+  /**
+   * Unloads a model and resets the model state.
+   *
+   * The active model takes precedence; the optional `model` argument is used
+   * when there is no active model (for example, to cancel a model that is
+   * still loading).
+   */
+  const stopModel = useCallback(
+    async (model?: Model) => {
+      const stoppingModel = activeModel || model
+      // Nothing to stop, or a stop is already in flight and the caller did not
+      // name a model explicitly.
+      if (
+        !stoppingModel ||
+        (!model && stateModel.state === 'stop' && stateModel.loading)
+      )
+        return
+
+      setStateModel({ state: 'stop', loading: true, model: stoppingModel })
+      const engine = EngineManager.instance().get(stoppingModel.engine)
+      try {
+        await engine?.unloadModel(stoppingModel)
+      } catch (error) {
+        // Best effort: a failed unload must not leave the UI stuck in
+        // "stopping". (A bare `.catch()` without a handler swallows nothing.)
+        console.debug(`Failed to unload model ${stoppingModel.id}`, error)
+      }
+      // Reset unconditionally, including when no engine is registered for
+      // this model.
+      setActiveModel(undefined)
+      setStateModel({ state: 'start', loading: false, model: undefined })
+    },
+    [activeModel, setActiveModel, setStateModel, stateModel]
+  )
+
+  const stopInference = useCallback(async () => {
+    // Loading model
+    if (stateModel.loading) {
+      stopModel(stateModel.model)
      return
+    }
+    if (!activeModel) return

-    setStateModel({ state: 'stop', loading: true, model: activeModel.id })
    const engine = EngineManager.instance().get(activeModel.engine)
-    await engine
-      ?.unloadModel(activeModel)
-      .catch()
-      .then(() => {
-        setActiveModel(undefined)
-        setStateModel({ state: 'start', loading: false, model: '' })
-      })
-  }, [activeModel, stateModel, setActiveModel, setStateModel])
+    engine?.stopInference()
+  }, [activeModel, stateModel, stopModel])

-  return { activeModel, startModel, stopModel, stateModel }
+  return { activeModel, startModel, stopModel, stopInference, stateModel }
 }
--- a/web/hooks/useCreateNewThread.ts
+++ b/web/hooks/useCreateNewThread.ts
@ -19,6 +19,7 @@ import { fileUploadAtom } from '@/containers/Providers/Jotai'

 import { generateThreadId } from '@/utils/thread'

+import { useActiveModel } from './useActiveModel'
 import useRecommendedModel from './useRecommendedModel'

 import useSetActiveThread from './useSetActiveThread'
@ -65,6 +66,7 @@ export const useCreateNewThread = () => {
  const { recommendedModel, downloadedModels } = useRecommendedModel()

  const threads = useAtomValue(threadsAtom)
+  const { stopInference } = useActiveModel()

  const requestCreateNewThread = async (
    assistant: Assistant,
@ -72,7 +74,7 @@ export const useCreateNewThread = () => {
  ) => {
    // Stop generating if any
    setIsGeneratingResponse(false)
-    events.emit(InferenceEvent.OnInferenceStopped, {})
+    stopInference()

    const defaultModel = model ?? recommendedModel ?? downloadedModels[0]

--- a/web/screens/Chat/ChatInput/index.tsx
+++ b/web/screens/Chat/ChatInput/index.tsx
@ -44,7 +44,7 @@ import {

 const ChatInput: React.FC = () => {
  const activeThread = useAtomValue(activeThreadAtom)
-  const { stateModel } = useActiveModel()
+  const { stateModel, activeModel } = useActiveModel()
  const messages = useAtomValue(getCurrentChatMessagesAtom)

  const [currentPrompt, setCurrentPrompt] = useAtom(currentPromptAtom)
@ -60,6 +60,7 @@ const ChatInput: React.FC = () => {
  const experimentalFeature = useAtomValue(experimentalFeatureEnabledAtom)
  const isGeneratingResponse = useAtomValue(isGeneratingResponseAtom)
  const threadStates = useAtomValue(threadStatesAtom)
+  const { stopInference } = useActiveModel()

  const isStreamingResponse = Object.values(threadStates).some(
    (threadState) => threadState.waitingForResponse
@ -107,7 +108,7 @@ const ChatInput: React.FC = () => {
  }

  const onStopInferenceClick = async () => {
-    events.emit(InferenceEvent.OnInferenceStopped, {})
+    stopInference()
  }

  /**
--- a/web/screens/Chat/EditChatInput/index.tsx
+++ b/web/screens/Chat/EditChatInput/index.tsx
@ -50,7 +50,7 @@ type Props = {

 const EditChatInput: React.FC<Props> = ({ message }) => {
  const activeThread = useAtomValue(activeThreadAtom)
-  const { stateModel } = useActiveModel()
+  const { stateModel, stopInference } = useActiveModel()
  const messages = useAtomValue(getCurrentChatMessagesAtom)

  const [editPrompt, setEditPrompt] = useAtom(editPromptAtom)
@ -127,7 +127,7 @@ const EditChatInput: React.FC<Props> = ({ message }) => {
  }

  const onStopInferenceClick = async () => {
-    events.emit(InferenceEvent.OnInferenceStopped, {})
+    stopInference()
  }

  return (
--- a/web/screens/Chat/LoadModelError/index.tsx
+++ b/web/screens/Chat/LoadModelError/index.tsx
@ -34,7 +34,8 @@ const LoadModelError = () => {
          <ModalTroubleShooting />
        </div>
      ) : loadModelError &&
-        loadModelError?.includes('EXTENSION_IS_NOT_INSTALLED') ? (
+        typeof loadModelError.includes === 'function' &&
+        loadModelError.includes('EXTENSION_IS_NOT_INSTALLED') ? (
        <div className="flex w-full flex-col items-center text-center text-sm font-medium text-gray-500">
          <p className="w-[90%]">
            Model is currently unavailable. Please switch to a different model
--- a/web/screens/Settings/Models/Row.tsx
+++ b/web/screens/Settings/Models/Row.tsx
@ -43,7 +43,7 @@ export default function RowModel(props: RowModelProps) {
  const { activeModel, startModel, stopModel, stateModel } = useActiveModel()
  const { deleteModel } = useDeleteModel()

-  const isActiveModel = stateModel.model === props.data.id
+  const isActiveModel = stateModel.model?.id === props.data.id

  const [serverEnabled, setServerEnabled] = useAtom(serverEnabledAtom)

@ -84,7 +84,7 @@ export default function RowModel(props: RowModelProps) {
            <span className="h-2 w-2 rounded-full bg-green-500" />
            <span>Active</span>
          </Badge>
-        ) : stateModel.loading && stateModel.model === props.data.id ? (
+        ) : stateModel.loading && stateModel.model?.id === props.data.id ? (
          <Badge
            className="inline-flex items-center space-x-2"
            themes="secondary"