From c0949b2d7eb7e454adee2d70235ade70a066a1d0 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Thu, 11 Apr 2024 12:47:41 +0700
Subject: [PATCH] fix: better kill process tensorrt-llm (#2681)

---
 .../tensorrt-llm-extension/package.json       |  6 +-
 .../tensorrt-llm-extension/src/node/index.ts  | 78 +++++++++++++------
 2 files changed, 57 insertions(+), 27 deletions(-)
diff --git a/extensions/tensorrt-llm-extension/package.json b/extensions/tensorrt-llm-extension/package.json
index 9e21e2571..02b0b4e8c 100644
--- a/extensions/tensorrt-llm-extension/package.json
+++ b/extensions/tensorrt-llm-extension/package.json
@@ -37,10 +37,10 @@
     "@rollup/plugin-json": "^6.1.0",
     "@rollup/plugin-node-resolve": "^15.2.3",
     "@rollup/plugin-replace": "^5.0.5",
+    "@types/decompress": "4.2.7",
     "@types/node": "^20.11.4",
     "@types/os-utils": "^0.0.4",
     "@types/tcp-port-used": "^1.0.4",
-    "@types/decompress": "4.2.7",
     "cpx": "^1.5.0",
     "download-cli": "^1.1.1",
     "rimraf": "^3.0.2",
@@ -58,6 +58,7 @@
     "path-browserify": "^1.0.1",
     "rxjs": "^7.8.1",
     "tcp-port-used": "^1.0.2",
+    "terminate": "^2.6.1",
     "ulidx": "^2.3.0"
   },
   "engines": {
@@ -72,6 +73,7 @@
     "tcp-port-used",
     "fetch-retry",
     "decompress",
-    "@janhq/core"
+    "@janhq/core",
+    "terminate"
   ]
 }
diff --git a/extensions/tensorrt-llm-extension/src/node/index.ts b/extensions/tensorrt-llm-extension/src/node/index.ts
index ecb176d21..eb92c98af 100644
--- a/extensions/tensorrt-llm-extension/src/node/index.ts
+++ b/extensions/tensorrt-llm-extension/src/node/index.ts
@@ -9,12 +9,14 @@ import {
   PromptTemplate,
 } from '@janhq/core/node'
 import decompress from 'decompress'
+import terminate from 'terminate'
 
 // Polyfill fetch with retry
 const fetchRetry = fetchRT(fetch)
 
 const supportedPlatform = (): string[] => ['win32', 'linux']
 const supportedGpuArch = (): string[] => ['ampere', 'ada']
+const PORT_CHECK_INTERVAL = 100
 
 /**
  * The response object for model init operation.
@@ -64,28 +66,57 @@ async function loadModel(
 /**
  * Stops a Engine subprocess.
  */
-function unloadModel(): Promise<any> {
+function unloadModel(): Promise<void> {
   const controller = new AbortController()
   setTimeout(() => controller.abort(), 5000)
   debugLog(`Request to kill engine`)
 
-  subprocess?.kill()
-  return fetch(TERMINATE_ENGINE_URL, {
-    method: 'DELETE',
-    signal: controller.signal,
-  })
-    .then(() => {
-      subprocess = undefined
+  const killRequest = () => {
+    return fetch(TERMINATE_ENGINE_URL, {
+      method: 'DELETE',
+      signal: controller.signal,
     })
-    .catch(() => {}) // Do nothing with this attempt
-    .then(() => tcpPortUsed.waitUntilFree(parseInt(ENGINE_PORT), 300, 5000)) // Wait for port available
-    .then(() => debugLog(`Engine process is terminated`))
-    .catch((err) => {
-      debugLog(
-        `Could not kill running process on port ${ENGINE_PORT}. Might be another process running on the same port? ${err}`
-      )
-      return { err: 'PORT_NOT_AVAILABLE' }
+      .then(() => {
+        subprocess = undefined
+      })
+      .catch(() => {}) // Do nothing with this attempt
+      .then(() =>
+        tcpPortUsed.waitUntilFree(
+          parseInt(ENGINE_PORT),
+          PORT_CHECK_INTERVAL,
+          5000
+        )
+      ) // Wait for port available
+      .then(() => debugLog(`Engine process is terminated`))
+      .catch((err) => {
+        debugLog(
+          `Could not kill running process on port ${ENGINE_PORT}. Might be another process running on the same port? ${err}`
+        )
+        throw 'PORT_NOT_AVAILABLE'
+      })
+  }
+
+  if (subprocess?.pid) {
+    log(`[NITRO]::Debug: Killing PID ${subprocess.pid}`)
+    const pid = subprocess.pid
+    return new Promise((resolve, reject) => {
+      terminate(pid, function (err) {
+        if (err) {
+          killRequest().then(resolve, reject) // Kill failed: fall back to the HTTP request and settle the outer promise with its outcome
+        } else {
+          tcpPortUsed
+            .waitUntilFree(parseInt(ENGINE_PORT), PORT_CHECK_INTERVAL, 5000)
+            .then(() => resolve())
+            .then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
+            .catch(() => {
+              killRequest().then(resolve, reject) // Port still busy: fall back, and never leave the outer promise pending
+            })
+        }
+      })
     })
+  } else {
+    return killRequest()
+  }
 }
 /**
  * 1. Spawn engine process
@@ -97,11 +128,6 @@ async function runEngineAndLoadModel(
   systemInfo: SystemInformation
 ) {
   return unloadModel()
-    .then((res) => {
-      if (res?.error) {
-        throw new Error(res.error)
-      }
-    })
     .then(() => runEngine(systemInfo))
     .then(() => loadModelRequest(settings))
     .catch((err) => {
@@ -220,10 +246,12 @@ async function runEngine(systemInfo: SystemInformation): Promise<void> {
       reject(`child process exited with code ${code}`)
     })
 
-    tcpPortUsed.waitUntilUsed(parseInt(ENGINE_PORT), 300, 30000).then(() => {
-      debugLog(`Engine is ready`)
-      resolve()
-    })
+    tcpPortUsed
+      .waitUntilUsed(parseInt(ENGINE_PORT), PORT_CHECK_INTERVAL, 30000)
+      .then(() => {
+        debugLog(`Engine is ready`)
+        resolve()
+      })
   })
 }