chore: replace nitro by cortex-cpp (#2912)
parent 218259945f
commit 537ef20a54
@@ -9,11 +9,11 @@ export const SUPPORTED_MODEL_FORMAT = '.gguf'
 // The URL for the Nitro subprocess
 const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}`
 // The URL for the Nitro subprocess to load a model
-export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel`
+export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`
 // The URL for the Nitro subprocess to validate a model
-export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus`
+export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus`

 // The URL for the Nitro subprocess to kill itself
 export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`

-export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/llamacpp/chat_completion` // default nitro url
+export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/server/chat_completion` // default nitro url
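
For reference, a minimal sketch of how a load request against the renamed /inferences/server/loadmodel route might look (hypothetical helper, assuming a Node 18+ global fetch and the default 127.0.0.1:3928 address used elsewhere in this diff; the field names mirror the settings built later in loadModel):

// Hypothetical illustration, not part of the commit.
const LOAD_MODEL_URL = 'http://127.0.0.1:3928/inferences/server/loadmodel'

async function loadModelSketch(modelId: string, ggufPath: string): Promise<void> {
  const res = await fetch(LOAD_MODEL_URL, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    // cortex-cpp is addressed by model id in addition to the GGUF path (see loadModel later in this diff)
    body: JSON.stringify({ model: modelId, llama_model_path: ggufPath, ngl: 100 }),
  })
  if (!res.ok) throw new Error(`Load model failed with status ${res.status}`)
}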
@@ -144,12 +144,12 @@ const runNitroAndLoadModel = async (modelId: string, modelSettings: NitroModelSe
 }

 const spawnNitroProcess = async (): Promise<void> => {
-log(`[SERVER]::Debug: Spawning Nitro subprocess...`)
+log(`[SERVER]::Debug: Spawning cortex subprocess...`)

 let binaryFolder = join(
 getJanExtensionsPath(),
 '@janhq',
-'inference-nitro-extension',
+'inference-cortex-extension',
 'dist',
 'bin'
 )
@@ -160,7 +160,7 @@ const spawnNitroProcess = async (): Promise<void> => {
 const args: string[] = ['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()]
 // Execute the binary
 log(
-`[SERVER]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
+`[SERVER]::Debug: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}`
 )
 subprocess = spawn(
 executableOptions.executablePath,
@@ -184,12 +184,12 @@
 })

 subprocess.on('close', (code: any) => {
-log(`[SERVER]::Debug: Nitro exited with code: ${code}`)
+log(`[SERVER]::Debug: cortex exited with code: ${code}`)
 subprocess = undefined
 })

 tcpPortUsed.waitUntilUsed(NITRO_DEFAULT_PORT, 300, 30000).then(() => {
-log(`[SERVER]::Debug: Nitro is ready`)
+log(`[SERVER]::Debug: cortex is ready`)
 })
 }

@@ -203,13 +203,13 @@ const executableNitroFile = (): NitroExecutableOptions => {
 let binaryFolder = join(
 getJanExtensionsPath(),
 '@janhq',
-'inference-nitro-extension',
+'inference-cortex-extension',
 'dist',
 'bin'
 )

 let cudaVisibleDevices = ''
-let binaryName = 'nitro'
+let binaryName = 'cortex-cpp'
 /**
 * The binary folder is different for each platform.
 */
@@ -228,12 +228,16 @@
 }
 cudaVisibleDevices = nvidiaInfo['gpu_highest_vram']
 }
-binaryName = 'nitro.exe'
+binaryName = 'cortex-cpp.exe'
 } else if (process.platform === 'darwin') {
 /**
 * For MacOS: mac-universal both Silicon and InteL
 */
-binaryFolder = join(binaryFolder, 'mac-universal')
+if(process.arch === 'arm64') {
+binaryFolder = join(binaryFolder, 'mac-arm64')
+} else {
+binaryFolder = join(binaryFolder, 'mac-amd64')
+}
 } else {
 /**
 * For Linux: linux-cpu, linux-cuda-11-7, linux-cuda-12-0
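
The darwin branch above now picks a per-architecture folder instead of the old mac-universal bundle. A hedged sketch of the overall folder/binary selection (illustrative helper only; the GPU-specific Windows/Linux folders are collapsed to the CPU variants here):

// Hypothetical illustration, not part of the commit.
import { join } from 'path'

function cortexBinaryPath(binaryFolder: string): string {
  let platformFolder: string
  let binaryName = 'cortex-cpp'
  if (process.platform === 'win32') {
    platformFolder = 'win-cpu' // or win-cuda-11-7 / win-cuda-12-0 / win-vulkan, per GPU settings
    binaryName = 'cortex-cpp.exe'
  } else if (process.platform === 'darwin') {
    platformFolder = process.arch === 'arm64' ? 'mac-arm64' : 'mac-amd64'
  } else {
    platformFolder = 'linux-cpu' // or linux-cuda-11-7 / linux-cuda-12-0 / linux-vulkan
  }
  return join(binaryFolder, platformFolder, binaryName)
}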
@@ -300,7 +304,7 @@ const loadLLMModel = async (settings: NitroModelSettings): Promise<Response> =>
 retryDelay: 500,
 })
 .then((res: any) => {
-log(`[SERVER]::Debug: Load model success with response ${JSON.stringify(res)}`)
+log(`[SERVER]::Debug: Load model request with response ${JSON.stringify(res)}`)
 return Promise.resolve(res)
 })
 .catch((err: any) => {
@@ -327,7 +331,7 @@ export const stopModel = async (_modelId: string) => {
 })
 }, 5000)
 const tcpPortUsed = require('tcp-port-used')
-log(`[SERVER]::Debug: Request to kill Nitro`)
+log(`[SERVER]::Debug: Request to kill cortex`)

 fetch(NITRO_HTTP_KILL_URL, {
 method: 'DELETE',
@@ -4,7 +4,7 @@ import { log } from './logger'

 export const getSystemResourceInfo = async (): Promise<SystemResourceInfo> => {
 const cpu = await physicalCpuCount()
-log(`[NITRO]::CPU information - ${cpu}`)
+log(`[CORTEX]::CPU information - ${cpu}`)

 return {
 numCpuPhysicalCore: cpu,
@@ -10,11 +10,12 @@ export function toolRetrievalUpdateTextSplitter(
 }
 export async function toolRetrievalIngestNewDocument(
 file: string,
+model: string,
 engine: string
 ) {
 const filePath = path.join(getJanDataFolderPath(), normalizeFilePath(file))
 const threadPath = path.dirname(filePath.replace('files', ''))
-retrieval.updateEmbeddingEngine(engine)
+retrieval.updateEmbeddingEngine(model, engine)
 return retrieval
 .ingestAgentKnowledge(filePath, `${threadPath}/memory`)
 .catch((err) => {
@@ -28,14 +28,14 @@ export class Retrieval {
 })
 }

-public updateEmbeddingEngine(engine: string): void {
+public updateEmbeddingEngine(model: string, engine: string): void {
 // Engine settings are not compatible with the current embedding model params
 // Switch case manually for now
 if (engine === 'nitro') {
 this.embeddingModel = new OpenAIEmbeddings(
-{ openAIApiKey: 'nitro-embedding' },
+{ openAIApiKey: 'nitro-embedding', model },
 // TODO: Raw settings
-{ basePath: 'http://127.0.0.1:3928/v1' }
+{ basePath: 'http://127.0.0.1:3928/v1' },
 )
 } else {
 // Fallback to OpenAI Settings
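
A sketch of what the new two-argument updateEmbeddingEngine amounts to: the model id is forwarded into the OpenAIEmbeddings client that points at the local server. The import path and the fallback branch are assumptions for illustration, not part of the commit:

// Hypothetical illustration, not part of the commit; the import path is an assumption.
import { OpenAIEmbeddings } from 'langchain/embeddings/openai'

function buildEmbeddingModel(model: string, engine: string): OpenAIEmbeddings {
  if (engine === 'nitro') {
    // Local OpenAI-compatible server: pass the model id through and point at cortex-cpp
    return new OpenAIEmbeddings(
      { openAIApiKey: 'nitro-embedding', model },
      { basePath: 'http://127.0.0.1:3928/v1' }
    )
  }
  // Fallback to regular OpenAI settings (API key assumed to come from the environment here)
  return new OpenAIEmbeddings({ openAIApiKey: process.env.OPENAI_API_KEY })
}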
@@ -36,6 +36,7 @@ export class RetrievalTool extends InferenceTool {
 NODE,
 'toolRetrievalIngestNewDocument',
 docFile,
+data.model?.id,
 data.model?.engine
 )
 } else {
extensions/inference-nitro-extension/.gitignore (vendored, new file, +2)
@@ -0,0 +1,2 @@
+bin
+!version.txt
@@ -1 +1 @@
-0.3.22
+0.4.4
@@ -1,3 +1,3 @@
 @echo off
-set /p NITRO_VERSION=<./bin/version.txt
-.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan
+set /p CORTEX_VERSION=<./bin/version.txt
+.\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan
@@ -1,8 +1,8 @@
 {
-"name": "@janhq/inference-nitro-extension",
-"productName": "Nitro Inference Engine",
+"name": "@janhq/inference-cortex-extension",
+"productName": "Cortex Inference Engine",
 "version": "1.0.7",
-"description": "This extension embeds Nitro, a lightweight (3mb) inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
+"description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
 "main": "dist/index.js",
 "node": "dist/node/index.cjs.js",
 "author": "Jan <service@jan.ai>",
@@ -10,8 +10,8 @@
 "scripts": {
 "test": "jest",
 "build": "tsc --module commonjs && rollup -c rollup.config.ts",
-"downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-avx2.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/nitro",
-"downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-universal.tar.gz -o ./bin/ && mkdir -p ./bin/mac-universal && tar -zxvf ./bin/nitro-${NITRO_VERSION}-mac-universal.tar.gz --strip-components=1 -C ./bin/mac-universal && rm -rf ./bin/nitro-${NITRO_VERSION}-mac-universal.tar.gz && chmod +x ./bin/mac-universal/nitro",
+"downloadnitro:linux": "CORTEX_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-avx2.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/cortex-cpp",
+"downloadnitro:darwin": "CORTEX_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz -o ./bin/ && mkdir -p ./bin/mac-arm64 && tar -zxvf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz --strip-components=1 -C ./bin/mac-arm64 && rm -rf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz && chmod +x ./bin/mac-arm64/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz -o ./bin/ && mkdir -p ./bin/mac-amd64 && tar -zxvf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz --strip-components=1 -C ./bin/mac-amd64 && rm -rf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz && chmod +x ./bin/mac-amd64/cortex-cpp",
 "downloadnitro:win32": "download.bat",
 "downloadnitro": "run-script-os",
 "build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
@@ -80,7 +80,7 @@ export default [
 DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
 INFERENCE_URL: JSON.stringify(
 process.env.INFERENCE_URL ||
-'http://127.0.0.1:3928/inferences/llamacpp/chat_completion'
+'http://127.0.0.1:3928/inferences/server/chat_completion'
 ),
 TROUBLESHOOTING_URL: JSON.stringify(
 'https://jan.ai/guides/troubleshooting'
@@ -130,7 +130,7 @@ export default class JanInferenceNitroExtension extends LocalOAIEngine {
 const executableFolderPath = await joinPath([
 janDataFolderPath,
 'engines',
-this.name ?? 'nitro',
+this.name ?? 'cortex-cpp',
 this.version ?? '1.0.0',
 ])

@@ -179,7 +179,7 @@ export default class JanInferenceNitroExtension extends LocalOAIEngine {
 const executableFolderPath = await joinPath([
 janDataFolderPath,
 'engines',
-this.name ?? 'nitro',
+this.name ?? 'cortex-cpp',
 this.version ?? '1.0.0',
 ])

@@ -33,9 +33,22 @@ describe('test executable nitro file', () => {
 Object.defineProperty(process, 'platform', {
 value: 'darwin',
 })
+Object.defineProperty(process, 'arch', {
+value: 'arm64',
+})
 expect(executableNitroFile(testSettings)).toEqual(
 expect.objectContaining({
-executablePath: expect.stringContaining(`mac-universal${sep}nitro`),
+executablePath: expect.stringContaining(`mac-arm64${sep}cortex-cpp`),
 cudaVisibleDevices: '',
 vkVisibleDevices: '',
 })
+)
+Object.defineProperty(process, 'arch', {
+value: 'amd64',
+})
+expect(executableNitroFile(testSettings)).toEqual(
+expect.objectContaining({
+executablePath: expect.stringContaining(`mac-amd64${sep}cortex-cpp`),
+cudaVisibleDevices: '',
+vkVisibleDevices: '',
+})
@@ -56,7 +69,7 @@ describe('test executable nitro file', () => {
 }
 expect(executableNitroFile(settings)).toEqual(
 expect.objectContaining({
-executablePath: expect.stringContaining(`win-cpu${sep}nitro.exe`),
+executablePath: expect.stringContaining(`win-cpu${sep}cortex-cpp.exe`),
 cudaVisibleDevices: '',
 vkVisibleDevices: '',
 })
@@ -89,7 +102,7 @@ describe('test executable nitro file', () => {
 }
 expect(executableNitroFile(settings)).toEqual(
 expect.objectContaining({
-executablePath: expect.stringContaining(`win-cuda-11-7${sep}nitro.exe`),
+executablePath: expect.stringContaining(`win-cuda-11-7${sep}cortex-cpp.exe`),
 cudaVisibleDevices: '0',
 vkVisibleDevices: '0',
 })
@@ -122,7 +135,7 @@ describe('test executable nitro file', () => {
 }
 expect(executableNitroFile(settings)).toEqual(
 expect.objectContaining({
-executablePath: expect.stringContaining(`win-cuda-12-0${sep}nitro.exe`),
+executablePath: expect.stringContaining(`win-cuda-12-0${sep}cortex-cpp.exe`),
 cudaVisibleDevices: '0',
 vkVisibleDevices: '0',
 })
@@ -139,7 +152,7 @@ describe('test executable nitro file', () => {
 }
 expect(executableNitroFile(settings)).toEqual(
 expect.objectContaining({
-executablePath: expect.stringContaining(`linux-cpu${sep}nitro`),
+executablePath: expect.stringContaining(`linux-cpu${sep}cortex-cpp`),
 cudaVisibleDevices: '',
 vkVisibleDevices: '',
 })
@@ -172,7 +185,7 @@ describe('test executable nitro file', () => {
 }
 expect(executableNitroFile(settings)).toEqual(
 expect.objectContaining({
-executablePath: expect.stringContaining(`linux-cuda-11-7${sep}nitro`),
+executablePath: expect.stringContaining(`linux-cuda-11-7${sep}cortex-cpp`),
 cudaVisibleDevices: '0',
 vkVisibleDevices: '0',
 })
@@ -205,7 +218,7 @@ describe('test executable nitro file', () => {
 }
 expect(executableNitroFile(settings)).toEqual(
 expect.objectContaining({
-executablePath: expect.stringContaining(`linux-cuda-12-0${sep}nitro`),
+executablePath: expect.stringContaining(`linux-cuda-12-0${sep}cortex-cpp`),
 cudaVisibleDevices: '0',
 vkVisibleDevices: '0',
 })
@@ -1,4 +1,4 @@
-import { GpuSetting, SystemInformation } from '@janhq/core'
+import { GpuSetting } from '@janhq/core'
 import * as path from 'path'

 export interface NitroExecutableOptions {
@@ -24,7 +24,7 @@ const os = (): string => {
 return process.platform === 'win32'
 ? 'win'
 : process.platform === 'darwin'
-? 'mac-universal'
+? process.arch === 'arm64' ? 'mac-arm64' : 'mac-amd64'
 : 'linux'
 }

@@ -52,7 +52,7 @@ export const executableNitroFile = (
 .join('-')
 let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
 let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
-let binaryName = `nitro${extension()}`
+let binaryName = `cortex-cpp${extension()}`

 return {
 executablePath: path.join(__dirname, '..', 'bin', binaryFolder, binaryName),
@@ -34,9 +34,9 @@ const LOCAL_HOST = '127.0.0.1'
 // The URL for the Nitro subprocess
 const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}`
 // The URL for the Nitro subprocess to load a model
-const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel`
+const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`
 // The URL for the Nitro subprocess to validate a model
-const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus`
+const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus`
 // The URL for the Nitro subprocess to kill itself
 const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`

@@ -50,7 +50,7 @@ const SUPPORTED_MODEL_FORMAT = '.gguf'
 let subprocess: ChildProcessWithoutNullStreams | undefined = undefined

 // The current model settings
-let currentSettings: ModelSettingParams | undefined = undefined
+let currentSettings: ModelSettingParams & { model?: string } | undefined = undefined

 /**
 * Stops a Nitro subprocess.
@@ -77,7 +77,7 @@ async function loadModel(
 }

 if (params.model.engine !== InferenceEngine.nitro) {
-return Promise.reject('Not a nitro model')
+return Promise.reject('Not a cortex model')
 } else {
 const nitroResourceProbe = await getSystemResourceInfo()
 // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
@@ -135,6 +135,7 @@ async function loadModel(
 // model.settings can override the default settings
 ...params.model.settings,
 llama_model_path,
+model: params.model.id,
 // This is critical and requires real CPU physical core count (or performance core)
 ...(params.model.settings.mmproj && {
 mmproj: path.isAbsolute(params.model.settings.mmproj)
@@ -142,7 +143,7 @@
 : path.join(modelFolder, params.model.settings.mmproj),
 }),
 }
-return runNitroAndLoadModel(systemInfo)
+return runNitroAndLoadModel(params.model.id, systemInfo)
 }
 }

@@ -152,7 +153,7 @@
 * 3. Validate model status
 * @returns
 */
-async function runNitroAndLoadModel(systemInfo?: SystemInformation) {
+async function runNitroAndLoadModel(modelId: string, systemInfo?: SystemInformation) {
 // Gather system information for CPU physical cores and memory
 return killSubprocess()
 .then(() =>
@@ -160,10 +161,10 @@ async function runNitroAndLoadModel(systemInfo?: SystemInformation) {
 )
 .then(() => spawnNitroProcess(systemInfo))
 .then(() => loadLLMModel(currentSettings))
-.then(validateModelStatus)
+.then(() => validateModelStatus(modelId))
 .catch((err) => {
 // TODO: Broadcast error so app could display proper error message
-log(`[NITRO]::Error: ${err}`)
+log(`[CORTEX]::Error: ${err}`)
 return { error: err }
 })
 }
@@ -222,7 +223,7 @@ function loadLLMModel(settings: any): Promise<Response> {
 if (!settings?.ngl) {
 settings.ngl = 100
 }
-log(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`)
+log(`[CORTEX]::Debug: Loading model with params ${JSON.stringify(settings)}`)
 return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
 method: 'POST',
 headers: {
@@ -234,14 +235,14 @@ function loadLLMModel(settings: any): Promise<Response> {
 })
 .then((res) => {
 log(
-`[NITRO]::Debug: Load model success with response ${JSON.stringify(
+`[CORTEX]::Debug: Load model success with response ${JSON.stringify(
 res
 )}`
 )
 return Promise.resolve(res)
 })
 .catch((err) => {
-log(`[NITRO]::Error: Load model failed with error ${err}`)
+log(`[CORTEX]::Error: Load model failed with error ${err}`)
 return Promise.reject(err)
 })
 }
@@ -252,11 +253,12 @@ function loadLLMModel(settings: any): Promise<Response> {
 * If the model is loaded successfully, the object is empty.
 * If the model is not loaded successfully, the object contains an error message.
 */
-async function validateModelStatus(): Promise<void> {
+async function validateModelStatus(modelId: string): Promise<void> {
 // Send a GET request to the validation URL.
 // Retry the request up to 3 times if it fails, with a delay of 500 milliseconds between retries.
 return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
-method: 'GET',
+method: 'POST',
+body: JSON.stringify({ model: modelId }),
 headers: {
 'Content-Type': 'application/json',
 },
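
A minimal sketch of the new validation call: the model-status route is now a POST that carries the model id, and readiness is still reported via model_loaded (hypothetical standalone helper; the extension itself goes through fetchRetry as shown above):

// Hypothetical illustration, not part of the commit.
const VALIDATE_MODEL_URL = 'http://127.0.0.1:3928/inferences/server/modelstatus'

async function isModelLoaded(modelId: string): Promise<boolean> {
  const res = await fetch(VALIDATE_MODEL_URL, {
    method: 'POST',
    body: JSON.stringify({ model: modelId }),
    headers: { 'Content-Type': 'application/json' },
  })
  if (!res.ok) return false
  const body = await res.json()
  // Same readiness flag that validateModelStatus checks further down
  return body.model_loaded === true
}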
@@ -264,7 +266,7 @@ async function validateModelStatus(): Promise<void> {
 retryDelay: 300,
 }).then(async (res: Response) => {
 log(
-`[NITRO]::Debug: Validate model state with response ${JSON.stringify(
+`[CORTEX]::Debug: Validate model state with response ${JSON.stringify(
 res.status
 )}`
 )
@@ -275,7 +277,7 @@ async function validateModelStatus(): Promise<void> {
 // Otherwise, return an object with an error message.
 if (body.model_loaded) {
 log(
-`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
+`[CORTEX]::Debug: Validate model state success with response ${JSON.stringify(
 body
 )}`
 )
@@ -283,7 +285,7 @@ async function validateModelStatus(): Promise<void> {
 }
 }
 log(
-`[NITRO]::Debug: Validate model state failed with response ${JSON.stringify(
+`[CORTEX]::Debug: Validate model state failed with response ${JSON.stringify(
 res.statusText
 )}`
 )
@@ -298,7 +300,7 @@ async function validateModelStatus(): Promise<void> {
 async function killSubprocess(): Promise<void> {
 const controller = new AbortController()
 setTimeout(() => controller.abort(), 5000)
-log(`[NITRO]::Debug: Request to kill Nitro`)
+log(`[CORTEX]::Debug: Request to kill cortex`)

 const killRequest = () => {
 return fetch(NITRO_HTTP_KILL_URL, {
@@ -309,17 +311,17 @@ async function killSubprocess(): Promise<void> {
 .then(() =>
 tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
 )
-.then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
+.then(() => log(`[CORTEX]::Debug: cortex process is terminated`))
 .catch((err) => {
 log(
-`[NITRO]::Debug: Could not kill running process on port ${PORT}. Might be another process running on the same port? ${err}`
+`[CORTEX]::Debug: Could not kill running process on port ${PORT}. Might be another process running on the same port? ${err}`
 )
 throw 'PORT_NOT_AVAILABLE'
 })
 }

 if (subprocess?.pid) {
-log(`[NITRO]::Debug: Killing PID ${subprocess.pid}`)
+log(`[CORTEX]::Debug: Killing PID ${subprocess.pid}`)
 const pid = subprocess.pid
 return new Promise((resolve, reject) => {
 terminate(pid, function (err) {
@@ -329,7 +331,7 @@ async function killSubprocess(): Promise<void> {
 tcpPortUsed
 .waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
 .then(() => resolve())
-.then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
+.then(() => log(`[CORTEX]::Debug: cortex process is terminated`))
 .catch(() => {
 killRequest().then(resolve).catch(reject)
 })
@@ -346,22 +348,24 @@
 * @returns A promise that resolves when the Nitro subprocess is started.
 */
 function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
-log(`[NITRO]::Debug: Spawning Nitro subprocess...`)
+log(`[CORTEX]::Debug: Spawning cortex subprocess...`)

 return new Promise<void>(async (resolve, reject) => {
-let binaryFolder = path.join(__dirname, '..', 'bin') // Current directory by default
 let executableOptions = executableNitroFile(systemInfo?.gpuSetting)

 const args: string[] = ['1', LOCAL_HOST, PORT.toString()]
 // Execute the binary
 log(
-`[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
+`[CORTEX]::Debug: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}`
 )
+log(
+path.parse(executableOptions.executablePath).dir
+)
 subprocess = spawn(
 executableOptions.executablePath,
 ['1', LOCAL_HOST, PORT.toString()],
 {
-cwd: binaryFolder,
+cwd: path.join(path.parse(executableOptions.executablePath).dir),
 env: {
 ...process.env,
 CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
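
A sketch of the spawn shape introduced above: the working directory is now derived from the executable path itself rather than a hard-coded bin folder, presumably so files shipped next to the binary resolve correctly (illustrative helper only; names and the rationale are assumptions):

// Hypothetical illustration, not part of the commit.
import { spawn, ChildProcessWithoutNullStreams } from 'child_process'
import * as path from 'path'

function spawnCortex(
  executablePath: string,
  cudaVisibleDevices: string,
  port: number
): ChildProcessWithoutNullStreams {
  // Run the binary from its own folder so resources alongside it resolve (assumption).
  const binaryDir = path.parse(executablePath).dir
  return spawn(executablePath, ['1', '127.0.0.1', port.toString()], {
    cwd: binaryDir,
    env: { ...process.env, CUDA_VISIBLE_DEVICES: cudaVisibleDevices },
  })
}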
@@ -375,15 +379,15 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {

 // Handle subprocess output
 subprocess.stdout.on('data', (data: any) => {
-log(`[NITRO]::Debug: ${data}`)
+log(`[CORTEX]::Debug: ${data}`)
 })

 subprocess.stderr.on('data', (data: any) => {
-log(`[NITRO]::Error: ${data}`)
+log(`[CORTEX]::Error: ${data}`)
 })

 subprocess.on('close', (code: any) => {
-log(`[NITRO]::Debug: Nitro exited with code: ${code}`)
+log(`[CORTEX]::Debug: cortex exited with code: ${code}`)
 subprocess = undefined
 reject(`child process exited with code ${code}`)
 })
@@ -391,7 +395,7 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
 tcpPortUsed
 .waitUntilUsed(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 30000)
 .then(() => {
-log(`[NITRO]::Debug: Nitro is ready`)
+log(`[CORTEX]::Debug: cortex is ready`)
 resolve()
 })
 })
@@ -97,7 +97,7 @@ function unloadModel(): Promise<void> {
 }

 if (subprocess?.pid) {
-log(`[NITRO]::Debug: Killing PID ${subprocess.pid}`)
+log(`[CORTEX]::Debug: Killing PID ${subprocess.pid}`)
 const pid = subprocess.pid
 return new Promise((resolve, reject) => {
 terminate(pid, function (err) {
@@ -107,7 +107,7 @@ function unloadModel(): Promise<void> {
 return tcpPortUsed
 .waitUntilFree(parseInt(ENGINE_PORT), PORT_CHECK_INTERVAL, 5000)
 .then(() => resolve())
-.then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
+.then(() => log(`[CORTEX]::Debug: cortex process is terminated`))
 .catch(() => {
 killRequest()
 })