From 537ef20a548bdf054585ec0adfafa9135da9654c Mon Sep 17 00:00:00 2001 From: Louis Date: Thu, 16 May 2024 17:46:49 +0700 Subject: [PATCH] chore: replace nitro by cortex-cpp (#2912) --- core/src/node/api/restful/helper/consts.ts | 6 +- .../node/api/restful/helper/startStopModel.ts | 26 ++++---- core/src/node/helper/resource.ts | 2 +- .../assistant-extension/src/node/index.ts | 3 +- .../assistant-extension/src/node/retrieval.ts | 6 +- .../src/tools/retrieval.ts | 1 + .../inference-nitro-extension/.gitignore | 2 + .../inference-nitro-extension/bin/version.txt | 2 +- .../inference-nitro-extension/download.bat | 4 +- .../inference-nitro-extension/package.json | 10 +-- .../rollup.config.ts | 2 +- .../inference-nitro-extension/src/index.ts | 4 +- .../src/node/execute.test.ts | 27 +++++--- .../src/node/execute.ts | 6 +- .../src/node/index.ts | 62 ++++++++++--------- .../tensorrt-llm-extension/src/node/index.ts | 4 +- 16 files changed, 96 insertions(+), 71 deletions(-) create mode 100644 extensions/inference-nitro-extension/.gitignore diff --git a/core/src/node/api/restful/helper/consts.ts b/core/src/node/api/restful/helper/consts.ts index bc3cfe300..8d8f8e341 100644 --- a/core/src/node/api/restful/helper/consts.ts +++ b/core/src/node/api/restful/helper/consts.ts @@ -9,11 +9,11 @@ export const SUPPORTED_MODEL_FORMAT = '.gguf' // The URL for the Nitro subprocess const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}` // The URL for the Nitro subprocess to load a model -export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel` +export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel` // The URL for the Nitro subprocess to validate a model -export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus` +export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus` // The URL for the Nitro subprocess 
to kill itself export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy` -export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/llamacpp/chat_completion` // default nitro url +export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/server/chat_completion` // default nitro url diff --git a/core/src/node/api/restful/helper/startStopModel.ts b/core/src/node/api/restful/helper/startStopModel.ts index bcd182cb5..8665850da 100644 --- a/core/src/node/api/restful/helper/startStopModel.ts +++ b/core/src/node/api/restful/helper/startStopModel.ts @@ -144,12 +144,12 @@ const runNitroAndLoadModel = async (modelId: string, modelSettings: NitroModelSe } const spawnNitroProcess = async (): Promise => { - log(`[SERVER]::Debug: Spawning Nitro subprocess...`) + log(`[SERVER]::Debug: Spawning cortex subprocess...`) let binaryFolder = join( getJanExtensionsPath(), '@janhq', - 'inference-nitro-extension', + 'inference-cortex-extension', 'dist', 'bin' ) @@ -160,7 +160,7 @@ const spawnNitroProcess = async (): Promise => { const args: string[] = ['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()] // Execute the binary log( - `[SERVER]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}` + `[SERVER]::Debug: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}` ) subprocess = spawn( executableOptions.executablePath, @@ -184,12 +184,12 @@ const spawnNitroProcess = async (): Promise => { }) subprocess.on('close', (code: any) => { - log(`[SERVER]::Debug: Nitro exited with code: ${code}`) + log(`[SERVER]::Debug: cortex exited with code: ${code}`) subprocess = undefined }) tcpPortUsed.waitUntilUsed(NITRO_DEFAULT_PORT, 300, 30000).then(() => { - log(`[SERVER]::Debug: Nitro is ready`) + log(`[SERVER]::Debug: cortex is ready`) }) } @@ -203,13 +203,13 @@ const executableNitroFile = (): NitroExecutableOptions => { let binaryFolder = join( 
getJanExtensionsPath(), '@janhq', - 'inference-nitro-extension', + 'inference-cortex-extension', 'dist', 'bin' ) let cudaVisibleDevices = '' - let binaryName = 'nitro' + let binaryName = 'cortex-cpp' /** * The binary folder is different for each platform. */ @@ -228,12 +228,16 @@ const executableNitroFile = (): NitroExecutableOptions => { } cudaVisibleDevices = nvidiaInfo['gpu_highest_vram'] } - binaryName = 'nitro.exe' + binaryName = 'cortex-cpp.exe' } else if (process.platform === 'darwin') { /** * For MacOS: mac-universal both Silicon and InteL */ - binaryFolder = join(binaryFolder, 'mac-universal') + if(process.arch === 'arm64') { + binaryFolder = join(binaryFolder, 'mac-arm64') + } else { + binaryFolder = join(binaryFolder, 'mac-amd64') + } } else { /** * For Linux: linux-cpu, linux-cuda-11-7, linux-cuda-12-0 @@ -300,7 +304,7 @@ const loadLLMModel = async (settings: NitroModelSettings): Promise => retryDelay: 500, }) .then((res: any) => { - log(`[SERVER]::Debug: Load model success with response ${JSON.stringify(res)}`) + log(`[SERVER]::Debug: Load model request with response ${JSON.stringify(res)}`) return Promise.resolve(res) }) .catch((err: any) => { @@ -327,7 +331,7 @@ export const stopModel = async (_modelId: string) => { }) }, 5000) const tcpPortUsed = require('tcp-port-used') - log(`[SERVER]::Debug: Request to kill Nitro`) + log(`[SERVER]::Debug: Request to kill cortex`) fetch(NITRO_HTTP_KILL_URL, { method: 'DELETE', diff --git a/core/src/node/helper/resource.ts b/core/src/node/helper/resource.ts index 6c4a71478..c7bfbf20c 100644 --- a/core/src/node/helper/resource.ts +++ b/core/src/node/helper/resource.ts @@ -4,7 +4,7 @@ import { log } from './logger' export const getSystemResourceInfo = async (): Promise => { const cpu = await physicalCpuCount() - log(`[NITRO]::CPU information - ${cpu}`) + log(`[CORTEX]::CPU information - ${cpu}`) return { numCpuPhysicalCore: cpu, diff --git a/extensions/assistant-extension/src/node/index.ts 
b/extensions/assistant-extension/src/node/index.ts index f303dd51d..46835614d 100644 --- a/extensions/assistant-extension/src/node/index.ts +++ b/extensions/assistant-extension/src/node/index.ts @@ -10,11 +10,12 @@ export function toolRetrievalUpdateTextSplitter( } export async function toolRetrievalIngestNewDocument( file: string, + model: string, engine: string ) { const filePath = path.join(getJanDataFolderPath(), normalizeFilePath(file)) const threadPath = path.dirname(filePath.replace('files', '')) - retrieval.updateEmbeddingEngine(engine) + retrieval.updateEmbeddingEngine(model, engine) return retrieval .ingestAgentKnowledge(filePath, `${threadPath}/memory`) .catch((err) => { diff --git a/extensions/assistant-extension/src/node/retrieval.ts b/extensions/assistant-extension/src/node/retrieval.ts index e89357d5c..52193f221 100644 --- a/extensions/assistant-extension/src/node/retrieval.ts +++ b/extensions/assistant-extension/src/node/retrieval.ts @@ -28,14 +28,14 @@ export class Retrieval { }) } - public updateEmbeddingEngine(engine: string): void { + public updateEmbeddingEngine(model: string, engine: string): void { // Engine settings are not compatible with the current embedding model params // Switch case manually for now if (engine === 'nitro') { this.embeddingModel = new OpenAIEmbeddings( - { openAIApiKey: 'nitro-embedding' }, + { openAIApiKey: 'nitro-embedding', model }, // TODO: Raw settings - { basePath: 'http://127.0.0.1:3928/v1' } + { basePath: 'http://127.0.0.1:3928/v1' }, ) } else { // Fallback to OpenAI Settings diff --git a/extensions/assistant-extension/src/tools/retrieval.ts b/extensions/assistant-extension/src/tools/retrieval.ts index e58305c60..a1a641941 100644 --- a/extensions/assistant-extension/src/tools/retrieval.ts +++ b/extensions/assistant-extension/src/tools/retrieval.ts @@ -36,6 +36,7 @@ export class RetrievalTool extends InferenceTool { NODE, 'toolRetrievalIngestNewDocument', docFile, + data.model?.id, data.model?.engine ) } else { 
diff --git a/extensions/inference-nitro-extension/.gitignore b/extensions/inference-nitro-extension/.gitignore new file mode 100644 index 000000000..10780f1d4 --- /dev/null +++ b/extensions/inference-nitro-extension/.gitignore @@ -0,0 +1,2 @@ +bin/* +!bin/version.txt \ No newline at end of file diff --git a/extensions/inference-nitro-extension/bin/version.txt b/extensions/inference-nitro-extension/bin/version.txt index 0c4b45492..6f2743d65 100644 --- a/extensions/inference-nitro-extension/bin/version.txt +++ b/extensions/inference-nitro-extension/bin/version.txt @@ -1 +1 @@ -0.3.22 +0.4.4 diff --git a/extensions/inference-nitro-extension/download.bat b/extensions/inference-nitro-extension/download.bat index c99162eba..9bd2d4b07 100644 --- a/extensions/inference-nitro-extension/download.bat +++ b/extensions/inference-nitro-extension/download.bat @@ -1,3 +1,3 @@ @echo off -set /p NITRO_VERSION=<./bin/version.txt -.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan +set /p CORTEX_VERSION=<./bin/version.txt +.\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download
https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json index 7d31a1d38..d396778d9 100644 --- a/extensions/inference-nitro-extension/package.json +++ b/extensions/inference-nitro-extension/package.json @@ -1,8 +1,8 @@ { - "name": "@janhq/inference-nitro-extension", - "productName": "Nitro Inference Engine", + "name": "@janhq/inference-cortex-extension", + "productName": "Cortex Inference Engine", "version": "1.0.7", - "description": "This extension embeds Nitro, a lightweight (3mb) inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", + "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++.
See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", "main": "dist/index.js", "node": "dist/node/index.cjs.js", "author": "Jan ", @@ -10,8 +10,8 @@ "scripts": { "test": "jest", "build": "tsc --module commonjs && rollup -c rollup.config.ts", - "downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-avx2.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/nitro", - "downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-universal.tar.gz -o ./bin/ && mkdir -p ./bin/mac-universal && tar -zxvf ./bin/nitro-${NITRO_VERSION}-mac-universal.tar.gz --strip-components=1 -C ./bin/mac-universal && rm -rf ./bin/nitro-${NITRO_VERSION}-mac-universal.tar.gz && chmod +x ./bin/mac-universal/nitro", + "downloadnitro:linux": "CORTEX_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-avx2.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/cortex-cpp && download 
https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/cortex-cpp", + "downloadnitro:darwin": "CORTEX_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz -o ./bin/ && mkdir -p ./bin/mac-arm64 && tar -zxvf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz --strip-components=1 -C ./bin/mac-arm64 && rm -rf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz && chmod +x ./bin/mac-arm64/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz -o ./bin/ && mkdir -p ./bin/mac-amd64 && tar -zxvf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz --strip-components=1 -C ./bin/mac-amd64 && rm -rf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz && chmod +x ./bin/mac-amd64/cortex-cpp", "downloadnitro:win32": "download.bat", "downloadnitro": "run-script-os", "build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install", diff --git a/extensions/inference-nitro-extension/rollup.config.ts b/extensions/inference-nitro-extension/rollup.config.ts index f1d3eb32f..b0707f404 100644 --- a/extensions/inference-nitro-extension/rollup.config.ts +++ 
b/extensions/inference-nitro-extension/rollup.config.ts @@ -80,7 +80,7 @@ export default [ DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson), INFERENCE_URL: JSON.stringify( process.env.INFERENCE_URL || - 'http://127.0.0.1:3928/inferences/llamacpp/chat_completion' + 'http://127.0.0.1:3928/inferences/server/chat_completion' ), TROUBLESHOOTING_URL: JSON.stringify( 'https://jan.ai/guides/troubleshooting' diff --git a/extensions/inference-nitro-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts index e6bad64f4..a027e8844 100644 --- a/extensions/inference-nitro-extension/src/index.ts +++ b/extensions/inference-nitro-extension/src/index.ts @@ -130,7 +130,7 @@ export default class JanInferenceNitroExtension extends LocalOAIEngine { const executableFolderPath = await joinPath([ janDataFolderPath, 'engines', - this.name ?? 'nitro', + this.name ?? 'cortex-cpp', this.version ?? '1.0.0', ]) @@ -179,7 +179,7 @@ export default class JanInferenceNitroExtension extends LocalOAIEngine { const executableFolderPath = await joinPath([ janDataFolderPath, 'engines', - this.name ?? 'nitro', + this.name ?? 'cortex-cpp', this.version ?? 
'1.0.0', ]) diff --git a/extensions/inference-nitro-extension/src/node/execute.test.ts b/extensions/inference-nitro-extension/src/node/execute.test.ts index dfd26deb8..cf9e84acf 100644 --- a/extensions/inference-nitro-extension/src/node/execute.test.ts +++ b/extensions/inference-nitro-extension/src/node/execute.test.ts @@ -33,9 +33,22 @@ describe('test executable nitro file', () => { Object.defineProperty(process, 'platform', { value: 'darwin', }) + Object.defineProperty(process, 'arch', { + value: 'arm64', + }) expect(executableNitroFile(testSettings)).toEqual( expect.objectContaining({ - executablePath: expect.stringContaining(`mac-universal${sep}nitro`), + executablePath: expect.stringContaining(`mac-arm64${sep}cortex-cpp`), + cudaVisibleDevices: '', + vkVisibleDevices: '', + }) + ) + Object.defineProperty(process, 'arch', { + value: 'amd64', + }) + expect(executableNitroFile(testSettings)).toEqual( + expect.objectContaining({ + executablePath: expect.stringContaining(`mac-amd64${sep}cortex-cpp`), cudaVisibleDevices: '', vkVisibleDevices: '', }) @@ -56,7 +69,7 @@ describe('test executable nitro file', () => { } expect(executableNitroFile(settings)).toEqual( expect.objectContaining({ - executablePath: expect.stringContaining(`win-cpu${sep}nitro.exe`), + executablePath: expect.stringContaining(`win-cpu${sep}cortex-cpp.exe`), cudaVisibleDevices: '', vkVisibleDevices: '', }) @@ -89,7 +102,7 @@ describe('test executable nitro file', () => { } expect(executableNitroFile(settings)).toEqual( expect.objectContaining({ - executablePath: expect.stringContaining(`win-cuda-11-7${sep}nitro.exe`), + executablePath: expect.stringContaining(`win-cuda-11-7${sep}cortex-cpp.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -122,7 +135,7 @@ describe('test executable nitro file', () => { } expect(executableNitroFile(settings)).toEqual( expect.objectContaining({ - executablePath: expect.stringContaining(`win-cuda-12-0${sep}nitro.exe`), + executablePath: 
expect.stringContaining(`win-cuda-12-0${sep}cortex-cpp.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -139,7 +152,7 @@ describe('test executable nitro file', () => { } expect(executableNitroFile(settings)).toEqual( expect.objectContaining({ - executablePath: expect.stringContaining(`linux-cpu${sep}nitro`), + executablePath: expect.stringContaining(`linux-cpu${sep}cortex-cpp`), cudaVisibleDevices: '', vkVisibleDevices: '', }) @@ -172,7 +185,7 @@ describe('test executable nitro file', () => { } expect(executableNitroFile(settings)).toEqual( expect.objectContaining({ - executablePath: expect.stringContaining(`linux-cuda-11-7${sep}nitro`), + executablePath: expect.stringContaining(`linux-cuda-11-7${sep}cortex-cpp`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -205,7 +218,7 @@ describe('test executable nitro file', () => { } expect(executableNitroFile(settings)).toEqual( expect.objectContaining({ - executablePath: expect.stringContaining(`linux-cuda-12-0${sep}nitro`), + executablePath: expect.stringContaining(`linux-cuda-12-0${sep}cortex-cpp`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) diff --git a/extensions/inference-nitro-extension/src/node/execute.ts b/extensions/inference-nitro-extension/src/node/execute.ts index 2cfcfe4f3..417734afa 100644 --- a/extensions/inference-nitro-extension/src/node/execute.ts +++ b/extensions/inference-nitro-extension/src/node/execute.ts @@ -1,4 +1,4 @@ -import { GpuSetting, SystemInformation } from '@janhq/core' +import { GpuSetting } from '@janhq/core' import * as path from 'path' export interface NitroExecutableOptions { @@ -24,7 +24,7 @@ const os = (): string => { return process.platform === 'win32' ? 'win' : process.platform === 'darwin' - ? 'mac-universal' + ? process.arch === 'arm64' ? 'mac-arm64' : 'mac-amd64' : 'linux' } @@ -52,7 +52,7 @@ export const executableNitroFile = ( .join('-') let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? 
'' let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? '' - let binaryName = `nitro${extension()}` + let binaryName = `cortex-cpp${extension()}` return { executablePath: path.join(__dirname, '..', 'bin', binaryFolder, binaryName), diff --git a/extensions/inference-nitro-extension/src/node/index.ts b/extensions/inference-nitro-extension/src/node/index.ts index fbfdb8761..03e172c63 100644 --- a/extensions/inference-nitro-extension/src/node/index.ts +++ b/extensions/inference-nitro-extension/src/node/index.ts @@ -34,9 +34,9 @@ const LOCAL_HOST = '127.0.0.1' // The URL for the Nitro subprocess const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}` // The URL for the Nitro subprocess to load a model -const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel` +const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel` // The URL for the Nitro subprocess to validate a model -const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus` +const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus` // The URL for the Nitro subprocess to kill itself const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy` @@ -50,7 +50,7 @@ const SUPPORTED_MODEL_FORMAT = '.gguf' let subprocess: ChildProcessWithoutNullStreams | undefined = undefined // The current model settings -let currentSettings: ModelSettingParams | undefined = undefined +let currentSettings: ModelSettingParams & { model?: string } | undefined = undefined /** * Stops a Nitro subprocess. 
@@ -77,7 +77,7 @@ async function loadModel( } if (params.model.engine !== InferenceEngine.nitro) { - return Promise.reject('Not a nitro model') + return Promise.reject('Not a cortex model') } else { const nitroResourceProbe = await getSystemResourceInfo() // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt @@ -135,6 +135,7 @@ async function loadModel( // model.settings can override the default settings ...params.model.settings, llama_model_path, + model: params.model.id, // This is critical and requires real CPU physical core count (or performance core) ...(params.model.settings.mmproj && { mmproj: path.isAbsolute(params.model.settings.mmproj) @@ -142,7 +143,7 @@ async function loadModel( : path.join(modelFolder, params.model.settings.mmproj), }), } - return runNitroAndLoadModel(systemInfo) + return runNitroAndLoadModel(params.model.id, systemInfo) } } @@ -152,7 +153,7 @@ async function loadModel( * 3. Validate model status * @returns */ -async function runNitroAndLoadModel(systemInfo?: SystemInformation) { +async function runNitroAndLoadModel(modelId: string, systemInfo?: SystemInformation) { // Gather system information for CPU physical cores and memory return killSubprocess() .then(() => @@ -160,10 +161,10 @@ async function runNitroAndLoadModel(systemInfo?: SystemInformation) { ) .then(() => spawnNitroProcess(systemInfo)) .then(() => loadLLMModel(currentSettings)) - .then(validateModelStatus) + .then(() => validateModelStatus(modelId)) .catch((err) => { // TODO: Broadcast error so app could display proper error message - log(`[NITRO]::Error: ${err}`) + log(`[CORTEX]::Error: ${err}`) return { error: err } }) } @@ -222,7 +223,7 @@ function loadLLMModel(settings: any): Promise { if (!settings?.ngl) { settings.ngl = 100 } - log(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`) + log(`[CORTEX]::Debug: Loading model with params ${JSON.stringify(settings)}`) return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, { method: 'POST', 
headers: { @@ -234,14 +235,14 @@ function loadLLMModel(settings: any): Promise { }) .then((res) => { log( - `[NITRO]::Debug: Load model success with response ${JSON.stringify( + `[CORTEX]::Debug: Load model success with response ${JSON.stringify( res )}` ) return Promise.resolve(res) }) .catch((err) => { - log(`[NITRO]::Error: Load model failed with error ${err}`) + log(`[CORTEX]::Error: Load model failed with error ${err}`) return Promise.reject(err) }) } @@ -252,11 +253,12 @@ function loadLLMModel(settings: any): Promise { * If the model is loaded successfully, the object is empty. * If the model is not loaded successfully, the object contains an error message. */ -async function validateModelStatus(): Promise { +async function validateModelStatus(modelId: string): Promise { // Send a GET request to the validation URL. // Retry the request up to 3 times if it fails, with a delay of 500 milliseconds between retries. return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, { - method: 'GET', + method: 'POST', + body: JSON.stringify({ model: modelId }), headers: { 'Content-Type': 'application/json', }, @@ -264,7 +266,7 @@ async function validateModelStatus(): Promise { retryDelay: 300, }).then(async (res: Response) => { log( - `[NITRO]::Debug: Validate model state with response ${JSON.stringify( + `[CORTEX]::Debug: Validate model state with response ${JSON.stringify( res.status )}` ) @@ -275,7 +277,7 @@ async function validateModelStatus(): Promise { // Otherwise, return an object with an error message. 
if (body.model_loaded) { log( - `[NITRO]::Debug: Validate model state success with response ${JSON.stringify( + `[CORTEX]::Debug: Validate model state success with response ${JSON.stringify( body )}` ) @@ -283,7 +285,7 @@ async function validateModelStatus(): Promise { } } log( - `[NITRO]::Debug: Validate model state failed with response ${JSON.stringify( + `[CORTEX]::Debug: Validate model state failed with response ${JSON.stringify( res.statusText )}` ) @@ -298,7 +300,7 @@ async function validateModelStatus(): Promise { async function killSubprocess(): Promise { const controller = new AbortController() setTimeout(() => controller.abort(), 5000) - log(`[NITRO]::Debug: Request to kill Nitro`) + log(`[CORTEX]::Debug: Request to kill cortex`) const killRequest = () => { return fetch(NITRO_HTTP_KILL_URL, { @@ -309,17 +311,17 @@ async function killSubprocess(): Promise { .then(() => tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000) ) - .then(() => log(`[NITRO]::Debug: Nitro process is terminated`)) + .then(() => log(`[CORTEX]::Debug: cortex process is terminated`)) .catch((err) => { log( - `[NITRO]::Debug: Could not kill running process on port ${PORT}. Might be another process running on the same port? ${err}` + `[CORTEX]::Debug: Could not kill running process on port ${PORT}. Might be another process running on the same port? 
${err}` ) throw 'PORT_NOT_AVAILABLE' }) } if (subprocess?.pid) { - log(`[NITRO]::Debug: Killing PID ${subprocess.pid}`) + log(`[CORTEX]::Debug: Killing PID ${subprocess.pid}`) const pid = subprocess.pid return new Promise((resolve, reject) => { terminate(pid, function (err) { @@ -329,7 +331,7 @@ async function killSubprocess(): Promise { tcpPortUsed .waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000) .then(() => resolve()) - .then(() => log(`[NITRO]::Debug: Nitro process is terminated`)) + .then(() => log(`[CORTEX]::Debug: cortex process is terminated`)) .catch(() => { killRequest().then(resolve).catch(reject) }) @@ -346,22 +348,24 @@ async function killSubprocess(): Promise { * @returns A promise that resolves when the Nitro subprocess is started. */ function spawnNitroProcess(systemInfo?: SystemInformation): Promise { - log(`[NITRO]::Debug: Spawning Nitro subprocess...`) + log(`[CORTEX]::Debug: Spawning cortex subprocess...`) return new Promise(async (resolve, reject) => { - let binaryFolder = path.join(__dirname, '..', 'bin') // Current directory by default let executableOptions = executableNitroFile(systemInfo?.gpuSetting) const args: string[] = ['1', LOCAL_HOST, PORT.toString()] // Execute the binary log( - `[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}` + `[CORTEX]::Debug: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}` + ) + log( + path.parse(executableOptions.executablePath).dir ) subprocess = spawn( executableOptions.executablePath, ['1', LOCAL_HOST, PORT.toString()], { - cwd: binaryFolder, + cwd: path.join(path.parse(executableOptions.executablePath).dir), env: { ...process.env, CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices, @@ -375,15 +379,15 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise { // Handle subprocess output subprocess.stdout.on('data', (data: any) => { - log(`[NITRO]::Debug: ${data}`) + log(`[CORTEX]::Debug: ${data}`) }) 
subprocess.stderr.on('data', (data: any) => { - log(`[NITRO]::Error: ${data}`) + log(`[CORTEX]::Error: ${data}`) }) subprocess.on('close', (code: any) => { - log(`[NITRO]::Debug: Nitro exited with code: ${code}`) + log(`[CORTEX]::Debug: cortex exited with code: ${code}`) subprocess = undefined reject(`child process exited with code ${code}`) }) @@ -391,7 +395,7 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise { tcpPortUsed .waitUntilUsed(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 30000) .then(() => { - log(`[NITRO]::Debug: Nitro is ready`) + log(`[CORTEX]::Debug: cortex is ready`) resolve() }) }) diff --git a/extensions/tensorrt-llm-extension/src/node/index.ts b/extensions/tensorrt-llm-extension/src/node/index.ts index eb92c98af..c8bc48459 100644 --- a/extensions/tensorrt-llm-extension/src/node/index.ts +++ b/extensions/tensorrt-llm-extension/src/node/index.ts @@ -97,7 +97,7 @@ function unloadModel(): Promise { } if (subprocess?.pid) { - log(`[NITRO]::Debug: Killing PID ${subprocess.pid}`) + log(`[CORTEX]::Debug: Killing PID ${subprocess.pid}`) const pid = subprocess.pid return new Promise((resolve, reject) => { terminate(pid, function (err) { @@ -107,7 +107,7 @@ function unloadModel(): Promise { return tcpPortUsed .waitUntilFree(parseInt(ENGINE_PORT), PORT_CHECK_INTERVAL, 5000) .then(() => resolve()) - .then(() => log(`[NITRO]::Debug: Nitro process is terminated`)) + .then(() => log(`[CORTEX]::Debug: cortex process is terminated`)) .catch(() => { killRequest() })