Resolved conflicts by keeping HEAD changes
parent 19274f7e69
commit a8abc9f9aa
@@ -1,2 +0,0 @@
bin
!version.txt
@@ -1,75 +0,0 @@
# Create a Jan Extension using Typescript

Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀

## Create Your Own Extension

To create your own extension, you can use this repository as a template! Just follow the instructions below:

1. Click the Use this template button at the top of the repository
2. Select Create a new repository
3. Select an owner and name for your new repository
4. Click Create repository
5. Clone your new repository

## Initial Setup

After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.

> [!NOTE]
>
> You'll need to have a reasonably modern version of
> [Node.js](https://nodejs.org) handy. If you are using a version manager like
> [`nodenv`](https://github.com/nodenv/nodenv) or
> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
> root of your repository to install the version specified in
> [`package.json`](./package.json). Otherwise, 20.x or later should work!

1. :hammer_and_wrench: Install the dependencies

   ```bash
   npm install
   ```

1. :building_construction: Package the TypeScript for distribution

   ```bash
   npm run bundle
   ```

1. :white_check_mark: Check your artifact

   There will be a `.tgz` file in your extension directory now.

## Update the Extension Metadata

The [`package.json`](package.json) file defines metadata about your extension, such as
extension name, main entry, description and version.

When you copy this repository, update `package.json` with the name and description of your extension.
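As a minimal sketch, these are the fields you would typically edit — the values below are placeholders for illustration, not the ones shipped with any particular extension:

```json
{
  "name": "@your-scope/your-extension",
  "productName": "Your Extension",
  "version": "0.1.0",
  "description": "A short description of what your extension does.",
  "main": "dist/index.js"
}
```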
## Update the Extension Code

The [`src/`](./src/) directory is the heart of your extension! This contains the
source code that will be run when your extension functions are invoked. You can replace the
contents of this directory with your own code.

There are a few things to keep in mind when writing your extension code:

- Most Jan Extension functions are processed asynchronously.
  In `index.ts`, you will see that the extension function will return a `Promise<any>`.

  ```typescript
  import { events, MessageEvent, MessageRequest } from '@janhq/core'

  function onStart(): Promise<any> {
    return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
      this.inference(data)
    )
  }
  ```

For more information about the Jan Extension Core module, see the
[documentation](https://github.com/menloresearch/jan/blob/main/core/README.md).
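As a complement, here is a minimal sketch of the other direction — emitting an event back to the app when work completes. `MessageEvent.OnMessageUpdate` and the `ThreadMessage` type are assumptions about the core module's exports; verify them against the version of `@janhq/core` you depend on:

```typescript
import { events, MessageEvent, ThreadMessage } from '@janhq/core'

// Hypothetical completion handler: notifies the app that a message has
// changed. The event name is an assumption; check the MessageEvent enum.
function onInferenceDone(message: ThreadMessage): void {
  events.emit(MessageEvent.OnMessageUpdate, message)
}
```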
So, what are you waiting for? Go ahead and start customizing your extension!
@@ -1 +0,0 @@
1.0.13-rc9
@@ -1,40 +0,0 @@
@echo off
set BIN_PATH=./bin
set SHARED_PATH=./../../electron/shared
set /p CORTEX_VERSION=<./bin/version.txt
set ENGINE_VERSION=b5509

@REM Download llama.cpp binaries
set DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/%ENGINE_VERSION%/llama-%ENGINE_VERSION%-bin-win
set DOWNLOAD_GGML_URL=https://github.com/ggml-org/llama.cpp/releases/download/%ENGINE_VERSION%/llama-%ENGINE_VERSION%-bin-win
set CUDA_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/%ENGINE_VERSION%
set SUBFOLDERS=win-noavx-cuda-cu12.0-x64 win-noavx-cuda-cu11.7-x64 win-avx2-cuda-cu12.0-x64 win-avx2-cuda-cu11.7-x64 win-noavx-x64 win-avx-x64 win-avx2-x64 win-avx512-x64 win-vulkan-x64

call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/menloresearch/cortex.cpp/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-cu12.0-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-avx2-cuda-cu12.0-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-cu11.7-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-avx2-cuda-cu11.7-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-cu12.0-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-noavx-cuda-cu12.0-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-cu11.7-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-noavx-cuda-cu11.7-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-noavx-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-avx-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-avx2-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512-x64.tar.gz -e --strip 2 -o %SHARED_PATH%/engines/llama.cpp/win-avx512-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_GGML_URL%-vulkan-x64.zip -e --strip 1 -o %SHARED_PATH%/engines/llama.cpp/win-vulkan-x64/%ENGINE_VERSION%
call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cudart-llama-bin-win-cu12.0-x64.tar.gz -e --strip 1 -o %BIN_PATH%
call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cudart-llama-bin-win-cu11.7-x64.tar.gz -e --strip 1 -o %BIN_PATH%

move %BIN_PATH%\cortex-server-beta.exe %BIN_PATH%\cortex-server.exe
del %BIN_PATH%\cortex-beta.exe
del %BIN_PATH%\cortex.exe

@REM Loop through each folder and move DLLs
for %%F in (%SUBFOLDERS%) do (
    echo Processing folder: %SHARED_PATH%\engines\llama.cpp\%%F\%ENGINE_VERSION%

    @REM Move cu*.dll files
    for %%D in (%SHARED_PATH%\engines\llama.cpp\%%F\%ENGINE_VERSION%\cu*.dll) do (
        move "%%D" "%BIN_PATH%"
    )
)

echo DLL files moved successfully.
@@ -1,50 +0,0 @@
#!/bin/bash

# Read CORTEX_VERSION
CORTEX_VERSION=$(cat ./bin/version.txt)
ENGINE_VERSION=b5509
CORTEX_RELEASE_URL="https://github.com/menloresearch/cortex.cpp/releases/download"
ENGINE_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/${ENGINE_VERSION}/llama-${ENGINE_VERSION}-bin
CUDA_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/${ENGINE_VERSION}
BIN_PATH=./bin
SHARED_PATH="../../electron/shared"
# Detect platform
OS_TYPE=$(uname)

if [ "$OS_TYPE" == "Linux" ]; then
  # Linux downloads
  download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-linux-amd64.tar.gz" -e --strip 1 -o "./bin"
  mv ./bin/cortex-server-beta ./bin/cortex-server
  rm -rf ./bin/cortex
  rm -rf ./bin/cortex-beta
  chmod +x "./bin/cortex-server"

  # Download engines for Linux
  download "${ENGINE_DOWNLOAD_URL}-linux-noavx-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-noavx-x64/${ENGINE_VERSION}" 1
  download "${ENGINE_DOWNLOAD_URL}-linux-avx-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-avx-x64/${ENGINE_VERSION}" 1
  download "${ENGINE_DOWNLOAD_URL}-linux-avx2-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-avx2-x64/${ENGINE_VERSION}" 1
  download "${ENGINE_DOWNLOAD_URL}-linux-avx512-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-avx512-x64/${ENGINE_VERSION}" 1
  download "${ENGINE_DOWNLOAD_URL}-linux-avx2-cuda-cu12.0-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-avx2-cuda-cu12.0-x64/${ENGINE_VERSION}" 1
  download "${ENGINE_DOWNLOAD_URL}-linux-avx2-cuda-cu11.7-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-avx2-cuda-cu11.7-x64/${ENGINE_VERSION}" 1
  download "${ENGINE_DOWNLOAD_URL}-linux-noavx-cuda-cu12.0-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-noavx-cuda-cu12.0-x64/${ENGINE_VERSION}" 1
  download "${ENGINE_DOWNLOAD_URL}-linux-noavx-cuda-cu11.7-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-noavx-cuda-cu11.7-x64/${ENGINE_VERSION}" 1
  download "${ENGINE_DOWNLOAD_URL}-linux-vulkan-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/linux-vulkan-x64/${ENGINE_VERSION}" 1
  download "${CUDA_DOWNLOAD_URL}/cudart-llama-bin-linux-cu12.0-x64.tar.gz" -e --strip 1 -o "${BIN_PATH}" 1
  download "${CUDA_DOWNLOAD_URL}/cudart-llama-bin-linux-cu11.7-x64.tar.gz" -e --strip 1 -o "${BIN_PATH}" 1

elif [ "$OS_TYPE" == "Darwin" ]; then
  # macOS downloads
  download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-mac-universal.tar.gz" -e --strip 1 -o "./bin" 1
  mv ./bin/cortex-server-beta ./bin/cortex-server
  rm -rf ./bin/cortex
  rm -rf ./bin/cortex-beta
  chmod +x "./bin/cortex-server"

  # Download engines for macOS
  download "${ENGINE_DOWNLOAD_URL}-macos-arm64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/macos-arm64/${ENGINE_VERSION}"
  download "${ENGINE_DOWNLOAD_URL}-macos-x64.tar.gz" -e --strip 2 -o "${SHARED_PATH}/engines/llama.cpp/macos-x64/${ENGINE_VERSION}"

else
  echo "Unsupported operating system: $OS_TYPE"
  exit 1
fi
@@ -1,67 +0,0 @@
{
  "name": "@janhq/inference-cortex-extension",
  "productName": "Cortex Inference Engine",
  "version": "1.0.25",
  "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
  "main": "dist/index.js",
  "node": "dist/node/index.cjs.js",
  "author": "Jan <service@jan.ai>",
  "license": "AGPL-3.0",
  "scripts": {
    "test": "vitest run",
    "build": "rolldown -c rolldown.config.mjs",
    "downloadcortex:linux:darwin": "./download.sh",
    "downloadcortex:win32": "download.bat",
    "downloadcortex": "run-script-os",
    "build:publish:darwin": "rimraf *.tgz --glob || true && yarn build && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
    "build:publish:win32:linux": "rimraf *.tgz --glob || true && yarn build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
    "build:publish": "run-script-os"
  },
  "exports": {
    ".": "./dist/index.js",
    "./main": "./dist/node/index.cjs.js"
  },
  "devDependencies": {
    "@jest/globals": "^29.7.0",
    "@types/decompress": "^4.2.7",
    "@types/jest": "^29.5.12",
    "@types/node": "^20.11.4",
    "@types/os-utils": "^0.0.4",
    "@types/tcp-port-used": "^1.0.4",
    "cpx": "^1.5.0",
    "download-cli": "^1.1.1",
    "jest": "^29.7.0",
    "rimraf": "^3.0.2",
    "rolldown": "1.0.0-beta.1",
    "run-script-os": "^1.1.6",
    "ts-jest": "^29.1.2",
    "typescript": "^5.3.3",
    "vitest": "^3.0.8"
  },
  "dependencies": {
    "@janhq/core": "../../core/package.tgz",
    "fetch-retry": "^5.0.6",
    "ky": "^1.7.2",
    "p-queue": "^8.0.1",
    "rxjs": "^7.8.1",
    "ulidx": "^2.3.0"
  },
  "engines": {
    "node": ">=18.0.0"
  },
  "files": [
    "dist/*",
    "package.json",
    "README.md"
  ],
  "bundleDependencies": [
    "tcp-port-used",
    "fetch-retry",
    "@janhq/core",
    "decompress"
  ],
  "installConfig": {
    "hoistingLimits": "workspaces"
  },
  "packageManager": "yarn@4.5.3"
}
@@ -1,126 +0,0 @@
[
  {
    "key": "auto_unload_models",
    "title": "Auto-Unload Old Models",
    "description": "Automatically unloads models that are not in use to free up memory. Ensure only one model is loaded at a time.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  },
  {
    "key": "context_shift",
    "title": "Context Shift",
    "description": "Automatically shifts the context window when the model is unable to process the entire prompt, ensuring that the most relevant information is always included.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": false
    }
  },
  {
    "key": "cont_batching",
    "title": "Continuous Batching",
    "description": "Allows processing prompts in parallel with text generation, which usually improves performance.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": false
    }
  },
  {
    "key": "n_parallel",
    "title": "Parallel Operations",
    "description": "Number of prompts that can be processed simultaneously by the model.",
    "controllerType": "input",
    "controllerProps": {
      "value": "",
      "placeholder": "1",
      "type": "number",
      "textAlign": "right"
    }
  },
  {
    "key": "cpu_threads",
    "title": "CPU Threads",
    "description": "Number of CPU cores used for model processing when running without GPU.",
    "controllerType": "input",
    "controllerProps": {
      "value": "",
      "placeholder": "-1 (auto-detect)",
      "type": "number",
      "textAlign": "right"
    }
  },
  {
    "key": "threads_batch",
    "title": "Threads (Batch)",
    "description": "Number of threads for batch and prompt processing (default: same as Threads).",
    "controllerType": "input",
    "controllerProps": {
      "value": "",
      "placeholder": "-1 (same as Threads)",
      "type": "number"
    }
  },
  {
    "key": "flash_attn",
    "title": "Flash Attention",
    "description": "Optimizes memory usage and speeds up model inference using an efficient attention implementation.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  },
  {
    "key": "caching_enabled",
    "title": "Caching",
    "description": "Stores recent prompts and responses to improve speed when similar questions are asked.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  },
  {
    "key": "cache_type",
    "title": "KV Cache Type",
    "description": "Controls memory usage and precision trade-off.",
    "controllerType": "dropdown",
    "controllerProps": {
      "value": "q8_0",
      "options": [
        {
          "value": "q4_0",
          "name": "q4_0"
        },
        {
          "value": "q8_0",
          "name": "q8_0"
        },
        {
          "value": "f16",
          "name": "f16"
        }
      ]
    }
  },
  {
    "key": "use_mmap",
    "title": "mmap",
    "description": "Loads model files more efficiently by mapping them to memory, reducing RAM usage.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  },
  {
    "key": "hugging-face-access-token",
    "title": "Hugging Face Access Token",
    "description": "Access tokens programmatically authenticate your identity to the Hugging Face Hub, allowing applications to perform specific actions specified by the scope of permissions granted.",
    "controllerType": "input",
    "controllerProps": {
      "value": "",
      "placeholder": "hf_**********************************",
      "type": "password",
      "inputActions": ["unobscure", "copy"]
    }
  }
]
@@ -1,44 +0,0 @@
import { defineConfig } from 'rolldown'
import packageJson from './package.json' with { type: 'json' }
import defaultSettingJson from './resources/default_settings.json' with { type: 'json' }

export default defineConfig([
  {
    input: 'src/index.ts',
    output: {
      format: 'esm',
      file: 'dist/index.js',
    },
    platform: 'browser',
    define: {
      NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
      SETTINGS: JSON.stringify(defaultSettingJson),
      CORTEX_API_URL: JSON.stringify(
        `http://127.0.0.1:${process.env.CORTEX_API_PORT ?? '39291'}`
      ),
      CORTEX_SOCKET_URL: JSON.stringify(
        `ws://127.0.0.1:${process.env.CORTEX_API_PORT ?? '39291'}`
      ),
      CORTEX_ENGINE_VERSION: JSON.stringify('b5509'),
    },
  },
  {
    input: 'src/node/index.ts',
    external: ['@janhq/core/node'],
    output: {
      format: 'cjs',
      file: 'dist/node/index.cjs.js',
      sourcemap: false,
      inlineDynamicImports: true,
    },
    resolve: {
      extensions: ['.js', '.ts', '.json'],
    },
    define: {
      CORTEX_API_URL: JSON.stringify(
        `http://127.0.0.1:${process.env.CORTEX_API_PORT ?? '39291'}`
      ),
    },
    platform: 'node',
  },
])
@@ -1,5 +0,0 @@
declare const NODE: string
declare const CORTEX_API_URL: string
declare const CORTEX_SOCKET_URL: string
declare const CORTEX_ENGINE_VERSION: string
declare const SETTINGS: any
@@ -1,452 +0,0 @@
import { describe, beforeEach, it, expect, vi, afterEach } from 'vitest'

// Must mock before imports
vi.mock('@janhq/core', () => {
  return {
    executeOnMain: vi.fn().mockResolvedValue({}),
    events: {
      emit: vi.fn(),
    },
    extractModelLoadParams: vi.fn().mockReturnValue({}),
    ModelEvent: {
      OnModelsUpdate: 'OnModelsUpdate',
      OnModelStopped: 'OnModelStopped',
    },
    EngineEvent: {
      OnEngineUpdate: 'OnEngineUpdate',
    },
    InferenceEngine: {
      cortex: 'cortex',
      nitro: 'nitro',
      cortex_llamacpp: 'cortex_llamacpp',
    },
    LocalOAIEngine: class LocalOAIEngine {
      onLoad() {}
      onUnload() {}
    },
  }
})

import JanInferenceCortexExtension, { Settings } from './index'
import { InferenceEngine, ModelEvent, EngineEvent, executeOnMain, events } from '@janhq/core'
import ky from 'ky'

// Mock global variables
const CORTEX_API_URL = 'http://localhost:3000'
const CORTEX_SOCKET_URL = 'ws://localhost:3000'
const SETTINGS = [
  { id: 'n_parallel', name: 'Parallel Execution', description: 'Number of parallel executions', type: 'number', value: '4' },
  { id: 'cont_batching', name: 'Continuous Batching', description: 'Enable continuous batching', type: 'boolean', value: true },
  { id: 'caching_enabled', name: 'Caching', description: 'Enable caching', type: 'boolean', value: true },
  { id: 'flash_attn', name: 'Flash Attention', description: 'Enable flash attention', type: 'boolean', value: true },
  { id: 'cache_type', name: 'Cache Type', description: 'Type of cache to use', type: 'string', value: 'f16' },
  { id: 'use_mmap', name: 'Use Memory Map', description: 'Use memory mapping', type: 'boolean', value: true },
  { id: 'cpu_threads', name: 'CPU Threads', description: 'Number of CPU threads', type: 'number', value: '' },
]
const NODE = 'node'

// Mock globals
vi.stubGlobal('CORTEX_API_URL', CORTEX_API_URL)
vi.stubGlobal('CORTEX_SOCKET_URL', CORTEX_SOCKET_URL)
vi.stubGlobal('SETTINGS', SETTINGS)
vi.stubGlobal('NODE', NODE)
vi.stubGlobal('window', {
  addEventListener: vi.fn(),
})

// Mock WebSocket
class MockWebSocket {
  url: string
  listeners: Record<string, (data: any) => void>
  onclose?: (event: { code: number }) => void

  constructor(url: string) {
    this.url = url
    this.listeners = {}
  }

  addEventListener(event: string, listener: (data: any) => void) {
    this.listeners[event] = listener
  }

  emit(event: string, data: any) {
    if (this.listeners[event]) {
      this.listeners[event](data)
    }
  }

  close() {
    if (this.onclose) {
      this.onclose({ code: 1000 })
    }
  }
}

// Mock global WebSocket
// @ts-ignore
global.WebSocket = vi.fn().mockImplementation((url) => new MockWebSocket(url))

describe('JanInferenceCortexExtension', () => {
  let extension

  beforeEach(() => {
    // Reset mocks
    vi.clearAllMocks()

    // Create a new instance for each test
    extension = new JanInferenceCortexExtension()

    // Mock the getSetting method
    extension.getSetting = vi.fn().mockImplementation((key, defaultValue) => {
      switch (key) {
        case Settings.n_parallel:
          return '4'
        case Settings.cont_batching:
          return true
        case Settings.caching_enabled:
          return true
        case Settings.flash_attn:
          return true
        case Settings.cache_type:
          return 'f16'
        case Settings.use_mmap:
          return true
        case Settings.cpu_threads:
          return ''
        default:
          return defaultValue
      }
    })

    // Mock methods
    extension.registerSettings = vi.fn()
    extension.onLoad = vi.fn()
    extension.clean = vi.fn().mockResolvedValue({})
    extension.healthz = vi.fn().mockResolvedValue({})
    extension.subscribeToEvents = vi.fn()
  })

  describe('onSettingUpdate', () => {
    it('should update n_parallel setting correctly', () => {
      extension.onSettingUpdate(Settings.n_parallel, '8')
      expect(extension.n_parallel).toBe(8)
    })

    it('should update cont_batching setting correctly', () => {
      extension.onSettingUpdate(Settings.cont_batching, false)
      expect(extension.cont_batching).toBe(false)
    })

    it('should update caching_enabled setting correctly', () => {
      extension.onSettingUpdate(Settings.caching_enabled, false)
      expect(extension.caching_enabled).toBe(false)
    })

    it('should update flash_attn setting correctly', () => {
      extension.onSettingUpdate(Settings.flash_attn, false)
      expect(extension.flash_attn).toBe(false)
    })

    it('should update cache_type setting correctly', () => {
      extension.onSettingUpdate(Settings.cache_type, 'f32')
      expect(extension.cache_type).toBe('f32')
    })

    it('should update use_mmap setting correctly', () => {
      extension.onSettingUpdate(Settings.use_mmap, false)
      expect(extension.use_mmap).toBe(false)
    })

    it('should update cpu_threads setting correctly', () => {
      extension.onSettingUpdate(Settings.cpu_threads, '4')
      expect(extension.cpu_threads).toBe(4)
    })

    it('should not update cpu_threads when value is not a number', () => {
      extension.cpu_threads = undefined
      extension.onSettingUpdate(Settings.cpu_threads, 'not-a-number')
      expect(extension.cpu_threads).toBeUndefined()
    })
  })

  describe('onUnload', () => {
    it('should clean up resources correctly', async () => {
      extension.shouldReconnect = true

      await extension.onUnload()

      expect(extension.shouldReconnect).toBe(false)
      expect(extension.clean).toHaveBeenCalled()
      expect(executeOnMain).toHaveBeenCalledWith(NODE, 'dispose')
    })
  })

  describe('loadModel', () => {
    it('should remove llama_model_path and mmproj from settings', async () => {
      // Setup
      const model = {
        id: 'test-model',
        settings: {
          llama_model_path: '/path/to/model',
          mmproj: '/path/to/mmproj',
          some_setting: 'value',
        },
        engine: InferenceEngine.cortex_llamacpp,
      }

      // Mock ky.post
      vi.spyOn(ky, 'post').mockImplementation(() => ({
        // @ts-ignore
        json: () => Promise.resolve({}),
        catch: () => ({
          finally: () => ({
            // @ts-ignore
            then: () => Promise.resolve({}),
          }),
        }),
      }))

      // Setup queue for testing
      extension.queue = { add: vi.fn((fn) => fn()) }

      // Execute
      await extension.loadModel(model)

      // Verify settings were filtered
      expect(model.settings).not.toHaveProperty('llama_model_path')
      expect(model.settings).not.toHaveProperty('mmproj')
      expect(model.settings).toHaveProperty('some_setting')
    })

    it('should convert nitro to cortex_llamacpp engine', async () => {
      // Setup
      const model = {
        id: 'test-model',
        settings: {},
        engine: InferenceEngine.nitro,
      }

      // Mock ky.post
      const mockKyPost = vi.spyOn(ky, 'post').mockImplementation(() => ({
        // @ts-ignore
        json: () => Promise.resolve({}),
        catch: () => ({
          finally: () => ({
            // @ts-ignore
            then: () => Promise.resolve({}),
          }),
        }),
      }))

      // Setup queue for testing
      extension.queue = { add: vi.fn((fn) => fn()) }

      // Execute
      await extension.loadModel(model)

      // Verify API call
      expect(mockKyPost).toHaveBeenCalledWith(
        `${CORTEX_API_URL}/v1/models/start`,
        expect.objectContaining({
          json: expect.objectContaining({
            engine: InferenceEngine.cortex_llamacpp,
          }),
        })
      )
    })
  })

  describe('unloadModel', () => {
    it('should call the correct API endpoint and abort loading if in progress', async () => {
      // Setup
      const model = { id: 'test-model' }
      const mockAbort = vi.fn()
      extension.abortControllers.set(model.id, { abort: mockAbort })

      // Mock ky.post
      const mockKyPost = vi.spyOn(ky, 'post').mockImplementation(() => ({
        // @ts-ignore
        json: () => Promise.resolve({}),
        finally: () => ({
          // @ts-ignore
          then: () => Promise.resolve({}),
        }),
      }))

      // Execute
      await extension.unloadModel(model)

      // Verify API call
      expect(mockKyPost).toHaveBeenCalledWith(
        `${CORTEX_API_URL}/v1/models/stop`,
        expect.objectContaining({
          json: { model: model.id },
        })
      )

      // Verify abort controller was called
      expect(mockAbort).toHaveBeenCalled()
    })
  })

  describe('clean', () => {
    it('should make a DELETE request to destroy process manager', async () => {
      // Mock the ky.delete function directly
      const mockDelete = vi.fn().mockReturnValue({
        catch: vi.fn().mockReturnValue(Promise.resolve({})),
      })

      // Replace the original implementation
      vi.spyOn(ky, 'delete').mockImplementation(mockDelete)

      // Override the clean method to use the real implementation
      // @ts-ignore
      extension.clean = JanInferenceCortexExtension.prototype.clean

      // Call the method
      await extension.clean()

      // Verify the correct API call was made
      expect(mockDelete).toHaveBeenCalledWith(
        `${CORTEX_API_URL}/processmanager/destroy`,
        expect.objectContaining({
          timeout: 2000,
          retry: expect.objectContaining({
            limit: 0,
          }),
        })
      )
    })
  })

  describe('WebSocket events', () => {
    it('should handle WebSocket events correctly', () => {
      // Create a mock implementation for subscribeToEvents that stores the socket
      let messageHandler
      let closeHandler

      // Override the private method
      extension.subscribeToEvents = function () {
        this.socket = new MockWebSocket('ws://localhost:3000/events')
        this.socket.addEventListener('message', (event) => {
          const data = JSON.parse(event.data)

          // Store for testing
          messageHandler = data

          const transferred = data.task.items.reduce(
            (acc, cur) => acc + cur.downloadedBytes,
            0
          )
          const total = data.task.items.reduce(
            (acc, cur) => acc + cur.bytes,
            0
          )
          const percent = total > 0 ? transferred / total : 0

          events.emit(
            data.type === 'DownloadUpdated' ? 'onFileDownloadUpdate' :
            data.type === 'DownloadSuccess' ? 'onFileDownloadSuccess' :
            data.type,
            {
              modelId: data.task.id,
              percent: percent,
              size: {
                transferred: transferred,
                total: total,
              },
              downloadType: data.task.type,
            }
          )

          if (data.task.type === 'Engine') {
            events.emit(EngineEvent.OnEngineUpdate, {
              type: data.type,
              percent: percent,
              id: data.task.id,
            })
          } else if (data.type === 'DownloadSuccess') {
            setTimeout(() => {
              events.emit(ModelEvent.OnModelsUpdate, {
                fetch: true,
              })
            }, 500)
          }
        })

        this.socket.onclose = (event) => {
          closeHandler = event
          // Notify app to update model running state
          events.emit(ModelEvent.OnModelStopped, {})
        }
      }

      // Setup queue
      extension.queue = {
        add: vi.fn((fn) => fn()),
      }

      // Execute the method
      extension.subscribeToEvents()

      // Simulate a message event
      extension.socket.listeners.message({
        data: JSON.stringify({
          type: 'DownloadUpdated',
          task: {
            id: 'test-model',
            type: 'Model',
            items: [
              { downloadedBytes: 50, bytes: 100 },
            ],
          },
        }),
      })

      // Verify event emission
      expect(events.emit).toHaveBeenCalledWith(
        'onFileDownloadUpdate',
        expect.objectContaining({
          modelId: 'test-model',
          percent: 0.5,
        })
      )

      // Simulate a download success event
      vi.useFakeTimers()
      extension.socket.listeners.message({
        data: JSON.stringify({
          type: 'DownloadSuccess',
          task: {
            id: 'test-model',
            type: 'Model',
            items: [
              { downloadedBytes: 100, bytes: 100 },
            ],
          },
        }),
      })

      // Fast-forward time to trigger the timeout
      vi.advanceTimersByTime(500)

      // Verify the ModelEvent.OnModelsUpdate event was emitted
      expect(events.emit).toHaveBeenCalledWith(
        ModelEvent.OnModelsUpdate,
        { fetch: true }
      )

      vi.useRealTimers()

      // Trigger websocket close
      extension.socket.onclose({ code: 1000 })

      // Verify OnModelStopped event was emitted
      expect(events.emit).toHaveBeenCalledWith(
        ModelEvent.OnModelStopped,
        {}
      )
    })
  })
})
@@ -1,435 +0,0 @@
/**
 * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
 * The class provides methods for initializing and stopping a model, and for making inference requests.
 * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
 * @version 1.0.0
 * @module inference-extension/src/index
 */

import {
  Model,
  EngineEvent,
  LocalOAIEngine,
  extractModelLoadParams,
  events,
  ModelEvent,
} from '@janhq/core'
import ky, { KyInstance } from 'ky'

/**
 * Event subscription types of Downloader
 */
enum DownloadTypes {
  DownloadUpdated = 'onFileDownloadUpdate',
  DownloadError = 'onFileDownloadError',
  DownloadSuccess = 'onFileDownloadSuccess',
  DownloadStopped = 'onFileDownloadStopped',
  DownloadStarted = 'onFileDownloadStarted',
}

export enum Settings {
  n_parallel = 'n_parallel',
  cont_batching = 'cont_batching',
  caching_enabled = 'caching_enabled',
  flash_attn = 'flash_attn',
  cache_type = 'cache_type',
  use_mmap = 'use_mmap',
  cpu_threads = 'cpu_threads',
  huggingfaceToken = 'hugging-face-access-token',
  auto_unload_models = 'auto_unload_models',
  context_shift = 'context_shift',
}

type LoadedModelResponse = { data: { engine: string; id: string }[] }

/**
 * A class that implements the InferenceExtension interface from the @janhq/core package.
 * The class provides methods for initializing and stopping a model, and for making inference requests.
 * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
 */
export default class JanInferenceCortexExtension extends LocalOAIEngine {
  nodeModule: string = 'node'

  provider: string = 'cortex'

  shouldReconnect = true

  /** Default Engine model load settings */
  n_parallel?: number
  cont_batching: boolean = false
  caching_enabled: boolean = true
  flash_attn: boolean = true
  use_mmap: boolean = true
  cache_type: string = 'q8'
  cpu_threads?: number
  auto_unload_models: boolean = true
  reasoning_budget = -1 // Default reasoning budget; -1 means unrestricted
  context_shift = false

  /**
   * The URL for making inference requests.
   */
  inferenceUrl = `${CORTEX_API_URL}/v1/chat/completions`

  /**
   * Socket instance of events subscription
   */
  socket?: WebSocket = undefined

  abortControllers = new Map<string, AbortController>()

  api?: KyInstance

  /**
   * Get the API instance
   * @returns
   */
  async apiInstance(): Promise<KyInstance> {
    if (this.api) return this.api
    const apiKey = await window.core?.api.appToken()
    this.api = ky.extend({
      prefixUrl: CORTEX_API_URL,
      headers: apiKey
        ? {
            Authorization: `Bearer ${apiKey}`,
          }
        : {},
      retry: 10,
    })
    return this.api
  }

  /**
   * Authorization headers for the API requests.
   * @returns
   */
  headers(): Promise<HeadersInit> {
    return window.core?.api.appToken().then((token: string) => ({
      Authorization: `Bearer ${token}`,
    }))
  }

  /**
   * Called when the extension is loaded.
   */
  async onLoad() {
    super.onLoad()

    // Register Settings
    this.registerSettings(SETTINGS)

    const numParallel = await this.getSetting<string>(Settings.n_parallel, '')
    if (numParallel.length > 0 && parseInt(numParallel) > 0) {
      this.n_parallel = parseInt(numParallel)
    }
    if (this.n_parallel && this.n_parallel > 1)
      this.cont_batching = await this.getSetting<boolean>(
        Settings.cont_batching,
        false
      )
    this.caching_enabled = await this.getSetting<boolean>(
      Settings.caching_enabled,
      true
    )
    this.flash_attn = await this.getSetting<boolean>(Settings.flash_attn, true)
    this.context_shift = await this.getSetting<boolean>(
      Settings.context_shift,
      false
    )
    this.use_mmap = await this.getSetting<boolean>(Settings.use_mmap, true)
    if (this.caching_enabled)
      this.cache_type = await this.getSetting<string>(Settings.cache_type, 'q8')
    this.auto_unload_models = await this.getSetting<boolean>(
      Settings.auto_unload_models,
      true
    )
    const threads_number = Number(
      await this.getSetting<string>(Settings.cpu_threads, '')
    )

    if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number

    const huggingfaceToken = await this.getSetting<string>(
      Settings.huggingfaceToken,
      ''
    )
    if (huggingfaceToken) {
      this.updateCortexConfig({ huggingface_token: huggingfaceToken })
    }
    this.subscribeToEvents()

    window.addEventListener('beforeunload', () => {
      this.clean()
    })

    // Migrate configs
    if (!localStorage.getItem('cortex_migration_completed')) {
      const config = await this.getCortexConfig()
      console.log('Start cortex.cpp migration', config)
      if (config && config.huggingface_token) {
        this.updateSettings([
          {
            key: Settings.huggingfaceToken,
            controllerProps: {
              value: config.huggingface_token,
            },
          },
        ])
        this.updateCortexConfig({
          huggingface_token: config.huggingface_token,
        })
        localStorage.setItem('cortex_migration_completed', 'true')
      }
    }
  }

  async onUnload() {
    console.log('Clean up cortex.cpp services')
    this.shouldReconnect = false
    this.clean()
    super.onUnload()
  }

  /**
   * Subscribe to settings update and make change accordingly
   * @param key
   * @param value
   */
  onSettingUpdate<T>(key: string, value: T): void {
    if (key === Settings.n_parallel && typeof value === 'string') {
      if (value.length > 0 && parseInt(value) > 0) {
        this.n_parallel = parseInt(value)
      }
    } else if (key === Settings.cont_batching && typeof value === 'boolean') {
      this.cont_batching = value as boolean
    } else if (key === Settings.caching_enabled && typeof value === 'boolean') {
      this.caching_enabled = value as boolean
    } else if (key === Settings.flash_attn && typeof value === 'boolean') {
      this.flash_attn = value as boolean
    } else if (key === Settings.cache_type && typeof value === 'string') {
      this.cache_type = value as string
    } else if (key === Settings.use_mmap && typeof value === 'boolean') {
      this.use_mmap = value as boolean
    } else if (key === Settings.cpu_threads && typeof value === 'string') {
      const threads_number = Number(value)
      if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number
    } else if (key === Settings.huggingfaceToken) {
      this.updateCortexConfig({ huggingface_token: value })
    } else if (key === Settings.auto_unload_models) {
      this.auto_unload_models = value as boolean
    } else if (key === Settings.context_shift && typeof value === 'boolean') {
      this.context_shift = value
    }
  }

  override async loadModel(
    model: Partial<Model> & {
      id: string
      settings?: object
      file_path?: string
    },
    abortController: AbortController
  ): Promise<void> {
    // Cortex will handle these settings
    const { llama_model_path, mmproj, ...settings } = model.settings ?? {}
    model.settings = settings

    const controller = abortController ?? new AbortController()
    const { signal } = controller

    this.abortControllers.set(model.id, controller)

    const loadedModels = await this.activeModels()

    // This is to avoid loading the same model multiple times
    if (loadedModels.some((e: { id: string }) => e.id === model.id)) {
      console.log(`Model ${model.id} already loaded`)
      return
    }
    if (this.auto_unload_models) {
      // Unload the last used model if it is not the same as the current one
      for (const lastUsedModel of loadedModels) {
        if (lastUsedModel.id !== model.id) {
          console.log(`Unloading last used model: ${lastUsedModel.id}`)
          await this.unloadModel(lastUsedModel as Model)
        }
      }
    }
    const modelSettings = extractModelLoadParams(model.settings)
    return await this.apiInstance().then((api) =>
      api
        .post('v1/models/start', {
          json: {
            ...modelSettings,
            model: model.id,
            engine:
              model.engine === 'nitro' // Legacy model cache
                ? 'llama-cpp'
                : model.engine,
            ...(this.n_parallel ? { n_parallel: this.n_parallel } : {}),
            ...(this.use_mmap ? { use_mmap: true } : {}),
            ...(this.caching_enabled ? { caching_enabled: true } : {}),
            ...(this.flash_attn ? { flash_attn: true } : {}),
            ...(this.caching_enabled && this.cache_type
              ? { cache_type: this.cache_type }
              : {}),
            ...(this.cpu_threads && this.cpu_threads > 0
              ? { cpu_threads: this.cpu_threads }
              : {}),
            ...(this.cont_batching && this.n_parallel && this.n_parallel > 1
              ? { cont_batching: this.cont_batching }
              : {}),
            ...(model.id.toLowerCase().includes('jan-nano')
              ? { reasoning_budget: 0 }
              : { reasoning_budget: this.reasoning_budget }),
            ...(this.context_shift !== true // explicit true required to enable context shift
              ? { 'no-context-shift': true }
              : {}),
            ...(modelSettings.ngl === -1 || modelSettings.ngl === undefined
              ? { ngl: 100 }
              : {}),
          },
          timeout: false,
          signal,
        })
        .json()
        .catch(async (e) => {
          throw (await e.response?.json()) ?? e
        })
        .finally(() => this.abortControllers.delete(model.id))
        .then()
    )
  }

  override async unloadModel(model: Model): Promise<void> {
    return this.apiInstance().then((api) =>
      api
        .post('v1/models/stop', {
          json: { model: model.id },
          retry: {
            limit: 0,
          },
        })
        .json()
        .finally(() => {
          this.abortControllers.get(model.id)?.abort()
        })
        .then()
    )
  }

  async activeModels(): Promise<(object & { id: string })[]> {
    return await this.apiInstance()
      .then((e) =>
        e.get('inferences/server/models', {
          retry: {
            limit: 0, // Do not retry
          },
        })
      )
      .then((e) => e.json())
      .then((e) => (e as LoadedModelResponse).data ?? [])
      .catch(() => [])
  }

  /**
   * Clean cortex processes
   * @returns
   */
  private async clean(): Promise<any> {
    return this.apiInstance()
      .then((api) =>
        api.delete('processmanager/destroy', {
          timeout: 2000, // maximum 2 seconds
          retry: {
            limit: 0,
          },
        })
      )
      .catch(() => {
        // Do nothing
      })
  }

  /**
   * Update cortex config
   * @param body
   */
  private async updateCortexConfig(body: {
    [key: string]: any
  }): Promise<void> {
    return this.apiInstance()
      .then((api) => api.patch('v1/configs', { json: body }).then(() => {}))
      .catch((e) => console.debug(e))
  }

  /**
   * Get cortex config
   */
  private async getCortexConfig(): Promise<any> {
    return this.apiInstance()
      .then((api) => api.get('v1/configs').json())
      .catch((e) => console.debug(e))
  }

  /**
   * Subscribe to cortex.cpp websocket events
   */
  private subscribeToEvents() {
    this.socket = new WebSocket(`${CORTEX_SOCKET_URL}/events`)

    this.socket.addEventListener('message', (event) => {
      const data = JSON.parse(event.data)

      const transferred = data.task.items.reduce(
        (acc: number, cur: any) => acc + cur.downloadedBytes,
        0
      )
      const total = data.task.items.reduce(
        (acc: number, cur: any) => acc + cur.bytes,
        0
      )
      const percent = total > 0 ? transferred / total : 0

      events.emit(DownloadTypes[data.type as keyof typeof DownloadTypes], {
        modelId: data.task.id,
        percent: percent,
        size: {
          transferred: transferred,
          total: total,
        },
        downloadType: data.task.type,
      })

      if (data.task.type === 'Engine') {
        events.emit(EngineEvent.OnEngineUpdate, {
          type: DownloadTypes[data.type as keyof typeof DownloadTypes],
          percent: percent,
          id: data.task.id,
        })
      } else {
        if (data.type === DownloadTypes.DownloadSuccess) {
          // Delay for the state update from cortex.cpp
          // Just to be sure
          setTimeout(() => {
            events.emit(ModelEvent.OnModelsUpdate, {
              fetch: true,
            })
          }, 500)
        }
      }
    })

    /**
     * This is to handle the server segfault issue
     */
    this.socket.onclose = (event) => {
      // Notify app to update model running state
      events.emit(ModelEvent.OnModelStopped, {})

      // Reconnect to the /events websocket
      if (this.shouldReconnect) {
        setTimeout(() => this.subscribeToEvents(), 1000)
      }
    }
  }
}
@@ -1,144 +0,0 @@
import { describe, it, expect, vi } from 'vitest'

// Mocks
const CORTEX_API_URL = 'http://localhost:3000'
vi.stubGlobal('CORTEX_API_URL', CORTEX_API_URL)

vi.mock('@janhq/core/node', (actual) => ({
  ...actual(),
  getJanDataFolderPath: () => '',
  appResourcePath: () => '/mock/path',
  log: vi.fn(),
  getSystemResourceInfo: () => {
    return {
      cpu: {
        cores: 1,
        logicalCores: 1,
        threads: 1,
        model: 'model',
        speed: 1,
      },
      memory: {
        total: 1,
        free: 1,
      },
      gpu: {
        model: 'model',
        memory: 1,
        cuda: {
          version: 'version',
          devices: 'devices',
        },
        vulkan: {
          version: 'version',
          devices: 'devices',
        },
      },
    }
  },
}))

vi.mock('fs', () => ({
  default: {
    readdirSync: () => [],
  },
}))

vi.mock('./watchdog', () => {
  return {
    ProcessWatchdog: vi.fn().mockImplementation(() => {
      return {
        start: vi.fn(),
        terminate: vi.fn(),
      }
    }),
  }
})

vi.mock('child_process', () => ({
  exec: () => {
    return {
      stdout: { on: vi.fn() },
      stderr: { on: vi.fn() },
      on: vi.fn(),
    }
  },
  spawn: () => {
    return {
      stdout: { on: vi.fn() },
      stderr: { on: vi.fn() },
      on: vi.fn(),
      pid: '111',
    }
  },
}))

import index from './index'

describe('Cortex extension node interface', () => {
  describe('run', () => {
    it('should start the cortex subprocess on macOS', async () => {
      Object.defineProperty(process, 'platform', {
        value: 'darwin',
      })

      const result = await index.run()
      expect(result).toBeUndefined()
    })

    it('should start the cortex subprocess on Windows', async () => {
      Object.defineProperty(process, 'platform', {
        value: 'win32',
      })

      const result = await index.run()
      expect(result).toBeUndefined()
    })

    it('should set the proper environment variables based on platform', async () => {
      // Test for Windows
      Object.defineProperty(process, 'platform', {
        value: 'win32',
      })
      process.env.PATH = '/original/path'

      await index.run()
      expect(process.env.PATH).toContain('/original/path')

      // Test for non-Windows (macOS/Linux)
      Object.defineProperty(process, 'platform', {
        value: 'darwin',
      })
      process.env.LD_LIBRARY_PATH = '/original/ld/path'

      await index.run()
      expect(process.env.LD_LIBRARY_PATH).toContain('/original/ld/path')
    })
  })

  describe('dispose', () => {
    it('should dispose a model successfully on Mac', async () => {
      Object.defineProperty(process, 'platform', {
        value: 'darwin',
      })

      // Call the dispose function
      const result = index.dispose()

      // Assert that the result is as expected
      expect(result).toBeUndefined()
    })

    it('should kill the subprocess successfully on Windows', async () => {
      Object.defineProperty(process, 'platform', {
        value: 'win32',
      })

      // Call the dispose function
      const result = index.dispose()

      // Assert that the result is as expected
      expect(result).toBeUndefined()
    })
  })
})
@@ -1,103 +0,0 @@
import path from 'path'
import { appResourcePath, getJanDataFolderPath, log } from '@janhq/core/node'
import { ProcessWatchdog } from './watchdog'

let watchdog: ProcessWatchdog | undefined = undefined

/**
 * Spawns the cortex subprocess.
 * @returns A promise that resolves when the cortex subprocess is started.
 */
function run(): Promise<any> {
  log(`[CORTEX]:: Spawning cortex subprocess...`)

  return new Promise<void>(async (resolve, reject) => {
    // let gpuVisibleDevices = systemInfo?.gpuSetting?.gpus_in_use.join(',') ?? ''
    let binaryName = `cortex-server${
      process.platform === 'win32' ? '.exe' : ''
    }`
    const binPath = path.join(__dirname, '..', 'bin')

    const executablePath = path.join(binPath, binaryName)

    addEnvPaths(binPath)

    const sharedPath = path.join(appResourcePath(), 'shared')
    // Execute the binary
    log(`[CORTEX]:: Spawn cortex at path: ${executablePath}`)

    const dataFolderPath = getJanDataFolderPath()
    if (watchdog) {
      watchdog.terminate()
    }

    // The port the cortex subprocess listens on, derived from CORTEX_API_URL
    const LOCAL_PORT = CORTEX_API_URL.split(':').pop() ?? '39291'

    watchdog = new ProcessWatchdog(
      executablePath,
      [
        '--start-server',
        '--port',
        LOCAL_PORT.toString(),
        '--config_file_path',
        `${path.join(dataFolderPath, '.janrc')}`,
        '--data_folder_path',
        dataFolderPath,
        'config',
        '--api_keys',
        process.env.appToken ?? 'cortex.cpp',
      ],
      {
        env: {
          ...process.env,
          // CUDA_VISIBLE_DEVICES: gpuVisibleDevices,
          // // Vulkan - Support 1 device at a time for now
          // ...(gpuVisibleDevices?.length > 0 && {
          //   GGML_VK_VISIBLE_DEVICES: gpuVisibleDevices,
          // }),
        },
        cwd: sharedPath,
      }
    )
    watchdog.start()
    resolve()
  })
}

/**
 * Every module should have a dispose function
 * This will be called when the extension is unloaded and should clean up any resources
 * Also called when app is closed
 */
function dispose() {
  watchdog?.terminate()
}

/**
 * Set the environment paths for the cortex subprocess
 * @param dest
 */
function addEnvPaths(dest: string) {
  // Add engine path to the PATH and LD_LIBRARY_PATH
  if (process.platform === 'win32') {
    process.env.PATH = (process.env.PATH || '').concat(path.delimiter, dest)
  } else {
    process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
      path.delimiter,
      dest
    )
  }
}

/**
 * Cortex process info
 */
export interface CortexProcessInfo {
  isRunning: boolean
}

export default {
  run,
  dispose,
}
|
|
||||||
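For context, this module's default export is driven by the extension host roughly as follows. This is a minimal sketch, not part of the diff, and assumes the host imports the file above and that `CORTEX_API_URL` is defined in scope:

```typescript
// Hypothetical lifecycle of the node module above (removed by this commit).
import cortex from './index'

await cortex.run() // spawns cortex-server under the ProcessWatchdog
// ... serve requests against CORTEX_API_URL ...
cortex.dispose() // terminates the watchdog and its child process
```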
@ -1,84 +0,0 @@
import { log } from '@janhq/core/node'
import { spawn, ChildProcess } from 'child_process'
import { EventEmitter } from 'events'

interface WatchdogOptions {
  cwd?: string
  restartDelay?: number
  maxRestarts?: number
  env?: NodeJS.ProcessEnv
}

export class ProcessWatchdog extends EventEmitter {
  private command: string
  private args: string[]
  private options: WatchdogOptions
  private process: ChildProcess | null
  private restartDelay: number
  private maxRestarts: number
  private restartCount: number
  private isTerminating: boolean

  constructor(command: string, args: string[], options: WatchdogOptions = {}) {
    super()
    this.command = command
    this.args = args
    this.options = options
    this.process = null
    this.restartDelay = options.restartDelay || 5000
    this.maxRestarts = options.maxRestarts || 5
    this.restartCount = 0
    this.isTerminating = false
  }

  start(): void {
    this.spawnProcess()
  }

  private spawnProcess(): void {
    if (this.isTerminating) return

    log(`Starting process: ${this.command} ${this.args.join(' ')}`)
    this.process = spawn(this.command, this.args, this.options)

    this.process.stdout?.on('data', (data: Buffer) => {
      log(`Process output: ${data}`)
      this.emit('output', data.toString())
    })

    this.process.stderr?.on('data', (data: Buffer) => {
      log(`Process error: ${data}`)
      this.emit('error', data.toString())
    })

    this.process.on('close', (code: number | null) => {
      log(`Process exited with code ${code}`)
      this.emit('close', code)
      if (!this.isTerminating) {
        this.restartProcess()
      }
    })
  }

  private restartProcess(): void {
    if (this.restartCount < this.maxRestarts) {
      this.restartCount++
      log(
        `Restarting process in ${this.restartDelay}ms (Attempt ${this.restartCount}/${this.maxRestarts})`
      )
      setTimeout(() => this.spawnProcess(), this.restartDelay)
    } else {
      log('Max restart attempts reached. Exiting watchdog.')
      this.emit('maxRestartsReached')
    }
  }

  terminate(): void {
    this.isTerminating = true
    if (this.process) {
      log('Terminating watched process...')
      this.process.kill()
    }
    this.emit('terminated')
  }
}
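`ProcessWatchdog` is a small self-restarting wrapper around `child_process.spawn`. A sketch of how a caller wires it up (hypothetical binary path and args; the event names match the `emit` calls in the class above):

```typescript
// Illustrative consumer of ProcessWatchdog (hypothetical paths/args).
const dog = new ProcessWatchdog('/path/to/cortex-server', ['--start-server'], {
  restartDelay: 2000, // retry every 2s...
  maxRestarts: 3, // ...at most 3 times before 'maxRestartsReached'
})

dog.on('output', (line: string) => console.log(line))
// Note: 'error' here carries stderr text; attaching a listener also prevents
// EventEmitter from throwing when the event fires with no handler.
dog.on('error', (line: string) => console.error(line))
dog.on('close', (code: number | null) => console.log(`exited: ${code}`))
dog.on('maxRestartsReached', () => console.error('giving up'))

dog.start()
// ...later, on shutdown:
dog.terminate()
```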
@ -1,15 +0,0 @@
{
  "compilerOptions": {
    "moduleResolution": "node",
    "target": "es2016",
    "module": "esnext",
    "strict": true,
    "sourceMap": true,
    "esModuleInterop": true,
    "outDir": "dist",
    "importHelpers": true,
    "typeRoots": ["node_modules/@types"]
  },
  "include": ["src"],
  "exclude": ["src/**/*.test.ts"]
}
@ -21,6 +21,7 @@
|
|||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@janhq/core": "../../core/package.tgz",
|
"@janhq/core": "../../core/package.tgz",
|
||||||
|
"@tauri-apps/api": "^1.4.0",
|
||||||
"fetch-retry": "^5.0.6",
|
"fetch-retry": "^5.0.6",
|
||||||
"ulidx": "^2.3.0"
|
"ulidx": "^2.3.0"
|
||||||
},
|
},
|
||||||
|
|||||||
@ -6,10 +6,55 @@
 * @module llamacpp-extension/src/index
 */

-import { RemoteOAIEngine, getJanDataFolderPath, fs, ModelCapability, Model } from '@janhq/core'
-
-export enum Settings {
-  port = 'port',
-}
+import {
+  AIEngine,
+  localProvider,
+  getJanDataFolderPath,
+  fs,
+  Model,
+} from '@janhq/core'
+
+import { invoke } from '@tauri-apps/api/tauri'
+import {
+  LocalProvider,
+  ModelInfo,
+  ListOptions,
+  ListResult,
+  PullOptions,
+  PullResult,
+  LoadOptions,
+  SessionInfo,
+  UnloadOptions,
+  UnloadResult,
+  ChatOptions,
+  ChatCompletion,
+  ChatCompletionChunk,
+  DeleteOptions,
+  DeleteResult,
+  ImportOptions,
+  ImportResult,
+  AbortPullOptions,
+  AbortPullResult,
+  ChatCompletionRequest,
+} from './types'
+
+/**
+ * Helper to convert GGUF model filename to a more structured ID/name
+ * Example: "mistral-7b-instruct-v0.2.Q4_K_M.gguf" -> { baseModelId: "mistral-7b-instruct-v0.2", quant: "Q4_K_M" }
+ **/
+function parseGGUFFileName(filename: string): {
+  baseModelId: string
+  quant?: string
+} {
+  const nameWithoutExt = filename.replace(/\.gguf$/i, '')
+  // Try to split by common quantization patterns (e.g., .Q4_K_M, -IQ2_XS)
+  const match = nameWithoutExt.match(
+    /^(.*?)[-_]([QqIiFf]\w{1,3}_\w{1,3}|[Qq]\d+_[KkSsMmXxLl\d]+|[IiQq]\d+_[XxSsMm]+|[Qq]\d+)$/
+  )
+  if (match && match[1] && match[2]) {
+    return { baseModelId: match[1], quant: match[2] }
+  }
+  return { baseModelId: nameWithoutExt }
+}

 /**
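As a quick sanity check of the helper added above (illustrative calls, not part of the diff): the separator class in the regex is `[-_]`, so the dot-separated name shown in the doc comment actually falls through to the no-quant branch:

```typescript
// Illustrative calls against parseGGUFFileName (not part of the diff).
parseGGUFFileName('mistral-7b-instruct-v0.2-Q4_K_M.gguf')
// -> { baseModelId: 'mistral-7b-instruct-v0.2', quant: 'Q4_K_M' }

parseGGUFFileName('mistral-7b-instruct-v0.2.Q4_K_M.gguf')
// -> { baseModelId: 'mistral-7b-instruct-v0.2.Q4_K_M' }
// '.' is not in the [-_] separator class, so no quant is detected here.
```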
@ -17,99 +62,246 @@ export enum Settings {
 * The class provides methods for initializing and stopping a model, and for making inference requests.
 * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
 */
-export default class LlamacppProvider extends RemoteOAIEngine {
-  inferenceUrl: string = ''
-  baseURL: string = ''
-  provider: string = ENGINE
-
-  override async onLoad(): Promise<void> {
-    super.onLoad()
-
-    // Register Settings
-    this.registerSettings(SETTINGS)
-
-    // register models
-    const models = await this.listModels()
-    this.registerModels(models)
-
-    // NOTE: port 0 may mean request free port from OS. we may want
-    // to take advantage of this. llama-server --port 0 on macOS works.
-    const port = await this.getSetting<number>(Settings.port, 0)
-    this.updateBaseUrl(port)
-  }
-
-  // onSettingUpdate<T>(key: string, value: T): void {
-  //   if (key === Settings.apiKey) {
-  //     this.apiKey = value as string
-  //   } else if (key === Settings.baseUrl) {
-  //     if (typeof value !== 'string') return
-  //     this.updateBaseUrl(value)
-  //   }
-  // }
-
-  updateBaseUrl(value: number): void {
-    if (value == 0) {
-      // set to default value
-      SETTINGS.forEach((setting) => {
-        if (setting.key === Settings.port) {
-          value = setting.controllerProps.value as number
-        }
-      })
-    }
-    this.baseURL = `http://127.0.0.1:${value}`
-    this.inferenceUrl = `${this.baseURL}/chat/completions`
-  }
-
-  async listModels(): Promise<Model[]> {
-    let modelIds = []
-
-    const modelsFolder = `${await getJanDataFolderPath()}/models`
-
-    // cortexso models
-    const cortexsoFolder = `${modelsFolder}/cortex.so`
-    const modelDirs = await fs.readdirSync(cortexsoFolder)
-    for (const modelDir of modelDirs) {
-      const modelName = modelDir.split('/').pop()
-
-      // TODO: try removing this check
-      // skip files start with . e.g. .DS_store
-      if (!modelName || modelName.startsWith('.')) continue
-
-      const variantDirs = await fs.readdirSync(modelDir)
-      for (const variantDir of variantDirs) {
-        // NOTE: we can't detect unfinished download here
-        const ggufPath = `${variantDir}/model.gguf`
-
-        if (await fs.existsSync(ggufPath)) {
-          const variantName = variantDir.split('/').pop()
-          modelIds.push(`${modelName}/${variantName}`)
-        }
-      }
-    }
-
-    // TODO: list models under huggingface.co
-
-    const models = modelIds.map((modelId) => {
-      return {
-        sources: [],
-        object: 'model',
-        version: '1.0',
-        format: 'api',
-        id: modelId,
-        name: modelId,
-        created: 0,
-        description: '',
-        settings: {},
-        parameters: {},
-        metadata: {
-          author: '',
-          tags: [],
-          size: 0,
-        },
-        engine: this.provider,
-        capabilities: [ModelCapability.completion],
-      }
-    })
-    return models
+export default class inference_llamacpp_extension
+  extends AIEngine
+  implements localProvider
+{
+  provider: string = 'llamacpp'
+  readonly providerId: string = 'llamcpp'
+
+  private activeSessions: Map<string, SessionInfo> = new Map()
+
+  private modelsBasePath!: string
+
+  override async onLoad(): Promise<void> {
+    super.onLoad() // Calls registerEngine() from AIEngine
+    this.registerSettings(SETTINGS_DEFINITIONS)
+
+    const customPath = await this.getSetting<string>(
+      LlamaCppSettings.ModelsPath,
+      ''
+    )
+    if (customPath && (await fs.exists(customPath))) {
+      this.modelsBasePath = customPath
+    } else {
+      this.modelsBasePath = await path.join(
+        await getJanDataFolderPath(),
+        'models',
+        ENGINE_ID
+      )
+    }
+    await fs.createDirAll(this.modelsBasePath)
+
+    console.log(
+      `${this.providerId} provider loaded. Models path: ${this.modelsBasePath}`
+    )
+
+    // Optionally, list and register models with the core system if AIEngine expects it
+    // const models = await this.listModels({ providerId: this.providerId });
+    // this.registerModels(this.mapModelInfoToCoreModel(models)); // mapModelInfoToCoreModel would be a helper
+  }
+
+  async getModelsPath(): Promise<string> {
+    // Ensure modelsBasePath is initialized
+    if (!this.modelsBasePath) {
+      const customPath = await this.getSetting<string>(
+        LlamaCppSettings.ModelsPath,
+        ''
+      )
+      if (customPath && (await fs.exists(customPath))) {
+        this.modelsBasePath = customPath
+      } else {
+        this.modelsBasePath = await path.join(
+          await getJanDataFolderPath(),
+          'models',
+          ENGINE_ID
+        )
+      }
+      await fs.createDirAll(this.modelsBasePath)
+    }
+    return this.modelsBasePath
+  }
+
+  async listModels(_opts: ListOptions): Promise<ListResult> {
+    const modelsDir = await this.getModelsPath()
+    const result: ModelInfo[] = []
+
+    try {
+      if (!(await fs.exists(modelsDir))) {
+        await fs.createDirAll(modelsDir)
+        return []
+      }
+
+      const entries = await fs.readDir(modelsDir)
+      for (const entry of entries) {
+        if (entry.name?.endsWith('.gguf') && entry.isFile) {
+          const modelPath = await path.join(modelsDir, entry.name)
+          const stats = await fs.stat(modelPath) // Tauri's fs.stat or Node's fs.statSync
+          const parsedName = parseGGUFFileName(entry.name)
+
+          result.push({
+            id: `${parsedName.baseModelId}${parsedName.quant ? `/${parsedName.quant}` : ''}`, // e.g., "mistral-7b/Q4_0"
+            name: entry.name.replace('.gguf', ''), // Or a more human-friendly name
+            quant_type: parsedName.quant,
+            providerId: this.providerId,
+            sizeBytes: stats.size,
+            path: modelPath,
+            tags: [this.providerId, parsedName.quant || 'unknown_quant'].filter(
+              Boolean
+            ) as string[],
+          })
+        }
+      }
+    } catch (error) {
+      console.error(`[${this.providerId}] Error listing models:`, error)
+      // Depending on desired behavior, either throw or return empty/partial list
+    }
+    return result
+  }
+
+  // pullModel
+  async pullModel(opts: PullOptions): Promise<PullResult> {
+    // TODO: Implement pullModel
+    return 0;
+  }
+
+  // abortPull
+  async abortPull(opts: AbortPullOptions): Promise<AbortPullResult> {
+    // TODO: implement abortPull
+  }
+
+  async loadModel(opts: LoadOptions): Promise<SessionInfo> {
+    if (opts.providerId !== this.providerId) {
+      throw new Error('Invalid providerId for LlamaCppProvider.loadModel')
+    }
+
+    const sessionId = uuidv4()
+    const loadParams = {
+      model_path: opts.modelPath,
+      session_id: sessionId, // Pass sessionId to Rust for tracking
+      // Default llama.cpp server options, can be overridden by opts.options
+      port: opts.options?.port ?? 0, // 0 for dynamic port assignment by OS
+      n_gpu_layers:
+        opts.options?.n_gpu_layers ??
+        (await this.getSetting(LlamaCppSettings.DefaultNGpuLayers, -1)),
+      n_ctx:
+        opts.options?.n_ctx ??
+        (await this.getSetting(LlamaCppSettings.DefaultNContext, 2048)),
+      // Spread any other options from opts.options
+      ...(opts.options || {}),
+    }
+
+    try {
+      console.log(
+        `[${this.providerId}] Requesting to load model: ${opts.modelPath} with options:`,
+        loadParams
+      )
+      // This matches the Rust handler: core::utils::extensions::inference_llamacpp_extension::server::load
+      const rustResponse: {
+        session_id: string
+        port: number
+        model_path: string
+        settings: Record<string, unknown>
+      } = await invoke('plugin:llamacpp|load', { params: loadParams }) // Adjust namespace if needed
+
+      if (!rustResponse || !rustResponse.port) {
+        throw new Error(
+          'Rust load function did not return expected port or session info.'
+        )
+      }
+
+      const sessionInfo: SessionInfo = {
+        sessionId: rustResponse.session_id, // Use sessionId from Rust if it regenerates/confirms it
+        port: rustResponse.port,
+        modelPath: rustResponse.model_path,
+        providerId: this.providerId,
+        settings: rustResponse.settings, // Settings actually used by the server
+      }
+
+      this.activeSessions.set(sessionInfo.sessionId, sessionInfo)
+      console.log(
+        `[${this.providerId}] Model loaded: ${sessionInfo.modelPath} on port ${sessionInfo.port}, session: ${sessionInfo.sessionId}`
+      )
+      return sessionInfo
+    } catch (error) {
+      console.error(
+        `[${this.providerId}] Error loading model ${opts.modelPath}:`,
+        error
+      )
+      throw error // Re-throw to be handled by the caller
+    }
+  }
+
+  async unloadModel(opts: UnloadOptions): Promise<UnloadResult> {
+    if (opts.providerId !== this.providerId) {
+      return { success: false, error: 'Invalid providerId' }
+    }
+    const session = this.activeSessions.get(opts.sessionId)
+    if (!session) {
+      return {
+        success: false,
+        error: `No active session found for id: ${opts.sessionId}`,
+      }
+    }
+
+    try {
+      console.log(
+        `[${this.providerId}] Requesting to unload model for session: ${opts.sessionId}`
+      )
+      // Matches: core::utils::extensions::inference_llamacpp_extension::server::unload
+      const rustResponse: { success: boolean; error?: string } = await invoke(
+        'plugin:llamacpp|unload',
+        { sessionId: opts.sessionId }
+      )
+
+      if (rustResponse.success) {
+        this.activeSessions.delete(opts.sessionId)
+        console.log(
+          `[${this.providerId}] Session ${opts.sessionId} unloaded successfully.`
+        )
+        return { success: true }
+      } else {
+        console.error(
+          `[${this.providerId}] Failed to unload session ${opts.sessionId}: ${rustResponse.error}`
+        )
+        return {
+          success: false,
+          error: rustResponse.error || 'Unknown error during unload',
+        }
+      }
+    } catch (error: any) {
+      console.error(
+        `[${this.providerId}] Error invoking unload for session ${opts.sessionId}:`,
+        error
+      )
+      return { success: false, error: error.message || String(error) }
+    }
+  }
+
+  async chat(
+    opts: ChatOptions
+  ): Promise<ChatCompletion | AsyncIterable<ChatCompletionChunk>> {}
+
+  async deleteModel(opts: DeleteOptions): Promise<DeleteResult> {}
+
+  async importModel(opts: ImportOptions): Promise<ImportResult> {}
+
+  override async loadModel(model: Model): Promise<any> {
+    if (model.engine?.toString() !== this.provider) return Promise.resolve()
+    console.log(
+      `[${this.providerId} AIEngine] Received OnModelInit for:`,
+      model.id
+    )
+    return super.loadModel(model)
+  }
+
+  override async unloadModel(model?: Model): Promise<any> {
+    if (model?.engine && model.engine.toString() !== this.provider)
+      return Promise.resolve()
+    console.log(
+      `[${this.providerId} AIEngine] Received OnModelStop for:`,
+      model?.id || 'all models'
+    )
+    return super.unloadModel(model)
   }
 }
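Taken together, the new session-based flow is list, load, then unload by session id. A minimal sketch of host-side usage (hypothetical wiring, not part of the diff; assumes the extension host constructs the class, `onLoad` has run, and the `plugin:llamacpp` Tauri plugin is registered):

```typescript
// Hypothetical host-side usage of the provider above.
const provider = new inference_llamacpp_extension()
await provider.onLoad()

const models = await provider.listModels({ providerId: provider.providerId })
if (models.length > 0 && models[0].path) {
  // Asks the Rust side (plugin:llamacpp|load) to spawn a llama-server.
  const session = await provider.loadModel({
    providerId: provider.providerId,
    modelPath: models[0].path,
    options: { n_ctx: 4096 }, // overrides DefaultNContext for this session
  })

  // The OpenAI-compatible server now listens on 127.0.0.1:<session.port>.
  console.log(`llama-server ready on port ${session.port}`)

  await provider.unloadModel({
    providerId: provider.providerId,
    sessionId: session.sessionId,
  })
}
```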
extensions/llamacpp-extension/src/types.ts (new file, 199 lines)
@ -0,0 +1,199 @@
// src/providers/local/types.ts

// --- Re-using OpenAI types (minimal definitions for this example) ---
// In a real project, you'd import these from 'openai' or a shared types package.
export interface ChatCompletionRequestMessage {
  role: 'system' | 'user' | 'assistant' | 'tool';
  content: string | null;
  name?: string;
  tool_calls?: any[]; // Simplified
  tool_call_id?: string;
}

export interface ChatCompletionRequest {
  model: string; // Model ID, though for local it might be implicit via sessionId
  messages: ChatCompletionRequestMessage[];
  temperature?: number | null;
  top_p?: number | null;
  n?: number | null;
  stream?: boolean | null;
  stop?: string | string[] | null;
  max_tokens?: number;
  presence_penalty?: number | null;
  frequency_penalty?: number | null;
  logit_bias?: Record<string, number> | null;
  user?: string;
  // ... TODO: other OpenAI params
}

export interface ChatCompletionChunkChoiceDelta {
  content?: string | null;
  role?: 'system' | 'user' | 'assistant' | 'tool';
  tool_calls?: any[]; // Simplified
}

export interface ChatCompletionChunkChoice {
  index: number;
  delta: ChatCompletionChunkChoiceDelta;
  finish_reason?: 'stop' | 'length' | 'tool_calls' | 'content_filter' | 'function_call' | null;
}

export interface ChatCompletionChunk {
  id: string;
  object: 'chat.completion.chunk';
  created: number;
  model: string;
  choices: ChatCompletionChunkChoice[];
  system_fingerprint?: string;
}

export interface ChatCompletionChoice {
  index: number;
  message: ChatCompletionRequestMessage; // Response message
  finish_reason: 'stop' | 'length' | 'tool_calls' | 'content_filter' | 'function_call';
  logprobs?: any; // Simplified
}

export interface ChatCompletion {
  id: string;
  object: 'chat.completion';
  created: number;
  model: string; // Model ID used
  choices: ChatCompletionChoice[];
  usage?: {
    prompt_tokens: number;
    completion_tokens: number;
    total_tokens: number;
  };
  system_fingerprint?: string;
}
// --- End OpenAI types ---

// Shared model metadata
export interface ModelInfo {
  id: string; // e.g. "qwen3-4B" or "org/model/quant"
  name: string; // human-readable, e.g., "Qwen3 4B Q4_0"
  quant_type?: string; // q4_0 (optional as it might be part of ID or name)
  providerId: string; // e.g. "llama.cpp"
  sizeBytes: number;
  tags?: string[];
  path?: string; // Absolute path to the model file, if applicable
  // Additional provider-specific metadata can be added here
  [key: string]: any;
}

// 1. /list
export interface ListOptions {
  providerId: string; // To specify which provider if a central manager calls this
}
export type ListResult = ModelInfo[];

// 2. /pull
export interface PullOptions {
  providerId: string;
  modelId: string; // Identifier for the model to pull (e.g., from a known registry)
  downloadUrl: string; // URL to download the model from
  /** optional callback to receive download progress */
  onProgress?: (progress: { percent: number; downloadedBytes: number; totalBytes?: number; }) => void;
}
export interface PullResult {
  success: boolean;
  path?: string; // local file path to the pulled model
  error?: string;
  modelInfo?: ModelInfo; // Info of the pulled model
}

// 3. /load
export interface LoadOptions {
  providerId: string;
  modelPath: string;
  /** any provider-specific tuning options for llama.cpp server */
  options?: {
    port?: number; // 0 means dynamic port
    n_gpu_layers?: number;
    n_ctx?: number; // context size
    // ... other llama-cpp-python or llama.cpp server flags
    [key: string]: any;
  };
}

export interface SessionInfo {
  sessionId: string; // opaque handle for unload/chat
  port: number; // llama-server output port (corrected from portid)
  modelPath: string; // path of the loaded model
  providerId: string;
  settings: Record<string, unknown>; // The actual settings used to load
}

// 4. /unload
export interface UnloadOptions {
  providerId: string;
  sessionId: string;
}
export interface UnloadResult {
  success: boolean;
  error?: string;
}

// 5. /chat
export interface ChatOptions {
  providerId: string;
  sessionId: string;
  /** Full OpenAI ChatCompletionRequest payload */
  payload: ChatCompletionRequest;
}
// Output for /chat will be Promise<ChatCompletion> for non-streaming
// or Promise<AsyncIterable<ChatCompletionChunk>> for streaming

// 6. /delete
export interface DeleteOptions {
  providerId: string;
  modelId: string; // The ID of the model to delete (implies finding its path)
  modelPath?: string; // Optionally, direct path can be provided
}
export interface DeleteResult {
  success: boolean;
  error?: string;
}

// 7. /import
export interface ImportOptions {
  providerId: string;
  sourcePath: string; // Path to the local model file to import
  desiredModelId?: string; // Optional: if user wants to name it specifically
}
export interface ImportResult {
  success: boolean;
  modelInfo?: ModelInfo;
  error?: string;
}

// 8. /abortPull
export interface AbortPullOptions {
  providerId: string;
  modelId: string; // The modelId whose download is to be aborted
}
export interface AbortPullResult {
  success: boolean;
  error?: string;
}

// The interface for any local provider
export interface LocalProvider {
  readonly providerId: string;

  listModels(opts: ListOptions): Promise<ListResult>;
  pullModel(opts: PullOptions): Promise<PullResult>;
  loadModel(opts: LoadOptions): Promise<SessionInfo>;
  unloadModel(opts: UnloadOptions): Promise<UnloadResult>;
  chat(opts: ChatOptions): Promise<ChatCompletion | AsyncIterable<ChatCompletionChunk>>;
  deleteModel(opts: DeleteOptions): Promise<DeleteResult>;
  importModel(opts: ImportOptions): Promise<ImportResult>;
  abortPull(opts: AbortPullOptions): Promise<AbortPullResult>;

  // Optional: for direct access to underlying client if needed for specific streaming cases
  getChatClient?(sessionId: string): any; // e.g., an OpenAI client instance configured for the session
}
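The chat contract returns either a complete `ChatCompletion` or an async iterable of `ChatCompletionChunk`s. A sketch of how a caller can branch on that union (hypothetical provider instance, not part of the diff):

```typescript
// Sketch: consuming the ChatCompletion | AsyncIterable<ChatCompletionChunk> union.
async function runChat(provider: LocalProvider, sessionId: string) {
  const result = await provider.chat({
    providerId: provider.providerId,
    sessionId,
    payload: {
      model: 'local', // for local providers the model is implicit via sessionId
      messages: [{ role: 'user', content: 'Hello!' }],
      stream: true,
    },
  })

  if (Symbol.asyncIterator in (result as any)) {
    // Streaming path: deltas arrive incrementally.
    for await (const chunk of result as AsyncIterable<ChatCompletionChunk>) {
      process.stdout.write(chunk.choices[0]?.delta.content ?? '')
    }
  } else {
    // Non-streaming path: one complete response.
    console.log((result as ChatCompletion).choices[0].message.content)
  }
}
```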
extensions/yarn.lock (3700 lines): diff suppressed because it is too large
@ -3,12 +3,9 @@ use std::{
     fs::{self, File},
     io::Read,
     path::PathBuf,
-    sync::Arc,
 };
 use tar::Archive;
-use tauri::{App, Emitter, Listener, Manager};
-use tauri_plugin_shell::process::{CommandChild, CommandEvent};
-use tauri_plugin_shell::ShellExt;
+use tauri::{App, Emitter, Manager};
 use tauri_plugin_store::StoreExt;
 use tokio::sync::Mutex;
 use tokio::time::{sleep, Duration}; // Using tokio::sync::Mutex
@ -200,22 +197,18 @@ pub fn setup_mcp(app: &App) {
     let state = app.state::<AppState>();
     let servers = state.mcp_servers.clone();
     let app_handle: tauri::AppHandle = app.handle().clone();

     // Setup kill-mcp-servers event listener (similar to cortex kill-sidecar)
     let app_handle_for_kill = app_handle.clone();
     app_handle.listen("kill-mcp-servers", move |_event| {
         let app_handle = app_handle_for_kill.clone();
         tauri::async_runtime::spawn(async move {
             log::info!("Received kill-mcp-servers event - cleaning up MCP servers");

             let app_state = app_handle.state::<AppState>();

             // Stop all running MCP servers
             if let Err(e) = super::mcp::stop_mcp_servers(app_state.mcp_servers.clone()).await {
                 log::error!("Failed to stop MCP servers: {}", e);
                 return;
             }

             // Clear active servers and restart counts
             {
                 let mut active_servers = app_state.mcp_active_servers.lock().await;
@ -225,11 +218,9 @@ pub fn setup_mcp(app: &App) {
                 let mut restart_counts = app_state.mcp_restart_counts.lock().await;
                 restart_counts.clear();
             }

             log::info!("MCP servers cleaned up successfully");
         });
     });

     tauri::async_runtime::spawn(async move {
         if let Err(e) = run_mcp_commands(&app_handle, servers).await {
             log::error!("Failed to run mcp commands: {}", e);
@ -471,65 +462,22 @@ pub fn setup_sidecar(app: &App) -> Result<(), String> {
     Ok(())
 }

-//
-// Clean up function to kill the sidecar process
-//
-pub fn clean_up() {
-    #[cfg(windows)]
-    {
-        use std::os::windows::process::CommandExt;
-        let _ = std::process::Command::new("taskkill")
-            .args(["-f", "-im", "llama-server.exe"])
-            .creation_flags(0x08000000)
-            .spawn();
-        let _ = std::process::Command::new("taskkill")
-            .args(["-f", "-im", "cortex-server.exe"])
-            .creation_flags(0x08000000)
-            .spawn();
-    }
-    #[cfg(unix)]
-    {
-        let _ = std::process::Command::new("pkill")
-            .args(["-f", "llama-server"])
-            .spawn();
-        let _ = std::process::Command::new("pkill")
-            .args(["-f", "cortex-server"])
-            .spawn();
-    }
-    log::info!("Clean up function executed, sidecar processes killed.");
-}
-
-fn copy_dir_all(src: PathBuf, dst: PathBuf) -> Result<(), String> {
-    fs::create_dir_all(&dst).map_err(|e| e.to_string())?;
-    log::info!("Copying from {:?} to {:?}", src, dst);
-    for entry in fs::read_dir(src).map_err(|e| e.to_string())? {
-        let entry = entry.map_err(|e| e.to_string())?;
-        let ty = entry.file_type().map_err(|e| e.to_string())?;
-        if ty.is_dir() {
-            copy_dir_all(entry.path(), dst.join(entry.file_name())).map_err(|e| e.to_string())?;
-        } else {
-            fs::copy(entry.path(), dst.join(entry.file_name())).map_err(|e| e.to_string())?;
-        }
-    }
-    Ok(())
-}
-
-pub fn setup_engine_binaries(app: &App) -> Result<(), String> {
-    // Copy engine binaries to app_data
-    let app_data_dir = get_jan_data_folder_path(app.handle().clone());
-    let binaries_dir = app.handle().path().resource_dir().unwrap().join("binaries");
-    let resources_dir = app
-        .handle()
-        .path()
-        .resource_dir()
-        .unwrap()
-        .join("resources");
-
-    if let Err(e) = copy_dir_all(binaries_dir, app_data_dir.clone()) {
-        log::error!("Failed to copy binaries: {}", e);
-    }
-    if let Err(e) = copy_dir_all(resources_dir, app_data_dir.clone()) {
-        log::error!("Failed to copy resources: {}", e);
-    }
-    Ok(())
-}
+//pub fn setup_engine_binaries(app: &App) -> Result<(), String> {
+//    // Copy engine binaries to app_data
+//    let app_data_dir = app.handle().path().app_data_dir().unwrap();
+//    let binaries_dir = app.handle().path().resource_dir().unwrap().join("binaries");
+//    let themes_dir = app
+//        .handle()
+//        .path()
+//        .resource_dir()
+//        .unwrap()
+//        .join("resources");
+//
+//    if let Err(e) = copy_dir_all(binaries_dir, app_data_dir.clone()) {
+//        log::error!("Failed to copy binaries: {}", e);
+//    }
+//    if let Err(e) = copy_dir_all(themes_dir, app_data_dir.clone()) {
+//        log::error!("Failed to copy themes: {}", e);
+//    }
+//    Ok(())
+//}
@ -10,8 +10,8 @@ use crate::core::state::AppState;
 pub enum ServerError {
     #[error("Server is already running")]
     AlreadyRunning,
-    #[error("Server is not running")]
-    NotRunning,
+    // #[error("Server is not running")]
+    // NotRunning,
     #[error("Failed to locate server binary: {0}")]
     BinaryNotFound(String),
     #[error("Failed to determine resource path: {0}")]
@ -1,16 +1,14 @@
 mod core;
 use core::{
     cmd::get_jan_data_folder_path,
-    setup::{self, setup_engine_binaries, setup_mcp, setup_sidecar},
+    setup::{self, setup_mcp},
     state::{generate_app_token, AppState},
     utils::download::DownloadManagerState,
 };
 use std::{collections::HashMap, sync::Arc};

-use tauri::Emitter;
 use tokio::sync::Mutex;

-use crate::core::setup::clean_up;

 #[cfg_attr(mobile, tauri::mobile_entry_point)]
 pub fn run() {
@ -122,17 +120,17 @@ pub fn run() {
             log::error!("Failed to install extensions: {}", e);
         }
         setup_mcp(app);
-        setup_sidecar(app).expect("Failed to setup sidecar");
-        setup_engine_binaries(app).expect("Failed to setup engine binaries");
         Ok(())
     })
     .on_window_event(|window, event| match event {
         tauri::WindowEvent::CloseRequested { .. } => {
             if window.label() == "main" {
-                window.emit("kill-sidecar", ()).unwrap();
                 window.emit("kill-mcp-servers", ()).unwrap();
                 clean_up();
             }
+            let client = Client::new();
+            let url = "http://127.0.0.1:39291/processManager/destroy";
+            let _ = client.delete(url).send();
         }
         _ => {}
     })