Merge pull request #2916 from janhq/dev

Release/0.4.13 to main
Van Pham 2024-05-16 21:35:15 +07:00 committed by GitHub
commit f2947c14f5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
98 changed files with 1848 additions and 556 deletions

View File

@@ -57,19 +57,19 @@ jobs:
 rm -rf ~/jan
 make clean
-# - name: Get Commit Message for PR
-# if : github.event_name == 'pull_request'
-# run: |
-# echo "REPORT_PORTAL_DESCRIPTION=${{github.event.after}})" >> $GITHUB_ENV
-# - name: Get Commit Message for push event
-# if : github.event_name == 'push'
-# run: |
-# echo "REPORT_PORTAL_DESCRIPTION=${{github.sha}})" >> $GITHUB_ENV
-# - name: "Config report portal"
-# run: |
-# make update-playwright-config REPORT_PORTAL_URL=${{ secrets.REPORT_PORTAL_URL }} REPORT_PORTAL_API_KEY=${{ secrets.REPORT_PORTAL_API_KEY }} REPORT_PORTAL_PROJECT_NAME=${{ secrets.REPORT_PORTAL_PROJECT_NAME }} REPORT_PORTAL_LAUNCH_NAME="Jan App macos" REPORT_PORTAL_DESCRIPTION="${{env.REPORT_PORTAL_DESCRIPTION}}"
+- name: Get Commit Message for PR
+if : github.event_name == 'pull_request'
+run: |
+echo "REPORT_PORTAL_DESCRIPTION=${{github.event.after}})" >> $GITHUB_ENV
+- name: Get Commit Message for push event
+if : github.event_name == 'push'
+run: |
+echo "REPORT_PORTAL_DESCRIPTION=${{github.sha}})" >> $GITHUB_ENV
+- name: "Config report portal"
+run: |
+make update-playwright-config REPORT_PORTAL_URL=${{ secrets.REPORT_PORTAL_URL }} REPORT_PORTAL_API_KEY=${{ secrets.REPORT_PORTAL_API_KEY }} REPORT_PORTAL_PROJECT_NAME=${{ secrets.REPORT_PORTAL_PROJECT_NAME }} REPORT_PORTAL_LAUNCH_NAME="Jan App macos" REPORT_PORTAL_DESCRIPTION="${{env.REPORT_PORTAL_DESCRIPTION}}"
 - name: Linter and test
 run: |
@@ -78,9 +78,9 @@ jobs:
 make test
 env:
 CSC_IDENTITY_AUTO_DISCOVERY: "false"
-# TURBO_API: "${{ secrets.TURBO_API }}"
-# TURBO_TEAM: "macos"
-# TURBO_TOKEN: "${{ secrets.TURBO_TOKEN }}"
+TURBO_API: "${{ secrets.TURBO_API }}"
+TURBO_TEAM: "macos"
+TURBO_TOKEN: "${{ secrets.TURBO_TOKEN }}"
 test-on-macos-pr-target:
 if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name != github.repository
@@ -141,16 +141,16 @@ jobs:
 }
 make clean
-# - name: Get Commit Message for push event
-# if : github.event_name == 'push'
-# shell: bash
-# run: |
-# echo "REPORT_PORTAL_DESCRIPTION=${{github.sha}}" >> $GITHUB_ENV
-# - name: "Config report portal"
-# shell: bash
-# run: |
-# make update-playwright-config REPORT_PORTAL_URL=${{ secrets.REPORT_PORTAL_URL }} REPORT_PORTAL_API_KEY=${{ secrets.REPORT_PORTAL_API_KEY }} REPORT_PORTAL_PROJECT_NAME=${{ secrets.REPORT_PORTAL_PROJECT_NAME }} REPORT_PORTAL_LAUNCH_NAME="Jan App Windows ${{ matrix.antivirus-tools }}" REPORT_PORTAL_DESCRIPTION="${{env.REPORT_PORTAL_DESCRIPTION}}"
+- name: Get Commit Message for push event
+if : github.event_name == 'push'
+shell: bash
+run: |
+echo "REPORT_PORTAL_DESCRIPTION=${{github.sha}}" >> $GITHUB_ENV
+- name: "Config report portal"
+shell: bash
+run: |
+make update-playwright-config REPORT_PORTAL_URL=${{ secrets.REPORT_PORTAL_URL }} REPORT_PORTAL_API_KEY=${{ secrets.REPORT_PORTAL_API_KEY }} REPORT_PORTAL_PROJECT_NAME=${{ secrets.REPORT_PORTAL_PROJECT_NAME }} REPORT_PORTAL_LAUNCH_NAME="Jan App Windows ${{ matrix.antivirus-tools }}" REPORT_PORTAL_DESCRIPTION="${{env.REPORT_PORTAL_DESCRIPTION}}"
 - name: Linter and test
 shell: powershell
@@ -158,10 +158,10 @@ jobs:
 npm config set registry ${{ secrets.NPM_PROXY }} --global
 yarn config set registry ${{ secrets.NPM_PROXY }} --global
 make test
-# env:
-# TURBO_API: "${{ secrets.TURBO_API }}"
-# TURBO_TEAM: "windows"
-# TURBO_TOKEN: "${{ secrets.TURBO_TOKEN }}"
+env:
+TURBO_API: "${{ secrets.TURBO_API }}"
+TURBO_TEAM: "windows"
+TURBO_TOKEN: "${{ secrets.TURBO_TOKEN }}"
 test-on-windows-pr:
 if: (github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository)
 runs-on: windows-desktop-default-windows-security
@@ -189,16 +189,16 @@ jobs:
 }
 make clean
-# - name: Get Commit Message for PR
-# if : github.event_name == 'pull_request'
-# shell: bash
-# run: |
-# echo "REPORT_PORTAL_DESCRIPTION=${{github.event.after}}" >> $GITHUB_ENV
-# - name: "Config report portal"
-# shell: bash
-# run: |
-# make update-playwright-config REPORT_PORTAL_URL=${{ secrets.REPORT_PORTAL_URL }} REPORT_PORTAL_API_KEY=${{ secrets.REPORT_PORTAL_API_KEY }} REPORT_PORTAL_PROJECT_NAME=${{ secrets.REPORT_PORTAL_PROJECT_NAME }} REPORT_PORTAL_LAUNCH_NAME="Jan App Windows" REPORT_PORTAL_DESCRIPTION="${{env.REPORT_PORTAL_DESCRIPTION}}"
+- name: Get Commit Message for PR
+if : github.event_name == 'pull_request'
+shell: bash
+run: |
+echo "REPORT_PORTAL_DESCRIPTION=${{github.event.after}}" >> $GITHUB_ENV
+- name: "Config report portal"
+shell: bash
+run: |
+make update-playwright-config REPORT_PORTAL_URL=${{ secrets.REPORT_PORTAL_URL }} REPORT_PORTAL_API_KEY=${{ secrets.REPORT_PORTAL_API_KEY }} REPORT_PORTAL_PROJECT_NAME=${{ secrets.REPORT_PORTAL_PROJECT_NAME }} REPORT_PORTAL_LAUNCH_NAME="Jan App Windows" REPORT_PORTAL_DESCRIPTION="${{env.REPORT_PORTAL_DESCRIPTION}}"
 - name: Linter and test
 shell: powershell
@@ -206,10 +206,10 @@ jobs:
 npm config set registry ${{ secrets.NPM_PROXY }} --global
 yarn config set registry ${{ secrets.NPM_PROXY }} --global
 make test
-# env:
-# TURBO_API: "${{ secrets.TURBO_API }}"
-# TURBO_TEAM: "windows"
-# TURBO_TOKEN: "${{ secrets.TURBO_TOKEN }}"
+env:
+TURBO_API: "${{ secrets.TURBO_API }}"
+TURBO_TEAM: "windows"
+TURBO_TOKEN: "${{ secrets.TURBO_TOKEN }}"
 test-on-windows-pr-target:
 if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name != github.repository
@@ -266,20 +266,20 @@ jobs:
 rm -rf ~/jan
 make clean
-# - name: Get Commit Message for PR
-# if : github.event_name == 'pull_request'
-# run: |
-# echo "REPORT_PORTAL_DESCRIPTION=${{github.event.after}}" >> $GITHUB_ENV
-# - name: Get Commit Message for push event
-# if : github.event_name == 'push'
-# run: |
-# echo "REPORT_PORTAL_DESCRIPTION=${{github.sha}}" >> $GITHUB_ENV
-# - name: "Config report portal"
-# shell: bash
-# run: |
-# make update-playwright-config REPORT_PORTAL_URL=${{ secrets.REPORT_PORTAL_URL }} REPORT_PORTAL_API_KEY=${{ secrets.REPORT_PORTAL_API_KEY }} REPORT_PORTAL_PROJECT_NAME=${{ secrets.REPORT_PORTAL_PROJECT_NAME }} REPORT_PORTAL_LAUNCH_NAME="Jan App Linux" REPORT_PORTAL_DESCRIPTION="${{env.REPORT_PORTAL_DESCRIPTION}}"
+- name: Get Commit Message for PR
+if : github.event_name == 'pull_request'
+run: |
+echo "REPORT_PORTAL_DESCRIPTION=${{github.event.after}}" >> $GITHUB_ENV
+- name: Get Commit Message for push event
+if : github.event_name == 'push'
+run: |
+echo "REPORT_PORTAL_DESCRIPTION=${{github.sha}}" >> $GITHUB_ENV
+- name: "Config report portal"
+shell: bash
+run: |
+make update-playwright-config REPORT_PORTAL_URL=${{ secrets.REPORT_PORTAL_URL }} REPORT_PORTAL_API_KEY=${{ secrets.REPORT_PORTAL_API_KEY }} REPORT_PORTAL_PROJECT_NAME=${{ secrets.REPORT_PORTAL_PROJECT_NAME }} REPORT_PORTAL_LAUNCH_NAME="Jan App Linux" REPORT_PORTAL_DESCRIPTION="${{env.REPORT_PORTAL_DESCRIPTION}}"
 - name: Linter and test
 run: |
@@ -288,10 +288,10 @@ jobs:
 npm config set registry ${{ secrets.NPM_PROXY }} --global
 yarn config set registry ${{ secrets.NPM_PROXY }} --global
 make test
-# env:
-# TURBO_API: "${{ secrets.TURBO_API }}"
-# TURBO_TEAM: "linux"
-# TURBO_TOKEN: "${{ secrets.TURBO_TOKEN }}"
+env:
+TURBO_API: "${{ secrets.TURBO_API }}"
+TURBO_TEAM: "linux"
+TURBO_TOKEN: "${{ secrets.TURBO_TOKEN }}"
 test-on-ubuntu-pr-target:
 runs-on: [self-hosted, Linux, ubuntu-desktop]

View File

@@ -41,7 +41,7 @@ on:
 jobs:
 build-macos:
-runs-on: macos-silicon
+runs-on: macos-latest
 environment: production
 permissions:
 contents: write
@@ -55,15 +55,9 @@ jobs:
 uses: actions/setup-node@v1
 with:
 node-version: 20
-- name: Unblock keychain
-run: |
-security unlock-keychain -p ${{ secrets.KEYCHAIN_PASSWORD }} ~/Library/Keychains/login.keychain-db
-# - uses: actions/setup-python@v5
-# with:
-# python-version: '3.11'
-# - name: Install jq
-# uses: dcarbone/install-jq-action@v2.0.1
+- name: Install jq
+uses: dcarbone/install-jq-action@v2.0.1
 - name: Update app version based on latest release tag with build number
 if: inputs.public_provider != 'github'
@@ -101,17 +95,17 @@ jobs:
 env:
 VERSION_TAG: ${{ inputs.new_version }}
-# - name: Get Cer for code signing
-# run: base64 -d <<< "$CODE_SIGN_P12_BASE64" > /tmp/codesign.p12
-# shell: bash
-# env:
-# CODE_SIGN_P12_BASE64: ${{ secrets.CODE_SIGN_P12_BASE64 }}
-# - uses: apple-actions/import-codesign-certs@v2
-# continue-on-error: true
-# with:
-# p12-file-base64: ${{ secrets.CODE_SIGN_P12_BASE64 }}
-# p12-password: ${{ secrets.CODE_SIGN_P12_PASSWORD }}
+- name: Get Cer for code signing
+run: base64 -d <<< "$CODE_SIGN_P12_BASE64" > /tmp/codesign.p12
+shell: bash
+env:
+CODE_SIGN_P12_BASE64: ${{ secrets.CODE_SIGN_P12_BASE64 }}
+- uses: apple-actions/import-codesign-certs@v2
+continue-on-error: true
+with:
+p12-file-base64: ${{ secrets.CODE_SIGN_P12_BASE64 }}
+p12-password: ${{ secrets.CODE_SIGN_P12_PASSWORD }}
 - name: Build and publish app to cloudflare r2 or github artifactory
 if: inputs.public_provider != 'github'
@@ -125,9 +119,9 @@ jobs:
 fi
 env:
 GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-# CSC_LINK: "/tmp/codesign.p12"
-# CSC_KEY_PASSWORD: ${{ secrets.CODE_SIGN_P12_PASSWORD }}
-# CSC_IDENTITY_AUTO_DISCOVERY: "true"
+CSC_LINK: "/tmp/codesign.p12"
+CSC_KEY_PASSWORD: ${{ secrets.CODE_SIGN_P12_PASSWORD }}
+CSC_IDENTITY_AUTO_DISCOVERY: "true"
 APPLE_ID: ${{ secrets.APPLE_ID }}
 APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }}
 APP_PATH: "."
@@ -143,9 +137,9 @@ jobs:
 make build-and-publish
 env:
 GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-# CSC_LINK: "/tmp/codesign.p12"
-# CSC_KEY_PASSWORD: ${{ secrets.CODE_SIGN_P12_PASSWORD }}
-# CSC_IDENTITY_AUTO_DISCOVERY: "true"
+CSC_LINK: "/tmp/codesign.p12"
+CSC_KEY_PASSWORD: ${{ secrets.CODE_SIGN_P12_PASSWORD }}
+CSC_IDENTITY_AUTO_DISCOVERY: "true"
 APPLE_ID: ${{ secrets.APPLE_ID }}
 APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }}
 APP_PATH: "."

View File

@@ -159,4 +159,3 @@ jobs:
 with:
 name: latest-mac-x64
 path: ./electron/dist/latest-mac.yml

View File

@@ -68,6 +68,10 @@ export function requestInference(
 let cachedLines = ''
 for (const line of lines) {
 try {
+if (transformResponse) {
+content += transformResponse(line)
+subscriber.next(content ?? '')
+} else {
 const toParse = cachedLines + line
 if (!line.includes('data: [DONE]')) {
 const data = JSON.parse(toParse.replace('data: ', ''))
@@ -77,6 +81,7 @@ export function requestInference(
 }
 if (content !== '') subscriber.next(content)
 }
+}
 } catch {
 cachedLines = line
 }
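For context, the added branch lets an engine hand `requestInference` a `transformResponse` callback that turns each raw streamed line into plain text before it is pushed to the subscriber. A minimal sketch of such a callback (illustrative only; the actual line format depends on the remote provider):

```typescript
// Sketch of a transformResponse callback in the shape the new branch expects:
// it receives one raw line from the response stream and returns text to append.
const transformResponse = (line: string): string => {
  try {
    // Assume the provider streams JSON fragments such as {"text": "..."} per line.
    const parsed = JSON.parse(line.replace('data: ', ''))
    return typeof parsed.text === 'string' ? parsed.text : ''
  } catch {
    return '' // ignore keep-alive or partial lines
  }
}
```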

View File

@@ -9,11 +9,11 @@ export const SUPPORTED_MODEL_FORMAT = '.gguf'
 // The URL for the Nitro subprocess
 const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}`
 // The URL for the Nitro subprocess to load a model
-export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel`
+export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`
 // The URL for the Nitro subprocess to validate a model
-export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus`
+export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus`
 // The URL for the Nitro subprocess to kill itself
 export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
-export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/llamacpp/chat_completion` // default nitro url
+export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/server/chat_completion` // default nitro url
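The renamed routes remain OpenAI-style chat completion endpoints; a direct call against the new path might look like the sketch below (the model id is a placeholder, and the default local address 127.0.0.1:3928 is assumed from values used elsewhere in this changeset):

```typescript
// Sketch: calling the renamed chat completion route directly.
async function testChatCompletion(): Promise<void> {
  const res = await fetch(
    'http://127.0.0.1:3928/inferences/server/chat_completion',
    {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model: 'my-local-model', // placeholder model id
        messages: [{ role: 'user', content: 'Hello' }],
        stream: false,
      }),
    }
  )
  console.log(await res.json())
}
```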

View File

@@ -144,12 +144,12 @@ const runNitroAndLoadModel = async (modelId: string, modelSettings: NitroModelSe
 }
 const spawnNitroProcess = async (): Promise<void> => {
-log(`[SERVER]::Debug: Spawning Nitro subprocess...`)
+log(`[SERVER]::Debug: Spawning cortex subprocess...`)
 let binaryFolder = join(
 getJanExtensionsPath(),
 '@janhq',
-'inference-nitro-extension',
+'inference-cortex-extension',
 'dist',
 'bin'
 )
@@ -160,7 +160,7 @@ const spawnNitroProcess = async (): Promise<void> => {
 const args: string[] = ['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()]
 // Execute the binary
 log(
-`[SERVER]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
+`[SERVER]::Debug: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}`
 )
 subprocess = spawn(
 executableOptions.executablePath,
@@ -184,12 +184,12 @@
 })
 subprocess.on('close', (code: any) => {
-log(`[SERVER]::Debug: Nitro exited with code: ${code}`)
+log(`[SERVER]::Debug: cortex exited with code: ${code}`)
 subprocess = undefined
 })
 tcpPortUsed.waitUntilUsed(NITRO_DEFAULT_PORT, 300, 30000).then(() => {
-log(`[SERVER]::Debug: Nitro is ready`)
+log(`[SERVER]::Debug: cortex is ready`)
 })
 }
@@ -203,13 +203,13 @@ const executableNitroFile = (): NitroExecutableOptions => {
 let binaryFolder = join(
 getJanExtensionsPath(),
 '@janhq',
-'inference-nitro-extension',
+'inference-cortex-extension',
 'dist',
 'bin'
 )
 let cudaVisibleDevices = ''
-let binaryName = 'nitro'
+let binaryName = 'cortex-cpp'
 /**
 * The binary folder is different for each platform.
 */
@@ -228,12 +228,16 @@
 }
 cudaVisibleDevices = nvidiaInfo['gpu_highest_vram']
 }
-binaryName = 'nitro.exe'
+binaryName = 'cortex-cpp.exe'
 } else if (process.platform === 'darwin') {
 /**
 * For MacOS: mac-universal both Silicon and InteL
 */
-binaryFolder = join(binaryFolder, 'mac-universal')
+if(process.arch === 'arm64') {
+binaryFolder = join(binaryFolder, 'mac-arm64')
+} else {
+binaryFolder = join(binaryFolder, 'mac-amd64')
+}
 } else {
 /**
 * For Linux: linux-cpu, linux-cuda-11-7, linux-cuda-12-0
@@ -300,7 +304,7 @@ const loadLLMModel = async (settings: NitroModelSettings): Promise<Response> =>
 retryDelay: 500,
 })
 .then((res: any) => {
-log(`[SERVER]::Debug: Load model success with response ${JSON.stringify(res)}`)
+log(`[SERVER]::Debug: Load model request with response ${JSON.stringify(res)}`)
 return Promise.resolve(res)
 })
 .catch((err: any) => {
@@ -327,7 +331,7 @@ export const stopModel = async (_modelId: string) => {
 })
 }, 5000)
 const tcpPortUsed = require('tcp-port-used')
-log(`[SERVER]::Debug: Request to kill Nitro`)
+log(`[SERVER]::Debug: Request to kill cortex`)
 fetch(NITRO_HTTP_KILL_URL, {
 method: 'DELETE',

View File

@@ -4,7 +4,7 @@ import { log } from './logger'
 export const getSystemResourceInfo = async (): Promise<SystemResourceInfo> => {
 const cpu = await physicalCpuCount()
-log(`[NITRO]::CPU information - ${cpu}`)
+log(`[CORTEX]::CPU information - ${cpu}`)
 return {
 numCpuPhysicalCore: cpu,

View File

@@ -19,6 +19,7 @@ export enum NativeRoute {
 showMainWindow = 'showMainWindow',
 quickAskSizeUpdated = 'quickAskSizeUpdated',
+ackDeepLink = 'ackDeepLink',
 }
 /**
@@ -45,6 +46,8 @@ export enum AppEvent {
 onUserSubmitQuickAsk = 'onUserSubmitQuickAsk',
 onSelectedText = 'onSelectedText',
+onDeepLink = 'onDeepLink',
 }
 export enum DownloadRoute {

View File

@@ -151,4 +151,8 @@ export function handleAppIPCs() {
 async (_event, heightOffset: number): Promise<void> =>
 windowManager.expandQuickAskWindow(heightOffset)
 )
+ipcMain.handle(NativeRoute.ackDeepLink, async (_event): Promise<void> => {
+windowManager.ackDeepLink()
+})
 }

View File

@@ -1,6 +1,6 @@
 import { app, BrowserWindow } from 'electron'
-import { join } from 'path'
+import { join, resolve } from 'path'
 /**
 * Managers
 **/
@@ -39,15 +39,44 @@ const quickAskUrl = `${mainUrl}/search`
 const gotTheLock = app.requestSingleInstanceLock()
+if (process.defaultApp) {
+if (process.argv.length >= 2) {
+app.setAsDefaultProtocolClient('jan', process.execPath, [
+resolve(process.argv[1]),
+])
+}
+} else {
+app.setAsDefaultProtocolClient('jan')
+}
+const createMainWindow = () => {
+const startUrl = app.isPackaged ? `file://${mainPath}` : mainUrl
+windowManager.createMainWindow(preloadPath, startUrl)
+}
 app
 .whenReady()
 .then(() => {
 if (!gotTheLock) {
 app.quit()
 throw new Error('Another instance of the app is already running')
+} else {
+app.on(
+'second-instance',
+(_event, commandLine, _workingDirectory): void => {
+if (process.platform === 'win32' || process.platform === 'linux') {
+// this is for handling deeplink on windows and linux
+// since those OS will emit second-instance instead of open-url
+const url = commandLine.pop()
+if (url) {
+windowManager.sendMainAppDeepLink(url)
+}
+}
+windowManager.showMainWindow()
+}
+)
 }
 })
+.then(setupReactDevTool)
 .then(setupCore)
 .then(createUserSpace)
 .then(migrateExtensions)
@@ -60,6 +89,7 @@ app
 .then(registerGlobalShortcuts)
 .then(() => {
 if (!app.isPackaged) {
+setupReactDevTool()
 windowManager.mainWindow?.webContents.openDevTools()
 }
 })
@@ -75,11 +105,11 @@ app
 })
 })
-app.on('second-instance', (_event, _commandLine, _workingDirectory) => {
-windowManager.showMainWindow()
+app.on('open-url', (_event, url) => {
+windowManager.sendMainAppDeepLink(url)
 })
-app.on('before-quit', function (evt) {
+app.on('before-quit', function (_event) {
 trayManager.destroyCurrentTray()
 })
@@ -104,11 +134,6 @@ function createQuickAskWindow() {
 windowManager.createQuickAskWindow(preloadPath, startUrl)
 }
-function createMainWindow() {
-const startUrl = app.isPackaged ? `file://${mainPath}` : mainUrl
-windowManager.createMainWindow(preloadPath, startUrl)
-}
 /**
 * Handles various IPC messages from the renderer process.
 */

View File

@@ -14,9 +14,9 @@ class WindowManager {
 private _quickAskWindowVisible = false
 private _mainWindowVisible = false
+private deeplink: string | undefined
 /**
 * Creates a new window instance.
-* @param {Electron.BrowserWindowConstructorOptions} options - The options to create the window with.
 * @returns The created window instance.
 */
 createMainWindow(preloadPath: string, startUrl: string) {
@@ -29,6 +29,17 @@ class WindowManager {
 },
 })
+if (process.platform === 'win32' || process.platform === 'linux') {
+/// This is work around for windows deeplink.
+/// second-instance event is not fired when app is not open, so the app
+/// does not received the deeplink.
+const commandLine = process.argv.slice(1)
+if (commandLine.length > 0) {
+const url = commandLine[0]
+this.sendMainAppDeepLink(url)
+}
+}
 /* Load frontend app to the window */
 this.mainWindow.loadURL(startUrl)
@@ -123,6 +134,22 @@
 )
 }
+/**
+* Try to send the deep link to the main app.
+*/
+sendMainAppDeepLink(url: string): void {
+this.deeplink = url
+const interval = setInterval(() => {
+if (!this.deeplink) clearInterval(interval)
+const mainWindow = this.mainWindow
+if (mainWindow) {
+mainWindow.webContents.send(AppEvent.onDeepLink, this.deeplink)
+if (mainWindow.isMinimized()) mainWindow.restore()
+mainWindow.focus()
+}
+}, 500)
+}
 cleanUp(): void {
 if (!this.mainWindow?.isDestroyed()) {
 this.mainWindow?.close()
@@ -137,6 +164,13 @@
 this._quickAskWindowVisible = false
 }
 }
+/**
+* Acknowledges that the window has received a deep link. We can remove it.
+*/
+ackDeepLink() {
+this.deeplink = undefined
+}
 }
 export const windowManager = new WindowManager()
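The renderer side of this handshake is not part of the diff; a sketch of what it could look like, assuming a preload bridge that exposes `ipcRenderer` (the string values match the `AppEvent.onDeepLink` and `NativeRoute.ackDeepLink` entries added above):

```typescript
// Renderer-side sketch (not part of this PR): consume the deep link and ack it
// so windowManager stops re-sending it on its 500 ms interval.
import { ipcRenderer } from 'electron'

ipcRenderer.on('onDeepLink', (_event, url: string) => {
  // Handle the jan:// URL, e.g. route to the screen it refers to.
  console.log('Received deep link:', url)
  // Acknowledge receipt; the main process then clears its stored deeplink.
  ipcRenderer.invoke('ackDeepLink')
})
```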

View File

@@ -61,6 +61,14 @@
 "include": "scripts/uninstaller.nsh",
 "deleteAppDataOnUninstall": true
 },
+"protocols": [
+{
+"name": "Jan",
+"schemes": [
+"jan"
+]
+}
+],
 "artifactName": "jan-${os}-${arch}-${version}.${ext}"
 },
 "scripts": {
@@ -96,7 +104,7 @@
 "request": "^2.88.2",
 "request-progress": "^3.0.0",
 "ulidx": "^2.3.0",
-"@nut-tree/nut-js": "^4.0.0"
+"@kirillvakalov/nut-tree__nut-js": "4.2.1-2"
 },
 "devDependencies": {
 "@electron/notarize": "^2.1.0",

View File

@@ -1,7 +1,4 @@
-import { app } from 'electron'
 export const setupReactDevTool = async () => {
-if (!app.isPackaged) {
-// Which means you're running from source code
 const { default: installExtension, REACT_DEVELOPER_TOOLS } = await import(
 'electron-devtools-installer'
@@ -14,4 +11,3 @@ export const setupReactDevTool = async () => {
 // Only log the error and don't throw it because it's not critical
 }
 }
-}

View File

@@ -1,5 +1,5 @@
 import { clipboard, globalShortcut } from 'electron'
-import { keyboard, Key } from '@nut-tree/nut-js'
+import { keyboard, Key } from "@kirillvakalov/nut-tree__nut-js"
 /**
 * Gets selected text by synthesizing the keyboard shortcut

View File

@@ -10,11 +10,12 @@ export function toolRetrievalUpdateTextSplitter(
 }
 export async function toolRetrievalIngestNewDocument(
 file: string,
+model: string,
 engine: string
 ) {
 const filePath = path.join(getJanDataFolderPath(), normalizeFilePath(file))
 const threadPath = path.dirname(filePath.replace('files', ''))
-retrieval.updateEmbeddingEngine(engine)
+retrieval.updateEmbeddingEngine(model, engine)
 return retrieval
 .ingestAgentKnowledge(filePath, `${threadPath}/memory`)
 .catch((err) => {

View File

@@ -28,14 +28,14 @@ export class Retrieval {
 })
 }
-public updateEmbeddingEngine(engine: string): void {
+public updateEmbeddingEngine(model: string, engine: string): void {
 // Engine settings are not compatible with the current embedding model params
 // Switch case manually for now
 if (engine === 'nitro') {
 this.embeddingModel = new OpenAIEmbeddings(
-{ openAIApiKey: 'nitro-embedding' },
+{ openAIApiKey: 'nitro-embedding', model },
 // TODO: Raw settings
-{ basePath: 'http://127.0.0.1:3928/v1' }
+{ basePath: 'http://127.0.0.1:3928/v1' },
 )
 } else {
 // Fallback to OpenAI Settings
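With the new signature the caller forwards the embedding model id together with the engine name; a minimal usage sketch (the model id here is illustrative, not taken from the PR):

```typescript
// Sketch: the embedding model id is now passed through to OpenAIEmbeddings,
// so the local nitro/cortex endpoint knows which embedding model to serve.
retrieval.updateEmbeddingEngine('nomic-embed-text-v1.5', 'nitro') // illustrative model id
```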

View File

@@ -36,6 +36,7 @@ export class RetrievalTool extends InferenceTool {
 NODE,
 'toolRetrievalIngestNewDocument',
 docFile,
+data.model?.id,
 data.model?.engine
 )
 } else {

View File

@@ -0,0 +1,79 @@
# Anthropic Engine Extension
Created using Jan extension example
# Create a Jan Extension using Typescript
Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀
## Create Your Own Extension
To create your own extension, you can use this repository as a template! Just follow the below instructions:
1. Click the Use this template button at the top of the repository
2. Select Create a new repository
3. Select an owner and name for your new repository
4. Click Create repository
5. Clone your new repository
## Initial Setup
After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.
> [!NOTE]
>
> You'll need to have a reasonably modern version of
> [Node.js](https://nodejs.org) handy. If you are using a version manager like
> [`nodenv`](https://github.com/nodenv/nodenv) or
> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
> root of your repository to install the version specified in
> [`package.json`](./package.json). Otherwise, 20.x or later should work!
1. :hammer_and_wrench: Install the dependencies
```bash
npm install
```
1. :building_construction: Package the TypeScript for distribution
```bash
npm run bundle
```
1. :white_check_mark: Check your artifact
There will be a tgz file in your extension directory now
## Update the Extension Metadata
The [`package.json`](package.json) file defines metadata about your extension, such as
extension name, main entry, description and version.
When you copy this repository, update `package.json` with the name, description for your extension.
## Update the Extension Code
The [`src/`](./src/) directory is the heart of your extension! This contains the
source code that will be run when your extension functions are invoked. You can replace the
contents of this directory with your own code.
There are a few things to keep in mind when writing your extension code:
- Most Jan Extension functions are processed asynchronously.
In `index.ts`, you will see that the extension function will return a `Promise<any>`.
```typescript
import { events, MessageEvent, MessageRequest } from '@janhq/core'
function onStart(): Promise<any> {
return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
this.inference(data)
)
}
```
For more information about the Jan Extension Core module, see the
[documentation](https://github.com/janhq/jan/blob/main/core/README.md).
So, what are you waiting for? Go ahead and start customizing your extension!

View File

@@ -0,0 +1,43 @@
{
"name": "@janhq/inference-anthropic-extension",
"productName": "Anthropic Inference Engine",
"version": "1.0.0",
"description": "This extension enables Anthropic chat completion API calls",
"main": "dist/index.js",
"module": "dist/module.js",
"engine": "anthropic",
"author": "Jan <service@jan.ai>",
"license": "AGPL-3.0",
"scripts": {
"build": "tsc -b . && webpack --config webpack.config.js",
"build:publish": "rimraf *.tgz --glob && yarn build && npm pack && cpx *.tgz ../../pre-install",
"sync:core": "cd ../.. && yarn build:core && cd extensions && rm yarn.lock && cd inference-anthropic-extension && yarn && yarn build:publish"
},
"exports": {
".": "./dist/index.js",
"./main": "./dist/module.js"
},
"devDependencies": {
"cpx": "^1.5.0",
"rimraf": "^3.0.2",
"webpack": "^5.88.2",
"webpack-cli": "^5.1.4",
"ts-loader": "^9.5.0"
},
"dependencies": {
"@janhq/core": "file:../../core",
"fetch-retry": "^5.0.6",
"ulidx": "^2.3.0"
},
"engines": {
"node": ">=18.0.0"
},
"files": [
"dist/*",
"package.json",
"README.md"
],
"bundleDependencies": [
"fetch-retry"
]
}

View File

@@ -0,0 +1,83 @@
[
{
"sources": [
{
"url": "https://www.anthropic.com/"
}
],
"id": "claude-3-opus-20240229",
"object": "model",
"name": "Claude 3 Opus",
"version": "1.0",
"description": "Claude 3 Opus is a powerful model suitables for highly complex task.",
"format": "api",
"settings": {},
"parameters": {
"max_tokens": 4096,
"temperature": 0.7,
"stream": false
},
"metadata": {
"author": "Anthropic",
"tags": [
"General",
"Big Context Length"
]
},
"engine": "anthropic"
},
{
"sources": [
{
"url": "https://www.anthropic.com/"
}
],
"id": "claude-3-sonnet-20240229",
"object": "model",
"name": "Claude 3 Sonnet",
"version": "1.0",
"description": "Claude 3 Sonnet is an ideal model balance of intelligence and speed for enterprise workloads.",
"format": "api",
"settings": {},
"parameters": {
"max_tokens": 4096,
"temperature": 0.7,
"stream": false
},
"metadata": {
"author": "Anthropic",
"tags": [
"General",
"Big Context Length"
]
},
"engine": "anthropic"
},
{
"sources": [
{
"url": "https://www.anthropic.com/"
}
],
"id": "claude-3-haiku-20240307",
"object": "model",
"name": "Claude 3 Haiku",
"version": "1.0",
"description": "Claude 3 Haiku is the fastest model provides near-instant responsiveness.",
"format": "api",
"settings": {},
"parameters": {
"max_tokens": 4096,
"temperature": 0.7,
"stream": false
},
"metadata": {
"author": "Anthropic",
"tags": [
"General",
"Big Context Length"
]
},
"engine": "anthropic"
}
]

View File

@@ -0,0 +1,23 @@
[
{
"key": "chat-completions-endpoint",
"title": "Chat Completions Endpoint",
"description": "The endpoint to use for chat completions. See the [Anthropic API documentation](https://docs.anthropic.com/claude/docs/intro-to-claude) for more information.",
"controllerType": "input",
"controllerProps": {
"placeholder": "https://api.anthropic.com/v1/messages",
"value": "https://api.anthropic.com/v1/messages"
}
},
{
"key": "anthropic-api-key",
"title": "API Key",
"description": "The Anthropic API uses API keys for authentication. Visit your [API Keys](https://console.anthropic.com/settings/keys) page to retrieve the API key you'll use in your requests.",
"controllerType": "input",
"controllerProps": {
"placeholder": "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
"value": "",
"type": "password"
}
}
]

View File

@@ -0,0 +1,124 @@
/**
* @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
* The class provides methods for initializing and stopping a model, and for making inference requests.
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
* @version 1.0.0
* @module inference-anthropic-extension/src/index
*/
import { RemoteOAIEngine } from '@janhq/core'
import { PayloadType } from '@janhq/core'
import { ChatCompletionRole } from '@janhq/core'
declare const SETTINGS: Array<any>
declare const MODELS: Array<any>
enum Settings {
apiKey = 'anthropic-api-key',
chatCompletionsEndPoint = 'chat-completions-endpoint',
}
type AnthropicPayloadType = {
model?: string
max_tokens?: number
messages?: Array<{ role: string; content: string }>
}
/**
* A class that implements the InferenceExtension interface from the @janhq/core package.
* The class provides methods for initializing and stopping a model, and for making inference requests.
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
*/
export default class JanInferenceAnthropicExtension extends RemoteOAIEngine {
inferenceUrl: string = ''
provider: string = 'anthropic'
maxTokens: number = 4096
override async onLoad(): Promise<void> {
super.onLoad()
// Register Settings
this.registerSettings(SETTINGS)
this.registerModels(MODELS)
this.apiKey = await this.getSetting<string>(Settings.apiKey, '')
this.inferenceUrl = await this.getSetting<string>(
Settings.chatCompletionsEndPoint,
''
)
if (this.inferenceUrl.length === 0) {
SETTINGS.forEach((setting) => {
if (setting.key === Settings.chatCompletionsEndPoint) {
this.inferenceUrl = setting.controllerProps.value as string
}
})
}
}
// Override the headers method to include the x-API-key in the request headers
override async headers(): Promise<HeadersInit> {
return {
'Content-Type': 'application/json',
'x-api-key': this.apiKey,
'anthropic-version': '2023-06-01',
}
}
onSettingUpdate<T>(key: string, value: T): void {
if (key === Settings.apiKey) {
this.apiKey = value as string
} else if (key === Settings.chatCompletionsEndPoint) {
if (typeof value !== 'string') return
if (value.trim().length === 0) {
SETTINGS.forEach((setting) => {
if (setting.key === Settings.chatCompletionsEndPoint) {
this.inferenceUrl = setting.controllerProps.value as string
}
})
} else {
this.inferenceUrl = value
}
}
}
// Override the transformPayload method to convert the payload to the required format
transformPayload = (payload: PayloadType): AnthropicPayloadType => {
if (!payload.messages || payload.messages.length === 0) {
return { max_tokens: this.maxTokens, messages: [], model: payload.model }
}
const convertedData: AnthropicPayloadType = {
max_tokens: this.maxTokens,
messages: [],
model: payload.model,
}
payload.messages.forEach((item, index) => {
if (item.role === ChatCompletionRole.User) {
convertedData.messages.push({
role: 'user',
content: item.content as string,
})
} else if (item.role === ChatCompletionRole.Assistant) {
convertedData.messages.push({
role: 'assistant',
content: item.content as string,
})
}
})
return convertedData
}
// Override the transformResponse method to convert the response to the required format
transformResponse = (data: any): string => {
if (data.content && data.content.length > 0 && data.content[0].text) {
return data.content[0].text
} else {
console.error('Invalid response format:', data)
return ''
}
}
}
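To make the two overrides above concrete, here is what they produce for a minimal exchange (shapes are illustrative and only show the fields the code above actually reads):

```typescript
// Illustrative input/output for transformPayload and transformResponse above.
const payload = {
  model: 'claude-3-haiku-20240307',
  messages: [{ role: 'user', content: 'Hello' }],
}
// transformPayload(payload) =>
// {
//   max_tokens: 4096,
//   model: 'claude-3-haiku-20240307',
//   messages: [{ role: 'user', content: 'Hello' }]
// }

// A (truncated) Anthropic response body:
const response = { content: [{ type: 'text', text: 'Hi there!' }] }
// transformResponse(response) => 'Hi there!'
```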

View File

@@ -0,0 +1,14 @@
{
"compilerOptions": {
"target": "es2016",
"module": "ES6",
"moduleResolution": "node",
"outDir": "./dist",
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"strict": false,
"skipLibCheck": true,
"rootDir": "./src"
},
"include": ["./src"]
}

View File

@@ -0,0 +1,37 @@
const webpack = require('webpack')
const packageJson = require('./package.json')
const settingJson = require('./resources/settings.json')
const modelsJson = require('./resources/models.json')
module.exports = {
experiments: { outputModule: true },
entry: './src/index.ts', // Adjust the entry point to match your project's main file
mode: 'production',
module: {
rules: [
{
test: /\.tsx?$/,
use: 'ts-loader',
exclude: /node_modules/,
},
],
},
plugins: [
new webpack.DefinePlugin({
MODELS: JSON.stringify(modelsJson),
SETTINGS: JSON.stringify(settingJson),
ENGINE: JSON.stringify(packageJson.engine),
}),
],
output: {
filename: 'index.js', // Adjust the output file name as needed
library: { type: 'module' }, // Specify ESM output format
},
resolve: {
extensions: ['.ts', '.js'],
},
optimization: {
minimize: false,
},
// Add loaders and other configuration as needed for your project
}

View File

@@ -19,7 +19,37 @@
 },
 "metadata": {
 "author": "Cohere",
-"tags": ["General", "Big Context Length"]
+"tags": [
+"General",
+"Big Context Length"
+]
+},
+"engine": "cohere"
+},
+{
+"sources": [
+{
+"url": "https://cohere.com"
+}
+],
+"id": "command-r",
+"object": "model",
+"name": "Command R",
+"version": "1.0",
+"description": "Command R is an instruction-following conversational model that performs language tasks at a higher quality, more reliably, and with a longer context than previous models. It can be used for complex workflows like code generation, retrieval augmented generation (RAG), tool use, and agents.",
+"format": "api",
+"settings": {},
+"parameters": {
+"max_tokens": 128000,
+"temperature": 0.7,
+"stream": false
+},
+"metadata": {
+"author": "Cohere",
+"tags": [
+"General",
+"Big Context Length"
+]
 },
 "engine": "cohere"
 }

View File

@@ -12,7 +12,7 @@
 {
 "key": "cohere-api-key",
 "title": "API Key",
-"description": "The Cohere API uses API keys for authentication. Visit your [API Keys](https://platform.openai.com/account/api-keys) page to retrieve the API key you'll use in your requests.",
+"description": "The Cohere API uses API keys for authentication. Visit your [API Keys](https://dashboard.cohere.com/api-keys) page to retrieve the API key you'll use in your requests.",
 "controllerType": "input",
 "controllerProps": {
 "placeholder": "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",

View File

@@ -3,7 +3,7 @@
 * The class provides methods for initializing and stopping a model, and for making inference requests.
 * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
 * @version 1.0.0
-* @module inference-openai-extension/src/index
+* @module inference-cohere-extension/src/index
 */
 import { RemoteOAIEngine } from '@janhq/core'
@@ -26,8 +26,8 @@ enum RoleType {
 type CoherePayloadType = {
 chat_history?: Array<{ role: RoleType; message: string }>
-message?: string,
-preamble?: string,
+message?: string
+preamble?: string
 }
 /**
@@ -82,18 +82,24 @@ export default class JanInferenceCohereExtension extends RemoteOAIEngine {
 if (payload.messages.length === 0) {
 return {}
 }
+const { messages, ...params } = payload
 const convertedData: CoherePayloadType = {
+...params,
 chat_history: [],
 message: '',
 }
-payload.messages.forEach((item, index) => {
+messages.forEach((item, index) => {
 // Assign the message of the last item to the `message` property
-if (index === payload.messages.length - 1) {
+if (index === messages.length - 1) {
 convertedData.message = item.content as string
 return
 }
 if (item.role === ChatCompletionRole.User) {
-convertedData.chat_history.push({ role: RoleType.user, message: item.content as string})
+convertedData.chat_history.push({
+role: RoleType.user,
+message: item.content as string,
+})
 } else if (item.role === ChatCompletionRole.Assistant) {
 convertedData.chat_history.push({
 role: RoleType.chatbot,
@@ -106,5 +112,7 @@ export default class JanInferenceCohereExtension extends RemoteOAIEngine {
 return convertedData
 }
-transformResponse = (data: any) => data.text
+transformResponse = (data: any) => {
+return typeof data === 'object' ? data.text : JSON.parse(data).text ?? ''
+}
 }

View File

@@ -1,7 +1,7 @@
 {
 "name": "@janhq/inference-groq-extension",
 "productName": "Groq Inference Engine",
-"version": "1.0.0",
+"version": "1.0.1",
 "description": "This extension enables fast Groq chat completion API calls",
 "main": "dist/index.js",
 "module": "dist/module.js",

View File

@@ -8,22 +8,25 @@
 "id": "llama3-70b-8192",
 "object": "model",
 "name": "Groq Llama 3 70b",
-"version": "1.0",
+"version": "1.1",
 "description": "Groq Llama 3 70b with supercharged speed!",
 "format": "api",
-"settings": {
-"text_model": false
-},
+"settings": {},
 "parameters": {
 "max_tokens": 8192,
 "temperature": 0.7,
-"top_p": 1,
-"stop": null,
-"stream": true
+"top_p": 0.95,
+"stream": true,
+"stop": [],
+"frequency_penalty": 0,
+"presence_penalty": 0
 },
 "metadata": {
 "author": "Meta",
-"tags": ["General", "Big Context Length"]
+"tags": [
+"General",
+"Big Context Length"
+]
 },
 "engine": "groq"
 },
@@ -36,22 +39,25 @@
 "id": "llama3-8b-8192",
 "object": "model",
 "name": "Groq Llama 3 8b",
-"version": "1.0",
+"version": "1.1",
 "description": "Groq Llama 3 8b with supercharged speed!",
 "format": "api",
-"settings": {
-"text_model": false
-},
+"settings": {},
 "parameters": {
 "max_tokens": 8192,
 "temperature": 0.7,
-"top_p": 1,
-"stop": null,
-"stream": true
+"top_p": 0.95,
+"stream": true,
+"stop": [],
+"frequency_penalty": 0,
+"presence_penalty": 0
 },
 "metadata": {
 "author": "Meta",
-"tags": ["General", "Big Context Length"]
+"tags": [
+"General",
+"Big Context Length"
+]
 },
 "engine": "groq"
 },
@@ -64,50 +70,24 @@
 "id": "gemma-7b-it",
 "object": "model",
 "name": "Groq Gemma 7b Instruct",
-"version": "1.0",
+"version": "1.1",
 "description": "Groq Gemma 7b Instruct with supercharged speed!",
 "format": "api",
-"settings": {
-"text_model": false
-},
+"settings": {},
 "parameters": {
-"max_tokens": 4096,
+"max_tokens": 8192,
 "temperature": 0.7,
-"top_p": 1,
-"stop": null,
-"stream": true
+"top_p": 0.95,
+"stream": true,
+"stop": [],
+"frequency_penalty": 0,
+"presence_penalty": 0
 },
 "metadata": {
 "author": "Google",
-"tags": ["General"]
-},
-"engine": "groq"
-},
-{
-"sources": [
-{
-"url": "https://groq.com"
-}
-],
-"id": "llama2-70b-4096",
-"object": "model",
-"name": "Groq Llama 2 70b",
-"version": "1.0",
-"description": "Groq Llama 2 70b with supercharged speed!",
-"format": "api",
-"settings": {
-"text_model": false
-},
-"parameters": {
-"max_tokens": 4096,
-"temperature": 0.7,
-"top_p": 1,
-"stop": null,
-"stream": true
-},
-"metadata": {
-"author": "Meta",
-"tags": ["General", "Big Context Length"]
+"tags": [
+"General"
+]
 },
 "engine": "groq"
 },
@@ -120,22 +100,25 @@
 "id": "mixtral-8x7b-32768",
 "object": "model",
 "name": "Groq Mixtral 8x7b Instruct",
-"version": "1.0",
+"version": "1.1",
 "description": "Groq Mixtral 8x7b Instruct is Mixtral with supercharged speed!",
 "format": "api",
-"settings": {
-"text_model": false
-},
+"settings": {},
 "parameters": {
-"max_tokens": 4096,
+"max_tokens": 32768,
 "temperature": 0.7,
-"top_p": 1,
-"stop": null,
-"stream": true
+"top_p": 0.95,
+"stream": true,
+"stop": [],
+"frequency_penalty": 0,
+"presence_penalty": 0
 },
 "metadata": {
 "author": "Mistral",
-"tags": ["General", "Big Context Length"]
+"tags": [
+"General",
+"Big Context Length"
+]
 },
 "engine": "groq"
 }

View File

@@ -0,0 +1,79 @@
# Martian Engine Extension
Created using Jan extension example
# Create a Jan Extension using Typescript
Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀
## Create Your Own Extension
To create your own extension, you can use this repository as a template! Just follow the below instructions:
1. Click the Use this template button at the top of the repository
2. Select Create a new repository
3. Select an owner and name for your new repository
4. Click Create repository
5. Clone your new repository
## Initial Setup
After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.
> [!NOTE]
>
> You'll need to have a reasonably modern version of
> [Node.js](https://nodejs.org) handy. If you are using a version manager like
> [`nodenv`](https://github.com/nodenv/nodenv) or
> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
> root of your repository to install the version specified in
> [`package.json`](./package.json). Otherwise, 20.x or later should work!
1. :hammer_and_wrench: Install the dependencies
```bash
npm install
```
1. :building_construction: Package the TypeScript for distribution
```bash
npm run bundle
```
1. :white_check_mark: Check your artifact
There will be a tgz file in your extension directory now
## Update the Extension Metadata
The [`package.json`](package.json) file defines metadata about your extension, such as
extension name, main entry, description and version.
When you copy this repository, update `package.json` with the name, description for your extension.
## Update the Extension Code
The [`src/`](./src/) directory is the heart of your extension! This contains the
source code that will be run when your extension functions are invoked. You can replace the
contents of this directory with your own code.
There are a few things to keep in mind when writing your extension code:
- Most Jan Extension functions are processed asynchronously.
In `index.ts`, you will see that the extension function will return a `Promise<any>`.
```typescript
import { events, MessageEvent, MessageRequest } from '@janhq/core'
function onStart(): Promise<any> {
return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
this.inference(data)
)
}
```
For more information about the Jan Extension Core module, see the
[documentation](https://github.com/janhq/jan/blob/main/core/README.md).
So, what are you waiting for? Go ahead and start customizing your extension!

View File

@@ -0,0 +1,42 @@
{
"name": "@janhq/inference-martian-extension",
"productName": "Martian Inference Engine",
"version": "1.0.1",
"description": "This extension enables Martian chat completion API calls",
"main": "dist/index.js",
"module": "dist/module.js",
"engine": "martian",
"author": "Jan <service@jan.ai>",
"license": "AGPL-3.0",
"scripts": {
"build": "tsc -b . && webpack --config webpack.config.js",
"build:publish": "rimraf *.tgz --glob && yarn build && npm pack && cpx *.tgz ../../pre-install"
},
"exports": {
".": "./dist/index.js",
"./main": "./dist/module.js"
},
"devDependencies": {
"cpx": "^1.5.0",
"rimraf": "^3.0.2",
"webpack": "^5.88.2",
"webpack-cli": "^5.1.4",
"ts-loader": "^9.5.0"
},
"dependencies": {
"@janhq/core": "file:../../core",
"fetch-retry": "^5.0.6",
"ulidx": "^2.3.0"
},
"engines": {
"node": ">=18.0.0"
},
"files": [
"dist/*",
"package.json",
"README.md"
],
"bundleDependencies": [
"fetch-retry"
]
}

View File

@@ -0,0 +1,32 @@
[
{
"sources": [
{
"url": "https://withmartian.com/"
}
],
"id": "router",
"object": "model",
"name": "Martian Model Router",
"version": "1.0",
"description": "Martian Model Router dynamically routes requests to the best LLM in real-time",
"format": "api",
"settings": {},
"parameters": {
"max_tokens": 4096,
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Martian",
"tags": [
"General"
]
},
"engine": "martian"
}
]

View File

@@ -0,0 +1,23 @@
[
{
"key": "chat-completions-endpoint",
"title": "Chat Completions Endpoint",
"description": "The endpoint to use for chat completions. See the [Martian API documentation](https://docs.withmartian.com/martian-model-router/api-reference/get-chat-completions) for more information.",
"controllerType": "input",
"controllerProps": {
"placeholder": "https://withmartian.com/api/openai/v1/chat/completions",
"value": "https://withmartian.com/api/openai/v1/chat/completions"
}
},
{
"key": "martian-api-key",
"title": "API Key",
"description": "The Martian API uses API keys for authentication. Visit your [API Keys](https://withmartian.com/dashboard) page to retrieve the API key you'll use in your requests.",
"controllerType": "input",
"controllerProps": {
"placeholder": "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
"value": "",
"type": "password"
}
}
]

View File

@@ -0,0 +1,66 @@
/**
* @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
* The class provides methods for initializing and stopping a model, and for making inference requests.
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
* @version 1.0.0
* @module inference-martian-extension/src/index
*/
import { RemoteOAIEngine, SettingComponentProps } from '@janhq/core'
declare const SETTINGS: Array<any>
declare const MODELS: Array<any>
enum Settings {
apiKey = 'martian-api-key',
chatCompletionsEndPoint = 'chat-completions-endpoint',
}
/**
* A class that implements the InferenceExtension interface from the @janhq/core package.
* The class provides methods for initializing and stopping a model, and for making inference requests.
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
*/
export default class JanInferenceMartianExtension extends RemoteOAIEngine {
inferenceUrl: string = ''
provider: string = 'martian'
override async onLoad(): Promise<void> {
super.onLoad()
// Register Settings
this.registerSettings(SETTINGS)
this.registerModels(MODELS)
this.apiKey = await this.getSetting<string>(Settings.apiKey, '')
this.inferenceUrl = await this.getSetting<string>(
Settings.chatCompletionsEndPoint,
''
)
if (this.inferenceUrl.length === 0) {
SETTINGS.forEach((setting) => {
if (setting.key === Settings.chatCompletionsEndPoint) {
this.inferenceUrl = setting.controllerProps.value as string
}
})
}
}
onSettingUpdate<T>(key: string, value: T): void {
if (key === Settings.apiKey) {
this.apiKey = value as string
} else if (key === Settings.chatCompletionsEndPoint) {
if (typeof value !== 'string') return
if (value.trim().length === 0) {
SETTINGS.forEach((setting) => {
if (setting.key === Settings.chatCompletionsEndPoint) {
this.inferenceUrl = setting.controllerProps.value as string
}
})
} else {
this.inferenceUrl = value
}
}
}
}

View File

@@ -0,0 +1,14 @@
{
"compilerOptions": {
"target": "es2016",
"module": "ES6",
"moduleResolution": "node",
"outDir": "./dist",
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"strict": false,
"skipLibCheck": true,
"rootDir": "./src"
},
"include": ["./src"]
}

View File

@@ -0,0 +1,37 @@
const webpack = require('webpack')
const packageJson = require('./package.json')
const settingJson = require('./resources/settings.json')
const modelsJson = require('./resources/models.json')
module.exports = {
experiments: { outputModule: true },
entry: './src/index.ts', // Adjust the entry point to match your project's main file
mode: 'production',
module: {
rules: [
{
test: /\.tsx?$/,
use: 'ts-loader',
exclude: /node_modules/,
},
],
},
plugins: [
new webpack.DefinePlugin({
MODELS: JSON.stringify(modelsJson),
SETTINGS: JSON.stringify(settingJson),
ENGINE: JSON.stringify(packageJson.engine),
}),
],
output: {
filename: 'index.js', // Adjust the output file name as needed
library: { type: 'module' }, // Specify ESM output format
},
resolve: {
extensions: ['.ts', '.js'],
},
optimization: {
minimize: false,
},
// Add loaders and other configuration as needed for your project
}

View File

@@ -1,7 +1,7 @@
 {
 "name": "@janhq/inference-mistral-extension",
 "productName": "MistralAI Inference Engine",
-"version": "1.0.0",
+"version": "1.0.1",
 "description": "This extension enables Mistral chat completion API calls",
 "main": "dist/index.js",
 "module": "dist/module.js",

View File

@@ -8,48 +8,20 @@
 "id": "mistral-small-latest",
 "object": "model",
 "name": "Mistral Small",
-"version": "1.0",
-"description": "Mistral Small is the ideal choice for simpe tasks that one can do in builk - like Classification, Customer Support, or Text Generation. It offers excellent performance at an affordable price point.",
+"version": "1.1",
+"description": "Mistral Small is the ideal choice for simple tasks (Classification, Customer Support, or Text Generation) at an affordable price.",
 "format": "api",
 "settings": {},
 "parameters": {
-"max_tokens": 4096,
-"temperature": 0.7
+"max_tokens": 32000,
+"temperature": 0.7,
+"top_p": 0.95,
+"stream": true
 },
 "metadata": {
 "author": "Mistral",
 "tags": [
-"Classification",
-"Customer Support",
-"Text Generation"
-]
-},
-"engine": "mistral"
-},
-{
-"sources": [
-{
-"url": "https://docs.mistral.ai/api/"
-}
-],
-"id": "mistral-medium-latest",
-"object": "model",
-"name": "Mistral Medium",
-"version": "1.0",
-"description": "Mistral Medium is the ideal for intermediate tasks that require moderate reasoning - like Data extraction, Summarizing a Document, Writing a Job Description, or Writing Product Descriptions. Mistral Medium strikes a balance between performance and capability, making it suitable for a wide range of tasks that only require language transformaion",
-"format": "api",
-"settings": {},
-"parameters": {
-"max_tokens": 4096,
-"temperature": 0.7
-},
-"metadata": {
-"author": "Mistral",
-"tags": [
-"Data extraction",
-"Summarizing a Document",
-"Writing a Job Description",
-"Writing Product Descriptions"
+"General"
 ]
 },
 "engine": "mistral"
@@ -63,21 +35,47 @@
 "id": "mistral-large-latest",
 "object": "model",
 "name": "Mistral Large",
-"version": "1.0",
-"description": "Mistral Large is ideal for complex tasks that require large reasoning capabilities or are highly specialized - like Synthetic Text Generation, Code Generation, RAG, or Agents.",
+"version": "1.1",
+"description": "Mistral Large is ideal for complex tasks (Synthetic Text Generation, Code Generation, RAG, or Agents).",
 "format": "api",
 "settings": {},
 "parameters": {
-"max_tokens": 4096,
-"temperature": 0.7
+"max_tokens": 32000,
+"temperature": 0.7,
+"top_p": 0.95,
+"stream": true
 },
 "metadata": {
 "author": "Mistral",
 "tags": [
-"Text Generation",
-"Code Generation",
-"RAG",
-"Agents"
-]
+"General"
+]
+},
+"engine": "mistral"
+},
+{
+"sources": [
+{
+"url": "https://docs.mistral.ai/api/"
+}
+],
+"id": "open-mixtral-8x22b",
+"object": "model",
+"name": "Mixtral 8x22B",
+"version": "1.1",
+"description": "Mixtral 8x22B is a high-performance, cost-effective model designed for complex tasks.",
+"format": "api",
+"settings": {},
+"parameters": {
+"max_tokens": 32000,
+"temperature": 0.7,
+"top_p": 0.95,
+"stream": true
+},
+"metadata": {
+"author": "Mistral",
+"tags": [
+"General"
+]
 },
 "engine": "mistral"

View File

@ -0,0 +1,2 @@
bin
!version.txt

View File

@ -1 +1 @@
0.3.22 0.4.4

View File

@ -1,3 +1,3 @@
@echo off @echo off
set /p NITRO_VERSION=<./bin/version.txt set /p CORTEX_VERSION=<./bin/version.txt
.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan

View File

@ -1,8 +1,8 @@
{ {
"name": "@janhq/inference-nitro-extension", "name": "@janhq/inference-cortex-extension",
"productName": "Nitro Inference Engine", "productName": "Cortex Inference Engine",
"version": "1.0.4", "version": "1.0.7",
"description": "This extension embeds Nitro, a lightweight (3mb) inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
"main": "dist/index.js", "main": "dist/index.js",
"node": "dist/node/index.cjs.js", "node": "dist/node/index.cjs.js",
"author": "Jan <service@jan.ai>", "author": "Jan <service@jan.ai>",
@ -10,8 +10,8 @@
"scripts": { "scripts": {
"test": "jest", "test": "jest",
"build": "tsc --module commonjs && rollup -c rollup.config.ts", "build": "tsc --module commonjs && rollup -c rollup.config.ts",
"downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-avx2.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/nitro", "downloadnitro:linux": "CORTEX_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-avx2.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/cortex-cpp",
"downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-universal.tar.gz -o ./bin/ && mkdir -p ./bin/mac-universal && tar -zxvf ./bin/nitro-${NITRO_VERSION}-mac-universal.tar.gz --strip-components=1 -C ./bin/mac-universal && rm -rf ./bin/nitro-${NITRO_VERSION}-mac-universal.tar.gz && chmod +x ./bin/mac-universal/nitro", "downloadnitro:darwin": "CORTEX_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz -o ./bin/ && mkdir -p ./bin/mac-arm64 && tar -zxvf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz --strip-components=1 -C ./bin/mac-arm64 && rm -rf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz && chmod +x ./bin/mac-arm64/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz -o ./bin/ && mkdir -p ./bin/mac-amd64 && tar -zxvf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz --strip-components=1 -C ./bin/mac-amd64 && rm -rf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz && chmod +x ./bin/mac-amd64/cortex-cpp",
"downloadnitro:win32": "download.bat", "downloadnitro:win32": "download.bat",
"downloadnitro": "run-script-os", "downloadnitro": "run-script-os",
"build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install", "build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",

View File

@ -8,19 +8,20 @@
"id": "codeninja-1.0-7b", "id": "codeninja-1.0-7b",
"object": "model", "object": "model",
"name": "CodeNinja 7B Q4", "name": "CodeNinja 7B Q4",
"version": "1.0", "version": "1.1",
"description": "CodeNinja is good for coding tasks and can handle various languages including Python, C, C++, Rust, Java, JavaScript, and more.", "description": "CodeNinja is good for coding tasks and can handle various languages including Python, C, C++, Rust, Java, JavaScript, and more.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 8192,
"prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:", "prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:",
"llama_model_path": "codeninja-1.0-openchat-7b.Q4_K_M.gguf" "llama_model_path": "codeninja-1.0-openchat-7b.Q4_K_M.gguf",
"ngl": 32
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 8192,
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0
}, },
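The new ngl field here, and in the model entries that follow, sets how many transformer layers the llama.cpp-based engine is asked to offload to the GPU, alongside the enlarged ctx_len context window. A small sketch of the settings object an entry like this produces; reading ngl as a GPU layer count is an assumption based on the llama.cpp flag of the same name:

// Shape mirrored from the codeninja entry above (values copied, comments added)
const codeninjaSettings = {
  ctx_len: 8192, // context window forwarded to the engine
  ngl: 32, // layers to offload to the GPU (llama.cpp-style n_gpu_layers)
  prompt_template: 'GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:',
  llama_model_path: 'codeninja-1.0-openchat-7b.Q4_K_M.gguf',
}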

View File

@ -8,19 +8,20 @@
"id": "command-r-34b", "id": "command-r-34b",
"object": "model", "object": "model",
"name": "Command-R v01 34B Q4", "name": "Command-R v01 34B Q4",
"version": "1.3", "version": "1.4",
"description": "C4AI Command-R developed by CohereAI is optimized for a variety of use cases including reasoning, summarization, and question answering.", "description": "C4AI Command-R developed by CohereAI is optimized for a variety of use cases including reasoning, summarization, and question answering.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 131072,
"prompt_template": "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", "prompt_template": "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
"llama_model_path": "c4ai-command-r-v01-Q4_K_M.gguf" "llama_model_path": "c4ai-command-r-v01-Q4_K_M.gguf",
"ngl": 40
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 131072,
"stop": [], "stop": [],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0

View File

@ -8,19 +8,20 @@
"id": "deepseek-coder-1.3b", "id": "deepseek-coder-1.3b",
"object": "model", "object": "model",
"name": "Deepseek Coder 1.3B Q8", "name": "Deepseek Coder 1.3B Q8",
"version": "1.0", "version": "1.1",
"description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.", "description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 16384,
"prompt_template": "### Instruction:\n{prompt}\n### Response:", "prompt_template": "### Instruction:\n{prompt}\n### Response:",
"llama_model_path": "deepseek-coder-1.3b-instruct.Q8_0.gguf" "llama_model_path": "deepseek-coder-1.3b-instruct.Q8_0.gguf",
"ngl": 24
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 16384,
"stop": [], "stop": [],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0

View File

@ -1,26 +1,27 @@
{ {
"sources": [ "sources": [
{ {
"filename": "deepseek-coder-33b-instruct.Q5_K_M.gguf", "filename": "deepseek-coder-33b-instruct.Q4_K_M.gguf",
"url": "https://huggingface.co/TheBloke/deepseek-coder-33B-instruct-GGUF/resolve/main/deepseek-coder-33b-instruct.Q5_K_M.gguf" "url": "https://huggingface.co/TheBloke/deepseek-coder-33B-instruct-GGUF/resolve/main/deepseek-coder-33b-instruct.Q4_K_M.gguf"
} }
], ],
"id": "deepseek-coder-34b", "id": "deepseek-coder-34b",
"object": "model", "object": "model",
"name": "Deepseek Coder 33B Q5", "name": "Deepseek Coder 33B Q4",
"version": "1.0", "version": "1.1",
"description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.", "description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 16384,
"prompt_template": "### Instruction:\n{prompt}\n### Response:", "prompt_template": "### Instruction:\n{prompt}\n### Response:",
"llama_model_path": "deepseek-coder-33b-instruct.Q5_K_M.gguf" "llama_model_path": "deepseek-coder-33b-instruct.Q4_K_M.gguf",
"ngl": 62
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 16384,
"stop": [], "stop": [],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0

View File

@ -1,32 +0,0 @@
{
"sources": [
{
"url": "https://huggingface.co/TheBloke/dolphin-2_6-phi-2-GGUF/resolve/main/dolphin-2_6-phi-2.Q8_0.gguf",
"filename": "dolphin-2_6-phi-2.Q8_0.gguf"
}
],
"id": "dolphin-phi-2",
"object": "model",
"name": "Dolphin Phi-2 2.7B Q8",
"version": "1.0",
"description": "Dolphin Phi-2 is a good alternative for Phi-2 in chatting",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
"llama_model_path": "dolphin-2_6-phi-2.Q8_0.gguf"
},
"parameters": {
"max_tokens": 4096,
"stop": ["<|im_end|>"]
},
"metadata": {
"author": "Cognitive Computations, Microsoft",
"tags": [
"3B",
"Finetuned"
],
"size": 2960000000
},
"engine": "nitro"
}

View File

@ -8,19 +8,20 @@
"id": "gemma-2b", "id": "gemma-2b",
"object": "model", "object": "model",
"name": "Gemma 2B Q4", "name": "Gemma 2B Q4",
"version": "1.0", "version": "1.1",
"description": "Gemma is built from the same technology with Google's Gemini.", "description": "Gemma is built from the same technology with Google's Gemini.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 8192,
"prompt_template": "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model", "prompt_template": "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model",
"llama_model_path": "gemma-2b-it-q4_k_m.gguf" "llama_model_path": "gemma-2b-it-q4_k_m.gguf",
"ngl": 18
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 8192,
"stop": [], "stop": [],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0

View File

@ -8,19 +8,20 @@
"id": "gemma-7b", "id": "gemma-7b",
"object": "model", "object": "model",
"name": "Gemma 7B Q4", "name": "Gemma 7B Q4",
"version": "1.0", "version": "1.1",
"description": "Google's Gemma is built for multilingual purpose", "description": "Google's Gemma is built for multilingual purpose",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 8192,
"prompt_template": "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model", "prompt_template": "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model",
"llama_model_path": "gemma-7b-it-q4_K_M.gguf" "llama_model_path": "gemma-7b-it-q4_K_M.gguf",
"ngl": 28
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 8192,
"stop": [], "stop": [],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0

View File

@ -14,7 +14,8 @@
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 4096,
"prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]", "prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]",
"llama_model_path": "llama-2-70b-chat.Q4_K_M.gguf" "llama_model_path": "llama-2-70b-chat.Q4_K_M.gguf",
"ngl": 80
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,

View File

@ -14,7 +14,8 @@
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 4096,
"prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]", "prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]",
"llama_model_path": "llama-2-7b-chat.Q4_K_M.gguf" "llama_model_path": "llama-2-7b-chat.Q4_K_M.gguf",
"ngl": 32
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,

View File

@ -8,19 +8,20 @@
"id": "llama3-8b-instruct", "id": "llama3-8b-instruct",
"object": "model", "object": "model",
"name": "Llama 3 8B Q4", "name": "Llama 3 8B Q4",
"version": "1.0", "version": "1.1",
"description": "Meta's Llama 3 excels at general usage situations, including chat, general world knowledge, and coding.", "description": "Meta's Llama 3 excels at general usage situations, including chat, general world knowledge, and coding.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 8192, "ctx_len": 8192,
"prompt_template": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", "prompt_template": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
"llama_model_path": "Meta-Llama-3-8B-Instruct-Q4_K_M.gguf" "llama_model_path": "Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"ngl": 32
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 8192,
"stop": ["<|end_of_text|>","<|eot_id|>"], "stop": ["<|end_of_text|>","<|eot_id|>"],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0

View File

@ -1,35 +1,38 @@
{ {
"sources": [ "sources": [
{ {
"filename": "Hermes-2-Pro-Mistral-7B.Q4_K_M.gguf", "filename": "Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf",
"url": "https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/resolve/main/Hermes-2-Pro-Mistral-7B.Q4_K_M.gguf" "url": "https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/resolve/main/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf"
} }
], ],
"id": "hermes-pro-7b", "id": "llama3-hermes-8b",
"object": "model", "object": "model",
"name": "Hermes Pro 7B Q4", "name": "Hermes Pro Llama 3 8B Q4",
"version": "1.1", "version": "1.1",
"description": "Hermes Pro is superior in Roleplaying, Reasoning and Explaining problem.", "description": "Hermes Pro is well-designed for General chat and JSON output.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 8192,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
"llama_model_path": "Hermes-2-Pro-Mistral-7B.Q4_K_M.gguf" "llama_model_path": "Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf",
"ngl": 32
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 8192,
"stop": [], "stop": [],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0
}, },
"metadata": { "metadata": {
"author": "NousResearch", "author": "NousResearch",
"tags": ["7B", "Finetuned"], "tags": [
"size": 4370000000 "7B",
"Finetuned"
],
"size": 4920000000
}, },
"engine": "nitro" "engine": "nitro"
} }

View File

@ -14,7 +14,8 @@
"settings": { "settings": {
"ctx_len": 2048, "ctx_len": 2048,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
"llama_model_path": "llamacorn-1.1b-chat.Q8_0.gguf" "llama_model_path": "llamacorn-1.1b-chat.Q8_0.gguf",
"ngl": 22
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,

View File

@ -1,34 +0,0 @@
{
"sources": [
{
"filename": "miqu-1-70b.q4_k_m.gguf",
"url": "https://huggingface.co/miqudev/miqu-1-70b/resolve/main/miqu-1-70b.q4_k_m.gguf"
}
],
"id": "miqu-70b",
"object": "model",
"name": "Mistral 70B Q4",
"version": "1.0",
"description": "A leak weight of Mistral 70B model.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "[INST] {prompt} [/INST]",
"llama_model_path": "miqu-1-70b.q4_k_m.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "miqudev",
"tags": ["70B", "Foundational Model"],
"size": 26440000000
},
"engine": "nitro"
}

View File

@ -8,20 +8,21 @@
"id": "mistral-ins-7b-q4", "id": "mistral-ins-7b-q4",
"object": "model", "object": "model",
"name": "Mistral Instruct 7B Q4", "name": "Mistral Instruct 7B Q4",
"version": "1.0", "version": "1.1",
"description": "Mistral Instruct 7b model, specifically designed for a comprehensive understanding of the world.", "description": "Mistral Instruct 7b model, specifically designed for a comprehensive understanding of the world.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 32768,
"prompt_template": "[INST] {prompt} [/INST]", "prompt_template": "[INST] {prompt} [/INST]",
"llama_model_path": "mistral-7b-instruct-v0.2.Q4_K_M.gguf" "llama_model_path": "mistral-7b-instruct-v0.2.Q4_K_M.gguf",
"ngl": 32
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 32768,
"stop": [], "stop": ["[/INST]"],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0
}, },

View File

@ -8,19 +8,20 @@
"id": "mixtral-8x7b-instruct", "id": "mixtral-8x7b-instruct",
"object": "model", "object": "model",
"name": "Mixtral 8x7B Instruct Q4", "name": "Mixtral 8x7B Instruct Q4",
"version": "1.0", "version": "1.1",
"description": "The Mixtral-8x7B is a pretrained generative Sparse Mixture of Experts. The Mixtral-8x7B outperforms 70B models on most benchmarks.", "description": "The Mixtral-8x7B is a pretrained generative Sparse Mixture of Experts. The Mixtral-8x7B outperforms 70B models on most benchmarks.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 32768,
"prompt_template": "[INST] {prompt} [/INST]", "prompt_template": "[INST] {prompt} [/INST]",
"llama_model_path": "mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf" "llama_model_path": "mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf",
"ngl": 100
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 32768,
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0
}, },

View File

@ -8,19 +8,20 @@
"id": "noromaid-7b", "id": "noromaid-7b",
"object": "model", "object": "model",
"name": "Noromaid 7B Q4", "name": "Noromaid 7B Q4",
"version": "1.0", "version": "1.1",
"description": "The Noromaid 7b model is designed for role-playing with human-like behavior.", "description": "The Noromaid 7b model is designed for role-playing with human-like behavior.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 32768,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
"llama_model_path": "Noromaid-7B-0.4-DPO.q4_k_m.gguf" "llama_model_path": "Noromaid-7B-0.4-DPO.q4_k_m.gguf",
"ngl": 32
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 32768,
"stop": [], "stop": [],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0

View File

@ -8,19 +8,20 @@
"id": "openchat-3.5-7b", "id": "openchat-3.5-7b",
"object": "model", "object": "model",
"name": "Openchat-3.5 7B Q4", "name": "Openchat-3.5 7B Q4",
"version": "1.0", "version": "1.1",
"description": "The performance of Openchat surpasses ChatGPT-3.5 and Grok-1 across various benchmarks.", "description": "The performance of Openchat surpasses ChatGPT-3.5 and Grok-1 across various benchmarks.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 8192,
"prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:", "prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:",
"llama_model_path": "openchat-3.5-0106.Q4_K_M.gguf" "llama_model_path": "openchat-3.5-0106.Q4_K_M.gguf",
"ngl": 32
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 8192,
"stop": ["<|end_of_turn|>"], "stop": ["<|end_of_turn|>"],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0

View File

@ -1,34 +0,0 @@
{
"sources": [
{
"filename": "openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf",
"url": "https://huggingface.co/janhq/openhermes-2.5-neural-chat-v3-3-slerp-GGUF/resolve/main/openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf"
}
],
"id": "openhermes-neural-7b",
"object": "model",
"name": "OpenHermes Neural 7B Q4",
"version": "1.1",
"description": "OpenHermes Neural is a merged model using the TIES method. It performs well in various benchmarks.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
"llama_model_path": "openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Intel, Jan",
"tags": ["7B", "Merged"],
"size": 4370000000,
"cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/openhermes-neural-7b/cover.png"
},
"engine": "nitro"
}

View File

@ -13,7 +13,7 @@
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 4096,
"prompt_template": "<|system|>\n{system_message}<|end|>\n<|user|>\n{prompt}<|end|>\n<|assistant|>\n", "prompt_template": "<|user|>\n{prompt}<|end|>\n<|assistant|>\n",
"llama_model_path": "Phi-3-mini-4k-instruct-q4.gguf" "llama_model_path": "Phi-3-mini-4k-instruct-q4.gguf"
}, },
"parameters": { "parameters": {

View File

@ -8,19 +8,20 @@
"id": "phind-34b", "id": "phind-34b",
"object": "model", "object": "model",
"name": "Phind 34B Q4", "name": "Phind 34B Q4",
"version": "1.1", "version": "1.2",
"description": "Phind 34B is the best Open-source coding model.", "description": "Phind 34B is the best Open-source coding model.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 16384,
"prompt_template": "### System Prompt\n{system_message}\n### User Message\n{prompt}\n### Assistant", "prompt_template": "### System Prompt\n{system_message}\n### User Message\n{prompt}\n### Assistant",
"llama_model_path": "phind-codellama-34b-v2.Q4_K_M.gguf" "llama_model_path": "phind-codellama-34b-v2.Q4_K_M.gguf",
"ngl": 48
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 16384,
"stop": [], "stop": [],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0

View File

@ -8,19 +8,20 @@
"id": "qwen-7b", "id": "qwen-7b",
"object": "model", "object": "model",
"name": "Qwen Chat 7B Q4", "name": "Qwen Chat 7B Q4",
"version": "1.0", "version": "1.1",
"description": "Qwen is optimized at Chinese, ideal for everyday tasks.", "description": "Qwen is optimized at Chinese, ideal for everyday tasks.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 32768,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
"llama_model_path": "qwen1_5-7b-chat-q4_k_m.gguf" "llama_model_path": "qwen1_5-7b-chat-q4_k_m.gguf",
"ngl": 32
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 32768,
"stop": [], "stop": [],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0

View File

@ -14,7 +14,8 @@
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 4096,
"prompt_template": "<|user|>\n{prompt}<|endoftext|>\n<|assistant|>", "prompt_template": "<|user|>\n{prompt}<|endoftext|>\n<|assistant|>",
"llama_model_path": "stablelm-zephyr-3b.Q8_0.gguf" "llama_model_path": "stablelm-zephyr-3b.Q8_0.gguf",
"ngl": 32
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,

View File

@ -12,15 +12,16 @@
"description": "This is a new experimental family designed to enhance Mathematical and Logical abilities.", "description": "This is a new experimental family designed to enhance Mathematical and Logical abilities.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 32768,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
"llama_model_path": "stealth-v1.3.Q4_K_M.gguf" "llama_model_path": "stealth-v1.3.Q4_K_M.gguf",
"ngl": 32
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 32768,
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0
}, },

View File

@ -14,7 +14,8 @@
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 4096,
"prompt_template": "<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>", "prompt_template": "<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>",
"llama_model_path": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf" "llama_model_path": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
"ngl": 22
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,

View File

@ -12,15 +12,16 @@
"description": "Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes.", "description": "Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 32768,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
"llama_model_path": "trinity-v1.2.Q4_K_M.gguf" "llama_model_path": "trinity-v1.2.Q4_K_M.gguf",
"ngl": 32
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 32768,
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0
}, },

View File

@ -8,19 +8,20 @@
"id": "vistral-7b", "id": "vistral-7b",
"object": "model", "object": "model",
"name": "Vistral 7B Q4", "name": "Vistral 7B Q4",
"version": "1.0", "version": "1.1",
"description": "Vistral 7B has a deep understanding of Vietnamese.", "description": "Vistral 7B has a deep understanding of Vietnamese.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 32768,
"prompt_template": "[INST] <<SYS>>\n{system_message}\n<</SYS>>\n{prompt} [/INST]", "prompt_template": "[INST] <<SYS>>\n{system_message}\n<</SYS>>\n{prompt} [/INST]",
"llama_model_path": "vistral-7b-chat-dpo.Q4_K_M.gguf" "llama_model_path": "vistral-7b-chat-dpo.Q4_K_M.gguf",
"ngl": 32
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 32768,
"stop": [], "stop": [],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0

View File

@ -12,15 +12,16 @@
"description": "WizardCoder 13B is a Python coding model. This model demonstrate high proficiency in specific domains like coding and mathematics.", "description": "WizardCoder 13B is a Python coding model. This model demonstrate high proficiency in specific domains like coding and mathematics.",
"format": "gguf", "format": "gguf",
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 16384,
"prompt_template": "### Instruction:\n{prompt}\n### Response:", "prompt_template": "### Instruction:\n{prompt}\n### Response:",
"llama_model_path": "wizardcoder-python-13b-v1.0.Q4_K_M.gguf" "llama_model_path": "wizardcoder-python-13b-v1.0.Q4_K_M.gguf",
"ngl": 40
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 4096, "max_tokens": 16384,
"stop": [], "stop": [],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0

View File

@ -14,7 +14,8 @@
"settings": { "settings": {
"ctx_len": 4096, "ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
"llama_model_path": "yi-34b-chat.Q4_K_M.gguf" "llama_model_path": "yi-34b-chat.Q4_K_M.gguf",
"ngl": 60
}, },
"parameters": { "parameters": {
"temperature": 0.7, "temperature": 0.7,

View File

@ -12,21 +12,17 @@ const codeninja7bJson = require('./resources/models/codeninja-1.0-7b/model.json'
const commandr34bJson = require('./resources/models/command-r-34b/model.json') const commandr34bJson = require('./resources/models/command-r-34b/model.json')
const deepseekCoder13bJson = require('./resources/models/deepseek-coder-1.3b/model.json') const deepseekCoder13bJson = require('./resources/models/deepseek-coder-1.3b/model.json')
const deepseekCoder34bJson = require('./resources/models/deepseek-coder-34b/model.json') const deepseekCoder34bJson = require('./resources/models/deepseek-coder-34b/model.json')
const dolphinPhi2Json = require('./resources/models/dolphin-phi-2/model.json')
const gemma2bJson = require('./resources/models/gemma-2b/model.json') const gemma2bJson = require('./resources/models/gemma-2b/model.json')
const gemma7bJson = require('./resources/models/gemma-7b/model.json') const gemma7bJson = require('./resources/models/gemma-7b/model.json')
const hermesPro7bJson = require('./resources/models/hermes-pro-7b/model.json')
const llama2Chat70bJson = require('./resources/models/llama2-chat-70b/model.json') const llama2Chat70bJson = require('./resources/models/llama2-chat-70b/model.json')
const llama2Chat7bJson = require('./resources/models/llama2-chat-7b/model.json') const llama2Chat7bJson = require('./resources/models/llama2-chat-7b/model.json')
const llamacorn1bJson = require('./resources/models/llamacorn-1.1b/model.json') const llamacorn1bJson = require('./resources/models/llamacorn-1.1b/model.json')
const llava13bJson = require('./resources/models/llava-13b/model.json') const llava13bJson = require('./resources/models/llava-13b/model.json')
const llava7bJson = require('./resources/models/llava-7b/model.json') const llava7bJson = require('./resources/models/llava-7b/model.json')
const miqu70bJson = require('./resources/models/miqu-70b/model.json')
const mistralIns7bq4Json = require('./resources/models/mistral-ins-7b-q4/model.json') const mistralIns7bq4Json = require('./resources/models/mistral-ins-7b-q4/model.json')
const mixtral8x7bInstructJson = require('./resources/models/mixtral-8x7b-instruct/model.json') const mixtral8x7bInstructJson = require('./resources/models/mixtral-8x7b-instruct/model.json')
const noromaid7bJson = require('./resources/models/noromaid-7b/model.json') const noromaid7bJson = require('./resources/models/noromaid-7b/model.json')
const openchat357bJson = require('./resources/models/openchat-3.5-7b/model.json') const openchat357bJson = require('./resources/models/openchat-3.5-7b/model.json')
const openhermesNeural7bJson = require('./resources/models/openhermes-neural-7b/model.json')
const phind34bJson = require('./resources/models/phind-34b/model.json') const phind34bJson = require('./resources/models/phind-34b/model.json')
const qwen7bJson = require('./resources/models/qwen-7b/model.json') const qwen7bJson = require('./resources/models/qwen-7b/model.json')
const stableZephyr3bJson = require('./resources/models/stable-zephyr-3b/model.json') const stableZephyr3bJson = require('./resources/models/stable-zephyr-3b/model.json')
@ -37,6 +33,7 @@ const vistral7bJson = require('./resources/models/vistral-7b/model.json')
const wizardcoder13bJson = require('./resources/models/wizardcoder-13b/model.json') const wizardcoder13bJson = require('./resources/models/wizardcoder-13b/model.json')
const yi34bJson = require('./resources/models/yi-34b/model.json') const yi34bJson = require('./resources/models/yi-34b/model.json')
const llama3Json = require('./resources/models/llama3-8b-instruct/model.json') const llama3Json = require('./resources/models/llama3-8b-instruct/model.json')
const llama3Hermes8bJson = require('./resources/models/llama3-hermes-8b/model.json')
export default [ export default [
{ {
@ -56,21 +53,17 @@ export default [
commandr34bJson, commandr34bJson,
deepseekCoder13bJson, deepseekCoder13bJson,
deepseekCoder34bJson, deepseekCoder34bJson,
dolphinPhi2Json,
gemma2bJson, gemma2bJson,
gemma7bJson, gemma7bJson,
hermesPro7bJson,
llama2Chat70bJson, llama2Chat70bJson,
llama2Chat7bJson, llama2Chat7bJson,
llamacorn1bJson, llamacorn1bJson,
llava13bJson, llava13bJson,
llava7bJson, llava7bJson,
miqu70bJson,
mistralIns7bq4Json, mistralIns7bq4Json,
mixtral8x7bInstructJson, mixtral8x7bInstructJson,
noromaid7bJson, noromaid7bJson,
openchat357bJson, openchat357bJson,
openhermesNeural7bJson,
phind34bJson, phind34bJson,
qwen7bJson, qwen7bJson,
stableZephyr3bJson, stableZephyr3bJson,
@ -80,13 +73,14 @@ export default [
vistral7bJson, vistral7bJson,
wizardcoder13bJson, wizardcoder13bJson,
yi34bJson, yi34bJson,
llama3Json llama3Json,
llama3Hermes8bJson
]), ]),
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`), NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson), DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
INFERENCE_URL: JSON.stringify( INFERENCE_URL: JSON.stringify(
process.env.INFERENCE_URL || process.env.INFERENCE_URL ||
'http://127.0.0.1:3928/inferences/llamacpp/chat_completion' 'http://127.0.0.1:3928/inferences/server/chat_completion'
), ),
TROUBLESHOOTING_URL: JSON.stringify( TROUBLESHOOTING_URL: JSON.stringify(
'https://jan.ai/guides/troubleshooting' 'https://jan.ai/guides/troubleshooting'

View File

@ -130,7 +130,7 @@ export default class JanInferenceNitroExtension extends LocalOAIEngine {
const executableFolderPath = await joinPath([ const executableFolderPath = await joinPath([
janDataFolderPath, janDataFolderPath,
'engines', 'engines',
this.name ?? 'nitro', this.name ?? 'cortex-cpp',
this.version ?? '1.0.0', this.version ?? '1.0.0',
]) ])
@ -179,7 +179,7 @@ export default class JanInferenceNitroExtension extends LocalOAIEngine {
const executableFolderPath = await joinPath([ const executableFolderPath = await joinPath([
janDataFolderPath, janDataFolderPath,
'engines', 'engines',
this.name ?? 'nitro', this.name ?? 'cortex-cpp',
this.version ?? '1.0.0', this.version ?? '1.0.0',
]) ])

View File

@ -33,9 +33,22 @@ describe('test executable nitro file', () => {
Object.defineProperty(process, 'platform', { Object.defineProperty(process, 'platform', {
value: 'darwin', value: 'darwin',
}) })
Object.defineProperty(process, 'arch', {
value: 'arm64',
})
expect(executableNitroFile(testSettings)).toEqual( expect(executableNitroFile(testSettings)).toEqual(
expect.objectContaining({ expect.objectContaining({
executablePath: expect.stringContaining(`mac-universal${sep}nitro`), executablePath: expect.stringContaining(`mac-arm64${sep}cortex-cpp`),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
)
Object.defineProperty(process, 'arch', {
value: 'amd64',
})
expect(executableNitroFile(testSettings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`mac-amd64${sep}cortex-cpp`),
cudaVisibleDevices: '', cudaVisibleDevices: '',
vkVisibleDevices: '', vkVisibleDevices: '',
}) })
@ -56,7 +69,7 @@ describe('test executable nitro file', () => {
} }
expect(executableNitroFile(settings)).toEqual( expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({ expect.objectContaining({
executablePath: expect.stringContaining(`win-cpu${sep}nitro.exe`), executablePath: expect.stringContaining(`win-cpu${sep}cortex-cpp.exe`),
cudaVisibleDevices: '', cudaVisibleDevices: '',
vkVisibleDevices: '', vkVisibleDevices: '',
}) })
@ -89,7 +102,7 @@ describe('test executable nitro file', () => {
} }
expect(executableNitroFile(settings)).toEqual( expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({ expect.objectContaining({
executablePath: expect.stringContaining(`win-cuda-11-7${sep}nitro.exe`), executablePath: expect.stringContaining(`win-cuda-11-7${sep}cortex-cpp.exe`),
cudaVisibleDevices: '0', cudaVisibleDevices: '0',
vkVisibleDevices: '0', vkVisibleDevices: '0',
}) })
@ -122,7 +135,7 @@ describe('test executable nitro file', () => {
} }
expect(executableNitroFile(settings)).toEqual( expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({ expect.objectContaining({
executablePath: expect.stringContaining(`win-cuda-12-0${sep}nitro.exe`), executablePath: expect.stringContaining(`win-cuda-12-0${sep}cortex-cpp.exe`),
cudaVisibleDevices: '0', cudaVisibleDevices: '0',
vkVisibleDevices: '0', vkVisibleDevices: '0',
}) })
@ -139,7 +152,7 @@ describe('test executable nitro file', () => {
} }
expect(executableNitroFile(settings)).toEqual( expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({ expect.objectContaining({
executablePath: expect.stringContaining(`linux-cpu${sep}nitro`), executablePath: expect.stringContaining(`linux-cpu${sep}cortex-cpp`),
cudaVisibleDevices: '', cudaVisibleDevices: '',
vkVisibleDevices: '', vkVisibleDevices: '',
}) })
@ -172,7 +185,7 @@ describe('test executable nitro file', () => {
} }
expect(executableNitroFile(settings)).toEqual( expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({ expect.objectContaining({
executablePath: expect.stringContaining(`linux-cuda-11-7${sep}nitro`), executablePath: expect.stringContaining(`linux-cuda-11-7${sep}cortex-cpp`),
cudaVisibleDevices: '0', cudaVisibleDevices: '0',
vkVisibleDevices: '0', vkVisibleDevices: '0',
}) })
@ -205,7 +218,7 @@ describe('test executable nitro file', () => {
} }
expect(executableNitroFile(settings)).toEqual( expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({ expect.objectContaining({
executablePath: expect.stringContaining(`linux-cuda-12-0${sep}nitro`), executablePath: expect.stringContaining(`linux-cuda-12-0${sep}cortex-cpp`),
cudaVisibleDevices: '0', cudaVisibleDevices: '0',
vkVisibleDevices: '0', vkVisibleDevices: '0',
}) })

View File

@ -1,4 +1,4 @@
import { GpuSetting, SystemInformation } from '@janhq/core' import { GpuSetting } from '@janhq/core'
import * as path from 'path' import * as path from 'path'
export interface NitroExecutableOptions { export interface NitroExecutableOptions {
@ -24,7 +24,7 @@ const os = (): string => {
return process.platform === 'win32' return process.platform === 'win32'
? 'win' ? 'win'
: process.platform === 'darwin' : process.platform === 'darwin'
? 'mac-universal' ? process.arch === 'arm64' ? 'mac-arm64' : 'mac-amd64'
: 'linux' : 'linux'
} }
@ -52,7 +52,7 @@ export const executableNitroFile = (
.join('-') .join('-')
let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? '' let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? '' let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
let binaryName = `nitro${extension()}` let binaryName = `cortex-cpp${extension()}`
return { return {
executablePath: path.join(__dirname, '..', 'bin', binaryFolder, binaryName), executablePath: path.join(__dirname, '..', 'bin', binaryFolder, binaryName),
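With the change above, macOS binaries are now selected per architecture instead of a single universal build, and every platform resolves to a cortex-cpp binary. A rough usage sketch; the resulting paths are indicative, not exhaustive:

import { executableNitroFile } from './execute'

// The GPU setting argument is optional, matching the call sites above
const opts = executableNitroFile(undefined)
// darwin + arm64       -> .../bin/mac-arm64/cortex-cpp
// darwin + other arch  -> .../bin/mac-amd64/cortex-cpp
// win32 without CUDA   -> .../bin/win-cpu/cortex-cpp.exe
// linux without CUDA   -> .../bin/linux-cpu/cortex-cpp
console.log(opts.executablePath, opts.cudaVisibleDevices, opts.vkVisibleDevices)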

View File

@ -34,9 +34,9 @@ const LOCAL_HOST = '127.0.0.1'
// The URL for the Nitro subprocess // The URL for the Nitro subprocess
const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}` const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}`
// The URL for the Nitro subprocess to load a model // The URL for the Nitro subprocess to load a model
const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel` const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`
// The URL for the Nitro subprocess to validate a model // The URL for the Nitro subprocess to validate a model
const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus` const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus`
// The URL for the Nitro subprocess to kill itself // The URL for the Nitro subprocess to kill itself
const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy` const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
@ -50,7 +50,7 @@ const SUPPORTED_MODEL_FORMAT = '.gguf'
let subprocess: ChildProcessWithoutNullStreams | undefined = undefined let subprocess: ChildProcessWithoutNullStreams | undefined = undefined
// The current model settings // The current model settings
let currentSettings: ModelSettingParams | undefined = undefined let currentSettings: ModelSettingParams & { model?: string } | undefined = undefined
/** /**
* Stops a Nitro subprocess. * Stops a Nitro subprocess.
@ -77,7 +77,7 @@ async function loadModel(
} }
if (params.model.engine !== InferenceEngine.nitro) { if (params.model.engine !== InferenceEngine.nitro) {
return Promise.reject('Not a nitro model') return Promise.reject('Not a cortex model')
} else { } else {
const nitroResourceProbe = await getSystemResourceInfo() const nitroResourceProbe = await getSystemResourceInfo()
// Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
@ -135,6 +135,7 @@ async function loadModel(
// model.settings can override the default settings // model.settings can override the default settings
...params.model.settings, ...params.model.settings,
llama_model_path, llama_model_path,
model: params.model.id,
// This is critical and requires real CPU physical core count (or performance core) // This is critical and requires real CPU physical core count (or performance core)
...(params.model.settings.mmproj && { ...(params.model.settings.mmproj && {
mmproj: path.isAbsolute(params.model.settings.mmproj) mmproj: path.isAbsolute(params.model.settings.mmproj)
@ -142,7 +143,7 @@ async function loadModel(
: path.join(modelFolder, params.model.settings.mmproj), : path.join(modelFolder, params.model.settings.mmproj),
}), }),
} }
return runNitroAndLoadModel(systemInfo) return runNitroAndLoadModel(params.model.id, systemInfo)
} }
} }
@ -152,7 +153,7 @@ async function loadModel(
* 3. Validate model status * 3. Validate model status
* @returns * @returns
*/ */
async function runNitroAndLoadModel(systemInfo?: SystemInformation) { async function runNitroAndLoadModel(modelId: string, systemInfo?: SystemInformation) {
// Gather system information for CPU physical cores and memory // Gather system information for CPU physical cores and memory
return killSubprocess() return killSubprocess()
.then(() => .then(() =>
@ -160,10 +161,10 @@ async function runNitroAndLoadModel(systemInfo?: SystemInformation) {
) )
.then(() => spawnNitroProcess(systemInfo)) .then(() => spawnNitroProcess(systemInfo))
.then(() => loadLLMModel(currentSettings)) .then(() => loadLLMModel(currentSettings))
.then(validateModelStatus) .then(() => validateModelStatus(modelId))
.catch((err) => { .catch((err) => {
// TODO: Broadcast error so app could display proper error message // TODO: Broadcast error so app could display proper error message
log(`[NITRO]::Error: ${err}`) log(`[CORTEX]::Error: ${err}`)
return { error: err } return { error: err }
}) })
} }
@ -222,7 +223,7 @@ function loadLLMModel(settings: any): Promise<Response> {
if (!settings?.ngl) { if (!settings?.ngl) {
settings.ngl = 100 settings.ngl = 100
} }
log(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`) log(`[CORTEX]::Debug: Loading model with params ${JSON.stringify(settings)}`)
return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, { return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
method: 'POST', method: 'POST',
headers: { headers: {
@ -234,14 +235,14 @@ function loadLLMModel(settings: any): Promise<Response> {
}) })
.then((res) => { .then((res) => {
log( log(
`[NITRO]::Debug: Load model success with response ${JSON.stringify( `[CORTEX]::Debug: Load model success with response ${JSON.stringify(
res res
)}` )}`
) )
return Promise.resolve(res) return Promise.resolve(res)
}) })
.catch((err) => { .catch((err) => {
log(`[NITRO]::Error: Load model failed with error ${err}`) log(`[CORTEX]::Error: Load model failed with error ${err}`)
return Promise.reject(err) return Promise.reject(err)
}) })
} }
@ -252,11 +253,12 @@ function loadLLMModel(settings: any): Promise<Response> {
* If the model is loaded successfully, the object is empty. * If the model is loaded successfully, the object is empty.
* If the model is not loaded successfully, the object contains an error message. * If the model is not loaded successfully, the object contains an error message.
*/ */
async function validateModelStatus(): Promise<void> { async function validateModelStatus(modelId: string): Promise<void> {
// Send a GET request to the validation URL. // Send a GET request to the validation URL.
// Retry the request up to 3 times if it fails, with a delay of 500 milliseconds between retries. // Retry the request up to 3 times if it fails, with a delay of 500 milliseconds between retries.
return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, { return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
method: 'GET', method: 'POST',
body: JSON.stringify({ model: modelId }),
headers: { headers: {
'Content-Type': 'application/json', 'Content-Type': 'application/json',
}, },
@ -264,7 +266,7 @@ async function validateModelStatus(): Promise<void> {
retryDelay: 300, retryDelay: 300,
}).then(async (res: Response) => { }).then(async (res: Response) => {
log( log(
`[NITRO]::Debug: Validate model state with response ${JSON.stringify( `[CORTEX]::Debug: Validate model state with response ${JSON.stringify(
res.status res.status
)}` )}`
) )
@ -275,7 +277,7 @@ async function validateModelStatus(): Promise<void> {
// Otherwise, return an object with an error message. // Otherwise, return an object with an error message.
if (body.model_loaded) { if (body.model_loaded) {
log( log(
`[NITRO]::Debug: Validate model state success with response ${JSON.stringify( `[CORTEX]::Debug: Validate model state success with response ${JSON.stringify(
body body
)}` )}`
) )
@ -283,7 +285,7 @@ async function validateModelStatus(): Promise<void> {
} }
} }
log( log(
-`[NITRO]::Debug: Validate model state failed with response ${JSON.stringify(
+`[CORTEX]::Debug: Validate model state failed with response ${JSON.stringify(
res.statusText
)}`
)
@ -298,7 +300,7 @@ async function validateModelStatus(): Promise<void> {
async function killSubprocess(): Promise<void> {
const controller = new AbortController()
setTimeout(() => controller.abort(), 5000)
-log(`[NITRO]::Debug: Request to kill Nitro`)
+log(`[CORTEX]::Debug: Request to kill cortex`)
const killRequest = () => {
return fetch(NITRO_HTTP_KILL_URL, {
@ -309,17 +311,17 @@ async function killSubprocess(): Promise<void> {
.then(() =>
tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
)
-.then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
+.then(() => log(`[CORTEX]::Debug: cortex process is terminated`))
.catch((err) => {
log(
-`[NITRO]::Debug: Could not kill running process on port ${PORT}. Might be another process running on the same port? ${err}`
+`[CORTEX]::Debug: Could not kill running process on port ${PORT}. Might be another process running on the same port? ${err}`
)
throw 'PORT_NOT_AVAILABLE'
})
}
if (subprocess?.pid) {
-log(`[NITRO]::Debug: Killing PID ${subprocess.pid}`)
+log(`[CORTEX]::Debug: Killing PID ${subprocess.pid}`)
const pid = subprocess.pid
return new Promise((resolve, reject) => {
terminate(pid, function (err) {
@ -329,7 +331,7 @@ async function killSubprocess(): Promise<void> {
tcpPortUsed
.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
.then(() => resolve())
-.then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
+.then(() => log(`[CORTEX]::Debug: cortex process is terminated`))
.catch(() => {
killRequest().then(resolve).catch(reject)
})
@ -346,22 +348,24 @@ async function killSubprocess(): Promise<void> {
* @returns A promise that resolves when the Nitro subprocess is started.
*/
function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
-log(`[NITRO]::Debug: Spawning Nitro subprocess...`)
+log(`[CORTEX]::Debug: Spawning cortex subprocess...`)
return new Promise<void>(async (resolve, reject) => {
-let binaryFolder = path.join(__dirname, '..', 'bin') // Current directory by default
let executableOptions = executableNitroFile(systemInfo?.gpuSetting)
const args: string[] = ['1', LOCAL_HOST, PORT.toString()]
// Execute the binary
log(
-`[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
+`[CORTEX]::Debug: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}`
+)
+log(
+path.parse(executableOptions.executablePath).dir
)
subprocess = spawn(
executableOptions.executablePath,
['1', LOCAL_HOST, PORT.toString()],
{
-cwd: binaryFolder,
+cwd: path.join(path.parse(executableOptions.executablePath).dir),
env: {
...process.env,
CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
@ -375,15 +379,15 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
// Handle subprocess output
subprocess.stdout.on('data', (data: any) => {
-log(`[NITRO]::Debug: ${data}`)
+log(`[CORTEX]::Debug: ${data}`)
})
subprocess.stderr.on('data', (data: any) => {
-log(`[NITRO]::Error: ${data}`)
+log(`[CORTEX]::Error: ${data}`)
})
subprocess.on('close', (code: any) => {
-log(`[NITRO]::Debug: Nitro exited with code: ${code}`)
+log(`[CORTEX]::Debug: cortex exited with code: ${code}`)
subprocess = undefined
reject(`child process exited with code ${code}`)
})
@ -391,7 +395,7 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
tcpPortUsed
.waitUntilUsed(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 30000)
.then(() => {
-log(`[NITRO]::Debug: Nitro is ready`)
+log(`[CORTEX]::Debug: cortex is ready`)
resolve()
})
})

View File

@ -1,7 +1,7 @@
{
"name": "@janhq/inference-openai-extension",
"productName": "OpenAI Inference Engine",
-"version": "1.0.0",
+"version": "1.0.2",
"description": "This extension enables OpenAI chat completion API calls",
"main": "dist/index.js",
"module": "dist/module.js",

View File

@ -5,20 +5,27 @@
"url": "https://openai.com"
}
],
-"id": "gpt-4",
+"id": "gpt-4-turbo",
"object": "model",
-"name": "OpenAI GPT 4",
+"name": "OpenAI GPT 4 Turbo",
-"version": "1.0",
+"version": "1.2",
-"description": "OpenAI GPT 4 model is extremely good",
+"description": "OpenAI GPT 4 Turbo model is extremely good",
"format": "api",
"settings": {},
"parameters": {
"max_tokens": 4096,
-"temperature": 0.7
+"temperature": 0.7,
+"top_p": 0.95,
+"stream": true,
+"stop": [],
+"frequency_penalty": 0,
+"presence_penalty": 0
},
"metadata": {
"author": "OpenAI",
-"tags": ["General", "Big Context Length"]
+"tags": [
+"General"
+]
},
"engine": "openai"
},
@ -31,8 +38,8 @@
"id": "gpt-4-vision-preview",
"object": "model",
"name": "OpenAI GPT 4 with Vision (Preview)",
-"version": "1.0",
+"version": "1.1",
-"description": "OpenAI GPT 4 with Vision model is extremely good in preview",
+"description": "OpenAI GPT-4 Vision model features vision understanding capabilities",
"format": "api",
"settings": {
"vision_model": true,
@ -40,34 +47,16 @@
},
"parameters": {
"max_tokens": 4096,
-"temperature": 0.7
+"temperature": 0.7,
+"top_p": 0.95,
+"stream": true
},
"metadata": {
"author": "OpenAI",
-"tags": ["General", "Big Context Length", "Vision"]
+"tags": [
+"General",
+"Vision"
+]
-},
-"engine": "openai"
-},
-{
-"sources": [
-{
-"url": "https://openai.com"
-}
-],
-"id": "gpt-3.5-turbo-16k-0613",
-"object": "model",
-"name": "OpenAI GPT 3.5 Turbo 16k 0613",
-"version": "1.0",
-"description": "OpenAI GPT 3.5 Turbo 16k 0613 model is extremely good",
-"format": "api",
-"settings": {},
-"parameters": {
-"max_tokens": 4096,
-"temperature": 0.7
-},
-"metadata": {
-"author": "OpenAI",
-"tags": ["General", "Big Context Length"]
},
"engine": "openai"
},
@ -80,17 +69,54 @@
"id": "gpt-3.5-turbo",
"object": "model",
"name": "OpenAI GPT 3.5 Turbo",
-"version": "1.0",
+"version": "1.1",
-"description": "OpenAI GPT 3.5 Turbo model is extremely good",
+"description": "OpenAI GPT 3.5 Turbo model is extremely fast",
"format": "api",
"settings": {},
"parameters": {
"max_tokens": 4096,
-"temperature": 0.7
+"temperature": 0.7,
+"top_p": 0.95,
+"stream": true,
+"stop": [],
+"frequency_penalty": 0,
+"presence_penalty": 0
},
"metadata": {
"author": "OpenAI",
-"tags": ["General", "Big Context Length"]
+"tags": [
+"General"
+]
+},
+"engine": "openai"
+},
+{
+"sources": [
+{
+"url": "https://openai.com"
+}
+],
+"id": "gpt-4o",
+"object": "model",
+"name": "OpenAI GPT 4o",
+"version": "1.1",
+"description": "OpenAI GPT 4o is a new flagship model with fast speed and high quality",
+"format": "api",
+"settings": {},
+"parameters": {
+"max_tokens": 4096,
+"temperature": 0.7,
+"top_p": 0.95,
+"stream": true,
+"stop": [],
+"frequency_penalty": 0,
+"presence_penalty": 0
+},
+"metadata": {
+"author": "OpenAI",
+"tags": [
+"General"
+]
},
"engine": "openai"
}

View File

@ -0,0 +1,79 @@
# Open Router Engine Extension
Created using Jan extension example
# Create a Jan Extension using Typescript
Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀
## Create Your Own Extension
To create your own extension, you can use this repository as a template! Just follow the instructions below:
1. Click the Use this template button at the top of the repository
2. Select Create a new repository
3. Select an owner and name for your new repository
4. Click Create repository
5. Clone your new repository
## Initial Setup
After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.
> [!NOTE]
>
> You'll need to have a reasonably modern version of
> [Node.js](https://nodejs.org) handy. If you are using a version manager like
> [`nodenv`](https://github.com/nodenv/nodenv) or
> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
> root of your repository to install the version specified in
> [`package.json`](./package.json). Otherwise, 20.x or later should work!
1. :hammer_and_wrench: Install the dependencies
```bash
npm install
```
1. :building_construction: Package the TypeScript for distribution
```bash
npm run bundle
```
1. :white_check_mark: Check your artifact
A `.tgz` file will now be present in your extension directory
## Update the Extension Metadata
The [`package.json`](package.json) file defines metadata about your extension, such as
extension name, main entry, description and version.
When you copy this repository, update `package.json` with the name and description of your extension.
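For example, a minimal sketch of the fields you would typically change (the values below are placeholders for illustration, not part of this repository):
```json
{
  "name": "@your-scope/inference-acme-extension",
  "productName": "Acme Inference Engine",
  "version": "1.0.0",
  "description": "This extension enables Acme chat completion API calls",
  "main": "dist/index.js",
  "module": "dist/module.js"
}
```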
## Update the Extension Code
The [`src/`](./src/) directory is the heart of your extension! This contains the
source code that will be run when your extension functions are invoked. You can replace the
contents of this directory with your own code.
There are a few things to keep in mind when writing your extension code:
- Most Jan Extension functions are processed asynchronously.
In `index.ts`, you will see that the extension function will return a `Promise<any>`.
```typescript
import { events, MessageEvent, MessageRequest } from '@janhq/core'
function onStart(): Promise<any> {
return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
this.inference(data)
)
}
```
For more information about the Jan Extension Core module, see the
[documentation](https://github.com/janhq/jan/blob/main/core/README.md).
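Building on the pattern above, the OpenRouter extension added in this release extends `RemoteOAIEngine` from `@janhq/core`. The sketch below is a hedged illustration of that shape only; the class name, provider id, setting key, and endpoint are placeholders, not the actual extension:
```typescript
import { RemoteOAIEngine } from '@janhq/core'

// SETTINGS and MODELS are injected at build time by webpack's DefinePlugin,
// as in the webpack.config.js shipped with the OpenRouter extension.
declare const SETTINGS: Array<any>
declare const MODELS: Array<any>

export default class MyRemoteInferenceExtension extends RemoteOAIEngine {
  // Placeholder endpoint; a real extension would read this from its settings.
  inferenceUrl: string = 'https://api.example.com/v1/chat/completions'
  provider: string = 'my-provider'

  override async onLoad(): Promise<void> {
    super.onLoad()
    // Register the bundled settings and model list with Jan.
    this.registerSettings(SETTINGS)
    this.registerModels(MODELS)
    // Read the API key the user entered in the extension settings.
    this.apiKey = await this.getSetting<string>('my-provider-api-key', '')
  }
}
```
The real OpenRouter extension in this PR also overrides `transformPayload` to pin the request's `model` field to `openrouter/auto` before it is sent.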
So, what are you waiting for? Go ahead and start customizing your extension!

View File

@ -0,0 +1,43 @@
{
"name": "@janhq/inference-openrouter-extension",
"productName": "OpenRouter Inference Engine",
"version": "1.0.0",
"description": "This extension enables Open Router chat completion API calls",
"main": "dist/index.js",
"module": "dist/module.js",
"engine": "openrouter",
"author": "Jan <service@jan.ai>",
"license": "AGPL-3.0",
"scripts": {
"build": "tsc -b . && webpack --config webpack.config.js",
"build:publish": "rimraf *.tgz --glob && yarn build && npm pack && cpx *.tgz ../../pre-install",
"sync:core": "cd ../.. && yarn build:core && cd extensions && rm yarn.lock && cd inference-openrouter-extension && yarn && yarn build:publish"
},
"exports": {
".": "./dist/index.js",
"./main": "./dist/module.js"
},
"devDependencies": {
"cpx": "^1.5.0",
"rimraf": "^3.0.2",
"webpack": "^5.88.2",
"webpack-cli": "^5.1.4",
"ts-loader": "^9.5.0"
},
"dependencies": {
"@janhq/core": "file:../../core",
"fetch-retry": "^5.0.6",
"ulidx": "^2.3.0"
},
"engines": {
"node": ">=18.0.0"
},
"files": [
"dist/*",
"package.json",
"README.md"
],
"bundleDependencies": [
"fetch-retry"
]
}

View File

@ -0,0 +1,28 @@
[
{
"sources": [
{
"url": "https://openrouter.ai"
}
],
"id": "open-router-auto",
"object": "model",
"name": "OpenRouter",
"version": "1.0",
"description": " OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
"format": "api",
"settings": {},
"parameters": {
"max_tokens": 1024,
"temperature": 0.7,
"top_p": 0.95,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "OpenRouter",
"tags": ["General", "Big Context Length"]
},
"engine": "openrouter"
}
]

View File

@ -0,0 +1,23 @@
[
{
"key": "chat-completions-endpoint",
"title": "Chat Completions Endpoint",
"description": "The endpoint to use for chat completions. See the [OpenRouter API documentation](https://openrouter.ai/docs) for more information.",
"controllerType": "input",
"controllerProps": {
"placeholder": "https://openrouter.ai/api/v1/chat/completions",
"value": "https://openrouter.ai/api/v1/chat/completions"
}
},
{
"key": "openrouter-api-key",
"title": "API Key",
"description": "The OpenRouter API uses API keys for authentication. Visit your [API Keys](https://openrouter.ai/keys) page to retrieve the API key you'll use in your requests.",
"controllerType": "input",
"controllerProps": {
"placeholder": "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
"value": "",
"type": "password"
}
}
]

View File

@ -0,0 +1,76 @@
/**
* @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
* The class provides methods for initializing and stopping a model, and for making inference requests.
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
* @version 1.0.0
* @module inference-openrouter-extension/src/index
*/
import { RemoteOAIEngine } from '@janhq/core'
import { PayloadType } from '@janhq/core'
import { ChatCompletionRole } from '@janhq/core'
declare const SETTINGS: Array<any>
declare const MODELS: Array<any>
enum Settings {
apiKey = 'openrouter-api-key',
chatCompletionsEndPoint = 'chat-completions-endpoint',
}
enum RoleType {
user = 'USER',
chatbot = 'CHATBOT',
system = 'SYSTEM',
}
/**
* A class that implements the InferenceExtension interface from the @janhq/core package.
* The class provides methods for initializing and stopping a model, and for making inference requests.
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
*/
export default class JanInferenceOpenRouterExtension extends RemoteOAIEngine {
inferenceUrl: string = ''
provider: string = 'openrouter'
override async onLoad(): Promise<void> {
super.onLoad()
// Register Settings
this.registerSettings(SETTINGS)
this.registerModels(MODELS)
this.apiKey = await this.getSetting<string>(Settings.apiKey, '')
this.inferenceUrl = await this.getSetting<string>(
Settings.chatCompletionsEndPoint,
''
)
if (this.inferenceUrl.length === 0) {
SETTINGS.forEach((setting) => {
if (setting.key === Settings.chatCompletionsEndPoint) {
this.inferenceUrl = setting.controllerProps.value as string
}
})
}
}
onSettingUpdate<T>(key: string, value: T): void {
if (key === Settings.apiKey) {
this.apiKey = value as string
} else if (key === Settings.chatCompletionsEndPoint) {
if (typeof value !== 'string') return
if (value.trim().length === 0) {
SETTINGS.forEach((setting) => {
if (setting.key === Settings.chatCompletionsEndPoint) {
this.inferenceUrl = setting.controllerProps.value as string
}
})
} else {
this.inferenceUrl = value
}
}
}
transformPayload = (payload: PayloadType) => ({ ...payload, model: 'openrouter/auto' })
}

View File

@ -0,0 +1,14 @@
{
"compilerOptions": {
"target": "es2016",
"module": "ES6",
"moduleResolution": "node",
"outDir": "./dist",
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"strict": false,
"skipLibCheck": true,
"rootDir": "./src"
},
"include": ["./src"]
}

View File

@ -0,0 +1,37 @@
const webpack = require('webpack')
const packageJson = require('./package.json')
const settingJson = require('./resources/settings.json')
const modelsJson = require('./resources/models.json')
module.exports = {
experiments: { outputModule: true },
entry: './src/index.ts', // Adjust the entry point to match your project's main file
mode: 'production',
module: {
rules: [
{
test: /\.tsx?$/,
use: 'ts-loader',
exclude: /node_modules/,
},
],
},
plugins: [
new webpack.DefinePlugin({
MODELS: JSON.stringify(modelsJson),
SETTINGS: JSON.stringify(settingJson),
ENGINE: JSON.stringify(packageJson.engine),
}),
],
output: {
filename: 'index.js', // Adjust the output file name as needed
library: { type: 'module' }, // Specify ESM output format
},
resolve: {
extensions: ['.ts', '.js'],
},
optimization: {
minimize: false,
},
// Add loaders and other configuration as needed for your project
}

View File

@ -97,7 +97,7 @@ function unloadModel(): Promise<void> {
}
if (subprocess?.pid) {
-log(`[NITRO]::Debug: Killing PID ${subprocess.pid}`)
+log(`[CORTEX]::Debug: Killing PID ${subprocess.pid}`)
const pid = subprocess.pid
return new Promise((resolve, reject) => {
terminate(pid, function (err) {
@ -107,7 +107,7 @@ function unloadModel(): Promise<void> {
return tcpPortUsed
.waitUntilFree(parseInt(ENGINE_PORT), PORT_CHECK_INTERVAL, 5000)
.then(() => resolve())
-.then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
+.then(() => log(`[CORTEX]::Debug: cortex process is terminated`))
.catch(() => {
killRequest()
})

View File

@ -45,6 +45,7 @@
--border: 20 5.9% 90%;
--input: 20 5.9% 90%;
--ring: 20 14.3% 4.1%;
+--scroll-bar: 60, 3%, 86%;
.primary-blue {
--primary: 221 83% 53%;

View File

@ -21,3 +21,38 @@
@apply bg-border relative z-50 w-[10px] rounded-full;
}
}
// Customized scroll bar
::-webkit-scrollbar {
width: 7px;
}
::-webkit-scrollbar-thumb {
background-color: hsl(var(--scroll-bar));
border-radius: 4px;
}
::-webkit-scrollbar-track {
background-color: hsl(var(--background));
}
::-webkit-scrollbar-corner {
background-color: hsl(var(--background));
}
::-moz-scrollbar {
width: 7px;
}
::-moz-scrollbar-thumb {
background-color: hsl(var(--scroll-bar));
border-radius: 4px;
}
::-moz-scrollbar-track {
background-color: hsl(var(--background));
}
::-moz-scrollbar-corner {
background-color: hsl(var(--background));
}

View File

@ -25,6 +25,8 @@ import ImportModelOptionModal from '@/screens/Settings/ImportModelOptionModal'
import ImportingModelModal from '@/screens/Settings/ImportingModelModal'
import SelectingModelModal from '@/screens/Settings/SelectingModelModal'
+import LoadingModal from '../LoadingModal'
import MainViewContainer from '../MainViewContainer'
import InstallingExtensionModal from './BottomBar/InstallingExtension/InstallingExtensionModal'
@ -69,6 +71,7 @@ const BaseLayout = () => {
<BottomBar />
</div>
</div>
+<LoadingModal />
{importModelStage === 'SELECTING_MODEL' && <SelectingModelModal />}
{importModelStage === 'MODEL_SELECTED' && <ImportModelOptionModal />}
{importModelStage === 'IMPORTING_MODEL' && <ImportingModelModal />}

View File

@ -1,4 +1,4 @@
-import { ReactNode, useEffect, useRef } from 'react'
+import { ReactNode, useCallback, useEffect, useRef } from 'react'
type Props = {
children: ReactNode
@ -6,20 +6,44 @@ type Props = {
const ListContainer: React.FC<Props> = ({ children }) => {
const listRef = useRef<HTMLDivElement>(null)
+const prevScrollTop = useRef(0)
+const isUserManuallyScrollingUp = useRef(false)
+const handleScroll = useCallback((event: React.UIEvent<HTMLElement>) => {
+const currentScrollTop = event.currentTarget.scrollTop
+if (prevScrollTop.current > currentScrollTop) {
+console.debug('User is manually scrolling up')
+isUserManuallyScrollingUp.current = true
+} else {
+const currentScrollTop = event.currentTarget.scrollTop
+const scrollHeight = event.currentTarget.scrollHeight
+const clientHeight = event.currentTarget.clientHeight
+if (currentScrollTop + clientHeight >= scrollHeight) {
+console.debug('Scrolled to the bottom')
+isUserManuallyScrollingUp.current = false
+}
+}
+prevScrollTop.current = currentScrollTop
+}, [])
useEffect(() => {
+if (isUserManuallyScrollingUp.current === true) return
const scrollHeight = listRef.current?.scrollHeight ?? 0
listRef.current?.scrollTo({
top: scrollHeight,
-behavior: 'smooth',
+behavior: 'instant',
})
-})
+}, [listRef.current?.scrollHeight, isUserManuallyScrollingUp])
return (
<div
ref={listRef}
className="flex h-full w-full flex-col overflow-y-scroll"
+onScroll={handleScroll}
>
{children}
</div>

View File

@ -0,0 +1,26 @@
import { Modal, ModalContent, ModalHeader, ModalTitle } from '@janhq/uikit'
import { atom, useAtomValue } from 'jotai'
export type LoadingInfo = {
title: string
message: string
}
export const loadingModalInfoAtom = atom<LoadingInfo | undefined>(undefined)
const ResettingModal: React.FC = () => {
const loadingInfo = useAtomValue(loadingModalInfoAtom)
return (
<Modal open={loadingInfo != null}>
<ModalContent>
<ModalHeader>
<ModalTitle>{loadingInfo?.title}</ModalTitle>
</ModalHeader>
<p className="text-muted-foreground">{loadingInfo?.message}</p>
</ModalContent>
</Modal>
)
}
export default ResettingModal

View File

@ -0,0 +1,101 @@
import { Fragment, ReactNode } from 'react'
import { useSetAtom } from 'jotai'
import { useDebouncedCallback } from 'use-debounce'
import { useGetHFRepoData } from '@/hooks/useGetHFRepoData'
import { loadingModalInfoAtom } from '../LoadingModal'
import { toaster } from '../Toast'
import {
importHuggingFaceModelStageAtom,
importingHuggingFaceRepoDataAtom,
} from '@/helpers/atoms/HuggingFace.atom'
type Props = {
children: ReactNode
}
const DeepLinkListener: React.FC<Props> = ({ children }) => {
const { getHfRepoData } = useGetHFRepoData()
const setLoadingInfo = useSetAtom(loadingModalInfoAtom)
const setImportingHuggingFaceRepoData = useSetAtom(
importingHuggingFaceRepoDataAtom
)
const setImportHuggingFaceModelStage = useSetAtom(
importHuggingFaceModelStageAtom
)
const handleDeepLinkAction = useDebouncedCallback(
async (deepLinkAction: DeepLinkAction) => {
if (
deepLinkAction.action !== 'models' ||
deepLinkAction.provider !== 'huggingface'
) {
console.error(
`Invalid deeplink action (${deepLinkAction.action}) or provider (${deepLinkAction.provider})`
)
return
}
try {
setLoadingInfo({
title: 'Getting Hugging Face models',
message: 'Please wait..',
})
const data = await getHfRepoData(deepLinkAction.resource)
setImportingHuggingFaceRepoData(data)
setImportHuggingFaceModelStage('REPO_DETAIL')
setLoadingInfo(undefined)
} catch (err) {
setLoadingInfo(undefined)
toaster({
title: 'Failed to get Hugging Face models',
description: err instanceof Error ? err.message : 'Unexpected Error',
type: 'error',
})
console.error(err)
}
},
300
)
window.electronAPI?.onDeepLink((_event: string, input: string) => {
window.core?.api?.ackDeepLink()
const action = deeplinkParser(input)
if (!action) return
handleDeepLinkAction(action)
})
return <Fragment>{children}</Fragment>
}
type DeepLinkAction = {
action: string
provider: string
resource: string
}
const deeplinkParser = (
deepLink: string | undefined
): DeepLinkAction | undefined => {
if (!deepLink) return undefined
try {
const url = new URL(deepLink)
const params = url.pathname.split('/').filter((str) => str.length > 0)
if (params.length < 3) return undefined
const action = params[0]
const provider = params[1]
const resource = params.slice(2).join('/')
return { action, provider, resource }
} catch (err) {
console.error(err)
return undefined
}
}
export default DeepLinkListener

View File

@ -22,6 +22,7 @@ import Loader from '../Loader'
import DataLoader from './DataLoader'
+import DeepLinkListener from './DeepLinkListener'
import KeyListener from './KeyListener'
import { extensionManager } from '@/extension'
@ -78,7 +79,9 @@ const Providers = ({ children }: PropsWithChildren) => {
<KeyListener>
<EventListenerWrapper>
<TooltipProvider delayDuration={0}>
-<DataLoader>{children}</DataLoader>
+<DataLoader>
+<DeepLinkListener>{children}</DeepLinkListener>
+</DataLoader>
</TooltipProvider>
</EventListenerWrapper>
<Toaster />

View File

@ -99,6 +99,11 @@ export const useCreateNewThread = () => {
? { ctx_len: 2048 }
: {}
+const overriddenParameters =
+defaultModel?.parameters.max_tokens && defaultModel.parameters.max_tokens
+? { max_tokens: 2048 }
+: {}
const createdAt = Date.now()
const assistantInfo: ThreadAssistantInfo = {
assistant_id: assistant.id,
@ -107,7 +112,8 @@
model: {
id: defaultModel?.id ?? '*',
settings: { ...defaultModel?.settings, ...overriddenSettings } ?? {},
-parameters: defaultModel?.parameters ?? {},
+parameters:
+{ ...defaultModel?.parameters, ...overriddenParameters } ?? {},
engine: defaultModel?.engine,
},
instructions: assistant.instructions,

View File

@ -22,8 +22,8 @@ const ChatBody: React.FC = () => {
const downloadedModels = useAtomValue(downloadedModelsAtom)
const loadModelError = useAtomValue(loadModelErrorAtom)
-if (downloadedModels.length === 0) return <EmptyModel />
+if (!downloadedModels.length) return <EmptyModel />
-if (messages.length === 0) return <EmptyThread />
+if (!messages.length) return <EmptyThread />
return (
<ListContainer>

View File

@ -129,12 +129,10 @@ const EditChatInput: React.FC<Props> = ({ message }) => {
}
return (
-<div className="mx-auto flex w-full flex-shrink-0 items-end justify-center space-x-4 pb-0 pt-1">
+<div className="mx-auto flex w-full flex-shrink-0 flex-col items-start justify-center space-y-4 pb-0 pt-1">
<div className="relative flex w-full flex-col">
<Textarea
-className={twMerge(
-'max-h-[400px] resize-none overflow-y-hidden pr-20'
-)}
+className={twMerge('max-h-[400px] resize-none pr-20')}
style={{ height: '40px' }}
ref={textareaRef}
onKeyDown={onKeyDown}

View File

@ -3,12 +3,17 @@ import {
InputComponentProps,
CheckboxComponentProps,
SliderComponentProps,
+InferenceEngine,
} from '@janhq/core'
+import { useAtomValue } from 'jotai/react'
import Checkbox from '@/containers/Checkbox'
import ModelConfigInput from '@/containers/ModelConfigInput'
import SliderRightPanel from '@/containers/SliderRightPanel'
+import { activeThreadAtom } from '@/helpers/atoms/Thread.atom'
type Props = {
componentProps: SettingComponentProps[]
disabled?: boolean
@ -20,6 +25,7 @@ const SettingComponent: React.FC<Props> = ({
disabled = false,
onValueUpdated,
}) => {
+const activeThread = useAtomValue(activeThreadAtom)
const components = componentProps.map((data) => {
switch (data.controllerType) {
case 'slider': {
@ -31,7 +37,16 @@ const SettingComponent: React.FC<Props> = ({
title={data.title}
description={data.description}
min={min}
-max={max}
+max={
+data.key === 'max_tokens' &&
+activeThread &&
+activeThread.assistants[0].model.engine === InferenceEngine.nitro
+? Number(
+activeThread &&
+activeThread.assistants[0].model.settings.ctx_len
+)
+: max
+}
step={step}
value={value}
name={data.key}

View File

@ -33,7 +33,7 @@ export const presetConfiguration: Record<string, SettingComponentProps> = {
'The context length for model operations varies; the maximum depends on the specific model used.',
controllerType: 'slider',
controllerProps: {
-min: 0,
+min: 128,
max: 4096,
step: 128,
value: 2048,

View File

@ -118,6 +118,32 @@ const Sidebar: React.FC = () => {
updateModelParameter(activeThread, {
params: { [key]: value },
})
+if (
+activeThread.assistants[0].model.parameters.max_tokens &&
+activeThread.assistants[0].model.settings.ctx_len
+) {
+if (
+key === 'max_tokens' &&
+Number(value) > activeThread.assistants[0].model.settings.ctx_len
+) {
+updateModelParameter(activeThread, {
+params: {
+max_tokens: activeThread.assistants[0].model.settings.ctx_len,
+},
+})
+}
+if (
+key === 'ctx_len' &&
+Number(value) < activeThread.assistants[0].model.parameters.max_tokens
+) {
+updateModelParameter(activeThread, {
+params: {
+max_tokens: activeThread.assistants[0].model.settings.ctx_len,
+},
+})
+}
+}
},
[activeThread, setEngineParamsUpdate, stopModel, updateModelParameter]
)