Sync release 0.4.9 to dev (#2407)

* fix: move tensorrt executable to engine (#2400) * fix: move tensorrt executable to engine Signed-off-by: James <james@jan.ai> * some update Signed-off-by: hiro <hiro@jan.ai> * chore: bump tensorrt version * fix: wrong destroy path * fix: install extensions in parallel * chore: update path for tensorrt engine (#2404) Signed-off-by: James <james@jan.ai> Co-authored-by: James <james@jan.ai> --------- Signed-off-by: James <james@jan.ai> Signed-off-by: hiro <hiro@jan.ai> Co-authored-by: James <james@jan.ai> Co-authored-by: hiro <hiro@jan.ai> Co-authored-by: Louis <louis@jan.ai> * Release/v0.4.9 (#2421) * fix: turn off experimental settings should also turn off quick ask (#2411) * fix: app glitches 1s generating response before starting model (#2412) * fix: disable experimental feature should also disable vulkan (#2414) * fix: model load stuck on windows when can't get CPU core count (#2413) Signed-off-by: James <james@jan.ai> Co-authored-by: James <james@jan.ai> * feat: TensorRT-LLM engine update support (#2415) * fix: engine update * chore: add remove prepopulated models Signed-off-by: James <james@jan.ai> * update tinyjensen url Signed-off-by: James <james@jan.ai> * update llamacorn Signed-off-by: James <james@jan.ai> * update Mistral 7B Instruct v0.1 int4 Signed-off-by: James <james@jan.ai> * update tensorrt Signed-off-by: James <james@jan.ai> * update Signed-off-by: hiro <hiro@jan.ai> * update Signed-off-by: James <james@jan.ai> * prettier Signed-off-by: James <james@jan.ai> * update mistral config Signed-off-by: James <james@jan.ai> * fix some lint Signed-off-by: James <james@jan.ai> --------- Signed-off-by: James <james@jan.ai> Signed-off-by: hiro <hiro@jan.ai> Co-authored-by: James <james@jan.ai> Co-authored-by: hiro <hiro@jan.ai> * Tensorrt LLM disable turing support (#2418) Co-authored-by: Hien To <tominhhien97@gmail.com> * chore: add prompt template tensorrtllm (#2375) * chore: add prompt template tensorrtllm * Add Prompt template for mistral and correct 
model metadata --------- Co-authored-by: Hien To <tominhhien97@gmail.com> * fix: correct tensorrt mistral model.json (#2419) --------- Signed-off-by: James <james@jan.ai> Signed-off-by: hiro <hiro@jan.ai> Co-authored-by: Louis <louis@jan.ai> Co-authored-by: James <james@jan.ai> Co-authored-by: hiro <hiro@jan.ai> Co-authored-by: hiento09 <136591877+hiento09@users.noreply.github.com> Co-authored-by: Hien To <tominhhien97@gmail.com> --------- Signed-off-by: James <james@jan.ai> Signed-off-by: hiro <hiro@jan.ai> Co-authored-by: NamH <NamNh0122@gmail.com> Co-authored-by: James <james@jan.ai> Co-authored-by: hiro <hiro@jan.ai> Co-authored-by: hiento09 <136591877+hiento09@users.noreply.github.com> Co-authored-by: Hien To <tominhhien97@gmail.com>
2024-03-19 12:20:09 +07:00 · 2024-03-19 12:20:09 +07:00 · 489e8aab24
commit 489e8aab24
parent dd11ba293b
29 changed files with 607 additions and 151 deletions
--- a/.github/workflows/jan-electron-linter-and-test.yml
+++ b/.github/workflows/jan-electron-linter-and-test.yml
@ -22,6 +22,7 @@ on:
    branches:
      - main
      - dev
      - release/**
    paths:
      - "electron/**"
      - .github/workflows/jan-electron-linter-and-test.yml
--- a/core/package.json
+++ b/core/package.json
@ -46,7 +46,7 @@
  },
  "devDependencies": {
    "@types/jest": "^29.5.12",
-    "@types/node": "^12.0.2",
+    "@types/node": "^20.11.4",
    "eslint": "8.57.0",
    "eslint-plugin-jest": "^27.9.0",
    "jest": "^29.7.0",
--- a/core/src/api/index.ts
+++ b/core/src/api/index.ts
@ -33,7 +33,7 @@ export enum AppRoute {
  stopServer = 'stopServer',
  log = 'log',
  logServer = 'logServer',
-  systemInformations = 'systemInformations',
+  systemInformation = 'systemInformation',
  showToast = 'showToast',
 }
@ -95,6 +95,8 @@ export enum FileManagerRoute {
  getUserHomePath = 'getUserHomePath',
  fileStat = 'fileStat',
  writeBlob = 'writeBlob',
  mkdir = 'mkdir',
  rm = 'rm',
 }
 export type ApiFunction = (...args: any[]) => any
--- a/core/src/core.ts
+++ b/core/src/core.ts
@ -1,4 +1,4 @@
-import { DownloadRequest, FileStat, NetworkConfig } from './types'
+import { DownloadRequest, FileStat, NetworkConfig, SystemInformation } from './types'
 /**
 * Execute an extension module function in the main process
@ -110,7 +110,8 @@ const isSubdirectory: (from: string, to: string) => Promise<boolean> = (from: st
 * Get system information
 * @returns {Promise<SystemInformation>} - A promise that resolves with the system information.
 */
-const systemInformations: () => Promise<any> = () => global.core.api?.systemInformations()
+const systemInformation: () => Promise<SystemInformation> = () =>
  global.core.api?.systemInformation()
 /**
 * Show toast message from browser processes.
@ -146,7 +147,7 @@ export {
  log,
  isSubdirectory,
  getUserHomePath,
-  systemInformations,
+  systemInformation,
  showToast,
  FileStat,
 }
--- a/core/src/extension.ts
+++ b/core/src/extension.ts
@ -19,6 +19,7 @@ export interface Compatibility {
 const ALL_INSTALLATION_STATE = [
  'NotRequired', // not required.
  'Installed', // require and installed. Good to go.
  'Updatable', // require and installed but need to be updated.
  'NotInstalled', // require to be installed.
  'Corrupted', // require but corrupted. Need to redownload.
 ] as const
@ -59,6 +60,13 @@ export abstract class BaseExtension implements ExtensionType {
    return undefined
  }
  /**
   * Determine if the extension is updatable.
   *
   * The base implementation always reports `false`; subclasses can override
   * this method to signal that an update is available.
   *
   * @returns `true` when an update is available, `false` otherwise.
   */
  updatable(): boolean {
    return false
  }
  /**
   * Determine if the prerequisites for the extension are installed.
   *
--- a/core/src/extensions/ai-engines/LocalOAIEngine.ts
+++ b/core/src/extensions/ai-engines/LocalOAIEngine.ts
@ -1,4 +1,4 @@
-import { executeOnMain, getJanDataFolderPath, joinPath } from '../../core'
+import { executeOnMain, getJanDataFolderPath, joinPath, systemInformation } from '../../core'
 import { events } from '../../events'
 import { Model, ModelEvent } from '../../types'
 import { OAIEngine } from './OAIEngine'
@ -30,11 +30,11 @@ export abstract class LocalOAIEngine extends OAIEngine {
    if (model.engine.toString() !== this.provider) return
    const modelFolder = await joinPath([await getJanDataFolderPath(), this.modelFolder, model.id])
-
+    const systemInfo = await systemInformation()
    const res = await executeOnMain(this.nodeModule, this.loadModelFunctionName, {
      modelFolder,
      model,
-    })
+    }, systemInfo)
    if (res?.error) {
      events.emit(ModelEvent.OnModelFail, {
--- a/core/src/extensions/monitoring.ts
+++ b/core/src/extensions/monitoring.ts
@ -1,5 +1,5 @@
 import { BaseExtension, ExtensionTypeEnum } from '../extension'
-import { GpuSetting, MonitoringInterface } from '../index'
+import { GpuSetting, MonitoringInterface, OperatingSystemInfo } from '../index'
 /**
 * Monitoring extension for system monitoring.
@ -16,4 +16,5 @@ export abstract class MonitoringExtension extends BaseExtension implements Monit
  abstract getGpuSetting(): Promise<GpuSetting>
  abstract getResourcesInfo(): Promise<any>
  abstract getCurrentLoad(): Promise<any>
  abstract getOsInfo(): Promise<OperatingSystemInfo>
 }
--- a/core/src/fs.ts
+++ b/core/src/fs.ts
@ -37,12 +37,17 @@ const readdirSync = (...args: any[]) => global.core.api?.readdirSync(...args)
 */
 const mkdirSync = (...args: any[]) => global.core.api?.mkdirSync(...args)
 /**
 * Forwards all arguments to the `mkdir` API exposed on `global.core.api`.
 * @returns Whatever that API returns, or `undefined` when `global.core.api` is null or undefined.
 */
 const mkdir = (...args: any[]) => global.core.api?.mkdir(...args)
 /**
 * Removes a directory at the specified path.
 * @returns {Promise<any>} A Promise that resolves when the directory is removed successfully.
 */
 const rmdirSync = (...args: any[]) =>
  global.core.api?.rmdirSync(...args, { recursive: true, force: true })
 /**
 * Forwards `path` to the `rm` API exposed on `global.core.api`.
 * @param {string} path - The path to remove.
 * @returns Whatever that API returns, or `undefined` when `global.core.api` is null or undefined.
 */
 const rm = (path: string) => global.core.api?.rm(path)
 /**
 * Deletes a file from the local file system.
 * @param {string} path - The path of the file to delete.
@ -92,7 +97,9 @@ export const fs = {
  existsSync,
  readdirSync,
  mkdirSync,
  mkdir,
  rmdirSync,
  rm,
  unlinkSync,
  appendFileSync,
  copyFileSync,
--- a/core/src/node/api/processors/fsExt.ts
+++ b/core/src/node/api/processors/fsExt.ts
@ -88,4 +88,28 @@ export class FSExt implements Processor {
      })
    })
  }
  /**
   * Creates the directory at `path` using `fs.mkdir` with `recursive: true`.
   * @param path - Location of the directory to create.
   * @returns A promise that resolves when `fs.mkdir` completes without an error
   * and rejects with the reported error otherwise.
   */
  mkdir(path: string): Promise<void> {
    return new Promise<void>((done, fail) => {
      const onComplete = (failure: NodeJS.ErrnoException | null): void => {
        if (failure) {
          fail(failure)
          return
        }
        done()
      }
      fs.mkdir(path, { recursive: true }, onComplete)
    })
  }
  /**
   * Removes the entry at `path` using `fs.rm` with `recursive: true`.
   *
   * NOTE(review): the file-manager route and the client-side wrapper are both
   * named `rm`, while this method is named `rmdir` — confirm that the `rm`
   * route actually reaches this method.
   *
   * @param path - Location to remove.
   * @returns A promise that resolves when `fs.rm` completes without an error
   * and rejects with the reported error otherwise.
   */
  rmdir(path: string): Promise<void> {
    return new Promise<void>((done, fail) => {
      const onComplete = (failure: NodeJS.ErrnoException | null): void => {
        if (failure) {
          fail(failure)
          return
        }
        done()
      }
      fs.rm(path, { recursive: true }, onComplete)
    })
  }
 }
--- a/core/src/node/extension/store.ts
+++ b/core/src/node/extension/store.ts
@ -93,8 +93,7 @@ export function persistExtensions() {
 */
 export async function installExtensions(extensions: any) {
  const installed: Extension[] = []
-  for (const ext of extensions) {
+  const installations = extensions.map((ext: any): Promise<void> => {
    // Set install options and activation based on input type
    const isObject = typeof ext === 'object'
    const spec = isObject ? [ext.specifier, ext] : [ext]
    const activate = isObject ? ext.activate !== false : true
@ -102,15 +101,17 @@ export async function installExtensions(extensions: any) {
    // Install and possibly activate extension
    const extension = new Extension(...spec)
    if (!extension.origin) {
-      continue
+      return Promise.resolve()
    }
-    await extension._install()
+    return extension._install().then(() => {
-    if (activate) extension.setActive(true)
+      if (activate) extension.setActive(true)
      // Add extension to store if needed
      addExtension(extension)
      installed.push(extension)
    })
  })
-    // Add extension to store if needed
+  await Promise.all(installations)
    addExtension(extension)
    installed.push(extension)
  }
  // Return list of all installed extensions
  return installed
--- a/core/src/node/helper/config.ts
+++ b/core/src/node/helper/config.ts
@ -82,26 +82,34 @@ export const getJanExtensionsPath = (): string => {
 */
 export const physicalCpuCount = async (): Promise<number> => {
  const platform = os.platform()
-  if (platform === 'linux') {
+  try {
-    const output = await exec('lscpu -p | egrep -v "^#" | sort -u -t, -k 2,4 | wc -l')
+    if (platform === 'linux') {
-    return parseInt(output.trim(), 10)
+      const output = await exec('lscpu -p | egrep -v "^#" | sort -u -t, -k 2,4 | wc -l')
-  } else if (platform === 'darwin') {
+      return parseInt(output.trim(), 10)
-    const output = await exec('sysctl -n hw.physicalcpu_max')
+    } else if (platform === 'darwin') {
-    return parseInt(output.trim(), 10)
+      const output = await exec('sysctl -n hw.physicalcpu_max')
-  } else if (platform === 'win32') {
+      return parseInt(output.trim(), 10)
-    const output = await exec('WMIC CPU Get NumberOfCores')
+    } else if (platform === 'win32') {
-    return output
+      const output = await exec('WMIC CPU Get NumberOfCores')
-      .split(os.EOL)
+      return output
-      .map((line: string) => parseInt(line))
+        .split(os.EOL)
-      .filter((value: number) => !isNaN(value))
+        .map((line: string) => parseInt(line))
-      .reduce((sum: number, number: number) => sum + number, 1)
+        .filter((value: number) => !isNaN(value))
-  } else {
+        .reduce((sum: number, number: number) => sum + number, 1)
-    const cores = os.cpus().filter((cpu: any, index: number) => {
+    } else {
-      const hasHyperthreading = cpu.model.includes('Intel')
+      const cores = os.cpus().filter((cpu: any, index: number) => {
-      const isOdd = index % 2 === 1
+        const hasHyperthreading = cpu.model.includes('Intel')
-      return !hasHyperthreading || isOdd
+        const isOdd = index % 2 === 1
-    })
+        return !hasHyperthreading || isOdd
-    return cores.length
+      })
      return cores.length
    }
  } catch (err) {
    console.warn('Failed to get physical CPU count', err)
    // Divide by 2 to get rid of hyper threading
    const coreCount = Math.ceil(os.cpus().length / 2)
    console.debug('Using node API to get physical CPU count:', coreCount)
    return coreCount
  }
 }
--- a/core/src/node/helper/resource.ts
+++ b/core/src/node/helper/resource.ts
@ -1,6 +1,6 @@
 import { SystemResourceInfo } from '../../types'
 import { physicalCpuCount } from './config'
-import { log, logServer } from './log'
+import { log } from './log'
 export const getSystemResourceInfo = async (): Promise<SystemResourceInfo> => {
  const cpu = await physicalCpuCount()
--- a/core/src/types/miscellaneous/systemResourceInfo.ts
+++ b/core/src/types/miscellaneous/systemResourceInfo.ts
@ -30,3 +30,27 @@ export type GpuSettingInfo = {
  name: string
  arch?: string
 }
 /**
 * System information bundle: GPU settings plus optional operating-system details.
 * This is the resolved type of `systemInformation()` in core.
 */
 export type SystemInformation = {
  /** GPU configuration of the host. */
  gpuSetting: GpuSetting
  /** Operating-system details; optional, so consumers must handle its absence. */
  osInfo?: OperatingSystemInfo
 }
 /** Platform identifiers recognised as supported; kept as a readonly tuple via `as const`. */
 export const SupportedPlatforms = ['win32', 'linux', 'darwin'] as const
 /** Tuple type of `SupportedPlatforms`. */
 export type SupportedPlatformTuple = typeof SupportedPlatforms
 /** Union of the individual platform literals: 'win32' | 'linux' | 'darwin'. */
 export type SupportedPlatform = SupportedPlatformTuple[number]
 /**
 * Description of the host operating system.
 * The monitoring extension's `getOsInfo` fills these fields from Node's
 * `process` and `os` modules.
 */
 export type OperatingSystemInfo = {
  /** One of the supported platform literals, or 'unknown' when the host platform is not in that list. */
  platform: SupportedPlatform | 'unknown'
  /** CPU architecture string (populated from `process.arch`). */
  arch: string
  /** Operating-system release string (populated from `os.release()`). */
  release: string
  /** Machine type string (populated from `os.machine()`). */
  machine: string
  /** Operating-system version string (populated from `os.version()`). */
  version: string
  /** Total system memory (populated from `os.totalmem()`; presumably bytes — confirm). */
  totalMem: number
  /** Free system memory (populated from `os.freemem()`; presumably bytes — confirm). */
  freeMem: number
 }
 /**
 * Description of a single CPU core.
 * NOTE(review): presumably mirrors the entries returned by Node's `os.cpus()` — confirm against callers.
 */
 export type CpuCoreInfo = {
  /** CPU model name. */
  model: string
  /** CPU speed (unit not shown here; presumably MHz — confirm). */
  speed: number
 }
--- a/extensions/model-extension/src/index.ts
+++ b/extensions/model-extension/src/index.ts
@ -38,7 +38,7 @@ export default class JanModelExtension extends ModelExtension {
  private static readonly _tensorRtEngineFormat = '.engine'
  private static readonly _configDirName = 'config'
  private static readonly _defaultModelFileName = 'default-model.json'
-  private static readonly _supportedGpuArch = ['turing', 'ampere', 'ada']
+  private static readonly _supportedGpuArch = ['ampere', 'ada']
  /**
   * Called when the extension is loaded.
--- a/extensions/monitoring-extension/src/index.ts
+++ b/extensions/monitoring-extension/src/index.ts
@ -1,4 +1,9 @@
-import { GpuSetting, MonitoringExtension, executeOnMain } from '@janhq/core'
+import {
  GpuSetting,
  MonitoringExtension,
  OperatingSystemInfo,
  executeOnMain,
 } from '@janhq/core'
 /**
 * JanMonitoringExtension is an extension that provides system monitoring functionality.
@ -41,4 +46,8 @@ export default class JanMonitoringExtension extends MonitoringExtension {
  getCurrentLoad(): Promise<any> {
    return executeOnMain(NODE, 'getCurrentLoad')
  }
  getOsInfo(): Promise<OperatingSystemInfo> {
    return executeOnMain(NODE, 'getOsInfo')
  }
 }
--- a/extensions/monitoring-extension/src/node/index.ts
+++ b/extensions/monitoring-extension/src/node/index.ts
@ -1,9 +1,16 @@
-import { GpuSetting, GpuSettingInfo, ResourceInfo } from '@janhq/core'
+import {
  GpuSetting,
  GpuSettingInfo,
  OperatingSystemInfo,
  ResourceInfo,
  SupportedPlatforms,
 } from '@janhq/core'
 import { getJanDataFolderPath, log } from '@janhq/core/node'
 import { mem, cpu } from 'node-os-utils'
 import { exec } from 'child_process'
 import { writeFileSync, existsSync, readFileSync, mkdirSync } from 'fs'
 import path from 'path'
 import os from 'os'
 /**
 * Path to the settings directory
@ -174,8 +181,7 @@ const updateNvidiaDriverInfo = async () =>
 const getGpuArch = (gpuName: string): string => {
  if (!gpuName.toLowerCase().includes('nvidia')) return 'unknown'
-  if (gpuName.includes('20')) return 'turing'
+  if (gpuName.includes('30')) return 'ampere'
  else if (gpuName.includes('30')) return 'ampere'
  else if (gpuName.includes('40')) return 'ada'
  else return 'unknown'
 }
@ -320,3 +326,20 @@ const updateCudaExistence = (
  data.is_initial = false
  return data
 }
 /**
 * Collects operating-system facts about the current host from Node's
 * `process` and `os` modules.
 *
 * The platform is reported as 'unknown' when `process.platform` is not one of
 * the entries in `SupportedPlatforms`.
 *
 * @returns The populated operating-system description.
 */
 export const getOsInfo = (): OperatingSystemInfo => {
  const matchedPlatform = SupportedPlatforms.find(
    (candidate) => candidate === process.platform
  )
  return {
    platform: matchedPlatform || 'unknown',
    arch: process.arch,
    release: os.release(),
    machine: os.machine(),
    version: os.version(),
    totalMem: os.totalmem(),
    freeMem: os.freemem(),
  }
 }
--- a/extensions/tensorrt-llm-extension/models.json
+++ b/extensions/tensorrt-llm-extension/models.json
@ -3,27 +3,31 @@
    "sources": [
      {
        "filename": "config.json",
-        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/config.json"
+        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/config.json"
      },
      {
-        "filename": "rank0.engine",
+        "filename": "mistral_float16_tp1_rank0.engine",
-        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/rank0.engine"
+        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/mistral_float16_tp1_rank0.engine"
      },
      {
        "filename": "tokenizer.model",
-        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/tokenizer.model"
+        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/tokenizer.model"
      },
      {
        "filename": "special_tokens_map.json",
-        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/special_tokens_map.json"
+        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/special_tokens_map.json"
      },
      {
        "filename": "tokenizer.json",
-        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/tokenizer.json"
+        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/tokenizer.json"
      },
      {
        "filename": "tokenizer_config.json",
-        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/tokenizer_config.json"
+        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/tokenizer_config.json"
      },
      {
        "filename": "model.cache",
        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/model.cache"
      }
    ],
    "id": "llamacorn-1.1b-chat-fp16",
@ -50,27 +54,31 @@
    "sources": [
      {
        "filename": "config.json",
-        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/TinyJensen-1.1B-Chat-fp16/config.json"
+        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/config.json"
      },
      {
-        "filename": "rank0.engine",
+        "filename": "mistral_float16_tp1_rank0.engine",
-        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/TinyJensen-1.1B-Chat-fp16/rank0.engine"
+        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/mistral_float16_tp1_rank0.engine"
      },
      {
        "filename": "tokenizer.model",
-        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/TinyJensen-1.1B-Chat-fp16/tokenizer.model"
+        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/tokenizer.model"
      },
      {
        "filename": "special_tokens_map.json",
-        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/TinyJensen-1.1B-Chat-fp16/special_tokens_map.json"
+        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/special_tokens_map.json"
      },
      {
        "filename": "tokenizer.json",
-        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/TinyJensen-1.1B-Chat-fp16/tokenizer.json"
+        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/tokenizer.json"
      },
      {
        "filename": "tokenizer_config.json",
-        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/TinyJensen-1.1B-Chat-fp16/tokenizer_config.json"
+        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/tokenizer_config.json"
      },
      {
        "filename": "model.cache",
        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/model.cache"
      }
    ],
    "id": "tinyjensen-1.1b-chat-fp16",
@ -92,5 +100,57 @@
      "size": 2151000000
    },
    "engine": "nitro-tensorrt-llm"
  },
  {
    "sources": [
      {
        "filename": "config.json",
        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/config.json"
      },
      {
        "filename": "mistral_float16_tp1_rank0.engine",
        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/mistral_float16_tp1_rank0.engine"
      },
      {
        "filename": "tokenizer.model",
        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/tokenizer.model"
      },
      {
        "filename": "special_tokens_map.json",
        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/special_tokens_map.json"
      },
      {
        "filename": "tokenizer.json",
        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/tokenizer.json"
      },
      {
        "filename": "tokenizer_config.json",
        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/tokenizer_config.json"
      },
      {
        "filename": "model.cache",
        "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/model.cache"
      }
    ],
    "id": "mistral-7b-instruct-int4",
    "object": "model",
    "name": "Mistral 7B Instruct v0.1 INT4",
    "version": "1.0",
    "description": "Mistral 7B Instruct v0.1 INT4",
    "format": "TensorRT-LLM",
    "settings": {
      "ctx_len": 2048,
      "text_model": false,
      "prompt_template": "[INST] {prompt} [/INST]"
    },
    "parameters": {
      "max_tokens": 4096
    },
    "metadata": {
      "author": "MistralAI",
      "tags": ["TensorRT-LLM", "7B", "Finetuned"],
      "size": 3840000000
    },
    "engine": "nitro-tensorrt-llm"
  }
 ]
--- a/extensions/tensorrt-llm-extension/package.json
+++ b/extensions/tensorrt-llm-extension/package.json
@ -18,6 +18,8 @@
      "0.1.0"
    ]
  },
  "tensorrtVersion": "0.1.8",
  "provider": "nitro-tensorrt-llm",
  "scripts": {
    "build": "tsc --module commonjs && rollup -c rollup.config.ts",
    "build:publish:win32": "rimraf *.tgz --glob && npm run build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
--- a/extensions/tensorrt-llm-extension/rollup.config.ts
+++ b/extensions/tensorrt-llm-extension/rollup.config.ts
@ -16,11 +16,12 @@ export default [
    plugins: [
      replace({
        EXTENSION_NAME: JSON.stringify(packageJson.name),
-        TENSORRT_VERSION: JSON.stringify('0.1.5'),
+        TENSORRT_VERSION: JSON.stringify(packageJson.tensorrtVersion),
        PROVIDER: JSON.stringify(packageJson.provider),
        DOWNLOAD_RUNNER_URL:
-          process.platform === 'darwin' || process.platform === 'win32'
+          process.platform === 'win32'
            ? JSON.stringify(
-                'https://github.com/janhq/nitro-tensorrt-llm/releases/download/windows-v<version>/nitro-windows-v<version>-amd64-tensorrt-llm-<gpuarch>.tar.gz'
+                'https://github.com/janhq/nitro-tensorrt-llm/releases/download/windows-v<version>-tensorrt-llm-v0.7.1/nitro-windows-v<version>-tensorrt-llm-v0.7.1-amd64-all-arch.tar.gz'
              )
            : JSON.stringify(
                'https://github.com/janhq/nitro-tensorrt-llm/releases/download/linux-v<version>/nitro-linux-v<version>-amd64-tensorrt-llm-<gpuarch>.tar.gz'
@ -52,11 +53,14 @@ export default [
    },
    plugins: [
      replace({
        EXTENSION_NAME: JSON.stringify(packageJson.name),
        TENSORRT_VERSION: JSON.stringify(packageJson.tensorrtVersion),
        PROVIDER: JSON.stringify(packageJson.provider),
        LOAD_MODEL_URL: JSON.stringify(
          `${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/inferences/tensorrtllm/loadmodel`
        ),
        TERMINATE_ENGINE_URL: JSON.stringify(
-          `${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/inferences/processmanager/destroy`
+          `${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/processmanager/destroy`
        ),
        ENGINE_HOST: JSON.stringify(packageJson.config?.host ?? '127.0.0.1'),
        ENGINE_PORT: JSON.stringify(packageJson.config?.port ?? '3928'),
--- a/extensions/tensorrt-llm-extension/src/@types/global.d.ts
+++ b/extensions/tensorrt-llm-extension/src/@types/global.d.ts
@ -8,3 +8,4 @@ declare const DOWNLOAD_RUNNER_URL: string
 declare const TENSORRT_VERSION: string
 declare const COMPATIBILITY: object
 declare const EXTENSION_NAME: string
 declare const PROVIDER: string
--- a/extensions/tensorrt-llm-extension/src/index.ts
+++ b/extensions/tensorrt-llm-extension/src/index.ts
@ -16,11 +16,12 @@ import {
  executeOnMain,
  joinPath,
  showToast,
-  systemInformations,
+  systemInformation,
  LocalOAIEngine,
  fs,
  MessageRequest,
  ModelEvent,
  getJanDataFolderPath,
 } from '@janhq/core'
 import models from '../models.json'
@ -34,11 +35,13 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
   * Override custom function name for loading and unloading model
   * Which are implemented from node module
   */
-  override provider = 'nitro-tensorrt-llm'
+  override provider = PROVIDER
  override inferenceUrl = INFERENCE_URL
  override nodeModule = NODE
-  private supportedGpuArch = ['turing', 'ampere', 'ada']
+  private supportedGpuArch = ['ampere', 'ada']
  private supportedPlatform = ['win32', 'linux']
  private isUpdateAvailable = false
  compatibility() {
    return COMPATIBILITY as unknown as Compatibility
@ -54,7 +57,9 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
  }
  override async install(): Promise<void> {
-    const info = await systemInformations()
+    await this.removePopulatedModels()
    const info = await systemInformation()
    console.debug(
      `TensorRTLLMExtension installing pre-requisites... ${JSON.stringify(info)}`
    )
@ -83,12 +88,19 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
      return
    }
-    const binaryFolderPath = await executeOnMain(
+    const janDataFolderPath = await getJanDataFolderPath()
-      this.nodeModule,
+    const engineVersion = TENSORRT_VERSION
-      'binaryFolder'
+
-    )
+    const executableFolderPath = await joinPath([
-    if (!(await fs.existsSync(binaryFolderPath))) {
+      janDataFolderPath,
-      await fs.mkdirSync(binaryFolderPath)
+      'engines',
      this.provider,
      engineVersion,
      firstGpu.arch,
    ])
    if (!(await fs.existsSync(executableFolderPath))) {
      await fs.mkdir(executableFolderPath)
    }
    const placeholderUrl = DOWNLOAD_RUNNER_URL
@ -100,7 +112,7 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
    const tarball = await baseName(url)
-    const tarballFullPath = await joinPath([binaryFolderPath, tarball])
+    const tarballFullPath = await joinPath([executableFolderPath, tarball])
    const downloadRequest: DownloadRequest = {
      url,
      localPath: tarballFullPath,
@ -109,12 +121,16 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
    }
    downloadFile(downloadRequest)
    // TODO: wrap this into a Promise
    const onFileDownloadSuccess = async (state: DownloadState) => {
      // if other download, ignore
      if (state.fileName !== tarball) return
      events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
-      await executeOnMain(this.nodeModule, 'decompressRunner', tarballFullPath)
+      await executeOnMain(
        this.nodeModule,
        'decompressRunner',
        tarballFullPath,
        executableFolderPath
      )
      events.emit(DownloadEvent.onFileUnzipSuccess, state)
      // Prepopulate models as soon as it's ready
@ -128,6 +144,22 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
    events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
  }
  /**
   * Deletes the folders of the models listed in `models.json` from the
   * `models` directory under the Jan data folder, then emits
   * `ModelEvent.OnModelsUpdate`.
   *
   * Folders are processed one at a time; a model whose folder does not exist
   * is skipped.
   */
  async removePopulatedModels(): Promise<void> {
    console.debug(`removePopulatedModels`, JSON.stringify(models))
    const dataRoot = await getJanDataFolderPath()
    const modelsRoot = await joinPath([dataRoot, 'models'])

    for (const { id } of models) {
      const target = await joinPath([modelsRoot, id])
      console.debug(`modelPath: ${target}`)

      const present = await fs.existsSync(target)
      if (!present) continue

      console.debug(`Removing model ${target}`)
      await fs.rmdirSync(target)
    }

    events.emit(ModelEvent.OnModelsUpdate, {})
  }
  async onModelInit(model: Model): Promise<void> {
    if (model.engine !== this.provider) return
@ -143,14 +175,70 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
    }
  }
-  override async installationState(): Promise<InstallationState> {
+  updatable() {
-    // For now, we just check the executable of nitro x tensor rt
+    return this.isUpdateAvailable
-    const isNitroExecutableAvailable = await executeOnMain(
+  }
      this.nodeModule,
      'isNitroExecutableAvailable'
    )
-    return isNitroExecutableAvailable ? 'Installed' : 'NotInstalled'
+  override async installationState(): Promise<InstallationState> {
    /*
     * Reports whether the TensorRT-LLM engine is installed for this host.
     *
     * Returns 'NotInstalled' when any prerequisite check fails: no GPU
     * setting, no GPU, first GPU is not Nvidia, GPU architecture is missing
     * or not in `supportedGpuArch`, OS information is missing, or the
     * platform is not in `supportedPlatform`. Otherwise the result depends
     * on whether the nitro executable exists under
     * <jan data folder>/engines/<provider>/<TENSORRT_VERSION>/<gpu arch>/.
     */
    const info = await systemInformation()

    const gpuSetting: GpuSetting | undefined = info.gpuSetting
    if (gpuSetting === undefined) {
      console.warn(
        'No GPU setting found. TensorRT-LLM extension is not installed'
      )
      return 'NotInstalled' // TODO: maybe disabled / incompatible is more appropriate
    }

    if (gpuSetting.gpus.length === 0) {
      console.warn('No GPU found. TensorRT-LLM extension is not installed')
      return 'NotInstalled'
    }

    // Only the first GPU is inspected.
    const firstGpu = gpuSetting.gpus[0]
    if (!firstGpu.name.toLowerCase().includes('nvidia')) {
      console.error('No Nvidia GPU found. Please check your GPU setting.')
      return 'NotInstalled'
    }

    if (firstGpu.arch === undefined) {
      console.error('No GPU architecture found. Please check your GPU setting.')
      return 'NotInstalled'
    }

    if (!this.supportedGpuArch.includes(firstGpu.arch)) {
      // Log the GPU name rather than the object itself (which would print
      // "[object Object]"), and list only the series matching the
      // architectures in `supportedGpuArch`.
      console.error(
        `Your GPU: ${firstGpu.name} is not supported. Only 30xx, 40xx series are supported.`
      )
      return 'NotInstalled'
    }

    const osInfo = info.osInfo
    if (!osInfo) {
      console.error('No OS information found. Please check your OS setting.')
      return 'NotInstalled'
    }

    if (!this.supportedPlatform.includes(osInfo.platform)) {
      console.error(
        `Your OS: ${osInfo.platform} is not supported. Only Windows and Linux are supported.`
      )
      return 'NotInstalled'
    }

    const janDataFolderPath = await getJanDataFolderPath()
    const engineVersion = TENSORRT_VERSION

    // The executable name differs per platform.
    const enginePath = await joinPath([
      janDataFolderPath,
      'engines',
      this.provider,
      engineVersion,
      firstGpu.arch,
      osInfo.platform === 'win32' ? 'nitro.exe' : 'nitro',
    ])

    // For now, we just check the executable of nitro x tensor rt
    return (await fs.existsSync(enginePath)) ? 'Installed' : 'NotInstalled'
  }
  override onInferenceStopped() {
--- a/extensions/tensorrt-llm-extension/src/node/index.ts
+++ b/extensions/tensorrt-llm-extension/src/node/index.ts
@ -2,13 +2,17 @@ import path from 'path'
 import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
 import tcpPortUsed from 'tcp-port-used'
 import fetchRT from 'fetch-retry'
-import { log } from '@janhq/core/node'
+import { log, getJanDataFolderPath } from '@janhq/core/node'
 import { existsSync } from 'fs'
 import decompress from 'decompress'
 import { SystemInformation } from '@janhq/core'
 import { PromptTemplate } from '@janhq/core'
 // Polyfill fetch with retry
 const fetchRetry = fetchRT(fetch)
 /**
  * Platform identifiers (compared against `osInfo.platform`) that this module
  * accepts. A fresh array is returned on every call.
  */
 function supportedPlatform(): string[] {
   return ['win32', 'linux']
 }
 /**
  * GPU architecture names (compared against a GPU's `arch` field) that this
  * module accepts. A fresh array is returned on every call.
  */
 function supportedGpuArch(): string[] {
   return ['ampere', 'ada']
 }
 /**
 * The response object for model init operation.
 */
@ -24,16 +28,34 @@ let subprocess: ChildProcessWithoutNullStreams | undefined = undefined
 * Initializes a engine subprocess to load a machine learning model.
 * @param params - The model load settings.
 */
async function loadModel(
  params: any,
  systemInfo?: SystemInformation
): Promise<{ error: Error | undefined }> {
  // `systemInfo` is optional in the signature but is forwarded to
  // runEngineAndLoadModel, which needs it to start the engine. Check it first
  // so the call fails before `params` is modified.
  if (!systemInfo) {
    throw new Error('Cannot get system info. Unable to start nitro x tensorrt.')
  }

  const modelSettings = params.model.settings

  // Split the combined prompt template into separate system / user / ai
  // prompt settings and write them back onto the model settings. A template
  // that cannot be split rejects with the converter's error message.
  if (modelSettings.prompt_template) {
    const prompt = promptTemplateConverter(modelSettings.prompt_template)
    if (prompt?.error) {
      return Promise.reject(prompt.error)
    }
    modelSettings.system_prompt = prompt.system_prompt
    modelSettings.user_prompt = prompt.user_prompt
    modelSettings.ai_prompt = prompt.ai_prompt
  }

  const settings: ModelLoadParams = {
    // modelFolder is the absolute path to the running model folder
    // e.g. ~/jan/models/llama-2
    engine_path: params.modelFolder,
    ...modelSettings,
    // Placed after the spread so that a present-but-nullish ctx_len in the
    // model settings cannot overwrite the 2048 default.
    ctx_len: modelSettings.ctx_len ?? 2048,
  }

  return runEngineAndLoadModel(settings, systemInfo)
}
 /**
@ -67,9 +89,12 @@ function unloadModel(): Promise<any> {
 * 2. Load model into engine subprocess
 * @returns
 */
-async function runEngineAndLoadModel(settings: ModelLoadParams) {
+async function runEngineAndLoadModel(
  settings: ModelLoadParams,
  systemInfo: SystemInformation
 ) {
  return unloadModel()
-    .then(runEngine)
+    .then(() => runEngine(systemInfo))
    .then(() => loadModelRequest(settings))
    .catch((err) => {
      // TODO: Broadcast error so app could display proper error message
@ -81,7 +106,7 @@ async function runEngineAndLoadModel(settings: ModelLoadParams) {
 /**
 * Loads a LLM model into the Engine subprocess by sending a HTTP POST request.
 */
-function loadModelRequest(
+async function loadModelRequest(
  settings: ModelLoadParams
 ): Promise<{ error: Error | undefined }> {
  debugLog(`Loading model with params ${JSON.stringify(settings)}`)
@ -107,23 +132,66 @@ function loadModelRequest(
 /**
 * Spawns engine subprocess.
 */
-function runEngine(): Promise<any> {
+async function runEngine(systemInfo: SystemInformation): Promise<void> {
  debugLog(`Spawning engine subprocess...`)
  if (systemInfo.gpuSetting == null) {
    return Promise.reject(
      'No GPU information found. Please check your GPU setting.'
    )
  }
  if (systemInfo.gpuSetting.gpus.length === 0) {
    return Promise.reject('No GPU found. Please check your GPU setting.')
  }
  if (systemInfo.osInfo == null) {
    return Promise.reject(
      'No OS information found. Please check your OS setting.'
    )
  }
  const platform = systemInfo.osInfo.platform
  if (platform == null || supportedPlatform().includes(platform) === false) {
    return Promise.reject(
      'No OS architecture found. Please check your OS setting.'
    )
  }
  const gpu = systemInfo.gpuSetting.gpus[0]
  if (gpu.name.toLowerCase().includes('nvidia') === false) {
    return Promise.reject('No Nvidia GPU found. Please check your GPU setting.')
  }
  const gpuArch = gpu.arch
  if (gpuArch == null || supportedGpuArch().includes(gpuArch) === false) {
    return Promise.reject(
      `Your GPU: ${gpu.name} is not supported. Only ${supportedGpuArch().join(
        ', '
      )} series are supported.`
    )
  }
  const janDataFolderPath = await getJanDataFolderPath()
  const tensorRtVersion = TENSORRT_VERSION
  const provider = PROVIDER
  return new Promise<void>((resolve, reject) => {
    // Current directory by default
-    let binaryFolder = path.join(__dirname, '..', 'bin')
+
-    // Binary path
+    const executableFolderPath = path.join(
-    const binary = path.join(
+      janDataFolderPath,
-      binaryFolder,
+      'engines',
-      process.platform === 'win32' ? 'nitro.exe' : 'nitro'
+      provider,
      tensorRtVersion,
      gpuArch
    )
    const nitroExecutablePath = path.join(
      executableFolderPath,
      platform === 'win32' ? 'nitro.exe' : 'nitro'
    )
    const args: string[] = ['1', ENGINE_HOST, ENGINE_PORT]
    // Execute the binary
-    debugLog(`Spawn nitro at path: ${binary}, and args: ${args}`)
+    debugLog(`Spawn nitro at path: ${nitroExecutablePath}, and args: ${args}`)
-    subprocess = spawn(binary, args, {
+    subprocess = spawn(nitroExecutablePath, args, {
-      cwd: binaryFolder,
+      cwd: executableFolderPath,
      env: {
        ...process.env,
      },
@ -155,12 +223,7 @@ function debugLog(message: string, level: string = 'Debug') {
  log(`[TENSORRT_LLM_NITRO]::${level}:${message}`)
 }
-const binaryFolder = async (): Promise<string> => {
+const decompressRunner = async (zipPath: string, output: string) => {
  return path.join(__dirname, '..', 'bin')
 }
 const decompressRunner = async (zipPath: string) => {
  const output = path.join(__dirname, '..', 'bin')
  console.debug(`Decompressing ${zipPath} to ${output}...`)
  try {
    const files = await decompress(zipPath, output)
@ -170,22 +233,57 @@ const decompressRunner = async (zipPath: string) => {
  }
 }
-const isNitroExecutableAvailable = async (): Promise<boolean> => {
+/**
-  const binary = path.join(
+ * Parse prompt template into args settings
-    __dirname,
+ * @param promptTemplate Template as string
-    '..',
+ * @returns
-    'bin',
+ */
-    process.platform === 'win32' ? 'nitro.exe' : 'nitro'
function promptTemplateConverter(promptTemplate: string): PromptTemplate {
  // The template is cut at its placeholders:
  //   <system_prompt>{system_message}<user_prompt>{prompt}<ai_prompt>
  // Returns the text segments around the placeholders, or `{ error }` when
  // the template cannot be split that way.
  const systemMarker = '{system_message}'
  const promptMarker = '{prompt}'
  const splitError = 'Cannot split prompt template'

  // Locate each placeholder once; -1 means it is absent.
  const systemIndex = promptTemplate.indexOf(systemMarker)
  const promptIndex = promptTemplate.indexOf(promptMarker)

  // Without a {prompt} placeholder there is nothing to split on.
  if (promptIndex === -1) {
    return { error: splitError }
  }

  // Only {prompt} is present: text before it is the user prefix, text after
  // it is the ai prefix.
  if (systemIndex === -1) {
    return {
      user_prompt: promptTemplate.substring(0, promptIndex),
      ai_prompt: promptTemplate.substring(promptIndex + promptMarker.length),
    }
  }

  // Both placeholders are present but {prompt} comes first. substring() swaps
  // its arguments when start > end, which would yield segments that still
  // contain the placeholders, so report the template as unsplittable instead.
  if (systemIndex > promptIndex) {
    return { error: splitError }
  }

  return {
    system_prompt: promptTemplate.substring(0, systemIndex),
    user_prompt: promptTemplate.substring(
      systemIndex + systemMarker.length,
      promptIndex
    ),
    ai_prompt: promptTemplate.substring(promptIndex + promptMarker.length),
  }
}
 export default {
-  binaryFolder,
+  supportedPlatform,
  supportedGpuArch,
  decompressRunner,
  loadModel,
  unloadModel,
  dispose: unloadModel,
  isNitroExecutableAvailable,
 }
--- a/web/containers/Providers/index.tsx
+++ b/web/containers/Providers/index.tsx
@ -1,6 +1,6 @@
 'use client'
-import { PropsWithChildren, useEffect, useState } from 'react'
+import { PropsWithChildren, useCallback, useEffect, useState } from 'react'
 import { Toaster } from 'react-hot-toast'
@ -37,7 +37,7 @@ const Providers = (props: PropsWithChildren) => {
  const [activated, setActivated] = useState(false)
  const [settingUp, setSettingUp] = useState(false)
-  async function setupExtensions() {
+  const setupExtensions = useCallback(async () => {
    // Register all active extensions
    await extensionManager.registerActive()
@ -57,7 +57,7 @@ const Providers = (props: PropsWithChildren) => {
      setSettingUp(false)
      setActivated(true)
    }, 500)
-  }
+  }, [pathname])
  // Services Setup
  useEffect(() => {
@ -78,7 +78,7 @@ const Providers = (props: PropsWithChildren) => {
        setActivated(true)
      }
    }
-  }, [setupCore])
+  }, [setupCore, setupExtensions])
  return (
    <JotaiWrapper>
--- a/web/hooks/useSendChatMessage.ts
+++ b/web/hooks/useSendChatMessage.ts
@ -102,7 +102,6 @@ export default function useSendChatMessage() {
      console.error('No active thread')
      return
    }
    setIsGeneratingResponse(true)
    updateThreadWaiting(activeThreadRef.current.id, true)
    const messages: ChatCompletionMessage[] = [
      activeThreadRef.current.assistants[0]?.instructions,
@ -148,7 +147,7 @@ export default function useSendChatMessage() {
      await waitForModelStarting(modelId)
      setQueuedMessage(false)
    }
-
+    setIsGeneratingResponse(true)
    if (currentMessage.role !== ChatCompletionRole.User) {
      // Delete last response before regenerating
      deleteMessage(currentMessage.id ?? '')
@ -171,7 +170,6 @@ export default function useSendChatMessage() {
      console.error('No active thread')
      return
    }
    setIsGeneratingResponse(true)
    if (engineParamsUpdate) setReloadModel(true)
@ -361,7 +359,7 @@ export default function useSendChatMessage() {
      await waitForModelStarting(modelId)
      setQueuedMessage(false)
    }
-
+    setIsGeneratingResponse(true)
    events.emit(MessageEvent.OnMessageSent, messageRequest)
    setReloadModel(false)
--- a/web/hooks/useSettings.ts
+++ b/web/hooks/useSettings.ts
@ -70,11 +70,6 @@ export const useSettings = () => {
      }
    }
    await fs.writeFileSync(settingsFile, JSON.stringify(settings))
    // Relaunch to apply settings
    if (vulkan != null) {
      window.location.reload()
    }
  }
  return {
--- a/web/screens/Settings/Advanced/index.tsx
+++ b/web/screens/Settings/Advanced/index.tsx
@ -90,12 +90,38 @@ const Advanced = () => {
    [setPartialProxy, setProxy]
  )
-  const updateQuickAskEnabled = async (e: boolean) => {
+  const updateQuickAskEnabled = async (
    e: boolean,
    relaunch: boolean = true
  ) => {
    // Stores the new quick-ask flag `e` in the app configuration and writes
    // the configuration back. `relaunch` (default true) controls whether the
    // app is relaunched right after the update.
    // NOTE(review): appConfiguration is undefined when window.core?.api is
    // unavailable, in which case the assignment below throws — confirm the
    // API is always present when this runs.
    const appConfiguration: AppConfiguration =
      await window.core?.api?.getAppConfigurations()
    appConfiguration.quick_ask = e
    await window.core?.api?.updateAppConfiguration(appConfiguration)
-    window.core?.api?.relaunch()
+    if (relaunch) window.core?.api?.relaunch()
  }
  /**
   * Applies a new Vulkan on/off value: shows the reload toast, calls
   * stopModel(), updates the local state and saves the setting (with
   * `gpusInUse` reset to an empty list).
   *
   * @param e New Vulkan flag.
   * @param relaunch When true (default) the window is reloaded once the
   *   settings have been saved.
   */
  const updateVulkanEnabled = async (e: boolean, relaunch: boolean = true) => {
    const reloadNotice = {
      title: 'Reload',
      description: 'Vulkan settings updated. Reload now to apply the changes.',
    }
    toaster(reloadNotice)
    stopModel()
    setVulkanEnabled(e)
    await saveSettings({ vulkan: e, gpusInUse: [] })

    // Callers that batch several changes reload/relaunch themselves
    if (!relaunch) return
    window.location.reload()
  }
  /**
   * Toggles the experimental-features flag. Turning it off also turns off
   * quick ask and Vulkan when they are enabled, then relaunches the app once
   * if either of them had to be reset.
   *
   * @param e New experimental flag.
   */
  const updateExperimentalEnabled = async (e: boolean) => {
    setExperimentalEnabled(e)

    if (!e) {
      // Each dependent setting is reset without its own relaunch; a single
      // relaunch is issued at the end instead.
      let needsRelaunch = false
      if (quickAskEnabled) {
        await updateQuickAskEnabled(false, false)
        needsRelaunch = true
      }
      if (vulkanEnabled) {
        await updateVulkanEnabled(false, false)
        needsRelaunch = true
      }
      if (needsRelaunch) {
        window.core?.api?.relaunch()
      }
    }
  }
  useEffect(() => {
@ -179,7 +205,7 @@ const Advanced = () => {
          </div>
          <Switch
            checked={experimentalEnabled}
-            onCheckedChange={setExperimentalEnabled}
+            onCheckedChange={updateExperimentalEnabled}
          />
        </div>
@ -381,16 +407,7 @@ const Advanced = () => {
            <Switch
              checked={vulkanEnabled}
-              onCheckedChange={(e) => {
+              onCheckedChange={(e) => updateVulkanEnabled(e)}
                toaster({
                  title: 'Reload',
                  description:
                    'Vulkan settings updated. Reload now to apply the changes.',
                })
                stopModel()
                saveSettings({ vulkan: e, gpusInUse: [] })
                setVulkanEnabled(e)
              }}
            />
          </div>
        )}
--- a/web/screens/Settings/CoreExtensions/TensorRtExtensionItem.tsx
+++ b/web/screens/Settings/CoreExtensions/TensorRtExtensionItem.tsx
@ -5,7 +5,7 @@ import {
  GpuSetting,
  InstallationState,
  abortDownload,
-  systemInformations,
+  systemInformation,
 } from '@janhq/core'
 import {
  Badge,
@ -23,6 +23,8 @@ import { useAtomValue } from 'jotai'
 import { Marked, Renderer } from 'marked'
 import UpdateExtensionModal from './UpdateExtensionModal'
 import { extensionManager } from '@/extension'
 import Extension from '@/extension/Extension'
 import { installingExtensionAtom } from '@/helpers/atoms/Extension.atom'
@ -39,7 +41,7 @@ const TensorRtExtensionItem: React.FC<Props> = ({ item }) => {
    useState<InstallationState>('NotRequired')
  const installingExtensions = useAtomValue(installingExtensionAtom)
  const [isGpuSupported, setIsGpuSupported] = useState<boolean>(false)
-
+  const [promptUpdateModal, setPromptUpdateModal] = useState<boolean>(false)
  const isInstalling = installingExtensions.some(
    (e) => e.extensionId === item.name
  )
@ -51,7 +53,7 @@ const TensorRtExtensionItem: React.FC<Props> = ({ item }) => {
  useEffect(() => {
    const getSystemInfos = async () => {
-      const info = await systemInformations()
+      const info = await systemInformation()
      if (!info) {
        setIsGpuSupported(false)
        return
@ -69,7 +71,7 @@ const TensorRtExtensionItem: React.FC<Props> = ({ item }) => {
        return
      }
-      const supportedGpuArch = ['turing', 'ampere', 'ada']
+      const supportedGpuArch = ['ampere', 'ada']
      setIsGpuSupported(supportedGpuArch.includes(arch))
    }
    getSystemInfos()
@ -112,7 +114,7 @@ const TensorRtExtensionItem: React.FC<Props> = ({ item }) => {
  }
  const description = marked.parse(item.description ?? '', { async: false })
-  console.log(description)
+
  return (
    <div className="flex w-full items-start justify-between border-b border-border py-4 first:pt-4 last:border-none">
      <div className="flex-1 flex-shrink-0 space-y-1.5">
@ -138,6 +140,7 @@ const TensorRtExtensionItem: React.FC<Props> = ({ item }) => {
            installProgress={progress}
            installState={installState}
            onInstallClick={onInstallClick}
            onUpdateClick={() => setPromptUpdateModal(true)}
            onCancelClick={onCancelInstallingClick}
          />
        </div>
@ -177,6 +180,9 @@ const TensorRtExtensionItem: React.FC<Props> = ({ item }) => {
          </div>
        </div>
      )}
      {promptUpdateModal && (
        <UpdateExtensionModal onUpdateClick={onInstallClick} />
      )}
    </div>
  )
 }
@ -185,6 +191,7 @@ type InstallStateProps = {
  installProgress: number
  installState: InstallationState
  onInstallClick: () => void
  onUpdateClick: () => void
  onCancelClick: () => void
 }
@ -192,6 +199,7 @@ const InstallStateIndicator: React.FC<InstallStateProps> = ({
  installProgress,
  installState,
  onInstallClick,
  onUpdateClick,
  onCancelClick,
 }) => {
  if (installProgress !== -1) {
@ -218,6 +226,12 @@ const InstallStateIndicator: React.FC<InstallStateProps> = ({
          Installed
        </div>
      )
    case 'Updatable':
      return (
        <Button themes="secondaryBlue" size="sm" onClick={onUpdateClick}>
          Update
        </Button>
      )
    case 'NotInstalled':
      return (
        <Button themes="secondaryBlue" size="sm" onClick={onInstallClick}>
--- a/web/screens/Settings/CoreExtensions/UpdateExtensionModal.tsx
+++ b/web/screens/Settings/CoreExtensions/UpdateExtensionModal.tsx
@ -0,0 +1,58 @@
 import React from 'react'
 import {
  Button,
  Modal,
  ModalClose,
  ModalContent,
  ModalFooter,
  ModalHeader,
  ModalPortal,
  ModalTitle,
  ModalTrigger,
 } from '@janhq/uikit'
 import { Paintbrush } from 'lucide-react'
 /** Props accepted by UpdateExtensionModal. */
 type Props = {
  /** Called when the user confirms the update via the "Yes" button. */
  onUpdateClick: () => void
 }
 /**
  * Confirmation dialog for updating an extension.
  *
  * Renders its own trigger row ("Update extension"). The modal content warns
  * that updating may lose custom models or data and offers "No" / "Yes"
  * actions, both wrapped in ModalClose; "Yes" also calls `onUpdateClick`.
  *
  * @param onUpdateClick Callback run when the user confirms with "Yes".
  */
 const UpdateExtensionModal: React.FC<Props> = ({ onUpdateClick }) => {
  return (
    <Modal>
      {/* stopPropagation keeps the click from bubbling to ancestor handlers */}
      <ModalTrigger asChild onClick={(e) => e.stopPropagation()}>
        <div className="flex cursor-pointer items-center space-x-2 px-4 py-2 hover:bg-secondary">
          <Paintbrush size={16} className="text-muted-foreground" />
          <span className="text-bold text-black dark:text-muted-foreground">
            Update extension
          </span>
        </div>
      </ModalTrigger>
      <ModalPortal />
      <ModalContent>
        <ModalHeader>
          {/* Title matches the trigger label and the warning text below */}
          <ModalTitle>Update extension</ModalTitle>
        </ModalHeader>
        <p>
          Updating this extension may result in the loss of any custom models or
          data associated with the current version. We recommend backing up any
          important data before proceeding with the update.
        </p>
        <ModalFooter>
          <div className="flex gap-x-2">
            <ModalClose asChild onClick={(e) => e.stopPropagation()}>
              <Button themes="ghost">No</Button>
            </ModalClose>
            <ModalClose asChild>
              <Button themes="danger" onClick={onUpdateClick} autoFocus>
                Yes
              </Button>
            </ModalClose>
          </div>
        </ModalFooter>
      </ModalContent>
    </Modal>
  )
 }
 export default React.memo(UpdateExtensionModal)
--- a/web/services/appService.ts
+++ b/web/services/appService.ts
@ -1,20 +1,32 @@
-import { ExtensionTypeEnum, MonitoringExtension } from '@janhq/core'
+import {
  ExtensionTypeEnum,
  MonitoringExtension,
  SystemInformation,
 } from '@janhq/core'
 import { toaster } from '@/containers/Toast'
 import { extensionManager } from '@/extension'
 export const appService = {
-  systemInformations: async () => {
+  systemInformation: async (): Promise<SystemInformation | undefined> => {
-    const gpuSetting = await extensionManager
+    const monitorExtension = extensionManager?.get<MonitoringExtension>(
-      ?.get<MonitoringExtension>(ExtensionTypeEnum.SystemMonitoring)
+      ExtensionTypeEnum.SystemMonitoring
-      ?.getGpuSetting()
+    )
    if (!monitorExtension) {
      console.warn('System monitoring extension not found')
      return undefined
    }
    const gpuSetting = await monitorExtension.getGpuSetting()
    const osInfo = await monitorExtension.getOsInfo()
    return {
      gpuSetting,
-      // TODO: Other system information
+      osInfo,
    }
  },
  showToast: (title: string, description: string) => {
    toaster({
      title,