feat: Nitro-Tensorrt-LLM Extension (#2280)

* feat: tensorrt-llm-extension

* fix: loading

* feat: add download tensorrt llm runner

Signed-off-by: James <james@jan.ai>

* feat: update to rollupjs instead of webpack for monitoring extension

Signed-off-by: James <james@jan.ai>

* feat: move update nvidia info to monitor extension

Signed-off-by: James <james@jan.ai>

* allow download tensorrt

Signed-off-by: James <james@jan.ai>

* update

Signed-off-by: James <james@jan.ai>

* allow download tensor rt based on gpu setting

Signed-off-by: James <james@jan.ai>

* update downloaded models

Signed-off-by: James <james@jan.ai>

* feat: add extension compatibility

* dynamic tensor rt engines

Signed-off-by: James <james@jan.ai>

* update models

Signed-off-by: James <james@jan.ai>

* chore: remove ts-ignore

* feat: getting installation state from extension

Signed-off-by: James <james@jan.ai>

* chore: adding type for decompress

Signed-off-by: James <james@jan.ai>

* feat: update according to Louis's comment

Signed-off-by: James <james@jan.ai>

* feat: add progress for installing extension

Signed-off-by: James <james@jan.ai>

* chore: remove args from extension installation

* fix: model download does not work properly

* fix: do not allow user to stop tensorrtllm inference

* fix: extension installed style

* fix: download tensorrt does not update state

Signed-off-by: James <james@jan.ai>

* chore: replace int4 with fl16

* feat: modal for installing extension

Signed-off-by: James <james@jan.ai>

* fix: start download immediately after press install

Signed-off-by: James <james@jan.ai>

* fix: error switching between engines

* feat: rename inference provider to ai engine and refactor to core

* fix: missing ulid

* fix: core bundler

* feat: add cancel extension installing

Signed-off-by: James <james@jan.ai>

* remove mocking for mac

Signed-off-by: James <james@jan.ai>

* fix: show models only when extension is ready

* add tensorrt badge for model

Signed-off-by: James <james@jan.ai>

* fix: copy

* fix: add compatible check (#2342)

* fix: add compatible check

Signed-off-by: James <james@jan.ai>

* fix: copy

* fix: font

* fix: copy

* fix: broken monitoring extension

* chore: bump engine

* fix: copy

* fix: model copy

* fix: copy

* fix: model json

---------

Signed-off-by: James <james@jan.ai>
Co-authored-by: James <james@jan.ai>
Co-authored-by: Louis <louis@jan.ai>

* fix: vulkan support

* fix: installation button padding

* fix: empty script

* fix: remove hard code string

---------

Signed-off-by: James <james@jan.ai>
Co-authored-by: James <james@jan.ai>
Co-authored-by: NamH <NamNh0122@gmail.com>
Committed by Louis on 2024-03-14 14:07:22 +07:00 via GitHub
commit d85d02693b (parent 24c6dd05be)
71 changed files with 2497 additions and 626 deletions

.gitignore
View File

@ -22,16 +22,16 @@ package-lock.json
core/lib/**
# Nitro binary files
extensions/inference-nitro-extension/bin/*/nitro
extensions/inference-nitro-extension/bin/*/*.metal
extensions/inference-nitro-extension/bin/*/*.exe
extensions/inference-nitro-extension/bin/*/*.dll
extensions/inference-nitro-extension/bin/*/*.exp
extensions/inference-nitro-extension/bin/*/*.lib
extensions/inference-nitro-extension/bin/saved-*
extensions/inference-nitro-extension/bin/*.tar.gz
extensions/inference-nitro-extension/bin/vulkaninfoSDK.exe
extensions/inference-nitro-extension/bin/vulkaninfo
extensions/*-extension/bin/*/nitro
extensions/*-extension/bin/*/*.metal
extensions/*-extension/bin/*/*.exe
extensions/*-extension/bin/*/*.dll
extensions/*-extension/bin/*/*.exp
extensions/*-extension/bin/*/*.lib
extensions/*-extension/bin/saved-*
extensions/*-extension/bin/*.tar.gz
extensions/*-extension/bin/vulkaninfoSDK.exe
extensions/*-extension/bin/vulkaninfo
# Turborepo

View File

@ -45,11 +45,12 @@
"start": "rollup -c rollup.config.ts -w"
},
"devDependencies": {
"jest": "^29.7.0",
"@types/jest": "^29.5.12",
"@types/node": "^12.0.2",
"eslint-plugin-jest": "^27.9.0",
"eslint": "8.57.0",
"eslint-plugin-jest": "^27.9.0",
"jest": "^29.7.0",
"rimraf": "^3.0.2",
"rollup": "^2.38.5",
"rollup-plugin-commonjs": "^9.1.8",
"rollup-plugin-json": "^3.1.0",
@ -58,7 +59,10 @@
"rollup-plugin-typescript2": "^0.36.0",
"ts-jest": "^29.1.2",
"tslib": "^2.6.2",
"typescript": "^5.3.3",
"rimraf": "^3.0.2"
"typescript": "^5.3.3"
},
"dependencies": {
"rxjs": "^7.8.1",
"ulid": "^2.3.0"
}
}

View File

@ -64,7 +64,7 @@ export default [
// Allow json resolution
json(),
// Compile TypeScript files
typescript({ useTsconfigDeclarationDir: true }),
typescript({ useTsconfigDeclarationDir: true, exclude: ['src/*.ts', 'src/extensions/**'] }),
// Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
commonjs(),
// Allow node_modules resolution, so you can use 'external' to control

View File

@ -33,6 +33,8 @@ export enum AppRoute {
stopServer = 'stopServer',
log = 'log',
logServer = 'logServer',
systemInformations = 'systemInformations',
showToast = 'showToast',
}
export enum AppEvent {
@ -56,6 +58,7 @@ export enum DownloadEvent {
onFileDownloadUpdate = 'onFileDownloadUpdate',
onFileDownloadError = 'onFileDownloadError',
onFileDownloadSuccess = 'onFileDownloadSuccess',
onFileUnzipSuccess = 'onFileUnzipSuccess',
}
export enum LocalImportModelEvent {

View File

@ -1,4 +1,4 @@
import { FileStat } from './types'
import { DownloadRequest, FileStat, NetworkConfig } from './types'
/**
* Execute a extension module function in main process
@ -17,18 +17,16 @@ const executeOnMain: (extension: string, method: string, ...args: any[]) => Prom
/**
* Downloads a file from a URL and saves it to the local file system.
* @param {string} url - The URL of the file to download.
* @param {string} fileName - The name to use for the downloaded file.
* @param {object} network - Optional object to specify proxy/whether to ignore SSL certificates.
*
* @param {DownloadRequest} downloadRequest - The request to download the file.
* @param {NetworkConfig} network - Optional object to specify proxy/whether to ignore SSL certificates.
*
* @returns {Promise<any>} A promise that resolves when the file is downloaded.
*/
const downloadFile: (
url: string,
fileName: string,
network?: { proxy?: string; ignoreSSL?: boolean }
) => Promise<any> = (url, fileName, network) => {
return global.core?.api?.downloadFile(url, fileName, network)
}
const downloadFile: (downloadRequest: DownloadRequest, network?: NetworkConfig) => Promise<any> = (
downloadRequest,
network
) => global.core?.api?.downloadFile(downloadRequest, network)
/**
* Aborts the download of a specific file.
@ -108,6 +106,20 @@ const log: (message: string, fileName?: string) => void = (message, fileName) =>
const isSubdirectory: (from: string, to: string) => Promise<boolean> = (from: string, to: string) =>
global.core.api?.isSubdirectory(from, to)
/**
* Get system information
* @returns {Promise<any>} - A promise that resolves with the system information.
*/
const systemInformations: () => Promise<any> = () => global.core.api?.systemInformations()
/**
* Show toast message from browser processes.
* @param title - The toast title.
* @param message - The toast message body.
*/
const showToast: (title: string, message: string) => void = (title, message) =>
global.core.api?.showToast(title, message)
/**
* Register extension point function type definition
*/
@ -134,5 +146,7 @@ export {
log,
isSubdirectory,
getUserHomePath,
systemInformations,
showToast,
FileStat,
}
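
For context, a minimal sketch of calling the reworked download API from an extension (hypothetical URL and path; downloadFile, showToast, and DownloadRequest are the exports touched above):

import { DownloadRequest, downloadFile, showToast } from '@janhq/core'

// Hypothetical model download; the URL and path are placeholders.
const request: DownloadRequest = {
  url: 'https://example.com/models/tinyllama.gguf',
  localPath: 'models/tinyllama/tinyllama.gguf', // relative to the Jan data folder
}
downloadFile(request, { ignoreSSL: false }).then(() =>
  showToast('Download', 'Download request submitted')
)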

View File

@ -10,6 +10,22 @@ export enum ExtensionTypeEnum {
export interface ExtensionType {
type(): ExtensionTypeEnum | undefined
}
export interface Compatibility {
platform: string[]
version: string
}
const ALL_INSTALLATION_STATE = [
'NotRequired', // not required.
'Installed', // require and installed. Good to go.
'NotInstalled', // require to be installed.
'Corrupted', // require but corrupted. Need to redownload.
] as const
export type InstallationStateTuple = typeof ALL_INSTALLATION_STATE
export type InstallationState = InstallationStateTuple[number]
/**
* Represents a base extension.
* This class should be extended by any class that represents an extension.
@ -33,4 +49,32 @@ export abstract class BaseExtension implements ExtensionType {
* Any cleanup logic for the extension should be put here.
*/
abstract onUnload(): void
/**
* The compatibility of the extension.
* This is used to check if the extension is compatible with the current environment.
* @returns {Compatibility | undefined} The supported platforms and version, or undefined if compatibility checks do not apply.
*/
compatibility(): Compatibility | undefined {
return undefined
}
/**
* Determine if the prerequisites for the extension are installed.
*
* @returns {Promise<InstallationState>} The installation state of the extension's prerequisites.
*/
async installationState(): Promise<InstallationState> {
return 'NotRequired'
}
/**
* Install the prerequisites for the extension.
*
* @returns {Promise<void>}
*/
// @ts-ignore
async install(...args): Promise<void> {
return
}
}
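
A sketch of how an extension with native prerequisites might use these new hooks (hypothetical extension; members not shown in this hunk are omitted):

import { BaseExtension, Compatibility, InstallationState } from '@janhq/core'

export default class MyRunnerExtension extends BaseExtension {
  onLoad() {}
  onUnload() {}

  // Only usable on Windows and Linux in this hypothetical example.
  compatibility(): Compatibility {
    return { platform: ['win32', 'linux'], version: '1.0.0' }
  }

  // Report whether the native runner has been downloaded yet.
  async installationState(): Promise<InstallationState> {
    const binariesPresent: boolean = false // placeholder check, e.g. existsSync on the runner path
    return binariesPresent ? 'Installed' : 'NotInstalled'
  }
}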

View File

@ -0,0 +1,60 @@
import { getJanDataFolderPath, joinPath } from '../../core'
import { events } from '../../events'
import { BaseExtension } from '../../extension'
import { fs } from '../../fs'
import { Model, ModelEvent } from '../../types'
/**
* Base AIEngine
* Applicable to all AI Engines
*/
export abstract class AIEngine extends BaseExtension {
// The inference engine
abstract provider: string
// The model folder
modelFolder: string = 'models'
abstract models(): Promise<Model[]>
/**
* On extension load, subscribe to events.
*/
onLoad() {
this.prePopulateModels()
}
/**
* Pre-populate models to App Data Folder
*/
prePopulateModels(): Promise<void> {
return this.models().then((models) => {
const prePopulateOperations = models.map((model) =>
getJanDataFolderPath()
.then((janDataFolder) =>
// Attempt to create the model folder
joinPath([janDataFolder, this.modelFolder, model.id]).then((path) =>
fs
.mkdirSync(path)
.catch()
.then(() => path)
)
)
.then((path) => joinPath([path, 'model.json']))
.then((path) => {
// Do not overwrite an existing model.json
return fs.existsSync(path).then((exist: any) => {
if (!exist) return fs.writeFileSync(path, JSON.stringify(model, null, 2))
})
})
.catch((e: Error) => {
console.error('Error', e)
})
)
Promise.all(prePopulateOperations).then(() =>
// Emit event to update models
// So the UI can update the models list
events.emit(ModelEvent.OnModelsUpdate, {})
)
})
}
}

View File

@ -0,0 +1,63 @@
import { executeOnMain, getJanDataFolderPath, joinPath } from '../../core'
import { events } from '../../events'
import { Model, ModelEvent } from '../../types'
import { OAIEngine } from './OAIEngine'
/**
* Base OAI Local Inference Provider
* Adds model load/unload handling (applicable to local inference providers)
*/
export abstract class LocalOAIEngine extends OAIEngine {
// Names of the node module functions that load/unload the model
loadModelFunctionName: string = 'loadModel'
unloadModelFunctionName: string = 'unloadModel'
isRunning: boolean = false
/**
* On extension load, subscribe to events.
*/
onLoad() {
super.onLoad()
// These events are applicable to local inference providers
events.on(ModelEvent.OnModelInit, (model: Model) => this.onModelInit(model))
events.on(ModelEvent.OnModelStop, (model: Model) => this.onModelStop(model))
}
/**
* Load the model.
*/
async onModelInit(model: Model) {
if (model.engine.toString() !== this.provider) return
const modelFolder = await joinPath([await getJanDataFolderPath(), this.modelFolder, model.id])
const res = await executeOnMain(this.nodeModule, this.loadModelFunctionName, {
modelFolder,
model,
})
if (res?.error) {
events.emit(ModelEvent.OnModelFail, {
...model,
error: res.error,
})
return
} else {
this.loadedModel = model
events.emit(ModelEvent.OnModelReady, model)
this.isRunning = true
}
}
/**
* Stops the model.
*/
onModelStop(model: Model) {
if (model.engine?.toString() !== this.provider) return
this.isRunning = false
executeOnMain(this.nodeModule, this.unloadModelFunctionName).then(() => {
events.emit(ModelEvent.OnModelStopped, {})
})
}
}
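
A concrete local engine then mostly declares identifiers and a model list. A minimal sketch (hypothetical provider, module, and endpoint; the nitro and TensorRT-LLM extensions follow this shape):

import { LocalOAIEngine, Model } from '@janhq/core'

export default class MyEngineExtension extends LocalOAIEngine {
  provider = 'my-engine' // matched against model.engine
  nodeModule = 'my-engine-node' // node module exposing loadModel/unloadModel
  inferenceUrl = 'http://127.0.0.1:3928/inferences/server/chat_completion' // placeholder endpoint

  // Models this engine ships; pre-populated into the data folder by AIEngine.onLoad()
  async models(): Promise<Model[]> {
    return []
  }
}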

View File

@ -0,0 +1,116 @@
import { requestInference } from './helpers/sse'
import { ulid } from 'ulid'
import { AIEngine } from './AIEngine'
import {
ChatCompletionRole,
ContentType,
InferenceEvent,
MessageEvent,
MessageRequest,
MessageRequestType,
MessageStatus,
Model,
ModelInfo,
ThreadContent,
ThreadMessage,
} from '../../types'
import { events } from '../../events'
/**
* Base OAI Inference Provider
* Applicable to all OAI compatible inference providers
*/
export abstract class OAIEngine extends AIEngine {
// The engine's inference endpoint and node module
abstract inferenceUrl: string
abstract nodeModule: string
// Controller to handle stop requests
controller = new AbortController()
isCancelled = false
// The loaded model instance
loadedModel: Model | undefined
/**
* On extension load, subscribe to events.
*/
onLoad() {
super.onLoad()
events.on(MessageEvent.OnMessageSent, (data: MessageRequest) => this.inference(data))
events.on(InferenceEvent.OnInferenceStopped, () => this.onInferenceStopped())
}
/**
* On extension unload
*/
onUnload(): void {}
/**
* Inference request
*/
inference(data: MessageRequest) {
if (data.model?.engine?.toString() !== this.provider) return
const timestamp = Date.now()
const message: ThreadMessage = {
id: ulid(),
thread_id: data.threadId,
type: data.type,
assistant_id: data.assistantId,
role: ChatCompletionRole.Assistant,
content: [],
status: MessageStatus.Pending,
created: timestamp,
updated: timestamp,
object: 'thread.message',
}
if (data.type !== MessageRequestType.Summary) {
events.emit(MessageEvent.OnMessageResponse, message)
}
this.isCancelled = false
this.controller = new AbortController()
const model: ModelInfo = {
...(this.loadedModel ? this.loadedModel : {}),
...data.model,
}
requestInference(this.inferenceUrl, data.messages ?? [], model, this.controller).subscribe({
next: (content: any) => {
const messageContent: ThreadContent = {
type: ContentType.Text,
text: {
value: content.trim(),
annotations: [],
},
}
message.content = [messageContent]
events.emit(MessageEvent.OnMessageUpdate, message)
},
complete: async () => {
message.status = message.content.length ? MessageStatus.Ready : MessageStatus.Error
events.emit(MessageEvent.OnMessageUpdate, message)
},
error: async (err: any) => {
if (this.isCancelled || message.content.length) {
message.status = MessageStatus.Stopped
events.emit(MessageEvent.OnMessageUpdate, message)
return
}
message.status = MessageStatus.Error
events.emit(MessageEvent.OnMessageUpdate, message)
},
})
}
/**
* Stops the inference.
*/
onInferenceStopped() {
this.isCancelled = true
this.controller?.abort()
}
}

View File

@ -0,0 +1,67 @@
import { Observable } from 'rxjs'
import { ModelRuntimeParams } from '../../../types'
/**
* Sends a request to the inference server to generate a response based on the recent messages.
* @param inferenceUrl - The URL of the inference endpoint.
* @param recentMessages - An array of recent messages to use as context for the inference.
* @param model - The model id and runtime parameters for the request.
* @param controller - Optional AbortController to cancel the request.
* @returns An Observable that emits the generated response as a string.
*/
export function requestInference(
inferenceUrl: string,
recentMessages: any[],
model: {
id: string
parameters: ModelRuntimeParams
},
controller?: AbortController
): Observable<string> {
return new Observable((subscriber) => {
const requestBody = JSON.stringify({
messages: recentMessages,
model: model.id,
stream: true,
...model.parameters,
})
fetch(inferenceUrl, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Access-Control-Allow-Origin': '*',
'Accept': model.parameters.stream ? 'text/event-stream' : 'application/json',
},
body: requestBody,
signal: controller?.signal,
})
.then(async (response) => {
if (model.parameters.stream === false) {
const data = await response.json()
subscriber.next(data.choices[0]?.message?.content ?? '')
} else {
const stream = response.body
const decoder = new TextDecoder('utf-8')
const reader = stream?.getReader()
let content = ''
while (reader) {
const { done, value } = await reader.read()
if (done) {
break
}
const text = decoder.decode(value)
const lines = text.trim().split('\n')
for (const line of lines) {
if (line.startsWith('data: ') && !line.includes('data: [DONE]')) {
const data = JSON.parse(line.replace('data: ', ''))
content += data.choices[0]?.delta?.content ?? ''
if (content.startsWith('assistant: ')) {
content = content.replace('assistant: ', '')
}
subscriber.next(content)
}
}
}
}
subscriber.complete()
})
.catch((err) => subscriber.error(err))
})
}
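
For reference, a minimal subscriber over this helper (hypothetical endpoint, message, and model id):

import { requestInference } from './helpers/sse'

const controller = new AbortController()
requestInference(
  'http://127.0.0.1:3928/inferences/server/chat_completion', // placeholder endpoint
  [{ role: 'user', content: 'Hello' }],
  { id: 'my-model', parameters: { stream: true } },
  controller
).subscribe({
  next: (text) => console.log(text), // emits the accumulated response so far
  complete: () => console.log('done'),
  error: (err) => console.error(err),
})
// controller.abort() cancels the underlying fetch.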

View File

@ -0,0 +1,3 @@
export * from './AIEngine'
export * from './OAIEngine'
export * from './LocalOAIEngine'

View File

@ -28,3 +28,8 @@ export { ModelExtension } from './model'
* Hugging Face extension for converting HF models to GGUF.
*/
export { HuggingFaceExtension } from './huggingface'
/**
* Base AI Engines.
*/
export * from './ai-engines'

View File

@ -1,5 +1,5 @@
import { BaseExtension, ExtensionTypeEnum } from '../extension'
import { ImportingModel, Model, ModelInterface, OptionType } from '../index'
import { GpuSetting, ImportingModel, Model, ModelInterface, OptionType } from '../index'
/**
* Model extension for managing models.
@ -14,6 +14,7 @@ export abstract class ModelExtension extends BaseExtension implements ModelInter
abstract downloadModel(
model: Model,
gpuSettings?: GpuSetting,
network?: { proxy: string; ignoreSSL?: boolean }
): Promise<void>
abstract cancelModelDownload(modelId: string): Promise<void>

View File

@ -1,5 +1,5 @@
import { BaseExtension, ExtensionTypeEnum } from '../extension'
import { MonitoringInterface } from '../index'
import { GpuSetting, MonitoringInterface } from '../index'
/**
* Monitoring extension for system monitoring.
@ -13,6 +13,7 @@ export abstract class MonitoringExtension extends BaseExtension implements Monit
return ExtensionTypeEnum.SystemMonitoring
}
abstract getGpuSetting(): Promise<GpuSetting>
abstract getResourcesInfo(): Promise<any>
abstract getCurrentLoad(): Promise<any>
}

View File

@ -5,7 +5,7 @@ import { getJanDataFolderPath } from '../../helper'
import { DownloadManager } from '../../helper/download'
import { createWriteStream, renameSync } from 'fs'
import { Processor } from './Processor'
import { DownloadState } from '../../../types'
import { DownloadRequest, DownloadState, NetworkConfig } from '../../../types'
export class Downloader implements Processor {
observer?: Function
@ -20,24 +20,27 @@ export class Downloader implements Processor {
return func(this.observer, ...args)
}
downloadFile(observer: any, url: string, localPath: string, network: any) {
downloadFile(observer: any, downloadRequest: DownloadRequest, network?: NetworkConfig) {
const request = require('request')
const progress = require('request-progress')
const strictSSL = !network?.ignoreSSL
const proxy = network?.proxy?.startsWith('http') ? network.proxy : undefined
const { localPath, url } = downloadRequest
let normalizedPath = localPath
if (typeof localPath === 'string') {
localPath = normalizeFilePath(localPath)
normalizedPath = normalizeFilePath(localPath)
}
const array = localPath.split(sep)
const array = normalizedPath.split(sep)
const fileName = array.pop() ?? ''
const modelId = array.pop() ?? ''
const destination = resolve(getJanDataFolderPath(), localPath)
const destination = resolve(getJanDataFolderPath(), normalizedPath)
const rq = request({ url, strictSSL, proxy })
// Put request to download manager instance
DownloadManager.instance.setRequest(localPath, rq)
DownloadManager.instance.setRequest(normalizedPath, rq)
// Downloading file to a temp file first
const downloadingTempFile = `${destination}.download`
@ -56,16 +59,25 @@ export class Downloader implements Processor {
total: 0,
transferred: 0,
},
children: [],
downloadState: 'downloading',
extensionId: downloadRequest.extensionId,
downloadType: downloadRequest.downloadType,
localPath: normalizedPath,
}
DownloadManager.instance.downloadProgressMap[modelId] = initialDownloadState
if (downloadRequest.downloadType === 'extension') {
observer?.(DownloadEvent.onFileDownloadUpdate, initialDownloadState)
}
progress(rq, {})
.on('progress', (state: any) => {
const currentDownloadState = DownloadManager.instance.downloadProgressMap[modelId]
const downloadState: DownloadState = {
...currentDownloadState,
...state,
modelId,
fileName,
fileName: fileName,
downloadState: 'downloading',
}
console.debug('progress: ', downloadState)
@ -76,22 +88,22 @@ export class Downloader implements Processor {
const currentDownloadState = DownloadManager.instance.downloadProgressMap[modelId]
const downloadState: DownloadState = {
...currentDownloadState,
fileName: fileName,
error: error.message,
downloadState: 'error',
}
if (currentDownloadState) {
DownloadManager.instance.downloadProgressMap[modelId] = downloadState
}
observer?.(DownloadEvent.onFileDownloadError, downloadState)
DownloadManager.instance.downloadProgressMap[modelId] = downloadState
})
.on('end', () => {
const currentDownloadState = DownloadManager.instance.downloadProgressMap[modelId]
if (currentDownloadState && DownloadManager.instance.networkRequests[localPath]) {
if (currentDownloadState && DownloadManager.instance.networkRequests[normalizedPath]) {
// Finished downloading, rename temp file to actual file
renameSync(downloadingTempFile, destination)
const downloadState: DownloadState = {
...currentDownloadState,
fileName: fileName,
downloadState: 'end',
}
observer?.(DownloadEvent.onFileDownloadSuccess, downloadState)

View File

@ -1,7 +1,16 @@
import fs from 'fs'
import {
existsSync,
readdirSync,
readFileSync,
writeFileSync,
mkdirSync,
appendFileSync,
createWriteStream,
rmdirSync,
} from 'fs'
import { JanApiRouteConfiguration, RouteConfiguration } from './configuration'
import { join } from 'path'
import { ContentType, MessageStatus, Model, ThreadMessage } from '../../../../index'
import { ContentType, MessageStatus, Model, ThreadMessage } from '../../../../types'
import { getEngineConfiguration, getJanDataFolderPath } from '../../../helper'
import { DEFAULT_CHAT_COMPLETION_URL } from './consts'
@ -9,12 +18,12 @@ import { DEFAULT_CHAT_COMPLETION_URL } from './consts'
export const getBuilder = async (configuration: RouteConfiguration) => {
const directoryPath = join(getJanDataFolderPath(), configuration.dirName)
try {
if (!fs.existsSync(directoryPath)) {
if (!existsSync(directoryPath)) {
console.debug('model folder not found')
return []
}
const files: string[] = fs.readdirSync(directoryPath)
const files: string[] = readdirSync(directoryPath)
const allDirectories: string[] = []
for (const file of files) {
@ -46,8 +55,8 @@ export const getBuilder = async (configuration: RouteConfiguration) => {
}
const readModelMetadata = (path: string): string | undefined => {
if (fs.existsSync(path)) {
return fs.readFileSync(path, 'utf-8')
if (existsSync(path)) {
return readFileSync(path, 'utf-8')
} else {
return undefined
}
@ -81,7 +90,7 @@ export const deleteBuilder = async (configuration: RouteConfiguration, id: strin
}
const objectPath = join(directoryPath, id)
fs.rmdirSync(objectPath, { recursive: true })
rmdirSync(objectPath, { recursive: true })
return {
id: id,
object: configuration.delete.object,
@ -96,20 +105,19 @@ export const getMessages = async (threadId: string): Promise<ThreadMessage[]> =>
const threadDirPath = join(getJanDataFolderPath(), 'threads', threadId)
const messageFile = 'messages.jsonl'
try {
const files: string[] = fs.readdirSync(threadDirPath)
const files: string[] = readdirSync(threadDirPath)
if (!files.includes(messageFile)) {
console.error(`${threadDirPath} does not contain the message file`)
return []
}
const messageFilePath = join(threadDirPath, messageFile)
if (!fs.existsSync(messageFilePath)) {
if (!existsSync(messageFilePath)) {
console.debug('message file not found')
return []
}
const lines = fs
.readFileSync(messageFilePath, 'utf-8')
const lines = readFileSync(messageFilePath, 'utf-8')
.toString()
.split('\n')
.filter((line: any) => line !== '')
@ -157,11 +165,11 @@ export const createThread = async (thread: any) => {
const threadDirPath = join(getJanDataFolderPath(), 'threads', updatedThread.id)
const threadJsonPath = join(threadDirPath, threadMetadataFileName)
if (!fs.existsSync(threadDirPath)) {
fs.mkdirSync(threadDirPath)
if (!existsSync(threadDirPath)) {
mkdirSync(threadDirPath)
}
await fs.writeFileSync(threadJsonPath, JSON.stringify(updatedThread, null, 2))
await writeFileSync(threadJsonPath, JSON.stringify(updatedThread, null, 2))
return updatedThread
} catch (err) {
return {
@ -191,7 +199,7 @@ export const updateThread = async (threadId: string, thread: any) => {
const threadDirPath = join(getJanDataFolderPath(), 'threads', updatedThread.id)
const threadJsonPath = join(threadDirPath, threadMetadataFileName)
await fs.writeFileSync(threadJsonPath, JSON.stringify(updatedThread, null, 2))
await writeFileSync(threadJsonPath, JSON.stringify(updatedThread, null, 2))
return updatedThread
} catch (err) {
return {
@ -233,10 +241,10 @@ export const createMessage = async (threadId: string, message: any) => {
const threadDirPath = join(getJanDataFolderPath(), 'threads', threadId)
const threadMessagePath = join(threadDirPath, threadMessagesFileName)
if (!fs.existsSync(threadDirPath)) {
fs.mkdirSync(threadDirPath)
if (!existsSync(threadDirPath)) {
mkdirSync(threadDirPath)
}
fs.appendFileSync(threadMessagePath, JSON.stringify(threadMessage) + '\n')
appendFileSync(threadMessagePath, JSON.stringify(threadMessage) + '\n')
return threadMessage
} catch (err) {
return {
@ -259,8 +267,8 @@ export const downloadModel = async (
}
const directoryPath = join(getJanDataFolderPath(), 'models', modelId)
if (!fs.existsSync(directoryPath)) {
fs.mkdirSync(directoryPath)
if (!existsSync(directoryPath)) {
mkdirSync(directoryPath)
}
// path to model binary
@ -281,7 +289,7 @@ export const downloadModel = async (
.on('end', function () {
console.debug('end')
})
.pipe(fs.createWriteStream(modelBinaryPath))
.pipe(createWriteStream(modelBinaryPath))
}
return {

View File

@ -4,16 +4,43 @@ export type FileStat = {
}
export type DownloadState = {
modelId: string
modelId: string // TODO: change to download id
fileName: string
time: DownloadTime
speed: number
percent: number
percent: number
size: DownloadSize
children?: DownloadState[]
error?: string
downloadState: 'downloading' | 'error' | 'end'
children?: DownloadState[]
error?: string
extensionId?: string
downloadType?: DownloadType
localPath?: string
}
export type DownloadType = 'model' | 'extension'
export type DownloadRequest = {
/**
* The URL to download the file from.
*/
url: string
/**
* The local path to save the file to.
*/
localPath: string
/**
* The extension ID of the extension that initiated the download.
*
* Can be extension name.
*/
extensionId?: string
downloadType?: DownloadType
}
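
An extension-prerequisite download (the new case this PR adds) tags the request so the Downloader can emit progress for it immediately. A sketch with placeholder values:

const runnerDownload: DownloadRequest = {
  url: 'https://example.com/nitro-tensorrt-llm/runner.tar.gz', // placeholder artifact URL
  localPath: 'engines/nitro-tensorrt-llm/runner.tar.gz',
  extensionId: '@janhq/tensorrt-llm-extension', // hypothetical extension name
  downloadType: 'extension',
}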
type DownloadTime = {

View File

@ -0,0 +1,8 @@
export type FileDownloadRequest = {
downloadId: string
url: string
localPath: string
fileName: string
displayName: string
metadata: Record<string, string | number>
}

View File

@ -1,3 +1,5 @@
export * from './systemResourceInfo'
export * from './promptTemplate'
export * from './appUpdate'
export * from './fileDownloadRequest'
export * from './networkConfig'

View File

@ -0,0 +1,4 @@
export type NetworkConfig = {
proxy?: string
ignoreSSL?: boolean
}

View File

@ -2,3 +2,31 @@ export type SystemResourceInfo = {
numCpuPhysicalCore: number
memAvailable: number
}
export type RunMode = 'cpu' | 'gpu'
export type GpuSetting = {
notify: boolean
run_mode: RunMode
nvidia_driver: {
exist: boolean
version: string
}
cuda: {
exist: boolean
version: string
}
gpus: GpuSettingInfo[]
gpu_highest_vram: string
gpus_in_use: string[]
is_initial: boolean
// TODO: This needs to be set based on user toggle in settings
vulkan: boolean
}
export type GpuSettingInfo = {
id: string
vram: string
name: string
arch?: string
}
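
Concretely, a populated GpuSetting might look like this (illustrative values only):

const example: GpuSetting = {
  notify: true,
  run_mode: 'gpu',
  nvidia_driver: { exist: true, version: '535.98' },
  cuda: { exist: true, version: '12' },
  gpus: [{ id: '0', vram: '24576', name: 'NVIDIA GeForce RTX 4090', arch: 'ada' }],
  gpu_highest_vram: '0',
  gpus_in_use: ['0'],
  is_initial: false,
  vulkan: false,
}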

View File

@ -19,6 +19,7 @@ export enum InferenceEngine {
nitro = 'nitro',
openai = 'openai',
triton_trtllm = 'triton_trtllm',
nitro_tensorrt_llm = 'nitro-tensorrt-llm',
tool_retrieval_enabled = 'tool_retrieval_enabled',
}

View File

@ -1,3 +1,4 @@
import { GpuSetting } from '../miscellaneous'
import { Model } from './modelEntity'
/**
@ -10,7 +11,11 @@ export interface ModelInterface {
* @param network - Optional object to specify proxy/whether to ignore SSL certificates.
* @returns A Promise that resolves when the model has been downloaded.
*/
downloadModel(model: Model, network?: { ignoreSSL?: boolean; proxy?: string }): Promise<void>
downloadModel(
model: Model,
gpuSettings?: GpuSetting,
network?: { ignoreSSL?: boolean; proxy?: string }
): Promise<void>
/**
* Cancels the download of a specific model.

View File

@ -1 +1,2 @@
export * from './monitoringInterface'
export * from './resourceInfo'

View File

@ -0,0 +1,6 @@
export type ResourceInfo = {
mem: {
totalMemory: number
usedMemory: number
}
}

View File

@ -13,7 +13,7 @@
"declarationDir": "dist/types",
"outDir": "dist/lib",
"importHelpers": true,
"types": ["@types/jest"]
"types": ["@types/jest"],
},
"include": ["src"]
"include": ["src"],
}

View File

@ -13,6 +13,7 @@ import {
events,
DownloadEvent,
log,
DownloadRequest,
} from '@janhq/core'
import { ggufMetadata } from 'hyllama'
@ -148,7 +149,11 @@ export default class JanHuggingFaceExtension extends HuggingFaceExtension {
if (this.interrupted) return
if (!(await fs.existsSync(localPath))) {
downloadFile(url, localPath, network)
const downloadRequest: DownloadRequest = {
url,
localPath,
}
downloadFile(downloadRequest, network)
filePaths.push(filePath)
}
}

View File

@ -1,3 +1,3 @@
@echo off
set /p NITRO_VERSION=<./bin/version.txt
.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan && .\node_modules\.bin\download https://delta.jan.ai/vulkaninfoSDK.exe -o ./bin
.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan

View File

@ -8,7 +8,7 @@
"license": "AGPL-3.0",
"scripts": {
"build": "tsc --module commonjs && rollup -c rollup.config.ts",
"downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/nitro && download https://delta.jan.ai/vulkaninfo -o ./bin && chmod +x ./bin/vulkaninfo",
"downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/nitro",
"downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-arm64.tar.gz -e --strip 1 -o ./bin/mac-arm64 && chmod +x ./bin/mac-arm64/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-amd64.tar.gz -e --strip 1 -o ./bin/mac-x64 && chmod +x ./bin/mac-x64/nitro",
"downloadnitro:win32": "download.bat",
"downloadnitro": "run-script-os",

View File

@ -108,9 +108,6 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
events.on(InferenceEvent.OnInferenceStopped, () =>
this.onInferenceStopped()
)
// Attempt to fetch nvidia info
await executeOnMain(NODE, 'updateNvidiaInfo', {})
}
/**

View File

@ -1,237 +0,0 @@
import { writeFileSync, existsSync, readFileSync } from 'fs'
import { exec, spawn } from 'child_process'
import path from 'path'
import { getJanDataFolderPath, log } from '@janhq/core/node'
/**
* Default GPU settings
* TODO: This needs to be refactored to support multiple accelerators
**/
const DEFALT_SETTINGS = {
notify: true,
run_mode: 'cpu',
nvidia_driver: {
exist: false,
version: '',
},
cuda: {
exist: false,
version: '',
},
gpus: [],
gpu_highest_vram: '',
gpus_in_use: [],
is_initial: true,
// TODO: This needs to be set based on user toggle in settings
vulkan: false
}
/**
* Path to the settings file
**/
export const GPU_INFO_FILE = path.join(
getJanDataFolderPath(),
'settings',
'settings.json'
)
/**
* Current nitro process
*/
let nitroProcessInfo: NitroProcessInfo | undefined = undefined
/**
* Nitro process info
*/
export interface NitroProcessInfo {
isRunning: boolean
}
/**
* This will retrive GPU informations and persist settings.json
* Will be called when the extension is loaded to turn on GPU acceleration if supported
*/
export async function updateNvidiaInfo() {
if (process.platform !== 'darwin') {
let data
try {
data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
} catch (error) {
data = DEFALT_SETTINGS
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
}
updateNvidiaDriverInfo()
updateGpuInfo()
}
}
/**
* Retrieve current nitro process
*/
export const getNitroProcessInfo = (subprocess: any): NitroProcessInfo => {
nitroProcessInfo = {
isRunning: subprocess != null,
}
return nitroProcessInfo
}
/**
* Validate nvidia and cuda for linux and windows
*/
export async function updateNvidiaDriverInfo(): Promise<void> {
exec(
'nvidia-smi --query-gpu=driver_version --format=csv,noheader',
(error, stdout) => {
let data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
if (!error) {
const firstLine = stdout.split('\n')[0].trim()
data['nvidia_driver'].exist = true
data['nvidia_driver'].version = firstLine
} else {
data['nvidia_driver'].exist = false
}
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
Promise.resolve()
}
)
}
/**
* Check if file exists in paths
*/
export function checkFileExistenceInPaths(
file: string,
paths: string[]
): boolean {
return paths.some((p) => existsSync(path.join(p, file)))
}
/**
* Validate cuda for linux and windows
*/
export function updateCudaExistence(
data: Record<string, any> = DEFALT_SETTINGS
): Record<string, any> {
let filesCuda12: string[]
let filesCuda11: string[]
let paths: string[]
let cudaVersion: string = ''
if (process.platform === 'win32') {
filesCuda12 = ['cublas64_12.dll', 'cudart64_12.dll', 'cublasLt64_12.dll']
filesCuda11 = ['cublas64_11.dll', 'cudart64_11.dll', 'cublasLt64_11.dll']
paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : []
} else {
filesCuda12 = ['libcudart.so.12', 'libcublas.so.12', 'libcublasLt.so.12']
filesCuda11 = ['libcudart.so.11.0', 'libcublas.so.11', 'libcublasLt.so.11']
paths = process.env.LD_LIBRARY_PATH
? process.env.LD_LIBRARY_PATH.split(path.delimiter)
: []
paths.push('/usr/lib/x86_64-linux-gnu/')
}
let cudaExists = filesCuda12.every(
(file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
)
if (!cudaExists) {
cudaExists = filesCuda11.every(
(file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
)
if (cudaExists) {
cudaVersion = '11'
}
} else {
cudaVersion = '12'
}
data['cuda'].exist = cudaExists
data['cuda'].version = cudaVersion
console.debug(data['is_initial'], data['gpus_in_use'])
if (cudaExists && data['is_initial'] && data['gpus_in_use'].length > 0) {
data.run_mode = 'gpu'
}
data.is_initial = false
return data
}
/**
* Get GPU information
*/
export async function updateGpuInfo(): Promise<void> {
let data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
// Cuda
if (data['vulkan'] === true) {
// Vulkan
exec(
process.platform === 'win32'
? `${__dirname}\\..\\bin\\vulkaninfoSDK.exe --summary`
: `${__dirname}/../bin/vulkaninfo --summary`,
(error, stdout) => {
if (!error) {
const output = stdout.toString()
log(output)
const gpuRegex = /GPU(\d+):(?:[\s\S]*?)deviceName\s*=\s*(.*)/g
let gpus = []
let match
while ((match = gpuRegex.exec(output)) !== null) {
const id = match[1]
const name = match[2]
gpus.push({ id, vram: 0, name })
}
data.gpus = gpus
if (!data['gpus_in_use'] || data['gpus_in_use'].length === 0) {
data.gpus_in_use = [data.gpus.length > 1 ? '1' : '0']
}
data = updateCudaExistence(data)
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
}
Promise.resolve()
}
)
} else {
exec(
'nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits',
(error, stdout) => {
if (!error) {
log(stdout)
// Get GPU info and gpu has higher memory first
let highestVram = 0
let highestVramId = '0'
let gpus = stdout
.trim()
.split('\n')
.map((line) => {
let [id, vram, name] = line.split(', ')
vram = vram.replace(/\r/g, '')
if (parseFloat(vram) > highestVram) {
highestVram = parseFloat(vram)
highestVramId = id
}
return { id, vram, name }
})
data.gpus = gpus
data.gpu_highest_vram = highestVramId
} else {
data.gpus = []
data.gpu_highest_vram = ''
}
if (!data['gpus_in_use'] || data['gpus_in_use'].length === 0) {
data.gpus_in_use = [data['gpu_highest_vram']]
}
data = updateCudaExistence(data)
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
Promise.resolve()
}
)
}
}

View File

@ -1,12 +1,19 @@
import { getJanDataFolderPath } from '@janhq/core/node'
import { readFileSync } from 'fs'
import * as path from 'path'
import { GPU_INFO_FILE } from './accelerator'
export interface NitroExecutableOptions {
executablePath: string
cudaVisibleDevices: string
vkVisibleDevices: string
}
export const GPU_INFO_FILE = path.join(
getJanDataFolderPath(),
'settings',
'settings.json'
)
/**
* Find which executable file to run based on the current platform.
* @returns The name of the executable file to run.

View File

@ -4,7 +4,6 @@ import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
import tcpPortUsed from 'tcp-port-used'
import fetchRT from 'fetch-retry'
import { log, getSystemResourceInfo } from '@janhq/core/node'
import { getNitroProcessInfo, updateNvidiaInfo } from './accelerator'
import {
Model,
InferenceEngine,
@ -385,11 +384,26 @@ function dispose() {
killSubprocess()
}
/**
* Nitro process info
*/
export interface NitroProcessInfo {
isRunning: boolean
}
/**
* Retrieve current nitro process
*/
const getCurrentNitroProcessInfo = (): NitroProcessInfo => {
return {
isRunning: subprocess != null,
}
}
export default {
runModel,
stopModel,
killSubprocess,
dispose,
updateNvidiaInfo,
getCurrentNitroProcessInfo: () => getNitroProcessInfo(subprocess),
getCurrentNitroProcessInfo,
}

View File

@ -17,6 +17,8 @@ import {
ImportingModel,
LocalImportModelEvent,
baseName,
GpuSetting,
DownloadRequest,
} from '@janhq/core'
import { extractFileName } from './helpers/path'
@ -29,10 +31,14 @@ export default class JanModelExtension extends ModelExtension {
private static readonly _modelMetadataFileName = 'model.json'
private static readonly _supportedModelFormat = '.gguf'
private static readonly _incompletedModelFileName = '.download'
private static readonly _offlineInferenceEngine = InferenceEngine.nitro
private static readonly _offlineInferenceEngine = [
InferenceEngine.nitro,
InferenceEngine.nitro_tensorrt_llm,
]
private static readonly _tensorRtEngineFormat = '.engine'
private static readonly _configDirName = 'config'
private static readonly _defaultModelFileName = 'default-model.json'
private static readonly _supportedGpuArch = ['turing', 'ampere', 'ada']
/**
* Called when the extension is loaded.
@ -89,12 +95,52 @@ export default class JanModelExtension extends ModelExtension {
*/
async downloadModel(
model: Model,
gpuSettings?: GpuSetting,
network?: { ignoreSSL?: boolean; proxy?: string }
): Promise<void> {
// create corresponding directory
const modelDirPath = await joinPath([JanModelExtension._homeDir, model.id])
if (!(await fs.existsSync(modelDirPath))) await fs.mkdirSync(modelDirPath)
if (model.engine === InferenceEngine.nitro_tensorrt_llm) {
if (!gpuSettings || gpuSettings.gpus.length === 0) {
console.error('No GPU found. Please check your GPU setting.')
return
}
const firstGpu = gpuSettings.gpus[0]
if (!firstGpu.name.toLowerCase().includes('nvidia')) {
console.error('No Nvidia GPU found. Please check your GPU setting.')
return
}
const gpuArch = firstGpu.arch
if (gpuArch === undefined) {
console.error(
'No GPU architecture found. Please check your GPU setting.'
)
return
}
if (!JanModelExtension._supportedGpuArch.includes(gpuArch)) {
console.error(
`Your GPU: ${firstGpu.name} is not supported. Only 20xx, 30xx, and 40xx series are supported.`
)
return
}
const os = 'windows' // TODO: remove this hard coded value
const newSources = model.sources.map((source) => {
const newSource = { ...source }
newSource.url = newSource.url
.replace(/<os>/g, os)
.replace(/<gpuarch>/g, gpuArch)
return newSource
})
model.sources = newSources
}
console.debug(`Download sources: ${JSON.stringify(model.sources)}`)
if (model.sources.length > 1) {
// path to model binaries
for (const source of model.sources) {
@ -105,8 +151,11 @@ export default class JanModelExtension extends ModelExtension {
if (source.filename) {
path = await joinPath([modelDirPath, source.filename])
}
downloadFile(source.url, path, network)
const downloadRequest: DownloadRequest = {
url: source.url,
localPath: path,
}
downloadFile(downloadRequest, network)
}
// TODO: handle multiple binaries for web later
} else {
@ -115,7 +164,11 @@ export default class JanModelExtension extends ModelExtension {
JanModelExtension._supportedModelFormat
)
const path = await joinPath([modelDirPath, fileName])
downloadFile(model.sources[0]?.url, path, network)
const downloadRequest: DownloadRequest = {
url: model.sources[0]?.url,
localPath: path,
}
downloadFile(downloadRequest, network)
if (window && window.core?.api && window.core.api.baseApiUrl) {
this.startPollingDownloadProgress(model.id)
@ -238,7 +291,7 @@ export default class JanModelExtension extends ModelExtension {
async getDownloadedModels(): Promise<Model[]> {
return await this.getModelsMetadata(
async (modelDir: string, model: Model) => {
if (model.engine !== JanModelExtension._offlineInferenceEngine)
if (!JanModelExtension._offlineInferenceEngine.includes(model.engine))
return true
// model binaries (sources) are absolute path & exist
@ -247,22 +300,32 @@ export default class JanModelExtension extends ModelExtension {
)
if (existFiles.every((exist) => exist)) return true
return await fs
const result = await fs
.readdirSync(await joinPath([JanModelExtension._homeDir, modelDir]))
.then((files: string[]) => {
// Model binary exists in the directory
// Model binary name can match model ID or be a .gguf file and not be an incompleted model file
return (
files.includes(modelDir) ||
files.filter(
(file) =>
files.filter((file) => {
if (
file.endsWith(JanModelExtension._incompletedModelFileName)
) {
return false
}
return (
file
.toLowerCase()
.includes(JanModelExtension._supportedModelFormat) &&
!file.endsWith(JanModelExtension._incompletedModelFileName)
)?.length >= model.sources.length
.includes(JanModelExtension._supportedModelFormat) ||
file
.toLowerCase()
.includes(JanModelExtension._tensorRtEngineFormat)
)
})?.length > 0 // TODO: NamH find better way (can use basename to check the file name with source url)
)
})
return result
}
)
}
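
The <os>/<gpuarch> substitution above lets a single model.json reference per-platform prebuilt TensorRT engines. A sketch with a placeholder source URL:

const url = 'https://example.com/models/llm/<os>/<gpuarch>/engine.tar.gz' // placeholder
const resolved = url.replace(/<os>/g, 'windows').replace(/<gpuarch>/g, 'ada')
// resolved === 'https://example.com/models/llm/windows/ada/engine.tar.gz'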

View File

@ -0,0 +1,2 @@
@echo off
.\node_modules\.bin\download https://delta.jan.ai/vulkaninfoSDK.exe -o ./bin

View File

@ -3,21 +3,40 @@
"version": "1.0.10",
"description": "This extension provides system health and OS level data",
"main": "dist/index.js",
"module": "dist/module.js",
"node": "dist/node/index.cjs.js",
"author": "Jan <service@jan.ai>",
"license": "AGPL-3.0",
"scripts": {
"build": "tsc -b . && webpack --config webpack.config.js",
"build": "tsc --module commonjs && rollup -c rollup.config.ts && npm run download-artifacts",
"download-artifacts": "run-script-os && cpx \"bin/**\" \"dist/bin\"",
"download-artifacts:darwin": "echo 'No artifacts to download for darwin'",
"download-artifacts:win32": "download.bat",
"download-artifacts:linux": "download https://delta.jan.ai/vulkaninfo -o ./bin && chmod +x ./bin/vulkaninfo",
"build:publish": "rimraf *.tgz --glob && npm run build && npm pack && cpx *.tgz ../../pre-install"
},
"exports": {
".": "./dist/index.js",
"./main": "./dist/node/index.cjs.js"
},
"devDependencies": {
"@rollup/plugin-commonjs": "^25.0.7",
"@rollup/plugin-json": "^6.1.0",
"@rollup/plugin-node-resolve": "^15.2.3",
"@types/node": "^20.11.4",
"@types/node-os-utils": "^1.3.4",
"run-script-os": "^1.1.6",
"cpx": "^1.5.0",
"rimraf": "^3.0.2",
"webpack": "^5.88.2",
"webpack-cli": "^5.1.4",
"ts-loader": "^9.5.0"
"rollup": "^2.38.5",
"rollup-plugin-define": "^1.0.1",
"rollup-plugin-sourcemaps": "^0.6.3",
"rollup-plugin-typescript2": "^0.36.0",
"typescript": "^5.3.3",
"download-cli": "^1.1.1"
},
"dependencies": {
"@janhq/core": "file:../../core",
"@rollup/plugin-replace": "^5.0.5",
"node-os-utils": "^1.3.7"
},
"files": [

View File

@ -0,0 +1,68 @@
import resolve from '@rollup/plugin-node-resolve'
import commonjs from '@rollup/plugin-commonjs'
import sourceMaps from 'rollup-plugin-sourcemaps'
import typescript from 'rollup-plugin-typescript2'
import json from '@rollup/plugin-json'
import replace from '@rollup/plugin-replace'
const packageJson = require('./package.json')
export default [
{
input: `src/index.ts`,
output: [{ file: packageJson.main, format: 'es', sourcemap: true }],
// Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash')
external: [],
watch: {
include: 'src/**',
},
plugins: [
replace({
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
}),
// Allow json resolution
json(),
// Compile TypeScript files
typescript({ useTsconfigDeclarationDir: true }),
// Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
commonjs(),
// Allow node_modules resolution, so you can use 'external' to control
// which external modules to include in the bundle
// https://github.com/rollup/rollup-plugin-node-resolve#usage
resolve({
extensions: ['.js', '.ts', '.svelte'],
}),
// Resolve source maps to the original source
sourceMaps(),
],
},
{
input: `src/node/index.ts`,
output: [
{ file: 'dist/node/index.cjs.js', format: 'cjs', sourcemap: true },
],
// Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash')
external: ['@janhq/core/node'],
watch: {
include: 'src/node/**',
},
plugins: [
// Allow json resolution
json(),
// Compile TypeScript files
typescript({ useTsconfigDeclarationDir: true }),
// Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
commonjs(),
// Allow node_modules resolution, so you can use 'external' to control
// which external modules to include in the bundle
// https://github.com/rollup/rollup-plugin-node-resolve#usage
resolve({
extensions: ['.ts', '.js', '.json'],
}),
// Resolve source maps to the original source
sourceMaps(),
],
},
]

View File

@ -1 +1,18 @@
declare const MODULE: string
declare const NODE: string
type CpuGpuInfo = {
cpu: {
usage: number
}
gpu: GpuInfo[]
}
type GpuInfo = {
id: string
name: string
temperature: string
utilization: string
memoryTotal: string
memoryFree: string
memoryUtilization: string
}

View File

@ -1,4 +1,4 @@
import { MonitoringExtension, executeOnMain } from '@janhq/core'
import { GpuSetting, MonitoringExtension, executeOnMain } from '@janhq/core'
/**
* JanMonitoringExtension is an extension that provides system monitoring functionality.
@ -8,19 +8,30 @@ export default class JanMonitoringExtension extends MonitoringExtension {
/**
* Called when the extension is loaded.
*/
async onLoad() {}
async onLoad() {
// Attempt to fetch nvidia info
await executeOnMain(NODE, 'updateNvidiaInfo')
}
/**
* Called when the extension is unloaded.
*/
onUnload(): void {}
/**
* Returns the GPU configuration.
* @returns A Promise that resolves to an object containing the GPU configuration.
*/
async getGpuSetting(): Promise<GpuSetting | undefined> {
return executeOnMain(NODE, 'getGpuConfig')
}
/**
* Returns information about the system resources.
* @returns A Promise that resolves to an object containing information about the system resources.
*/
getResourcesInfo(): Promise<any> {
return executeOnMain(MODULE, 'getResourcesInfo')
return executeOnMain(NODE, 'getResourcesInfo')
}
/**
@ -28,6 +39,6 @@ export default class JanMonitoringExtension extends MonitoringExtension {
* @returns A Promise that resolves to an object containing information about the current system load.
*/
getCurrentLoad(): Promise<any> {
return executeOnMain(MODULE, 'getCurrentLoad')
return executeOnMain(NODE, 'getCurrentLoad')
}
}

View File

@ -1,92 +0,0 @@
const nodeOsUtils = require('node-os-utils')
const getJanDataFolderPath = require('@janhq/core/node').getJanDataFolderPath
const path = require('path')
const { readFileSync } = require('fs')
const exec = require('child_process').exec
const NVIDIA_INFO_FILE = path.join(
getJanDataFolderPath(),
'settings',
'settings.json'
)
const getResourcesInfo = () =>
new Promise((resolve) => {
nodeOsUtils.mem.used().then((ramUsedInfo) => {
const totalMemory = ramUsedInfo.totalMemMb * 1024 * 1024
const usedMemory = ramUsedInfo.usedMemMb * 1024 * 1024
const response = {
mem: {
totalMemory,
usedMemory,
},
}
resolve(response)
})
})
const getCurrentLoad = () =>
new Promise((resolve, reject) => {
nodeOsUtils.cpu.usage().then((cpuPercentage) => {
let data = {
run_mode: 'cpu',
gpus_in_use: [],
}
if (process.platform !== 'darwin') {
data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, 'utf-8'))
}
if (data.run_mode === 'gpu' && data.gpus_in_use.length > 0) {
const gpuIds = data['gpus_in_use'].join(',')
if (gpuIds !== '' && data['vulkan'] !== true) {
exec(
`nvidia-smi --query-gpu=index,name,temperature.gpu,utilization.gpu,memory.total,memory.free,utilization.memory --format=csv,noheader,nounits --id=${gpuIds}`,
(error, stdout, _) => {
if (error) {
console.error(`exec error: ${error}`)
reject(error)
return
}
const gpuInfo = stdout
.trim()
.split('\n')
.map((line) => {
const [
id,
name,
temperature,
utilization,
memoryTotal,
memoryFree,
memoryUtilization,
] = line.split(', ').map((item) => item.replace(/\r/g, ''))
return {
id,
name,
temperature,
utilization,
memoryTotal,
memoryFree,
memoryUtilization,
}
})
resolve({
cpu: { usage: cpuPercentage },
gpu: gpuInfo,
})
}
)
} else {
// Handle the case where gpuIds is empty
resolve({ cpu: { usage: cpuPercentage }, gpu: [] })
}
} else {
// Handle the case where run_mode is not 'gpu' or no GPUs are in use
resolve({ cpu: { usage: cpuPercentage }, gpu: [] })
}
})
})
module.exports = {
getResourcesInfo,
getCurrentLoad,
}

View File

@ -0,0 +1,317 @@
import { GpuSetting, GpuSettingInfo, ResourceInfo } from '@janhq/core'
import { getJanDataFolderPath, log } from '@janhq/core/node'
import { mem, cpu } from 'node-os-utils'
import { exec } from 'child_process'
import { writeFileSync, existsSync, readFileSync } from 'fs'
import path from 'path'
/**
* Path to the settings file
**/
export const GPU_INFO_FILE = path.join(
getJanDataFolderPath(),
'settings',
'settings.json'
)
/**
* Default GPU settings
* TODO: This needs to be refactored to support multiple accelerators
**/
const DEFAULT_SETTINGS: GpuSetting = {
notify: true,
run_mode: 'cpu',
nvidia_driver: {
exist: false,
version: '',
},
cuda: {
exist: false,
version: '',
},
gpus: [],
gpu_highest_vram: '',
gpus_in_use: [],
is_initial: true,
// TODO: This needs to be set based on user toggle in settings
vulkan: false,
}
export const getGpuConfig = async (): Promise<GpuSetting | undefined> => {
if (process.platform === 'darwin') return undefined
return JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
}
export const getResourcesInfo = async (): Promise<ResourceInfo> => {
const ramUsedInfo = await mem.used()
const totalMemory = ramUsedInfo.totalMemMb * 1024 * 1024
const usedMemory = ramUsedInfo.usedMemMb * 1024 * 1024
const resourceInfo: ResourceInfo = {
mem: {
totalMemory,
usedMemory,
},
}
return resourceInfo
}
export const getCurrentLoad = () =>
new Promise<CpuGpuInfo>(async (resolve, reject) => {
const cpuPercentage = await cpu.usage()
let data = {
run_mode: 'cpu',
gpus_in_use: [],
}
if (process.platform !== 'darwin') {
data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
}
if (data.run_mode === 'gpu' && data.gpus_in_use.length > 0) {
const gpuIds = data.gpus_in_use.join(',')
if (gpuIds !== '' && data['vulkan'] !== true) {
exec(
`nvidia-smi --query-gpu=index,name,temperature.gpu,utilization.gpu,memory.total,memory.free,utilization.memory --format=csv,noheader,nounits --id=${gpuIds}`,
(error, stdout, _) => {
if (error) {
console.error(`exec error: ${error}`)
reject(error)
return
}
const gpuInfo: GpuInfo[] = stdout
.trim()
.split('\n')
.map((line) => {
const [
id,
name,
temperature,
utilization,
memoryTotal,
memoryFree,
memoryUtilization,
] = line.split(', ').map((item) => item.replace(/\r/g, ''))
return {
id,
name,
temperature,
utilization,
memoryTotal,
memoryFree,
memoryUtilization,
}
})
resolve({
cpu: { usage: cpuPercentage },
gpu: gpuInfo,
})
}
)
} else {
// Handle the case where gpuIds is empty
resolve({
cpu: { usage: cpuPercentage },
gpu: [],
})
}
} else {
// Handle the case where run_mode is not 'gpu' or no GPUs are in use
resolve({
cpu: { usage: cpuPercentage },
gpu: [],
})
}
})
/**
* This will retrieve GPU information and persist it to settings.json.
* Called when the extension is loaded, to turn on GPU acceleration if supported.
*/
export const updateNvidiaInfo = async () => {
// ignore if macos
if (process.platform === 'darwin') return
try {
JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
} catch (error) {
writeFileSync(GPU_INFO_FILE, JSON.stringify(DEFAULT_SETTINGS, null, 2))
}
await updateNvidiaDriverInfo()
await updateGpuInfo()
}
const updateNvidiaDriverInfo = async () =>
new Promise((resolve, reject) => {
exec(
'nvidia-smi --query-gpu=driver_version --format=csv,noheader',
(error, stdout) => {
const data: GpuSetting = JSON.parse(
readFileSync(GPU_INFO_FILE, 'utf-8')
)
if (!error) {
const firstLine = stdout.split('\n')[0].trim()
data.nvidia_driver.exist = true
data.nvidia_driver.version = firstLine
} else {
data.nvidia_driver.exist = false
}
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
resolve({})
}
)
})
const getGpuArch = (gpuName: string): string => {
if (!gpuName.toLowerCase().includes('nvidia')) return 'unknown'
if (gpuName.includes('20')) return 'turing'
else if (gpuName.includes('30')) return 'ampere'
else if (gpuName.includes('40')) return 'ada'
else return 'unknown'
}
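
This arch detection is a heuristic over the marketing name reported by nvidia-smi/vulkaninfo, for example:

getGpuArch('NVIDIA GeForce RTX 2080 Ti') // 'turing'
getGpuArch('NVIDIA GeForce RTX 3090')    // 'ampere'
getGpuArch('NVIDIA GeForce RTX 4070')    // 'ada'
getGpuArch('AMD Radeon RX 7900 XTX')     // 'unknown' (not an NVIDIA name)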
const updateGpuInfo = async () =>
new Promise((resolve, reject) => {
let data: GpuSetting = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
// Cuda
if (data.vulkan === true) {
// Vulkan
exec(
process.platform === 'win32'
? `${__dirname}\\..\\bin\\vulkaninfoSDK.exe --summary`
: `${__dirname}/../bin/vulkaninfo --summary`,
(error, stdout) => {
if (!error) {
const output = stdout.toString()
log(output)
const gpuRegex = /GPU(\d+):(?:[\s\S]*?)deviceName\s*=\s*(.*)/g
const gpus: GpuSettingInfo[] = []
let match
while ((match = gpuRegex.exec(output)) !== null) {
const id = match[1]
const name = match[2]
const arch = getGpuArch(name)
gpus.push({ id, vram: '0', name, arch })
}
data.gpus = gpus
if (!data.gpus_in_use || data.gpus_in_use.length === 0) {
data.gpus_in_use = [data.gpus.length > 1 ? '1' : '0']
}
data = updateCudaExistence(data)
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
resolve({})
} else {
reject(error)
}
}
)
} else {
// Cuda
exec(
'nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits',
(error, stdout) => {
if (!error) {
log(stdout)
// Get GPU info, tracking the GPU with the highest memory
let highestVram = 0
let highestVramId = '0'
const gpus: GpuSettingInfo[] = stdout
.trim()
.split('\n')
.map((line) => {
let [id, vram, name] = line.split(', ')
const arch = getGpuArch(name)
vram = vram.replace(/\r/g, '')
if (parseFloat(vram) > highestVram) {
highestVram = parseFloat(vram)
highestVramId = id
}
return { id, vram, name, arch }
})
data.gpus = gpus
data.gpu_highest_vram = highestVramId
} else {
data.gpus = []
data.gpu_highest_vram = ''
}
if (!data.gpus_in_use || data.gpus_in_use.length === 0) {
data.gpus_in_use = [data.gpu_highest_vram]
}
data = updateCudaExistence(data)
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
resolve({})
}
)
}
})
/**
* Check if file exists in paths
*/
const checkFileExistenceInPaths = (file: string, paths: string[]): boolean => {
return paths.some((p) => existsSync(path.join(p, file)))
}
/**
* Validate CUDA existence for Linux and Windows
*/
const updateCudaExistence = (
data: GpuSetting = DEFAULT_SETTINGS
): GpuSetting => {
let filesCuda12: string[]
let filesCuda11: string[]
let paths: string[]
let cudaVersion: string = ''
if (process.platform === 'win32') {
filesCuda12 = ['cublas64_12.dll', 'cudart64_12.dll', 'cublasLt64_12.dll']
filesCuda11 = ['cublas64_11.dll', 'cudart64_11.dll', 'cublasLt64_11.dll']
paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : []
} else {
filesCuda12 = ['libcudart.so.12', 'libcublas.so.12', 'libcublasLt.so.12']
filesCuda11 = ['libcudart.so.11.0', 'libcublas.so.11', 'libcublasLt.so.11']
paths = process.env.LD_LIBRARY_PATH
? process.env.LD_LIBRARY_PATH.split(path.delimiter)
: []
paths.push('/usr/lib/x86_64-linux-gnu/')
}
let cudaExists = filesCuda12.every(
(file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
)
if (!cudaExists) {
cudaExists = filesCuda11.every(
(file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
)
if (cudaExists) {
cudaVersion = '11'
}
} else {
cudaVersion = '12'
}
data.cuda.exist = cudaExists
data.cuda.version = cudaVersion
console.debug(data.is_initial, data.gpus_in_use)
if (cudaExists && data.is_initial && data.gpus_in_use.length > 0) {
data.run_mode = 'gpu'
}
data.is_initial = false
return data
}
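Taken together, a minimal sketch of how these helpers compose at extension load time (the `./index` import path is an assumption for illustration):

```typescript
// updateNvidiaInfo seeds settings.json with driver and GPU details;
// getGpuConfig and getCurrentLoad then read from it
import { updateNvidiaInfo, getGpuConfig, getCurrentLoad } from './index'

const onExtensionLoad = async () => {
  await updateNvidiaInfo() // detect driver + GPUs, persist to settings.json
  const gpuConfig = await getGpuConfig() // undefined on macOS
  const load = await getCurrentLoad() // { cpu: { usage }, gpu: [...] }
  console.debug(gpuConfig?.run_mode, load.cpu.usage)
}
```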

View File

@@ -1,35 +0,0 @@
const path = require('path')
const webpack = require('webpack')
const packageJson = require('./package.json')
module.exports = {
experiments: { outputModule: true },
entry: './src/index.ts', // Adjust the entry point to match your project's main file
mode: 'production',
module: {
rules: [
{
test: /\.tsx?$/,
use: 'ts-loader',
exclude: /node_modules/,
},
],
},
output: {
filename: 'index.js', // Adjust the output file name as needed
path: path.resolve(__dirname, 'dist'),
library: { type: 'module' }, // Specify ESM output format
},
plugins: [
new webpack.DefinePlugin({
MODULE: JSON.stringify(`${packageJson.name}/${packageJson.module}`),
}),
],
resolve: {
extensions: ['.ts', '.js'],
},
optimization: {
minimize: false,
},
// Add loaders and other configuration as needed for your project
}

View File

@@ -0,0 +1,79 @@
# TensorRT-LLM Extension
Created using the Jan extension example
# Create a Jan Extension using TypeScript
Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀
## Create Your Own Extension
To create your own extension, you can use this repository as a template! Just follow the instructions below:
1. Click the Use this template button at the top of the repository
2. Select Create a new repository
3. Select an owner and name for your new repository
4. Click Create repository
5. Clone your new repository
## Initial Setup
After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.
> [!NOTE]
>
> You'll need to have a reasonably modern version of
> [Node.js](https://nodejs.org) handy. If you are using a version manager like
> [`nodenv`](https://github.com/nodenv/nodenv) or
> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
> root of your repository to install the version specified in
> [`package.json`](./package.json). Otherwise, 20.x or later should work!
1. :hammer_and_wrench: Install the dependencies
```bash
npm install
```
1. :building_construction: Package the TypeScript for distribution
```bash
npm run bundle
```
1. :white_check_mark: Check your artifact
There will now be a `.tgz` file in your extension directory.
## Update the Extension Metadata
The [`package.json`](package.json) file defines metadata about your extension, such as the
extension name, main entry, description, and version.
When you copy this repository, update `package.json` with the name and description of your extension.
## Update the Extension Code
The [`src/`](./src/) directory is the heart of your extension! This contains the
source code that will be run when your extension functions are invoked. You can replace the
contents of this directory with your own code.
There are a few things to keep in mind when writing your extension code:
- Most Jan Extension functions are processed asynchronously.
In `index.ts`, you will see that extension functions return a `Promise<any>`.
```typescript
import { events, MessageEvent, MessageRequest } from '@janhq/core'
function onStart(): Promise<any> {
return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
this.inference(data)
)
}
```
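Putting it together, a minimal extension skeleton might look like the following sketch (`MyExtension` and its `inference` body are placeholders, not part of this repository):

```typescript
import { events, MessageEvent, MessageRequest } from '@janhq/core'

export default class MyExtension {
  // Called by Jan when the extension is loaded
  onLoad(): void {
    events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
      this.inference(data)
    )
  }

  // Replace with your own inference logic
  async inference(data: MessageRequest): Promise<void> {
    console.debug('Received message request', data)
  }
}
```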
For more information about the Jan Extension Core module, see the
[documentation](https://github.com/janhq/jan/blob/main/core/README.md).
So, what are you waiting for? Go ahead and start customizing your extension!

View File

@@ -0,0 +1,49 @@
[
{
"sources": [
{
"filename": "config.json",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/config.json"
},
{
"filename": "rank0.engine",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/rank0.engine"
},
{
"filename": "tokenizer.model",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/tokenizer.model"
},
{
"filename": "special_tokens_map.json",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/special_tokens_map.json"
},
{
"filename": "tokenizer.json",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/tokenizer.json"
},
{
"filename": "tokenizer_config.json",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/tokenizer_config.json"
}
],
"id": "llamacorn-1.1b-chat-fp16",
"object": "model",
"name": "LlamaCorn 1.1B Chat FP16",
"version": "1.0",
"description": "LlamaCorn is a refined version of TinyLlama-1.1B, optimized for conversational quality, running on consumer devices through TensorRT-LLM",
"format": "TensorRT-LLM",
"settings": {
"ctx_len": 2048
},
"parameters": {
"stream": true,
"max_tokens": 4096
},
"metadata": {
"author": "LLama",
"tags": ["TensorRT-LLM", "1B", "Finetuned"],
"size": 2151000000
},
"engine": "nitro-tensorrt-llm"
}
]
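The `<gpuarch>` and `<os>` placeholders in the source URLs above are substituted before download. As an illustration only (`resolveModelUrl` is a hypothetical helper; the real substitution happens in the app's download pipeline):

```typescript
// Replace every <gpuarch>/<os> placeholder in a model source URL,
// mirroring the .replace(/<version>/g, ...) pattern used for the runner URL
const resolveModelUrl = (url: string, gpuArch: string, os: string): string =>
  url.replace(/<gpuarch>/g, gpuArch).replace(/<os>/g, os)

// e.g. resolveModelUrl(url, 'ada', 'windows') selects the Ada/Windows engine build
```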

View File

@@ -0,0 +1,75 @@
{
"name": "@janhq/tensorrt-llm-extension",
"version": "0.0.2",
"description": "Enables accelerated inference leveraging Nvidia's TensorRT-LLM for optimal GPU hardware optimizations. Compatible with models in TensorRT-LLM format. Requires Nvidia GPU driver and CUDA Toolkit installation.",
"main": "dist/index.js",
"node": "dist/node/index.cjs.js",
"author": "Jan <service@jan.ai>",
"license": "AGPL-3.0",
"config": {
"host": "127.0.0.1",
"port": "3928"
},
"compatibility": {
"platform": [
"win32",
"linux"
],
"app": [
"0.1.0"
]
},
"scripts": {
"build": "tsc --module commonjs && rollup -c rollup.config.ts",
"build:publish:win32": "rimraf *.tgz --glob && npm run build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
"build:publish:linux": "rimraf *.tgz --glob && npm run build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
"build:publish:darwin": "rimraf *.tgz --glob && npm run build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
"build:publish": "run-script-os"
},
"exports": {
".": "./dist/index.js",
"./main": "./dist/node/index.cjs.js"
},
"devDependencies": {
"@rollup/plugin-commonjs": "^25.0.7",
"@rollup/plugin-json": "^6.1.0",
"@rollup/plugin-node-resolve": "^15.2.3",
"@rollup/plugin-replace": "^5.0.5",
"@types/node": "^20.11.4",
"@types/os-utils": "^0.0.4",
"@types/tcp-port-used": "^1.0.4",
"@types/decompress": "4.2.7",
"cpx": "^1.5.0",
"download-cli": "^1.1.1",
"rimraf": "^3.0.2",
"rollup": "^2.38.5",
"rollup-plugin-define": "^1.0.1",
"rollup-plugin-sourcemaps": "^0.6.3",
"rollup-plugin-typescript2": "^0.36.0",
"run-script-os": "^1.1.6",
"typescript": "^5.2.2"
},
"dependencies": {
"@janhq/core": "file:../../core",
"decompress": "^4.2.1",
"fetch-retry": "^5.0.6",
"path-browserify": "^1.0.1",
"rxjs": "^7.8.1",
"tcp-port-used": "^1.0.2",
"ulid": "^2.3.0"
},
"engines": {
"node": ">=18.0.0"
},
"files": [
"dist/*",
"package.json",
"README.md"
],
"bundleDependencies": [
"tcp-port-used",
"fetch-retry",
"decompress",
"@janhq/core"
]
}

View File

@@ -0,0 +1,73 @@
import resolve from '@rollup/plugin-node-resolve'
import commonjs from '@rollup/plugin-commonjs'
import sourceMaps from 'rollup-plugin-sourcemaps'
import typescript from 'rollup-plugin-typescript2'
import json from '@rollup/plugin-json'
import replace from '@rollup/plugin-replace'
const packageJson = require('./package.json')
export default [
{
input: `src/index.ts`,
output: [{ file: packageJson.main, format: 'es', sourcemap: true }],
watch: {
include: 'src/**',
},
plugins: [
replace({
EXTENSION_NAME: JSON.stringify(packageJson.name),
TENSORRT_VERSION: JSON.stringify('0.1.5'),
DOWNLOAD_RUNNER_URL:
process.platform === 'darwin' || process.platform === 'win32'
? JSON.stringify(
'https://github.com/janhq/nitro-tensorrt-llm/releases/download/windows-v<version>/nitro-windows-v<version>-amd64-tensorrt-llm-<gpuarch>.tar.gz'
)
: JSON.stringify(
'https://github.com/janhq/nitro-tensorrt-llm/releases/download/linux-v<version>/nitro-linux-v<version>-amd64-tensorrt-llm-<gpuarch>.tar.gz'
),
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
INFERENCE_URL: JSON.stringify(
process.env.INFERENCE_URL ||
`${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/v1/chat/completions`
),
COMPATIBILITY: JSON.stringify(packageJson.compatibility),
}),
json(),
typescript({ useTsconfigDeclarationDir: true }),
commonjs(),
resolve({
extensions: ['.js', '.ts', '.svelte'],
}),
sourceMaps(),
],
},
{
input: `src/node/index.ts`,
output: [
{ file: 'dist/node/index.cjs.js', format: 'cjs', sourcemap: true },
],
external: ['@janhq/core/node'],
watch: {
include: 'src/node/**',
},
plugins: [
replace({
LOAD_MODEL_URL: JSON.stringify(
`${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/inferences/tensorrtllm/loadmodel`
),
TERMINATE_ENGINE_URL: JSON.stringify(
`${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/inferences/processmanager/destroy`
),
ENGINE_HOST: JSON.stringify(packageJson.config?.host ?? '127.0.0.1'),
ENGINE_PORT: JSON.stringify(packageJson.config?.port ?? '3928'),
}),
json(),
typescript({ useTsconfigDeclarationDir: true }),
commonjs(),
resolve({
extensions: ['.ts', '.js', '.json'],
}),
sourceMaps(),
],
},
]

View File

@@ -0,0 +1,10 @@
declare const NODE: string
declare const INFERENCE_URL: string
declare const LOAD_MODEL_URL: string
declare const TERMINATE_ENGINE_URL: string
declare const ENGINE_HOST: string
declare const ENGINE_PORT: string
declare const DOWNLOAD_RUNNER_URL: string
declare const TENSORRT_VERSION: string
declare const COMPATIBILITY: object
declare const EXTENSION_NAME: string
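These `declare const` statements have no runtime binding of their own; `@rollup/plugin-replace` (configured in `rollup.config.ts` above) inlines each identifier as a string literal at build time. A sketch of the effect, with example values:

```typescript
// Source as written in the extension:
const url = DOWNLOAD_RUNNER_URL.replace(/<version>/g, TENSORRT_VERSION)

// Roughly what lands in the bundle after replacement (example values;
// <gpuarch> is resolved later, at install time):
// const url =
//   'https://github.com/janhq/nitro-tensorrt-llm/releases/download/linux-v0.1.5/nitro-linux-v0.1.5-amd64-tensorrt-llm-<gpuarch>.tar.gz'
```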

View File

@@ -0,0 +1,147 @@
/**
* @module tensorrt-llm-extension/src/index
*/
import {
Compatibility,
DownloadEvent,
DownloadRequest,
DownloadState,
GpuSetting,
InstallationState,
Model,
baseName,
downloadFile,
events,
executeOnMain,
joinPath,
showToast,
systemInformations,
LocalOAIEngine,
fs,
} from '@janhq/core'
import models from '../models.json'
/**
* TensorRTLLMExtension - Implementation of LocalOAIEngine
* @extends LocalOAIEngine
* Provides pre-populated models for TensorRT-LLM
*/
export default class TensorRTLLMExtension extends LocalOAIEngine {
/**
* Overrides the custom function names for loading and unloading the model,
* which are implemented in the node module
*/
override provider = 'nitro-tensorrt-llm'
override inferenceUrl = INFERENCE_URL
override nodeModule = NODE
private supportedGpuArch = ['turing', 'ampere', 'ada']
compatibility() {
return COMPATIBILITY as unknown as Compatibility
}
/**
* Models implemented by the extension;
* defines the pre-populated models
*/
async models(): Promise<Model[]> {
if ((await this.installationState()) === 'Installed')
return models as unknown as Model[]
return []
}
override async install(): Promise<void> {
const info = await systemInformations()
console.debug(
`TensorRTLLMExtension installing pre-requisites... ${JSON.stringify(info)}`
)
const gpuSetting: GpuSetting | undefined = info.gpuSetting
if (gpuSetting === undefined || gpuSetting.gpus.length === 0) {
console.error('No GPU setting found. Please check your GPU setting.')
return
}
// TODO: we only check for the first graphics card. Need to refactor this later.
const firstGpu = gpuSetting.gpus[0]
if (!firstGpu.name.toLowerCase().includes('nvidia')) {
console.error('No Nvidia GPU found. Please check your GPU setting.')
return
}
if (firstGpu.arch === undefined) {
console.error('No GPU architecture found. Please check your GPU setting.')
return
}
if (!this.supportedGpuArch.includes(firstGpu.arch)) {
console.error(
`Your GPU: ${firstGpu.name} is not supported. Only 20xx, 30xx, 40xx series are supported.`
)
return
}
const binaryFolderPath = await executeOnMain(
this.nodeModule,
'binaryFolder'
)
if (!(await fs.existsSync(binaryFolderPath))) {
await fs.mkdirSync(binaryFolderPath)
}
const placeholderUrl = DOWNLOAD_RUNNER_URL
const tensorrtVersion = TENSORRT_VERSION
const url = placeholderUrl
.replace(/<version>/g, tensorrtVersion)
.replace(/<gpuarch>/g, firstGpu.arch)
const tarball = await baseName(url)
const tarballFullPath = await joinPath([binaryFolderPath, tarball])
const downloadRequest: DownloadRequest = {
url,
localPath: tarballFullPath,
extensionId: EXTENSION_NAME,
downloadType: 'extension',
}
downloadFile(downloadRequest)
// TODO: wrap this into a Promise
const onFileDownloadSuccess = async (state: DownloadState) => {
// Ignore events from other downloads
if (state.fileName !== tarball) return
events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
await executeOnMain(this.nodeModule, 'decompressRunner', tarballFullPath)
events.emit(DownloadEvent.onFileUnzipSuccess, state)
// Pre-populate models as soon as the runner is ready
this.prePopulateModels().then(() => {
showToast(
'Extension installed successfully.',
'New models are added to Model Hub.'
)
})
}
events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
}
override async installationState(): Promise<InstallationState> {
// For now, we just check for the nitro-tensorrt-llm executable
const isNitroExecutableAvailable = await executeOnMain(
this.nodeModule,
'isNitroExecutableAvailable'
)
return isNitroExecutableAvailable ? 'Installed' : 'NotInstalled'
}
override onInferenceStopped() {
if (!this.isRunning) return
showToast(
'Unable to Stop Inference',
'The model does not support stopping inference.'
)
return Promise.resolve()
}
}
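A sketch of the "wrap this into a Promise" TODO above, using only the `events` API already imported in this file, so that `install()` could await the download end-to-end:

```typescript
// Resolve once the matching tarball finishes downloading; other files are ignored
const waitForDownload = (tarball: string): Promise<DownloadState> =>
  new Promise((resolve) => {
    const handler = (state: DownloadState) => {
      if (state.fileName !== tarball) return
      events.off(DownloadEvent.onFileDownloadSuccess, handler)
      resolve(state)
    }
    events.on(DownloadEvent.onFileDownloadSuccess, handler)
  })
```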

View File

@@ -0,0 +1,191 @@
import path from 'path'
import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
import tcpPortUsed from 'tcp-port-used'
import fetchRT from 'fetch-retry'
import { log } from '@janhq/core/node'
import { existsSync } from 'fs'
import decompress from 'decompress'
// Polyfill fetch with retry
const fetchRetry = fetchRT(fetch)
/**
* The request parameters for the model load operation.
*/
interface ModelLoadParams {
engine_path: string
ctx_len: number
}
// The subprocess instance for Engine
let subprocess: ChildProcessWithoutNullStreams | undefined = undefined
/**
* Initializes an engine subprocess to load a machine learning model.
* @param params - The model load settings.
*/
async function loadModel(params: any): Promise<{ error: Error | undefined }> {
// modelFolder is the absolute path to the running model folder
// e.g. ~/jan/models/llama-2
let modelFolder = params.modelFolder
const settings: ModelLoadParams = {
engine_path: modelFolder,
ctx_len: params.model.settings.ctx_len ?? 2048,
}
return runEngineAndLoadModel(settings)
}
/**
* Stops the engine subprocess.
*/
function unloadModel(): Promise<any> {
const controller = new AbortController()
setTimeout(() => controller.abort(), 5000)
debugLog(`Request to kill engine`)
subprocess?.kill()
return fetch(TERMINATE_ENGINE_URL, {
method: 'DELETE',
signal: controller.signal,
})
.then(() => {
subprocess = undefined
})
.catch(() => {}) // Do nothing with this attempt
.then(() => tcpPortUsed.waitUntilFree(parseInt(ENGINE_PORT), 300, 5000)) // Wait for port available
.then(() => debugLog(`Engine process is terminated`))
.catch((err) => {
debugLog(
`Could not kill the running process on port ${ENGINE_PORT}. Another process may be running on the same port. ${err}`
)
throw 'PORT_NOT_AVAILABLE'
})
}
/**
* 1. Spawn engine process
* 2. Load model into engine subprocess
* @returns
*/
async function runEngineAndLoadModel(settings: ModelLoadParams) {
return unloadModel()
.then(runEngine)
.then(() => loadModelRequest(settings))
.catch((err) => {
// TODO: Broadcast error so app could display proper error message
debugLog(`${err}`, 'Error')
return { error: err }
})
}
/**
* Loads an LLM model into the engine subprocess by sending an HTTP POST request.
*/
function loadModelRequest(
settings: ModelLoadParams
): Promise<{ error: Error | undefined }> {
debugLog(`Loading model with params ${JSON.stringify(settings)}`)
return fetchRetry(LOAD_MODEL_URL, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify(settings),
retries: 3,
retryDelay: 500,
})
.then((res) => {
debugLog(`Load model success with response ${JSON.stringify(res)}`)
return Promise.resolve({ error: undefined })
})
.catch((err) => {
debugLog(`Load model failed with error ${err}`, 'Error')
return Promise.resolve({ error: err })
})
}
/**
* Spawns engine subprocess.
*/
function runEngine(): Promise<any> {
debugLog(`Spawning engine subprocess...`)
return new Promise<void>((resolve, reject) => {
// Current directory by default
let binaryFolder = path.join(__dirname, '..', 'bin')
// Binary path
const binary = path.join(
binaryFolder,
process.platform === 'win32' ? 'nitro.exe' : 'nitro'
)
const args: string[] = ['1', ENGINE_HOST, ENGINE_PORT]
// Execute the binary
debugLog(`Spawn nitro at path: ${binary}, and args: ${args}`)
subprocess = spawn(binary, args, {
cwd: binaryFolder,
env: {
...process.env,
},
})
// Handle subprocess output
subprocess.stdout.on('data', (data: any) => {
debugLog(`${data}`)
})
subprocess.stderr.on('data', (data: any) => {
debugLog(`${data}`)
})
subprocess.on('close', (code: any) => {
debugLog(`Engine exited with code: ${code}`)
subprocess = undefined
reject(`child process exited with code ${code}`)
})
tcpPortUsed
.waitUntilUsed(parseInt(ENGINE_PORT), 300, 30000)
.then(() => {
debugLog(`Engine is ready`)
resolve()
})
// Reject on timeout so the failure is not an unhandled rejection
.catch(reject)
})
}
function debugLog(message: string, level: string = 'Debug') {
log(`[TENSORRT_LLM_NITRO]::${level}:${message}`)
}
const binaryFolder = async (): Promise<string> => {
return path.join(__dirname, '..', 'bin')
}
const decompressRunner = async (zipPath: string) => {
const output = path.join(__dirname, '..', 'bin')
console.debug(`Decompressing ${zipPath} to ${output}...`)
try {
const files = await decompress(zipPath, output)
console.debug('Decompress finished!', files)
} catch (err) {
console.error(`Decompress ${zipPath} failed: ${err}`)
}
}
const isNitroExecutableAvailable = async (): Promise<boolean> => {
const binary = path.join(
__dirname,
'..',
'bin',
process.platform === 'win32' ? 'nitro.exe' : 'nitro'
)
return existsSync(binary)
}
export default {
binaryFolder,
decompressRunner,
loadModel,
unloadModel,
dispose: unloadModel,
isNitroExecutableAvailable,
}
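These exports are not imported by the renderer directly; following the pattern in `src/index.ts` above, they are invoked by name through `executeOnMain`. A usage sketch (the `nodeModule` argument stands in for the bundled `NODE` constant):

```typescript
import { executeOnMain } from '@janhq/core'

// Ask the main process whether the nitro binary has been installed yet
const checkRunner = async (nodeModule: string): Promise<void> => {
  const available: boolean = await executeOnMain(
    nodeModule,
    'isNitroExecutableAvailable'
  )
  if (!available) console.debug('nitro binary not installed yet')
}
```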

View File

@@ -0,0 +1,20 @@
{
"compilerOptions": {
"moduleResolution": "node",
"target": "es5",
"module": "ES2020",
"lib": ["es2015", "es2016", "es2017", "dom"],
"strict": true,
"sourceMap": true,
"declaration": true,
"allowSyntheticDefaultImports": true,
"experimentalDecorators": true,
"emitDecoratorMetadata": true,
"declarationDir": "dist/types",
"outDir": "dist",
"importHelpers": true,
"resolveJsonModule": true,
"typeRoots": ["node_modules/@types"]
},
"include": ["src"]
}

View File

@@ -73,8 +73,9 @@ const DropdownListSidebar = ({
const [copyId, setCopyId] = useState('')
// TODO: Update filter condition for the local model
const localModel = downloadedModels.filter(
(model) => model.engine === InferenceEngine.nitro
(model) => model.engine !== InferenceEngine.openai
)
const remoteModel = downloadedModels.filter(
(model) => model.engine === InferenceEngine.openai

View File

@@ -0,0 +1,87 @@
import { useCallback, useEffect } from 'react'
import { abortDownload } from '@janhq/core'
import {
Button,
Modal,
ModalContent,
ModalHeader,
ModalTitle,
Progress,
} from '@janhq/uikit'
import { atom, useAtom, useAtomValue } from 'jotai'
import {
formatDownloadPercentage,
formatExtensionsName,
} from '@/utils/converter'
import {
InstallingExtensionState,
installingExtensionAtom,
} from '@/helpers/atoms/Extension.atom'
export const showInstallingExtensionModalAtom = atom(false)
const InstallingExtensionModal: React.FC = () => {
const [showInstallingExtensionModal, setShowInstallingExtensionModal] =
useAtom(showInstallingExtensionModalAtom)
const installingExtensions = useAtomValue(installingExtensionAtom)
useEffect(() => {
if (installingExtensions.length === 0) {
setShowInstallingExtensionModal(false)
}
}, [installingExtensions, setShowInstallingExtensionModal])
const onAbortInstallingExtensionClick = useCallback(
(item: InstallingExtensionState) => {
if (item.localPath) {
abortDownload(item.localPath)
}
},
[]
)
return (
<Modal
open={showInstallingExtensionModal}
onOpenChange={() => setShowInstallingExtensionModal(false)}
>
<ModalContent>
<ModalHeader>
<ModalTitle>Installing Extension</ModalTitle>
</ModalHeader>
{Object.values(installingExtensions).map((item) => (
<div className="pt-2" key={item.extensionId}>
<Progress
className="mb-2 h-2"
value={
formatDownloadPercentage(item.percentage, {
hidePercentage: true,
}) as number
}
/>
<div className="flex items-center justify-between gap-x-2">
<div className="flex gap-x-2">
<p className="line-clamp-1">
{formatExtensionsName(item.extensionId)}
</p>
<span>{formatDownloadPercentage(item.percentage)}</span>
</div>
<Button
themes="outline"
size="sm"
onClick={() => onAbortInstallingExtensionClick(item)}
>
Cancel
</Button>
</div>
</div>
))}
</ModalContent>
</Modal>
)
}
export default InstallingExtensionModal

View File

@@ -0,0 +1,52 @@
import { Fragment, useCallback } from 'react'
import { Progress } from '@janhq/uikit'
import { useAtomValue, useSetAtom } from 'jotai'
import { showInstallingExtensionModalAtom } from './InstallingExtensionModal'
import { installingExtensionAtom } from '@/helpers/atoms/Extension.atom'
const InstallingExtension: React.FC = () => {
const installingExtensions = useAtomValue(installingExtensionAtom)
const setShowInstallingExtensionModal = useSetAtom(
showInstallingExtensionModalAtom
)
const shouldShowInstalling = installingExtensions.length > 0
let totalPercentage = 0
let totalExtensions = 0
for (const installation of installingExtensions) {
totalPercentage += installation.percentage
totalExtensions++
}
const progress = (totalPercentage / totalExtensions) * 100
const onClick = useCallback(() => {
setShowInstallingExtensionModal(true)
}, [setShowInstallingExtensionModal])
return (
<Fragment>
{shouldShowInstalling ? (
<div
className="flex cursor-pointer flex-row items-center space-x-2"
onClick={onClick}
>
<p className="text-xs font-semibold text-muted-foreground">
Installing Extension
</p>
<div className="flex flex-row items-center justify-center space-x-2 rounded-md bg-secondary px-2 py-[2px]">
<Progress className="h-2 w-24" value={progress} />
<span className="text-xs font-bold text-muted-foreground">
{progress.toFixed(2)}%
</span>
</div>
</div>
) : null}
</Fragment>
)
}
export default InstallingExtension

View File

@@ -16,6 +16,7 @@ import ProgressBar from '@/containers/ProgressBar'
import { appDownloadProgress } from '@/containers/Providers/Jotai'
import ImportingModelState from './ImportingModelState'
import InstallingExtension from './InstallingExtension'
import SystemMonitor from './SystemMonitor'
import UpdatedFailedModal from './UpdateFailedModal'
@@ -46,6 +47,7 @@ const BottomBar = () => {
<ImportingModelState />
<DownloadingState />
<UpdatedFailedModal />
<InstallingExtension />
</div>
<div className="flex items-center gap-x-3">
<SystemMonitor />

View File

@@ -22,6 +22,8 @@ import ImportModelOptionModal from '@/screens/Settings/ImportModelOptionModal'
import ImportingModelModal from '@/screens/Settings/ImportingModelModal'
import SelectingModelModal from '@/screens/Settings/SelectingModelModal'
import InstallingExtensionModal from './BottomBar/InstallingExtension/InstallingExtensionModal'
import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
const BaseLayout = (props: PropsWithChildren) => {
@@ -68,6 +70,7 @@ const BaseLayout = (props: PropsWithChildren) => {
{importModelStage === 'IMPORTING_MODEL' && <ImportingModelModal />}
{importModelStage === 'EDIT_MODEL_INFO' && <EditModelInfoModal />}
{importModelStage === 'CONFIRM_CANCEL' && <CancelModelImportModal />}
<InstallingExtensionModal />
</div>
)
}

View File

@ -7,6 +7,10 @@ import { useSetAtom } from 'jotai'
import { setDownloadStateAtom } from '@/hooks/useDownloadState'
import { formatExtensionsName } from '@/utils/converter'
import { toaster } from '../Toast'
import AppUpdateListener from './AppUpdateListener'
import ClipboardListener from './ClipboardListener'
import EventHandler from './EventHandler'
@@ -14,46 +18,89 @@ import EventHandler from './EventHandler'
import ModelImportListener from './ModelImportListener'
import QuickAskListener from './QuickAskListener'
import {
InstallingExtensionState,
removeInstallingExtensionAtom,
setInstallingExtensionAtom,
} from '@/helpers/atoms/Extension.atom'
const EventListenerWrapper = ({ children }: PropsWithChildren) => {
const setDownloadState = useSetAtom(setDownloadStateAtom)
const setInstallingExtension = useSetAtom(setInstallingExtensionAtom)
const removeInstallingExtension = useSetAtom(removeInstallingExtensionAtom)
const onFileDownloadUpdate = useCallback(
async (state: DownloadState) => {
console.debug('onFileDownloadUpdate', state)
setDownloadState(state)
if (state.downloadType === 'extension') {
const installingExtensionState: InstallingExtensionState = {
extensionId: state.extensionId!,
percentage: state.percent,
localPath: state.localPath,
}
setInstallingExtension(state.extensionId!, installingExtensionState)
} else {
setDownloadState(state)
}
},
[setDownloadState]
[setDownloadState, setInstallingExtension]
)
const onFileDownloadError = useCallback(
(state: DownloadState) => {
console.debug('onFileDownloadError', state)
setDownloadState(state)
if (state.downloadType === 'extension') {
removeInstallingExtension(state.extensionId!)
} else {
setDownloadState(state)
}
},
[setDownloadState]
[setDownloadState, removeInstallingExtension]
)
const onFileDownloadSuccess = useCallback(
(state: DownloadState) => {
console.debug('onFileDownloadSuccess', state)
setDownloadState(state)
if (state.downloadType !== 'extension') {
setDownloadState(state)
}
},
[setDownloadState]
)
const onFileUnzipSuccess = useCallback(
(state: DownloadState) => {
console.debug('onFileUnzipSuccess', state)
toaster({
title: 'Success',
description: `Install ${formatExtensionsName(state.extensionId!)} successfully.`,
type: 'success',
})
removeInstallingExtension(state.extensionId!)
},
[removeInstallingExtension]
)
useEffect(() => {
console.debug('EventListenerWrapper: registering event listeners...')
events.on(DownloadEvent.onFileDownloadUpdate, onFileDownloadUpdate)
events.on(DownloadEvent.onFileDownloadError, onFileDownloadError)
events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
events.on(DownloadEvent.onFileUnzipSuccess, onFileUnzipSuccess)
return () => {
console.debug('EventListenerWrapper: unregistering event listeners...')
events.off(DownloadEvent.onFileDownloadUpdate, onFileDownloadUpdate)
events.off(DownloadEvent.onFileDownloadError, onFileDownloadError)
events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
events.off(DownloadEvent.onFileUnzipSuccess, onFileUnzipSuccess)
}
}, [onFileDownloadUpdate, onFileDownloadError, onFileDownloadSuccess])
}, [
onFileDownloadUpdate,
onFileDownloadError,
onFileDownloadSuccess,
onFileUnzipSuccess,
])
return (
<AppUpdateListener>

View File

@@ -23,7 +23,9 @@ export class ExtensionManager {
* @param type - The type of the extension to retrieve.
* @returns The extension, if found.
*/
get<T extends BaseExtension>(type: ExtensionTypeEnum): T | undefined {
get<T extends BaseExtension>(
type: ExtensionTypeEnum | string
): T | undefined {
return this.extensions.get(type) as T | undefined
}

View File

@@ -0,0 +1,40 @@
import { atom } from 'jotai'
type ExtensionId = string
export type InstallingExtensionState = {
extensionId: ExtensionId
percentage: number
localPath?: string
}
export const installingExtensionAtom = atom<InstallingExtensionState[]>([])
export const setInstallingExtensionAtom = atom(
null,
(get, set, extensionId: string, state: InstallingExtensionState) => {
const current = get(installingExtensionAtom)
const isExists = current.some((e) => e.extensionId === extensionId)
if (isExists) {
const newCurrent = current.map((e) => {
if (e.extensionId === extensionId) {
return state
}
return e
})
set(installingExtensionAtom, newCurrent)
} else {
set(installingExtensionAtom, [...current, state])
}
}
)
export const removeInstallingExtensionAtom = atom(
null,
(get, set, extensionId: string) => {
const current = get(installingExtensionAtom)
const newCurrent = current.filter((e) => e.extensionId !== extensionId)
set(installingExtensionAtom, newCurrent)
}
)
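A usage sketch mirroring how `EventListenerWrapper` above drives these atoms: the write-only atoms are invoked through `useSetAtom`, and the list is read with `useAtomValue` (`useInstallProgress` is a hypothetical hook for illustration):

```typescript
import { useAtomValue, useSetAtom } from 'jotai'

const useInstallProgress = (extensionId: ExtensionId) => {
  const installing = useAtomValue(installingExtensionAtom)
  const setInstalling = useSetAtom(setInstallingExtensionAtom)
  const removeInstalling = useSetAtom(removeInstallingExtensionAtom)

  return {
    // Current progress entry for this extension, if any
    state: installing.find((e) => e.extensionId === extensionId),
    onProgress: (percentage: number, localPath?: string) =>
      setInstalling(extensionId, { extensionId, percentage, localPath }),
    onDone: () => removeInstalling(extensionId),
  }
}
```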

View File

@@ -40,6 +40,16 @@ export function useActiveModel() {
console.debug(`Model ${modelId} is already initialized. Ignore..`)
return
}
let model = downloadedModelsRef?.current.find((e) => e.id === modelId)
// Switch between engines
if (model && activeModel && activeModel.engine !== model.engine) {
stopModel()
// TODO: Refactor inference provider would address this
await new Promise((res) => setTimeout(res, 1000))
}
// TODO: in case we have multiple assistants, the configuration will come from the assistant
setLoadModelError(undefined)
@@ -47,8 +57,6 @@
setStateModel({ state: 'start', loading: true, model: modelId })
let model = downloadedModelsRef?.current.find((e) => e.id === modelId)
if (!model) {
toaster({
title: `Model ${modelId} not found!`,

View File

@@ -8,12 +8,15 @@ import {
joinPath,
ModelArtifact,
DownloadState,
GpuSetting,
} from '@janhq/core'
import { useAtomValue, useSetAtom } from 'jotai'
import { setDownloadStateAtom } from './useDownloadState'
import useGpuSetting from './useGpuSetting'
import { extensionManager } from '@/extension/ExtensionManager'
import {
ignoreSslAtom,
@@ -29,6 +32,8 @@ export default function useDownloadModel() {
const setDownloadState = useSetAtom(setDownloadStateAtom)
const addDownloadingModel = useSetAtom(addDownloadingModelAtom)
const { getGpuSettings } = useGpuSetting()
const downloadModel = useCallback(
async (model: Model) => {
const childProgresses: DownloadState[] = model.sources.map(
@@ -68,10 +73,22 @@
})
addDownloadingModel(model)
await localDownloadModel(model, ignoreSSL, proxyEnabled ? proxy : '')
const gpuSettings = await getGpuSettings()
await localDownloadModel(
model,
ignoreSSL,
proxyEnabled ? proxy : '',
gpuSettings
)
},
[ignoreSSL, proxy, proxyEnabled, addDownloadingModel, setDownloadState]
[
ignoreSSL,
proxy,
proxyEnabled,
getGpuSettings,
addDownloadingModel,
setDownloadState,
]
)
const abortModelDownload = useCallback(async (model: Model) => {
@@ -90,8 +107,9 @@
const localDownloadModel = async (
model: Model,
ignoreSSL: boolean,
proxy: string
proxy: string,
gpuSettings?: GpuSetting
) =>
extensionManager
.get<ModelExtension>(ExtensionTypeEnum.Model)
?.downloadModel(model, { ignoreSSL, proxy })
?.downloadModel(model, gpuSettings, { ignoreSSL, proxy })

View File

@@ -18,123 +18,129 @@ export const modelDownloadStateAtom = atom<Record<string, DownloadState>>({})
export const setDownloadStateAtom = atom(
null,
(get, set, state: DownloadState) => {
const currentState = { ...get(modelDownloadStateAtom) }
try {
const currentState = { ...get(modelDownloadStateAtom) }
if (state.downloadState === 'end') {
const modelDownloadState = currentState[state.modelId]
if (state.downloadState === 'end') {
const modelDownloadState = currentState[state.modelId]
const updatedChildren: DownloadState[] =
modelDownloadState.children!.filter(
(m) => m.fileName !== state.fileName
const updatedChildren: DownloadState[] = (
modelDownloadState.children ?? []
).filter((m) => m.fileName !== state.fileName)
updatedChildren.push(state)
modelDownloadState.children = updatedChildren
currentState[state.modelId] = modelDownloadState
const isAllChildrenDownloadEnd = modelDownloadState.children?.every(
(m) => m.downloadState === 'end'
)
updatedChildren.push(state)
modelDownloadState.children = updatedChildren
currentState[state.modelId] = modelDownloadState
const isAllChildrenDownloadEnd = modelDownloadState.children?.every(
(m) => m.downloadState === 'end'
)
if (isAllChildrenDownloadEnd) {
// download successfully
delete currentState[state.modelId]
set(removeDownloadingModelAtom, state.modelId)
if (isAllChildrenDownloadEnd) {
// download successfully
const model = get(configuredModelsAtom).find(
(e) => e.id === state.modelId
)
if (model) set(downloadedModelsAtom, (prev) => [...prev, model])
toaster({
title: 'Download Completed',
description: `Download ${state.modelId} completed`,
type: 'success',
})
}
} else if (state.downloadState === 'error') {
// download error
delete currentState[state.modelId]
set(removeDownloadingModelAtom, state.modelId)
const model = get(configuredModelsAtom).find(
(e) => e.id === state.modelId
)
if (model) set(downloadedModelsAtom, (prev) => [...prev, model])
toaster({
title: 'Download Completed',
description: `Download ${state.modelId} completed`,
type: 'success',
})
}
} else if (state.downloadState === 'error') {
// download error
delete currentState[state.modelId]
set(removeDownloadingModelAtom, state.modelId)
if (state.error === 'aborted') {
toaster({
title: 'Cancel Download',
description: `Model ${state.modelId} download cancelled`,
type: 'warning',
})
} else {
let error = state.error
if (
typeof error?.includes === 'function' &&
state.error?.includes('certificate')
) {
error +=
'. To fix enable "Ignore SSL Certificates" in Advanced settings.'
if (state.error === 'aborted') {
toaster({
title: 'Cancel Download',
description: `Model ${state.modelId} download cancelled`,
type: 'warning',
})
} else {
let error = state.error
if (
typeof error?.includes === 'function' &&
state.error?.includes('certificate')
) {
error +=
'. To fix enable "Ignore SSL Certificates" in Advanced settings.'
}
toaster({
title: 'Download Failed',
description: `Model ${state.modelId} download failed: ${error}`,
type: 'error',
})
}
} else {
// download in progress
if (state.size.total === 0) {
// this is initial state, just set the state
currentState[state.modelId] = state
set(modelDownloadStateAtom, currentState)
return
}
toaster({
title: 'Download Failed',
description: `Model ${state.modelId} download failed: ${error}`,
type: 'error',
})
}
} else {
// download in progress
if (state.size.total === 0) {
// this is initial state, just set the state
currentState[state.modelId] = state
set(modelDownloadStateAtom, currentState)
return
}
const modelDownloadState = currentState[state.modelId]
if (!modelDownloadState) {
console.debug('setDownloadStateAtom: modelDownloadState not found')
return
}
const modelDownloadState = currentState[state.modelId]
if (!modelDownloadState) {
console.debug('setDownloadStateAtom: modelDownloadState not found')
return
}
// delete the children if the filename is matched and replace the new state
const updatedChildren: DownloadState[] =
modelDownloadState.children!.filter(
(m) => m.fileName !== state.fileName
// delete the children if the filename is matched and replace the new state
const updatedChildren: DownloadState[] = (
modelDownloadState.children ?? []
).filter((m) => m.fileName !== state.fileName)
updatedChildren.push(state)
// re-calculate the overall progress if we have all the children download data
const isAnyChildDownloadNotReady = updatedChildren.some(
(m) =>
m.size.total === 0 &&
!modelDownloadState.children?.some(
(e) => e.fileName === m.fileName && e.downloadState === 'end'
) &&
modelDownloadState.children?.some((e) => e.fileName === m.fileName)
)
updatedChildren.push(state)
modelDownloadState.children = updatedChildren
if (isAnyChildDownloadNotReady) {
// just update the children
currentState[state.modelId] = modelDownloadState
set(modelDownloadStateAtom, currentState)
return
}
// re-calculate the overall progress if we have all the children download data
const isAnyChildDownloadNotReady = updatedChildren.some(
(m) => m.size.total === 0
)
const parentTotalSize = modelDownloadState.size.total
if (parentTotalSize === 0) {
// calculate the total size of the parent by sum all children total size
const totalSize = updatedChildren.reduce(
(acc, m) => acc + m.size.total,
0
)
modelDownloadState.children = updatedChildren
modelDownloadState.size.total = totalSize
}
if (isAnyChildDownloadNotReady) {
// just update the children
currentState[state.modelId] = modelDownloadState
set(modelDownloadStateAtom, currentState)
return
}
const parentTotalSize = modelDownloadState.size.total
if (parentTotalSize === 0) {
// calculate the total size of the parent by sum all children total size
const totalSize = updatedChildren.reduce(
(acc, m) => acc + m.size.total,
// calculate the total transferred size by sum all children transferred size
const transferredSize = updatedChildren.reduce(
(acc, m) => acc + m.size.transferred,
0
)
modelDownloadState.size.total = totalSize
modelDownloadState.size.transferred = transferredSize
modelDownloadState.percent =
parentTotalSize === 0 ? 0 : transferredSize / parentTotalSize
currentState[state.modelId] = modelDownloadState
}
// calculate the total transferred size by sum all children transferred size
const transferredSize = updatedChildren.reduce(
(acc, m) => acc + m.size.transferred,
0
)
modelDownloadState.size.transferred = transferredSize
modelDownloadState.percent =
parentTotalSize === 0 ? 0 : transferredSize / parentTotalSize
currentState[state.modelId] = modelDownloadState
set(modelDownloadStateAtom, currentState)
} catch (e) {
console.debug('setDownloadStateAtom: state', state)
console.debug('setDownloadStateAtom: error', e)
}
set(modelDownloadStateAtom, currentState)
}
)

View File

@@ -0,0 +1,21 @@
import { useCallback } from 'react'
import { ExtensionTypeEnum, MonitoringExtension } from '@janhq/core'
import { extensionManager } from '@/extension'
export default function useGpuSetting() {
const getGpuSettings = useCallback(async () => {
const gpuSetting = await extensionManager
?.get<MonitoringExtension>(ExtensionTypeEnum.SystemMonitoring)
?.getGpuSetting()
if (!gpuSetting) {
console.debug('No GPU setting found')
return undefined
}
return gpuSetting
}, [])
return { getGpuSettings }
}
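A usage sketch matching the call site in `useDownloadModel` above (`useLogGpuArch` is a hypothetical hook for illustration):

```typescript
const useLogGpuArch = () => {
  const { getGpuSettings } = useGpuSetting()
  return useCallback(async () => {
    const gpuSettings = await getGpuSettings() // undefined if unavailable
    console.debug('First GPU arch:', gpuSettings?.gpus[0]?.arch)
  }, [getGpuSettings])
}
```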

View File

@@ -38,6 +38,7 @@ const nextConfig = {
isMac: process.platform === 'darwin',
isWindows: process.platform === 'win32',
isLinux: process.platform === 'linux',
PLATFORM: JSON.stringify(process.platform),
}),
]
return config

View File

@@ -3,6 +3,8 @@ import { useState } from 'react'
import { Model } from '@janhq/core'
import { Badge } from '@janhq/uikit'
import { twMerge } from 'tailwind-merge'
import ExploreModelItemHeader from '@/screens/ExploreModels/ExploreModelItemHeader'
type Props = {
@@ -75,7 +77,16 @@ const ExploreModelItem: React.FC<Props> = ({ model }) => {
<span className="font-semibold text-muted-foreground">
Format
</span>
<p className="mt-2 font-medium uppercase">{model.format}</p>
<p
className={twMerge(
'mt-2 font-medium',
!model.format?.includes(' ') &&
!model.format?.includes('-') &&
'uppercase'
)}
>
{model.format}
</p>
</div>
</div>
</div>

View File

@@ -152,6 +152,7 @@ const ExploreModelItemHeader: React.FC<Props> = ({ model, onClick, open }) => {
<div className="flex items-center justify-between p-4">
<div className="flex items-center gap-2">
<span className="font-bold">{model.name}</span>
<EngineBadge engine={model.engine} />
</div>
<div className="inline-flex items-center space-x-2">
<span className="mr-4 font-semibold text-muted-foreground">
@@ -172,4 +173,21 @@
)
}
type EngineBadgeProps = {
engine: string
}
const EngineBadge: React.FC<EngineBadgeProps> = ({ engine }) => {
switch (engine) {
case 'nitro-tensorrt-llm':
return (
<div className="flex items-center justify-center rounded-md bg-[#EFF6FF] px-2 py-[2px] font-semibold text-primary">
TensorRT-LLM
</div>
)
default:
return null
}
}
export default ExploreModelItemHeader

View File

@@ -0,0 +1,225 @@
import { useCallback, useEffect, useState } from 'react'
import {
Compatibility,
GpuSetting,
InstallationState,
abortDownload,
systemInformations,
} from '@janhq/core'
import {
Button,
Progress,
Tooltip,
TooltipArrow,
TooltipContent,
TooltipPortal,
TooltipTrigger,
} from '@janhq/uikit'
import { InfoCircledIcon } from '@radix-ui/react-icons'
import { useAtomValue } from 'jotai'
import { extensionManager } from '@/extension'
import Extension from '@/extension/Extension'
import { installingExtensionAtom } from '@/helpers/atoms/Extension.atom'
type Props = {
item: Extension
}
const TensorRtExtensionItem: React.FC<Props> = ({ item }) => {
const [compatibility, setCompatibility] = useState<Compatibility | undefined>(
undefined
)
const [installState, setInstallState] =
useState<InstallationState>('NotRequired')
const installingExtensions = useAtomValue(installingExtensionAtom)
const [isGpuSupported, setIsGpuSupported] = useState<boolean>(false)
const isInstalling = installingExtensions.some(
(e) => e.extensionId === item.name
)
const progress = isInstalling
? installingExtensions.find((e) => e.extensionId === item.name)
?.percentage ?? -1
: -1
useEffect(() => {
const getSystemInfos = async () => {
const info = await systemInformations()
if (!info) {
setIsGpuSupported(false)
return
}
const gpuSettings: GpuSetting | undefined = info.gpuSetting
if (!gpuSettings || gpuSettings.gpus.length === 0) {
setIsGpuSupported(false)
return
}
const arch = gpuSettings.gpus[0].arch
if (!arch) {
setIsGpuSupported(false)
return
}
const supportedGpuArch = ['turing', 'ampere', 'ada']
setIsGpuSupported(supportedGpuArch.includes(arch))
}
getSystemInfos()
}, [])
useEffect(() => {
const getExtensionInstallationState = async () => {
const extension = extensionManager.get(item.name ?? '')
if (!extension) return
if (typeof extension?.installationState === 'function') {
const installState = await extension.installationState()
setInstallState(installState)
}
}
getExtensionInstallationState()
}, [item.name, isInstalling])
useEffect(() => {
const extension = extensionManager.get(item.name ?? '')
if (!extension) return
setCompatibility(extension.compatibility())
}, [setCompatibility, item.name])
const onInstallClick = useCallback(async () => {
const extension = extensionManager.get(item.name ?? '')
if (!extension) return
await extension.install()
}, [item.name])
const onCancelInstallingClick = () => {
const extension = installingExtensions.find(
(e) => e.extensionId === item.name
)
if (extension?.localPath) {
abortDownload(extension.localPath)
}
}
return (
<div className="flex w-full items-start justify-between border-b border-border py-4 first:pt-4 last:border-none">
<div className="flex-1 flex-shrink-0 space-y-1.5">
<div className="flex items-center gap-x-2">
<h6 className="text-sm font-semibold capitalize">
TensorRT-LLM Extension
</h6>
<p className="whitespace-pre-wrap text-sm font-semibold leading-relaxed">
v{item.version}
</p>
</div>
<p className="whitespace-pre-wrap leading-relaxed">
{item.description}
</p>
</div>
{(!compatibility || compatibility['platform']?.includes(PLATFORM)) &&
isGpuSupported ? (
<div className="flex min-w-[150px] flex-row justify-end">
<InstallStateIndicator
installProgress={progress}
installState={installState}
onInstallClick={onInstallClick}
onCancelClick={onCancelInstallingClick}
/>
</div>
) : (
<div className="rounded-md bg-secondary px-3 py-1.5 text-sm font-semibold text-gray-400">
<div className="flex flex-row items-center justify-center gap-1">
Incompatible{' '}
<Tooltip>
<TooltipTrigger className="w-full">
<InfoCircledIcon />
</TooltipTrigger>
<TooltipPortal>
<TooltipContent side="top">
{compatibility ? (
<span>
Only available on{' '}
{compatibility?.platform
?.map((e: string) =>
e === 'win32'
? 'Windows'
: e === 'linux'
? 'Linux'
: 'MacOS'
)
.join(', ')}
</span>
) : (
<span>
Your GPUs are not compatible with this extension
</span>
)}
<TooltipArrow />
</TooltipContent>
</TooltipPortal>
</Tooltip>
</div>
</div>
)}
</div>
)
}
type InstallStateProps = {
installProgress: number
installState: InstallationState
onInstallClick: () => void
onCancelClick: () => void
}
const InstallStateIndicator: React.FC<InstallStateProps> = ({
installProgress,
installState,
onInstallClick,
onCancelClick,
}) => {
// TODO: NamH support dark mode for this
if (installProgress !== -1) {
const progress = installProgress * 100
return (
<div className="flex h-10 flex-row items-center justify-center space-x-2 rounded-md bg-[#EFF8FF] px-4 text-primary">
<button onClick={onCancelClick} className="font-semibold text-primary">
Cancel
</button>
<div className="flex w-[113px] flex-row items-center justify-center space-x-2 rounded-md bg-[#D1E9FF] px-2 py-[2px]">
<Progress className="h-1 w-[69px]" value={progress} />
<span className="text-xs font-bold text-primary">
{progress.toFixed(0)}%
</span>
</div>
</div>
)
}
// TODO: NamH check for dark mode here
switch (installState) {
case 'Installed':
return (
<div className="rounded-md bg-secondary px-3 py-1.5 text-sm font-semibold text-gray-400">
Installed
</div>
)
case 'NotInstalled':
return (
<Button themes="secondaryBlue" size="sm" onClick={onInstallClick}>
Install
</Button>
)
default:
return <div></div>
}
}
export default TensorRtExtensionItem

View File

@@ -4,13 +4,18 @@ import React, { useState, useEffect, useRef } from 'react'
import { Button, ScrollArea } from '@janhq/uikit'
import Loader from '@/containers/Loader'
import { formatExtensionsName } from '@/utils/converter'
import TensorRtExtensionItem from './TensorRtExtensionItem'
import { extensionManager } from '@/extension'
import Extension from '@/extension/Extension'
const ExtensionCatalog = () => {
const [activeExtensions, setActiveExtensions] = useState<Extension[]>([])
const [showLoading, setShowLoading] = useState(false)
const fileInputRef = useRef<HTMLInputElement | null>(null)
/**
* Fetches the active extensions and their preferences from the `extensions` and `preferences` modules.
@@ -63,65 +68,76 @@
const handleFileChange = (event: React.ChangeEvent<HTMLInputElement>) => {
const file = event.target.files?.[0]
if (file) {
setShowLoading(true)
install(event)
}
}
return (
<ScrollArea className="h-full w-full px-4">
<div className="block w-full">
{activeExtensions.map((item, i) => {
return (
<div
key={i}
className="flex w-full items-start justify-between border-b border-border py-4 first:pt-4 last:border-none"
>
<div className="w-4/5 flex-shrink-0 space-y-1.5">
<div className="flex gap-x-2">
<h6 className="text-sm font-semibold capitalize">
{formatExtensionsName(item.name ?? item.description ?? '')}
</h6>
<p className="whitespace-pre-wrap font-semibold leading-relaxed ">
v{item.version}
<>
<ScrollArea className="h-full w-full px-4">
<div className="block w-full">
{activeExtensions.map((item, i) => {
// TODO: this is bad code, rewrite it
if (item.name === '@janhq/tensorrt-llm-extension') {
return <TensorRtExtensionItem key={i} item={item} />
}
return (
<div
key={i}
className="flex w-full items-start justify-between border-b border-border py-4 first:pt-4 last:border-none"
>
<div className="w-4/5 flex-shrink-0 space-y-1.5">
<div className="flex items-center gap-x-2">
<h6 className="text-sm font-semibold capitalize">
{formatExtensionsName(
item.name ?? item.description ?? ''
)}
</h6>
<p className="whitespace-pre-wrap text-sm font-semibold leading-relaxed ">
v{item.version}
</p>
</div>
<p className="whitespace-pre-wrap leading-relaxed ">
{item.description}
</p>
</div>
<p className="whitespace-pre-wrap leading-relaxed ">
{item.description}
</p>
</div>
)
})}
{/* Manual Installation */}
<div className="flex w-full items-start justify-between border-b border-border py-4 first:pt-0 last:border-none">
<div className="w-4/5 flex-shrink-0 space-y-1.5">
<div className="flex gap-x-2">
<h6 className="text-sm font-semibold capitalize">
Manual Installation
</h6>
</div>
<p className="whitespace-pre-wrap leading-relaxed ">
Select an extension file to install (.tgz)
</p>
</div>
)
})}
{/* Manual Installation */}
<div className="flex w-full items-start justify-between border-b border-border py-4 first:pt-0 last:border-none">
<div className="w-4/5 flex-shrink-0 space-y-1.5">
<div className="flex gap-x-2">
<h6 className="text-sm font-semibold capitalize">
Manual Installation
</h6>
<div>
<input
type="file"
style={{ display: 'none' }}
ref={fileInputRef}
onChange={handleFileChange}
/>
<Button
themes="secondaryBlue"
size="sm"
onClick={() => fileInputRef.current?.click()}
>
Select
</Button>
</div>
<p className="whitespace-pre-wrap leading-relaxed ">
Select an extension file to install (.tgz)
</p>
</div>
<div>
<input
type="file"
style={{ display: 'none' }}
ref={fileInputRef}
onChange={handleFileChange}
/>
<Button
themes="secondaryBlue"
size="sm"
onClick={() => fileInputRef.current?.click()}
>
Select
</Button>
</div>
</div>
</div>
</ScrollArea>
</ScrollArea>
{showLoading && <Loader description="Installing..." />}
</>
)
}

View File

@@ -0,0 +1,24 @@
import { ExtensionTypeEnum, MonitoringExtension } from '@janhq/core'
import { toaster } from '@/containers/Toast'
import { extensionManager } from '@/extension'
export const appService = {
systemInformations: async () => {
const gpuSetting = await extensionManager
?.get<MonitoringExtension>(ExtensionTypeEnum.SystemMonitoring)
?.getGpuSetting()
return {
gpuSetting,
// TODO: Other system information
}
},
showToast: (title: string, description: string) => {
toaster({
title,
description: description,
})
},
}

View File

@@ -1,5 +1,7 @@ import { appService } from './appService'
import { appService } from './appService'
import { EventEmitter } from './eventsService'
import { restAPI } from './restService'
export const setupCoreServices = () => {
if (typeof window === 'undefined') {
console.debug('undefined', window)
@@ -10,7 +12,10 @@ export const setupCoreServices = () => {
if (!window.core) {
window.core = {
events: new EventEmitter(),
api: window.electronAPI ?? restAPI,
api: {
...(window.electronAPI ? window.electronAPI : restAPI),
...appService,
},
}
}
}

View File

@@ -11,6 +11,7 @@ declare global {
declare const isMac: boolean
declare const isWindows: boolean
declare const isLinux: boolean
declare const PLATFORM: string
interface Core {
api: APIFunctions
events: EventEmitter