diff --git a/.gitignore b/.gitignore
index ae0691605..d9787d87b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,16 +22,16 @@ package-lock.json
core/lib/**
# Nitro binary files
-extensions/inference-nitro-extension/bin/*/nitro
-extensions/inference-nitro-extension/bin/*/*.metal
-extensions/inference-nitro-extension/bin/*/*.exe
-extensions/inference-nitro-extension/bin/*/*.dll
-extensions/inference-nitro-extension/bin/*/*.exp
-extensions/inference-nitro-extension/bin/*/*.lib
-extensions/inference-nitro-extension/bin/saved-*
-extensions/inference-nitro-extension/bin/*.tar.gz
-extensions/inference-nitro-extension/bin/vulkaninfoSDK.exe
-extensions/inference-nitro-extension/bin/vulkaninfo
+extensions/*-extension/bin/*/nitro
+extensions/*-extension/bin/*/*.metal
+extensions/*-extension/bin/*/*.exe
+extensions/*-extension/bin/*/*.dll
+extensions/*-extension/bin/*/*.exp
+extensions/*-extension/bin/*/*.lib
+extensions/*-extension/bin/saved-*
+extensions/*-extension/bin/*.tar.gz
+extensions/*-extension/bin/vulkaninfoSDK.exe
+extensions/*-extension/bin/vulkaninfo
# Turborepo
diff --git a/README.md b/README.md
index bc206f4eb..adebb8ea1 100644
--- a/README.md
+++ b/README.md
@@ -76,31 +76,31 @@ Jan is an open-source ChatGPT alternative that runs 100% offline on your compute
| Experimental (Nightly Build) |
[Nightly download badge table: this hunk updates the badge links for jan.exe (Windows), Intel and M1/M2 (macOS), and jan.deb / jan.AppImage (Linux); the surrounding <a>/<img> markup was lost in extraction]
diff --git a/core/package.json b/core/package.json
index 2bf3e1735..2f4f6b576 100644
--- a/core/package.json
+++ b/core/package.json
@@ -45,11 +45,12 @@
"start": "rollup -c rollup.config.ts -w"
},
"devDependencies": {
- "jest": "^29.7.0",
"@types/jest": "^29.5.12",
"@types/node": "^12.0.2",
- "eslint-plugin-jest": "^27.9.0",
"eslint": "8.57.0",
+ "eslint-plugin-jest": "^27.9.0",
+ "jest": "^29.7.0",
+ "rimraf": "^3.0.2",
"rollup": "^2.38.5",
"rollup-plugin-commonjs": "^9.1.8",
"rollup-plugin-json": "^3.1.0",
@@ -58,7 +59,10 @@
"rollup-plugin-typescript2": "^0.36.0",
"ts-jest": "^29.1.2",
"tslib": "^2.6.2",
- "typescript": "^5.3.3",
- "rimraf": "^3.0.2"
+ "typescript": "^5.3.3"
+ },
+ "dependencies": {
+ "rxjs": "^7.8.1",
+ "ulid": "^2.3.0"
}
}
diff --git a/core/rollup.config.ts b/core/rollup.config.ts
index ebea8e237..95305bf25 100644
--- a/core/rollup.config.ts
+++ b/core/rollup.config.ts
@@ -64,7 +64,7 @@ export default [
// Allow json resolution
json(),
// Compile TypeScript files
- typescript({ useTsconfigDeclarationDir: true }),
+ typescript({ useTsconfigDeclarationDir: true, exclude: ['src/*.ts', 'src/extensions/**'] }),
// Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
commonjs(),
// Allow node_modules resolution, so you can use 'external' to control
diff --git a/core/src/api/index.ts b/core/src/api/index.ts
index e62b49087..f97593934 100644
--- a/core/src/api/index.ts
+++ b/core/src/api/index.ts
@@ -33,6 +33,8 @@ export enum AppRoute {
stopServer = 'stopServer',
log = 'log',
logServer = 'logServer',
+ systemInformations = 'systemInformations',
+ showToast = 'showToast',
}
export enum AppEvent {
@@ -56,6 +58,7 @@ export enum DownloadEvent {
onFileDownloadUpdate = 'onFileDownloadUpdate',
onFileDownloadError = 'onFileDownloadError',
onFileDownloadSuccess = 'onFileDownloadSuccess',
+ onFileUnzipSuccess = 'onFileUnzipSuccess',
}
export enum LocalImportModelEvent {
diff --git a/core/src/core.ts b/core/src/core.ts
index 6e2442c2b..b8cbd3162 100644
--- a/core/src/core.ts
+++ b/core/src/core.ts
@@ -1,4 +1,4 @@
-import { FileStat } from './types'
+import { DownloadRequest, FileStat, NetworkConfig } from './types'
/**
* Execute a extension module function in main process
@@ -17,18 +17,16 @@ const executeOnMain: (extension: string, method: string, ...args: any[]) => Prom
/**
* Downloads a file from a URL and saves it to the local file system.
- * @param {string} url - The URL of the file to download.
- * @param {string} fileName - The name to use for the downloaded file.
- * @param {object} network - Optional object to specify proxy/whether to ignore SSL certificates.
+ *
+ * @param {DownloadRequest} downloadRequest - The request to download the file.
+ * @param {NetworkConfig} network - Optional object to specify proxy/whether to ignore SSL certificates.
+ *
* @returns {Promise} A promise that resolves when the file is downloaded.
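+ *
+ * @example
+ * // Illustrative only; the URL and path are hypothetical:
+ * downloadFile(
+ *   { url: 'https://example.com/model.gguf', localPath: 'models/my-model/model.gguf', downloadType: 'model' },
+ *   { ignoreSSL: false }
+ * )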
*/
-const downloadFile: (
- url: string,
- fileName: string,
- network?: { proxy?: string; ignoreSSL?: boolean }
-) => Promise = (url, fileName, network) => {
- return global.core?.api?.downloadFile(url, fileName, network)
-}
+const downloadFile: (downloadRequest: DownloadRequest, network?: NetworkConfig) => Promise<void> = (
+ downloadRequest,
+ network
+) => global.core?.api?.downloadFile(downloadRequest, network)
/**
* Aborts the download of a specific file.
@@ -108,6 +106,20 @@ const log: (message: string, fileName?: string) => void = (message, fileName) =>
const isSubdirectory: (from: string, to: string) => Promise<boolean> = (from: string, to: string) =>
global.core.api?.isSubdirectory(from, to)
+/**
+ * Get system information
+ * @returns {Promise} - A promise that resolves with the system information.
+ */
+const systemInformations: () => Promise<any> = () => global.core.api?.systemInformations()
+
+/**
+ * Show a toast message from the renderer (browser) process.
+ * @param title - The toast title.
+ * @param message - The toast body text.
+ */
+const showToast: (title: string, message: string) => void = (title, message) =>
+ global.core.api?.showToast(title, message)
/**
* Register extension point function type definition
*/
@@ -134,5 +146,7 @@ export {
log,
isSubdirectory,
getUserHomePath,
+ systemInformations,
+ showToast,
FileStat,
}
diff --git a/core/src/extension.ts b/core/src/extension.ts
index 3b3edc7b3..22accb4b4 100644
--- a/core/src/extension.ts
+++ b/core/src/extension.ts
@@ -10,6 +10,22 @@ export enum ExtensionTypeEnum {
export interface ExtensionType {
type(): ExtensionTypeEnum | undefined
}
+
+export interface Compatibility {
+ platform: string[]
+ version: string
+}
+
+const ALL_INSTALLATION_STATE = [
+  'NotRequired', // prerequisites are not required
+  'Installed', // required and installed; good to go
+  'NotInstalled', // required but not yet installed
+  'Corrupted', // required but corrupted; needs to be re-downloaded
+] as const
+
+export type InstallationStateTuple = typeof ALL_INSTALLATION_STATE
+export type InstallationState = InstallationStateTuple[number]
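+
+// Example (illustrative): an extension that bundles a native binary would report
+// 'NotInstalled' until install() has downloaded it, while a pure TypeScript
+// extension can stay 'NotRequired'.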
+
/**
* Represents a base extension.
* This class should be extended by any class that represents an extension.
@@ -33,4 +49,32 @@ export abstract class BaseExtension implements ExtensionType {
* Any cleanup logic for the extension should be put here.
*/
abstract onUnload(): void
+
+ /**
+ * The compatibility of the extension.
+   * Used to check whether the extension is compatible with the current environment.
+   * @returns {Compatibility | undefined} Supported platforms and required app version, or undefined if unrestricted.
+ */
+ compatibility(): Compatibility | undefined {
+ return undefined
+ }
+
+ /**
+ * Determine if the prerequisites for the extension are installed.
+ *
+   * @returns {Promise<InstallationState>} The installation state of the extension's prerequisites.
+ */
+  async installationState(): Promise<InstallationState> {
+ return 'NotRequired'
+ }
+
+ /**
+ * Install the prerequisites for the extension.
+ *
+ * @returns {Promise}
+ */
+ // @ts-ignore
+  async install(...args): Promise<void> {
+ return
+ }
}
diff --git a/core/src/extensions/ai-engines/AIEngine.ts b/core/src/extensions/ai-engines/AIEngine.ts
new file mode 100644
index 000000000..608b5c193
--- /dev/null
+++ b/core/src/extensions/ai-engines/AIEngine.ts
@@ -0,0 +1,60 @@
+import { getJanDataFolderPath, joinPath } from '../../core'
+import { events } from '../../events'
+import { BaseExtension } from '../../extension'
+import { fs } from '../../fs'
+import { Model, ModelEvent } from '../../types'
+
+/**
+ * Base AIEngine
+ * Applicable to all AI Engines
+ */
+export abstract class AIEngine extends BaseExtension {
+ // The inference engine
+ abstract provider: string
+ // The model folder
+ modelFolder: string = 'models'
+
+  abstract models(): Promise<Model[]>
+
+ /**
+ * On extension load, subscribe to events.
+ */
+ onLoad() {
+ this.prePopulateModels()
+ }
+
+ /**
+ * Pre-populate models to App Data Folder
+ */
+  prePopulateModels(): Promise<void> {
+ return this.models().then((models) => {
+      const prePopulateOperations = models.map((model) =>
+ getJanDataFolderPath()
+ .then((janDataFolder) =>
+ // Attempt to create the model folder
+ joinPath([janDataFolder, this.modelFolder, model.id]).then((path) =>
+ fs
+ .mkdirSync(path)
+              .catch(() => {}) // ignore mkdir errors (e.g. the folder already exists)
+ .then(() => path)
+ )
+ )
+ .then((path) => joinPath([path, 'model.json']))
+ .then((path) => {
+          // Do not overwrite existing model.json
+ return fs.existsSync(path).then((exist: any) => {
+ if (!exist) return fs.writeFileSync(path, JSON.stringify(model, null, 2))
+ })
+ })
+ .catch((e: Error) => {
+ console.error('Error', e)
+ })
+ )
+      Promise.all(prePopulateOperations).then(() =>
+ // Emit event to update models
+ // So the UI can update the models list
+ events.emit(ModelEvent.OnModelsUpdate, {})
+ )
+ })
+ }
+}
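+
+// Illustrative: a concrete engine subclass supplies `provider` and `models()`
+// (plus the remaining BaseExtension abstract members) and inherits model pre-population.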
diff --git a/core/src/extensions/ai-engines/LocalOAIEngine.ts b/core/src/extensions/ai-engines/LocalOAIEngine.ts
new file mode 100644
index 000000000..79dbcbf5e
--- /dev/null
+++ b/core/src/extensions/ai-engines/LocalOAIEngine.ts
@@ -0,0 +1,63 @@
+import { executeOnMain, getJanDataFolderPath, joinPath } from '../../core'
+import { events } from '../../events'
+import { Model, ModelEvent } from '../../types'
+import { OAIEngine } from './OAIEngine'
+
+/**
+ * Base OAI Local Inference Provider
+ * Added the implementation of loading and unloading model (applicable to local inference providers)
+ */
+export abstract class LocalOAIEngine extends OAIEngine {
+  // Names of the node-module functions invoked to load/unload a model
+ loadModelFunctionName: string = 'loadModel'
+ unloadModelFunctionName: string = 'unloadModel'
+ isRunning: boolean = false
+
+ /**
+ * On extension load, subscribe to events.
+ */
+ onLoad() {
+ super.onLoad()
+ // These events are applicable to local inference providers
+ events.on(ModelEvent.OnModelInit, (model: Model) => this.onModelInit(model))
+ events.on(ModelEvent.OnModelStop, (model: Model) => this.onModelStop(model))
+ }
+
+ /**
+ * Load the model.
+ */
+ async onModelInit(model: Model) {
+    if (model.engine?.toString() !== this.provider) return
+
+ const modelFolder = await joinPath([await getJanDataFolderPath(), this.modelFolder, model.id])
+
+ const res = await executeOnMain(this.nodeModule, this.loadModelFunctionName, {
+ modelFolder,
+ model,
+ })
+
+ if (res?.error) {
+ events.emit(ModelEvent.OnModelFail, {
+ ...model,
+ error: res.error,
+ })
+ return
+ } else {
+ this.loadedModel = model
+ events.emit(ModelEvent.OnModelReady, model)
+ this.isRunning = true
+ }
+ }
+ /**
+ * Stops the model.
+ */
+ onModelStop(model: Model) {
+ if (model.engine?.toString() !== this.provider) return
+
+ this.isRunning = false
+
+ executeOnMain(this.nodeModule, this.unloadModelFunctionName).then(() => {
+ events.emit(ModelEvent.OnModelStopped, {})
+ })
+ }
+}
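+
+// Illustrative contract: the node module named by `nodeModule` is expected to export
+// functions matching `loadModelFunctionName`/`unloadModelFunctionName`, e.g.
+// loadModel({ modelFolder, model }) and unloadModel(), invoked via executeOnMain.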
diff --git a/core/src/extensions/ai-engines/OAIEngine.ts b/core/src/extensions/ai-engines/OAIEngine.ts
new file mode 100644
index 000000000..3e583c9b9
--- /dev/null
+++ b/core/src/extensions/ai-engines/OAIEngine.ts
@@ -0,0 +1,116 @@
+import { requestInference } from './helpers/sse'
+import { ulid } from 'ulid'
+import { AIEngine } from './AIEngine'
+import {
+ ChatCompletionRole,
+ ContentType,
+ InferenceEvent,
+ MessageEvent,
+ MessageRequest,
+ MessageRequestType,
+ MessageStatus,
+ Model,
+ ModelInfo,
+ ThreadContent,
+ ThreadMessage,
+} from '../../types'
+import { events } from '../../events'
+
+/**
+ * Base OAI Inference Provider
+ * Applicable to all OAI compatible inference providers
+ */
+export abstract class OAIEngine extends AIEngine {
+  // The inference endpoint and the node module backing this engine
+ abstract inferenceUrl: string
+ abstract nodeModule: string
+
+ // Controller to handle stop requests
+ controller = new AbortController()
+ isCancelled = false
+
+ // The loaded model instance
+ loadedModel: Model | undefined
+
+ /**
+ * On extension load, subscribe to events.
+ */
+ onLoad() {
+ super.onLoad()
+ events.on(MessageEvent.OnMessageSent, (data: MessageRequest) => this.inference(data))
+ events.on(InferenceEvent.OnInferenceStopped, () => this.onInferenceStopped())
+ }
+
+ /**
+ * On extension unload
+ */
+ onUnload(): void {}
+
+  /**
+ * Inference request
+ */
+ inference(data: MessageRequest) {
+ if (data.model?.engine?.toString() !== this.provider) return
+
+ const timestamp = Date.now()
+ const message: ThreadMessage = {
+ id: ulid(),
+ thread_id: data.threadId,
+ type: data.type,
+ assistant_id: data.assistantId,
+ role: ChatCompletionRole.Assistant,
+ content: [],
+ status: MessageStatus.Pending,
+ created: timestamp,
+ updated: timestamp,
+ object: 'thread.message',
+ }
+
+ if (data.type !== MessageRequestType.Summary) {
+ events.emit(MessageEvent.OnMessageResponse, message)
+ }
+
+ this.isCancelled = false
+ this.controller = new AbortController()
+
+ const model: ModelInfo = {
+ ...(this.loadedModel ? this.loadedModel : {}),
+ ...data.model,
+ }
+
+ requestInference(this.inferenceUrl, data.messages ?? [], model, this.controller).subscribe({
+ next: (content: any) => {
+ const messageContent: ThreadContent = {
+ type: ContentType.Text,
+ text: {
+ value: content.trim(),
+ annotations: [],
+ },
+ }
+ message.content = [messageContent]
+ events.emit(MessageEvent.OnMessageUpdate, message)
+ },
+ complete: async () => {
+ message.status = message.content.length ? MessageStatus.Ready : MessageStatus.Error
+ events.emit(MessageEvent.OnMessageUpdate, message)
+ },
+ error: async (err: any) => {
+ if (this.isCancelled || message.content.length) {
+ message.status = MessageStatus.Stopped
+ events.emit(MessageEvent.OnMessageUpdate, message)
+ return
+ }
+ message.status = MessageStatus.Error
+ events.emit(MessageEvent.OnMessageUpdate, message)
+ },
+ })
+ }
+
+ /**
+ * Stops the inference.
+ */
+ onInferenceStopped() {
+ this.isCancelled = true
+ this.controller?.abort()
+ }
+}
diff --git a/core/src/extensions/ai-engines/helpers/sse.ts b/core/src/extensions/ai-engines/helpers/sse.ts
new file mode 100644
index 000000000..3d810d934
--- /dev/null
+++ b/core/src/extensions/ai-engines/helpers/sse.ts
@@ -0,0 +1,67 @@
+import { Observable } from 'rxjs'
+import { ModelRuntimeParams } from '../../../types'
+/**
+ * Sends a request to the inference server to generate a response based on the recent messages.
+ * @param recentMessages - An array of recent messages to use as context for the inference.
+ * @returns An Observable that emits the generated response as a string.
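+ *
+ * @example
+ * // Illustrative usage; the URL and model id are hypothetical:
+ * requestInference('http://localhost:3928/v1/chat/completions', messages, {
+ *   id: 'my-model',
+ *   parameters: { stream: true },
+ * }).subscribe({ next: (text) => console.log(text), error: console.error })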
+ */
+export function requestInference(
+ inferenceUrl: string,
+ recentMessages: any[],
+ model: {
+ id: string
+ parameters: ModelRuntimeParams
+ },
+ controller?: AbortController
+): Observable<string> {
+ return new Observable((subscriber) => {
+ const requestBody = JSON.stringify({
+ messages: recentMessages,
+ model: model.id,
+ stream: true,
+ ...model.parameters,
+ })
+ fetch(inferenceUrl, {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ 'Access-Control-Allow-Origin': '*',
+ 'Accept': model.parameters.stream ? 'text/event-stream' : 'application/json',
+ },
+ body: requestBody,
+ signal: controller?.signal,
+ })
+ .then(async (response) => {
+ if (model.parameters.stream === false) {
+ const data = await response.json()
+ subscriber.next(data.choices[0]?.message?.content ?? '')
+ } else {
+ const stream = response.body
+ const decoder = new TextDecoder('utf-8')
+ const reader = stream?.getReader()
+ let content = ''
+
+        while (reader) {
+ const { done, value } = await reader.read()
+ if (done) {
+ break
+ }
+ const text = decoder.decode(value)
+ const lines = text.trim().split('\n')
+ for (const line of lines) {
+ if (line.startsWith('data: ') && !line.includes('data: [DONE]')) {
+ const data = JSON.parse(line.replace('data: ', ''))
+ content += data.choices[0]?.delta?.content ?? ''
+ if (content.startsWith('assistant: ')) {
+ content = content.replace('assistant: ', '')
+ }
+ subscriber.next(content)
+ }
+ }
+ }
+ }
+ subscriber.complete()
+ })
+ .catch((err) => subscriber.error(err))
+ })
+}
diff --git a/core/src/extensions/ai-engines/index.ts b/core/src/extensions/ai-engines/index.ts
new file mode 100644
index 000000000..f4da62a7c
--- /dev/null
+++ b/core/src/extensions/ai-engines/index.ts
@@ -0,0 +1,3 @@
+export * from './AIEngine'
+export * from './OAIEngine'
+export * from './LocalOAIEngine'
diff --git a/core/src/extensions/index.ts b/core/src/extensions/index.ts
index c6834482c..c049f3b3a 100644
--- a/core/src/extensions/index.ts
+++ b/core/src/extensions/index.ts
@@ -28,3 +28,8 @@ export { ModelExtension } from './model'
* Hugging Face extension for converting HF models to GGUF.
*/
export { HuggingFaceExtension } from './huggingface'
+
+/**
+ * Base AI Engines.
+ */
+export * from './ai-engines'
diff --git a/core/src/extensions/model.ts b/core/src/extensions/model.ts
index 79202398b..33eec0afc 100644
--- a/core/src/extensions/model.ts
+++ b/core/src/extensions/model.ts
@@ -1,5 +1,5 @@
import { BaseExtension, ExtensionTypeEnum } from '../extension'
-import { ImportingModel, Model, ModelInterface, OptionType } from '../index'
+import { GpuSetting, ImportingModel, Model, ModelInterface, OptionType } from '../index'
/**
* Model extension for managing models.
@@ -14,6 +14,7 @@ export abstract class ModelExtension extends BaseExtension implements ModelInter
abstract downloadModel(
model: Model,
+ gpuSettings?: GpuSetting,
network?: { proxy: string; ignoreSSL?: boolean }
  ): Promise<void>
  abstract cancelModelDownload(modelId: string): Promise<void>
diff --git a/core/src/extensions/monitoring.ts b/core/src/extensions/monitoring.ts
index ba193f0f4..8d61580fc 100644
--- a/core/src/extensions/monitoring.ts
+++ b/core/src/extensions/monitoring.ts
@@ -1,5 +1,5 @@
import { BaseExtension, ExtensionTypeEnum } from '../extension'
-import { MonitoringInterface } from '../index'
+import { GpuSetting, MonitoringInterface } from '../index'
/**
* Monitoring extension for system monitoring.
@@ -13,6 +13,7 @@ export abstract class MonitoringExtension extends BaseExtension implements Monit
return ExtensionTypeEnum.SystemMonitoring
}
+  abstract getGpuSetting(): Promise<GpuSetting>
abstract getResourcesInfo(): Promise
abstract getCurrentLoad(): Promise
}
diff --git a/core/src/node/api/processors/download.ts b/core/src/node/api/processors/download.ts
index 4ddeff160..8e8e08f2f 100644
--- a/core/src/node/api/processors/download.ts
+++ b/core/src/node/api/processors/download.ts
@@ -5,7 +5,7 @@ import { getJanDataFolderPath } from '../../helper'
import { DownloadManager } from '../../helper/download'
import { createWriteStream, renameSync } from 'fs'
import { Processor } from './Processor'
-import { DownloadState } from '../../../types'
+import { DownloadRequest, DownloadState, NetworkConfig } from '../../../types'
export class Downloader implements Processor {
observer?: Function
@@ -20,24 +20,27 @@ export class Downloader implements Processor {
return func(this.observer, ...args)
}
- downloadFile(observer: any, url: string, localPath: string, network: any) {
+ downloadFile(observer: any, downloadRequest: DownloadRequest, network?: NetworkConfig) {
const request = require('request')
const progress = require('request-progress')
const strictSSL = !network?.ignoreSSL
const proxy = network?.proxy?.startsWith('http') ? network.proxy : undefined
+
+ const { localPath, url } = downloadRequest
+ let normalizedPath = localPath
if (typeof localPath === 'string') {
- localPath = normalizeFilePath(localPath)
+ normalizedPath = normalizeFilePath(localPath)
}
- const array = localPath.split(sep)
+ const array = normalizedPath.split(sep)
const fileName = array.pop() ?? ''
const modelId = array.pop() ?? ''
- const destination = resolve(getJanDataFolderPath(), localPath)
+ const destination = resolve(getJanDataFolderPath(), normalizedPath)
const rq = request({ url, strictSSL, proxy })
// Put request to download manager instance
- DownloadManager.instance.setRequest(localPath, rq)
+ DownloadManager.instance.setRequest(normalizedPath, rq)
// Downloading file to a temp file first
const downloadingTempFile = `${destination}.download`
@@ -56,16 +59,25 @@ export class Downloader implements Processor {
total: 0,
transferred: 0,
},
+ children: [],
downloadState: 'downloading',
+ extensionId: downloadRequest.extensionId,
+ downloadType: downloadRequest.downloadType,
+ localPath: normalizedPath,
}
DownloadManager.instance.downloadProgressMap[modelId] = initialDownloadState
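+    // For extension downloads, surface the initial state to the renderer immediately
+    // so installation progress can be shown from the start.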
+ if (downloadRequest.downloadType === 'extension') {
+ observer?.(DownloadEvent.onFileDownloadUpdate, initialDownloadState)
+ }
+
progress(rq, {})
.on('progress', (state: any) => {
+ const currentDownloadState = DownloadManager.instance.downloadProgressMap[modelId]
const downloadState: DownloadState = {
+ ...currentDownloadState,
...state,
- modelId,
- fileName,
+ fileName: fileName,
downloadState: 'downloading',
}
console.debug('progress: ', downloadState)
@@ -76,22 +88,22 @@ export class Downloader implements Processor {
const currentDownloadState = DownloadManager.instance.downloadProgressMap[modelId]
const downloadState: DownloadState = {
...currentDownloadState,
+ fileName: fileName,
error: error.message,
downloadState: 'error',
}
- if (currentDownloadState) {
- DownloadManager.instance.downloadProgressMap[modelId] = downloadState
- }
observer?.(DownloadEvent.onFileDownloadError, downloadState)
+ DownloadManager.instance.downloadProgressMap[modelId] = downloadState
})
.on('end', () => {
const currentDownloadState = DownloadManager.instance.downloadProgressMap[modelId]
- if (currentDownloadState && DownloadManager.instance.networkRequests[localPath]) {
+ if (currentDownloadState && DownloadManager.instance.networkRequests[normalizedPath]) {
// Finished downloading, rename temp file to actual file
renameSync(downloadingTempFile, destination)
const downloadState: DownloadState = {
...currentDownloadState,
+ fileName: fileName,
downloadState: 'end',
}
observer?.(DownloadEvent.onFileDownloadSuccess, downloadState)
diff --git a/core/src/node/api/restful/helper/builder.ts b/core/src/node/api/restful/helper/builder.ts
index 7001c0c76..6b9bbb3a8 100644
--- a/core/src/node/api/restful/helper/builder.ts
+++ b/core/src/node/api/restful/helper/builder.ts
@@ -1,7 +1,16 @@
-import fs from 'fs'
+import {
+ existsSync,
+ readdirSync,
+ readFileSync,
+ writeFileSync,
+ mkdirSync,
+ appendFileSync,
+ createWriteStream,
+ rmdirSync,
+} from 'fs'
import { JanApiRouteConfiguration, RouteConfiguration } from './configuration'
import { join } from 'path'
-import { ContentType, MessageStatus, Model, ThreadMessage } from '../../../../index'
+import { ContentType, MessageStatus, Model, ThreadMessage } from '../../../../types'
import { getEngineConfiguration, getJanDataFolderPath } from '../../../helper'
import { DEFAULT_CHAT_COMPLETION_URL } from './consts'
@@ -9,12 +18,12 @@ import { DEFAULT_CHAT_COMPLETION_URL } from './consts'
export const getBuilder = async (configuration: RouteConfiguration) => {
const directoryPath = join(getJanDataFolderPath(), configuration.dirName)
try {
- if (!fs.existsSync(directoryPath)) {
+ if (!existsSync(directoryPath)) {
console.debug('model folder not found')
return []
}
- const files: string[] = fs.readdirSync(directoryPath)
+ const files: string[] = readdirSync(directoryPath)
const allDirectories: string[] = []
for (const file of files) {
@@ -46,8 +55,8 @@ export const getBuilder = async (configuration: RouteConfiguration) => {
}
const readModelMetadata = (path: string): string | undefined => {
- if (fs.existsSync(path)) {
- return fs.readFileSync(path, 'utf-8')
+ if (existsSync(path)) {
+ return readFileSync(path, 'utf-8')
} else {
return undefined
}
@@ -81,7 +90,7 @@ export const deleteBuilder = async (configuration: RouteConfiguration, id: strin
}
const objectPath = join(directoryPath, id)
- fs.rmdirSync(objectPath, { recursive: true })
+ rmdirSync(objectPath, { recursive: true })
return {
id: id,
object: configuration.delete.object,
@@ -96,20 +105,19 @@ export const getMessages = async (threadId: string): Promise<ThreadMessage[]> =>
const threadDirPath = join(getJanDataFolderPath(), 'threads', threadId)
const messageFile = 'messages.jsonl'
try {
- const files: string[] = fs.readdirSync(threadDirPath)
+ const files: string[] = readdirSync(threadDirPath)
if (!files.includes(messageFile)) {
console.error(`${threadDirPath} not contains message file`)
return []
}
const messageFilePath = join(threadDirPath, messageFile)
- if (!fs.existsSync(messageFilePath)) {
+ if (!existsSync(messageFilePath)) {
console.debug('message file not found')
return []
}
- const lines = fs
- .readFileSync(messageFilePath, 'utf-8')
+ const lines = readFileSync(messageFilePath, 'utf-8')
.toString()
.split('\n')
.filter((line: any) => line !== '')
@@ -157,11 +165,11 @@ export const createThread = async (thread: any) => {
const threadDirPath = join(getJanDataFolderPath(), 'threads', updatedThread.id)
const threadJsonPath = join(threadDirPath, threadMetadataFileName)
- if (!fs.existsSync(threadDirPath)) {
- fs.mkdirSync(threadDirPath)
+ if (!existsSync(threadDirPath)) {
+ mkdirSync(threadDirPath)
}
- await fs.writeFileSync(threadJsonPath, JSON.stringify(updatedThread, null, 2))
+ await writeFileSync(threadJsonPath, JSON.stringify(updatedThread, null, 2))
return updatedThread
} catch (err) {
return {
@@ -191,7 +199,7 @@ export const updateThread = async (threadId: string, thread: any) => {
const threadDirPath = join(getJanDataFolderPath(), 'threads', updatedThread.id)
const threadJsonPath = join(threadDirPath, threadMetadataFileName)
- await fs.writeFileSync(threadJsonPath, JSON.stringify(updatedThread, null, 2))
+ await writeFileSync(threadJsonPath, JSON.stringify(updatedThread, null, 2))
return updatedThread
} catch (err) {
return {
@@ -233,10 +241,10 @@ export const createMessage = async (threadId: string, message: any) => {
const threadDirPath = join(getJanDataFolderPath(), 'threads', threadId)
const threadMessagePath = join(threadDirPath, threadMessagesFileName)
- if (!fs.existsSync(threadDirPath)) {
- fs.mkdirSync(threadDirPath)
+ if (!existsSync(threadDirPath)) {
+ mkdirSync(threadDirPath)
}
- fs.appendFileSync(threadMessagePath, JSON.stringify(threadMessage) + '\n')
+ appendFileSync(threadMessagePath, JSON.stringify(threadMessage) + '\n')
return threadMessage
} catch (err) {
return {
@@ -259,8 +267,8 @@ export const downloadModel = async (
}
const directoryPath = join(getJanDataFolderPath(), 'models', modelId)
- if (!fs.existsSync(directoryPath)) {
- fs.mkdirSync(directoryPath)
+ if (!existsSync(directoryPath)) {
+ mkdirSync(directoryPath)
}
// path to model binary
@@ -281,7 +289,7 @@ export const downloadModel = async (
.on('end', function () {
console.debug('end')
})
- .pipe(fs.createWriteStream(modelBinaryPath))
+ .pipe(createWriteStream(modelBinaryPath))
}
return {
diff --git a/core/src/types/file/index.ts b/core/src/types/file/index.ts
index cc7274a28..d941987ef 100644
--- a/core/src/types/file/index.ts
+++ b/core/src/types/file/index.ts
@@ -4,16 +4,43 @@ export type FileStat = {
}
export type DownloadState = {
- modelId: string
+ modelId: string // TODO: change to download id
fileName: string
time: DownloadTime
speed: number
- percent: number
+ percent: number
size: DownloadSize
- children?: DownloadState[]
- error?: string
downloadState: 'downloading' | 'error' | 'end'
+ children?: DownloadState[]
+
+ error?: string
+ extensionId?: string
+ downloadType?: DownloadType
+ localPath?: string
+}
+
+export type DownloadType = 'model' | 'extension'
+
+export type DownloadRequest = {
+ /**
+ * The URL to download the file from.
+ */
+ url: string
+
+ /**
+ * The local path to save the file to.
+ */
+ localPath: string
+
+ /**
+ * The extension ID of the extension that initiated the download.
+ *
+ * Can be extension name.
+ */
+ extensionId?: string
+
+ downloadType?: DownloadType
}
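+
+// Example (illustrative) request for an extension artifact:
+// const request: DownloadRequest = {
+//   url: 'https://example.com/artifact.tar.gz',
+//   localPath: 'extensions/artifact.tar.gz',
+//   extensionId: '@janhq/sample-extension',
+//   downloadType: 'extension',
+// }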
type DownloadTime = {
diff --git a/core/src/types/miscellaneous/fileDownloadRequest.ts b/core/src/types/miscellaneous/fileDownloadRequest.ts
new file mode 100644
index 000000000..83131aa71
--- /dev/null
+++ b/core/src/types/miscellaneous/fileDownloadRequest.ts
@@ -0,0 +1,8 @@
+export type FileDownloadRequest = {
+ downloadId: string
+ url: string
+ localPath: string
+ fileName: string
+ displayName: string
+  metadata: Record<string, unknown>
+}
diff --git a/core/src/types/miscellaneous/index.ts b/core/src/types/miscellaneous/index.ts
index e9c205a73..b4ef68ab6 100644
--- a/core/src/types/miscellaneous/index.ts
+++ b/core/src/types/miscellaneous/index.ts
@@ -1,3 +1,5 @@
export * from './systemResourceInfo'
export * from './promptTemplate'
export * from './appUpdate'
+export * from './fileDownloadRequest'
+export * from './networkConfig'
\ No newline at end of file
diff --git a/core/src/types/miscellaneous/networkConfig.ts b/core/src/types/miscellaneous/networkConfig.ts
new file mode 100644
index 000000000..2d27f4223
--- /dev/null
+++ b/core/src/types/miscellaneous/networkConfig.ts
@@ -0,0 +1,4 @@
+export type NetworkConfig = {
+ proxy?: string
+ ignoreSSL?: boolean
+}
diff --git a/core/src/types/miscellaneous/systemResourceInfo.ts b/core/src/types/miscellaneous/systemResourceInfo.ts
index 1472cda47..f7dd4a82b 100644
--- a/core/src/types/miscellaneous/systemResourceInfo.ts
+++ b/core/src/types/miscellaneous/systemResourceInfo.ts
@@ -2,3 +2,31 @@ export type SystemResourceInfo = {
numCpuPhysicalCore: number
memAvailable: number
}
+
+export type RunMode = 'cpu' | 'gpu'
+
+export type GpuSetting = {
+ notify: boolean
+ run_mode: RunMode
+ nvidia_driver: {
+ exist: boolean
+ version: string
+ }
+ cuda: {
+ exist: boolean
+ version: string
+ }
+ gpus: GpuSettingInfo[]
+ gpu_highest_vram: string
+ gpus_in_use: string[]
+ is_initial: boolean
+ // TODO: This needs to be set based on user toggle in settings
+ vulkan: boolean
+}
+
+export type GpuSettingInfo = {
+ id: string
+ vram: string
+ name: string
+ arch?: string
+}
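+
+// Illustrative example of a populated GpuSetting (all values hypothetical):
+// {
+//   notify: true, run_mode: 'gpu',
+//   nvidia_driver: { exist: true, version: '535.98' },
+//   cuda: { exist: true, version: '12' },
+//   gpus: [{ id: '0', vram: '8192', name: 'NVIDIA GeForce RTX 3070', arch: 'ampere' }],
+//   gpu_highest_vram: '0', gpus_in_use: ['0'], is_initial: false, vulkan: false,
+// }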
diff --git a/core/src/types/model/modelEntity.ts b/core/src/types/model/modelEntity.ts
index 11d3e0526..74568686b 100644
--- a/core/src/types/model/modelEntity.ts
+++ b/core/src/types/model/modelEntity.ts
@@ -19,6 +19,7 @@ export enum InferenceEngine {
nitro = 'nitro',
openai = 'openai',
triton_trtllm = 'triton_trtllm',
+ nitro_tensorrt_llm = 'nitro-tensorrt-llm',
tool_retrieval_enabled = 'tool_retrieval_enabled',
}
diff --git a/core/src/types/model/modelInterface.ts b/core/src/types/model/modelInterface.ts
index 93d5867ee..639c7c8d3 100644
--- a/core/src/types/model/modelInterface.ts
+++ b/core/src/types/model/modelInterface.ts
@@ -1,3 +1,4 @@
+import { GpuSetting } from '../miscellaneous'
import { Model } from './modelEntity'
/**
@@ -10,7 +11,11 @@ export interface ModelInterface {
* @param network - Optional object to specify proxy/whether to ignore SSL certificates.
* @returns A Promise that resolves when the model has been downloaded.
*/
- downloadModel(model: Model, network?: { ignoreSSL?: boolean; proxy?: string }): Promise
+ downloadModel(
+ model: Model,
+ gpuSettings?: GpuSetting,
+ network?: { ignoreSSL?: boolean; proxy?: string }
+  ): Promise<void>
/**
* Cancels the download of a specific model.
diff --git a/core/src/types/monitoring/index.ts b/core/src/types/monitoring/index.ts
index 5828dae8b..b96c518fd 100644
--- a/core/src/types/monitoring/index.ts
+++ b/core/src/types/monitoring/index.ts
@@ -1 +1,2 @@
export * from './monitoringInterface'
+export * from './resourceInfo'
diff --git a/core/src/types/monitoring/resourceInfo.ts b/core/src/types/monitoring/resourceInfo.ts
new file mode 100644
index 000000000..b19da5462
--- /dev/null
+++ b/core/src/types/monitoring/resourceInfo.ts
@@ -0,0 +1,6 @@
+export type ResourceInfo = {
+ mem: {
+ totalMemory: number
+ usedMemory: number
+ }
+}
diff --git a/core/tsconfig.json b/core/tsconfig.json
index b112079d2..daeb7eeff 100644
--- a/core/tsconfig.json
+++ b/core/tsconfig.json
@@ -13,7 +13,7 @@
"declarationDir": "dist/types",
"outDir": "dist/lib",
"importHelpers": true,
- "types": ["@types/jest"]
+ "types": ["@types/jest"],
},
- "include": ["src"]
+ "include": ["src"],
}
diff --git a/docs/docs/guides/providers/README.mdx b/docs/docs/guides/providers/README.mdx
new file mode 100644
index 000000000..aa3bfea1f
--- /dev/null
+++ b/docs/docs/guides/providers/README.mdx
@@ -0,0 +1,8 @@
+---
+title: Inference Providers
+slug: /guides/providers
+---
+
+import DocCardList from "@theme/DocCardList";
+
+<DocCardList />
diff --git a/docs/docs/guides/providers/image.png b/docs/docs/guides/providers/image.png
new file mode 100644
index 000000000..5f1f7104e
Binary files /dev/null and b/docs/docs/guides/providers/image.png differ
diff --git a/docs/docs/guides/providers/llama-cpp.md b/docs/docs/guides/providers/llama-cpp.md
new file mode 100644
index 000000000..d2b0daa2a
--- /dev/null
+++ b/docs/docs/guides/providers/llama-cpp.md
@@ -0,0 +1,10 @@
+---
+title: llama.cpp
+slug: /guides/providers/llama-cpp
+---
+
+## Overview
+
+[Nitro](https://github.com/janhq/nitro) is an inference server built on top of [llama.cpp](https://github.com/ggerganov/llama.cpp). It provides an OpenAI-compatible API, request queueing, and scaling.
+
+Nitro is the default AI engine that ships with Jan; no additional setup is needed.
\ No newline at end of file
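+
+Once a model is loaded, you can sanity-check the engine by querying its OpenAI-compatible endpoint directly. The port and path below are assumptions for a default local setup; check the app logs for the actual address:
+
+```sh
+# Hypothetical default endpoint; adjust the port and model id to your setup
+curl http://localhost:3928/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{"model": "my-model", "messages": [{"role": "user", "content": "Hello"}], "stream": false}'
+```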
diff --git a/docs/docs/guides/providers/tensorrt-llm.md b/docs/docs/guides/providers/tensorrt-llm.md
new file mode 100644
index 000000000..52da83b36
--- /dev/null
+++ b/docs/docs/guides/providers/tensorrt-llm.md
@@ -0,0 +1,87 @@
+---
+title: TensorRT-LLM
+slug: /guides/providers/tensorrt-llm
+---
+
+Users with Nvidia GPUs can get **20-40% faster\* token speeds** on their laptops or desktops by using [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). A further benefit is that you run FP16 models, which are more accurate than quantized models.
+
+This guide walks you through how to install Jan's official [TensorRT-LLM Extension](https://github.com/janhq/nitro-tensorrt-llm). This extension uses [Nitro-TensorRT-LLM](https://github.com/janhq/nitro-tensorrt-llm) as the AI engine, instead of the default [Nitro-Llama-CPP](https://github.com/janhq/nitro). It includes an efficient C++ server to natively execute the [TRT-LLM C++ runtime](https://nvidia.github.io/TensorRT-LLM/gpt_runtime.html). It also comes with additional features and performance improvements such as OpenAI compatibility, tokenizer improvements, and request queues.
+
+\*Compared to using the llama.cpp engine.
+
+:::warning
+This feature is only available for Windows users. Linux is coming soon.
+
+Additionally, we only prebuilt a few demo models. You can always build your desired models directly on your machine. [Read here](#build-your-own-tensorrt-models).
+
+:::
+
+## Requirements
+
+- A Windows PC
+- Nvidia GPU(s): Ada or Ampere series (i.e. RTX 4000s & 3000s). More will be supported soon.
+- 3GB+ of disk space to download TRT-LLM artifacts and a Nitro binary
+- Jan v0.4.9+ or Jan v0.4.8-321+ (nightly)
+- Nvidia Driver v535+ ([installation guide](https://jan.ai/guides/common-error/not-using-gpu/#1-ensure-gpu-mode-requirements))
+- CUDA Toolkit v12.2+ ([installation guide](https://jan.ai/guides/common-error/not-using-gpu/#1-ensure-gpu-mode-requirements))
+
+## Install TensorRT-Extension
+
+1. Go to Settings > Extensions
+2. Click install next to the TensorRT-LLM Extension
+3. Check that files are correctly downloaded
+
+```sh
+ls ~\jan\extensions\@janhq\tensorrt-llm-extension\dist\bin
+# Your Extension Folder should now include `nitro.exe`, among other artifacts needed to run TRT-LLM
+```
+
+## Download a Compatible Model
+TensorRT-LLM can only run models in `TensorRT` format. These models, aka "TensorRT Engines", are prebuilt specifically for each target OS+GPU architecture.
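+
+Under the hood, engine downloads are templated per platform: a model's source URL can contain `<os>` and `<gpuarch>` placeholders that Jan substitutes at download time (for example, an Ampere card on Windows resolves to a `windows`/`ampere` build). The exact URL layout of published engines may differ.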
+
+We offer a handful of precompiled models for Ampere and Ada cards that you can immediately download and play with:
+
+1. Restart the application and go to the Hub
+2. Look for models with the `TensorRT-LLM` label in the recommended models list. Click download. This step might take some time. 🙏
+
+
+
+3. Click use and start chatting!
+4. You may need to allow Nitro through your firewall
+
+
+
+:::warning
+If you are on our nightly builds, you may have to reinstall the TensorRT-LLM extension each time you update the app. We're working on better extension lifecycles - stay tuned.
+:::
+
+## Configure Settings
+
+You can customize the default parameters for how Jan runs TensorRT-LLM.
+
+:::info
+coming soon
+:::
+
+## Troubleshooting
+
+### Incompatible Extension vs Engine versions
+
+For now, the model versions are pinned to the extension versions.
+
+### Uninstall Extension
+
+1. Quit the app
+2. Go to Settings > Extensions
+3. Delete the entire Extensions folder.
+4. Reopen the app; only the default extensions will be restored.
+
+### Install Nitro-TensorRT-LLM manually
+
+To manually build the artifacts needed to run the server and TensorRT-LLM, you can reference the source code. [Read here](https://github.com/janhq/nitro-tensorrt-llm?tab=readme-ov-file#quickstart).
+
+### Build your own TensorRT models
+
+:::info
+coming soon
+:::
diff --git a/docs/sidebars.js b/docs/sidebars.js
index 4c45cadbe..b95e4044f 100644
--- a/docs/sidebars.js
+++ b/docs/sidebars.js
@@ -199,6 +199,19 @@ const sidebars = {
"guides/models/integrate-remote",
]
},
+ {
+ type: "category",
+ label: "Inference Providers",
+ className: "head_SubMenu",
+ link: {
+ type: 'doc',
+ id: "guides/providers/README",
+ },
+ items: [
+ "guides/providers/llama-cpp",
+ "guides/providers/tensorrt-llm",
+ ]
+ },
{
type: "category",
label: "Extensions",
diff --git a/electron/icons/512x512.png b/electron/icons/512x512.png
new file mode 100644
index 000000000..289f99ded
Binary files /dev/null and b/electron/icons/512x512.png differ
diff --git a/extensions/huggingface-extension/src/index.ts b/extensions/huggingface-extension/src/index.ts
index d8f755080..88292ce58 100644
--- a/extensions/huggingface-extension/src/index.ts
+++ b/extensions/huggingface-extension/src/index.ts
@@ -13,6 +13,7 @@ import {
events,
DownloadEvent,
log,
+ DownloadRequest,
} from '@janhq/core'
import { ggufMetadata } from 'hyllama'
@@ -148,7 +149,11 @@ export default class JanHuggingFaceExtension extends HuggingFaceExtension {
if (this.interrupted) return
if (!(await fs.existsSync(localPath))) {
- downloadFile(url, localPath, network)
+ const downloadRequest: DownloadRequest = {
+ url,
+ localPath,
+ }
+ downloadFile(downloadRequest, network)
filePaths.push(filePath)
}
}
diff --git a/extensions/inference-nitro-extension/download.bat b/extensions/inference-nitro-extension/download.bat
index 2ef3165c1..bb8c4ffdc 100644
--- a/extensions/inference-nitro-extension/download.bat
+++ b/extensions/inference-nitro-extension/download.bat
@@ -1,3 +1,3 @@
@echo off
set /p NITRO_VERSION=<./bin/version.txt
-.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan && .\node_modules\.bin\download https://delta.jan.ai/vulkaninfoSDK.exe -o ./bin
+.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan
diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json
index e6365ad92..dd5798764 100644
--- a/extensions/inference-nitro-extension/package.json
+++ b/extensions/inference-nitro-extension/package.json
@@ -8,7 +8,7 @@
"license": "AGPL-3.0",
"scripts": {
"build": "tsc --module commonjs && rollup -c rollup.config.ts",
- "downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/nitro && download https://delta.jan.ai/vulkaninfo -o ./bin && chmod +x ./bin/vulkaninfo",
+ "downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/nitro",
"downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-arm64.tar.gz -e --strip 1 -o ./bin/mac-arm64 && chmod +x ./bin/mac-arm64/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-amd64.tar.gz -e --strip 1 -o ./bin/mac-x64 && chmod +x ./bin/mac-x64/nitro",
"downloadnitro:win32": "download.bat",
"downloadnitro": "run-script-os",
diff --git a/extensions/inference-nitro-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts
index 979b4cfac..70244a5d9 100644
--- a/extensions/inference-nitro-extension/src/index.ts
+++ b/extensions/inference-nitro-extension/src/index.ts
@@ -108,9 +108,6 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
events.on(InferenceEvent.OnInferenceStopped, () =>
this.onInferenceStopped()
)
-
- // Attempt to fetch nvidia info
- await executeOnMain(NODE, 'updateNvidiaInfo', {})
}
/**
diff --git a/extensions/inference-nitro-extension/src/node/accelerator.ts b/extensions/inference-nitro-extension/src/node/accelerator.ts
deleted file mode 100644
index 1ffdbc5bd..000000000
--- a/extensions/inference-nitro-extension/src/node/accelerator.ts
+++ /dev/null
@@ -1,237 +0,0 @@
-import { writeFileSync, existsSync, readFileSync } from 'fs'
-import { exec, spawn } from 'child_process'
-import path from 'path'
-import { getJanDataFolderPath, log } from '@janhq/core/node'
-
-/**
- * Default GPU settings
- * TODO: This needs to be refactored to support multiple accelerators
- **/
-const DEFALT_SETTINGS = {
- notify: true,
- run_mode: 'cpu',
- nvidia_driver: {
- exist: false,
- version: '',
- },
- cuda: {
- exist: false,
- version: '',
- },
- gpus: [],
- gpu_highest_vram: '',
- gpus_in_use: [],
- is_initial: true,
- // TODO: This needs to be set based on user toggle in settings
- vulkan: false
-}
-
-/**
- * Path to the settings file
- **/
-export const GPU_INFO_FILE = path.join(
- getJanDataFolderPath(),
- 'settings',
- 'settings.json'
-)
-
-/**
- * Current nitro process
- */
-let nitroProcessInfo: NitroProcessInfo | undefined = undefined
-
-/**
- * Nitro process info
- */
-export interface NitroProcessInfo {
- isRunning: boolean
-}
-
-/**
- * This will retrive GPU informations and persist settings.json
- * Will be called when the extension is loaded to turn on GPU acceleration if supported
- */
-export async function updateNvidiaInfo() {
- if (process.platform !== 'darwin') {
- let data
- try {
- data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
- } catch (error) {
- data = DEFALT_SETTINGS
- writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
- }
- updateNvidiaDriverInfo()
- updateGpuInfo()
- }
-}
-
-/**
- * Retrieve current nitro process
- */
-export const getNitroProcessInfo = (subprocess: any): NitroProcessInfo => {
- nitroProcessInfo = {
- isRunning: subprocess != null,
- }
- return nitroProcessInfo
-}
-
-/**
- * Validate nvidia and cuda for linux and windows
- */
-export async function updateNvidiaDriverInfo(): Promise {
- exec(
- 'nvidia-smi --query-gpu=driver_version --format=csv,noheader',
- (error, stdout) => {
- let data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
-
- if (!error) {
- const firstLine = stdout.split('\n')[0].trim()
- data['nvidia_driver'].exist = true
- data['nvidia_driver'].version = firstLine
- } else {
- data['nvidia_driver'].exist = false
- }
-
- writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
- Promise.resolve()
- }
- )
-}
-
-/**
- * Check if file exists in paths
- */
-export function checkFileExistenceInPaths(
- file: string,
- paths: string[]
-): boolean {
- return paths.some((p) => existsSync(path.join(p, file)))
-}
-
-/**
- * Validate cuda for linux and windows
- */
-export function updateCudaExistence(
- data: Record = DEFALT_SETTINGS
-): Record {
- let filesCuda12: string[]
- let filesCuda11: string[]
- let paths: string[]
- let cudaVersion: string = ''
-
- if (process.platform === 'win32') {
- filesCuda12 = ['cublas64_12.dll', 'cudart64_12.dll', 'cublasLt64_12.dll']
- filesCuda11 = ['cublas64_11.dll', 'cudart64_11.dll', 'cublasLt64_11.dll']
- paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : []
- } else {
- filesCuda12 = ['libcudart.so.12', 'libcublas.so.12', 'libcublasLt.so.12']
- filesCuda11 = ['libcudart.so.11.0', 'libcublas.so.11', 'libcublasLt.so.11']
- paths = process.env.LD_LIBRARY_PATH
- ? process.env.LD_LIBRARY_PATH.split(path.delimiter)
- : []
- paths.push('/usr/lib/x86_64-linux-gnu/')
- }
-
- let cudaExists = filesCuda12.every(
- (file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
- )
-
- if (!cudaExists) {
- cudaExists = filesCuda11.every(
- (file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
- )
- if (cudaExists) {
- cudaVersion = '11'
- }
- } else {
- cudaVersion = '12'
- }
-
- data['cuda'].exist = cudaExists
- data['cuda'].version = cudaVersion
- console.debug(data['is_initial'], data['gpus_in_use'])
- if (cudaExists && data['is_initial'] && data['gpus_in_use'].length > 0) {
- data.run_mode = 'gpu'
- }
- data.is_initial = false
- return data
-}
-
-/**
- * Get GPU information
- */
-export async function updateGpuInfo(): Promise {
- let data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
-
- // Cuda
- if (data['vulkan'] === true) {
- // Vulkan
- exec(
- process.platform === 'win32'
- ? `${__dirname}\\..\\bin\\vulkaninfoSDK.exe --summary`
- : `${__dirname}/../bin/vulkaninfo --summary`,
- (error, stdout) => {
- if (!error) {
- const output = stdout.toString()
- log(output)
- const gpuRegex = /GPU(\d+):(?:[\s\S]*?)deviceName\s*=\s*(.*)/g
-
- let gpus = []
- let match
- while ((match = gpuRegex.exec(output)) !== null) {
- const id = match[1]
- const name = match[2]
- gpus.push({ id, vram: 0, name })
- }
- data.gpus = gpus
-
- if (!data['gpus_in_use'] || data['gpus_in_use'].length === 0) {
- data.gpus_in_use = [data.gpus.length > 1 ? '1' : '0']
- }
-
- data = updateCudaExistence(data)
- writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
- }
- Promise.resolve()
- }
- )
- } else {
- exec(
- 'nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits',
- (error, stdout) => {
- if (!error) {
- log(stdout)
- // Get GPU info and gpu has higher memory first
- let highestVram = 0
- let highestVramId = '0'
- let gpus = stdout
- .trim()
- .split('\n')
- .map((line) => {
- let [id, vram, name] = line.split(', ')
- vram = vram.replace(/\r/g, '')
- if (parseFloat(vram) > highestVram) {
- highestVram = parseFloat(vram)
- highestVramId = id
- }
- return { id, vram, name }
- })
-
- data.gpus = gpus
- data.gpu_highest_vram = highestVramId
- } else {
- data.gpus = []
- data.gpu_highest_vram = ''
- }
-
- if (!data['gpus_in_use'] || data['gpus_in_use'].length === 0) {
- data.gpus_in_use = [data['gpu_highest_vram']]
- }
-
- data = updateCudaExistence(data)
- writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
- Promise.resolve()
- }
- )
- }
-}
diff --git a/extensions/inference-nitro-extension/src/node/execute.ts b/extensions/inference-nitro-extension/src/node/execute.ts
index f9a668507..8bcc75ae4 100644
--- a/extensions/inference-nitro-extension/src/node/execute.ts
+++ b/extensions/inference-nitro-extension/src/node/execute.ts
@@ -1,12 +1,19 @@
+import { getJanDataFolderPath } from '@janhq/core/node'
import { readFileSync } from 'fs'
import * as path from 'path'
-import { GPU_INFO_FILE } from './accelerator'
export interface NitroExecutableOptions {
executablePath: string
cudaVisibleDevices: string
vkVisibleDevices: string
}
+
+export const GPU_INFO_FILE = path.join(
+ getJanDataFolderPath(),
+ 'settings',
+ 'settings.json'
+)
+
/**
* Find which executable file to run based on the current platform.
* @returns The name of the executable file to run.
diff --git a/extensions/inference-nitro-extension/src/node/index.ts b/extensions/inference-nitro-extension/src/node/index.ts
index 9b2684a6c..c57eb262d 100644
--- a/extensions/inference-nitro-extension/src/node/index.ts
+++ b/extensions/inference-nitro-extension/src/node/index.ts
@@ -4,7 +4,6 @@ import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
import tcpPortUsed from 'tcp-port-used'
import fetchRT from 'fetch-retry'
import { log, getSystemResourceInfo } from '@janhq/core/node'
-import { getNitroProcessInfo, updateNvidiaInfo } from './accelerator'
import {
Model,
InferenceEngine,
@@ -385,11 +384,26 @@ function dispose() {
killSubprocess()
}
+/**
+ * Nitro process info
+ */
+export interface NitroProcessInfo {
+ isRunning: boolean
+}
+
+/**
+ * Retrieve current nitro process
+ */
+const getCurrentNitroProcessInfo = (): NitroProcessInfo => {
+ return {
+ isRunning: subprocess != null,
+ }
+}
+
export default {
runModel,
stopModel,
killSubprocess,
dispose,
- updateNvidiaInfo,
- getCurrentNitroProcessInfo: () => getNitroProcessInfo(subprocess),
+ getCurrentNitroProcessInfo,
}
diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts
index fb1f26885..d05e7d07f 100644
--- a/extensions/model-extension/src/index.ts
+++ b/extensions/model-extension/src/index.ts
@@ -17,6 +17,8 @@ import {
ImportingModel,
LocalImportModelEvent,
baseName,
+ GpuSetting,
+ DownloadRequest,
} from '@janhq/core'
import { extractFileName } from './helpers/path'
@@ -29,10 +31,14 @@ export default class JanModelExtension extends ModelExtension {
private static readonly _modelMetadataFileName = 'model.json'
private static readonly _supportedModelFormat = '.gguf'
private static readonly _incompletedModelFileName = '.download'
- private static readonly _offlineInferenceEngine = InferenceEngine.nitro
-
+ private static readonly _offlineInferenceEngine = [
+ InferenceEngine.nitro,
+ InferenceEngine.nitro_tensorrt_llm,
+ ]
+ private static readonly _tensorRtEngineFormat = '.engine'
private static readonly _configDirName = 'config'
private static readonly _defaultModelFileName = 'default-model.json'
+ private static readonly _supportedGpuArch = ['turing', 'ampere', 'ada']
/**
* Called when the extension is loaded.
@@ -89,12 +95,52 @@ export default class JanModelExtension extends ModelExtension {
*/
async downloadModel(
model: Model,
+ gpuSettings?: GpuSetting,
network?: { ignoreSSL?: boolean; proxy?: string }
): Promise {
// create corresponding directory
const modelDirPath = await joinPath([JanModelExtension._homeDir, model.id])
if (!(await fs.existsSync(modelDirPath))) await fs.mkdirSync(modelDirPath)
+ if (model.engine === InferenceEngine.nitro_tensorrt_llm) {
+ if (!gpuSettings || gpuSettings.gpus.length === 0) {
+ console.error('No GPU found. Please check your GPU setting.')
+ return
+ }
+ const firstGpu = gpuSettings.gpus[0]
+ if (!firstGpu.name.toLowerCase().includes('nvidia')) {
+ console.error('No Nvidia GPU found. Please check your GPU setting.')
+ return
+ }
+ const gpuArch = firstGpu.arch
+ if (gpuArch === undefined) {
+ console.error(
+ 'No GPU architecture found. Please check your GPU setting.'
+ )
+ return
+ }
+
+ if (!JanModelExtension._supportedGpuArch.includes(gpuArch)) {
+ console.error(
+          `Your GPU (${firstGpu.name}) is not supported. Only 20xx, 30xx, and 40xx series are supported.`
+ )
+ return
+ }
+
+ const os = 'windows' // TODO: remove this hard coded value
+
+ const newSources = model.sources.map((source) => {
+ const newSource = { ...source }
+ newSource.url = newSource.url
+          .replace(/<os>/g, os)
+          .replace(/<gpuarch>/g, gpuArch)
+ return newSource
+ })
+ model.sources = newSources
+ }
+
+ console.debug(`Download sources: ${JSON.stringify(model.sources)}`)
+
if (model.sources.length > 1) {
// path to model binaries
for (const source of model.sources) {
@@ -105,8 +151,11 @@ export default class JanModelExtension extends ModelExtension {
if (source.filename) {
path = await joinPath([modelDirPath, source.filename])
}
-
- downloadFile(source.url, path, network)
+ const downloadRequest: DownloadRequest = {
+ url: source.url,
+ localPath: path,
+ }
+ downloadFile(downloadRequest, network)
}
// TODO: handle multiple binaries for web later
} else {
@@ -115,7 +164,11 @@ export default class JanModelExtension extends ModelExtension {
JanModelExtension._supportedModelFormat
)
const path = await joinPath([modelDirPath, fileName])
- downloadFile(model.sources[0]?.url, path, network)
+ const downloadRequest: DownloadRequest = {
+ url: model.sources[0]?.url,
+ localPath: path,
+ }
+ downloadFile(downloadRequest, network)
if (window && window.core?.api && window.core.api.baseApiUrl) {
this.startPollingDownloadProgress(model.id)
@@ -238,7 +291,7 @@ export default class JanModelExtension extends ModelExtension {
async getDownloadedModels(): Promise {
return await this.getModelsMetadata(
async (modelDir: string, model: Model) => {
- if (model.engine !== JanModelExtension._offlineInferenceEngine)
+ if (!JanModelExtension._offlineInferenceEngine.includes(model.engine))
return true
// model binaries (sources) are absolute path & exist
@@ -247,22 +300,32 @@ export default class JanModelExtension extends ModelExtension {
)
if (existFiles.every((exist) => exist)) return true
- return await fs
+ const result = await fs
.readdirSync(await joinPath([JanModelExtension._homeDir, modelDir]))
.then((files: string[]) => {
// Model binary exists in the directory
// Model binary name can match model ID or be a .gguf file and not be an incompleted model file
return (
files.includes(modelDir) ||
- files.filter(
- (file) =>
+ files.filter((file) => {
+ if (
+ file.endsWith(JanModelExtension._incompletedModelFileName)
+ ) {
+ return false
+ }
+ return (
file
.toLowerCase()
- .includes(JanModelExtension._supportedModelFormat) &&
- !file.endsWith(JanModelExtension._incompletedModelFileName)
- )?.length >= model.sources.length
+ .includes(JanModelExtension._supportedModelFormat) ||
+ file
+ .toLowerCase()
+ .includes(JanModelExtension._tensorRtEngineFormat)
+ )
+ })?.length > 0 // TODO: NamH find better way (can use basename to check the file name with source url)
)
})
+
+ return result
}
)
}
diff --git a/extensions/monitoring-extension/bin/.gitkeep b/extensions/monitoring-extension/bin/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/extensions/monitoring-extension/download.bat b/extensions/monitoring-extension/download.bat
new file mode 100644
index 000000000..f1cf8b7ea
--- /dev/null
+++ b/extensions/monitoring-extension/download.bat
@@ -0,0 +1,2 @@
+@echo off
+.\node_modules\.bin\download https://delta.jan.ai/vulkaninfoSDK.exe -o ./bin
\ No newline at end of file
diff --git a/extensions/monitoring-extension/package.json b/extensions/monitoring-extension/package.json
index 582f7cd7b..73d28ab37 100644
--- a/extensions/monitoring-extension/package.json
+++ b/extensions/monitoring-extension/package.json
@@ -3,21 +3,40 @@
"version": "1.0.10",
"description": "This extension provides system health and OS level data",
"main": "dist/index.js",
- "module": "dist/module.js",
+ "node": "dist/node/index.cjs.js",
"author": "Jan ",
"license": "AGPL-3.0",
"scripts": {
- "build": "tsc -b . && webpack --config webpack.config.js",
+ "build": "tsc --module commonjs && rollup -c rollup.config.ts && npm run download-artifacts",
+ "download-artifacts": "run-script-os && cpx \"bin/**\" \"dist/bin\"",
+ "download-artifacts:darwin": "echo 'No artifacts to download for darwin'",
+ "download-artifacts:win32": "download.bat",
+ "download-artifacts:linux": "download https://delta.jan.ai/vulkaninfo -o ./bin && chmod +x ./bin/vulkaninfo",
"build:publish": "rimraf *.tgz --glob && npm run build && npm pack && cpx *.tgz ../../pre-install"
},
+ "exports": {
+ ".": "./dist/index.js",
+ "./main": "./dist/node/index.cjs.js"
+ },
"devDependencies": {
+ "@rollup/plugin-commonjs": "^25.0.7",
+ "@rollup/plugin-json": "^6.1.0",
+ "@rollup/plugin-node-resolve": "^15.2.3",
+ "@types/node": "^20.11.4",
+ "@types/node-os-utils": "^1.3.4",
+ "run-script-os": "^1.1.6",
+ "cpx": "^1.5.0",
"rimraf": "^3.0.2",
- "webpack": "^5.88.2",
- "webpack-cli": "^5.1.4",
- "ts-loader": "^9.5.0"
+ "rollup": "^2.38.5",
+ "rollup-plugin-define": "^1.0.1",
+ "rollup-plugin-sourcemaps": "^0.6.3",
+ "rollup-plugin-typescript2": "^0.36.0",
+ "typescript": "^5.3.3",
+ "download-cli": "^1.1.1"
},
"dependencies": {
"@janhq/core": "file:../../core",
+ "@rollup/plugin-replace": "^5.0.5",
"node-os-utils": "^1.3.7"
},
"files": [
diff --git a/extensions/monitoring-extension/rollup.config.ts b/extensions/monitoring-extension/rollup.config.ts
new file mode 100644
index 000000000..1b7a40bad
--- /dev/null
+++ b/extensions/monitoring-extension/rollup.config.ts
@@ -0,0 +1,68 @@
+import resolve from '@rollup/plugin-node-resolve'
+import commonjs from '@rollup/plugin-commonjs'
+import sourceMaps from 'rollup-plugin-sourcemaps'
+import typescript from 'rollup-plugin-typescript2'
+import json from '@rollup/plugin-json'
+import replace from '@rollup/plugin-replace'
+const packageJson = require('./package.json')
+
+export default [
+ {
+ input: `src/index.ts`,
+ output: [{ file: packageJson.main, format: 'es', sourcemap: true }],
+    // List here any external modules you don't want included in your bundle (e.g. 'lodash')
+ external: [],
+ watch: {
+ include: 'src/**',
+ },
+ plugins: [
+ replace({
+ NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
+ }),
+ // Allow json resolution
+ json(),
+ // Compile TypeScript files
+ typescript({ useTsconfigDeclarationDir: true }),
+ // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
+ commonjs(),
+ // Allow node_modules resolution, so you can use 'external' to control
+ // which external modules to include in the bundle
+ // https://github.com/rollup/rollup-plugin-node-resolve#usage
+ resolve({
+ extensions: ['.js', '.ts', '.svelte'],
+ }),
+
+ // Resolve source maps to the original source
+ sourceMaps(),
+ ],
+ },
+ {
+ input: `src/node/index.ts`,
+ output: [
+ { file: 'dist/node/index.cjs.js', format: 'cjs', sourcemap: true },
+ ],
+    // List here any external modules you don't want included in your bundle (e.g. 'lodash')
+ external: ['@janhq/core/node'],
+ watch: {
+ include: 'src/node/**',
+ },
+ plugins: [
+ // Allow json resolution
+ json(),
+ // Compile TypeScript files
+ typescript({ useTsconfigDeclarationDir: true }),
+ // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
+ commonjs(),
+ // Allow node_modules resolution, so you can use 'external' to control
+ // which external modules to include in the bundle
+ // https://github.com/rollup/rollup-plugin-node-resolve#usage
+ resolve({
+ extensions: ['.ts', '.js', '.json'],
+ }),
+
+ // Resolve source maps to the original source
+ sourceMaps(),
+ ],
+ },
+]
diff --git a/extensions/monitoring-extension/src/@types/global.d.ts b/extensions/monitoring-extension/src/@types/global.d.ts
index 8106353cf..dfa96a0b1 100644
--- a/extensions/monitoring-extension/src/@types/global.d.ts
+++ b/extensions/monitoring-extension/src/@types/global.d.ts
@@ -1 +1,18 @@
-declare const MODULE: string
+declare const NODE: string
+
+type CpuGpuInfo = {
+ cpu: {
+ usage: number
+ }
+ gpu: GpuInfo[]
+}
+
+type GpuInfo = {
+ id: string
+ name: string
+ temperature: string
+ utilization: string
+ memoryTotal: string
+ memoryFree: string
+ memoryUtilization: string
+}
diff --git a/extensions/monitoring-extension/src/index.ts b/extensions/monitoring-extension/src/index.ts
index ce9b2fc14..c7f53455d 100644
--- a/extensions/monitoring-extension/src/index.ts
+++ b/extensions/monitoring-extension/src/index.ts
@@ -1,4 +1,4 @@
-import { MonitoringExtension, executeOnMain } from '@janhq/core'
+import { GpuSetting, MonitoringExtension, executeOnMain } from '@janhq/core'
/**
 * JanMonitoringExtension is an extension that provides system monitoring functionality.
@@ -8,19 +8,30 @@ export default class JanMonitoringExtension extends MonitoringExtension {
/**
* Called when the extension is loaded.
*/
- async onLoad() {}
+ async onLoad() {
+ // Attempt to fetch nvidia info
+ await executeOnMain(NODE, 'updateNvidiaInfo')
+ }
/**
* Called when the extension is unloaded.
*/
onUnload(): void {}
+ /**
+ * Returns the GPU configuration.
+ * @returns A Promise that resolves to an object containing the GPU configuration.
+ */
+  async getGpuSetting(): Promise<GpuSetting | undefined> {
+ return executeOnMain(NODE, 'getGpuConfig')
+ }
+
/**
* Returns information about the system resources.
* @returns A Promise that resolves to an object containing information about the system resources.
*/
  getResourcesInfo(): Promise<any> {
- return executeOnMain(MODULE, 'getResourcesInfo')
+ return executeOnMain(NODE, 'getResourcesInfo')
}
/**
@@ -28,6 +39,6 @@ export default class JanMonitoringExtension extends MonitoringExtension {
* @returns A Promise that resolves to an object containing information about the current system load.
*/
  getCurrentLoad(): Promise<any> {
- return executeOnMain(MODULE, 'getCurrentLoad')
+ return executeOnMain(NODE, 'getCurrentLoad')
}
}
diff --git a/extensions/monitoring-extension/src/module.ts b/extensions/monitoring-extension/src/module.ts
deleted file mode 100644
index 27781a5d6..000000000
--- a/extensions/monitoring-extension/src/module.ts
+++ /dev/null
@@ -1,92 +0,0 @@
-const nodeOsUtils = require('node-os-utils')
-const getJanDataFolderPath = require('@janhq/core/node').getJanDataFolderPath
-const path = require('path')
-const { readFileSync } = require('fs')
-const exec = require('child_process').exec
-
-const NVIDIA_INFO_FILE = path.join(
- getJanDataFolderPath(),
- 'settings',
- 'settings.json'
-)
-
-const getResourcesInfo = () =>
- new Promise((resolve) => {
- nodeOsUtils.mem.used().then((ramUsedInfo) => {
- const totalMemory = ramUsedInfo.totalMemMb * 1024 * 1024
- const usedMemory = ramUsedInfo.usedMemMb * 1024 * 1024
- const response = {
- mem: {
- totalMemory,
- usedMemory,
- },
- }
- resolve(response)
- })
- })
-
-const getCurrentLoad = () =>
- new Promise((resolve, reject) => {
- nodeOsUtils.cpu.usage().then((cpuPercentage) => {
- let data = {
- run_mode: 'cpu',
- gpus_in_use: [],
- }
- if (process.platform !== 'darwin') {
- data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, 'utf-8'))
- }
- if (data.run_mode === 'gpu' && data.gpus_in_use.length > 0) {
- const gpuIds = data['gpus_in_use'].join(',')
- if (gpuIds !== '' && data['vulkan'] !== true) {
- exec(
- `nvidia-smi --query-gpu=index,name,temperature.gpu,utilization.gpu,memory.total,memory.free,utilization.memory --format=csv,noheader,nounits --id=${gpuIds}`,
- (error, stdout, _) => {
- if (error) {
- console.error(`exec error: ${error}`)
- reject(error)
- return
- }
- const gpuInfo = stdout
- .trim()
- .split('\n')
- .map((line) => {
- const [
- id,
- name,
- temperature,
- utilization,
- memoryTotal,
- memoryFree,
- memoryUtilization,
- ] = line.split(', ').map((item) => item.replace(/\r/g, ''))
- return {
- id,
- name,
- temperature,
- utilization,
- memoryTotal,
- memoryFree,
- memoryUtilization,
- }
- })
- resolve({
- cpu: { usage: cpuPercentage },
- gpu: gpuInfo,
- })
- }
- )
- } else {
- // Handle the case where gpuIds is empty
- resolve({ cpu: { usage: cpuPercentage }, gpu: [] })
- }
- } else {
- // Handle the case where run_mode is not 'gpu' or no GPUs are in use
- resolve({ cpu: { usage: cpuPercentage }, gpu: [] })
- }
- })
- })
-
-module.exports = {
- getResourcesInfo,
- getCurrentLoad,
-}
diff --git a/extensions/monitoring-extension/src/node/index.ts b/extensions/monitoring-extension/src/node/index.ts
new file mode 100644
index 000000000..25f151112
--- /dev/null
+++ b/extensions/monitoring-extension/src/node/index.ts
@@ -0,0 +1,322 @@
+import { GpuSetting, GpuSettingInfo, ResourceInfo } from '@janhq/core'
+import { getJanDataFolderPath, log } from '@janhq/core/node'
+import { mem, cpu } from 'node-os-utils'
+import { exec } from 'child_process'
+import { writeFileSync, existsSync, readFileSync, mkdirSync } from 'fs'
+import path from 'path'
+
+/**
+ * Path to the settings directory
+ **/
+export const SETTINGS_DIR = path.join(getJanDataFolderPath(), 'settings')
+/**
+ * Path to the settings file
+ **/
+export const GPU_INFO_FILE = path.join(SETTINGS_DIR, 'settings.json')
+
+/**
+ * Default GPU settings
+ * TODO: This needs to be refactored to support multiple accelerators
+ **/
+const DEFAULT_SETTINGS: GpuSetting = {
+ notify: true,
+ run_mode: 'cpu',
+ nvidia_driver: {
+ exist: false,
+ version: '',
+ },
+ cuda: {
+ exist: false,
+ version: '',
+ },
+ gpus: [],
+ gpu_highest_vram: '',
+ gpus_in_use: [],
+ is_initial: true,
+ // TODO: This needs to be set based on user toggle in settings
+ vulkan: false,
+}
+
+export const getGpuConfig = async (): Promise<GpuSetting | undefined> => {
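+  // macOS is not supported here; there is no GPU settings file to read.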
+ if (process.platform === 'darwin') return undefined
+ return JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
+}
+
+export const getResourcesInfo = async (): Promise<ResourceInfo> => {
+ const ramUsedInfo = await mem.used()
+ const totalMemory = ramUsedInfo.totalMemMb * 1024 * 1024
+ const usedMemory = ramUsedInfo.usedMemMb * 1024 * 1024
+
+ const resourceInfo: ResourceInfo = {
+ mem: {
+ totalMemory,
+ usedMemory,
+ },
+ }
+
+ return resourceInfo
+}
+
+export const getCurrentLoad = () =>
+ new Promise(async (resolve, reject) => {
+ const cpuPercentage = await cpu.usage()
+ let data = {
+ run_mode: 'cpu',
+ gpus_in_use: [],
+ }
+
+ if (process.platform !== 'darwin') {
+ data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
+ }
+
+ if (data.run_mode === 'gpu' && data.gpus_in_use.length > 0) {
+ const gpuIds = data.gpus_in_use.join(',')
+ if (gpuIds !== '' && data['vulkan'] !== true) {
+ exec(
+ `nvidia-smi --query-gpu=index,name,temperature.gpu,utilization.gpu,memory.total,memory.free,utilization.memory --format=csv,noheader,nounits --id=${gpuIds}`,
+ (error, stdout, _) => {
+ if (error) {
+ console.error(`exec error: ${error}`)
+            reject(error)
+            return
+ }
+ const gpuInfo: GpuInfo[] = stdout
+ .trim()
+ .split('\n')
+ .map((line) => {
+ const [
+ id,
+ name,
+ temperature,
+ utilization,
+ memoryTotal,
+ memoryFree,
+ memoryUtilization,
+ ] = line.split(', ').map((item) => item.replace(/\r/g, ''))
+ return {
+ id,
+ name,
+ temperature,
+ utilization,
+ memoryTotal,
+ memoryFree,
+ memoryUtilization,
+ }
+ })
+
+ resolve({
+ cpu: { usage: cpuPercentage },
+ gpu: gpuInfo,
+ })
+ }
+ )
+ } else {
+ // Handle the case where gpuIds is empty
+ resolve({
+ cpu: { usage: cpuPercentage },
+ gpu: [],
+ })
+ }
+ } else {
+ // Handle the case where run_mode is not 'gpu' or no GPUs are in use
+ resolve({
+ cpu: { usage: cpuPercentage },
+ gpu: [],
+ })
+ }
+ })
+
+/**
+ * Retrieves GPU information and persists it to settings.json.
+ * Called when the extension is loaded, to turn on GPU acceleration if supported.
+ */
+export const updateNvidiaInfo = async () => {
+ // ignore if macos
+ if (process.platform === 'darwin') return
+
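+  // If settings.json is missing or unreadable, (re)create it with the default settings.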
+ try {
+ JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
+ } catch (error) {
+ if (!existsSync(SETTINGS_DIR)) {
+ mkdirSync(SETTINGS_DIR, {
+ recursive: true,
+ })
+ }
+ writeFileSync(GPU_INFO_FILE, JSON.stringify(DEFAULT_SETTINGS, null, 2))
+ }
+
+ await updateNvidiaDriverInfo()
+ await updateGpuInfo()
+}
+
+const updateNvidiaDriverInfo = async () =>
+ new Promise((resolve, reject) => {
+ exec(
+ 'nvidia-smi --query-gpu=driver_version --format=csv,noheader',
+ (error, stdout) => {
+ const data: GpuSetting = JSON.parse(
+ readFileSync(GPU_INFO_FILE, 'utf-8')
+ )
+
+ if (!error) {
+ const firstLine = stdout.split('\n')[0].trim()
+ data.nvidia_driver.exist = true
+ data.nvidia_driver.version = firstLine
+ } else {
+ data.nvidia_driver.exist = false
+ }
+
+ writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
+ resolve({})
+ }
+ )
+ })
+
+const getGpuArch = (gpuName: string): string => {
+ if (!gpuName.toLowerCase().includes('nvidia')) return 'unknown'
+
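+  // Infer the architecture from the series number in the name, e.g. "GeForce RTX 3090" -> ampere.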
+ if (gpuName.includes('20')) return 'turing'
+ else if (gpuName.includes('30')) return 'ampere'
+ else if (gpuName.includes('40')) return 'ada'
+ else return 'unknown'
+}
+
+const updateGpuInfo = async () =>
+ new Promise((resolve, reject) => {
+ let data: GpuSetting = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
+
+ // Cuda
+ if (data.vulkan === true) {
+ // Vulkan
+ exec(
+ process.platform === 'win32'
+ ? `${__dirname}\\..\\bin\\vulkaninfoSDK.exe --summary`
+ : `${__dirname}/../bin/vulkaninfo --summary`,
+ (error, stdout) => {
+ if (!error) {
+ const output = stdout.toString()
+
+ log(output)
+ const gpuRegex = /GPU(\d+):(?:[\s\S]*?)deviceName\s*=\s*(.*)/g
+
+ const gpus: GpuSettingInfo[] = []
+ let match
+ while ((match = gpuRegex.exec(output)) !== null) {
+ const id = match[1]
+ const name = match[2]
+ const arch = getGpuArch(name)
+ gpus.push({ id, vram: '0', name, arch })
+ }
+ data.gpus = gpus
+
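+          // Default to GPU '1' when more than one is present (assuming index 0 is an integrated GPU).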
+ if (!data.gpus_in_use || data.gpus_in_use.length === 0) {
+ data.gpus_in_use = [data.gpus.length > 1 ? '1' : '0']
+ }
+
+ data = updateCudaExistence(data)
+ writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
+ resolve({})
+ } else {
+ reject(error)
+ }
+ }
+ )
+ } else {
+ exec(
+ 'nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits',
+ (error, stdout) => {
+ if (!error) {
+ log(stdout)
+          // Get GPU info, tracking the GPU with the highest memory
+ let highestVram = 0
+ let highestVramId = '0'
+ const gpus: GpuSettingInfo[] = stdout
+ .trim()
+ .split('\n')
+ .map((line) => {
+ let [id, vram, name] = line.split(', ')
+ const arch = getGpuArch(name)
+ vram = vram.replace(/\r/g, '')
+ if (parseFloat(vram) > highestVram) {
+ highestVram = parseFloat(vram)
+ highestVramId = id
+ }
+ return { id, vram, name, arch }
+ })
+
+ data.gpus = gpus
+ data.gpu_highest_vram = highestVramId
+ } else {
+ data.gpus = []
+ data.gpu_highest_vram = ''
+ }
+
+ if (!data.gpus_in_use || data.gpus_in_use.length === 0) {
+ data.gpus_in_use = [data.gpu_highest_vram]
+ }
+
+ data = updateCudaExistence(data)
+ writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
+ resolve({})
+ }
+ )
+ }
+ })
+
+/**
+ * Check if a file exists in any of the given paths
+ */
+const checkFileExistenceInPaths = (file: string, paths: string[]): boolean => {
+ return paths.some((p) => existsSync(path.join(p, file)))
+}
+
+/**
+ * Validate CUDA availability on Linux and Windows
+ */
+const updateCudaExistence = (
+ data: GpuSetting = DEFAULT_SETTINGS
+): GpuSetting => {
+ let filesCuda12: string[]
+ let filesCuda11: string[]
+ let paths: string[]
+ let cudaVersion: string = ''
+
+ if (process.platform === 'win32') {
+ filesCuda12 = ['cublas64_12.dll', 'cudart64_12.dll', 'cublasLt64_12.dll']
+ filesCuda11 = ['cublas64_11.dll', 'cudart64_11.dll', 'cublasLt64_11.dll']
+ paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : []
+ } else {
+ filesCuda12 = ['libcudart.so.12', 'libcublas.so.12', 'libcublasLt.so.12']
+ filesCuda11 = ['libcudart.so.11.0', 'libcublas.so.11', 'libcublasLt.so.11']
+ paths = process.env.LD_LIBRARY_PATH
+ ? process.env.LD_LIBRARY_PATH.split(path.delimiter)
+ : []
+ paths.push('/usr/lib/x86_64-linux-gnu/')
+ }
+
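+  // Prefer CUDA 12; fall back to CUDA 11 only if the v12 runtime libraries are not found.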
+ let cudaExists = filesCuda12.every(
+ (file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
+ )
+
+ if (!cudaExists) {
+ cudaExists = filesCuda11.every(
+ (file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
+ )
+ if (cudaExists) {
+ cudaVersion = '11'
+ }
+ } else {
+ cudaVersion = '12'
+ }
+
+ data.cuda.exist = cudaExists
+ data.cuda.version = cudaVersion
+
+ console.debug(data.is_initial, data.gpus_in_use)
+
+ if (cudaExists && data.is_initial && data.gpus_in_use.length > 0) {
+ data.run_mode = 'gpu'
+ }
+
+ data.is_initial = false
+ return data
+}
diff --git a/extensions/monitoring-extension/webpack.config.js b/extensions/monitoring-extension/webpack.config.js
deleted file mode 100644
index c8c3a34f7..000000000
--- a/extensions/monitoring-extension/webpack.config.js
+++ /dev/null
@@ -1,35 +0,0 @@
-const path = require('path')
-const webpack = require('webpack')
-const packageJson = require('./package.json')
-
-module.exports = {
- experiments: { outputModule: true },
- entry: './src/index.ts', // Adjust the entry point to match your project's main file
- mode: 'production',
- module: {
- rules: [
- {
- test: /\.tsx?$/,
- use: 'ts-loader',
- exclude: /node_modules/,
- },
- ],
- },
- output: {
- filename: 'index.js', // Adjust the output file name as needed
- path: path.resolve(__dirname, 'dist'),
- library: { type: 'module' }, // Specify ESM output format
- },
- plugins: [
- new webpack.DefinePlugin({
- MODULE: JSON.stringify(`${packageJson.name}/${packageJson.module}`),
- }),
- ],
- resolve: {
- extensions: ['.ts', '.js'],
- },
- optimization: {
- minimize: false,
- },
- // Add loaders and other configuration as needed for your project
-}
diff --git a/extensions/tensorrt-llm-extension/README.md b/extensions/tensorrt-llm-extension/README.md
new file mode 100644
index 000000000..34a670516
--- /dev/null
+++ b/extensions/tensorrt-llm-extension/README.md
@@ -0,0 +1,79 @@
+# TensorRT-LLM Extension
+
+Created from the Jan extension example.
+
+# Create a Jan Extension using Typescript
+
+Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀
+
+## Create Your Own Extension
+
+To create your own extension, you can use this repository as a template! Just follow the instructions below:
+
+1. Click the Use this template button at the top of the repository
+2. Select Create a new repository
+3. Select an owner and name for your new repository
+4. Click Create repository
+5. Clone your new repository
+
+## Initial Setup
+
+After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.
+
+> [!NOTE]
+>
+> You'll need to have a reasonably modern version of
+> [Node.js](https://nodejs.org) handy. If you are using a version manager like
+> [`nodenv`](https://github.com/nodenv/nodenv) or
+> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
+> root of your repository to install the version specified in
+> [`package.json`](./package.json). Otherwise, 20.x or later should work!
+
+1. :hammer_and_wrench: Install the dependencies
+
+ ```bash
+ npm install
+ ```
+
+1. :building_construction: Package the TypeScript for distribution
+
+ ```bash
+ npm run bundle
+ ```
+
+1. :white_check_mark: Check your artifact
+
+   There will now be a `.tgz` file in your extension directory.
+
+## Update the Extension Metadata
+
+The [`package.json`](package.json) file defines metadata about your extension, such as
+extension name, main entry, description and version.
+
+When you copy this repository, update `package.json` with the name and description of your extension.
+
+## Update the Extension Code
+
+The [`src/`](./src/) directory is the heart of your extension! This contains the
+source code that will be run when your extension functions are invoked. You can replace the
+contents of this directory with your own code.
+
+There are a few things to keep in mind when writing your extension code:
+
+- Most Jan Extension functions are processed asynchronously.
+  In `index.ts`, you will see that extension functions return a `Promise`.
+
+ ```typescript
+ import { events, MessageEvent, MessageRequest } from '@janhq/core'
+
+  function onStart(): Promise<void> {
+ return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
+ this.inference(data)
+ )
+ }
+ ```
+
+ For more information about the Jan Extension Core module, see the
+ [documentation](https://github.com/janhq/jan/blob/main/core/README.md).
+
+So, what are you waiting for? Go ahead and start customizing your extension!
diff --git a/extensions/tensorrt-llm-extension/models.json b/extensions/tensorrt-llm-extension/models.json
new file mode 100644
index 000000000..30f345f47
--- /dev/null
+++ b/extensions/tensorrt-llm-extension/models.json
@@ -0,0 +1,96 @@
+[
+ {
+ "sources": [
+ {
+ "filename": "config.json",
+      "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/config.json"
+ },
+ {
+ "filename": "rank0.engine",
+      "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/rank0.engine"
+ },
+ {
+ "filename": "tokenizer.model",
+      "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/tokenizer.model"
+ },
+ {
+ "filename": "special_tokens_map.json",
+      "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/special_tokens_map.json"
+ },
+ {
+ "filename": "tokenizer.json",
+      "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/tokenizer.json"
+ },
+ {
+ "filename": "tokenizer_config.json",
+      "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/tokenizer_config.json"
+ }
+ ],
+ "id": "llamacorn-1.1b-chat-fp16",
+ "object": "model",
+ "name": "LlamaCorn 1.1B Chat FP16",
+ "version": "1.0",
+ "description": "LlamaCorn is a refined version of TinyLlama-1.1B, optimized for conversational quality, running on consumer devices through TensorRT-LLM",
+ "format": "TensorRT-LLM",
+ "settings": {
+ "ctx_len": 2048,
+ "text_model": false
+ },
+ "parameters": {
+ "max_tokens": 4096
+ },
+ "metadata": {
+ "author": "LLama",
+ "tags": ["TensorRT-LLM", "1B", "Finetuned"],
+ "size": 2151000000
+ },
+ "engine": "nitro-tensorrt-llm"
+ },
+ {
+ "sources": [
+ {
+ "filename": "config.json",
+ "url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/config.json"
+ },
+ {
+ "filename": "rank0.engine",
+ "url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/rank0.engine"
+ },
+ {
+ "filename": "tokenizer.model",
+ "url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/tokenizer.model"
+ },
+ {
+ "filename": "special_tokens_map.json",
+ "url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/special_tokens_map.json"
+ },
+ {
+ "filename": "tokenizer.json",
+ "url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/tokenizer.json"
+ },
+ {
+ "filename": "tokenizer_config.json",
+ "url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/tokenizer_config.json"
+ }
+ ],
+ "id": "tinyjensen-1.1b-chat-fp16",
+ "object": "model",
+ "name": "TinyJensen 1.1B Chat FP16",
+ "version": "1.0",
+    "description": "Do you want to chat with Jensen Huang? Here you go",
+ "format": "TensorRT-LLM",
+ "settings": {
+ "ctx_len": 2048,
+ "text_model": false
+ },
+ "parameters": {
+ "max_tokens": 4096
+ },
+ "metadata": {
+ "author": "LLama",
+ "tags": ["TensorRT-LLM", "1B", "Finetuned"],
+ "size": 2151000000
+ },
+ "engine": "nitro-tensorrt-llm"
+ }
+]
diff --git a/extensions/tensorrt-llm-extension/package.json b/extensions/tensorrt-llm-extension/package.json
new file mode 100644
index 000000000..96ede4a56
--- /dev/null
+++ b/extensions/tensorrt-llm-extension/package.json
@@ -0,0 +1,75 @@
+{
+ "name": "@janhq/tensorrt-llm-extension",
+ "version": "0.0.3",
+  "description": "Enables accelerated inference leveraging Nvidia's TensorRT-LLM for optimized GPU performance. Compatible with models in TensorRT-LLM format. Requires an Nvidia GPU driver and CUDA Toolkit installation.",
+ "main": "dist/index.js",
+ "node": "dist/node/index.cjs.js",
+ "author": "Jan ",
+ "license": "AGPL-3.0",
+ "config": {
+ "host": "127.0.0.1",
+ "port": "3929"
+ },
+ "compatibility": {
+ "platform": [
+ "win32",
+ "linux"
+ ],
+ "app": [
+ "0.1.0"
+ ]
+ },
+ "scripts": {
+ "build": "tsc --module commonjs && rollup -c rollup.config.ts",
+ "build:publish:win32": "rimraf *.tgz --glob && npm run build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
+ "build:publish:linux": "rimraf *.tgz --glob && npm run build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
+ "build:publish:darwin": "rimraf *.tgz --glob && npm run build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
+ "build:publish": "run-script-os"
+ },
+ "exports": {
+ ".": "./dist/index.js",
+ "./main": "./dist/node/index.cjs.js"
+ },
+ "devDependencies": {
+ "@rollup/plugin-commonjs": "^25.0.7",
+ "@rollup/plugin-json": "^6.1.0",
+ "@rollup/plugin-node-resolve": "^15.2.3",
+ "@rollup/plugin-replace": "^5.0.5",
+ "@types/node": "^20.11.4",
+ "@types/os-utils": "^0.0.4",
+ "@types/tcp-port-used": "^1.0.4",
+ "@types/decompress": "4.2.7",
+ "cpx": "^1.5.0",
+ "download-cli": "^1.1.1",
+ "rimraf": "^3.0.2",
+ "rollup": "^2.38.5",
+ "rollup-plugin-define": "^1.0.1",
+ "rollup-plugin-sourcemaps": "^0.6.3",
+ "rollup-plugin-typescript2": "^0.36.0",
+ "run-script-os": "^1.1.6",
+ "typescript": "^5.2.2"
+ },
+ "dependencies": {
+ "@janhq/core": "file:../../core",
+ "decompress": "^4.2.1",
+ "fetch-retry": "^5.0.6",
+ "path-browserify": "^1.0.1",
+ "rxjs": "^7.8.1",
+ "tcp-port-used": "^1.0.2",
+ "ulid": "^2.3.0"
+ },
+ "engines": {
+ "node": ">=18.0.0"
+ },
+ "files": [
+ "dist/*",
+ "package.json",
+ "README.md"
+ ],
+ "bundleDependencies": [
+ "tcp-port-used",
+ "fetch-retry",
+ "decompress",
+ "@janhq/core"
+ ]
+}
diff --git a/extensions/tensorrt-llm-extension/rollup.config.ts b/extensions/tensorrt-llm-extension/rollup.config.ts
new file mode 100644
index 000000000..33e45823b
--- /dev/null
+++ b/extensions/tensorrt-llm-extension/rollup.config.ts
@@ -0,0 +1,73 @@
+import resolve from '@rollup/plugin-node-resolve'
+import commonjs from '@rollup/plugin-commonjs'
+import sourceMaps from 'rollup-plugin-sourcemaps'
+import typescript from 'rollup-plugin-typescript2'
+import json from '@rollup/plugin-json'
+import replace from '@rollup/plugin-replace'
+const packageJson = require('./package.json')
+
+export default [
+ {
+ input: `src/index.ts`,
+ output: [{ file: packageJson.main, format: 'es', sourcemap: true }],
+ watch: {
+ include: 'src/**',
+ },
+ plugins: [
+ replace({
+ EXTENSION_NAME: JSON.stringify(packageJson.name),
+ TENSORRT_VERSION: JSON.stringify('0.1.5'),
+ DOWNLOAD_RUNNER_URL:
+ process.platform === 'darwin' || process.platform === 'win32'
+ ? JSON.stringify(
+                'https://github.com/janhq/nitro-tensorrt-llm/releases/download/windows-v<version>/nitro-windows-v<version>-amd64-tensorrt-llm-<gpuarch>.tar.gz'
+ )
+ : JSON.stringify(
+                'https://github.com/janhq/nitro-tensorrt-llm/releases/download/linux-v<version>/nitro-linux-v<version>-amd64-tensorrt-llm-<gpuarch>.tar.gz'
+ ),
+ NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
+ INFERENCE_URL: JSON.stringify(
+ process.env.INFERENCE_URL ||
+ `${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/v1/chat/completions`
+ ),
+ COMPATIBILITY: JSON.stringify(packageJson.compatibility),
+ }),
+ json(),
+ typescript({ useTsconfigDeclarationDir: true }),
+ commonjs(),
+ resolve({
+ extensions: ['.js', '.ts', '.svelte'],
+ }),
+ sourceMaps(),
+ ],
+ },
+ {
+ input: `src/node/index.ts`,
+ output: [
+ { file: 'dist/node/index.cjs.js', format: 'cjs', sourcemap: true },
+ ],
+ external: ['@janhq/core/node'],
+ watch: {
+ include: 'src/node/**',
+ },
+ plugins: [
+ replace({
+ LOAD_MODEL_URL: JSON.stringify(
+ `${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/inferences/tensorrtllm/loadmodel`
+ ),
+ TERMINATE_ENGINE_URL: JSON.stringify(
+ `${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/inferences/processmanager/destroy`
+ ),
+ ENGINE_HOST: JSON.stringify(packageJson.config?.host ?? '127.0.0.1'),
+ ENGINE_PORT: JSON.stringify(packageJson.config?.port ?? '3928'),
+ }),
+ json(),
+ typescript({ useTsconfigDeclarationDir: true }),
+ commonjs(),
+ resolve({
+ extensions: ['.ts', '.js', '.json'],
+ }),
+ sourceMaps(),
+ ],
+ },
+]
diff --git a/extensions/tensorrt-llm-extension/src/@types/global.d.ts b/extensions/tensorrt-llm-extension/src/@types/global.d.ts
new file mode 100644
index 000000000..905e86380
--- /dev/null
+++ b/extensions/tensorrt-llm-extension/src/@types/global.d.ts
@@ -0,0 +1,10 @@
+declare const NODE: string
+declare const INFERENCE_URL: string
+declare const LOAD_MODEL_URL: string
+declare const TERMINATE_ENGINE_URL: string
+declare const ENGINE_HOST: string
+declare const ENGINE_PORT: string
+declare const DOWNLOAD_RUNNER_URL: string
+declare const TENSORRT_VERSION: string
+declare const COMPATIBILITY: object
+declare const EXTENSION_NAME: string
diff --git a/extensions/tensorrt-llm-extension/src/index.ts b/extensions/tensorrt-llm-extension/src/index.ts
new file mode 100644
index 000000000..02c676841
--- /dev/null
+++ b/extensions/tensorrt-llm-extension/src/index.ts
@@ -0,0 +1,171 @@
+/**
+ * @module tensorrt-llm-extension/src/index
+ */
+
+import {
+ Compatibility,
+ DownloadEvent,
+ DownloadRequest,
+ DownloadState,
+ GpuSetting,
+ InstallationState,
+ Model,
+ baseName,
+ downloadFile,
+ events,
+ executeOnMain,
+ joinPath,
+ showToast,
+ systemInformations,
+ LocalOAIEngine,
+ fs,
+ MessageRequest,
+ ModelEvent,
+} from '@janhq/core'
+import models from '../models.json'
+
+/**
+ * TensorRTLLMExtension - Implementation of LocalOAIEngine
+ * @extends LocalOAIEngine
+ * Provides pre-populated models for TensorRT-LLM
+ */
+export default class TensorRTLLMExtension extends LocalOAIEngine {
+ /**
+   * Overrides the custom function names for loading and unloading the model,
+   * which are implemented in the node module
+ */
+ override provider = 'nitro-tensorrt-llm'
+ override inferenceUrl = INFERENCE_URL
+ override nodeModule = NODE
+
+ private supportedGpuArch = ['turing', 'ampere', 'ada']
+
+ compatibility() {
+ return COMPATIBILITY as unknown as Compatibility
+ }
+ /**
+   * Models implemented by the extension,
+   * defining the pre-populated model list
+ */
+  async models(): Promise<Model[]> {
+ if ((await this.installationState()) === 'Installed')
+ return models as unknown as Model[]
+ return []
+ }
+
+  override async install(): Promise<void> {
+ const info = await systemInformations()
+ console.debug(
+      `TensorRTLLMExtension installing prerequisites... ${JSON.stringify(info)}`
+ )
+ const gpuSetting: GpuSetting | undefined = info.gpuSetting
+ if (gpuSetting === undefined || gpuSetting.gpus.length === 0) {
+ console.error('No GPU setting found. Please check your GPU setting.')
+ return
+ }
+
+ // TODO: we only check for the first graphics card. Need to refactor this later.
+ const firstGpu = gpuSetting.gpus[0]
+ if (!firstGpu.name.toLowerCase().includes('nvidia')) {
+ console.error('No Nvidia GPU found. Please check your GPU setting.')
+ return
+ }
+
+ if (firstGpu.arch === undefined) {
+ console.error('No GPU architecture found. Please check your GPU setting.')
+ return
+ }
+
+ if (!this.supportedGpuArch.includes(firstGpu.arch)) {
+ console.error(
+        `Your GPU: ${firstGpu.name} is not supported. Only 20xx, 30xx, and 40xx series are supported.`
+ )
+ return
+ }
+
+ const binaryFolderPath = await executeOnMain(
+ this.nodeModule,
+ 'binaryFolder'
+ )
+ if (!(await fs.existsSync(binaryFolderPath))) {
+ await fs.mkdirSync(binaryFolderPath)
+ }
+
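+    // DOWNLOAD_RUNNER_URL is templated at build time; fill in the TensorRT version and GPU architecture.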
+ const placeholderUrl = DOWNLOAD_RUNNER_URL
+ const tensorrtVersion = TENSORRT_VERSION
+
+ const url = placeholderUrl
+      .replace(/<version>/g, tensorrtVersion)
+      .replace(/<gpuarch>/g, firstGpu.arch)
+
+ const tarball = await baseName(url)
+
+ const tarballFullPath = await joinPath([binaryFolderPath, tarball])
+ const downloadRequest: DownloadRequest = {
+ url,
+ localPath: tarballFullPath,
+ extensionId: EXTENSION_NAME,
+ downloadType: 'extension',
+ }
+ downloadFile(downloadRequest)
+
+ // TODO: wrap this into a Promise
+ const onFileDownloadSuccess = async (state: DownloadState) => {
+      // Ignore events from other downloads
+ if (state.fileName !== tarball) return
+ events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
+ await executeOnMain(this.nodeModule, 'decompressRunner', tarballFullPath)
+ events.emit(DownloadEvent.onFileUnzipSuccess, state)
+
+ // Prepopulate models as soon as it's ready
+ this.prePopulateModels().then(() => {
+ showToast(
+ 'Extension installed successfully.',
+          'New models have been added to the Model Hub.'
+ )
+ })
+ }
+ events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
+ }
+
+  async onModelInit(model: Model): Promise<void> {
+ if (model.engine !== this.provider) return
+
+ if ((await this.installationState()) === 'Installed')
+ return super.onModelInit(model)
+ else {
+ events.emit(ModelEvent.OnModelFail, {
+ ...model,
+ error: {
+ message: 'EXTENSION_IS_NOT_INSTALLED::TensorRT-LLM extension',
+ },
+ })
+ }
+ }
+
+  override async installationState(): Promise<InstallationState> {
+    // For now, we just check for the nitro + TensorRT-LLM executable
+ const isNitroExecutableAvailable = await executeOnMain(
+ this.nodeModule,
+ 'isNitroExecutableAvailable'
+ )
+
+ return isNitroExecutableAvailable ? 'Installed' : 'NotInstalled'
+ }
+
+ override onInferenceStopped() {
+ if (!this.isRunning) return
+ showToast(
+ 'Unable to Stop Inference',
+ 'The model does not support stopping inference.'
+ )
+ return Promise.resolve()
+ }
+
+ inference(data: MessageRequest): void {
+ if (!this.isRunning) return
+ // TensorRT LLM Extension supports streaming only
+ if (data.model) data.model.parameters.stream = true
+ super.inference(data)
+ }
+}
diff --git a/extensions/tensorrt-llm-extension/src/node/index.ts b/extensions/tensorrt-llm-extension/src/node/index.ts
new file mode 100644
index 000000000..252468fc1
--- /dev/null
+++ b/extensions/tensorrt-llm-extension/src/node/index.ts
@@ -0,0 +1,191 @@
+import path from 'path'
+import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
+import tcpPortUsed from 'tcp-port-used'
+import fetchRT from 'fetch-retry'
+import { log } from '@janhq/core/node'
+import { existsSync } from 'fs'
+import decompress from 'decompress'
+
+// Wrap the global fetch (Node 18+) with retry support
+const fetchRetry = fetchRT(fetch)
+
+/**
+ * The parameters for a model load operation.
+ */
+interface ModelLoadParams {
+ engine_path: string
+ ctx_len: number
+}
+
+// The subprocess instance for Engine
+let subprocess: ChildProcessWithoutNullStreams | undefined = undefined
+
+/**
+ * Initializes an engine subprocess to load a machine learning model.
+ * @param params - The model load settings.
+ */
+async function loadModel(params: any): Promise<{ error: Error | undefined }> {
+ // modelFolder is the absolute path to the running model folder
+ // e.g. ~/jan/models/llama-2
+ let modelFolder = params.modelFolder
+
+ const settings: ModelLoadParams = {
+ engine_path: modelFolder,
+ ctx_len: params.model.settings.ctx_len ?? 2048,
+ }
+ return runEngineAndLoadModel(settings)
+}
+
+/**
+ * Stops the engine subprocess.
+ */
+function unloadModel(): Promise<void> {
+ const controller = new AbortController()
+ setTimeout(() => controller.abort(), 5000)
+ debugLog(`Request to kill engine`)
+
+ subprocess?.kill()
+ return fetch(TERMINATE_ENGINE_URL, {
+ method: 'DELETE',
+ signal: controller.signal,
+ })
+ .then(() => {
+ subprocess = undefined
+ })
+ .catch(() => {}) // Do nothing with this attempt
+ .then(() => tcpPortUsed.waitUntilFree(parseInt(ENGINE_PORT), 300, 5000)) // Wait for port available
+ .then(() => debugLog(`Engine process is terminated`))
+ .catch((err) => {
+ debugLog(
+        `Could not kill the running process on port ${ENGINE_PORT}. Another process might be running on the same port. ${err}`
+ )
+ throw 'PORT_NOT_AVAILABLE'
+ })
+}
+/**
+ * 1. Spawn engine process
+ * 2. Load model into engine subprocess
+ * @returns An object whose error field is set when the engine fails to load the model
+ */
+async function runEngineAndLoadModel(settings: ModelLoadParams) {
+ return unloadModel()
+ .then(runEngine)
+ .then(() => loadModelRequest(settings))
+ .catch((err) => {
+ // TODO: Broadcast error so app could display proper error message
+ debugLog(`${err}`, 'Error')
+ return { error: err }
+ })
+}
+
+/**
+ * Loads an LLM into the engine subprocess by sending an HTTP POST request.
+ */
+function loadModelRequest(
+ settings: ModelLoadParams
+): Promise<{ error: Error | undefined }> {
+ debugLog(`Loading model with params ${JSON.stringify(settings)}`)
+ return fetchRetry(LOAD_MODEL_URL, {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ },
+ body: JSON.stringify(settings),
+ retries: 3,
+ retryDelay: 500,
+ })
+ .then((res) => {
+ debugLog(`Load model success with response ${JSON.stringify(res)}`)
+ return Promise.resolve({ error: undefined })
+ })
+ .catch((err) => {
+ debugLog(`Load model failed with error ${err}`, 'Error')
+ return Promise.resolve({ error: err })
+ })
+}
+
+/**
+ * Spawns engine subprocess.
+ */
+function runEngine(): Promise<void> {
+ debugLog(`Spawning engine subprocess...`)
+
+  return new Promise<void>((resolve, reject) => {
+ // Current directory by default
+ let binaryFolder = path.join(__dirname, '..', 'bin')
+ // Binary path
+ const binary = path.join(
+ binaryFolder,
+ process.platform === 'win32' ? 'nitro.exe' : 'nitro'
+ )
+
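+    // Engine CLI args (an assumption based on upstream nitro usage): [thread_count, host, port].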
+ const args: string[] = ['1', ENGINE_HOST, ENGINE_PORT]
+ // Execute the binary
+    debugLog(`Spawning nitro at path: ${binary} with args: ${args}`)
+ subprocess = spawn(binary, args, {
+ cwd: binaryFolder,
+ env: {
+ ...process.env,
+ },
+ })
+
+ // Handle subprocess output
+ subprocess.stdout.on('data', (data: any) => {
+ debugLog(`${data}`)
+ })
+
+ subprocess.stderr.on('data', (data: any) => {
+ debugLog(`${data}`)
+ })
+
+ subprocess.on('close', (code: any) => {
+ debugLog(`Engine exited with code: ${code}`)
+ subprocess = undefined
+ reject(`child process exited with code ${code}`)
+ })
+
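+    // Poll every 300 ms, for up to 30 s, until the engine starts listening on its port.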
+ tcpPortUsed.waitUntilUsed(parseInt(ENGINE_PORT), 300, 30000).then(() => {
+ debugLog(`Engine is ready`)
+ resolve()
+ })
+ })
+}
+
+function debugLog(message: string, level: string = 'Debug') {
+ log(`[TENSORRT_LLM_NITRO]::${level}:${message}`)
+}
+
+const binaryFolder = async (): Promise<string> => {
+ return path.join(__dirname, '..', 'bin')
+}
+
+const decompressRunner = async (zipPath: string) => {
+ const output = path.join(__dirname, '..', 'bin')
+ console.debug(`Decompressing ${zipPath} to ${output}...`)
+ try {
+ const files = await decompress(zipPath, output)
+ console.debug('Decompress finished!', files)
+ } catch (err) {
+ console.error(`Decompress ${zipPath} failed: ${err}`)
+ }
+}
+
+const isNitroExecutableAvailable = async (): Promise<boolean> => {
+ const binary = path.join(
+ __dirname,
+ '..',
+ 'bin',
+ process.platform === 'win32' ? 'nitro.exe' : 'nitro'
+ )
+
+ return existsSync(binary)
+}
+
+export default {
+ binaryFolder,
+ decompressRunner,
+ loadModel,
+ unloadModel,
+ dispose: unloadModel,
+ isNitroExecutableAvailable,
+}
diff --git a/extensions/tensorrt-llm-extension/tsconfig.json b/extensions/tensorrt-llm-extension/tsconfig.json
new file mode 100644
index 000000000..478a05728
--- /dev/null
+++ b/extensions/tensorrt-llm-extension/tsconfig.json
@@ -0,0 +1,20 @@
+{
+ "compilerOptions": {
+ "moduleResolution": "node",
+ "target": "es5",
+ "module": "ES2020",
+ "lib": ["es2015", "es2016", "es2017", "dom"],
+ "strict": true,
+ "sourceMap": true,
+ "declaration": true,
+ "allowSyntheticDefaultImports": true,
+ "experimentalDecorators": true,
+ "emitDecoratorMetadata": true,
+ "declarationDir": "dist/types",
+ "outDir": "dist",
+ "importHelpers": true,
+ "resolveJsonModule": true,
+ "typeRoots": ["node_modules/@types"]
+ },
+ "include": ["src"]
+}
diff --git a/uikit/src/badge/styles.scss b/uikit/src/badge/styles.scss
index b777892d7..4788f65be 100644
--- a/uikit/src/badge/styles.scss
+++ b/uikit/src/badge/styles.scss
@@ -1,5 +1,5 @@
.badge {
- @apply focus:ring-ring border-border inline-flex items-center rounded-full border px-2.5 py-0.5 text-xs font-semibold transition-colors focus:outline-none focus:ring-2 focus:ring-offset-2;
+ @apply focus:ring-ring border-border inline-flex items-center rounded-md border px-2.5 py-0.5 text-xs font-semibold transition-colors focus:outline-none focus:ring-2 focus:ring-offset-2;
&-primary {
@apply border-transparent bg-blue-100 text-blue-600;
diff --git a/web/containers/DropdownListSidebar/index.tsx b/web/containers/DropdownListSidebar/index.tsx
index c05d26e51..70651a4d4 100644
--- a/web/containers/DropdownListSidebar/index.tsx
+++ b/web/containers/DropdownListSidebar/index.tsx
@@ -73,8 +73,9 @@ const DropdownListSidebar = ({
const [copyId, setCopyId] = useState('')
+ // TODO: Update filter condition for the local model
const localModel = downloadedModels.filter(
- (model) => model.engine === InferenceEngine.nitro
+ (model) => model.engine !== InferenceEngine.openai
)
const remoteModel = downloadedModels.filter(
(model) => model.engine === InferenceEngine.openai
@@ -293,7 +294,7 @@ const DropdownListSidebar = ({
{toGibibytes(x.metadata.size)}
- {x.engine == InferenceEngine.nitro && (
+ {x.metadata.size && (
)}
diff --git a/web/containers/Layout/BottomBar/InstallingExtension/InstallingExtensionModal.tsx b/web/containers/Layout/BottomBar/InstallingExtension/InstallingExtensionModal.tsx
new file mode 100644
index 000000000..d46764e88
--- /dev/null
+++ b/web/containers/Layout/BottomBar/InstallingExtension/InstallingExtensionModal.tsx
@@ -0,0 +1,87 @@
+import { useCallback, useEffect } from 'react'
+
+import { abortDownload } from '@janhq/core'
+import {
+ Button,
+ Modal,
+ ModalContent,
+ ModalHeader,
+ ModalTitle,
+ Progress,
+} from '@janhq/uikit'
+import { atom, useAtom, useAtomValue } from 'jotai'
+
+import {
+ formatDownloadPercentage,
+ formatExtensionsName,
+} from '@/utils/converter'
+
+import {
+ InstallingExtensionState,
+ installingExtensionAtom,
+} from '@/helpers/atoms/Extension.atom'
+
+export const showInstallingExtensionModalAtom = atom(false)
+
+const InstallingExtensionModal: React.FC = () => {
+ const [showInstallingExtensionModal, setShowInstallingExtensionModal] =
+ useAtom(showInstallingExtensionModalAtom)
+ const installingExtensions = useAtomValue(installingExtensionAtom)
+
+ useEffect(() => {
+ if (installingExtensions.length === 0) {
+ setShowInstallingExtensionModal(false)
+ }
+ }, [installingExtensions, setShowInstallingExtensionModal])
+
+ const onAbortInstallingExtensionClick = useCallback(
+ (item: InstallingExtensionState) => {
+ if (item.localPath) {
+ abortDownload(item.localPath)
+ }
+ },
+ []
+ )
+
+  return (
+    <Modal
+      open={showInstallingExtensionModal}
+      onOpenChange={() => setShowInstallingExtensionModal(false)}
+    >
+      <ModalContent>
+        <ModalHeader>
+          <ModalTitle>Installing Extension</ModalTitle>
+        </ModalHeader>
+        {Object.values(installingExtensions).map((item) => (
+          <div key={item.extensionId}>
+            <div>
+              <span>{formatExtensionsName(item.extensionId)}</span>
+              <span>{formatDownloadPercentage(item.percentage)}</span>
+            </div>
+            <Progress value={item.percentage * 100} />
+            <Button onClick={() => onAbortInstallingExtensionClick(item)}>
+              Cancel
+            </Button>
+          </div>
+        ))}
+      </ModalContent>
+    </Modal>
+  )
+}
+
+export default InstallingExtensionModal
diff --git a/web/containers/Layout/BottomBar/InstallingExtension/index.tsx b/web/containers/Layout/BottomBar/InstallingExtension/index.tsx
new file mode 100644
index 000000000..05e803881
--- /dev/null
+++ b/web/containers/Layout/BottomBar/InstallingExtension/index.tsx
@@ -0,0 +1,52 @@
+import { Fragment, useCallback } from 'react'
+
+import { Progress } from '@janhq/uikit'
+import { useAtomValue, useSetAtom } from 'jotai'
+
+import { showInstallingExtensionModalAtom } from './InstallingExtensionModal'
+
+import { installingExtensionAtom } from '@/helpers/atoms/Extension.atom'
+
+const InstallingExtension: React.FC = () => {
+ const installingExtensions = useAtomValue(installingExtensionAtom)
+ const setShowInstallingExtensionModal = useSetAtom(
+ showInstallingExtensionModalAtom
+ )
+ const shouldShowInstalling = installingExtensions.length > 0
+
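+  // Average the per-extension progress (a 0..1 fraction) and convert it to a percentage.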
+ let totalPercentage = 0
+ let totalExtensions = 0
+ for (const installation of installingExtensions) {
+ totalPercentage += installation.percentage
+ totalExtensions++
+ }
+ const progress = (totalPercentage / totalExtensions) * 100
+
+ const onClick = useCallback(() => {
+ setShowInstallingExtensionModal(true)
+ }, [setShowInstallingExtensionModal])
+
+  return (
+    <Fragment>
+      {shouldShowInstalling ? (
+        <div onClick={onClick}>
+          <p>Installing Extension</p>
+          <Progress value={progress} />
+          <span>{progress.toFixed(2)}%</span>
+        </div>
+      ) : null}
+    </Fragment>
+  )
+}
+
+export default InstallingExtension
diff --git a/web/containers/Layout/BottomBar/index.tsx b/web/containers/Layout/BottomBar/index.tsx
index 2373ac3d4..3683d23db 100644
--- a/web/containers/Layout/BottomBar/index.tsx
+++ b/web/containers/Layout/BottomBar/index.tsx
@@ -16,6 +16,7 @@ import ProgressBar from '@/containers/ProgressBar'
import { appDownloadProgress } from '@/containers/Providers/Jotai'
import ImportingModelState from './ImportingModelState'
+import InstallingExtension from './InstallingExtension'
import SystemMonitor from './SystemMonitor'
import UpdatedFailedModal from './UpdateFailedModal'
@@ -46,6 +47,7 @@ const BottomBar = () => {
+        <InstallingExtension />
diff --git a/web/containers/Layout/index.tsx b/web/containers/Layout/index.tsx
index 7e3ad38ab..fb08bc6ac 100644
--- a/web/containers/Layout/index.tsx
+++ b/web/containers/Layout/index.tsx
@@ -22,6 +22,8 @@ import ImportModelOptionModal from '@/screens/Settings/ImportModelOptionModal'
import ImportingModelModal from '@/screens/Settings/ImportingModelModal'
import SelectingModelModal from '@/screens/Settings/SelectingModelModal'
+import InstallingExtensionModal from './BottomBar/InstallingExtension/InstallingExtensionModal'
+
import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
const BaseLayout = (props: PropsWithChildren) => {
@@ -68,6 +70,7 @@ const BaseLayout = (props: PropsWithChildren) => {
{importModelStage === 'IMPORTING_MODEL' && }
{importModelStage === 'EDIT_MODEL_INFO' && }
{importModelStage === 'CONFIRM_CANCEL' && }
+      <InstallingExtensionModal />
)
}
diff --git a/web/containers/Providers/EventListener.tsx b/web/containers/Providers/EventListener.tsx
index bfc87917b..20fc6dde2 100644
--- a/web/containers/Providers/EventListener.tsx
+++ b/web/containers/Providers/EventListener.tsx
@@ -7,6 +7,10 @@ import { useSetAtom } from 'jotai'
import { setDownloadStateAtom } from '@/hooks/useDownloadState'
+import { formatExtensionsName } from '@/utils/converter'
+
+import { toaster } from '../Toast'
+
import AppUpdateListener from './AppUpdateListener'
import ClipboardListener from './ClipboardListener'
import EventHandler from './EventHandler'
@@ -14,46 +18,89 @@ import EventHandler from './EventHandler'
import ModelImportListener from './ModelImportListener'
import QuickAskListener from './QuickAskListener'
+import {
+ InstallingExtensionState,
+ removeInstallingExtensionAtom,
+ setInstallingExtensionAtom,
+} from '@/helpers/atoms/Extension.atom'
+
const EventListenerWrapper = ({ children }: PropsWithChildren) => {
const setDownloadState = useSetAtom(setDownloadStateAtom)
+ const setInstallingExtension = useSetAtom(setInstallingExtensionAtom)
+ const removeInstallingExtension = useSetAtom(removeInstallingExtensionAtom)
const onFileDownloadUpdate = useCallback(
async (state: DownloadState) => {
console.debug('onFileDownloadUpdate', state)
- setDownloadState(state)
+ if (state.downloadType === 'extension') {
+ const installingExtensionState: InstallingExtensionState = {
+ extensionId: state.extensionId!,
+ percentage: state.percent,
+ localPath: state.localPath,
+ }
+ setInstallingExtension(state.extensionId!, installingExtensionState)
+ } else {
+ setDownloadState(state)
+ }
},
- [setDownloadState]
+ [setDownloadState, setInstallingExtension]
)
const onFileDownloadError = useCallback(
(state: DownloadState) => {
console.debug('onFileDownloadError', state)
- setDownloadState(state)
+ if (state.downloadType === 'extension') {
+ removeInstallingExtension(state.extensionId!)
+ } else {
+ setDownloadState(state)
+ }
},
- [setDownloadState]
+ [setDownloadState, removeInstallingExtension]
)
const onFileDownloadSuccess = useCallback(
(state: DownloadState) => {
console.debug('onFileDownloadSuccess', state)
- setDownloadState(state)
+ if (state.downloadType !== 'extension') {
+ setDownloadState(state)
+ }
},
[setDownloadState]
)
+ const onFileUnzipSuccess = useCallback(
+ (state: DownloadState) => {
+ console.debug('onFileUnzipSuccess', state)
+ toaster({
+ title: 'Success',
+        description: `Installed ${formatExtensionsName(state.extensionId!)} successfully.`,
+ type: 'success',
+ })
+ removeInstallingExtension(state.extensionId!)
+ },
+ [removeInstallingExtension]
+ )
+
useEffect(() => {
console.debug('EventListenerWrapper: registering event listeners...')
events.on(DownloadEvent.onFileDownloadUpdate, onFileDownloadUpdate)
events.on(DownloadEvent.onFileDownloadError, onFileDownloadError)
events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
+ events.on(DownloadEvent.onFileUnzipSuccess, onFileUnzipSuccess)
return () => {
console.debug('EventListenerWrapper: unregistering event listeners...')
events.off(DownloadEvent.onFileDownloadUpdate, onFileDownloadUpdate)
events.off(DownloadEvent.onFileDownloadError, onFileDownloadError)
events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
+ events.off(DownloadEvent.onFileUnzipSuccess, onFileUnzipSuccess)
}
- }, [onFileDownloadUpdate, onFileDownloadError, onFileDownloadSuccess])
+ }, [
+ onFileDownloadUpdate,
+ onFileDownloadError,
+ onFileDownloadSuccess,
+ onFileUnzipSuccess,
+ ])
return (
diff --git a/web/extension/ExtensionManager.ts b/web/extension/ExtensionManager.ts
index 1259021f7..c976010c6 100644
--- a/web/extension/ExtensionManager.ts
+++ b/web/extension/ExtensionManager.ts
@@ -23,7 +23,9 @@ export class ExtensionManager {
* @param type - The type of the extension to retrieve.
* @returns The extension, if found.
*/
-  get<T extends BaseExtension>(type: ExtensionTypeEnum): T | undefined {
+  get<T extends BaseExtension>(
+    type: ExtensionTypeEnum | string
+  ): T | undefined {
return this.extensions.get(type) as T | undefined
}
diff --git a/web/helpers/atoms/Extension.atom.ts b/web/helpers/atoms/Extension.atom.ts
new file mode 100644
index 000000000..7af755e35
--- /dev/null
+++ b/web/helpers/atoms/Extension.atom.ts
@@ -0,0 +1,40 @@
+import { atom } from 'jotai'
+
+type ExtensionId = string
+
+export type InstallingExtensionState = {
+ extensionId: ExtensionId
+ percentage: number
+ localPath?: string
+}
+
+export const installingExtensionAtom = atom<InstallingExtensionState[]>([])
+
+export const setInstallingExtensionAtom = atom(
+ null,
+ (get, set, extensionId: string, state: InstallingExtensionState) => {
+ const current = get(installingExtensionAtom)
+
+ const isExists = current.some((e) => e.extensionId === extensionId)
+ if (isExists) {
+ const newCurrent = current.map((e) => {
+ if (e.extensionId === extensionId) {
+ return state
+ }
+ return e
+ })
+ set(installingExtensionAtom, newCurrent)
+ } else {
+ set(installingExtensionAtom, [...current, state])
+ }
+ }
+)
+
+export const removeInstallingExtensionAtom = atom(
+ null,
+ (get, set, extensionId: string) => {
+ const current = get(installingExtensionAtom)
+ const newCurrent = current.filter((e) => e.extensionId !== extensionId)
+ set(installingExtensionAtom, newCurrent)
+ }
+)
diff --git a/web/hooks/useActiveModel.ts b/web/hooks/useActiveModel.ts
index 600e10783..e6c519f9f 100644
--- a/web/hooks/useActiveModel.ts
+++ b/web/hooks/useActiveModel.ts
@@ -40,6 +40,16 @@ export function useActiveModel() {
console.debug(`Model ${modelId} is already initialized. Ignore..`)
return
}
+
+ let model = downloadedModelsRef?.current.find((e) => e.id === modelId)
+
+ // Switch between engines
+ if (model && activeModel && activeModel.engine !== model.engine) {
+ stopModel()
+    // TODO: refactoring the inference provider would address this
+ await new Promise((res) => setTimeout(res, 1000))
+ }
+
// TODO: incase we have multiple assistants, the configuration will be from assistant
setLoadModelError(undefined)
@@ -47,8 +57,6 @@ export function useActiveModel() {
setStateModel({ state: 'start', loading: true, model: modelId })
- let model = downloadedModelsRef?.current.find((e) => e.id === modelId)
-
if (!model) {
toaster({
title: `Model ${modelId} not found!`,
diff --git a/web/hooks/useDownloadModel.ts b/web/hooks/useDownloadModel.ts
index 9f6334c71..d0d13d93b 100644
--- a/web/hooks/useDownloadModel.ts
+++ b/web/hooks/useDownloadModel.ts
@@ -8,12 +8,15 @@ import {
joinPath,
ModelArtifact,
DownloadState,
+ GpuSetting,
} from '@janhq/core'
import { useAtomValue, useSetAtom } from 'jotai'
import { setDownloadStateAtom } from './useDownloadState'
+import useGpuSetting from './useGpuSetting'
+
import { extensionManager } from '@/extension/ExtensionManager'
import {
ignoreSslAtom,
@@ -29,6 +32,8 @@ export default function useDownloadModel() {
const setDownloadState = useSetAtom(setDownloadStateAtom)
const addDownloadingModel = useSetAtom(addDownloadingModelAtom)
+ const { getGpuSettings } = useGpuSetting()
+
const downloadModel = useCallback(
async (model: Model) => {
const childProgresses: DownloadState[] = model.sources.map(
@@ -68,10 +73,22 @@ export default function useDownloadModel() {
})
addDownloadingModel(model)
-
- await localDownloadModel(model, ignoreSSL, proxyEnabled ? proxy : '')
+ const gpuSettings = await getGpuSettings()
+ await localDownloadModel(
+ model,
+ ignoreSSL,
+ proxyEnabled ? proxy : '',
+ gpuSettings
+ )
},
- [ignoreSSL, proxy, proxyEnabled, addDownloadingModel, setDownloadState]
+ [
+ ignoreSSL,
+ proxy,
+ proxyEnabled,
+ getGpuSettings,
+ addDownloadingModel,
+ setDownloadState,
+ ]
)
const abortModelDownload = useCallback(async (model: Model) => {
@@ -90,8 +107,9 @@ export default function useDownloadModel() {
const localDownloadModel = async (
model: Model,
ignoreSSL: boolean,
- proxy: string
+ proxy: string,
+ gpuSettings?: GpuSetting
) =>
extensionManager
    .get<ModelExtension>(ExtensionTypeEnum.Model)
- ?.downloadModel(model, { ignoreSSL, proxy })
+ ?.downloadModel(model, gpuSettings, { ignoreSSL, proxy })
diff --git a/web/hooks/useDownloadState.ts b/web/hooks/useDownloadState.ts
index 06de9bef6..03a8883cb 100644
--- a/web/hooks/useDownloadState.ts
+++ b/web/hooks/useDownloadState.ts
@@ -18,123 +18,129 @@ export const modelDownloadStateAtom = atom<Record<string, DownloadState>>({})
export const setDownloadStateAtom = atom(
null,
(get, set, state: DownloadState) => {
- const currentState = { ...get(modelDownloadStateAtom) }
+ try {
+ const currentState = { ...get(modelDownloadStateAtom) }
- if (state.downloadState === 'end') {
- const modelDownloadState = currentState[state.modelId]
+ if (state.downloadState === 'end') {
+ const modelDownloadState = currentState[state.modelId]
- const updatedChildren: DownloadState[] =
- modelDownloadState.children!.filter(
- (m) => m.fileName !== state.fileName
+ const updatedChildren: DownloadState[] = (
+ modelDownloadState.children ?? []
+ ).filter((m) => m.fileName !== state.fileName)
+ updatedChildren.push(state)
+ modelDownloadState.children = updatedChildren
+ currentState[state.modelId] = modelDownloadState
+
+ const isAllChildrenDownloadEnd = modelDownloadState.children?.every(
+ (m) => m.downloadState === 'end'
)
- updatedChildren.push(state)
- modelDownloadState.children = updatedChildren
- currentState[state.modelId] = modelDownloadState
- const isAllChildrenDownloadEnd = modelDownloadState.children?.every(
- (m) => m.downloadState === 'end'
- )
+ if (isAllChildrenDownloadEnd) {
+ // download successfully
+ delete currentState[state.modelId]
+ set(removeDownloadingModelAtom, state.modelId)
- if (isAllChildrenDownloadEnd) {
- // download successfully
+ const model = get(configuredModelsAtom).find(
+ (e) => e.id === state.modelId
+ )
+ if (model) set(downloadedModelsAtom, (prev) => [...prev, model])
+ toaster({
+ title: 'Download Completed',
+ description: `Download ${state.modelId} completed`,
+ type: 'success',
+ })
+ }
+ } else if (state.downloadState === 'error') {
+ // download error
delete currentState[state.modelId]
set(removeDownloadingModelAtom, state.modelId)
-
- const model = get(configuredModelsAtom).find(
- (e) => e.id === state.modelId
- )
- if (model) set(downloadedModelsAtom, (prev) => [...prev, model])
- toaster({
- title: 'Download Completed',
- description: `Download ${state.modelId} completed`,
- type: 'success',
- })
- }
- } else if (state.downloadState === 'error') {
- // download error
- delete currentState[state.modelId]
- set(removeDownloadingModelAtom, state.modelId)
- if (state.error === 'aborted') {
- toaster({
- title: 'Cancel Download',
- description: `Model ${state.modelId} download cancelled`,
- type: 'warning',
- })
- } else {
- let error = state.error
- if (
- typeof error?.includes === 'function' &&
- state.error?.includes('certificate')
- ) {
- error +=
- '. To fix enable "Ignore SSL Certificates" in Advanced settings.'
+ if (state.error === 'aborted') {
+ toaster({
+ title: 'Cancel Download',
+ description: `Model ${state.modelId} download cancelled`,
+ type: 'warning',
+ })
+ } else {
+ let error = state.error
+ if (
+ typeof error?.includes === 'function' &&
+ state.error?.includes('certificate')
+ ) {
+ error +=
+ '. To fix enable "Ignore SSL Certificates" in Advanced settings.'
+ }
+ toaster({
+ title: 'Download Failed',
+ description: `Model ${state.modelId} download failed: ${error}`,
+ type: 'error',
+ })
+ }
+ } else {
+ // download in progress
+ if (state.size.total === 0) {
+ // this is initial state, just set the state
+ currentState[state.modelId] = state
+ set(modelDownloadStateAtom, currentState)
+ return
}
- toaster({
- title: 'Download Failed',
- description: `Model ${state.modelId} download failed: ${error}`,
- type: 'error',
- })
- }
- } else {
- // download in progress
- if (state.size.total === 0) {
- // this is initial state, just set the state
- currentState[state.modelId] = state
- set(modelDownloadStateAtom, currentState)
- return
- }
- const modelDownloadState = currentState[state.modelId]
- if (!modelDownloadState) {
- console.debug('setDownloadStateAtom: modelDownloadState not found')
- return
- }
+ const modelDownloadState = currentState[state.modelId]
+ if (!modelDownloadState) {
+ console.debug('setDownloadStateAtom: modelDownloadState not found')
+ return
+ }
- // delete the children if the filename is matched and replace the new state
- const updatedChildren: DownloadState[] =
- modelDownloadState.children!.filter(
- (m) => m.fileName !== state.fileName
+ // delete the children if the filename is matched and replace the new state
+ const updatedChildren: DownloadState[] = (
+ modelDownloadState.children ?? []
+ ).filter((m) => m.fileName !== state.fileName)
+
+ updatedChildren.push(state)
+
+ // re-calculate the overall progress if we have all the children download data
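+      // A child with total === 0 counts as "not ready" only if it was already
+      // being tracked and has not finished; untracked entries are ignored.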
+ const isAnyChildDownloadNotReady = updatedChildren.some(
+ (m) =>
+ m.size.total === 0 &&
+ !modelDownloadState.children?.some(
+ (e) => e.fileName === m.fileName && e.downloadState === 'end'
+ ) &&
+ modelDownloadState.children?.some((e) => e.fileName === m.fileName)
)
- updatedChildren.push(state)
+ modelDownloadState.children = updatedChildren
+ if (isAnyChildDownloadNotReady) {
+ // just update the children
+ currentState[state.modelId] = modelDownloadState
+ set(modelDownloadStateAtom, currentState)
+ return
+ }
- // re-calculate the overall progress if we have all the children download data
- const isAnyChildDownloadNotReady = updatedChildren.some(
- (m) => m.size.total === 0
- )
+ const parentTotalSize = modelDownloadState.size.total
+ if (parentTotalSize === 0) {
+        // calculate the parent's total size by summing the children's total sizes
+ const totalSize = updatedChildren.reduce(
+ (acc, m) => acc + m.size.total,
+ 0
+ )
- modelDownloadState.children = updatedChildren
+ modelDownloadState.size.total = totalSize
+ }
- if (isAnyChildDownloadNotReady) {
- // just update the children
- currentState[state.modelId] = modelDownloadState
- set(modelDownloadStateAtom, currentState)
-
- return
- }
-
- const parentTotalSize = modelDownloadState.size.total
- if (parentTotalSize === 0) {
- // calculate the total size of the parent by sum all children total size
- const totalSize = updatedChildren.reduce(
- (acc, m) => acc + m.size.total,
+      // calculate the total transferred size by summing the children's transferred sizes
+ const transferredSize = updatedChildren.reduce(
+ (acc, m) => acc + m.size.transferred,
0
)
-
- modelDownloadState.size.total = totalSize
+ modelDownloadState.size.transferred = transferredSize
+ modelDownloadState.percent =
+ parentTotalSize === 0 ? 0 : transferredSize / parentTotalSize
+ currentState[state.modelId] = modelDownloadState
}
- // calculate the total transferred size by sum all children transferred size
- const transferredSize = updatedChildren.reduce(
- (acc, m) => acc + m.size.transferred,
- 0
- )
- modelDownloadState.size.transferred = transferredSize
- modelDownloadState.percent =
- parentTotalSize === 0 ? 0 : transferredSize / parentTotalSize
- currentState[state.modelId] = modelDownloadState
+ set(modelDownloadStateAtom, currentState)
+ } catch (e) {
+ console.debug('setDownloadStateAtom: state', state)
+ console.debug('setDownloadStateAtom: error', e)
}
-
- set(modelDownloadStateAtom, currentState)
}
)
diff --git a/web/hooks/useGpuSetting.ts b/web/hooks/useGpuSetting.ts
new file mode 100644
index 000000000..36f51ed57
--- /dev/null
+++ b/web/hooks/useGpuSetting.ts
@@ -0,0 +1,21 @@
+import { useCallback } from 'react'
+
+import { ExtensionTypeEnum, MonitoringExtension } from '@janhq/core'
+
+import { extensionManager } from '@/extension'
+
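+/**
+ * Hook that queries the system-monitoring extension for current GPU settings;
+ * resolves to undefined when the extension reports none.
+ */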
+export default function useGpuSetting() {
+ const getGpuSettings = useCallback(async () => {
+ const gpuSetting = await extensionManager
+    ?.get<MonitoringExtension>(ExtensionTypeEnum.SystemMonitoring)
+ ?.getGpuSetting()
+
+ if (!gpuSetting) {
+ console.debug('No GPU setting found')
+ return undefined
+ }
+ return gpuSetting
+ }, [])
+
+ return { getGpuSettings }
+}
diff --git a/web/next.config.js b/web/next.config.js
index a4b3e6d43..48ea0703e 100644
--- a/web/next.config.js
+++ b/web/next.config.js
@@ -38,6 +38,7 @@ const nextConfig = {
isMac: process.platform === 'darwin',
isWindows: process.platform === 'win32',
isLinux: process.platform === 'linux',
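+      // JSON.stringify makes webpack's DefinePlugin inline a string literal
+      // (e.g. '"win32"') rather than a bare identifier at build time.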
+ PLATFORM: JSON.stringify(process.platform),
}),
]
return config
diff --git a/web/screens/Chat/ChatInput/index.tsx b/web/screens/Chat/ChatInput/index.tsx
index c90a12cd2..8707e8bcd 100644
--- a/web/screens/Chat/ChatInput/index.tsx
+++ b/web/screens/Chat/ChatInput/index.tsx
@@ -244,16 +244,13 @@ const ChatInput: React.FC = () => {
            if (
-              !activeThread?.assistants[0].model.settings
-                .vision_model ||
              activeThread?.assistants[0].model.settings
                .text_model !== false
            ) {
diff --git a/web/screens/Chat/ErrorMessage/index.tsx b/web/screens/Chat/ErrorMessage/index.tsx
index 25cec1cb9..5be87a59d 100644
--- a/web/screens/Chat/ErrorMessage/index.tsx
+++ b/web/screens/Chat/ErrorMessage/index.tsx
@@ -7,11 +7,14 @@ import ModalTroubleShooting, {
modalTroubleShootingAtom,
} from '@/containers/ModalTroubleShoot'
+import { MainViewState } from '@/constants/screens'
+
import { loadModelErrorAtom } from '@/hooks/useActiveModel'
import useSendChatMessage from '@/hooks/useSendChatMessage'
import { getErrorTitle } from '@/utils/errorMessage'
+import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
import { getCurrentChatMessagesAtom } from '@/helpers/atoms/ChatMessage.atom'
const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
@@ -19,6 +22,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
const { resendChatMessage } = useSendChatMessage()
const setModalTroubleShooting = useSetAtom(modalTroubleShootingAtom)
const loadModelError = useAtomValue(loadModelErrorAtom)
+ const setMainState = useSetAtom(mainViewStateAtom)
const PORT_NOT_AVAILABLE = 'PORT_NOT_AVAILABLE'
const regenerateMessage = async () => {
@@ -70,6 +74,23 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
+          ) : loadModelError?.includes('EXTENSION_IS_NOT_INSTALLED') ? (
+            <p>
+              Model is currently unavailable. Please switch to a different
+              model or install the{' '}
+              <span onClick={() => setMainState(MainViewState.Settings)}>
+                extension
+              </span>{' '}
+              to continue using it.
+            </p>
) : (
= ({ model }) => {
Format
- {model.format}
+              <span>
+                {model.format}
+              </span>
diff --git a/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx b/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx
index 38e7f65a6..465e69fa6 100644
--- a/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx
+++ b/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx
@@ -152,6 +152,7 @@ const ExploreModelItemHeader: React.FC = ({ model, onClick, open }) => {
{model.name}
+          <EngineBadge engine={model.engine} />
@@ -172,4 +173,23 @@ const ExploreModelItemHeader: React.FC = ({ model, onClick, open }) => {
)
}
+type EngineBadgeProps = {
+ engine: string
+}
+
+const EngineBadge: React.FC<EngineBadgeProps> = ({ engine }) => {
+ const title = 'TensorRT-LLM'
+
+ switch (engine) {
+ case 'nitro-tensorrt-llm':
+ return (
+        <span>
+          {title}
+        </span>
+ )
+ default:
+ return null
+ }
+}
+
export default ExploreModelItemHeader
diff --git a/web/screens/Settings/CoreExtensions/TensorRtExtensionItem.tsx b/web/screens/Settings/CoreExtensions/TensorRtExtensionItem.tsx
new file mode 100644
index 000000000..8033cd588
--- /dev/null
+++ b/web/screens/Settings/CoreExtensions/TensorRtExtensionItem.tsx
@@ -0,0 +1,226 @@
+import { useCallback, useEffect, useState } from 'react'
+
+import {
+ Compatibility,
+ GpuSetting,
+ InstallationState,
+ abortDownload,
+ systemInformations,
+} from '@janhq/core'
+import {
+ Button,
+ Progress,
+ Tooltip,
+ TooltipArrow,
+ TooltipContent,
+ TooltipPortal,
+ TooltipTrigger,
+} from '@janhq/uikit'
+
+import { InfoCircledIcon } from '@radix-ui/react-icons'
+import { useAtomValue } from 'jotai'
+
+import { extensionManager } from '@/extension'
+import Extension from '@/extension/Extension'
+import { installingExtensionAtom } from '@/helpers/atoms/Extension.atom'
+
+type Props = {
+ item: Extension
+}
+
+const TensorRtExtensionItem: React.FC<Props> = ({ item }) => {
+  const [compatibility, setCompatibility] = useState<Compatibility | undefined>(
+    undefined
+  )
+  const [installState, setInstallState] =
+    useState<InstallationState>('NotRequired')
+ const installingExtensions = useAtomValue(installingExtensionAtom)
+ const [isGpuSupported, setIsGpuSupported] = useState(false)
+
+ const isInstalling = installingExtensions.some(
+ (e) => e.extensionId === item.name
+ )
+
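+  // -1 is the sentinel for "no installation in progress": InstallStateIndicator
+  // below renders its progress bar only when installProgress !== -1.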
+ const progress = isInstalling
+ ? installingExtensions.find((e) => e.extensionId === item.name)
+ ?.percentage ?? -1
+ : -1
+
+ useEffect(() => {
+ const getSystemInfos = async () => {
+ const info = await systemInformations()
+ if (!info) {
+ setIsGpuSupported(false)
+ return
+ }
+
+ const gpuSettings: GpuSetting | undefined = info.gpuSetting
+ if (!gpuSettings || gpuSettings.gpus.length === 0) {
+ setIsGpuSupported(false)
+ return
+ }
+
+ const arch = gpuSettings.gpus[0].arch
+ if (!arch) {
+ setIsGpuSupported(false)
+ return
+ }
+
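+      // Turing/Ampere/Ada cover NVIDIA RTX 20/30/40-series GPUs; the assumption
+      // is that the bundled TensorRT-LLM builds target only these architectures.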
+ const supportedGpuArch = ['turing', 'ampere', 'ada']
+ setIsGpuSupported(supportedGpuArch.includes(arch))
+ }
+ getSystemInfos()
+ }, [])
+
+ useEffect(() => {
+ const getExtensionInstallationState = async () => {
+ const extension = extensionManager.get(item.name ?? '')
+ if (!extension) return
+
+ if (typeof extension?.installationState === 'function') {
+ const installState = await extension.installationState()
+ setInstallState(installState)
+ }
+ }
+
+ getExtensionInstallationState()
+ }, [item.name, isInstalling])
+
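+  // compatibility() is assumed to return synchronous metadata declared by the
+  // extension, including the `platform` array that is checked against PLATFORM.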
+ useEffect(() => {
+ const extension = extensionManager.get(item.name ?? '')
+ if (!extension) return
+ setCompatibility(extension.compatibility())
+ }, [setCompatibility, item.name])
+
+ const onInstallClick = useCallback(async () => {
+ const extension = extensionManager.get(item.name ?? '')
+ if (!extension) return
+
+ await extension.install()
+ }, [item.name])
+
+ const onCancelInstallingClick = () => {
+ const extension = installingExtensions.find(
+ (e) => e.extensionId === item.name
+ )
+ if (extension?.localPath) {
+ abortDownload(extension.localPath)
+ }
+ }
+
+  return (
+    <div>
+      <div>
+        <div>
+          <h6>
+            TensorRT-LLM Extension
+          </h6>
+          <p>
+            v{item.version}
+          </p>
+        </div>
+        <p>
+          {item.description}
+        </p>
+      </div>
+
+      {(!compatibility || compatibility['platform']?.includes(PLATFORM)) &&
+      isGpuSupported ? (
+        <InstallStateIndicator
+          installProgress={progress}
+          installState={installState}
+          onInstallClick={onInstallClick}
+          onCancelClick={onCancelInstallingClick}
+        />
+      ) : (
+        <div>
+          <p>
+            Incompatible{' '}
+          </p>
+          <Tooltip>
+            <TooltipTrigger>
+              <InfoCircledIcon />
+            </TooltipTrigger>
+            <TooltipPortal>
+              <TooltipContent>
+                {compatibility &&
+                !compatibility['platform']?.includes(PLATFORM) ? (
+                  <span>
+                    Only available on{' '}
+                    {compatibility?.platform
+                      ?.map((e: string) =>
+                        e === 'win32'
+                          ? 'Windows'
+                          : e === 'linux'
+                            ? 'Linux'
+                            : 'MacOS'
+                      )
+                      .join(', ')}
+                  </span>
+                ) : (
+                  <span>
+                    Your GPUs are not compatible with this extension
+                  </span>
+                )}
+                <TooltipArrow />
+              </TooltipContent>
+            </TooltipPortal>
+          </Tooltip>
+        </div>
+      )}
+    </div>
+  )
+}
+
+type InstallStateProps = {
+ installProgress: number
+ installState: InstallationState
+ onInstallClick: () => void
+ onCancelClick: () => void
+}
+
+const InstallStateIndicator: React.FC<InstallStateProps> = ({
+ installProgress,
+ installState,
+ onInstallClick,
+ onCancelClick,
+}) => {
+  if (installProgress !== -1) {
+    const progress = installProgress * 100
+    return (
+      <div>
+        <button onClick={onCancelClick}>Cancel</button>
+        <div>
+          <Progress value={progress} />
+          <span>
+            {progress.toFixed(0)}%
+          </span>
+        </div>
+      </div>
+    )
+  }
+
+  // TODO: NamH check for dark mode here
+  switch (installState) {
+    case 'Installed':
+      return (
+        <div>
+          Installed
+        </div>
+      )
+    case 'NotInstalled':
+      return (
+        <Button onClick={onInstallClick}>Install</Button>
+      )
+    default:
+      return <div />
+  }
+}
+
+export default TensorRtExtensionItem
diff --git a/web/screens/Settings/CoreExtensions/index.tsx b/web/screens/Settings/CoreExtensions/index.tsx
index 8c9f92d7a..f5b66abeb 100644
--- a/web/screens/Settings/CoreExtensions/index.tsx
+++ b/web/screens/Settings/CoreExtensions/index.tsx
@@ -4,13 +4,18 @@ import React, { useState, useEffect, useRef } from 'react'
import { Button, ScrollArea } from '@janhq/uikit'
+import Loader from '@/containers/Loader'
+
import { formatExtensionsName } from '@/utils/converter'
+import TensorRtExtensionItem from './TensorRtExtensionItem'
+
import { extensionManager } from '@/extension'
import Extension from '@/extension/Extension'
const ExtensionCatalog = () => {
const [activeExtensions, setActiveExtensions] = useState<Extension[]>([])
+ const [showLoading, setShowLoading] = useState(false)
const fileInputRef = useRef<HTMLInputElement>(null)
/**
* Fetches the active extensions and their preferences from the `extensions` and `preferences` modules.
@@ -63,65 +68,76 @@ const ExtensionCatalog = () => {
const handleFileChange = (event: React.ChangeEvent<HTMLInputElement>) => {
const file = event.target.files?.[0]
if (file) {
+ setShowLoading(true)
install(event)
}
}
 return (
-    <ScrollArea>
-      <div>
-        {activeExtensions.map((item, i) => {
-          return (
-            <div key={i}>
-              <div>
-                <h6>
-                  {formatExtensionsName(item.name ?? item.description ?? '')}
-                </h6>
-                <p>v{item.version}</p>
-              </div>
-              <p>{item.description}</p>
-            </div>
-          )
-        })}
-        {/* Manual Installation */}
-        <div>
-          <div>
-            <h6>Manual Installation</h6>
-            <p>Select a extension file to install (.tgz)</p>
-          </div>
-          <input
-            type="file"
-            ref={fileInputRef}
-            hidden
-            onChange={handleFileChange}
-          />
-          <Button onClick={() => fileInputRef.current?.click()}>Select</Button>
-        </div>
-      </div>
-    </ScrollArea>
+    <>
+      <ScrollArea>
+        <div>
+          {activeExtensions.map((item, i) => {
+            // TODO: this is bad code, rewrite it
+            if (item.name === '@janhq/tensorrt-llm-extension') {
+              return <TensorRtExtensionItem key={i} item={item} />
+            }
+
+            return (
+              <div key={i}>
+                <div>
+                  <h6>
+                    {formatExtensionsName(
+                      item.name ?? item.description ?? ''
+                    )}
+                  </h6>
+                  <p>v{item.version}</p>
+                </div>
+                <p>{item.description}</p>
+              </div>
+            )
+          })}
+          {/* Manual Installation */}
+          <div>
+            <div>
+              <h6>Manual Installation</h6>
+              <p>Select an extension file to install (.tgz)</p>
+            </div>
+            <input
+              type="file"
+              ref={fileInputRef}
+              hidden
+              onChange={handleFileChange}
+            />
+            <Button onClick={() => fileInputRef.current?.click()}>Select</Button>
+          </div>
+        </div>
+      </ScrollArea>
+      {showLoading && <Loader />}
+    </>
 )
}
diff --git a/web/services/appService.ts b/web/services/appService.ts
new file mode 100644
index 000000000..9327d55c3
--- /dev/null
+++ b/web/services/appService.ts
@@ -0,0 +1,24 @@
+import { ExtensionTypeEnum, MonitoringExtension } from '@janhq/core'
+
+import { toaster } from '@/containers/Toast'
+
+import { extensionManager } from '@/extension'
+
+export const appService = {
+ systemInformations: async () => {
+ const gpuSetting = await extensionManager
+    ?.get<MonitoringExtension>(ExtensionTypeEnum.SystemMonitoring)
+ ?.getGpuSetting()
+
+ return {
+ gpuSetting,
+ // TODO: Other system information
+ }
+ },
+ showToast: (title: string, description: string) => {
+ toaster({
+ title,
+      description,
+ })
+ },
+}
diff --git a/web/services/coreService.ts b/web/services/coreService.ts
index c010c6cec..a483cc452 100644
--- a/web/services/coreService.ts
+++ b/web/services/coreService.ts
@@ -1,5 +1,7 @@
+import { appService } from './appService'
import { EventEmitter } from './eventsService'
import { restAPI } from './restService'
+
export const setupCoreServices = () => {
if (typeof window === 'undefined') {
console.debug('undefine', window)
@@ -10,7 +12,10 @@ export const setupCoreServices = () => {
if (!window.core) {
window.core = {
events: new EventEmitter(),
- api: window.electronAPI ?? restAPI,
+ api: {
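+        // Use the Electron IPC bridge when present (desktop), else the REST
+        // client (server mode); appService is spread last, so its methods win.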
+ ...(window.electronAPI ? window.electronAPI : restAPI),
+ ...appService,
+ },
}
}
}
diff --git a/web/types/index.d.ts b/web/types/index.d.ts
index 833c3e2bd..ed83e0d14 100644
--- a/web/types/index.d.ts
+++ b/web/types/index.d.ts
@@ -11,6 +11,7 @@ declare global {
declare const isMac: boolean
declare const isWindows: boolean
declare const isLinux: boolean
+ declare const PLATFORM: string
interface Core {
api: APIFunctions
events: EventEmitter