Merge pull request #2362 from janhq/update-docs-14th-mar

Update docs 14th mar
This commit is contained in:
Daniel 2024-03-14 21:52:36 +07:00 committed by GitHub
commit 4bc76b64c7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
118 changed files with 3977 additions and 1771 deletions

20
.gitignore vendored
View File

@ -22,16 +22,16 @@ package-lock.json
core/lib/** core/lib/**
# Nitro binary files # Nitro binary files
extensions/inference-nitro-extension/bin/*/nitro extensions/*-extension/bin/*/nitro
extensions/inference-nitro-extension/bin/*/*.metal extensions/*-extension/bin/*/*.metal
extensions/inference-nitro-extension/bin/*/*.exe extensions/*-extension/bin/*/*.exe
extensions/inference-nitro-extension/bin/*/*.dll extensions/*-extension/bin/*/*.dll
extensions/inference-nitro-extension/bin/*/*.exp extensions/*-extension/bin/*/*.exp
extensions/inference-nitro-extension/bin/*/*.lib extensions/*-extension/bin/*/*.lib
extensions/inference-nitro-extension/bin/saved-* extensions/*-extension/bin/saved-*
extensions/inference-nitro-extension/bin/*.tar.gz extensions/*-extension/bin/*.tar.gz
extensions/inference-nitro-extension/bin/vulkaninfoSDK.exe extensions/*-extension/bin/vulkaninfoSDK.exe
extensions/inference-nitro-extension/bin/vulkaninfo extensions/*-extension/bin/vulkaninfo
# Turborepo # Turborepo

View File

@ -76,31 +76,31 @@ Jan is an open-source ChatGPT alternative that runs 100% offline on your compute
<tr style="text-align:center"> <tr style="text-align:center">
<td style="text-align:center"><b>Experimental (Nightly Build)</b></td> <td style="text-align:center"><b>Experimental (Nightly Build)</b></td>
<td style="text-align:center"> <td style="text-align:center">
<a href='https://delta.jan.ai/latest/jan-win-x64-0.4.8-310.exe'> <a href='https://delta.jan.ai/latest/jan-win-x64-0.4.8-322.exe'>
<img src='./docs/static/img/windows.png' style="height:14px; width: 14px" /> <img src='./docs/static/img/windows.png' style="height:14px; width: 14px" />
<b>jan.exe</b> <b>jan.exe</b>
</a> </a>
</td> </td>
<td style="text-align:center"> <td style="text-align:center">
<a href='https://delta.jan.ai/latest/jan-mac-x64-0.4.8-310.dmg'> <a href='https://delta.jan.ai/latest/jan-mac-x64-0.4.8-322.dmg'>
<img src='./docs/static/img/mac.png' style="height:15px; width: 15px" /> <img src='./docs/static/img/mac.png' style="height:15px; width: 15px" />
<b>Intel</b> <b>Intel</b>
</a> </a>
</td> </td>
<td style="text-align:center"> <td style="text-align:center">
<a href='https://delta.jan.ai/latest/jan-mac-arm64-0.4.8-310.dmg'> <a href='https://delta.jan.ai/latest/jan-mac-arm64-0.4.8-322.dmg'>
<img src='./docs/static/img/mac.png' style="height:15px; width: 15px" /> <img src='./docs/static/img/mac.png' style="height:15px; width: 15px" />
<b>M1/M2</b> <b>M1/M2</b>
</a> </a>
</td> </td>
<td style="text-align:center"> <td style="text-align:center">
<a href='https://delta.jan.ai/latest/jan-linux-amd64-0.4.8-310.deb'> <a href='https://delta.jan.ai/latest/jan-linux-amd64-0.4.8-322.deb'>
<img src='./docs/static/img/linux.png' style="height:14px; width: 14px" /> <img src='./docs/static/img/linux.png' style="height:14px; width: 14px" />
<b>jan.deb</b> <b>jan.deb</b>
</a> </a>
</td> </td>
<td style="text-align:center"> <td style="text-align:center">
<a href='https://delta.jan.ai/latest/jan-linux-x86_64-0.4.8-310.AppImage'> <a href='https://delta.jan.ai/latest/jan-linux-x86_64-0.4.8-322.AppImage'>
<img src='./docs/static/img/linux.png' style="height:14px; width: 14px" /> <img src='./docs/static/img/linux.png' style="height:14px; width: 14px" />
<b>jan.AppImage</b> <b>jan.AppImage</b>
</a> </a>

View File

@ -45,11 +45,12 @@
"start": "rollup -c rollup.config.ts -w" "start": "rollup -c rollup.config.ts -w"
}, },
"devDependencies": { "devDependencies": {
"jest": "^29.7.0",
"@types/jest": "^29.5.12", "@types/jest": "^29.5.12",
"@types/node": "^12.0.2", "@types/node": "^12.0.2",
"eslint-plugin-jest": "^27.9.0",
"eslint": "8.57.0", "eslint": "8.57.0",
"eslint-plugin-jest": "^27.9.0",
"jest": "^29.7.0",
"rimraf": "^3.0.2",
"rollup": "^2.38.5", "rollup": "^2.38.5",
"rollup-plugin-commonjs": "^9.1.8", "rollup-plugin-commonjs": "^9.1.8",
"rollup-plugin-json": "^3.1.0", "rollup-plugin-json": "^3.1.0",
@ -58,7 +59,10 @@
"rollup-plugin-typescript2": "^0.36.0", "rollup-plugin-typescript2": "^0.36.0",
"ts-jest": "^29.1.2", "ts-jest": "^29.1.2",
"tslib": "^2.6.2", "tslib": "^2.6.2",
"typescript": "^5.3.3", "typescript": "^5.3.3"
"rimraf": "^3.0.2" },
"dependencies": {
"rxjs": "^7.8.1",
"ulid": "^2.3.0"
} }
} }

View File

@ -64,7 +64,7 @@ export default [
// Allow json resolution // Allow json resolution
json(), json(),
// Compile TypeScript files // Compile TypeScript files
typescript({ useTsconfigDeclarationDir: true }), typescript({ useTsconfigDeclarationDir: true, exclude: ['src/*.ts', 'src/extensions/**'] }),
// Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs) // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
commonjs(), commonjs(),
// Allow node_modules resolution, so you can use 'external' to control // Allow node_modules resolution, so you can use 'external' to control

View File

@ -33,6 +33,8 @@ export enum AppRoute {
stopServer = 'stopServer', stopServer = 'stopServer',
log = 'log', log = 'log',
logServer = 'logServer', logServer = 'logServer',
systemInformations = 'systemInformations',
showToast = 'showToast',
} }
export enum AppEvent { export enum AppEvent {
@ -56,6 +58,7 @@ export enum DownloadEvent {
onFileDownloadUpdate = 'onFileDownloadUpdate', onFileDownloadUpdate = 'onFileDownloadUpdate',
onFileDownloadError = 'onFileDownloadError', onFileDownloadError = 'onFileDownloadError',
onFileDownloadSuccess = 'onFileDownloadSuccess', onFileDownloadSuccess = 'onFileDownloadSuccess',
onFileUnzipSuccess = 'onFileUnzipSuccess',
} }
export enum LocalImportModelEvent { export enum LocalImportModelEvent {

View File

@ -1,4 +1,4 @@
import { FileStat } from './types' import { DownloadRequest, FileStat, NetworkConfig } from './types'
/** /**
* Execute a extension module function in main process * Execute a extension module function in main process
@ -17,18 +17,16 @@ const executeOnMain: (extension: string, method: string, ...args: any[]) => Prom
/** /**
* Downloads a file from a URL and saves it to the local file system. * Downloads a file from a URL and saves it to the local file system.
* @param {string} url - The URL of the file to download. *
* @param {string} fileName - The name to use for the downloaded file. * @param {DownloadRequest} downloadRequest - The request to download the file.
* @param {object} network - Optional object to specify proxy/whether to ignore SSL certificates. * @param {NetworkConfig} network - Optional object to specify proxy/whether to ignore SSL certificates.
*
* @returns {Promise<any>} A promise that resolves when the file is downloaded. * @returns {Promise<any>} A promise that resolves when the file is downloaded.
*/ */
const downloadFile: ( const downloadFile: (downloadRequest: DownloadRequest, network?: NetworkConfig) => Promise<any> = (
url: string, downloadRequest,
fileName: string, network
network?: { proxy?: string; ignoreSSL?: boolean } ) => global.core?.api?.downloadFile(downloadRequest, network)
) => Promise<any> = (url, fileName, network) => {
return global.core?.api?.downloadFile(url, fileName, network)
}
/** /**
* Aborts the download of a specific file. * Aborts the download of a specific file.
@ -108,6 +106,20 @@ const log: (message: string, fileName?: string) => void = (message, fileName) =>
const isSubdirectory: (from: string, to: string) => Promise<boolean> = (from: string, to: string) => const isSubdirectory: (from: string, to: string) => Promise<boolean> = (from: string, to: string) =>
global.core.api?.isSubdirectory(from, to) global.core.api?.isSubdirectory(from, to)
/**
* Get system information
* @returns {Promise<any>} - A promise that resolves with the system information.
*/
const systemInformations: () => Promise<any> = () => global.core.api?.systemInformations()
/**
* Show toast message from browser processes.
* @param title
* @param message
* @returns
*/
const showToast: (title: string, message: string) => void = (title, message) =>
global.core.api?.showToast(title, message)
/** /**
* Register extension point function type definition * Register extension point function type definition
*/ */
@ -134,5 +146,7 @@ export {
log, log,
isSubdirectory, isSubdirectory,
getUserHomePath, getUserHomePath,
systemInformations,
showToast,
FileStat, FileStat,
} }

View File

@ -10,6 +10,22 @@ export enum ExtensionTypeEnum {
export interface ExtensionType { export interface ExtensionType {
type(): ExtensionTypeEnum | undefined type(): ExtensionTypeEnum | undefined
} }
export interface Compatibility {
platform: string[]
version: string
}
const ALL_INSTALLATION_STATE = [
'NotRequired', // not required.
'Installed', // require and installed. Good to go.
'NotInstalled', // require to be installed.
'Corrupted', // require but corrupted. Need to redownload.
] as const
export type InstallationStateTuple = typeof ALL_INSTALLATION_STATE
export type InstallationState = InstallationStateTuple[number]
/** /**
* Represents a base extension. * Represents a base extension.
* This class should be extended by any class that represents an extension. * This class should be extended by any class that represents an extension.
@ -33,4 +49,32 @@ export abstract class BaseExtension implements ExtensionType {
* Any cleanup logic for the extension should be put here. * Any cleanup logic for the extension should be put here.
*/ */
abstract onUnload(): void abstract onUnload(): void
/**
* The compatibility of the extension.
* This is used to check if the extension is compatible with the current environment.
* @property {Array} platform
*/
compatibility(): Compatibility | undefined {
return undefined
}
/**
* Determine if the prerequisites for the extension are installed.
*
* @returns {boolean} true if the prerequisites are installed, false otherwise.
*/
async installationState(): Promise<InstallationState> {
return 'NotRequired'
}
/**
* Install the prerequisites for the extension.
*
* @returns {Promise<void>}
*/
// @ts-ignore
async install(...args): Promise<void> {
return
}
} }

View File

@ -0,0 +1,60 @@
import { getJanDataFolderPath, joinPath } from '../../core'
import { events } from '../../events'
import { BaseExtension } from '../../extension'
import { fs } from '../../fs'
import { Model, ModelEvent } from '../../types'
/**
* Base AIEngine
* Applicable to all AI Engines
*/
/**
 * Base AIEngine
 * Applicable to all AI Engines
 */
export abstract class AIEngine extends BaseExtension {
  // The inference engine identifier this extension provides (e.g. 'nitro').
  abstract provider: string
  // The folder (relative to the Jan data folder) where model definitions live.
  modelFolder: string = 'models'

  // Returns the models this engine ships with.
  abstract models(): Promise<Model[]>

  /**
   * On extension load, subscribe to events.
   */
  onLoad() {
    this.prePopulateModels()
  }

  /**
   * Pre-populate models to App Data Folder.
   * Writes a `model.json` for every shipped model that does not already
   * have one, then emits `ModelEvent.OnModelsUpdate` so the UI refreshes.
   *
   * @returns {Promise<void>} resolves after all model files are written
   * and the update event has been emitted.
   */
  prePopulateModels(): Promise<void> {
    return this.models().then((models) => {
      const prePopulateOperations = models.map((model) =>
        getJanDataFolderPath()
          .then((janDataFolder) =>
            // Attempt to create the model folder
            joinPath([janDataFolder, this.modelFolder, model.id]).then((path) =>
              fs
                .mkdirSync(path)
                // Ignore mkdir failure (folder likely exists already).
                // NOTE: `.catch()` with no handler does not catch anything —
                // a handler function is required for the rejection to be swallowed.
                .catch(() => {})
                .then(() => path)
            )
          )
          .then((path) => joinPath([path, 'model.json']))
          .then((path) => {
            // Do not overwrite existing model.json
            return fs.existsSync(path).then((exist: any) => {
              if (!exist) return fs.writeFileSync(path, JSON.stringify(model, null, 2))
            })
          })
          .catch((e: Error) => {
            console.error('Error', e)
          })
      )
      // Return the aggregate promise so callers awaiting prePopulateModels()
      // observe completion of every write (the original dropped this chain,
      // resolving before the files were written).
      return Promise.all(prePopulateOperations).then(() =>
        // Emit event to update models
        // So the UI can update the models list
        events.emit(ModelEvent.OnModelsUpdate, {})
      )
    })
  }
}

View File

@ -0,0 +1,63 @@
import { executeOnMain, getJanDataFolderPath, joinPath } from '../../core'
import { events } from '../../events'
import { Model, ModelEvent } from '../../types'
import { OAIEngine } from './OAIEngine'
/**
* Base OAI Local Inference Provider
* Added the implementation of loading and unloading model (applicable to local inference providers)
*/
/**
 * Base OAI Local Inference Provider
 * Added the implementation of loading and unloading model (applicable to local inference providers)
 */
export abstract class LocalOAIEngine extends OAIEngine {
  // Names of the node-module functions invoked to load/unload a model.
  loadModelFunctionName: string = 'loadModel'
  unloadModelFunctionName: string = 'unloadModel'
  // Whether a model is currently loaded by this provider.
  isRunning: boolean = false

  /**
   * On extension load, subscribe to events.
   */
  onLoad() {
    super.onLoad()
    // These events are applicable to local inference providers
    events.on(ModelEvent.OnModelInit, (model: Model) => this.onModelInit(model))
    events.on(ModelEvent.OnModelStop, (model: Model) => this.onModelStop(model))
  }

  /**
   * Load the model.
   * Emits OnModelReady on success, OnModelFail (with the error attached) on failure.
   */
  async onModelInit(model: Model) {
    // Optional-chain `engine` to match onModelStop's guard — the original
    // `model.engine.toString()` throws when engine is undefined.
    if (model.engine?.toString() !== this.provider) return

    const modelFolder = await joinPath([await getJanDataFolderPath(), this.modelFolder, model.id])

    const res = await executeOnMain(this.nodeModule, this.loadModelFunctionName, {
      modelFolder,
      model,
    })

    if (res?.error) {
      events.emit(ModelEvent.OnModelFail, {
        ...model,
        error: res.error,
      })
      return
    } else {
      this.loadedModel = model
      events.emit(ModelEvent.OnModelReady, model)
      this.isRunning = true
    }
  }

  /**
   * Stops the model.
   * Emits OnModelStopped once the node module has unloaded it.
   */
  onModelStop(model: Model) {
    if (model.engine?.toString() !== this.provider) return

    this.isRunning = false
    executeOnMain(this.nodeModule, this.unloadModelFunctionName).then(() => {
      events.emit(ModelEvent.OnModelStopped, {})
    })
  }
}

View File

@ -0,0 +1,116 @@
import { requestInference } from './helpers/sse'
import { ulid } from 'ulid'
import { AIEngine } from './AIEngine'
import {
ChatCompletionRole,
ContentType,
InferenceEvent,
MessageEvent,
MessageRequest,
MessageRequestType,
MessageStatus,
Model,
ModelInfo,
ThreadContent,
ThreadMessage,
} from '../../types'
import { events } from '../../events'
/**
* Base OAI Inference Provider
* Applicable to all OAI compatible inference providers
*/
/**
 * Base OAI Inference Provider
 * Applicable to all OAI compatible inference providers
 */
export abstract class OAIEngine extends AIEngine {
  // The inference engine endpoint and its backing node module.
  abstract inferenceUrl: string
  abstract nodeModule: string

  // Controller to handle stop requests
  controller = new AbortController()
  isCancelled = false

  // The loaded model instance
  loadedModel: Model | undefined

  /**
   * On extension load, subscribe to events.
   */
  onLoad() {
    super.onLoad()
    events.on(MessageEvent.OnMessageSent, (data: MessageRequest) => this.inference(data))
    events.on(InferenceEvent.OnInferenceStopped, () => this.onInferenceStopped())
  }

  /**
   * On extension unload
   */
  onUnload(): void {}

  /*
   * Inference request
   */
  inference(data: MessageRequest) {
    // Ignore requests addressed to another provider.
    if (data.model?.engine?.toString() !== this.provider) return

    const now = Date.now()
    const responseMessage: ThreadMessage = {
      id: ulid(),
      thread_id: data.threadId,
      type: data.type,
      assistant_id: data.assistantId,
      role: ChatCompletionRole.Assistant,
      content: [],
      status: MessageStatus.Pending,
      created: now,
      updated: now,
      object: 'thread.message',
    }

    // Summary requests do not surface a visible assistant message.
    if (data.type !== MessageRequestType.Summary) {
      events.emit(MessageEvent.OnMessageResponse, responseMessage)
    }

    this.isCancelled = false
    this.controller = new AbortController()

    // Runtime parameters from the loaded model, overridden by the request's model info.
    const model: ModelInfo = {
      ...(this.loadedModel ? this.loadedModel : {}),
      ...data.model,
    }

    requestInference(this.inferenceUrl, data.messages ?? [], model, this.controller).subscribe({
      next: (content: any) => {
        responseMessage.content = [
          {
            type: ContentType.Text,
            text: {
              value: content.trim(),
              annotations: [],
            },
          },
        ]
        events.emit(MessageEvent.OnMessageUpdate, responseMessage)
      },
      complete: async () => {
        const produced = responseMessage.content.length > 0
        responseMessage.status = produced ? MessageStatus.Ready : MessageStatus.Error
        events.emit(MessageEvent.OnMessageUpdate, responseMessage)
      },
      error: async (err: any) => {
        // A cancelled or partially streamed response is "stopped", not a failure.
        if (this.isCancelled || responseMessage.content.length) {
          responseMessage.status = MessageStatus.Stopped
        } else {
          responseMessage.status = MessageStatus.Error
        }
        events.emit(MessageEvent.OnMessageUpdate, responseMessage)
      },
    })
  }

  /**
   * Stops the inference.
   */
  onInferenceStopped() {
    this.isCancelled = true
    this.controller?.abort()
  }
}

View File

@ -0,0 +1,67 @@
import { Observable } from 'rxjs'
import { ModelRuntimeParams } from '../../../types'
/**
* Sends a request to the inference server to generate a response based on the recent messages.
* @param recentMessages - An array of recent messages to use as context for the inference.
* @returns An Observable that emits the generated response as a string.
*/
/**
 * Sends a request to the inference server to generate a response based on the recent messages.
 * @param inferenceUrl - The URL of the chat-completion endpoint.
 * @param recentMessages - An array of recent messages to use as context for the inference.
 * @param model - The model id and its runtime parameters to send with the request.
 * @param controller - Optional AbortController used to cancel the in-flight request.
 * @returns An Observable that emits the accumulated response as a string.
 */
export function requestInference(
  inferenceUrl: string,
  recentMessages: any[],
  model: {
    id: string
    parameters: ModelRuntimeParams
  },
  controller?: AbortController
): Observable<string> {
  return new Observable((subscriber) => {
    const requestBody = JSON.stringify({
      messages: recentMessages,
      model: model.id,
      stream: true,
      ...model.parameters,
    })
    fetch(inferenceUrl, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Access-Control-Allow-Origin': '*',
        'Accept': model.parameters.stream ? 'text/event-stream' : 'application/json',
      },
      body: requestBody,
      signal: controller?.signal,
    })
      .then(async (response) => {
        if (model.parameters.stream === false) {
          // Non-streaming: the whole completion arrives as one JSON payload.
          const data = await response.json()
          subscriber.next(data.choices[0]?.message?.content ?? '')
        } else {
          const stream = response.body
          const decoder = new TextDecoder('utf-8')
          const reader = stream?.getReader()
          let content = ''
          // Buffer for an SSE line cut off at a chunk boundary — parsing a
          // partial `data:` line immediately would throw on incomplete JSON.
          let pending = ''
          while (reader) {
            const { done, value } = await reader.read()
            if (done) {
              break
            }
            // { stream: true } keeps multi-byte characters split across
            // chunks from being decoded as replacement characters.
            pending += decoder.decode(value, { stream: true })
            const lines = pending.split('\n')
            // The last element may be an incomplete line; hold it for the next chunk.
            pending = lines.pop() ?? ''
            for (const line of lines) {
              const trimmed = line.trim()
              if (trimmed.startsWith('data: ') && !trimmed.includes('data: [DONE]')) {
                const data = JSON.parse(trimmed.replace('data: ', ''))
                content += data.choices[0]?.delta?.content ?? ''
                // Some models prefix the transcript role; strip it once.
                if (content.startsWith('assistant: ')) {
                  content = content.replace('assistant: ', '')
                }
                subscriber.next(content)
              }
            }
          }
        }
        subscriber.complete()
      })
      .catch((err) => subscriber.error(err))
  })
}

View File

@ -0,0 +1,3 @@
export * from './AIEngine'
export * from './OAIEngine'
export * from './LocalOAIEngine'

View File

@ -28,3 +28,8 @@ export { ModelExtension } from './model'
* Hugging Face extension for converting HF models to GGUF. * Hugging Face extension for converting HF models to GGUF.
*/ */
export { HuggingFaceExtension } from './huggingface' export { HuggingFaceExtension } from './huggingface'
/**
* Base AI Engines.
*/
export * from './ai-engines'

View File

@ -1,5 +1,5 @@
import { BaseExtension, ExtensionTypeEnum } from '../extension' import { BaseExtension, ExtensionTypeEnum } from '../extension'
import { ImportingModel, Model, ModelInterface, OptionType } from '../index' import { GpuSetting, ImportingModel, Model, ModelInterface, OptionType } from '../index'
/** /**
* Model extension for managing models. * Model extension for managing models.
@ -14,6 +14,7 @@ export abstract class ModelExtension extends BaseExtension implements ModelInter
abstract downloadModel( abstract downloadModel(
model: Model, model: Model,
gpuSettings?: GpuSetting,
network?: { proxy: string; ignoreSSL?: boolean } network?: { proxy: string; ignoreSSL?: boolean }
): Promise<void> ): Promise<void>
abstract cancelModelDownload(modelId: string): Promise<void> abstract cancelModelDownload(modelId: string): Promise<void>

View File

@ -1,5 +1,5 @@
import { BaseExtension, ExtensionTypeEnum } from '../extension' import { BaseExtension, ExtensionTypeEnum } from '../extension'
import { MonitoringInterface } from '../index' import { GpuSetting, MonitoringInterface } from '../index'
/** /**
* Monitoring extension for system monitoring. * Monitoring extension for system monitoring.
@ -13,6 +13,7 @@ export abstract class MonitoringExtension extends BaseExtension implements Monit
return ExtensionTypeEnum.SystemMonitoring return ExtensionTypeEnum.SystemMonitoring
} }
abstract getGpuSetting(): Promise<GpuSetting>
abstract getResourcesInfo(): Promise<any> abstract getResourcesInfo(): Promise<any>
abstract getCurrentLoad(): Promise<any> abstract getCurrentLoad(): Promise<any>
} }

View File

@ -5,7 +5,7 @@ import { getJanDataFolderPath } from '../../helper'
import { DownloadManager } from '../../helper/download' import { DownloadManager } from '../../helper/download'
import { createWriteStream, renameSync } from 'fs' import { createWriteStream, renameSync } from 'fs'
import { Processor } from './Processor' import { Processor } from './Processor'
import { DownloadState } from '../../../types' import { DownloadRequest, DownloadState, NetworkConfig } from '../../../types'
export class Downloader implements Processor { export class Downloader implements Processor {
observer?: Function observer?: Function
@ -20,24 +20,27 @@ export class Downloader implements Processor {
return func(this.observer, ...args) return func(this.observer, ...args)
} }
downloadFile(observer: any, url: string, localPath: string, network: any) { downloadFile(observer: any, downloadRequest: DownloadRequest, network?: NetworkConfig) {
const request = require('request') const request = require('request')
const progress = require('request-progress') const progress = require('request-progress')
const strictSSL = !network?.ignoreSSL const strictSSL = !network?.ignoreSSL
const proxy = network?.proxy?.startsWith('http') ? network.proxy : undefined const proxy = network?.proxy?.startsWith('http') ? network.proxy : undefined
const { localPath, url } = downloadRequest
let normalizedPath = localPath
if (typeof localPath === 'string') { if (typeof localPath === 'string') {
localPath = normalizeFilePath(localPath) normalizedPath = normalizeFilePath(localPath)
} }
const array = localPath.split(sep) const array = normalizedPath.split(sep)
const fileName = array.pop() ?? '' const fileName = array.pop() ?? ''
const modelId = array.pop() ?? '' const modelId = array.pop() ?? ''
const destination = resolve(getJanDataFolderPath(), localPath) const destination = resolve(getJanDataFolderPath(), normalizedPath)
const rq = request({ url, strictSSL, proxy }) const rq = request({ url, strictSSL, proxy })
// Put request to download manager instance // Put request to download manager instance
DownloadManager.instance.setRequest(localPath, rq) DownloadManager.instance.setRequest(normalizedPath, rq)
// Downloading file to a temp file first // Downloading file to a temp file first
const downloadingTempFile = `${destination}.download` const downloadingTempFile = `${destination}.download`
@ -56,16 +59,25 @@ export class Downloader implements Processor {
total: 0, total: 0,
transferred: 0, transferred: 0,
}, },
children: [],
downloadState: 'downloading', downloadState: 'downloading',
extensionId: downloadRequest.extensionId,
downloadType: downloadRequest.downloadType,
localPath: normalizedPath,
} }
DownloadManager.instance.downloadProgressMap[modelId] = initialDownloadState DownloadManager.instance.downloadProgressMap[modelId] = initialDownloadState
if (downloadRequest.downloadType === 'extension') {
observer?.(DownloadEvent.onFileDownloadUpdate, initialDownloadState)
}
progress(rq, {}) progress(rq, {})
.on('progress', (state: any) => { .on('progress', (state: any) => {
const currentDownloadState = DownloadManager.instance.downloadProgressMap[modelId]
const downloadState: DownloadState = { const downloadState: DownloadState = {
...currentDownloadState,
...state, ...state,
modelId, fileName: fileName,
fileName,
downloadState: 'downloading', downloadState: 'downloading',
} }
console.debug('progress: ', downloadState) console.debug('progress: ', downloadState)
@ -76,22 +88,22 @@ export class Downloader implements Processor {
const currentDownloadState = DownloadManager.instance.downloadProgressMap[modelId] const currentDownloadState = DownloadManager.instance.downloadProgressMap[modelId]
const downloadState: DownloadState = { const downloadState: DownloadState = {
...currentDownloadState, ...currentDownloadState,
fileName: fileName,
error: error.message, error: error.message,
downloadState: 'error', downloadState: 'error',
} }
if (currentDownloadState) {
DownloadManager.instance.downloadProgressMap[modelId] = downloadState
}
observer?.(DownloadEvent.onFileDownloadError, downloadState) observer?.(DownloadEvent.onFileDownloadError, downloadState)
DownloadManager.instance.downloadProgressMap[modelId] = downloadState
}) })
.on('end', () => { .on('end', () => {
const currentDownloadState = DownloadManager.instance.downloadProgressMap[modelId] const currentDownloadState = DownloadManager.instance.downloadProgressMap[modelId]
if (currentDownloadState && DownloadManager.instance.networkRequests[localPath]) { if (currentDownloadState && DownloadManager.instance.networkRequests[normalizedPath]) {
// Finished downloading, rename temp file to actual file // Finished downloading, rename temp file to actual file
renameSync(downloadingTempFile, destination) renameSync(downloadingTempFile, destination)
const downloadState: DownloadState = { const downloadState: DownloadState = {
...currentDownloadState, ...currentDownloadState,
fileName: fileName,
downloadState: 'end', downloadState: 'end',
} }
observer?.(DownloadEvent.onFileDownloadSuccess, downloadState) observer?.(DownloadEvent.onFileDownloadSuccess, downloadState)

View File

@ -1,7 +1,16 @@
import fs from 'fs' import {
existsSync,
readdirSync,
readFileSync,
writeFileSync,
mkdirSync,
appendFileSync,
createWriteStream,
rmdirSync,
} from 'fs'
import { JanApiRouteConfiguration, RouteConfiguration } from './configuration' import { JanApiRouteConfiguration, RouteConfiguration } from './configuration'
import { join } from 'path' import { join } from 'path'
import { ContentType, MessageStatus, Model, ThreadMessage } from '../../../../index' import { ContentType, MessageStatus, Model, ThreadMessage } from '../../../../types'
import { getEngineConfiguration, getJanDataFolderPath } from '../../../helper' import { getEngineConfiguration, getJanDataFolderPath } from '../../../helper'
import { DEFAULT_CHAT_COMPLETION_URL } from './consts' import { DEFAULT_CHAT_COMPLETION_URL } from './consts'
@ -9,12 +18,12 @@ import { DEFAULT_CHAT_COMPLETION_URL } from './consts'
export const getBuilder = async (configuration: RouteConfiguration) => { export const getBuilder = async (configuration: RouteConfiguration) => {
const directoryPath = join(getJanDataFolderPath(), configuration.dirName) const directoryPath = join(getJanDataFolderPath(), configuration.dirName)
try { try {
if (!fs.existsSync(directoryPath)) { if (!existsSync(directoryPath)) {
console.debug('model folder not found') console.debug('model folder not found')
return [] return []
} }
const files: string[] = fs.readdirSync(directoryPath) const files: string[] = readdirSync(directoryPath)
const allDirectories: string[] = [] const allDirectories: string[] = []
for (const file of files) { for (const file of files) {
@ -46,8 +55,8 @@ export const getBuilder = async (configuration: RouteConfiguration) => {
} }
const readModelMetadata = (path: string): string | undefined => { const readModelMetadata = (path: string): string | undefined => {
if (fs.existsSync(path)) { if (existsSync(path)) {
return fs.readFileSync(path, 'utf-8') return readFileSync(path, 'utf-8')
} else { } else {
return undefined return undefined
} }
@ -81,7 +90,7 @@ export const deleteBuilder = async (configuration: RouteConfiguration, id: strin
} }
const objectPath = join(directoryPath, id) const objectPath = join(directoryPath, id)
fs.rmdirSync(objectPath, { recursive: true }) rmdirSync(objectPath, { recursive: true })
return { return {
id: id, id: id,
object: configuration.delete.object, object: configuration.delete.object,
@ -96,20 +105,19 @@ export const getMessages = async (threadId: string): Promise<ThreadMessage[]> =>
const threadDirPath = join(getJanDataFolderPath(), 'threads', threadId) const threadDirPath = join(getJanDataFolderPath(), 'threads', threadId)
const messageFile = 'messages.jsonl' const messageFile = 'messages.jsonl'
try { try {
const files: string[] = fs.readdirSync(threadDirPath) const files: string[] = readdirSync(threadDirPath)
if (!files.includes(messageFile)) { if (!files.includes(messageFile)) {
console.error(`${threadDirPath} not contains message file`) console.error(`${threadDirPath} not contains message file`)
return [] return []
} }
const messageFilePath = join(threadDirPath, messageFile) const messageFilePath = join(threadDirPath, messageFile)
if (!fs.existsSync(messageFilePath)) { if (!existsSync(messageFilePath)) {
console.debug('message file not found') console.debug('message file not found')
return [] return []
} }
const lines = fs const lines = readFileSync(messageFilePath, 'utf-8')
.readFileSync(messageFilePath, 'utf-8')
.toString() .toString()
.split('\n') .split('\n')
.filter((line: any) => line !== '') .filter((line: any) => line !== '')
@ -157,11 +165,11 @@ export const createThread = async (thread: any) => {
const threadDirPath = join(getJanDataFolderPath(), 'threads', updatedThread.id) const threadDirPath = join(getJanDataFolderPath(), 'threads', updatedThread.id)
const threadJsonPath = join(threadDirPath, threadMetadataFileName) const threadJsonPath = join(threadDirPath, threadMetadataFileName)
if (!fs.existsSync(threadDirPath)) { if (!existsSync(threadDirPath)) {
fs.mkdirSync(threadDirPath) mkdirSync(threadDirPath)
} }
await fs.writeFileSync(threadJsonPath, JSON.stringify(updatedThread, null, 2)) await writeFileSync(threadJsonPath, JSON.stringify(updatedThread, null, 2))
return updatedThread return updatedThread
} catch (err) { } catch (err) {
return { return {
@ -191,7 +199,7 @@ export const updateThread = async (threadId: string, thread: any) => {
const threadDirPath = join(getJanDataFolderPath(), 'threads', updatedThread.id) const threadDirPath = join(getJanDataFolderPath(), 'threads', updatedThread.id)
const threadJsonPath = join(threadDirPath, threadMetadataFileName) const threadJsonPath = join(threadDirPath, threadMetadataFileName)
await fs.writeFileSync(threadJsonPath, JSON.stringify(updatedThread, null, 2)) await writeFileSync(threadJsonPath, JSON.stringify(updatedThread, null, 2))
return updatedThread return updatedThread
} catch (err) { } catch (err) {
return { return {
@ -233,10 +241,10 @@ export const createMessage = async (threadId: string, message: any) => {
const threadDirPath = join(getJanDataFolderPath(), 'threads', threadId) const threadDirPath = join(getJanDataFolderPath(), 'threads', threadId)
const threadMessagePath = join(threadDirPath, threadMessagesFileName) const threadMessagePath = join(threadDirPath, threadMessagesFileName)
if (!fs.existsSync(threadDirPath)) { if (!existsSync(threadDirPath)) {
fs.mkdirSync(threadDirPath) mkdirSync(threadDirPath)
} }
fs.appendFileSync(threadMessagePath, JSON.stringify(threadMessage) + '\n') appendFileSync(threadMessagePath, JSON.stringify(threadMessage) + '\n')
return threadMessage return threadMessage
} catch (err) { } catch (err) {
return { return {
@ -259,8 +267,8 @@ export const downloadModel = async (
} }
const directoryPath = join(getJanDataFolderPath(), 'models', modelId) const directoryPath = join(getJanDataFolderPath(), 'models', modelId)
if (!fs.existsSync(directoryPath)) { if (!existsSync(directoryPath)) {
fs.mkdirSync(directoryPath) mkdirSync(directoryPath)
} }
// path to model binary // path to model binary
@ -281,7 +289,7 @@ export const downloadModel = async (
.on('end', function () { .on('end', function () {
console.debug('end') console.debug('end')
}) })
.pipe(fs.createWriteStream(modelBinaryPath)) .pipe(createWriteStream(modelBinaryPath))
} }
return { return {

View File

@ -4,13 +4,13 @@ import fs from 'fs'
import os from 'os' import os from 'os'
import childProcess from 'child_process' import childProcess from 'child_process'
// TODO: move this to core
const configurationFileName = 'settings.json' const configurationFileName = 'settings.json'
// TODO: do no specify app name in framework module // TODO: do no specify app name in framework module
const defaultJanDataFolder = join(os.homedir(), 'jan') const defaultJanDataFolder = join(os.homedir(), 'jan')
const defaultAppConfig: AppConfiguration = { const defaultAppConfig: AppConfiguration = {
data_folder: defaultJanDataFolder, data_folder: defaultJanDataFolder,
quick_ask: false,
} }
/** /**

View File

@ -1,3 +1,4 @@
export type AppConfiguration = { export type AppConfiguration = {
data_folder: string data_folder: string
quick_ask: boolean
} }

View File

@ -4,16 +4,43 @@ export type FileStat = {
} }
export type DownloadState = { export type DownloadState = {
modelId: string modelId: string // TODO: change to download id
fileName: string fileName: string
time: DownloadTime time: DownloadTime
speed: number speed: number
percent: number
percent: number
size: DownloadSize size: DownloadSize
children?: DownloadState[]
error?: string
downloadState: 'downloading' | 'error' | 'end' downloadState: 'downloading' | 'error' | 'end'
children?: DownloadState[]
error?: string
extensionId?: string
downloadType?: DownloadType
localPath?: string
}
export type DownloadType = 'model' | 'extension'
export type DownloadRequest = {
/**
* The URL to download the file from.
*/
url: string
/**
* The local path to save the file to.
*/
localPath: string
/**
* The extension ID of the extension that initiated the download.
*
* Can be extension name.
*/
extensionId?: string
downloadType?: DownloadType
} }
type DownloadTime = { type DownloadTime = {

View File

@ -0,0 +1,8 @@
/**
 * Describes a request to download a single file to the local machine.
 */
export type FileDownloadRequest = {
  // Unique identifier used to track/correlate this download's progress events.
  downloadId: string
  // The URL to download the file from.
  url: string
  // Absolute or data-folder-relative destination path for the downloaded file.
  // NOTE(review): whether this is absolute or relative is not visible here — confirm against callers.
  localPath: string
  // On-disk file name of the download target.
  fileName: string
  // Human-readable name shown in the UI for this download.
  displayName: string
  // Arbitrary extra information attached to the download (string or numeric values only).
  metadata: Record<string, string | number>
}

View File

@ -1,3 +1,5 @@
export * from './systemResourceInfo' export * from './systemResourceInfo'
export * from './promptTemplate' export * from './promptTemplate'
export * from './appUpdate' export * from './appUpdate'
export * from './fileDownloadRequest'
export * from './networkConfig'

View File

@ -0,0 +1,4 @@
/**
 * Optional network settings applied to outgoing HTTP(S) requests (e.g. downloads).
 */
export type NetworkConfig = {
  // Proxy URL to route requests through; omitted means direct connection.
  proxy?: string
  // When true, SSL certificate validation errors are ignored (useful behind
  // intercepting proxies; reduces transport security).
  ignoreSSL?: boolean
}

View File

@ -2,3 +2,31 @@ export type SystemResourceInfo = {
numCpuPhysicalCore: number numCpuPhysicalCore: number
memAvailable: number memAvailable: number
} }
// Execution mode for inference: CPU-only or GPU-accelerated.
export type RunMode = 'cpu' | 'gpu'

/**
 * Snapshot of the host machine's GPU configuration as detected/persisted by the app.
 */
export type GpuSetting = {
  // Whether to notify the user about GPU-related status changes.
  // NOTE(review): exact notification semantics not visible here — confirm against consumers.
  notify: boolean
  // Currently selected execution mode ('cpu' or 'gpu').
  run_mode: RunMode
  // NVIDIA driver detection result.
  nvidia_driver: {
    exist: boolean
    version: string
  }
  // CUDA toolkit detection result.
  cuda: {
    exist: boolean
    version: string
  }
  // All GPUs discovered on this machine.
  gpus: GpuSettingInfo[]
  // Identifier of the GPU with the most VRAM — presumably a GPU id; verify against producer.
  gpu_highest_vram: string
  // Ids of the GPUs currently selected for inference.
  gpus_in_use: string[]
  // True when this settings object has not yet been customized (first run).
  is_initial: boolean
  // TODO: This needs to be set based on user toggle in settings
  vulkan: boolean
}

/**
 * Per-GPU detection info.
 */
export type GpuSettingInfo = {
  // Device identifier (as reported by the detection tool).
  id: string
  // VRAM amount as a string — units not visible here; confirm (likely MiB).
  vram: string
  // Marketing/device name of the GPU.
  name: string
  // GPU architecture (e.g. Ampere/Ada), when known.
  arch?: string
}

View File

@ -19,6 +19,7 @@ export enum InferenceEngine {
nitro = 'nitro', nitro = 'nitro',
openai = 'openai', openai = 'openai',
triton_trtllm = 'triton_trtllm', triton_trtllm = 'triton_trtllm',
nitro_tensorrt_llm = 'nitro-tensorrt-llm',
tool_retrieval_enabled = 'tool_retrieval_enabled', tool_retrieval_enabled = 'tool_retrieval_enabled',
} }

View File

@ -1,3 +1,4 @@
import { GpuSetting } from '../miscellaneous'
import { Model } from './modelEntity' import { Model } from './modelEntity'
/** /**
@ -10,7 +11,11 @@ export interface ModelInterface {
* @param network - Optional object to specify proxy/whether to ignore SSL certificates. * @param network - Optional object to specify proxy/whether to ignore SSL certificates.
* @returns A Promise that resolves when the model has been downloaded. * @returns A Promise that resolves when the model has been downloaded.
*/ */
downloadModel(model: Model, network?: { ignoreSSL?: boolean; proxy?: string }): Promise<void> downloadModel(
model: Model,
gpuSettings?: GpuSetting,
network?: { ignoreSSL?: boolean; proxy?: string }
): Promise<void>
/** /**
* Cancels the download of a specific model. * Cancels the download of a specific model.

View File

@ -1 +1,2 @@
export * from './monitoringInterface' export * from './monitoringInterface'
export * from './resourceInfo'

View File

@ -0,0 +1,6 @@
/**
 * System memory usage snapshot reported by the monitoring extension.
 */
export type ResourceInfo = {
  mem: {
    // Total physical memory — units not visible here; presumably bytes, confirm against producer.
    totalMemory: number
    // Memory currently in use, in the same units as totalMemory.
    usedMemory: number
  }
}

View File

@ -13,7 +13,7 @@
"declarationDir": "dist/types", "declarationDir": "dist/types",
"outDir": "dist/lib", "outDir": "dist/lib",
"importHelpers": true, "importHelpers": true,
"types": ["@types/jest"] "types": ["@types/jest"],
}, },
"include": ["src"] "include": ["src"],
} }

View File

@ -1,18 +1,20 @@
# Website # Website & Docs
This website is built using [Docusaurus 2](https://docusaurus.io/), a modern static website generator. This website is built using [Docusaurus 3.0](https://docusaurus.io/), a modern static website generator.
## Information Architecture ### Information Architecture
We try to **keep routes consistent** to maintain SEO. We try to **keep routes consistent** to maintain SEO.
- `/guides`: Guides on how to use the Jan application, with GIFs. For end users who are directly using Jan. Always assume users are not technical. - **`/guides/`**: Guides on how to use the Jan application. For end users who are directly using Jan.
- `/developer`: Developer docs on how to extend Jan. These pages are about what people can build with our software. We must hide the complexity of HOW the app is built, but explain just enough of the high level architecture so devs know enough to build on top of it. - **`/developer/`**: Developer docs on how to extend Jan. These pages are about what people can build with our software.
- `/api-reference`: Reference documentation, written in Swagger/OpenAPI format. - **`/api-reference/`**: Reference documentation for the Jan API server, written in Swagger/OpenAPI format.
- `/docs`: Engineering specs and product specs, i.e. HOW the app is built. Mostly for internal reference and for our core contributors who are building the SDK itself. - **`/changelog/`**: A list of changes made to the Jan application with each release.
- **`/blog/`**: A blog for the Jan application.
### Sidebar Autogeneration ### Sidebar Autogeneration
@ -20,34 +22,36 @@ The order of each page is either explicitly defined in `sidebar.js` or follows t
Important slugs are hardcoded at the document level (and shouldn't be rerouted): Important slugs are hardcoded at the document level (and shouldn't be rerouted):
```md ```
--- ---
title: Overview title: Overview
slug: /docs slug: /docs
--- ---
``` ```
## Contributing ## How to Contribute
### Installation Refer to the [Contributing Guide](https://github.com/janhq/jan/blob/dev/CONTRIBUTING.md) for more comprehensive information on how to contribute to the Jan project.
``` ### Pre-requisites and Installation
$ yarn
```
### Local Development - [Node.js](https://nodejs.org/en/) (version 20.0.0 or higher)
- [yarn](https://yarnpkg.com/) (version 1.22.0 or higher)
``` #### Installation
$ cp .env.example .env
$ yarn start ```bash
cd jan/docs
yarn install
yarn start
``` ```
This command starts a local development server and opens up a browser window. Most changes are reflected live without having to restart the server. This command starts a local development server and opens up a browser window. Most changes are reflected live without having to restart the server.
### Build #### Build
``` ```bash
$ yarn build yarn build
``` ```
This command generates static content into the `build` directory and can be served using any static contents hosting service. This command generates static content into the `build` directory and can be served using any static contents hosting service.
@ -56,25 +60,27 @@ This command generates static content into the `build` directory and can be serv
Using SSH: Using SSH:
``` ```bash
$ USE_SSH=true yarn deploy USE_SSH=true yarn deploy
``` ```
Not using SSH: Not using SSH:
``` ```bash
$ GIT_USER=<Your GitHub username> yarn deploy GIT_USER=<Your GitHub username> yarn deploy
``` ```
If you are using GitHub pages for hosting, this command is a convenient way to build the website and push to the `gh-pages` branch. If you are using GitHub pages for hosting, this command is a convenient way to build the website and push to the `gh-pages` branch.
### Preview URL, Pre-release and Publishing Documentation ### Preview URL, Pre-release and Publishing Documentation
When a PR is created, the preview URL will be automatically commented on the PR. - When a pull request is created, the preview URL will be automatically commented on the pull request.
The documentation will then be published to [https://jan.ai/](https://jan.ai/) when the PR is merged to `main`. - The documentation will then be published to [https://dev.jan.ai/](https://dev.jan.ai/) when the pull request is merged to `dev`.
- Our open-source maintainers will sync the updated content from `dev` to `docs` branch, which will then be published to [https://jan.ai/](https://jan.ai/).
### Additional Plugins ### Additional Plugins
- @docusaurus/theme-live-codeblock - @docusaurus/theme-live-codeblock
- [Redocusaurus](https://redocusaurus.vercel.app/): manually upload swagger files at `/openapi/OpenAPISpec.json` - [Redocusaurus](https://redocusaurus.vercel.app/): manually upload swagger files at `/openapi/jan.yaml` to update the API reference documentation.

View File

@ -18,7 +18,7 @@ keywords:
] ]
--- ---
Jan turns computers into a thinking machine to change how you use computers. Jan turns computers into thinking machines to change how we use them.
Jan is created and maintained by Jan Labs, a robotics company. Jan is created and maintained by Jan Labs, a robotics company.
With Jan, you can: With Jan, you can:

View File

@ -0,0 +1,8 @@
---
title: Inference Providers
slug: /guides/providers
---
import DocCardList from "@theme/DocCardList";
<DocCardList />

Binary file not shown.

After

Width:  |  Height:  |  Size: 27 KiB

View File

@ -0,0 +1,10 @@
---
title: llama.cpp
slug: /guides/providers/llama-cpp
---
## Overview
[Nitro](https://github.com/janhq/nitro) is an inference server on top of [llama.cpp](https://github.com/ggerganov/llama.cpp). It provides an OpenAI-compatible API, queue, & scaling.
Nitro is the default AI engine downloaded with Jan. There is no additional setup needed.

View File

@ -0,0 +1,87 @@
---
title: TensorRT-LLM
slug: /guides/providers/tensorrt-llm
---
Users with Nvidia GPUs can get **20-40% faster\* token speeds** on their laptop or desktops by using [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). The greater implication is that you are running FP16, which is also more accurate than quantized models.
This guide walks you through how to install Jan's official [TensorRT-LLM Extension](https://github.com/janhq/nitro-tensorrt-llm). This extension uses [Nitro-TensorRT-LLM](https://github.com/janhq/nitro-tensorrt-llm) as the AI engine, instead of the default [Nitro-Llama-CPP](https://github.com/janhq/nitro). It includes an efficient C++ server to natively execute the [TRT-LLM C++ runtime](https://nvidia.github.io/TensorRT-LLM/gpt_runtime.html). It also comes with additional feature and performance improvements like OpenAI compatibility, tokenizer improvements, and queues.
*Compared to using LlamaCPP engine.
:::warning
This feature is only available for Windows users. Linux is coming soon.
Additionally, we have only prebuilt a few demo models. You can always build your desired models directly on your machine. [Read here](#build-your-own-tensorrt-models).
:::
## Requirements
- A Windows PC
- Nvidia GPU(s): Ada or Ampere series (i.e. RTX 4000s & 3000s). More will be supported soon.
- 3GB+ of disk space to download TRT-LLM artifacts and a Nitro binary
- Jan v0.4.9+ or Jan v0.4.8-321+ (nightly)
- Nvidia Driver v535+ ([installation guide](https://jan.ai/guides/common-error/not-using-gpu/#1-ensure-gpu-mode-requirements))
- CUDA Toolkit v12.2+ ([installation guide](https://jan.ai/guides/common-error/not-using-gpu/#1-ensure-gpu-mode-requirements))
## Install TensorRT-Extension
1. Go to Settings > Extensions
2. Click install next to the TensorRT-LLM Extension
3. Check that files are correctly downloaded
```sh
ls ~\jan\extensions\@janhq\tensorrt-llm-extension\dist\bin
# Your Extension Folder should now include `nitro.exe`, among other artifacts needed to run TRT-LLM
```
## Download a Compatible Model
TensorRT-LLM can only run models in `TensorRT` format. These models, aka "TensorRT Engines", are prebuilt specifically for each target OS+GPU architecture.
We offer a handful of precompiled models for Ampere and Ada cards that you can immediately download and play with:
1. Restart the application and go to the Hub
2. Look for models with the `TensorRT-LLM` label in the recommended models list. Click download. This step might take some time. 🙏
![image](https://hackmd.io/_uploads/rJewrEgRp.png)
3. Click use and start chatting!
4. You may need to allow Nitro through your network firewall
![alt text](image.png)
:::warning
If you are on our nightly builds, you may have to reinstall the TensorRT-LLM extension each time you update the app. We're working on better extension lifecycles - stay tuned.
:::
## Configure Settings
You can customize the default parameters for how Jan runs TensorRT-LLM.
:::info
coming soon
:::
## Troubleshooting
### Incompatible Extension vs Engine versions
For now, the model versions are pinned to the extension versions.
### Uninstall Extension
1. Quit the app
2. Go to Settings > Extensions
3. Delete the entire Extensions folder.
4. Reopen the app, only the default extensions should be restored.
### Install Nitro-TensorRT-LLM manually
To manually build the artifacts needed to run the server and TensorRT-LLM, you can reference the source code. [Read here](https://github.com/janhq/nitro-tensorrt-llm?tab=readme-ov-file#quickstart).
### Build your own TensorRT models
:::info
coming soon
:::

View File

@ -1,3 +1,88 @@
--- ---
title: Website & Docs title: Website & Docs
--- ---
This website is built using [Docusaurus 3.0](https://docusaurus.io/), a modern static website generator.
### Information Architecture
We try to **keep routes consistent** to maintain SEO.
- **`/guides/`**: Guides on how to use the Jan application. For end users who are directly using Jan.
- **`/developer/`**: Developer docs on how to extend Jan. These pages are about what people can build with our software.
- **`/api-reference/`**: Reference documentation for the Jan API server, written in Swagger/OpenAPI format.
- **`/changelog/`**: A list of changes made to the Jan application with each release.
- **`/blog/`**: A blog for the Jan application.
### Sidebar Autogeneration
The order of each page is either explicitly defined in `sidebar.js` or follows the [Docusaurus autogenerated](https://docusaurus.io/docs/next/sidebar/autogenerated) naming format, `##-path-name.md`.
Important slugs are hardcoded at the document level (and shouldn't be rerouted):
```
---
title: Overview
slug: /docs
---
```
## How to Contribute
Refer to the [Contributing Guide](https://github.com/janhq/jan/blob/dev/CONTRIBUTING.md) for more comprehensive information on how to contribute to the Jan project.
### Pre-requisites and Installation
- [Node.js](https://nodejs.org/en/) (version 20.0.0 or higher)
- [yarn](https://yarnpkg.com/) (version 1.22.0 or higher)
#### Installation
```bash
cd jan/docs
yarn install
yarn start
```
This command starts a local development server and opens up a browser window. Most changes are reflected live without having to restart the server.
#### Build
```bash
yarn build
```
This command generates static content into the `build` directory and can be served using any static contents hosting service.
### Deployment
Using SSH:
```bash
USE_SSH=true yarn deploy
```
Not using SSH:
```bash
GIT_USER=<Your GitHub username> yarn deploy
```
If you are using GitHub pages for hosting, this command is a convenient way to build the website and push to the `gh-pages` branch.
### Preview URL, Pre-release and Publishing Documentation
- When a pull request is created, the preview URL will be automatically commented on the pull request.
- The documentation will then be published to [https://dev.jan.ai/](https://dev.jan.ai/) when the pull request is merged to `dev`.
- Our open-source maintainers will sync the updated content from `dev` to `docs` branch, which will then be published to [https://jan.ai/](https://jan.ai/).
### Additional Plugins
- @docusaurus/theme-live-codeblock
- [Redocusaurus](https://redocusaurus.vercel.app/): manually upload swagger files at `/openapi/jan.yaml` to update the API reference documentation.

View File

@ -1,19 +1,19 @@
const fs = require('fs'); const fs = require('fs');
const path = require('path'); const path = require('path');
const fetch = require('node-fetch');
async function fetchData(siteConfig) { async function fetchData(siteConfig) {
const owner = siteConfig.organizationName; const owner = siteConfig.organizationName;
const repo = siteConfig.projectName; const repo = siteConfig.projectName;
const apiUrl = `https://api.github.com/repos/${owner}/${repo}/releases`; const apiUrl = `https://api.github.com/repos/${owner}/${repo}/releases`;
const outputDirectory = path.join(__dirname, '../../docs/guides/changelogs'); const outputDirectory = path.join(__dirname, '../../docs/releases/changelog');
if (!fs.existsSync(outputDirectory)) { if (!fs.existsSync(outputDirectory)) {
fs.mkdirSync(outputDirectory); fs.mkdirSync(outputDirectory);
} }
let counter = 1; let counter = 1;
const categoryFilePath = path.join(outputDirectory, '_category_.json');
const cacheFilePath = path.join(outputDirectory, 'cache.json'); const cacheFilePath = path.join(outputDirectory, 'cache.json');
let cachedData = {}; let cachedData = {};
@ -71,6 +71,14 @@ async function fetchData(siteConfig) {
// Process the GitHub releases data here // Process the GitHub releases data here
for (const release of releases) { for (const release of releases) {
const version = release.tag_name; const version = release.tag_name;
// Check if the changelog file already exists for the current version
const existingChangelogPath = path.join(outputDirectory, `changelog-${version}.mdx`);
if (fs.existsSync(existingChangelogPath)) {
console.log(`Changelog for version ${version} already exists. Skipping...`);
continue;
}
const releaseUrl = release.html_url; const releaseUrl = release.html_url;
const issueNumberMatch = release.body.match(/#(\d+)/); const issueNumberMatch = release.body.match(/#(\d+)/);
const issueNumber = issueNumberMatch ? parseInt(issueNumberMatch[1], 10) : null; const issueNumber = issueNumberMatch ? parseInt(issueNumberMatch[1], 10) : null;
@ -83,7 +91,7 @@ async function fetchData(siteConfig) {
const changes = release.body; const changes = release.body;
let markdownContent = `---\nsidebar_position: ${counter}\n---\n# ${version}\n\nFor more details, [GitHub Issues](${releaseUrl})\n\nHighlighted Issue: ${issueLink}\n\n${changes}\n`; let markdownContent = `---\nsidebar_position: ${counter}\nslug: /changelog/changelog-${version}\n---\n# ${version}\n\nFor more details, [GitHub Issues](${releaseUrl})\n\nHighlighted Issue: ${issueLink}\n\n${changes}\n`;
// Write to a separate markdown file for each version // Write to a separate markdown file for each version
const outputFilePath = path.join(outputDirectory, `changelog-${version}.mdx`); const outputFilePath = path.join(outputDirectory, `changelog-${version}.mdx`);
@ -93,20 +101,6 @@ async function fetchData(siteConfig) {
counter++; counter++;
} }
// Create _category_.json file
const categoryContent = {
label: 'Changelogs',
position: 5,
link: {
type: 'generated-index',
description: 'Changelog for Jan',
},
};
fs.writeFileSync(categoryFilePath, JSON.stringify(categoryContent, null, 2), 'utf-8');
console.log(`_category_.json has been created at: ${categoryFilePath}`);
} }
module.exports = fetchData; module.exports = fetchData;

View File

@ -199,6 +199,19 @@ const sidebars = {
"guides/models/integrate-remote", "guides/models/integrate-remote",
] ]
}, },
{
type: "category",
label: "Inference Providers",
className: "head_SubMenu",
link: {
type: 'doc',
id: "guides/providers/README",
},
items: [
"guides/providers/llama-cpp",
"guides/providers/tensorrt-llm",
]
},
{ {
type: "category", type: "category",
label: "Extensions", label: "Extensions",

View File

@ -7,6 +7,7 @@ import {
autoUpdater, autoUpdater,
} from 'electron-updater' } from 'electron-updater'
import { AppEvent } from '@janhq/core' import { AppEvent } from '@janhq/core'
import { trayManager } from '../managers/tray'
export let waitingToInstallVersion: string | undefined = undefined export let waitingToInstallVersion: string | undefined = undefined
@ -22,6 +23,7 @@ export function handleAppUpdates() {
message: 'Would you like to download and install it now?', message: 'Would you like to download and install it now?',
buttons: ['Download', 'Later'], buttons: ['Download', 'Later'],
}) })
trayManager.destroyCurrentTray()
if (action.response === 0) await autoUpdater.downloadUpdate() if (action.response === 0) await autoUpdater.downloadUpdate()
}) })

BIN
electron/icons/512x512.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 38 KiB

View File

@ -1,11 +1,11 @@
import { app, BrowserWindow, Menu, Tray } from 'electron' import { app, BrowserWindow, Tray } from 'electron'
import { join } from 'path' import { join } from 'path'
/** /**
* Managers * Managers
**/ **/
import { windowManager } from './managers/window' import { windowManager } from './managers/window'
import { log } from '@janhq/core/node' import { getAppConfigurations, log } from '@janhq/core/node'
/** /**
* IPC Handlers * IPC Handlers
@ -27,7 +27,7 @@ import { setupReactDevTool } from './utils/dev'
import { cleanLogs } from './utils/log' import { cleanLogs } from './utils/log'
import { registerShortcut } from './utils/selectedText' import { registerShortcut } from './utils/selectedText'
import { createSystemTray } from './utils/tray' import { trayManager } from './managers/tray'
const preloadPath = join(__dirname, 'preload.js') const preloadPath = join(__dirname, 'preload.js')
const rendererPath = join(__dirname, '..', 'renderer') const rendererPath = join(__dirname, '..', 'renderer')
@ -39,8 +39,16 @@ const quickAskUrl = `${mainUrl}/search`
const quickAskHotKey = 'CommandOrControl+J' const quickAskHotKey = 'CommandOrControl+J'
const gotTheLock = app.requestSingleInstanceLock()
app app
.whenReady() .whenReady()
.then(() => {
if (!gotTheLock) {
app.quit()
throw new Error('Another instance of the app is already running')
}
})
.then(setupReactDevTool) .then(setupReactDevTool)
.then(setupCore) .then(setupCore)
.then(createUserSpace) .then(createUserSpace)
@ -56,7 +64,7 @@ app
windowManager.mainWindow?.webContents.openDevTools() windowManager.mainWindow?.webContents.openDevTools()
} }
}) })
.then(() => process.env.CI !== 'e2e' && createSystemTray()) .then(() => process.env.CI !== 'e2e' && trayManager.createSystemTray())
.then(() => { .then(() => {
log(`Version: ${app.getVersion()}`) log(`Version: ${app.getVersion()}`)
}) })
@ -64,20 +72,38 @@ app
app.on('activate', () => { app.on('activate', () => {
if (!BrowserWindow.getAllWindows().length) { if (!BrowserWindow.getAllWindows().length) {
createMainWindow() createMainWindow()
} else {
windowManager.showMainWindow()
} }
}) })
}) })
.then(() => cleanLogs()) .then(() => cleanLogs())
app.on('second-instance', (_event, _commandLine, _workingDirectory) => {
windowManager.showMainWindow()
})
app.on('ready', () => { app.on('ready', () => {
registerGlobalShortcuts() registerGlobalShortcuts()
}) })
app.on('before-quit', function (evt) {
trayManager.destroyCurrentTray()
})
app.once('quit', () => { app.once('quit', () => {
cleanUpAndQuit() cleanUpAndQuit()
}) })
app.once('window-all-closed', () => {
// Feature Toggle for Quick Ask
if (getAppConfigurations().quick_ask) return
cleanUpAndQuit()
})
function createQuickAskWindow() { function createQuickAskWindow() {
// Feature Toggle for Quick Ask
if (!getAppConfigurations().quick_ask) return
const startUrl = app.isPackaged ? `file://${quickAskPath}` : quickAskUrl const startUrl = app.isPackaged ? `file://${quickAskPath}` : quickAskUrl
windowManager.createQuickAskWindow(preloadPath, startUrl) windowManager.createQuickAskWindow(preloadPath, startUrl)
} }
@ -89,6 +115,9 @@ function createMainWindow() {
function registerGlobalShortcuts() { function registerGlobalShortcuts() {
const ret = registerShortcut(quickAskHotKey, (selectedText: string) => { const ret = registerShortcut(quickAskHotKey, (selectedText: string) => {
// Feature Toggle for Quick Ask
if (!getAppConfigurations().quick_ask) return
if (!windowManager.isQuickAskWindowVisible()) { if (!windowManager.isQuickAskWindowVisible()) {
windowManager.showQuickAskWindow() windowManager.showQuickAskWindow()
windowManager.sendQuickAskSelectedText(selectedText) windowManager.sendQuickAskSelectedText(selectedText)

View File

@ -5,7 +5,7 @@ export const mainWindowConfig: Electron.BrowserWindowConstructorOptions = {
width: DEFAULT_WIDTH, width: DEFAULT_WIDTH,
minWidth: DEFAULT_WIDTH, minWidth: DEFAULT_WIDTH,
height: DEFAULT_HEIGHT, height: DEFAULT_HEIGHT,
skipTaskbar: true, skipTaskbar: false,
show: true, show: true,
trafficLightPosition: { trafficLightPosition: {
x: 10, x: 10,

51
electron/managers/tray.ts Normal file
View File

@ -0,0 +1,51 @@
import { join } from 'path'
import { Tray, app, Menu } from 'electron'
import { windowManager } from '../managers/window'
import { getAppConfigurations } from '@janhq/core/node'
/**
 * Owns the application's system tray icon lifecycle (create/destroy) for the
 * Quick Ask feature. Only ever keeps a single Tray instance alive.
 */
class TrayManager {
  // The live Tray instance, or undefined when no tray is shown.
  currentTray: Tray | undefined

  /**
   * Creates the system tray icon if the Quick Ask feature is enabled and no
   * tray already exists. Clicking the icon opens the Quick Ask window; on
   * Windows a right-click context menu is also attached.
   */
  createSystemTray = () => {
    // Feature Toggle for Quick Ask
    if (!getAppConfigurations().quick_ask) return

    // Idempotent: never create a second tray icon.
    if (this.currentTray) {
      return
    }
    const iconPath = join(app.getAppPath(), 'icons', 'icon-tray.png')
    const tray = new Tray(iconPath)
    tray.setToolTip(app.getName())
    tray.on('click', () => {
      windowManager.showQuickAskWindow()
    })

    // Add context menu for windows only
    if (process.platform === 'win32') {
      const contextMenu = Menu.buildFromTemplate([
        {
          label: 'Open Jan',
          type: 'normal',
          click: () => windowManager.showMainWindow(),
        },
        {
          label: 'Open Quick Ask',
          type: 'normal',
          click: () => windowManager.showQuickAskWindow(),
        },
        { label: 'Quit', type: 'normal', click: () => app.quit() },
      ])
      tray.setContextMenu(contextMenu)
    }
    this.currentTray = tray
  }

  /**
   * Destroys the current tray icon (if any) and clears the reference so a
   * later createSystemTray() can recreate it. Safe to call when no tray exists.
   */
  destroyCurrentTray() {
    this.currentTray?.destroy()
    this.currentTray = undefined
  }
}

// Shared singleton — the app uses one tray manager process-wide.
export const trayManager = new TrayManager()

View File

@ -2,6 +2,7 @@ import { BrowserWindow, app, shell } from 'electron'
import { quickAskWindowConfig } from './quickAskWindowConfig' import { quickAskWindowConfig } from './quickAskWindowConfig'
import { AppEvent } from '@janhq/core' import { AppEvent } from '@janhq/core'
import { mainWindowConfig } from './mainWindowConfig' import { mainWindowConfig } from './mainWindowConfig'
import { getAppConfigurations } from '@janhq/core/node'
/** /**
* Manages the current window instance. * Manages the current window instance.
@ -43,6 +44,9 @@ class WindowManager {
}) })
windowManager.mainWindow?.on('close', function (evt) { windowManager.mainWindow?.on('close', function (evt) {
// Feature Toggle for Quick Ask
if (!getAppConfigurations().quick_ask) return
if (!isAppQuitting) { if (!isAppQuitting) {
evt.preventDefault() evt.preventDefault()
windowManager.hideMainWindow() windowManager.hideMainWindow()
@ -73,15 +77,11 @@ class WindowManager {
hideMainWindow(): void { hideMainWindow(): void {
this.mainWindow?.hide() this.mainWindow?.hide()
this._mainWindowVisible = false this._mainWindowVisible = false
// Only macos
if (process.platform === 'darwin') app.dock.hide()
} }
showMainWindow(): void { showMainWindow(): void {
this.mainWindow?.show() this.mainWindow?.show()
this._mainWindowVisible = true this._mainWindowVisible = true
// Only macos
if (process.platform === 'darwin') app.dock.show()
} }
hideQuickAskWindow(): void { hideQuickAskWindow(): void {
@ -101,6 +101,7 @@ class WindowManager {
expandQuickAskWindow(heightOffset: number): void { expandQuickAskWindow(heightOffset: number): void {
const width = quickAskWindowConfig.width! const width = quickAskWindowConfig.width!
const height = quickAskWindowConfig.height! + heightOffset const height = quickAskWindowConfig.height! + heightOffset
this._quickAskWindow?.setMinimumSize(width, height)
this._quickAskWindow?.setSize(width, height, true) this._quickAskWindow?.setSize(width, height, true)
} }

View File

@ -1,24 +0,0 @@
import { join } from 'path'
import { Tray, app, Menu } from 'electron'
import { windowManager } from '../managers/window'
/**
 * Creates the application's system tray icon with a context menu offering
 * shortcuts to the main window, the Quick Ask window, and app quit.
 * NOTE(review): no guard against repeated calls — each call creates a new Tray.
 */
export const createSystemTray = () => {
  const iconPath = join(app.getAppPath(), 'icons', 'icon-tray.png')
  const tray = new Tray(iconPath)
  tray.setToolTip(app.getName())
  const contextMenu = Menu.buildFromTemplate([
    {
      label: 'Open Jan',
      type: 'normal',
      click: () => windowManager.showMainWindow(),
    },
    {
      label: 'Open Quick Ask',
      type: 'normal',
      click: () => windowManager.showQuickAskWindow(),
    },
    { label: 'Quit', type: 'normal', click: () => app.quit() },
  ])
  tray.setContextMenu(contextMenu)
}

View File

@ -13,6 +13,7 @@ import {
events, events,
DownloadEvent, DownloadEvent,
log, log,
DownloadRequest,
} from '@janhq/core' } from '@janhq/core'
import { ggufMetadata } from 'hyllama' import { ggufMetadata } from 'hyllama'
@ -148,7 +149,11 @@ export default class JanHuggingFaceExtension extends HuggingFaceExtension {
if (this.interrupted) return if (this.interrupted) return
if (!(await fs.existsSync(localPath))) { if (!(await fs.existsSync(localPath))) {
downloadFile(url, localPath, network) const downloadRequest: DownloadRequest = {
url,
localPath,
}
downloadFile(downloadRequest, network)
filePaths.push(filePath) filePaths.push(filePath)
} }
} }

View File

@ -1,3 +1,3 @@
@echo off @echo off
set /p NITRO_VERSION=<./bin/version.txt set /p NITRO_VERSION=<./bin/version.txt
.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan && .\node_modules\.bin\download https://delta.jan.ai/vulkaninfoSDK.exe -o ./bin .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan

View File

@ -8,7 +8,7 @@
"license": "AGPL-3.0", "license": "AGPL-3.0",
"scripts": { "scripts": {
"build": "tsc --module commonjs && rollup -c rollup.config.ts", "build": "tsc --module commonjs && rollup -c rollup.config.ts",
"downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/nitro && download https://delta.jan.ai/vulkaninfo -o ./bin && chmod +x ./bin/vulkaninfo", "downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/nitro",
"downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-arm64.tar.gz -e --strip 1 -o ./bin/mac-arm64 && chmod +x ./bin/mac-arm64/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-amd64.tar.gz -e --strip 1 -o ./bin/mac-x64 && chmod +x ./bin/mac-x64/nitro", "downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-arm64.tar.gz -e --strip 1 -o ./bin/mac-arm64 && chmod +x ./bin/mac-arm64/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-amd64.tar.gz -e --strip 1 -o ./bin/mac-x64 && chmod +x ./bin/mac-x64/nitro",
"downloadnitro:win32": "download.bat", "downloadnitro:win32": "download.bat",
"downloadnitro": "run-script-os", "downloadnitro": "run-script-os",

View File

@ -108,9 +108,6 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
events.on(InferenceEvent.OnInferenceStopped, () => events.on(InferenceEvent.OnInferenceStopped, () =>
this.onInferenceStopped() this.onInferenceStopped()
) )
// Attempt to fetch nvidia info
await executeOnMain(NODE, 'updateNvidiaInfo', {})
} }
/** /**

View File

@ -1,237 +0,0 @@
import { writeFileSync, existsSync, readFileSync } from 'fs'
import { exec, spawn } from 'child_process'
import path from 'path'
import { getJanDataFolderPath, log } from '@janhq/core/node'
/**
* Default GPU settings
* TODO: This needs to be refactored to support multiple accelerators
**/
const DEFALT_SETTINGS = {
notify: true,
run_mode: 'cpu',
nvidia_driver: {
exist: false,
version: '',
},
cuda: {
exist: false,
version: '',
},
gpus: [],
gpu_highest_vram: '',
gpus_in_use: [],
is_initial: true,
// TODO: This needs to be set based on user toggle in settings
vulkan: false
}
/**
* Path to the settings file
**/
export const GPU_INFO_FILE = path.join(
getJanDataFolderPath(),
'settings',
'settings.json'
)
/**
* Current nitro process
*/
let nitroProcessInfo: NitroProcessInfo | undefined = undefined
/**
* Nitro process info
*/
export interface NitroProcessInfo {
isRunning: boolean
}
/**
* This will retrive GPU informations and persist settings.json
* Will be called when the extension is loaded to turn on GPU acceleration if supported
*/
export async function updateNvidiaInfo() {
if (process.platform !== 'darwin') {
let data
try {
data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
} catch (error) {
data = DEFALT_SETTINGS
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
}
updateNvidiaDriverInfo()
updateGpuInfo()
}
}
/**
* Retrieve current nitro process
*/
export const getNitroProcessInfo = (subprocess: any): NitroProcessInfo => {
nitroProcessInfo = {
isRunning: subprocess != null,
}
return nitroProcessInfo
}
/**
* Validate nvidia and cuda for linux and windows
*/
export async function updateNvidiaDriverInfo(): Promise<void> {
exec(
'nvidia-smi --query-gpu=driver_version --format=csv,noheader',
(error, stdout) => {
let data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
if (!error) {
const firstLine = stdout.split('\n')[0].trim()
data['nvidia_driver'].exist = true
data['nvidia_driver'].version = firstLine
} else {
data['nvidia_driver'].exist = false
}
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
Promise.resolve()
}
)
}
/**
* Check if file exists in paths
*/
export function checkFileExistenceInPaths(
file: string,
paths: string[]
): boolean {
return paths.some((p) => existsSync(path.join(p, file)))
}
/**
* Validate cuda for linux and windows
*/
export function updateCudaExistence(
data: Record<string, any> = DEFALT_SETTINGS
): Record<string, any> {
let filesCuda12: string[]
let filesCuda11: string[]
let paths: string[]
let cudaVersion: string = ''
if (process.platform === 'win32') {
filesCuda12 = ['cublas64_12.dll', 'cudart64_12.dll', 'cublasLt64_12.dll']
filesCuda11 = ['cublas64_11.dll', 'cudart64_11.dll', 'cublasLt64_11.dll']
paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : []
} else {
filesCuda12 = ['libcudart.so.12', 'libcublas.so.12', 'libcublasLt.so.12']
filesCuda11 = ['libcudart.so.11.0', 'libcublas.so.11', 'libcublasLt.so.11']
paths = process.env.LD_LIBRARY_PATH
? process.env.LD_LIBRARY_PATH.split(path.delimiter)
: []
paths.push('/usr/lib/x86_64-linux-gnu/')
}
let cudaExists = filesCuda12.every(
(file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
)
if (!cudaExists) {
cudaExists = filesCuda11.every(
(file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
)
if (cudaExists) {
cudaVersion = '11'
}
} else {
cudaVersion = '12'
}
data['cuda'].exist = cudaExists
data['cuda'].version = cudaVersion
console.debug(data['is_initial'], data['gpus_in_use'])
if (cudaExists && data['is_initial'] && data['gpus_in_use'].length > 0) {
data.run_mode = 'gpu'
}
data.is_initial = false
return data
}
/**
* Get GPU information
*/
export async function updateGpuInfo(): Promise<void> {
let data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
// Cuda
if (data['vulkan'] === true) {
// Vulkan
exec(
process.platform === 'win32'
? `${__dirname}\\..\\bin\\vulkaninfoSDK.exe --summary`
: `${__dirname}/../bin/vulkaninfo --summary`,
(error, stdout) => {
if (!error) {
const output = stdout.toString()
log(output)
const gpuRegex = /GPU(\d+):(?:[\s\S]*?)deviceName\s*=\s*(.*)/g
let gpus = []
let match
while ((match = gpuRegex.exec(output)) !== null) {
const id = match[1]
const name = match[2]
gpus.push({ id, vram: 0, name })
}
data.gpus = gpus
if (!data['gpus_in_use'] || data['gpus_in_use'].length === 0) {
data.gpus_in_use = [data.gpus.length > 1 ? '1' : '0']
}
data = updateCudaExistence(data)
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
}
Promise.resolve()
}
)
} else {
exec(
'nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits',
(error, stdout) => {
if (!error) {
log(stdout)
// Get GPU info and gpu has higher memory first
let highestVram = 0
let highestVramId = '0'
let gpus = stdout
.trim()
.split('\n')
.map((line) => {
let [id, vram, name] = line.split(', ')
vram = vram.replace(/\r/g, '')
if (parseFloat(vram) > highestVram) {
highestVram = parseFloat(vram)
highestVramId = id
}
return { id, vram, name }
})
data.gpus = gpus
data.gpu_highest_vram = highestVramId
} else {
data.gpus = []
data.gpu_highest_vram = ''
}
if (!data['gpus_in_use'] || data['gpus_in_use'].length === 0) {
data.gpus_in_use = [data['gpu_highest_vram']]
}
data = updateCudaExistence(data)
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
Promise.resolve()
}
)
}
}

View File

@ -1,12 +1,19 @@
import { getJanDataFolderPath } from '@janhq/core/node'
import { readFileSync } from 'fs' import { readFileSync } from 'fs'
import * as path from 'path' import * as path from 'path'
import { GPU_INFO_FILE } from './accelerator'
export interface NitroExecutableOptions { export interface NitroExecutableOptions {
executablePath: string executablePath: string
cudaVisibleDevices: string cudaVisibleDevices: string
vkVisibleDevices: string vkVisibleDevices: string
} }
export const GPU_INFO_FILE = path.join(
getJanDataFolderPath(),
'settings',
'settings.json'
)
/** /**
* Find which executable file to run based on the current platform. * Find which executable file to run based on the current platform.
* @returns The name of the executable file to run. * @returns The name of the executable file to run.

View File

@ -4,7 +4,6 @@ import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
import tcpPortUsed from 'tcp-port-used' import tcpPortUsed from 'tcp-port-used'
import fetchRT from 'fetch-retry' import fetchRT from 'fetch-retry'
import { log, getSystemResourceInfo } from '@janhq/core/node' import { log, getSystemResourceInfo } from '@janhq/core/node'
import { getNitroProcessInfo, updateNvidiaInfo } from './accelerator'
import { import {
Model, Model,
InferenceEngine, InferenceEngine,
@ -385,11 +384,26 @@ function dispose() {
killSubprocess() killSubprocess()
} }
/**
* Nitro process info
*/
export interface NitroProcessInfo {
isRunning: boolean
}
/**
* Retrieve current nitro process
*/
const getCurrentNitroProcessInfo = (): NitroProcessInfo => {
return {
isRunning: subprocess != null,
}
}
export default { export default {
runModel, runModel,
stopModel, stopModel,
killSubprocess, killSubprocess,
dispose, dispose,
updateNvidiaInfo, getCurrentNitroProcessInfo,
getCurrentNitroProcessInfo: () => getNitroProcessInfo(subprocess),
} }

View File

@ -17,6 +17,8 @@ import {
ImportingModel, ImportingModel,
LocalImportModelEvent, LocalImportModelEvent,
baseName, baseName,
GpuSetting,
DownloadRequest,
} from '@janhq/core' } from '@janhq/core'
import { extractFileName } from './helpers/path' import { extractFileName } from './helpers/path'
@ -29,10 +31,14 @@ export default class JanModelExtension extends ModelExtension {
private static readonly _modelMetadataFileName = 'model.json' private static readonly _modelMetadataFileName = 'model.json'
private static readonly _supportedModelFormat = '.gguf' private static readonly _supportedModelFormat = '.gguf'
private static readonly _incompletedModelFileName = '.download' private static readonly _incompletedModelFileName = '.download'
private static readonly _offlineInferenceEngine = InferenceEngine.nitro private static readonly _offlineInferenceEngine = [
InferenceEngine.nitro,
InferenceEngine.nitro_tensorrt_llm,
]
private static readonly _tensorRtEngineFormat = '.engine'
private static readonly _configDirName = 'config' private static readonly _configDirName = 'config'
private static readonly _defaultModelFileName = 'default-model.json' private static readonly _defaultModelFileName = 'default-model.json'
private static readonly _supportedGpuArch = ['turing', 'ampere', 'ada']
/** /**
* Called when the extension is loaded. * Called when the extension is loaded.
@ -89,12 +95,52 @@ export default class JanModelExtension extends ModelExtension {
*/ */
async downloadModel( async downloadModel(
model: Model, model: Model,
gpuSettings?: GpuSetting,
network?: { ignoreSSL?: boolean; proxy?: string } network?: { ignoreSSL?: boolean; proxy?: string }
): Promise<void> { ): Promise<void> {
// create corresponding directory // create corresponding directory
const modelDirPath = await joinPath([JanModelExtension._homeDir, model.id]) const modelDirPath = await joinPath([JanModelExtension._homeDir, model.id])
if (!(await fs.existsSync(modelDirPath))) await fs.mkdirSync(modelDirPath) if (!(await fs.existsSync(modelDirPath))) await fs.mkdirSync(modelDirPath)
if (model.engine === InferenceEngine.nitro_tensorrt_llm) {
if (!gpuSettings || gpuSettings.gpus.length === 0) {
console.error('No GPU found. Please check your GPU setting.')
return
}
const firstGpu = gpuSettings.gpus[0]
if (!firstGpu.name.toLowerCase().includes('nvidia')) {
console.error('No Nvidia GPU found. Please check your GPU setting.')
return
}
const gpuArch = firstGpu.arch
if (gpuArch === undefined) {
console.error(
'No GPU architecture found. Please check your GPU setting.'
)
return
}
if (!JanModelExtension._supportedGpuArch.includes(gpuArch)) {
console.error(
`Your GPU: ${firstGpu} is not supported. Only 20xx, 30xx, 40xx series are supported.`
)
return
}
const os = 'windows' // TODO: remove this hard coded value
const newSources = model.sources.map((source) => {
const newSource = { ...source }
newSource.url = newSource.url
.replace(/<os>/g, os)
.replace(/<gpuarch>/g, gpuArch)
return newSource
})
model.sources = newSources
}
console.debug(`Download sources: ${JSON.stringify(model.sources)}`)
if (model.sources.length > 1) { if (model.sources.length > 1) {
// path to model binaries // path to model binaries
for (const source of model.sources) { for (const source of model.sources) {
@ -105,8 +151,11 @@ export default class JanModelExtension extends ModelExtension {
if (source.filename) { if (source.filename) {
path = await joinPath([modelDirPath, source.filename]) path = await joinPath([modelDirPath, source.filename])
} }
const downloadRequest: DownloadRequest = {
downloadFile(source.url, path, network) url: source.url,
localPath: path,
}
downloadFile(downloadRequest, network)
} }
// TODO: handle multiple binaries for web later // TODO: handle multiple binaries for web later
} else { } else {
@ -115,7 +164,11 @@ export default class JanModelExtension extends ModelExtension {
JanModelExtension._supportedModelFormat JanModelExtension._supportedModelFormat
) )
const path = await joinPath([modelDirPath, fileName]) const path = await joinPath([modelDirPath, fileName])
downloadFile(model.sources[0]?.url, path, network) const downloadRequest: DownloadRequest = {
url: model.sources[0]?.url,
localPath: path,
}
downloadFile(downloadRequest, network)
if (window && window.core?.api && window.core.api.baseApiUrl) { if (window && window.core?.api && window.core.api.baseApiUrl) {
this.startPollingDownloadProgress(model.id) this.startPollingDownloadProgress(model.id)
@ -238,7 +291,7 @@ export default class JanModelExtension extends ModelExtension {
async getDownloadedModels(): Promise<Model[]> { async getDownloadedModels(): Promise<Model[]> {
return await this.getModelsMetadata( return await this.getModelsMetadata(
async (modelDir: string, model: Model) => { async (modelDir: string, model: Model) => {
if (model.engine !== JanModelExtension._offlineInferenceEngine) if (!JanModelExtension._offlineInferenceEngine.includes(model.engine))
return true return true
// model binaries (sources) are absolute path & exist // model binaries (sources) are absolute path & exist
@ -247,22 +300,32 @@ export default class JanModelExtension extends ModelExtension {
) )
if (existFiles.every((exist) => exist)) return true if (existFiles.every((exist) => exist)) return true
return await fs const result = await fs
.readdirSync(await joinPath([JanModelExtension._homeDir, modelDir])) .readdirSync(await joinPath([JanModelExtension._homeDir, modelDir]))
.then((files: string[]) => { .then((files: string[]) => {
// Model binary exists in the directory // Model binary exists in the directory
// Model binary name can match model ID or be a .gguf file and not be an incompleted model file // Model binary name can match model ID or be a .gguf file and not be an incompleted model file
return ( return (
files.includes(modelDir) || files.includes(modelDir) ||
files.filter( files.filter((file) => {
(file) => if (
file.endsWith(JanModelExtension._incompletedModelFileName)
) {
return false
}
return (
file file
.toLowerCase() .toLowerCase()
.includes(JanModelExtension._supportedModelFormat) && .includes(JanModelExtension._supportedModelFormat) ||
!file.endsWith(JanModelExtension._incompletedModelFileName) file
)?.length >= model.sources.length .toLowerCase()
.includes(JanModelExtension._tensorRtEngineFormat)
)
})?.length > 0 // TODO: NamH find better way (can use basename to check the file name with source url)
) )
}) })
return result
} }
) )
} }

View File

@ -0,0 +1,2 @@
@echo off
.\node_modules\.bin\download https://delta.jan.ai/vulkaninfoSDK.exe -o ./bin

View File

@ -3,21 +3,40 @@
"version": "1.0.10", "version": "1.0.10",
"description": "This extension provides system health and OS level data", "description": "This extension provides system health and OS level data",
"main": "dist/index.js", "main": "dist/index.js",
"module": "dist/module.js", "node": "dist/node/index.cjs.js",
"author": "Jan <service@jan.ai>", "author": "Jan <service@jan.ai>",
"license": "AGPL-3.0", "license": "AGPL-3.0",
"scripts": { "scripts": {
"build": "tsc -b . && webpack --config webpack.config.js", "build": "tsc --module commonjs && rollup -c rollup.config.ts && npm run download-artifacts",
"download-artifacts": "run-script-os && cpx \"bin/**\" \"dist/bin\"",
"download-artifacts:darwin": "echo 'No artifacts to download for darwin'",
"download-artifacts:win32": "download.bat",
"download-artifacts:linux": "download https://delta.jan.ai/vulkaninfo -o ./bin && chmod +x ./bin/vulkaninfo",
"build:publish": "rimraf *.tgz --glob && npm run build && npm pack && cpx *.tgz ../../pre-install" "build:publish": "rimraf *.tgz --glob && npm run build && npm pack && cpx *.tgz ../../pre-install"
}, },
"exports": {
".": "./dist/index.js",
"./main": "./dist/node/index.cjs.js"
},
"devDependencies": { "devDependencies": {
"@rollup/plugin-commonjs": "^25.0.7",
"@rollup/plugin-json": "^6.1.0",
"@rollup/plugin-node-resolve": "^15.2.3",
"@types/node": "^20.11.4",
"@types/node-os-utils": "^1.3.4",
"run-script-os": "^1.1.6",
"cpx": "^1.5.0",
"rimraf": "^3.0.2", "rimraf": "^3.0.2",
"webpack": "^5.88.2", "rollup": "^2.38.5",
"webpack-cli": "^5.1.4", "rollup-plugin-define": "^1.0.1",
"ts-loader": "^9.5.0" "rollup-plugin-sourcemaps": "^0.6.3",
"rollup-plugin-typescript2": "^0.36.0",
"typescript": "^5.3.3",
"download-cli": "^1.1.1"
}, },
"dependencies": { "dependencies": {
"@janhq/core": "file:../../core", "@janhq/core": "file:../../core",
"@rollup/plugin-replace": "^5.0.5",
"node-os-utils": "^1.3.7" "node-os-utils": "^1.3.7"
}, },
"files": [ "files": [

View File

@ -0,0 +1,68 @@
import resolve from '@rollup/plugin-node-resolve'
import commonjs from '@rollup/plugin-commonjs'
import sourceMaps from 'rollup-plugin-sourcemaps'
import typescript from 'rollup-plugin-typescript2'
import json from '@rollup/plugin-json'
import replace from '@rollup/plugin-replace'
const packageJson = require('./package.json')
export default [
{
input: `src/index.ts`,
output: [{ file: packageJson.main, format: 'es', sourcemap: true }],
// Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash')
external: [],
watch: {
include: 'src/**',
},
plugins: [
replace({
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
}),
// Allow json resolution
json(),
// Compile TypeScript files
typescript({ useTsconfigDeclarationDir: true }),
// Compile TypeScript files
// Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
commonjs(),
// Allow node_modules resolution, so you can use 'external' to control
// which external modules to include in the bundle
// https://github.com/rollup/rollup-plugin-node-resolve#usage
resolve({
extensions: ['.js', '.ts', '.svelte'],
}),
// Resolve source maps to the original source
sourceMaps(),
],
},
{
input: `src/node/index.ts`,
output: [
{ file: 'dist/node/index.cjs.js', format: 'cjs', sourcemap: true },
],
// Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash')
external: ['@janhq/core/node'],
watch: {
include: 'src/node/**',
},
plugins: [
// Allow json resolution
json(),
// Compile TypeScript files
typescript({ useTsconfigDeclarationDir: true }),
// Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
commonjs(),
// Allow node_modules resolution, so you can use 'external' to control
// which external modules to include in the bundle
// https://github.com/rollup/rollup-plugin-node-resolve#usage
resolve({
extensions: ['.ts', '.js', '.json'],
}),
// Resolve source maps to the original source
sourceMaps(),
],
},
]

View File

@ -1 +1,18 @@
declare const MODULE: string declare const NODE: string
type CpuGpuInfo = {
cpu: {
usage: number
}
gpu: GpuInfo[]
}
type GpuInfo = {
id: string
name: string
temperature: string
utilization: string
memoryTotal: string
memoryFree: string
memoryUtilization: string
}

View File

@ -1,4 +1,4 @@
import { MonitoringExtension, executeOnMain } from '@janhq/core' import { GpuSetting, MonitoringExtension, executeOnMain } from '@janhq/core'
/** /**
* JanMonitoringExtension is a extension that provides system monitoring functionality. * JanMonitoringExtension is a extension that provides system monitoring functionality.
@ -8,19 +8,30 @@ export default class JanMonitoringExtension extends MonitoringExtension {
/** /**
* Called when the extension is loaded. * Called when the extension is loaded.
*/ */
async onLoad() {} async onLoad() {
// Attempt to fetch nvidia info
await executeOnMain(NODE, 'updateNvidiaInfo')
}
/** /**
* Called when the extension is unloaded. * Called when the extension is unloaded.
*/ */
onUnload(): void {} onUnload(): void {}
/**
* Returns the GPU configuration.
* @returns A Promise that resolves to an object containing the GPU configuration.
*/
async getGpuSetting(): Promise<GpuSetting | undefined> {
return executeOnMain(NODE, 'getGpuConfig')
}
/** /**
* Returns information about the system resources. * Returns information about the system resources.
* @returns A Promise that resolves to an object containing information about the system resources. * @returns A Promise that resolves to an object containing information about the system resources.
*/ */
getResourcesInfo(): Promise<any> { getResourcesInfo(): Promise<any> {
return executeOnMain(MODULE, 'getResourcesInfo') return executeOnMain(NODE, 'getResourcesInfo')
} }
/** /**
@ -28,6 +39,6 @@ export default class JanMonitoringExtension extends MonitoringExtension {
* @returns A Promise that resolves to an object containing information about the current system load. * @returns A Promise that resolves to an object containing information about the current system load.
*/ */
getCurrentLoad(): Promise<any> { getCurrentLoad(): Promise<any> {
return executeOnMain(MODULE, 'getCurrentLoad') return executeOnMain(NODE, 'getCurrentLoad')
} }
} }

View File

@ -1,92 +0,0 @@
const nodeOsUtils = require('node-os-utils')
const getJanDataFolderPath = require('@janhq/core/node').getJanDataFolderPath
const path = require('path')
const { readFileSync } = require('fs')
const exec = require('child_process').exec
const NVIDIA_INFO_FILE = path.join(
getJanDataFolderPath(),
'settings',
'settings.json'
)
const getResourcesInfo = () =>
new Promise((resolve) => {
nodeOsUtils.mem.used().then((ramUsedInfo) => {
const totalMemory = ramUsedInfo.totalMemMb * 1024 * 1024
const usedMemory = ramUsedInfo.usedMemMb * 1024 * 1024
const response = {
mem: {
totalMemory,
usedMemory,
},
}
resolve(response)
})
})
const getCurrentLoad = () =>
new Promise((resolve, reject) => {
nodeOsUtils.cpu.usage().then((cpuPercentage) => {
let data = {
run_mode: 'cpu',
gpus_in_use: [],
}
if (process.platform !== 'darwin') {
data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, 'utf-8'))
}
if (data.run_mode === 'gpu' && data.gpus_in_use.length > 0) {
const gpuIds = data['gpus_in_use'].join(',')
if (gpuIds !== '' && data['vulkan'] !== true) {
exec(
`nvidia-smi --query-gpu=index,name,temperature.gpu,utilization.gpu,memory.total,memory.free,utilization.memory --format=csv,noheader,nounits --id=${gpuIds}`,
(error, stdout, _) => {
if (error) {
console.error(`exec error: ${error}`)
reject(error)
return
}
const gpuInfo = stdout
.trim()
.split('\n')
.map((line) => {
const [
id,
name,
temperature,
utilization,
memoryTotal,
memoryFree,
memoryUtilization,
] = line.split(', ').map((item) => item.replace(/\r/g, ''))
return {
id,
name,
temperature,
utilization,
memoryTotal,
memoryFree,
memoryUtilization,
}
})
resolve({
cpu: { usage: cpuPercentage },
gpu: gpuInfo,
})
}
)
} else {
// Handle the case where gpuIds is empty
resolve({ cpu: { usage: cpuPercentage }, gpu: [] })
}
} else {
// Handle the case where run_mode is not 'gpu' or no GPUs are in use
resolve({ cpu: { usage: cpuPercentage }, gpu: [] })
}
})
})
module.exports = {
getResourcesInfo,
getCurrentLoad,
}

View File

@ -0,0 +1,322 @@
import { GpuSetting, GpuSettingInfo, ResourceInfo } from '@janhq/core'
import { getJanDataFolderPath, log } from '@janhq/core/node'
import { mem, cpu } from 'node-os-utils'
import { exec } from 'child_process'
import { writeFileSync, existsSync, readFileSync, mkdirSync } from 'fs'
import path from 'path'
/**
* Path to the settings directory
**/
export const SETTINGS_DIR = path.join(getJanDataFolderPath(), 'settings')
/**
* Path to the settings file
**/
export const GPU_INFO_FILE = path.join(SETTINGS_DIR, 'settings.json')
/**
* Default GPU settings
* TODO: This needs to be refactored to support multiple accelerators
**/
const DEFAULT_SETTINGS: GpuSetting = {
notify: true,
run_mode: 'cpu',
nvidia_driver: {
exist: false,
version: '',
},
cuda: {
exist: false,
version: '',
},
gpus: [],
gpu_highest_vram: '',
gpus_in_use: [],
is_initial: true,
// TODO: This needs to be set based on user toggle in settings
vulkan: false,
}
export const getGpuConfig = async (): Promise<GpuSetting | undefined> => {
if (process.platform === 'darwin') return undefined
return JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
}
export const getResourcesInfo = async (): Promise<ResourceInfo> => {
const ramUsedInfo = await mem.used()
const totalMemory = ramUsedInfo.totalMemMb * 1024 * 1024
const usedMemory = ramUsedInfo.usedMemMb * 1024 * 1024
const resourceInfo: ResourceInfo = {
mem: {
totalMemory,
usedMemory,
},
}
return resourceInfo
}
export const getCurrentLoad = () =>
new Promise<CpuGpuInfo>(async (resolve, reject) => {
const cpuPercentage = await cpu.usage()
let data = {
run_mode: 'cpu',
gpus_in_use: [],
}
if (process.platform !== 'darwin') {
data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
}
if (data.run_mode === 'gpu' && data.gpus_in_use.length > 0) {
const gpuIds = data.gpus_in_use.join(',')
if (gpuIds !== '' && data['vulkan'] !== true) {
exec(
`nvidia-smi --query-gpu=index,name,temperature.gpu,utilization.gpu,memory.total,memory.free,utilization.memory --format=csv,noheader,nounits --id=${gpuIds}`,
(error, stdout, _) => {
if (error) {
console.error(`exec error: ${error}`)
throw new Error(error.message)
}
const gpuInfo: GpuInfo[] = stdout
.trim()
.split('\n')
.map((line) => {
const [
id,
name,
temperature,
utilization,
memoryTotal,
memoryFree,
memoryUtilization,
] = line.split(', ').map((item) => item.replace(/\r/g, ''))
return {
id,
name,
temperature,
utilization,
memoryTotal,
memoryFree,
memoryUtilization,
}
})
resolve({
cpu: { usage: cpuPercentage },
gpu: gpuInfo,
})
}
)
} else {
// Handle the case where gpuIds is empty
resolve({
cpu: { usage: cpuPercentage },
gpu: [],
})
}
} else {
// Handle the case where run_mode is not 'gpu' or no GPUs are in use
resolve({
cpu: { usage: cpuPercentage },
gpu: [],
})
}
})
/**
* This will retrive GPU informations and persist settings.json
* Will be called when the extension is loaded to turn on GPU acceleration if supported
*/
export const updateNvidiaInfo = async () => {
// ignore if macos
if (process.platform === 'darwin') return
try {
JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
} catch (error) {
if (!existsSync(SETTINGS_DIR)) {
mkdirSync(SETTINGS_DIR, {
recursive: true,
})
}
writeFileSync(GPU_INFO_FILE, JSON.stringify(DEFAULT_SETTINGS, null, 2))
}
await updateNvidiaDriverInfo()
await updateGpuInfo()
}
const updateNvidiaDriverInfo = async () =>
new Promise((resolve, reject) => {
exec(
'nvidia-smi --query-gpu=driver_version --format=csv,noheader',
(error, stdout) => {
const data: GpuSetting = JSON.parse(
readFileSync(GPU_INFO_FILE, 'utf-8')
)
if (!error) {
const firstLine = stdout.split('\n')[0].trim()
data.nvidia_driver.exist = true
data.nvidia_driver.version = firstLine
} else {
data.nvidia_driver.exist = false
}
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
resolve({})
}
)
})
const getGpuArch = (gpuName: string): string => {
if (!gpuName.toLowerCase().includes('nvidia')) return 'unknown'
if (gpuName.includes('20')) return 'turing'
else if (gpuName.includes('30')) return 'ampere'
else if (gpuName.includes('40')) return 'ada'
else return 'unknown'
}
const updateGpuInfo = async () =>
new Promise((resolve, reject) => {
let data: GpuSetting = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
// Cuda
if (data.vulkan === true) {
// Vulkan
exec(
process.platform === 'win32'
? `${__dirname}\\..\\bin\\vulkaninfoSDK.exe --summary`
: `${__dirname}/../bin/vulkaninfo --summary`,
(error, stdout) => {
if (!error) {
const output = stdout.toString()
log(output)
const gpuRegex = /GPU(\d+):(?:[\s\S]*?)deviceName\s*=\s*(.*)/g
const gpus: GpuSettingInfo[] = []
let match
while ((match = gpuRegex.exec(output)) !== null) {
const id = match[1]
const name = match[2]
const arch = getGpuArch(name)
gpus.push({ id, vram: '0', name, arch })
}
data.gpus = gpus
if (!data.gpus_in_use || data.gpus_in_use.length === 0) {
data.gpus_in_use = [data.gpus.length > 1 ? '1' : '0']
}
data = updateCudaExistence(data)
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
resolve({})
} else {
reject(error)
}
}
)
} else {
exec(
'nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits',
(error, stdout) => {
if (!error) {
log(stdout)
// Get GPU info and gpu has higher memory first
let highestVram = 0
let highestVramId = '0'
const gpus: GpuSettingInfo[] = stdout
.trim()
.split('\n')
.map((line) => {
let [id, vram, name] = line.split(', ')
const arch = getGpuArch(name)
vram = vram.replace(/\r/g, '')
if (parseFloat(vram) > highestVram) {
highestVram = parseFloat(vram)
highestVramId = id
}
return { id, vram, name, arch }
})
data.gpus = gpus
data.gpu_highest_vram = highestVramId
} else {
data.gpus = []
data.gpu_highest_vram = ''
}
if (!data.gpus_in_use || data.gpus_in_use.length === 0) {
data.gpus_in_use = [data.gpu_highest_vram]
}
data = updateCudaExistence(data)
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
resolve({})
}
)
}
})
/**
* Check if file exists in paths
*/
const checkFileExistenceInPaths = (file: string, paths: string[]): boolean => {
return paths.some((p) => existsSync(path.join(p, file)))
}
/**
* Validate cuda for linux and windows
*/
/**
 * Validate CUDA availability for Windows and Linux.
 *
 * Looks for the CUDA 12 runtime libraries first, then falls back to CUDA 11,
 * checking both the working directory and the platform library search path
 * (PATH on Windows, LD_LIBRARY_PATH on Linux).
 *
 * @param data - GPU settings to derive from; defaults to DEFAULT_SETTINGS.
 * @returns A new settings object with `cuda`, `run_mode` and `is_initial`
 *          updated. The input is NOT mutated — previously this function
 *          mutated its argument in place, which permanently corrupted the
 *          shared DEFAULT_SETTINGS object whenever it was called with no
 *          argument.
 */
const updateCudaExistence = (
  data: GpuSetting = DEFAULT_SETTINGS
): GpuSetting => {
  // Copy the input (including the nested cuda object) so the shared default
  // argument — and any caller-owned object — stays untouched. Callers use
  // the return value (`data = updateCudaExistence(data)`), so this is safe.
  const result: GpuSetting = { ...data, cuda: { ...data.cuda } }

  let filesCuda12: string[]
  let filesCuda11: string[]
  let paths: string[]

  if (process.platform === 'win32') {
    filesCuda12 = ['cublas64_12.dll', 'cudart64_12.dll', 'cublasLt64_12.dll']
    filesCuda11 = ['cublas64_11.dll', 'cudart64_11.dll', 'cublasLt64_11.dll']
    paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : []
  } else {
    // Every non-Windows platform is treated as Linux here.
    filesCuda12 = ['libcudart.so.12', 'libcublas.so.12', 'libcublasLt.so.12']
    filesCuda11 = ['libcudart.so.11.0', 'libcublas.so.11', 'libcublasLt.so.11']
    paths = process.env.LD_LIBRARY_PATH
      ? process.env.LD_LIBRARY_PATH.split(path.delimiter)
      : []
    paths.push('/usr/lib/x86_64-linux-gnu/')
  }

  // A library may sit in the working directory (bare existsSync) or anywhere
  // on the platform search path.
  const allPresent = (files: string[]): boolean =>
    files.every(
      (file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
    )

  let cudaVersion = ''
  let cudaExists = allPresent(filesCuda12)
  if (cudaExists) {
    cudaVersion = '12'
  } else {
    cudaExists = allPresent(filesCuda11)
    if (cudaExists) {
      cudaVersion = '11'
    }
  }

  result.cuda.exist = cudaExists
  result.cuda.version = cudaVersion

  console.debug(result.is_initial, result.gpus_in_use)

  // On first run, default to GPU mode when CUDA is usable and a GPU is selected.
  if (cudaExists && result.is_initial && result.gpus_in_use.length > 0) {
    result.run_mode = 'gpu'
  }
  result.is_initial = false
  return result
}

View File

@ -1,35 +0,0 @@
const path = require('path')
const webpack = require('webpack')
const packageJson = require('./package.json')

// Webpack configuration that bundles the extension entry as a single ES module.
module.exports = {
  // Required so webpack can emit ESM output (library.type 'module' below)
  experiments: { outputModule: true },
  entry: './src/index.ts', // Adjust the entry point to match your project's main file
  mode: 'production',
  module: {
    rules: [
      // Compile TypeScript sources with ts-loader; node_modules ship pre-built
      {
        test: /\.tsx?$/,
        use: 'ts-loader',
        exclude: /node_modules/,
      },
    ],
  },
  output: {
    filename: 'index.js', // Adjust the output file name as needed
    path: path.resolve(__dirname, 'dist'),
    library: { type: 'module' }, // Specify ESM output format
  },
  plugins: [
    // Expose "<package name>/<module entry>" as the compile-time constant MODULE
    new webpack.DefinePlugin({
      MODULE: JSON.stringify(`${packageJson.name}/${packageJson.module}`),
    }),
  ],
  resolve: {
    extensions: ['.ts', '.js'],
  },
  optimization: {
    // Keep the bundle readable — no minification
    minimize: false,
  },
  // Add loaders and other configuration as needed for your project
}

View File

@ -0,0 +1,79 @@
# Tensorrt-LLM Extension
Created using Jan extension example
# Create a Jan Extension using Typescript
Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀
## Create Your Own Extension
To create your own extension, you can use this repository as a template! Just follow the below instructions:
1. Click the Use this template button at the top of the repository
2. Select Create a new repository
3. Select an owner and name for your new repository
4. Click Create repository
5. Clone your new repository
## Initial Setup
After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.
> [!NOTE]
>
> You'll need to have a reasonably modern version of
> [Node.js](https://nodejs.org) handy. If you are using a version manager like
> [`nodenv`](https://github.com/nodenv/nodenv) or
> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
> root of your repository to install the version specified in
> [`package.json`](./package.json). Otherwise, 20.x or later should work!
1. :hammer_and_wrench: Install the dependencies
```bash
npm install
```
1. :building_construction: Package the TypeScript for distribution
```bash
npm run bundle
```
1. :white_check_mark: Check your artifact
There will be a tgz file in your extension directory now
## Update the Extension Metadata
The [`package.json`](package.json) file defines metadata about your extension, such as
extension name, main entry, description and version.
When you copy this repository, update `package.json` with the name, description for your extension.
## Update the Extension Code
The [`src/`](./src/) directory is the heart of your extension! This contains the
source code that will be run when your extension functions are invoked. You can replace the
contents of this directory with your own code.
There are a few things to keep in mind when writing your extension code:
- Most Jan Extension functions are processed asynchronously.
In `index.ts`, you will see that the extension function will return a `Promise<any>`.
```typescript
import { events, MessageEvent, MessageRequest } from '@janhq/core'
function onStart(): Promise<any> {
return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
this.inference(data)
)
}
```
For more information about the Jan Extension Core module, see the
[documentation](https://github.com/janhq/jan/blob/main/core/README.md).
So, what are you waiting for? Go ahead and start customizing your extension!

View File

@ -0,0 +1,49 @@
[
{
"sources": [
{
"filename": "config.json",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/config.json"
},
{
"filename": "rank0.engine",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/rank0.engine"
},
{
"filename": "tokenizer.model",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/tokenizer.model"
},
{
"filename": "special_tokens_map.json",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/special_tokens_map.json"
},
{
"filename": "tokenizer.json",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/tokenizer.json"
},
{
"filename": "tokenizer_config.json",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/tokenizer_config.json"
}
],
"id": "llamacorn-1.1b-chat-fp16",
"object": "model",
"name": "LlamaCorn 1.1B Chat FP16",
"version": "1.0",
"description": "LlamaCorn is a refined version of TinyLlama-1.1B, optimized for conversational quality, running on consumer devices through TensorRT-LLM",
"format": "TensorRT-LLM",
"settings": {
"ctx_len": 2048,
"text_model": false
},
"parameters": {
"max_tokens": 4096
},
"metadata": {
"author": "LLama",
"tags": ["TensorRT-LLM", "1B", "Finetuned"],
"size": 2151000000
},
"engine": "nitro-tensorrt-llm"
}
]

View File

@ -0,0 +1,75 @@
{
"name": "@janhq/tensorrt-llm-extension",
"version": "0.0.2",
"description": "Enables accelerated inference leveraging Nvidia's TensorRT-LLM for optimal GPU hardware optimizations. Compatible with models in TensorRT-LLM format. Requires Nvidia GPU driver and CUDA Toolkit installation.",
"main": "dist/index.js",
"node": "dist/node/index.cjs.js",
"author": "Jan <service@jan.ai>",
"license": "AGPL-3.0",
"config": {
"host": "127.0.0.1",
"port": "3928"
},
"compatibility": {
"platform": [
"win32",
"linux"
],
"app": [
"0.1.0"
]
},
"scripts": {
"build": "tsc --module commonjs && rollup -c rollup.config.ts",
"build:publish:win32": "rimraf *.tgz --glob && npm run build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
"build:publish:linux": "rimraf *.tgz --glob && npm run build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
"build:publish:darwin": "rimraf *.tgz --glob && npm run build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
"build:publish": "run-script-os"
},
"exports": {
".": "./dist/index.js",
"./main": "./dist/node/index.cjs.js"
},
"devDependencies": {
"@rollup/plugin-commonjs": "^25.0.7",
"@rollup/plugin-json": "^6.1.0",
"@rollup/plugin-node-resolve": "^15.2.3",
"@rollup/plugin-replace": "^5.0.5",
"@types/node": "^20.11.4",
"@types/os-utils": "^0.0.4",
"@types/tcp-port-used": "^1.0.4",
"@types/decompress": "4.2.7",
"cpx": "^1.5.0",
"download-cli": "^1.1.1",
"rimraf": "^3.0.2",
"rollup": "^2.38.5",
"rollup-plugin-define": "^1.0.1",
"rollup-plugin-sourcemaps": "^0.6.3",
"rollup-plugin-typescript2": "^0.36.0",
"run-script-os": "^1.1.6",
"typescript": "^5.2.2"
},
"dependencies": {
"@janhq/core": "file:../../core",
"decompress": "^4.2.1",
"fetch-retry": "^5.0.6",
"path-browserify": "^1.0.1",
"rxjs": "^7.8.1",
"tcp-port-used": "^1.0.2",
"ulid": "^2.3.0"
},
"engines": {
"node": ">=18.0.0"
},
"files": [
"dist/*",
"package.json",
"README.md"
],
"bundleDependencies": [
"tcp-port-used",
"fetch-retry",
"decompress",
"@janhq/core"
]
}

View File

@ -0,0 +1,73 @@
import resolve from '@rollup/plugin-node-resolve'
import commonjs from '@rollup/plugin-commonjs'
import sourceMaps from 'rollup-plugin-sourcemaps'
import typescript from 'rollup-plugin-typescript2'
import json from '@rollup/plugin-json'
import replace from '@rollup/plugin-replace'

const packageJson = require('./package.json')

// Builds two bundles: the renderer-side extension entry (ESM) and the
// node-side module (CJS) that runs in the main process.
export default [
  {
    // Renderer-side entry point
    input: `src/index.ts`,
    output: [{ file: packageJson.main, format: 'es', sourcemap: true }],
    watch: {
      include: 'src/**',
    },
    plugins: [
      // Inject build-time constants (the `declare const` globals) into the bundle
      replace({
        EXTENSION_NAME: JSON.stringify(packageJson.name),
        TENSORRT_VERSION: JSON.stringify('0.1.5'),
        // <version> and <gpuarch> placeholders are substituted at install time
        DOWNLOAD_RUNNER_URL:
          process.platform === 'darwin' || process.platform === 'win32'
            ? JSON.stringify(
                'https://github.com/janhq/nitro-tensorrt-llm/releases/download/windows-v<version>/nitro-windows-v<version>-amd64-tensorrt-llm-<gpuarch>.tar.gz'
              )
            : JSON.stringify(
                'https://github.com/janhq/nitro-tensorrt-llm/releases/download/linux-v<version>/nitro-linux-v<version>-amd64-tensorrt-llm-<gpuarch>.tar.gz'
              ),
        NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
        INFERENCE_URL: JSON.stringify(
          process.env.INFERENCE_URL ||
            `${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/v1/chat/completions`
        ),
        COMPATIBILITY: JSON.stringify(packageJson.compatibility),
      }),
      json(),
      typescript({ useTsconfigDeclarationDir: true }),
      commonjs(),
      resolve({
        extensions: ['.js', '.ts', '.svelte'],
      }),
      sourceMaps(),
    ],
  },
  {
    // Node-side (main process) entry point
    input: `src/node/index.ts`,
    output: [
      { file: 'dist/node/index.cjs.js', format: 'cjs', sourcemap: true },
    ],
    // Provided by the host app at runtime — do not bundle
    external: ['@janhq/core/node'],
    watch: {
      include: 'src/node/**',
    },
    plugins: [
      // Engine endpoints derived from the host/port in package.json's config
      replace({
        LOAD_MODEL_URL: JSON.stringify(
          `${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/inferences/tensorrtllm/loadmodel`
        ),
        TERMINATE_ENGINE_URL: JSON.stringify(
          `${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/inferences/processmanager/destroy`
        ),
        ENGINE_HOST: JSON.stringify(packageJson.config?.host ?? '127.0.0.1'),
        ENGINE_PORT: JSON.stringify(packageJson.config?.port ?? '3928'),
      }),
      json(),
      typescript({ useTsconfigDeclarationDir: true }),
      commonjs(),
      resolve({
        extensions: ['.ts', '.js', '.json'],
      }),
      sourceMaps(),
    ],
  },
]

View File

@ -0,0 +1,10 @@
// Build-time constants. Concrete values are injected by
// @rollup/plugin-replace (see the `replace({...})` entries in rollup.config.ts).

// Module specifier of the node-side entry ("<package name>/<node entry path>")
declare const NODE: string
// Chat-completions endpoint of the local engine
declare const INFERENCE_URL: string
// Engine endpoint used to load a model
declare const LOAD_MODEL_URL: string
// Engine endpoint used to terminate the running engine process
declare const TERMINATE_ENGINE_URL: string
// Host the engine subprocess binds to
declare const ENGINE_HOST: string
// Port the engine subprocess binds to
declare const ENGINE_PORT: string
// Runner download URL template with <version>/<gpuarch> placeholders
declare const DOWNLOAD_RUNNER_URL: string
// Pinned TensorRT-LLM runner version
declare const TENSORRT_VERSION: string
// Platform/app compatibility metadata taken from package.json
declare const COMPATIBILITY: object
// This extension's package name
declare const EXTENSION_NAME: string

View File

@ -0,0 +1,154 @@
/**
* @module tensorrt-llm-extension/src/index
*/
import {
Compatibility,
DownloadEvent,
DownloadRequest,
DownloadState,
GpuSetting,
InstallationState,
Model,
baseName,
downloadFile,
events,
executeOnMain,
joinPath,
showToast,
systemInformations,
LocalOAIEngine,
fs,
MessageRequest,
} from '@janhq/core'
import models from '../models.json'
/**
* TensorRTLLMExtension - Implementation of LocalOAIEngine
 * @extends LocalOAIEngine
* Provide pre-populated models for TensorRTLLM
*/
export default class TensorRTLLMExtension extends LocalOAIEngine {
  /**
   * Override custom function name for loading and unloading model
   * Which are implemented from node module
   */
  override provider = 'nitro-tensorrt-llm'
  override inferenceUrl = INFERENCE_URL
  override nodeModule = NODE

  // GPU generations a prebuilt runner exists for (RTX 20xx / 30xx / 40xx)
  private supportedGpuArch = ['turing', 'ampere', 'ada']

  /** Build-time compatibility metadata (supported platforms / app versions). */
  compatibility() {
    return COMPATIBILITY as unknown as Compatibility
  }

  /**
   * models implemented by the extension — pre-populated model list.
   * Only advertised once the runner is actually installed.
   */
  async models(): Promise<Model[]> {
    if ((await this.installationState()) === 'Installed')
      return models as unknown as Model[]
    return []
  }

  /**
   * Downloads and unpacks the platform/GPU-architecture-specific
   * TensorRT-LLM runner. Bails out (logging an error) when no compatible
   * Nvidia GPU is detected.
   */
  override async install(): Promise<void> {
    const info = await systemInformations()
    console.debug(
      `TensorRTLLMExtension installing pre-requisites... ${JSON.stringify(info)}`
    )
    const gpuSetting: GpuSetting | undefined = info.gpuSetting
    if (gpuSetting === undefined || gpuSetting.gpus.length === 0) {
      console.error('No GPU setting found. Please check your GPU setting.')
      return
    }

    // TODO: we only check for the first graphics card. Need to refactor this later.
    const firstGpu = gpuSetting.gpus[0]
    if (!firstGpu.name.toLowerCase().includes('nvidia')) {
      console.error('No Nvidia GPU found. Please check your GPU setting.')
      return
    }

    if (firstGpu.arch === undefined) {
      console.error('No GPU architecture found. Please check your GPU setting.')
      return
    }

    if (!this.supportedGpuArch.includes(firstGpu.arch)) {
      console.error(
        // Fixed: interpolate the GPU name, not the whole GpuSettingInfo
        // object (which printed "[object Object]").
        `Your GPU: ${firstGpu.name} is not supported. Only 20xx, 30xx, 40xx series are supported.`
      )
      return
    }

    const binaryFolderPath = await executeOnMain(
      this.nodeModule,
      'binaryFolder'
    )
    if (!(await fs.existsSync(binaryFolderPath))) {
      await fs.mkdirSync(binaryFolderPath)
    }

    // Resolve the URL template against the pinned runner version and the
    // detected GPU architecture.
    const placeholderUrl = DOWNLOAD_RUNNER_URL
    const tensorrtVersion = TENSORRT_VERSION

    const url = placeholderUrl
      .replace(/<version>/g, tensorrtVersion)
      .replace(/<gpuarch>/g, firstGpu.arch)

    const tarball = await baseName(url)
    const tarballFullPath = await joinPath([binaryFolderPath, tarball])

    const downloadRequest: DownloadRequest = {
      url,
      localPath: tarballFullPath,
      extensionId: EXTENSION_NAME,
      downloadType: 'extension',
    }
    // Fire-and-forget: completion is observed through the download event below.
    downloadFile(downloadRequest)

    // TODO: wrap this into a Promise
    const onFileDownloadSuccess = async (state: DownloadState) => {
      // if other download, ignore
      if (state.fileName !== tarball) return
      events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
      await executeOnMain(this.nodeModule, 'decompressRunner', tarballFullPath)
      events.emit(DownloadEvent.onFileUnzipSuccess, state)

      // Prepopulate models as soon as it's ready
      this.prePopulateModels().then(() => {
        showToast(
          'Extension installed successfully.',
          'New models are added to Model Hub.'
        )
      })
    }
    events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
  }

  /** Installed iff the nitro-tensorrt executable is present on disk. */
  override async installationState(): Promise<InstallationState> {
    // For now, we just check the executable of nitro x tensor rt
    const isNitroExecutableAvailable = await executeOnMain(
      this.nodeModule,
      'isNitroExecutableAvailable'
    )
    return isNitroExecutableAvailable ? 'Installed' : 'NotInstalled'
  }

  /** The engine cannot stop mid-generation; surface that to the user. */
  override onInferenceStopped() {
    if (!this.isRunning) return
    showToast(
      'Unable to Stop Inference',
      'The model does not support stopping inference.'
    )
    return Promise.resolve()
  }

  inference(data: MessageRequest): void {
    // TensorRT LLM Extension supports streaming only
    if (data.model) data.model.parameters.stream = true
    super.inference(data)
  }
}

View File

@ -0,0 +1,191 @@
import path from 'path'
import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
import tcpPortUsed from 'tcp-port-used'
import fetchRT from 'fetch-retry'
import { log } from '@janhq/core/node'
import { existsSync } from 'fs'
import decompress from 'decompress'
// Polyfill fetch with retry
const fetchRetry = fetchRT(fetch)
/**
 * The request parameters for the model load operation.
*/
interface ModelLoadParams {
  // Absolute path of the folder containing the compiled engine files
  engine_path: string
  // Context window size to load the model with
  ctx_len: number
}

// The subprocess instance for Engine
let subprocess: ChildProcessWithoutNullStreams | undefined = undefined
/**
 * Initializes an engine subprocess to load a machine learning model.
* @param params - The model load settings.
*/
/**
 * Builds the engine load settings from the request and delegates to
 * runEngineAndLoadModel.
 */
async function loadModel(params: any): Promise<{ error: Error | undefined }> {
  // `params.modelFolder` is the absolute path to the running model folder,
  // e.g. ~/jan/models/llama-2
  const settings: ModelLoadParams = {
    engine_path: params.modelFolder,
    ctx_len: params.model.settings.ctx_len ?? 2048,
  }
  return runEngineAndLoadModel(settings)
}
/**
 * Stops an Engine subprocess.
*/
// Kills the engine subprocess and waits for its port to be released.
function unloadModel(): Promise<any> {
  // Abort the HTTP terminate request if it takes longer than 5s
  const controller = new AbortController()
  setTimeout(() => controller.abort(), 5000)
  debugLog(`Request to kill engine`)

  // Kill the process directly, then also ask the engine to destroy itself
  // over HTTP; failures of the HTTP call are deliberately ignored (best effort).
  subprocess?.kill()
  return fetch(TERMINATE_ENGINE_URL, {
    method: 'DELETE',
    signal: controller.signal,
  })
    .then(() => {
      subprocess = undefined
    })
    .catch(() => {}) // Do nothing with this attempt
    .then(() => tcpPortUsed.waitUntilFree(parseInt(ENGINE_PORT), 300, 5000)) // Wait for port available
    .then(() => debugLog(`Engine process is terminated`))
    .catch((err) => {
      // Port still occupied after 5s — surface a sentinel error to the caller
      debugLog(
        `Could not kill running process on port ${ENGINE_PORT}. Might be another process running on the same port? ${err}`
      )
      throw 'PORT_NOT_AVAILABLE'
    })
}
/**
* 1. Spawn engine process
* 2. Load model into engine subprocess
* @returns
*/
/**
 * 1. Spawn engine process
 * 2. Load model into engine subprocess
 * Any failure along the way is logged and returned as `{ error }`.
 */
async function runEngineAndLoadModel(settings: ModelLoadParams) {
  try {
    await unloadModel()
    await runEngine()
    return await loadModelRequest(settings)
  } catch (err) {
    // TODO: Broadcast error so app could display proper error message
    debugLog(`${err}`, 'Error')
    return { error: err }
  }
}
/**
* Loads a LLM model into the Engine subprocess by sending a HTTP POST request.
*/
/**
 * Loads a LLM model into the Engine subprocess via an HTTP POST request
 * (retried up to 3 times). Never rejects — failures are reported through
 * the returned `error` field.
 */
async function loadModelRequest(
  settings: ModelLoadParams
): Promise<{ error: Error | undefined }> {
  debugLog(`Loading model with params ${JSON.stringify(settings)}`)
  try {
    const res = await fetchRetry(LOAD_MODEL_URL, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(settings),
      retries: 3,
      retryDelay: 500,
    })
    debugLog(`Load model success with response ${JSON.stringify(res)}`)
    return { error: undefined }
  } catch (err) {
    debugLog(`Load model failed with error ${err}`, 'Error')
    return { error: err }
  }
}
/**
* Spawns engine subprocess.
*/
/**
 * Spawns the engine subprocess.
 * Resolves once the engine is accepting connections on ENGINE_PORT;
 * rejects if the process exits early or the port never becomes ready.
 */
function runEngine(): Promise<any> {
  debugLog(`Spawning engine subprocess...`)
  return new Promise<void>((resolve, reject) => {
    // Binaries live in the extension's bin folder next to this module
    const binaryFolder = path.join(__dirname, '..', 'bin')
    const binary = path.join(
      binaryFolder,
      process.platform === 'win32' ? 'nitro.exe' : 'nitro'
    )
    const args: string[] = ['1', ENGINE_HOST, ENGINE_PORT]

    // Execute the binary
    debugLog(`Spawn nitro at path: ${binary}, and args: ${args}`)
    subprocess = spawn(binary, args, {
      cwd: binaryFolder,
      env: {
        ...process.env,
      },
    })

    // Forward all subprocess output into the app log
    subprocess.stdout.on('data', (data: any) => {
      debugLog(`${data}`)
    })
    subprocess.stderr.on('data', (data: any) => {
      debugLog(`${data}`)
    })

    subprocess.on('close', (code: any) => {
      debugLog(`Engine exited with code: ${code}`)
      subprocess = undefined
      reject(`child process exited with code ${code}`)
    })

    tcpPortUsed
      .waitUntilUsed(parseInt(ENGINE_PORT), 300, 30000)
      .then(() => {
        debugLog(`Engine is ready`)
        resolve()
      })
      // Fixed: without this catch a readiness timeout left the promise
      // pending forever and surfaced as an unhandled rejection.
      .catch((err: any) => {
        debugLog(`Engine did not become ready: ${err}`, 'Error')
        reject(err)
      })
  })
}
/** Writes a namespaced message to the app log (default level: Debug). */
function debugLog(message: string, level: string = 'Debug') {
  log('[TENSORRT_LLM_NITRO]::' + level + ':' + message)
}
/** Absolute path of the folder holding the bundled engine binaries. */
const binaryFolder = async (): Promise<string> =>
  path.join(__dirname, '..', 'bin')
/** Unpacks a downloaded runner archive into the extension's bin folder. */
const decompressRunner = async (zipPath: string) => {
  const destination = path.join(__dirname, '..', 'bin')
  console.debug(`Decompressing ${zipPath} to ${destination}...`)
  try {
    const extracted = await decompress(zipPath, destination)
    console.debug('Decompress finished!', extracted)
  } catch (err) {
    // Extraction failures are logged but not rethrown (best effort)
    console.error(`Decompress ${zipPath} failed: ${err}`)
  }
}
/** True when the nitro binary shipped with this extension exists on disk. */
const isNitroExecutableAvailable = async (): Promise<boolean> => {
  const executableName = process.platform === 'win32' ? 'nitro.exe' : 'nitro'
  return existsSync(path.join(__dirname, '..', 'bin', executableName))
}
// Node-side API surface; these functions are invoked from the renderer
// via executeOnMain(NODE, '<name>', ...).
export default {
  binaryFolder,
  decompressRunner,
  loadModel,
  unloadModel,
  dispose: unloadModel, // cleanup hook reuses unloadModel
  isNitroExecutableAvailable,
}

View File

@ -0,0 +1,20 @@
{
"compilerOptions": {
"moduleResolution": "node",
"target": "es5",
"module": "ES2020",
"lib": ["es2015", "es2016", "es2017", "dom"],
"strict": true,
"sourceMap": true,
"declaration": true,
"allowSyntheticDefaultImports": true,
"experimentalDecorators": true,
"emitDecoratorMetadata": true,
"declarationDir": "dist/types",
"outDir": "dist",
"importHelpers": true,
"resolveJsonModule": true,
"typeRoots": ["node_modules/@types"]
},
"include": ["src"]
}

View File

@ -1,5 +1,5 @@
.badge { .badge {
@apply focus:ring-ring border-border inline-flex items-center rounded-full border px-2.5 py-0.5 text-xs font-semibold transition-colors focus:outline-none focus:ring-2 focus:ring-offset-2; @apply focus:ring-ring border-border inline-flex items-center rounded-md border px-2.5 py-0.5 text-xs font-semibold transition-colors focus:outline-none focus:ring-2 focus:ring-offset-2;
&-primary { &-primary {
@apply border-transparent bg-blue-100 text-blue-600; @apply border-transparent bg-blue-100 text-blue-600;

View File

@ -29,15 +29,15 @@ const SelectedText = ({ onCleared }: { onCleared?: () => void }) => {
return shouldShowSelectedText ? ( return shouldShowSelectedText ? (
<div <div
ref={containerRef} ref={containerRef}
className="relative rounded-lg border-[1px] border-[#0000000F] bg-[#0000000A] p-[10px]" className="relative rounded-lg border border-border bg-secondary p-[10px]"
> >
<div <div
className="absolute right-1 top-1 flex h-6 w-6 items-center justify-center rounded-full border-[1px] border-[#0000000F] bg-white drop-shadow" className="absolute right-2 top-2 flex h-6 w-6 cursor-pointer items-center justify-center rounded-full border border-border bg-white shadow dark:bg-black/80"
onClick={onClearClicked} onClick={onClearClicked}
> >
<X size={16} /> <X size={14} className="text-muted-foreground" />
</div> </div>
<p className="font-semibold text-[#00000099]">{text}</p> <p className="pr-8 font-medium text-muted-foreground">{text}</p>
</div> </div>
) : ( ) : (
<div /> <div />

View File

@ -56,7 +56,7 @@ const UserInput: React.FC = () => {
} }
return ( return (
<div className="flex flex-col space-y-3 p-3"> <div className="flex flex-col space-y-3 bg-white p-3 dark:bg-background">
<form <form
ref={formRef} ref={formRef}
className="flex h-full w-full items-center justify-center" className="flex h-full w-full items-center justify-center"
@ -66,7 +66,7 @@ const UserInput: React.FC = () => {
<LogoMark width={28} height={28} className="mx-auto" /> <LogoMark width={28} height={28} className="mx-auto" />
<input <input
ref={inputRef} ref={inputRef}
className="flex-1 bg-transparent font-bold text-black focus:outline-none" className="flex-1 bg-transparent font-bold focus:outline-none"
type="text" type="text"
value={inputValue} value={inputValue}
onChange={handleChange} onChange={handleChange}
@ -77,7 +77,6 @@ const UserInput: React.FC = () => {
</Button> </Button>
</div> </div>
</form> </form>
<SelectedText onCleared={() => inputRef?.current?.focus()} /> <SelectedText onCleared={() => inputRef?.current?.focus()} />
</div> </div>
) )

View File

@ -4,7 +4,7 @@ import UserInput from './UserInput'
const Search: React.FC = () => { const Search: React.FC = () => {
return ( return (
<div className="h-screen w-screen overflow-hidden bg-white"> <div className="h-screen w-screen overflow-hidden bg-white dark:bg-background">
<UserInput /> <UserInput />
</div> </div>
) )

View File

@ -73,8 +73,9 @@ const DropdownListSidebar = ({
const [copyId, setCopyId] = useState('') const [copyId, setCopyId] = useState('')
// TODO: Update filter condition for the local model
const localModel = downloadedModels.filter( const localModel = downloadedModels.filter(
(model) => model.engine === InferenceEngine.nitro (model) => model.engine !== InferenceEngine.openai
) )
const remoteModel = downloadedModels.filter( const remoteModel = downloadedModels.filter(
(model) => model.engine === InferenceEngine.openai (model) => model.engine === InferenceEngine.openai
@ -293,7 +294,7 @@ const DropdownListSidebar = ({
<span className="font-bold text-muted-foreground"> <span className="font-bold text-muted-foreground">
{toGibibytes(x.metadata.size)} {toGibibytes(x.metadata.size)}
</span> </span>
{x.engine == InferenceEngine.nitro && ( {x.metadata.size && (
<ModelLabel size={x.metadata.size} /> <ModelLabel size={x.metadata.size} />
)} )}
</div> </div>

View File

@ -0,0 +1,87 @@
import { useCallback, useEffect } from 'react'
import { abortDownload } from '@janhq/core'
import {
Button,
Modal,
ModalContent,
ModalHeader,
ModalTitle,
Progress,
} from '@janhq/uikit'
import { atom, useAtom, useAtomValue } from 'jotai'
import {
formatDownloadPercentage,
formatExtensionsName,
} from '@/utils/converter'
import {
InstallingExtensionState,
installingExtensionAtom,
} from '@/helpers/atoms/Extension.atom'
/** Controls visibility of the extension-installation progress modal. */
export const showInstallingExtensionModalAtom = atom(false)

/**
 * Modal listing every extension installation currently in flight, with a
 * progress bar and a Cancel button (aborting the underlying download) per
 * entry. Dismisses itself automatically once no installation remains.
 */
const InstallingExtensionModal: React.FC = () => {
  const [showInstallingExtensionModal, setShowInstallingExtensionModal] =
    useAtom(showInstallingExtensionModalAtom)
  const installingExtensions = useAtomValue(installingExtensionAtom)

  // Auto-close when the last installation finishes or is cancelled
  useEffect(() => {
    if (installingExtensions.length === 0) {
      setShowInstallingExtensionModal(false)
    }
  }, [installingExtensions, setShowInstallingExtensionModal])

  const onAbortInstallingExtensionClick = useCallback(
    (item: InstallingExtensionState) => {
      if (item.localPath) {
        abortDownload(item.localPath)
      }
    },
    []
  )

  return (
    <Modal
      open={showInstallingExtensionModal}
      onOpenChange={() => setShowInstallingExtensionModal(false)}
    >
      <ModalContent>
        <ModalHeader>
          <ModalTitle>Installing Extension</ModalTitle>
        </ModalHeader>
        {/* installingExtensions is already an array (its .length is checked
            above) — the previous Object.values(...) wrapper was redundant. */}
        {installingExtensions.map((item) => (
          <div className="pt-2" key={item.extensionId}>
            <Progress
              className="mb-2 h-2"
              value={
                formatDownloadPercentage(item.percentage, {
                  hidePercentage: true,
                }) as number
              }
            />
            <div className="flex items-center justify-between gap-x-2">
              <div className="flex gap-x-2">
                <p className="line-clamp-1">
                  {formatExtensionsName(item.extensionId)}
                </p>
                <span>{formatDownloadPercentage(item.percentage)}</span>
              </div>
              <Button
                themes="outline"
                size="sm"
                onClick={() => onAbortInstallingExtensionClick(item)}
              >
                Cancel
              </Button>
            </div>
          </div>
        ))}
      </ModalContent>
    </Modal>
  )
}

export default InstallingExtensionModal

View File

@ -0,0 +1,52 @@
import { Fragment, useCallback } from 'react'
import { Progress } from '@janhq/uikit'
import { useAtomValue, useSetAtom } from 'jotai'
import { showInstallingExtensionModalAtom } from './InstallingExtensionModal'
import { installingExtensionAtom } from '@/helpers/atoms/Extension.atom'
/**
 * Bottom-bar indicator showing the average progress of all extension
 * installations currently in flight. Clicking it opens the detailed
 * installing-extension modal. Renders nothing when no install is running.
 */
const InstallingExtension: React.FC = () => {
  const installingExtensions = useAtomValue(installingExtensionAtom)
  const setShowInstallingExtensionModal = useSetAtom(
    showInstallingExtensionModalAtom
  )

  const shouldShowInstalling = installingExtensions.length > 0

  // Average completion across all in-flight installations, as a percentage.
  // Guard the empty case so the value is never NaN (previously 0 / 0).
  const totalPercentage = installingExtensions.reduce(
    (sum, installation) => sum + installation.percentage,
    0
  )
  const progress = shouldShowInstalling
    ? (totalPercentage / installingExtensions.length) * 100
    : 0

  const onClick = useCallback(() => {
    setShowInstallingExtensionModal(true)
  }, [setShowInstallingExtensionModal])

  return (
    <Fragment>
      {shouldShowInstalling ? (
        <div
          className="flex cursor-pointer flex-row items-center space-x-2"
          onClick={onClick}
        >
          <p className="text-xs font-semibold text-muted-foreground">
            Installing Extension
          </p>

          <div className="flex flex-row items-center justify-center space-x-2 rounded-md bg-secondary px-2 py-[2px]">
            <Progress className="h-2 w-24" value={progress} />
            <span className="text-xs font-bold text-muted-foreground">
              {progress.toFixed(2)}%
            </span>
          </div>
        </div>
      ) : null}
    </Fragment>
  )
}

export default InstallingExtension

View File

@ -16,6 +16,7 @@ import ProgressBar from '@/containers/ProgressBar'
import { appDownloadProgress } from '@/containers/Providers/Jotai' import { appDownloadProgress } from '@/containers/Providers/Jotai'
import ImportingModelState from './ImportingModelState' import ImportingModelState from './ImportingModelState'
import InstallingExtension from './InstallingExtension'
import SystemMonitor from './SystemMonitor' import SystemMonitor from './SystemMonitor'
import UpdatedFailedModal from './UpdateFailedModal' import UpdatedFailedModal from './UpdateFailedModal'
@ -46,6 +47,7 @@ const BottomBar = () => {
<ImportingModelState /> <ImportingModelState />
<DownloadingState /> <DownloadingState />
<UpdatedFailedModal /> <UpdatedFailedModal />
<InstallingExtension />
</div> </div>
<div className="flex items-center gap-x-3"> <div className="flex items-center gap-x-3">
<SystemMonitor /> <SystemMonitor />

View File

@ -22,6 +22,8 @@ import ImportModelOptionModal from '@/screens/Settings/ImportModelOptionModal'
import ImportingModelModal from '@/screens/Settings/ImportingModelModal' import ImportingModelModal from '@/screens/Settings/ImportingModelModal'
import SelectingModelModal from '@/screens/Settings/SelectingModelModal' import SelectingModelModal from '@/screens/Settings/SelectingModelModal'
import InstallingExtensionModal from './BottomBar/InstallingExtension/InstallingExtensionModal'
import { mainViewStateAtom } from '@/helpers/atoms/App.atom' import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
const BaseLayout = (props: PropsWithChildren) => { const BaseLayout = (props: PropsWithChildren) => {
@ -68,6 +70,7 @@ const BaseLayout = (props: PropsWithChildren) => {
{importModelStage === 'IMPORTING_MODEL' && <ImportingModelModal />} {importModelStage === 'IMPORTING_MODEL' && <ImportingModelModal />}
{importModelStage === 'EDIT_MODEL_INFO' && <EditModelInfoModal />} {importModelStage === 'EDIT_MODEL_INFO' && <EditModelInfoModal />}
{importModelStage === 'CONFIRM_CANCEL' && <CancelModelImportModal />} {importModelStage === 'CONFIRM_CANCEL' && <CancelModelImportModal />}
<InstallingExtensionModal />
</div> </div>
) )
} }

View File

@ -10,7 +10,10 @@ import useGetSystemResources from '@/hooks/useGetSystemResources'
import useModels from '@/hooks/useModels' import useModels from '@/hooks/useModels'
import useThreads from '@/hooks/useThreads' import useThreads from '@/hooks/useThreads'
import { janDataFolderPathAtom } from '@/helpers/atoms/AppConfig.atom' import {
janDataFolderPathAtom,
quickAskEnabledAtom,
} from '@/helpers/atoms/AppConfig.atom'
type Props = { type Props = {
children: ReactNode children: ReactNode
@ -18,6 +21,7 @@ type Props = {
const DataLoader: React.FC<Props> = ({ children }) => { const DataLoader: React.FC<Props> = ({ children }) => {
const setJanDataFolderPath = useSetAtom(janDataFolderPathAtom) const setJanDataFolderPath = useSetAtom(janDataFolderPathAtom)
const setQuickAskEnabled = useSetAtom(quickAskEnabledAtom)
useModels() useModels()
useThreads() useThreads()
@ -29,8 +33,9 @@ const DataLoader: React.FC<Props> = ({ children }) => {
?.getAppConfigurations() ?.getAppConfigurations()
?.then((appConfig: AppConfiguration) => { ?.then((appConfig: AppConfiguration) => {
setJanDataFolderPath(appConfig.data_folder) setJanDataFolderPath(appConfig.data_folder)
setQuickAskEnabled(appConfig.quick_ask)
}) })
}, [setJanDataFolderPath]) }, [setJanDataFolderPath, setQuickAskEnabled])
console.debug('Load Data...') console.debug('Load Data...')

View File

@ -7,6 +7,10 @@ import { useSetAtom } from 'jotai'
import { setDownloadStateAtom } from '@/hooks/useDownloadState' import { setDownloadStateAtom } from '@/hooks/useDownloadState'
import { formatExtensionsName } from '@/utils/converter'
import { toaster } from '../Toast'
import AppUpdateListener from './AppUpdateListener' import AppUpdateListener from './AppUpdateListener'
import ClipboardListener from './ClipboardListener' import ClipboardListener from './ClipboardListener'
import EventHandler from './EventHandler' import EventHandler from './EventHandler'
@ -14,46 +18,89 @@ import EventHandler from './EventHandler'
import ModelImportListener from './ModelImportListener' import ModelImportListener from './ModelImportListener'
import QuickAskListener from './QuickAskListener' import QuickAskListener from './QuickAskListener'
import {
InstallingExtensionState,
removeInstallingExtensionAtom,
setInstallingExtensionAtom,
} from '@/helpers/atoms/Extension.atom'
const EventListenerWrapper = ({ children }: PropsWithChildren) => { const EventListenerWrapper = ({ children }: PropsWithChildren) => {
const setDownloadState = useSetAtom(setDownloadStateAtom) const setDownloadState = useSetAtom(setDownloadStateAtom)
const setInstallingExtension = useSetAtom(setInstallingExtensionAtom)
const removeInstallingExtension = useSetAtom(removeInstallingExtensionAtom)
const onFileDownloadUpdate = useCallback( const onFileDownloadUpdate = useCallback(
async (state: DownloadState) => { async (state: DownloadState) => {
console.debug('onFileDownloadUpdate', state) console.debug('onFileDownloadUpdate', state)
setDownloadState(state) if (state.downloadType === 'extension') {
const installingExtensionState: InstallingExtensionState = {
extensionId: state.extensionId!,
percentage: state.percent,
localPath: state.localPath,
}
setInstallingExtension(state.extensionId!, installingExtensionState)
} else {
setDownloadState(state)
}
}, },
[setDownloadState] [setDownloadState, setInstallingExtension]
) )
const onFileDownloadError = useCallback( const onFileDownloadError = useCallback(
(state: DownloadState) => { (state: DownloadState) => {
console.debug('onFileDownloadError', state) console.debug('onFileDownloadError', state)
setDownloadState(state) if (state.downloadType === 'extension') {
removeInstallingExtension(state.extensionId!)
} else {
setDownloadState(state)
}
}, },
[setDownloadState] [setDownloadState, removeInstallingExtension]
) )
const onFileDownloadSuccess = useCallback( const onFileDownloadSuccess = useCallback(
(state: DownloadState) => { (state: DownloadState) => {
console.debug('onFileDownloadSuccess', state) console.debug('onFileDownloadSuccess', state)
setDownloadState(state) if (state.downloadType !== 'extension') {
setDownloadState(state)
}
}, },
[setDownloadState] [setDownloadState]
) )
const onFileUnzipSuccess = useCallback(
(state: DownloadState) => {
console.debug('onFileUnzipSuccess', state)
toaster({
title: 'Success',
description: `Install ${formatExtensionsName(state.extensionId!)} successfully.`,
type: 'success',
})
removeInstallingExtension(state.extensionId!)
},
[removeInstallingExtension]
)
useEffect(() => { useEffect(() => {
console.debug('EventListenerWrapper: registering event listeners...') console.debug('EventListenerWrapper: registering event listeners...')
events.on(DownloadEvent.onFileDownloadUpdate, onFileDownloadUpdate) events.on(DownloadEvent.onFileDownloadUpdate, onFileDownloadUpdate)
events.on(DownloadEvent.onFileDownloadError, onFileDownloadError) events.on(DownloadEvent.onFileDownloadError, onFileDownloadError)
events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess) events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
events.on(DownloadEvent.onFileUnzipSuccess, onFileUnzipSuccess)
return () => { return () => {
console.debug('EventListenerWrapper: unregistering event listeners...') console.debug('EventListenerWrapper: unregistering event listeners...')
events.off(DownloadEvent.onFileDownloadUpdate, onFileDownloadUpdate) events.off(DownloadEvent.onFileDownloadUpdate, onFileDownloadUpdate)
events.off(DownloadEvent.onFileDownloadError, onFileDownloadError) events.off(DownloadEvent.onFileDownloadError, onFileDownloadError)
events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess) events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
events.off(DownloadEvent.onFileUnzipSuccess, onFileUnzipSuccess)
} }
}, [onFileDownloadUpdate, onFileDownloadError, onFileDownloadSuccess]) }, [
onFileDownloadUpdate,
onFileDownloadError,
onFileDownloadSuccess,
onFileUnzipSuccess,
])
return ( return (
<AppUpdateListener> <AppUpdateListener>

View File

@ -24,10 +24,6 @@ export default function KeyListener({ children }: Props) {
useEffect(() => { useEffect(() => {
const onKeyDown = (e: KeyboardEvent) => { const onKeyDown = (e: KeyboardEvent) => {
if (e.key === 'Escape') {
window.core?.api?.hideMainWindow()
}
const prefixKey = isMac ? e.metaKey : e.ctrlKey const prefixKey = isMac ? e.metaKey : e.ctrlKey
if (e.key === 'b' && prefixKey) { if (e.key === 'b' && prefixKey) {

View File

@ -4,6 +4,8 @@ import { PropsWithChildren, useEffect, useState } from 'react'
import { Toaster } from 'react-hot-toast' import { Toaster } from 'react-hot-toast'
import { usePathname } from 'next/navigation'
import { TooltipProvider } from '@janhq/uikit' import { TooltipProvider } from '@janhq/uikit'
import GPUDriverPrompt from '@/containers/GPUDriverPromptModal' import GPUDriverPrompt from '@/containers/GPUDriverPromptModal'
@ -29,6 +31,7 @@ import { extensionManager } from '@/extension'
const Providers = (props: PropsWithChildren) => { const Providers = (props: PropsWithChildren) => {
const { children } = props const { children } = props
const pathname = usePathname()
const [setupCore, setSetupCore] = useState(false) const [setupCore, setSetupCore] = useState(false)
const [activated, setActivated] = useState(false) const [activated, setActivated] = useState(false)
@ -40,6 +43,11 @@ const Providers = (props: PropsWithChildren) => {
setTimeout(async () => { setTimeout(async () => {
if (!isCoreExtensionInstalled()) { if (!isCoreExtensionInstalled()) {
// TODO: Proper window handle
// Do not migrate extension from quick ask window
if (pathname === '/search') {
return
}
setSettingUp(true) setSettingUp(true)
await setupBaseExtensions() await setupBaseExtensions()
return return

View File

@ -23,7 +23,9 @@ export class ExtensionManager {
* @param type - The type of the extension to retrieve. * @param type - The type of the extension to retrieve.
* @returns The extension, if found. * @returns The extension, if found.
*/ */
get<T extends BaseExtension>(type: ExtensionTypeEnum): T | undefined { get<T extends BaseExtension>(
type: ExtensionTypeEnum | string
): T | undefined {
return this.extensions.get(type) as T | undefined return this.extensions.get(type) as T | undefined
} }

View File

@ -6,6 +6,7 @@ const PROXY_FEATURE_ENABLED = 'proxyFeatureEnabled'
const VULKAN_ENABLED = 'vulkanEnabled' const VULKAN_ENABLED = 'vulkanEnabled'
const IGNORE_SSL = 'ignoreSSLFeature' const IGNORE_SSL = 'ignoreSSLFeature'
const HTTPS_PROXY_FEATURE = 'httpsProxyFeature' const HTTPS_PROXY_FEATURE = 'httpsProxyFeature'
const QUICK_ASK_ENABLED = 'quickAskEnabled'
export const janDataFolderPathAtom = atom('') export const janDataFolderPathAtom = atom('')
@ -19,3 +20,4 @@ export const proxyAtom = atomWithStorage(HTTPS_PROXY_FEATURE, '')
export const ignoreSslAtom = atomWithStorage(IGNORE_SSL, false) export const ignoreSslAtom = atomWithStorage(IGNORE_SSL, false)
export const vulkanEnabledAtom = atomWithStorage(VULKAN_ENABLED, false) export const vulkanEnabledAtom = atomWithStorage(VULKAN_ENABLED, false)
export const quickAskEnabledAtom = atomWithStorage(QUICK_ASK_ENABLED, false)

View File

@ -0,0 +1,40 @@
import { atom } from 'jotai'
type ExtensionId = string
export type InstallingExtensionState = {
extensionId: ExtensionId
percentage: number
localPath?: string
}
export const installingExtensionAtom = atom<InstallingExtensionState[]>([])
export const setInstallingExtensionAtom = atom(
null,
(get, set, extensionId: string, state: InstallingExtensionState) => {
const current = get(installingExtensionAtom)
const isExists = current.some((e) => e.extensionId === extensionId)
if (isExists) {
const newCurrent = current.map((e) => {
if (e.extensionId === extensionId) {
return state
}
return e
})
set(installingExtensionAtom, newCurrent)
} else {
set(installingExtensionAtom, [...current, state])
}
}
)
export const removeInstallingExtensionAtom = atom(
null,
(get, set, extensionId: string) => {
const current = get(installingExtensionAtom)
const newCurrent = current.filter((e) => e.extensionId !== extensionId)
set(installingExtensionAtom, newCurrent)
}
)

View File

@ -40,6 +40,16 @@ export function useActiveModel() {
console.debug(`Model ${modelId} is already initialized. Ignore..`) console.debug(`Model ${modelId} is already initialized. Ignore..`)
return return
} }
let model = downloadedModelsRef?.current.find((e) => e.id === modelId)
// Switch between engines
if (model && activeModel && activeModel.engine !== model.engine) {
stopModel()
// TODO: Refactor inference provider would address this
await new Promise((res) => setTimeout(res, 1000))
}
// TODO: incase we have multiple assistants, the configuration will be from assistant // TODO: incase we have multiple assistants, the configuration will be from assistant
setLoadModelError(undefined) setLoadModelError(undefined)
@ -47,8 +57,6 @@ export function useActiveModel() {
setStateModel({ state: 'start', loading: true, model: modelId }) setStateModel({ state: 'start', loading: true, model: modelId })
let model = downloadedModelsRef?.current.find((e) => e.id === modelId)
if (!model) { if (!model) {
toaster({ toaster({
title: `Model ${modelId} not found!`, title: `Model ${modelId} not found!`,

View File

@ -8,12 +8,15 @@ import {
joinPath, joinPath,
ModelArtifact, ModelArtifact,
DownloadState, DownloadState,
GpuSetting,
} from '@janhq/core' } from '@janhq/core'
import { useAtomValue, useSetAtom } from 'jotai' import { useAtomValue, useSetAtom } from 'jotai'
import { setDownloadStateAtom } from './useDownloadState' import { setDownloadStateAtom } from './useDownloadState'
import useGpuSetting from './useGpuSetting'
import { extensionManager } from '@/extension/ExtensionManager' import { extensionManager } from '@/extension/ExtensionManager'
import { import {
ignoreSslAtom, ignoreSslAtom,
@ -29,6 +32,8 @@ export default function useDownloadModel() {
const setDownloadState = useSetAtom(setDownloadStateAtom) const setDownloadState = useSetAtom(setDownloadStateAtom)
const addDownloadingModel = useSetAtom(addDownloadingModelAtom) const addDownloadingModel = useSetAtom(addDownloadingModelAtom)
const { getGpuSettings } = useGpuSetting()
const downloadModel = useCallback( const downloadModel = useCallback(
async (model: Model) => { async (model: Model) => {
const childProgresses: DownloadState[] = model.sources.map( const childProgresses: DownloadState[] = model.sources.map(
@ -68,10 +73,22 @@ export default function useDownloadModel() {
}) })
addDownloadingModel(model) addDownloadingModel(model)
const gpuSettings = await getGpuSettings()
await localDownloadModel(model, ignoreSSL, proxyEnabled ? proxy : '') await localDownloadModel(
model,
ignoreSSL,
proxyEnabled ? proxy : '',
gpuSettings
)
}, },
[ignoreSSL, proxy, proxyEnabled, addDownloadingModel, setDownloadState] [
ignoreSSL,
proxy,
proxyEnabled,
getGpuSettings,
addDownloadingModel,
setDownloadState,
]
) )
const abortModelDownload = useCallback(async (model: Model) => { const abortModelDownload = useCallback(async (model: Model) => {
@ -90,8 +107,9 @@ export default function useDownloadModel() {
const localDownloadModel = async ( const localDownloadModel = async (
model: Model, model: Model,
ignoreSSL: boolean, ignoreSSL: boolean,
proxy: string proxy: string,
gpuSettings?: GpuSetting
) => ) =>
extensionManager extensionManager
.get<ModelExtension>(ExtensionTypeEnum.Model) .get<ModelExtension>(ExtensionTypeEnum.Model)
?.downloadModel(model, { ignoreSSL, proxy }) ?.downloadModel(model, gpuSettings, { ignoreSSL, proxy })

View File

@ -18,123 +18,129 @@ export const modelDownloadStateAtom = atom<Record<string, DownloadState>>({})
export const setDownloadStateAtom = atom( export const setDownloadStateAtom = atom(
null, null,
(get, set, state: DownloadState) => { (get, set, state: DownloadState) => {
const currentState = { ...get(modelDownloadStateAtom) } try {
const currentState = { ...get(modelDownloadStateAtom) }
if (state.downloadState === 'end') { if (state.downloadState === 'end') {
const modelDownloadState = currentState[state.modelId] const modelDownloadState = currentState[state.modelId]
const updatedChildren: DownloadState[] = const updatedChildren: DownloadState[] = (
modelDownloadState.children!.filter( modelDownloadState.children ?? []
(m) => m.fileName !== state.fileName ).filter((m) => m.fileName !== state.fileName)
updatedChildren.push(state)
modelDownloadState.children = updatedChildren
currentState[state.modelId] = modelDownloadState
const isAllChildrenDownloadEnd = modelDownloadState.children?.every(
(m) => m.downloadState === 'end'
) )
updatedChildren.push(state)
modelDownloadState.children = updatedChildren
currentState[state.modelId] = modelDownloadState
const isAllChildrenDownloadEnd = modelDownloadState.children?.every( if (isAllChildrenDownloadEnd) {
(m) => m.downloadState === 'end' // download successfully
) delete currentState[state.modelId]
set(removeDownloadingModelAtom, state.modelId)
if (isAllChildrenDownloadEnd) { const model = get(configuredModelsAtom).find(
// download successfully (e) => e.id === state.modelId
)
if (model) set(downloadedModelsAtom, (prev) => [...prev, model])
toaster({
title: 'Download Completed',
description: `Download ${state.modelId} completed`,
type: 'success',
})
}
} else if (state.downloadState === 'error') {
// download error
delete currentState[state.modelId] delete currentState[state.modelId]
set(removeDownloadingModelAtom, state.modelId) set(removeDownloadingModelAtom, state.modelId)
if (state.error === 'aborted') {
const model = get(configuredModelsAtom).find( toaster({
(e) => e.id === state.modelId title: 'Cancel Download',
) description: `Model ${state.modelId} download cancelled`,
if (model) set(downloadedModelsAtom, (prev) => [...prev, model]) type: 'warning',
toaster({ })
title: 'Download Completed', } else {
description: `Download ${state.modelId} completed`, let error = state.error
type: 'success', if (
}) typeof error?.includes === 'function' &&
} state.error?.includes('certificate')
} else if (state.downloadState === 'error') { ) {
// download error error +=
delete currentState[state.modelId] '. To fix enable "Ignore SSL Certificates" in Advanced settings.'
set(removeDownloadingModelAtom, state.modelId) }
if (state.error === 'aborted') { toaster({
toaster({ title: 'Download Failed',
title: 'Cancel Download', description: `Model ${state.modelId} download failed: ${error}`,
description: `Model ${state.modelId} download cancelled`, type: 'error',
type: 'warning', })
}) }
} else { } else {
let error = state.error // download in progress
if ( if (state.size.total === 0) {
typeof error?.includes === 'function' && // this is initial state, just set the state
state.error?.includes('certificate') currentState[state.modelId] = state
) { set(modelDownloadStateAtom, currentState)
error += return
'. To fix enable "Ignore SSL Certificates" in Advanced settings.'
} }
toaster({
title: 'Download Failed',
description: `Model ${state.modelId} download failed: ${error}`,
type: 'error',
})
}
} else {
// download in progress
if (state.size.total === 0) {
// this is initial state, just set the state
currentState[state.modelId] = state
set(modelDownloadStateAtom, currentState)
return
}
const modelDownloadState = currentState[state.modelId] const modelDownloadState = currentState[state.modelId]
if (!modelDownloadState) { if (!modelDownloadState) {
console.debug('setDownloadStateAtom: modelDownloadState not found') console.debug('setDownloadStateAtom: modelDownloadState not found')
return return
} }
// delete the children if the filename is matched and replace the new state // delete the children if the filename is matched and replace the new state
const updatedChildren: DownloadState[] = const updatedChildren: DownloadState[] = (
modelDownloadState.children!.filter( modelDownloadState.children ?? []
(m) => m.fileName !== state.fileName ).filter((m) => m.fileName !== state.fileName)
updatedChildren.push(state)
// re-calculate the overall progress if we have all the children download data
const isAnyChildDownloadNotReady = updatedChildren.some(
(m) =>
m.size.total === 0 &&
!modelDownloadState.children?.some(
(e) => e.fileName === m.fileName && e.downloadState === 'end'
) &&
modelDownloadState.children?.some((e) => e.fileName === m.fileName)
) )
updatedChildren.push(state) modelDownloadState.children = updatedChildren
if (isAnyChildDownloadNotReady) {
// just update the children
currentState[state.modelId] = modelDownloadState
set(modelDownloadStateAtom, currentState)
return
}
// re-calculate the overall progress if we have all the children download data const parentTotalSize = modelDownloadState.size.total
const isAnyChildDownloadNotReady = updatedChildren.some( if (parentTotalSize === 0) {
(m) => m.size.total === 0 // calculate the total size of the parent by sum all children total size
) const totalSize = updatedChildren.reduce(
(acc, m) => acc + m.size.total,
0
)
modelDownloadState.children = updatedChildren modelDownloadState.size.total = totalSize
}
if (isAnyChildDownloadNotReady) { // calculate the total transferred size by sum all children transferred size
// just update the children const transferredSize = updatedChildren.reduce(
currentState[state.modelId] = modelDownloadState (acc, m) => acc + m.size.transferred,
set(modelDownloadStateAtom, currentState)
return
}
const parentTotalSize = modelDownloadState.size.total
if (parentTotalSize === 0) {
// calculate the total size of the parent by sum all children total size
const totalSize = updatedChildren.reduce(
(acc, m) => acc + m.size.total,
0 0
) )
modelDownloadState.size.transferred = transferredSize
modelDownloadState.size.total = totalSize modelDownloadState.percent =
parentTotalSize === 0 ? 0 : transferredSize / parentTotalSize
currentState[state.modelId] = modelDownloadState
} }
// calculate the total transferred size by sum all children transferred size set(modelDownloadStateAtom, currentState)
const transferredSize = updatedChildren.reduce( } catch (e) {
(acc, m) => acc + m.size.transferred, console.debug('setDownloadStateAtom: state', state)
0 console.debug('setDownloadStateAtom: error', e)
)
modelDownloadState.size.transferred = transferredSize
modelDownloadState.percent =
parentTotalSize === 0 ? 0 : transferredSize / parentTotalSize
currentState[state.modelId] = modelDownloadState
} }
set(modelDownloadStateAtom, currentState)
} }
) )

View File

@ -30,6 +30,7 @@ export default function useFactoryReset() {
// set the default jan data folder to user's home directory // set the default jan data folder to user's home directory
const configuration: AppConfiguration = { const configuration: AppConfiguration = {
data_folder: defaultJanDataFolder, data_folder: defaultJanDataFolder,
quick_ask: appConfiguration?.quick_ask ?? false,
} }
await window.core?.api?.updateAppConfiguration(configuration) await window.core?.api?.updateAppConfiguration(configuration)
} }

View File

@ -0,0 +1,21 @@
import { useCallback } from 'react'
import { ExtensionTypeEnum, MonitoringExtension } from '@janhq/core'
import { extensionManager } from '@/extension'
export default function useGpuSetting() {
const getGpuSettings = useCallback(async () => {
const gpuSetting = await extensionManager
?.get<MonitoringExtension>(ExtensionTypeEnum.SystemMonitoring)
?.getGpuSetting()
if (!gpuSetting) {
console.debug('No GPU setting found')
return undefined
}
return gpuSetting
}, [])
return { getGpuSettings }
}

View File

@ -79,6 +79,8 @@ export default function useSendChatMessage() {
const setIsGeneratingResponse = useSetAtom(isGeneratingResponseAtom) const setIsGeneratingResponse = useSetAtom(isGeneratingResponseAtom)
const activeThreadRef = useRef<Thread | undefined>() const activeThreadRef = useRef<Thread | undefined>()
const selectedModelRef = useRef<Model | undefined>()
useEffect(() => { useEffect(() => {
modelRef.current = activeModel modelRef.current = activeModel
}, [activeModel]) }, [activeModel])
@ -91,6 +93,10 @@ export default function useSendChatMessage() {
activeThreadRef.current = activeThread activeThreadRef.current = activeThread
}, [activeThread]) }, [activeThread])
useEffect(() => {
selectedModelRef.current = selectedModel
}, [selectedModel])
const resendChatMessage = async (currentMessage: ThreadMessage) => { const resendChatMessage = async (currentMessage: ThreadMessage) => {
if (!activeThreadRef.current) { if (!activeThreadRef.current) {
console.error('No active thread') console.error('No active thread')
@ -128,11 +134,13 @@ export default function useSendChatMessage() {
type: MessageRequestType.Thread, type: MessageRequestType.Thread,
messages: messages, messages: messages,
threadId: activeThreadRef.current.id, threadId: activeThreadRef.current.id,
model: activeThreadRef.current.assistants[0].model ?? selectedModel, model:
activeThreadRef.current.assistants[0].model ?? selectedModelRef.current,
} }
const modelId = const modelId =
selectedModel?.id ?? activeThreadRef.current.assistants[0].model.id selectedModelRef.current?.id ??
activeThreadRef.current.assistants[0].model.id
if (modelRef.current?.id !== modelId) { if (modelRef.current?.id !== modelId) {
setQueuedMessage(true) setQueuedMessage(true)
@ -213,7 +221,7 @@ export default function useSendChatMessage() {
{ {
role: ChatCompletionRole.User, role: ChatCompletionRole.User,
content: content:
selectedModel && base64Blob selectedModelRef.current && base64Blob
? [ ? [
{ {
type: ChatCompletionMessageContentType.Text, type: ChatCompletionMessageContentType.Text,
@ -242,7 +250,7 @@ export default function useSendChatMessage() {
) )
let modelRequest = let modelRequest =
selectedModel ?? activeThreadRef.current.assistants[0].model selectedModelRef?.current ?? activeThreadRef.current.assistants[0].model
if (runtimeParams.stream == null) { if (runtimeParams.stream == null) {
runtimeParams.stream = true runtimeParams.stream = true
} }
@ -344,7 +352,8 @@ export default function useSendChatMessage() {
?.addNewMessage(threadMessage) ?.addNewMessage(threadMessage)
const modelId = const modelId =
selectedModel?.id ?? activeThreadRef.current.assistants[0].model.id selectedModelRef.current?.id ??
activeThreadRef.current.assistants[0].model.id
if (modelRef.current?.id !== modelId) { if (modelRef.current?.id !== modelId) {
setQueuedMessage(true) setQueuedMessage(true)

View File

@ -38,6 +38,7 @@ const nextConfig = {
isMac: process.platform === 'darwin', isMac: process.platform === 'darwin',
isWindows: process.platform === 'win32', isWindows: process.platform === 'win32',
isLinux: process.platform === 'linux', isLinux: process.platform === 'linux',
PLATFORM: JSON.stringify(process.platform),
}), }),
] ]
return config return config

View File

@ -244,16 +244,13 @@ const ChatInput: React.FC = () => {
<li <li
className={twMerge( className={twMerge(
'flex w-full cursor-pointer items-center space-x-2 px-4 py-2 text-muted-foreground hover:bg-secondary', 'flex w-full cursor-pointer items-center space-x-2 px-4 py-2 text-muted-foreground hover:bg-secondary',
activeThread?.assistants[0].model.settings.vision_model && activeThread?.assistants[0].model.settings.text_model ===
activeThread?.assistants[0].model.settings false
.text_model === false
? 'cursor-not-allowed opacity-50' ? 'cursor-not-allowed opacity-50'
: 'cursor-pointer' : 'cursor-pointer'
)} )}
onClick={() => { onClick={() => {
if ( if (
!activeThread?.assistants[0].model.settings
.vision_model ||
activeThread?.assistants[0].model.settings activeThread?.assistants[0].model.settings
.text_model !== false .text_model !== false
) { ) {

View File

@ -3,6 +3,8 @@ import { useState } from 'react'
import { Model } from '@janhq/core' import { Model } from '@janhq/core'
import { Badge } from '@janhq/uikit' import { Badge } from '@janhq/uikit'
import { twMerge } from 'tailwind-merge'
import ExploreModelItemHeader from '@/screens/ExploreModels/ExploreModelItemHeader' import ExploreModelItemHeader from '@/screens/ExploreModels/ExploreModelItemHeader'
type Props = { type Props = {
@ -75,7 +77,16 @@ const ExploreModelItem: React.FC<Props> = ({ model }) => {
<span className="font-semibold text-muted-foreground"> <span className="font-semibold text-muted-foreground">
Format Format
</span> </span>
<p className="mt-2 font-medium uppercase">{model.format}</p> <p
className={twMerge(
'mt-2 font-medium',
!model.format?.includes(' ') &&
!model.format?.includes('-') &&
'uppercase'
)}
>
{model.format}
</p>
</div> </div>
</div> </div>
</div> </div>

View File

@ -152,6 +152,7 @@ const ExploreModelItemHeader: React.FC<Props> = ({ model, onClick, open }) => {
<div className="flex items-center justify-between p-4"> <div className="flex items-center justify-between p-4">
<div className="flex items-center gap-2"> <div className="flex items-center gap-2">
<span className="font-bold">{model.name}</span> <span className="font-bold">{model.name}</span>
<EngineBadge engine={model.engine} />
</div> </div>
<div className="inline-flex items-center space-x-2"> <div className="inline-flex items-center space-x-2">
<span className="mr-4 font-semibold text-muted-foreground"> <span className="mr-4 font-semibold text-muted-foreground">
@ -172,4 +173,23 @@ const ExploreModelItemHeader: React.FC<Props> = ({ model, onClick, open }) => {
) )
} }
type EngineBadgeProps = {
engine: string
}
const EngineBadge: React.FC<EngineBadgeProps> = ({ engine }) => {
const title = 'TensorRT-LLM'
switch (engine) {
case 'nitro-tensorrt-llm':
return (
<Badge themes="primary" className="line-clamp-1" title={title}>
{title}
</Badge>
)
default:
return null
}
}
export default ExploreModelItemHeader export default ExploreModelItemHeader

View File

@ -2,7 +2,7 @@
import { useEffect, useState, useCallback, ChangeEvent } from 'react' import { useEffect, useState, useCallback, ChangeEvent } from 'react'
import { openExternalUrl, fs } from '@janhq/core' import { openExternalUrl, fs, AppConfiguration } from '@janhq/core'
import { import {
Switch, Switch,
@ -23,7 +23,7 @@ import {
ScrollArea, ScrollArea,
} from '@janhq/uikit' } from '@janhq/uikit'
import { useAtom } from 'jotai' import { useAtom, useAtomValue } from 'jotai'
import { AlertTriangleIcon, AlertCircleIcon } from 'lucide-react' import { AlertTriangleIcon, AlertCircleIcon } from 'lucide-react'
import ShortcutModal from '@/containers/ShortcutModal' import ShortcutModal from '@/containers/ShortcutModal'
@ -42,6 +42,7 @@ import {
proxyAtom, proxyAtom,
proxyEnabledAtom, proxyEnabledAtom,
vulkanEnabledAtom, vulkanEnabledAtom,
quickAskEnabledAtom,
} from '@/helpers/atoms/AppConfig.atom' } from '@/helpers/atoms/AppConfig.atom'
type GPU = { type GPU = {
@ -56,6 +57,8 @@ const Advanced = () => {
) )
const [vulkanEnabled, setVulkanEnabled] = useAtom(vulkanEnabledAtom) const [vulkanEnabled, setVulkanEnabled] = useAtom(vulkanEnabledAtom)
const [proxyEnabled, setProxyEnabled] = useAtom(proxyEnabledAtom) const [proxyEnabled, setProxyEnabled] = useAtom(proxyEnabledAtom)
const quickAskEnabled = useAtomValue(quickAskEnabledAtom)
const [proxy, setProxy] = useAtom(proxyAtom) const [proxy, setProxy] = useAtom(proxyAtom)
const [ignoreSSL, setIgnoreSSL] = useAtom(ignoreSslAtom) const [ignoreSSL, setIgnoreSSL] = useAtom(ignoreSslAtom)
@ -87,6 +90,14 @@ const Advanced = () => {
[setPartialProxy, setProxy] [setPartialProxy, setProxy]
) )
const updateQuickAskEnabled = async (e: boolean) => {
const appConfiguration: AppConfiguration =
await window.core?.api?.getAppConfigurations()
appConfiguration.quick_ask = e
await window.core?.api?.updateAppConfiguration(appConfiguration)
window.core?.api?.relaunch()
}
useEffect(() => { useEffect(() => {
const setUseGpuIfPossible = async () => { const setUseGpuIfPossible = async () => {
const settings = await readSettings() const settings = await readSettings()
@ -361,7 +372,7 @@ const Advanced = () => {
Vulkan Support Vulkan Support
</h6> </h6>
</div> </div>
<p className="text-xs leading-relaxed"> <p className="leading-relaxed">
Enable Vulkan with AMD GPU/APU and Intel Arc GPU for better Enable Vulkan with AMD GPU/APU and Intel Arc GPU for better
model performance (reload needed). model performance (reload needed).
</p> </p>
@ -426,6 +437,36 @@ const Advanced = () => {
/> />
</div> </div>
{experimentalEnabled && (
<div className="flex w-full items-start justify-between border-b border-border py-4 first:pt-0 last:border-none">
<div className="flex-shrink-0 space-y-1.5">
<div className="flex gap-x-2">
<h6 className="text-sm font-semibold capitalize">
Jan Quick Ask
</h6>
</div>
<p className="leading-relaxed">
Enable Quick Ask to be triggered via the default hotkey{' '}
<div className="inline-flex items-center justify-center rounded-full bg-secondary px-1 py-0.5 text-xs font-bold text-muted-foreground">
<span className="font-bold">{isMac ? '⌘' : 'Ctrl'} + J</span>
</div>{' '}
(reload needed).
</p>
</div>
<Switch
checked={quickAskEnabled}
onCheckedChange={() => {
toaster({
title: 'Reload',
description:
'Quick Ask settings updated. Reload now to apply the changes.',
})
updateQuickAskEnabled(!quickAskEnabled)
}}
/>
</div>
)}
{/* Clear log */} {/* Clear log */}
<div className="flex w-full items-start justify-between border-b border-border py-4 first:pt-0 last:border-none"> <div className="flex w-full items-start justify-between border-b border-border py-4 first:pt-0 last:border-none">
<div className="flex-shrink-0 space-y-1.5"> <div className="flex-shrink-0 space-y-1.5">

View File

@ -0,0 +1,225 @@
import { useCallback, useEffect, useState } from 'react'
import {
Compatibility,
GpuSetting,
InstallationState,
abortDownload,
systemInformations,
} from '@janhq/core'
import {
Button,
Progress,
Tooltip,
TooltipArrow,
TooltipContent,
TooltipPortal,
TooltipTrigger,
} from '@janhq/uikit'
import { InfoCircledIcon } from '@radix-ui/react-icons'
import { useAtomValue } from 'jotai'
import { extensionManager } from '@/extension'
import Extension from '@/extension/Extension'
import { installingExtensionAtom } from '@/helpers/atoms/Extension.atom'
type Props = {
item: Extension
}
/**
 * Catalog row for the TensorRT-LLM extension.
 *
 * Shows name/version/description and either an install-state control or an
 * "Incompatible" badge. The extension is considered compatible only when
 * both checks pass:
 *  - platform: `compatibility.platform` (if declared) includes PLATFORM
 *  - GPU: the first detected GPU's arch is one of turing/ampere/ada
 */
const TensorRtExtensionItem: React.FC<Props> = ({ item }) => {
  const [compatibility, setCompatibility] = useState<Compatibility | undefined>(
    undefined
  )
  const [installState, setInstallState] =
    useState<InstallationState>('NotRequired')
  const installingExtensions = useAtomValue(installingExtensionAtom)
  const [isGpuSupported, setIsGpuSupported] = useState<boolean>(false)

  // Single scan of the installing list (previously some() + find() scanned twice).
  const installingExtension = installingExtensions.find(
    (e) => e.extensionId === item.name
  )
  const isInstalling = installingExtension !== undefined
  // -1 signals "no download in progress" to InstallStateIndicator.
  const progress = installingExtension?.percentage ?? -1

  // Detect whether the first GPU's architecture is supported by TensorRT-LLM.
  useEffect(() => {
    const getSystemInfos = async () => {
      const info = await systemInformations()
      if (!info) {
        setIsGpuSupported(false)
        return
      }

      const gpuSettings: GpuSetting | undefined = info.gpuSetting
      if (!gpuSettings || gpuSettings.gpus.length === 0) {
        setIsGpuSupported(false)
        return
      }

      // NOTE(review): only the first GPU is checked — multi-GPU systems with a
      // supported secondary GPU will be reported as unsupported. Confirm intent.
      const arch = gpuSettings.gpus[0].arch
      if (!arch) {
        setIsGpuSupported(false)
        return
      }

      const supportedGpuArch = ['turing', 'ampere', 'ada']
      setIsGpuSupported(supportedGpuArch.includes(arch))
    }
    getSystemInfos()
  }, [])

  // Re-query installation state whenever a download starts/finishes.
  useEffect(() => {
    const getExtensionInstallationState = async () => {
      const extension = extensionManager.get(item.name ?? '')
      if (!extension) return

      if (typeof extension?.installationState === 'function') {
        const installState = await extension.installationState()
        setInstallState(installState)
      }
    }

    getExtensionInstallationState()
  }, [item.name, isInstalling])

  useEffect(() => {
    const extension = extensionManager.get(item.name ?? '')
    if (!extension) return
    setCompatibility(extension.compatibility())
  }, [setCompatibility, item.name])

  const onInstallClick = useCallback(async () => {
    const extension = extensionManager.get(item.name ?? '')
    if (!extension) return

    await extension.install()
  }, [item.name])

  // Abort the in-flight download for this extension, if any.
  const onCancelInstallingClick = () => {
    const extension = installingExtensions.find(
      (e) => e.extensionId === item.name
    )
    if (extension?.localPath) {
      abortDownload(extension.localPath)
    }
  }

  // Platform check: an extension with no compatibility metadata is assumed
  // platform-compatible.
  const isPlatformCompatible =
    !compatibility || compatibility['platform']?.includes(PLATFORM)

  return (
    <div className="flex w-full items-start justify-between border-b border-border py-4 first:pt-4 last:border-none">
      <div className="flex-1 flex-shrink-0 space-y-1.5">
        <div className="flex items-center gap-x-2">
          <h6 className="text-sm font-semibold capitalize">
            TensorRT-LLM Extension
          </h6>
          <p className="whitespace-pre-wrap text-sm font-semibold leading-relaxed">
            v{item.version}
          </p>
        </div>
        <p className="whitespace-pre-wrap leading-relaxed">
          {item.description}
        </p>
      </div>
      {isPlatformCompatible && isGpuSupported ? (
        <div className="flex min-w-[150px] flex-row justify-end">
          <InstallStateIndicator
            installProgress={progress}
            installState={installState}
            onInstallClick={onInstallClick}
            onCancelClick={onCancelInstallingClick}
          />
        </div>
      ) : (
        <div className="rounded-md bg-secondary px-3 py-1.5 text-sm font-semibold text-gray-400">
          <div className="flex flex-row items-center justify-center gap-1">
            Incompatible{' '}
            <Tooltip>
              <TooltipTrigger className="w-full">
                <InfoCircledIcon />
              </TooltipTrigger>
              <TooltipPortal>
                <TooltipContent side="top">
                  {/* Bug fix: show the platform message only when the platform
                      actually mismatches. Previously any truthy `compatibility`
                      produced "Only available on …" even when the real failure
                      was an unsupported GPU. */}
                  {!isPlatformCompatible && compatibility ? (
                    <span>
                      Only available on{' '}
                      {compatibility?.platform
                        ?.map((e: string) =>
                          e === 'win32'
                            ? 'Windows'
                            : e === 'linux'
                              ? 'Linux'
                              : 'MacOS'
                        )
                        .join(', ')}
                    </span>
                  ) : (
                    <span>
                      Your GPUs are not compatible with this extension
                    </span>
                  )}
                  <TooltipArrow />
                </TooltipContent>
              </TooltipPortal>
            </Tooltip>
          </div>
        </div>
      )}
    </div>
  )
}
// Props for InstallStateIndicator.
type InstallStateProps = {
  // Download progress in [0, 1]; -1 means no download is in progress.
  installProgress: number
  // Current installation state reported by the extension.
  installState: InstallationState
  // Starts installation of the extension.
  onInstallClick: () => void
  // Aborts the in-flight download.
  onCancelClick: () => void
}
/**
 * Renders the install control for an extension row:
 * - a progress bar with a Cancel button while a download is running,
 * - an "Installed" badge, an "Install" button, or nothing, depending on state.
 */
const InstallStateIndicator: React.FC<InstallStateProps> = ({
  installProgress,
  installState,
  onInstallClick,
  onCancelClick,
}) => {
  // A value other than -1 means a download is underway: show progress + Cancel.
  if (installProgress !== -1) {
    const percentDone = installProgress * 100
    return (
      <div className="flex h-10 flex-row items-center justify-center space-x-2 rounded-lg bg-[#EFF8FF] px-4 text-primary dark:bg-secondary">
        <button onClick={onCancelClick} className="font-semibold text-primary">
          Cancel
        </button>
        <div className="flex w-[113px] flex-row items-center justify-center space-x-2 rounded-md bg-[#D1E9FF] px-2 py-[2px] dark:bg-black/50">
          <Progress className="h-1 w-[69px]" value={percentDone} />
          <span className="text-xs font-bold text-primary">
            {percentDone.toFixed(0)}%
          </span>
        </div>
      </div>
    )
  }

  // TODO: NamH check for dark mode here
  if (installState === 'Installed') {
    return (
      <div className="rounded-md bg-secondary px-3 py-1.5 text-sm font-semibold text-gray-400">
        Installed
      </div>
    )
  }

  if (installState === 'NotInstalled') {
    return (
      <Button themes="secondaryBlue" size="sm" onClick={onInstallClick}>
        Install
      </Button>
    )
  }

  // 'NotRequired' and any other state render nothing.
  return <div></div>
}
export default TensorRtExtensionItem

View File

@ -4,13 +4,18 @@ import React, { useState, useEffect, useRef } from 'react'
import { Button, ScrollArea } from '@janhq/uikit' import { Button, ScrollArea } from '@janhq/uikit'
import Loader from '@/containers/Loader'
import { formatExtensionsName } from '@/utils/converter' import { formatExtensionsName } from '@/utils/converter'
import TensorRtExtensionItem from './TensorRtExtensionItem'
import { extensionManager } from '@/extension' import { extensionManager } from '@/extension'
import Extension from '@/extension/Extension' import Extension from '@/extension/Extension'
const ExtensionCatalog = () => { const ExtensionCatalog = () => {
const [activeExtensions, setActiveExtensions] = useState<Extension[]>([]) const [activeExtensions, setActiveExtensions] = useState<Extension[]>([])
const [showLoading, setShowLoading] = useState(false)
const fileInputRef = useRef<HTMLInputElement | null>(null) const fileInputRef = useRef<HTMLInputElement | null>(null)
/** /**
* Fetches the active extensions and their preferences from the `extensions` and `preferences` modules. * Fetches the active extensions and their preferences from the `extensions` and `preferences` modules.
@ -63,65 +68,76 @@ const ExtensionCatalog = () => {
const handleFileChange = (event: React.ChangeEvent<HTMLInputElement>) => { const handleFileChange = (event: React.ChangeEvent<HTMLInputElement>) => {
const file = event.target.files?.[0] const file = event.target.files?.[0]
if (file) { if (file) {
setShowLoading(true)
install(event) install(event)
} }
} }
return ( return (
<ScrollArea className="h-full w-full px-4"> <>
<div className="block w-full"> <ScrollArea className="h-full w-full px-4">
{activeExtensions.map((item, i) => { <div className="block w-full">
return ( {activeExtensions.map((item, i) => {
<div // TODO: this is bad code, rewrite it
key={i} if (item.name === '@janhq/tensorrt-llm-extension') {
className="flex w-full items-start justify-between border-b border-border py-4 first:pt-4 last:border-none" return <TensorRtExtensionItem key={i} item={item} />
> }
<div className="w-4/5 flex-shrink-0 space-y-1.5">
<div className="flex gap-x-2"> return (
<h6 className="text-sm font-semibold capitalize"> <div
{formatExtensionsName(item.name ?? item.description ?? '')} key={i}
</h6> className="flex w-full items-start justify-between border-b border-border py-4 first:pt-4 last:border-none"
<p className="whitespace-pre-wrap font-semibold leading-relaxed "> >
v{item.version} <div className="w-4/5 flex-shrink-0 space-y-1.5">
<div className="flex items-center gap-x-2">
<h6 className="text-sm font-semibold capitalize">
{formatExtensionsName(
item.name ?? item.description ?? ''
)}
</h6>
<p className="whitespace-pre-wrap text-sm font-semibold leading-relaxed ">
v{item.version}
</p>
</div>
<p className="whitespace-pre-wrap leading-relaxed ">
{item.description}
</p> </p>
</div> </div>
<p className="whitespace-pre-wrap leading-relaxed ">
{item.description}
</p>
</div> </div>
)
})}
{/* Manual Installation */}
<div className="flex w-full items-start justify-between border-b border-border py-4 first:pt-0 last:border-none">
<div className="w-4/5 flex-shrink-0 space-y-1.5">
<div className="flex gap-x-2">
<h6 className="text-sm font-semibold capitalize">
Manual Installation
</h6>
</div>
<p className="whitespace-pre-wrap leading-relaxed ">
Select a extension file to install (.tgz)
</p>
</div> </div>
) <div>
})} <input
{/* Manual Installation */} type="file"
<div className="flex w-full items-start justify-between border-b border-border py-4 first:pt-0 last:border-none"> style={{ display: 'none' }}
<div className="w-4/5 flex-shrink-0 space-y-1.5"> ref={fileInputRef}
<div className="flex gap-x-2"> onChange={handleFileChange}
<h6 className="text-sm font-semibold capitalize"> />
Manual Installation <Button
</h6> themes="secondaryBlue"
size="sm"
onClick={() => fileInputRef.current?.click()}
>
Select
</Button>
</div> </div>
<p className="whitespace-pre-wrap leading-relaxed ">
Select a extension file to install (.tgz)
</p>
</div>
<div>
<input
type="file"
style={{ display: 'none' }}
ref={fileInputRef}
onChange={handleFileChange}
/>
<Button
themes="secondaryBlue"
size="sm"
onClick={() => fileInputRef.current?.click()}
>
Select
</Button>
</div> </div>
</div> </div>
</div> </ScrollArea>
</ScrollArea> {showLoading && <Loader description="Installing..." />}
</>
) )
} }

View File

@ -0,0 +1,24 @@
import { ExtensionTypeEnum, MonitoringExtension } from '@janhq/core'
import { toaster } from '@/containers/Toast'
import { extensionManager } from '@/extension'
/**
 * Facade over extension-backed application services (system info, toasts).
 */
export const appService = {
  /**
   * Gathers system information via the system-monitoring extension.
   * Currently only GPU settings are collected.
   */
  systemInformations: async () => {
    const monitoringExtension = extensionManager?.get<MonitoringExtension>(
      ExtensionTypeEnum.SystemMonitoring
    )
    const gpuSetting = await monitoringExtension?.getGpuSetting()

    return {
      gpuSetting,
      // TODO: Other system information
    }
  },

  /** Displays a toast notification with the given title and description. */
  showToast: (title: string, description: string) => {
    toaster({ title, description })
  },
}

Some files were not shown because too many files have changed in this diff Show More