Merge branch 'dev' into patch-1

This commit is contained in:
Louis 2024-11-30 13:43:27 +07:00 committed by GitHub
commit 0f403ffd34
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
138 changed files with 2348 additions and 1312 deletions

View File

@ -47,11 +47,11 @@ jobs:
with:
args: |
Jan App ${{ inputs.build_reason }} build artifact version {{ VERSION }}:
- Windows: https://delta.jan.ai/nightly/jan-win-x64-{{ VERSION }}.exe
- macOS Intel: https://delta.jan.ai/nightly/jan-mac-x64-{{ VERSION }}.dmg
- macOS Apple Silicon: https://delta.jan.ai/nightly/jan-mac-arm64-{{ VERSION }}.dmg
- Linux Deb: https://delta.jan.ai/nightly/jan-linux-amd64-{{ VERSION }}.deb
- Linux AppImage: https://delta.jan.ai/nightly/jan-linux-x86_64-{{ VERSION }}.AppImage
- Windows: https://delta.jan.ai/nightly/jan-nightly-win-x64-{{ VERSION }}.exe
- macOS Intel: https://delta.jan.ai/nightly/jan-nightly-mac-x64-{{ VERSION }}.dmg
- macOS Apple Silicon: https://delta.jan.ai/nightly/jan-nightly-mac-arm64-{{ VERSION }}.dmg
- Linux Deb: https://delta.jan.ai/nightly/jan-nightly-linux-amd64-{{ VERSION }}.deb
- Linux AppImage: https://delta.jan.ai/nightly/jan-nightly-linux-x86_64-{{ VERSION }}.AppImage
- Github action run: https://github.com/janhq/jan/actions/runs/{{ GITHUB_RUN_ID }}
env:
DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK }}

1
.gitignore vendored
View File

@ -47,3 +47,4 @@ coverage
.yarnrc
test_results.html
*.tsbuildinfo
electron/shared/**

View File

@ -3,7 +3,6 @@ import { joinPath } from './core'
import { openFileExplorer } from './core'
import { getJanDataFolderPath } from './core'
import { abortDownload } from './core'
import { getFileSize } from './core'
import { executeOnMain } from './core'
describe('test core apis', () => {
@ -66,18 +65,6 @@ describe('test core apis', () => {
expect(result).toBe('aborted')
})
it('should get file size', async () => {
const url = 'http://example.com/file'
globalThis.core = {
api: {
getFileSize: jest.fn().mockResolvedValue(1024),
},
}
const result = await getFileSize(url)
expect(globalThis.core.api.getFileSize).toHaveBeenCalledWith(url)
expect(result).toBe(1024)
})
it('should execute function on main process', async () => {
const extension = 'testExtension'
const method = 'testMethod'

View File

@ -28,15 +28,6 @@ const downloadFile: (downloadRequest: DownloadRequest, network?: NetworkConfig)
network
) => globalThis.core?.api?.downloadFile(downloadRequest, network)
/**
* Get unit in bytes for a remote file.
*
* @param url - The url of the file.
* @returns {Promise<number>} - A promise that resolves with the file size.
*/
const getFileSize: (url: string) => Promise<number> = (url: string) =>
globalThis.core.api?.getFileSize(url)
/**
* Aborts the download of a specific file.
* @param {string} fileName - The name of the file whose download is to be aborted.
@ -167,7 +158,6 @@ export {
getUserHomePath,
systemInformation,
showToast,
getFileSize,
dirName,
FileStat,
}

View File

@ -113,7 +113,6 @@ export abstract class BaseExtension implements ExtensionType {
for (const model of models) {
ModelManager.instance().register(model)
}
events.emit(ModelEvent.OnModelsUpdate, {})
}
/**

View File

@ -38,14 +38,16 @@ export function requestInference(
errorCode = ErrorCode.InvalidApiKey
}
const error = {
message: data.error?.message ?? 'Error occurred.',
message: data.error?.message ?? data.message ?? 'Error occurred.',
code: errorCode,
}
subscriber.error(error)
subscriber.complete()
return
}
if (model.parameters?.stream === false) {
// There could be overriden stream parameter in the model
// that is set in request body (transformed payload)
if (requestBody?.stream === false || model.parameters?.stream === false) {
const data = await response.json()
if (transformResponse) {
subscriber.next(transformResponse(data))

View File

@ -12,6 +12,7 @@ export abstract class ModelExtension extends BaseExtension implements ModelInter
return ExtensionTypeEnum.Model
}
abstract configurePullOptions(configs: { [key: string]: any }): Promise<any>
abstract getModels(): Promise<Model[]>
abstract pullModel(model: string, id?: string, name?: string): Promise<void>
abstract cancelModelPull(modelId: string): Promise<void>

View File

@ -23,6 +23,11 @@ jest.mock('fs', () => ({
createWriteStream: jest.fn(),
}))
const requestMock = jest.fn((options, callback) => {
callback(new Error('Test error'), null)
})
jest.mock('request', () => requestMock)
jest.mock('request-progress', () => {
return jest.fn().mockImplementation(() => {
return {
@ -54,18 +59,6 @@ describe('Downloader', () => {
beforeEach(() => {
jest.resetAllMocks()
})
it('should handle getFileSize errors correctly', async () => {
const observer = jest.fn()
const url = 'http://example.com/file'
const downloader = new Downloader(observer)
const requestMock = jest.fn((options, callback) => {
callback(new Error('Test error'), null)
})
jest.mock('request', () => requestMock)
await expect(downloader.getFileSize(observer, url)).rejects.toThrow('Test error')
})
it('should pause download correctly', () => {
const observer = jest.fn()

View File

@ -135,25 +135,4 @@ export class Downloader implements Processor {
pauseDownload(_observer: any, fileName: any) {
DownloadManager.instance.networkRequests[fileName]?.pause()
}
async getFileSize(_observer: any, url: string): Promise<number> {
return new Promise((resolve, reject) => {
const request = require('request')
request(
{
url,
method: 'HEAD',
},
function (err: any, response: any) {
if (err) {
console.error('Getting file size failed:', err)
reject(err)
} else {
const size: number = response.headers['content-length'] ?? -1
resolve(size)
}
}
)
})
}
}

View File

@ -1,7 +1,6 @@
import { HttpServer } from '../HttpServer'
import {
chatCompletions,
deleteBuilder,
downloadModel,
getBuilder,
retrieveBuilder,
@ -14,8 +13,6 @@ import {
} from './helper/builder'
import { JanApiRouteConfiguration } from './helper/configuration'
import { startModel, stopModel } from './helper/startStopModel'
import { ModelSettingParams } from '../../../types'
export const commonRouter = async (app: HttpServer) => {
const normalizeData = (data: any) => {
@ -28,19 +25,25 @@ export const commonRouter = async (app: HttpServer) => {
// Read & Delete :: Threads | Models | Assistants
Object.keys(JanApiRouteConfiguration).forEach((key) => {
app.get(`/${key}`, async (_req, _res) => {
if (key === 'models') {
if (key.includes('models')) {
return models(_req, _res)
}
return getBuilder(JanApiRouteConfiguration[key]).then(normalizeData)
})
app.get(`/${key}/:id`, async (request: any) =>
retrieveBuilder(JanApiRouteConfiguration[key], request.params.id)
)
app.get(`/${key}/:id`, async (_req: any, _res: any) => {
if (key.includes('models')) {
return models(_req, _res)
}
return retrieveBuilder(JanApiRouteConfiguration[key], _req.params.id)
})
app.delete(`/${key}/:id`, async (request: any) =>
deleteBuilder(JanApiRouteConfiguration[key], request.params.id)
)
app.delete(`/${key}/:id`, async (_req: any, _res: any) => {
if (key.includes('models')) {
return models(_req, _res)
}
return retrieveBuilder(JanApiRouteConfiguration[key], _req.params.id)
})
})
// Threads
@ -70,16 +73,9 @@ export const commonRouter = async (app: HttpServer) => {
})
)
app.put(`/models/:modelId/start`, async (request: any) => {
let settingParams: ModelSettingParams | undefined = undefined
if (Object.keys(request.body).length !== 0) {
settingParams = JSON.parse(request.body) as ModelSettingParams
}
app.post(`/models/start`, async (request: any, reply: any) => models(request, reply))
return startModel(request.params.modelId, settingParams)
})
app.put(`/models/:modelId/stop`, async (request: any) => stopModel(request.params.modelId))
app.post(`/models/stop`, async (request: any, reply: any) => models(request, reply))
// Chat Completion
app.post(`/chat/completions`, async (request: any, reply: any) => chatCompletions(request, reply))

View File

@ -1,17 +1,7 @@
import {
existsSync,
readdirSync,
readFileSync,
writeFileSync,
mkdirSync,
appendFileSync,
rmdirSync,
} from 'fs'
import { join } from 'path'
import { existsSync, readdirSync, readFileSync, writeFileSync, mkdirSync, appendFileSync } from 'fs'
import {
getBuilder,
retrieveBuilder,
deleteBuilder,
getMessages,
retrieveMessage,
createThread,
@ -82,34 +72,6 @@ describe('builder helper functions', () => {
})
})
describe('deleteBuilder', () => {
it('should return a message if trying to delete Jan assistant', async () => {
const result = await deleteBuilder({ ...mockConfiguration, dirName: 'assistants' }, 'jan')
expect(result).toEqual({ message: 'Cannot delete Jan assistant' })
})
it('should return a message if data is not found', async () => {
;(existsSync as jest.Mock).mockReturnValue(true)
;(readdirSync as jest.Mock).mockReturnValue(['file1'])
;(readFileSync as jest.Mock).mockReturnValue(JSON.stringify({ id: 'model1' }))
const result = await deleteBuilder(mockConfiguration, 'nonexistentId')
expect(result).toEqual({ message: 'Not found' })
})
it('should delete the directory and return success message', async () => {
;(existsSync as jest.Mock).mockReturnValue(true)
;(readdirSync as jest.Mock).mockReturnValue(['file1'])
;(readFileSync as jest.Mock).mockReturnValue(JSON.stringify({ id: 'model1' }))
const result = await deleteBuilder(mockConfiguration, 'model1')
expect(rmdirSync).toHaveBeenCalledWith(join('/mock/path', 'mockDir', 'model1'), {
recursive: true,
})
expect(result).toEqual({ id: 'model1', object: 'mockObject', deleted: true })
})
})
describe('getMessages', () => {
it('should return an empty array if message file does not exist', async () => {
;(existsSync as jest.Mock).mockReturnValue(false)

View File

@ -73,34 +73,6 @@ export const retrieveBuilder = async (configuration: RouteConfiguration, id: str
return filteredData
}
export const deleteBuilder = async (configuration: RouteConfiguration, id: string) => {
if (configuration.dirName === 'assistants' && id === 'jan') {
return {
message: 'Cannot delete Jan assistant',
}
}
const directoryPath = join(getJanDataFolderPath(), configuration.dirName)
try {
const data = await retrieveBuilder(configuration, id)
if (!data) {
return {
message: 'Not found',
}
}
const objectPath = join(directoryPath, id)
rmdirSync(objectPath, { recursive: true })
return {
id: id,
object: configuration.delete.object,
deleted: true,
}
} catch (ex) {
console.error(ex)
}
}
export const getMessages = async (threadId: string): Promise<ThreadMessage[]> => {
const threadDirPath = join(getJanDataFolderPath(), 'threads', threadId)
const messageFile = 'messages.jsonl'
@ -308,7 +280,7 @@ export const models = async (request: any, reply: any) => {
'Content-Type': 'application/json',
}
const response = await fetch(`${CORTEX_API_URL}/models`, {
const response = await fetch(`${CORTEX_API_URL}/models${request.url.split('/models')[1] ?? ""}`, {
method: request.method,
headers: headers,
body: JSON.stringify(request.body),

View File

@ -1,10 +0,0 @@
import { startModel } from './startStopModel'
describe('startModel', () => {
it('test_startModel_error', async () => {
const modelId = 'testModelId'
const settingParams = undefined
expect(startModel(modelId, settingParams)).resolves.toThrow()
})
})

View File

@ -1,25 +0,0 @@
import { ModelSettingParams } from '../../../../types'
import { CORTEX_DEFAULT_PORT, LOCAL_HOST } from './consts'
/**
* Start a model
* @param modelId
* @param settingParams
* @returns
*/
export const startModel = async (modelId: string, settingParams?: ModelSettingParams) => {
return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/start`, {
method: 'POST',
body: JSON.stringify({ model: modelId, ...settingParams }),
})
}
/*
* Stop model.
*/
export const stopModel = async (modelId: string) => {
return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/stop`, {
method: 'POST',
body: JSON.stringify({ model: modelId }),
})
}

View File

@ -27,6 +27,7 @@ export enum NativeRoute {
quickAskSizeUpdated = 'quickAskSizeUpdated',
ackDeepLink = 'ackDeepLink',
factoryReset = 'factoryReset'
}
/**
@ -65,7 +66,6 @@ export enum DownloadRoute {
pauseDownload = 'pauseDownload',
resumeDownload = 'resumeDownload',
getDownloadProgress = 'getDownloadProgress',
getFileSize = 'getFileSize',
}
export enum DownloadEvent {

View File

@ -12,7 +12,7 @@ export type SettingComponentProps = {
export type ConfigType = 'runtime' | 'setting'
export type ControllerType = 'slider' | 'checkbox' | 'input'
export type ControllerType = 'slider' | 'checkbox' | 'input' | 'tag'
export type InputType = 'password' | 'text' | 'email' | 'number' | 'tel' | 'url'
@ -22,7 +22,7 @@ export type InputAction = InputActionsTuple[number]
export type InputComponentProps = {
placeholder: string
value: string
value: string | string[]
type?: InputType
textAlign?: 'left' | 'right'
inputActions?: InputAction[]

View File

@ -13,6 +13,7 @@
},
"desktop": "Desktop",
"data-folder": "Jan Data Folder",
"privacy": "Privacy",
"user-guides": {
"title": "BASIC USAGE",
"type": "separator"

View File

@ -0,0 +1,63 @@
---
title: Jan Privacy
description: Jan is an app that allows you to own your AI. We prioritize your control over your data and explain what data we collect and why.
keywords:
[
Jan AI,
Jan,
ChatGPT alternative,
local AI,
private AI,
conversational AI,
OpenAI platform alternative,
no-subscription fee,
large language model,
about Jan,
desktop application,
thinking machine,
jan vision,
]
---
# Privacy
Jan is an app that allows you to own your AI. We prioritize your control over your data and explain what data we collect and why.
- Jan can't see your chats with AI
- You're free to opt out
## Why and what we track
To build a reliable, user-friendly AI that you own, we need to understand how Jan is used. We collect two types of data: performance data and usage data.
### Performance data
We track app crashes and collect technical details about what went wrong, along with basic information about the hardware you're using.
When Jan crashes, we collect technical details about what went wrong.
- Specific AI model in use during the crash
- Hardware: `CPU`, `GPU`, `RAM`
- Logs: `Date/Time`, `OS & version`, `app version`, `error codes & messages`.
### Usage data
We track data like how often the app is opened to check:
- **Active Users**: How many people use Jan daily to measure engagement
- **Retention Rates**: To understand if users are finding value in Jan over time
Usage data is tied to a randomly generated telemetry ID. None of our usage data can be linked to your personal identity.
## What we **don't** track:
- Your conversations with Jan. Those stay on your device.
- Your files. We don't scan, upload, or even look at them.
- Anything tied to your identity.
## Using Cloud Models
Jan allows you to connect cloud model APIs. If you choose to use cloud-based models (e.g. GPT, Claude models), the API provider handling the model will have access to your messages as part of processing the request. Again, Jan doesn't see or store these messages — they go directly to the provider. Remember: with local models, everything stays on your device, so no one — not even us — can see your messages.
## Where we store & process data
We use [PostHog](https://posthog.com/eu) EU for analytics, ensuring all data is processed within the European Union. This setup complies with GDPR and other strict privacy regulations. PostHog lets us self-host and securely manage the data we collect. Read more [on PostHog's GDPR doc](https://posthog.com/docs/privacy/gdpr-compliance).
For a detailed breakdown of the analytics data we collect, you can check out our analytics repo. If you have any questions or concerns, feel free to reach out to us at hi@jan.ai.

Binary file not shown.

After

Width:  |  Height:  |  Size: 262 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 618 KiB

View File

@ -0,0 +1,104 @@
---
title: Tabby
description: A step-by-step guide on integrating Jan with Tabby and VSCode, JetBrains, or other IDEs.
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
Tabby integration,
VSCode integration,
JetBrains integration,
]
---
import { Tabs, Steps } from 'nextra/components'
# Tabby
## Integrate Jan with Tabby and Your Favorite IDEs
[Tabby](https://www.tabbyml.com/) is an open-source, self-hosted AI coding assistant.
With Tabby, teams can easily set up their own LLM-powered code completion server.
Tabby provides integrations with VSCode, JetBrains, and other IDEs to help developers code more efficiently,
and it can be used with various LLM services, including Jan.
To integrate Jan with Tabby, follow these steps:
<Steps>
### Step 1: Enable the Jan API Server
To set up Tabby with Jan's Local Server, you must activate the Jan API Server with your chosen model.
1. Click the `Local API Server` (`<>`) button above the Settings. Jan will direct you to the **Local API Server** section.
2. Configure the server, including the **IP Port**, **Cross-Origin Resource Sharing (CORS)**, and **Verbose Server Logs**.
3. Press the **Start Server** button.
### Step 2: Find the Model ID and Ensure the Model is Activated
1. Go to `Settings` > `My Models`.
2. Models are listed with their **Model ID** beneath their names.
3. Click the **three dots (⋮)** button next to the model.
4. Select **Start Model** to activate the model.
### Step 3: Installing Tabby Server
Use the following documentation to install the Tabby server:
- [Docker](https://tabby.tabbyml.com/docs/quick-start/installation/docker/)
- [Apple Silicon](https://tabby.tabbyml.com/docs/quick-start/installation/apple/)
- [Linux](https://tabby.tabbyml.com/docs/quick-start/installation/linux/)
- [Windows](https://tabby.tabbyml.com/docs/quick-start/installation/windows/)
Then, follow the steps to connect Jan with the Tabby server:
[Connect Jan with Tabby](https://tabby.tabbyml.com/docs/references/models-http-api/jan.ai/).
For example, to connect Jan with Tabby, save the following configuration under `~/.tabby/config.toml`:
```toml title="~/.tabby/config.toml"
# Chat model
[model.chat.http]
kind = "openai/chat"
model_name = "model_id"
api_endpoint = "http://localhost:1337/v1"
api_key = ""
```
Currently, the Jan completion and embedding API is under construction.
Once completed, you can also connect Jan with Tabby for completion and embedding tasks.
### Step 4: Installing Tabby on Your Favorite IDEs
Refer to the following documentation to install the Tabby extension on your favorite IDEs:
- [Visual Studio Code](https://tabby.tabbyml.com/docs/extensions/installation/vscode/)
- [JetBrains IntelliJ Platform](https://tabby.tabbyml.com/docs/extensions/installation/intellij/)
- [VIM / NeoVIM](https://tabby.tabbyml.com/docs/extensions/installation/vim/)
</Steps>
## How to Use Tabby with Jan Integration
### Answer Engine: Chat with Your Codes and Documentation
Tabby offers an [Answer Engine](https://tabby.tabbyml.com/docs/administration/answer-engine/) on the homepage,
which can leverage the Jan LLM and related contexts like code, documentation, and web pages to answer user questions.
Simply open the Tabby homepage at [localhost:8080](http://localhost:8080) and ask your questions.
![Answer Engine](./_assets/tabby-answer-engine.png)
### IDE Chat Sidebar
After installing the Tabby extension on your preferred IDEs, you can engage in a conversation with Jan to:
1. Discuss your code, receive suggestions, and seek assistance.
2. Request Jan to inline edit your code, and then review and accept the proposed changes.
![Chat Sidebar](./_assets/tabby-chat-sidebar.png)

View File

@ -12,6 +12,9 @@ import {
} from '@janhq/core/node'
import { SelectFileOption } from '@janhq/core'
import { menu } from '../utils/menu'
import { migrate } from '../utils/migration'
import { createUserSpace } from '../utils/path'
import { setupExtensions } from '../utils/extension'
const isMac = process.platform === 'darwin'
@ -33,14 +36,28 @@ export function handleAppIPCs() {
nativeTheme.themeSource = 'light'
})
/**
* Handles the "setCloseApp" IPC message by closing the main application window.
* This effectively closes the application if no other windows are open.
*/
ipcMain.handle(NativeRoute.setCloseApp, () => {
windowManager.mainWindow?.close()
})
/**
* Handles the "setMinimizeApp" IPC message by minimizing the main application window.
* The window will be minimized to the system's taskbar or dock.
*/
ipcMain.handle(NativeRoute.setMinimizeApp, () => {
windowManager.mainWindow?.minimize()
})
/**
* Handles the "setMaximizeApp" IPC message. It toggles the maximization state of the main window.
* If the window is currently maximized, it will be un-maximized (restored to its previous size).
* If the window is not maximized, it will be maximized to fill the screen.
* @param _event - The IPC event object.
*/
ipcMain.handle(NativeRoute.setMaximizeApp, async (_event) => {
if (windowManager.mainWindow?.isMaximized()) {
windowManager.mainWindow.unmaximize()
@ -104,6 +121,11 @@ export function handleAppIPCs() {
}
})
/**
* Handles the "selectDirectory" IPC message to open a dialog for selecting a directory.
* If no main window is found, logs an error and exits.
* @returns {string} The path of the selected directory, or nothing if canceled.
*/
ipcMain.handle(NativeRoute.selectDirectory, async () => {
const mainWindow = windowManager.mainWindow
if (!mainWindow) {
@ -122,6 +144,14 @@ export function handleAppIPCs() {
}
})
/**
* Handles the "selectFiles" IPC message to open a dialog for selecting files.
* Allows options for setting the dialog title, button label, and selection properties.
* Logs an error if no main window is found.
* @param _event - The IPC event object.
* @param option - Options for customizing file selection dialog.
* @returns {string[]} An array of selected file paths, or nothing if canceled.
*/
ipcMain.handle(
NativeRoute.selectFiles,
async (_event, option?: SelectFileOption) => {
@ -156,11 +186,20 @@ export function handleAppIPCs() {
}
)
/**
* Handles the "hideQuickAskWindow" IPC message to hide the quick ask window.
* @returns A promise that resolves when the window is hidden.
*/
ipcMain.handle(
NativeRoute.hideQuickAskWindow,
async (): Promise<void> => windowManager.hideQuickAskWindow()
)
/**
* Handles the "sendQuickAskInput" IPC message to send user input to the main window.
* @param _event - The IPC event object.
* @param input - User input string to be sent.
*/
ipcMain.handle(
NativeRoute.sendQuickAskInput,
async (_event, input: string): Promise<void> => {
@ -171,6 +210,12 @@ export function handleAppIPCs() {
}
)
/**
* Handles the "showOpenMenu" IPC message to show the context menu at given coordinates.
* Only applicable on non-Mac platforms.
* @param e - The event object.
* @param args - Contains coordinates where the menu should appear.
*/
ipcMain.handle(NativeRoute.showOpenMenu, function (e, args) {
if (!isMac && windowManager.mainWindow) {
menu.popup({
@ -181,23 +226,55 @@ export function handleAppIPCs() {
}
})
/**
* Handles the "hideMainWindow" IPC message to hide the main application window.
* @returns A promise that resolves when the window is hidden.
*/
ipcMain.handle(
NativeRoute.hideMainWindow,
async (): Promise<void> => windowManager.hideMainWindow()
)
/**
* Handles the "showMainWindow" IPC message to show the main application window.
* @returns A promise that resolves when the window is shown.
*/
ipcMain.handle(
NativeRoute.showMainWindow,
async (): Promise<void> => windowManager.showMainWindow()
)
/**
* Handles the "quickAskSizeUpdated" IPC message to update the size of the quick ask window.
* Resizes window by the given height offset.
* @param _event - The IPC event object.
* @param heightOffset - The amount of height to increase.
* @returns A promise that resolves when the window is resized.
*/
ipcMain.handle(
NativeRoute.quickAskSizeUpdated,
async (_event, heightOffset: number): Promise<void> =>
windowManager.expandQuickAskWindow(heightOffset)
)
/**
* Handles the "ackDeepLink" IPC message to acknowledge a deep link.
* Triggers handling of deep link in the application.
* @param _event - The IPC event object.
* @returns A promise that resolves when the deep link is acknowledged.
*/
ipcMain.handle(NativeRoute.ackDeepLink, async (_event): Promise<void> => {
windowManager.ackDeepLink()
})
/**
* Handles the "factoryReset" IPC message to reset the application to its initial state.
* Clears loaded modules, recreates user space, runs migrations, and sets up extensions.
* @param _event - The IPC event object.
* @returns A promise that resolves after the reset operations are complete.
*/
ipcMain.handle(NativeRoute.factoryReset, async (_event): Promise<void> => {
ModuleManager.instance.clearImportedModules()
return createUserSpace().then(migrate).then(setupExtensions)
})
}

View File

@ -28,6 +28,7 @@ class WindowManager {
...mainWindowConfig,
width: bounds.width,
height: bounds.height,
show: false,
x: bounds.x,
y: bounds.y,
webPreferences: {
@ -78,6 +79,10 @@ class WindowManager {
windowManager.hideMainWindow()
}
})
windowManager.mainWindow?.on('ready-to-show', function () {
windowManager.mainWindow?.show()
})
}
createQuickAskWindow(preloadPath: string, startUrl: string): void {

View File

@ -25,7 +25,7 @@ test('Select GPT model from Hub and Chat with Invalid API Key', async ({
{ timeout: TIMEOUT }
)
const APIKeyError = page.getByTestId('invalid-API-key-error')
const APIKeyError = page.getByTestId('passthrough-error-message')
await expect(APIKeyError).toBeVisible({
timeout: TIMEOUT,
})

View File

@ -3,7 +3,6 @@ import { app } from 'electron'
import { join } from 'path'
import {
rmdirSync,
readFileSync,
existsSync,
mkdirSync,
readdirSync,

View File

@ -1,38 +0,0 @@
import fs from 'fs'
import path from 'path'
import { SettingComponentProps, getJanDataFolderPath } from '@janhq/core/node'
// Sec: Do not send engine settings over requests
// Read it manually instead
export const readEmbeddingEngine = (engineName: string) => {
if (engineName !== 'openai' && engineName !== 'groq') {
const engineSettings = fs.readFileSync(
path.join(getJanDataFolderPath(), 'engines', `${engineName}.json`),
'utf-8'
)
return JSON.parse(engineSettings)
} else {
const settingDirectoryPath = path.join(
getJanDataFolderPath(),
'settings',
'@janhq',
// TODO: James - To be removed
engineName === 'openai'
? 'inference-openai-extension'
: 'inference-groq-extension',
'settings.json'
)
const content = fs.readFileSync(settingDirectoryPath, 'utf-8')
const settings: SettingComponentProps[] = JSON.parse(content)
const apiKeyId = engineName === 'openai' ? 'openai-api-key' : 'groq-api-key'
const keySetting = settings.find((setting) => setting.key === apiKeyId)
let apiKey = keySetting?.controllerProps.value
if (typeof apiKey !== 'string') apiKey = ''
return {
api_key: apiKey,
}
}
}

View File

@ -8,7 +8,6 @@ import { MemoryVectorStore } from 'langchain/vectorstores/memory'
import { HNSWLib } from 'langchain/vectorstores/hnswlib'
import { OpenAIEmbeddings } from 'langchain/embeddings/openai'
import { readEmbeddingEngine } from './engine'
export class Retrieval {
public chunkSize: number = 100
@ -28,8 +27,8 @@ export class Retrieval {
// declare time-weighted retriever and storage
this.timeWeightedVectorStore = new MemoryVectorStore(
new OpenAIEmbeddings(
{ openAIApiKey: 'nitro-embedding' },
{ basePath: 'http://127.0.0.1:3928/v1' }
{ openAIApiKey: 'cortex-embedding' },
{ basePath: 'http://127.0.0.1:39291/v1' }
)
)
this.timeWeightedretriever = new TimeWeightedVectorStoreRetriever({
@ -49,21 +48,11 @@ export class Retrieval {
}
public updateEmbeddingEngine(model: string, engine: string): void {
// Engine settings are not compatible with the current embedding model params
// Switch case manually for now
if (engine === 'nitro') {
this.embeddingModel = new OpenAIEmbeddings(
{ openAIApiKey: 'nitro-embedding', model },
// TODO: Raw settings
{ basePath: 'http://127.0.0.1:3928/v1' },
)
} else {
// Fallback to OpenAI Settings
const settings = readEmbeddingEngine(engine)
this.embeddingModel = new OpenAIEmbeddings({
openAIApiKey: settings.api_key,
})
}
this.embeddingModel = new OpenAIEmbeddings(
{ openAIApiKey: 'cortex-embedding', model },
// TODO: Raw settings
{ basePath: 'http://127.0.0.1:39291/v1' }
)
// update time-weighted embedding model
this.timeWeightedVectorStore.embeddings = this.embeddingModel

View File

@ -1,7 +1,7 @@
{
"name": "@janhq/inference-anthropic-extension",
"productName": "Anthropic Inference Engine",
"version": "1.0.2",
"version": "1.0.3",
"description": "This extension enables Anthropic chat completion API calls",
"main": "dist/index.js",
"module": "dist/module.js",

View File

@ -1,28 +1,4 @@
[
{
"sources": [
{
"url": "https://www.anthropic.com/"
}
],
"id": "claude-3-opus-20240229",
"object": "model",
"name": "Claude 3 Opus 20240229",
"version": "1.1",
"description": "Claude 3 Opus is a powerful model suitables for highly complex task.",
"format": "api",
"settings": {},
"parameters": {
"max_tokens": 4096,
"temperature": 0.7,
"stream": false
},
"metadata": {
"author": "Anthropic",
"tags": ["General", "Big Context Length"]
},
"engine": "anthropic"
},
{
"sources": [
{
@ -47,78 +23,6 @@
},
"engine": "anthropic"
},
{
"sources": [
{
"url": "https://www.anthropic.com/"
}
],
"id": "claude-3-sonnet-20240229",
"object": "model",
"name": "Claude 3 Sonnet 20240229",
"version": "1.1",
"description": "Claude 3 Sonnet is an ideal model balance of intelligence and speed for enterprise workloads.",
"format": "api",
"settings": {},
"parameters": {
"max_tokens": 4096,
"temperature": 0.7,
"stream": false
},
"metadata": {
"author": "Anthropic",
"tags": ["General", "Big Context Length"]
},
"engine": "anthropic"
},
{
"sources": [
{
"url": "https://www.anthropic.com/"
}
],
"id": "claude-3-haiku-20240307",
"object": "model",
"name": "Claude 3 Haiku 20240307",
"version": "1.1",
"description": "Claude 3 Haiku is the fastest model provides near-instant responsiveness.",
"format": "api",
"settings": {},
"parameters": {
"max_tokens": 4096,
"temperature": 0.7,
"stream": false
},
"metadata": {
"author": "Anthropic",
"tags": ["General", "Big Context Length"]
},
"engine": "anthropic"
},
{
"sources": [
{
"url": "https://www.anthropic.com/"
}
],
"id": "claude-3-5-haiku-20241022",
"object": "model",
"name": "Claude 3.5 Haiku 20241022",
"version": "1.0",
"description": "Claude 3.5 Haiku is the fastest model provides near-instant responsiveness.",
"format": "api",
"settings": {},
"parameters": {
"max_tokens": 8192,
"temperature": 0.7,
"stream": false
},
"metadata": {
"author": "Anthropic",
"tags": ["General", "Big Context Length"]
},
"engine": "anthropic"
},
{
"sources": [
{
@ -143,54 +47,6 @@
},
"engine": "anthropic"
},
{
"sources": [
{
"url": "https://www.anthropic.com/"
}
],
"id": "claude-3-5-sonnet-20240620",
"object": "model",
"name": "Claude 3.5 Sonnet 20240620",
"version": "1.1",
"description": "Claude 3.5 Sonnet raises the industry bar for intelligence, outperforming competitor models and Claude 3 Opus on a wide range of evaluations, with the speed and cost of our mid-tier model, Claude 3 Sonnet.",
"format": "api",
"settings": {},
"parameters": {
"max_tokens": 8192,
"temperature": 0.7,
"stream": true
},
"metadata": {
"author": "Anthropic",
"tags": ["General", "Big Context Length"]
},
"engine": "anthropic"
},
{
"sources": [
{
"url": "https://www.anthropic.com/"
}
],
"id": "claude-3-5-sonnet-20241022",
"object": "model",
"name": "Claude 3.5 Sonnet 20241022",
"version": "1.0",
"description": "Claude 3.5 Sonnet raises the industry bar for intelligence, outperforming competitor models and Claude 3 Opus on a wide range of evaluations, with the speed and cost of our mid-tier model, Claude 3 Sonnet.",
"format": "api",
"settings": {},
"parameters": {
"max_tokens": 8192,
"temperature": 0.7,
"stream": true
},
"metadata": {
"author": "Anthropic",
"tags": ["General", "Big Context Length"]
},
"engine": "anthropic"
},
{
"sources": [
{

View File

@ -113,6 +113,8 @@ export default class JanInferenceCohereExtension extends RemoteOAIEngine {
}
transformResponse = (data: any) => {
return typeof data === 'object' ? data.text : JSON.parse(data).text ?? ''
return typeof data === 'object'
? data.text
: (JSON.parse(data.replace('data: ', '').trim()).text ?? '')
}
}

View File

@ -1 +1 @@
1.0.2
1.0.4-rc4

View File

@ -2,23 +2,23 @@
set BIN_PATH=./bin
set SHARED_PATH=./../../electron/shared
set /p CORTEX_VERSION=<./bin/version.txt
set ENGINE_VERSION=0.1.40
@REM Download cortex.llamacpp binaries
set VERSION=v0.1.35
set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.35-windows-amd64
set CUDA_DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%
set SUBFOLDERS=noavx-cuda-12-0 noavx-cuda-11-7 avx2-cuda-12-0 avx2-cuda-11-7 noavx avx avx2 avx512 vulkan
set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/v%ENGINE_VERSION%/cortex.llamacpp-%ENGINE_VERSION%-windows-amd64
set CUDA_DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/v%ENGINE_VERSION%
set SUBFOLDERS=windows-amd64-noavx-cuda-12-0 windows-amd64-noavx-cuda-11-7 windows-amd64-avx2-cuda-12-0 windows-amd64-avx2-cuda-11-7 windows-amd64-noavx windows-amd64-avx windows-amd64-avx2 windows-amd64-avx512 windows-amd64-vulkan
call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-12-0/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-11-7/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-12-0/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-11-7/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %BIN_PATH%/noavx/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %BIN_PATH%/avx/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %BIN_PATH%/avx2/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %BIN_PATH%/avx512/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %BIN_PATH%/vulkan/engines/cortex.llamacpp
call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex.cpp/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx2-cuda-12-0/v%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx2-cuda-11-7/v%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-noavx-cuda-12-0/v%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-11-7.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-noavx-cuda-11-7/v%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-noavx/v%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx/v%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx2/v%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx512/v%ENGINE_VERSION%
call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-vulkan/v%ENGINE_VERSION%
call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-12-0-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH%
call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-11-7-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH%
@ -28,10 +28,10 @@ del %BIN_PATH%\cortex.exe
@REM Loop through each folder and move DLLs (excluding engine.dll)
for %%F in (%SUBFOLDERS%) do (
echo Processing folder: %BIN_PATH%\%%F
echo Processing folder: %SHARED_PATH%\engines\cortex.llamacpp\%%F\v%ENGINE_VERSION%
@REM Move all .dll files except engine.dll
for %%D in (%BIN_PATH%\%%F\engines\cortex.llamacpp\*.dll) do (
for %%D in (%SHARED_PATH%\engines\cortex.llamacpp\%%F\v%ENGINE_VERSION%\*.dll) do (
if /I not "%%~nxD"=="engine.dll" (
move "%%D" "%BIN_PATH%"
)

View File

@ -2,9 +2,11 @@
# Read CORTEX_VERSION
CORTEX_VERSION=$(cat ./bin/version.txt)
CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download"
ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35/cortex.llamacpp-0.1.35"
CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35"
ENGINE_VERSION=0.1.40
CORTEX_RELEASE_URL="https://github.com/janhq/cortex.cpp/releases/download"
ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v${ENGINE_VERSION}/cortex.llamacpp-${ENGINE_VERSION}"
CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v${ENGINE_VERSION}"
SHARED_PATH="../../electron/shared"
# Detect platform
OS_TYPE=$(uname)
@ -17,17 +19,19 @@ if [ "$OS_TYPE" == "Linux" ]; then
chmod +x "./bin/cortex-server"
# Download engines for Linux
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "./bin/noavx/engines/cortex.llamacpp" 1
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "./bin/avx/engines/cortex.llamacpp" 1
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "./bin/avx2/engines/cortex.llamacpp" 1
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o "./bin/avx512/engines/cortex.llamacpp" 1
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-12-0/engines/cortex.llamacpp" 1
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-11-7/engines/cortex.llamacpp" 1
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-12-0/engines/cortex.llamacpp" 1
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-11-7/engines/cortex.llamacpp" 1
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "./bin/vulkan/engines/cortex.llamacpp" 1
download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1
download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-noavx/v${ENGINE_VERSION}" 1
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx/v${ENGINE_VERSION}" 1
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx2/v${ENGINE_VERSION}" 1
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx512/v${ENGINE_VERSION}" 1
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx2-cuda-12-0/v${ENGINE_VERSION}" 1
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx2-cuda-11-7/v${ENGINE_VERSION}" 1
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-noavx-cuda-12-0/v${ENGINE_VERSION}" 1
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-noavx-cuda-11-7/v${ENGINE_VERSION}" 1
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-vulkan/v${ENGINE_VERSION}" 1
download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}" 1
download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}" 1
mkdir -p "${SHARED_PATH}/engines/cortex.llamacpp/deps"
touch "${SHARED_PATH}/engines/cortex.llamacpp/deps/keep"
elif [ "$OS_TYPE" == "Darwin" ]; then
# macOS downloads
@ -38,8 +42,8 @@ elif [ "$OS_TYPE" == "Darwin" ]; then
chmod +x "./bin/cortex-server"
# Download engines for macOS
download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o ./bin/arm64/engines/cortex.llamacpp
download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o ./bin/x64/engines/cortex.llamacpp
download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/mac-arm64/v${ENGINE_VERSION}"
download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/mac-amd64/v${ENGINE_VERSION}"
else
echo "Unsupported operating system: $OS_TYPE"

View File

@ -1,7 +1,7 @@
{
"name": "@janhq/inference-cortex-extension",
"productName": "Cortex Inference Engine",
"version": "1.0.21",
"version": "1.0.22",
"description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
"main": "dist/index.js",
"node": "dist/node/index.cjs.js",

View File

@ -1,8 +1,8 @@
{
"sources": [
{
"url": "https://huggingface.co/cortexso/phi3/resolve/main/model.gguf",
"filename": "model.gguf"
"url": "https://huggingface.co/bartowski/Phi-3-mini-4k-instruct-GGUF/resolve/main/Phi-3-mini-4k-instruct-Q4_K_M.gguf",
"filename": "Phi-3-mini-4k-instruct-Q4_K_M.gguf"
}
],
"id": "phi3-3.8b",
@ -14,7 +14,7 @@
"settings": {
"ctx_len": 4096,
"prompt_template": "<|user|>\n{prompt}<|end|>\n<|assistant|>\n",
"llama_model_path": "model.gguf",
"llama_model_path": "Phi-3-mini-4k-instruct-Q4_K_M.gguf",
"ngl": 33
},
"parameters": {

View File

@ -1,8 +1,8 @@
{
"sources": [
{
"url": "https://huggingface.co/bartowski/Phi-3-medium-128k-instruct-GGUF/resolve/main/Phi-3-medium-128k-instruct-Q4_K_M.gguf",
"filename": "Phi-3-medium-128k-instruct-Q4_K_M.gguf"
"url": "https://huggingface.co/bartowski/Phi-3-mini-4k-instruct-GGUF/resolve/main/Phi-3-mini-4k-instruct-Q4_K_M.gguf",
"filename": "Phi-3-mini-4k-instruct-Q4_K_M.gguf"
}
],
"id": "phi3-medium",
@ -14,7 +14,7 @@
"settings": {
"ctx_len": 128000,
"prompt_template": "<|user|> {prompt}<|end|><|assistant|>",
"llama_model_path": "Phi-3-medium-128k-instruct-Q4_K_M.gguf",
"llama_model_path": "Phi-3-mini-4k-instruct-Q4_K_M.gguf",
"ngl": 33
},
"parameters": {

View File

@ -120,6 +120,7 @@ export default [
DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'),
CORTEX_SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.40'),
}),
// Allow json resolution
json(),

View File

@ -1,6 +1,7 @@
declare const NODE: string
declare const CORTEX_API_URL: string
declare const CORTEX_SOCKET_URL: string
declare const CORTEX_ENGINE_VERSION: string
declare const DEFAULT_SETTINGS: Array<any>
declare const MODELS: Array<any>

View File

@ -17,7 +17,10 @@ import {
extractModelLoadParams,
fs,
events,
ModelEvent
ModelEvent,
SystemInformation,
dirName,
AppConfigurationEventName,
} from '@janhq/core'
import PQueue from 'p-queue'
import ky from 'ky'
@ -45,6 +48,8 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
provider: string = InferenceEngine.cortex
shouldReconnect = true
/**
* The URL for making inference requests.
*/
@ -65,23 +70,34 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
super.onLoad()
this.queue.add(() => this.clean())
// Run the process watchdog
const systemInfo = await systemInformation()
await this.clean()
await executeOnMain(NODE, 'run', systemInfo)
this.queue.add(() => executeOnMain(NODE, 'run', systemInfo))
this.queue.add(() => this.healthz())
this.queue.add(() => this.setDefaultEngine(systemInfo))
this.subscribeToEvents()
window.addEventListener('beforeunload', () => {
this.clean()
})
const currentMode = systemInfo.gpuSetting?.run_mode
events.on(AppConfigurationEventName.OnConfigurationUpdate, async () => {
const systemInfo = await systemInformation()
// Update run mode on settings update
if (systemInfo.gpuSetting?.run_mode !== currentMode)
this.queue.add(() => this.setDefaultEngine(systemInfo))
})
}
onUnload(): void {
async onUnload() {
console.log('Clean up cortex.cpp services')
this.shouldReconnect = false
this.clean()
executeOnMain(NODE, 'dispose')
await executeOnMain(NODE, 'dispose')
super.onUnload()
}
@ -89,7 +105,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
model: Model & { file_path?: string }
): Promise<void> {
if (
model.engine === InferenceEngine.nitro &&
(model.engine === InferenceEngine.nitro || model.settings.vision_model) &&
model.settings.llama_model_path
) {
// Legacy chat model support
@ -105,7 +121,10 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
model.settings = settings
}
if (model.engine === InferenceEngine.nitro && model.settings.mmproj) {
if (
(model.engine === InferenceEngine.nitro || model.settings.vision_model) &&
model.settings.mmproj
) {
// Legacy clip vision model support
model.settings = {
...model.settings,
@ -127,6 +146,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
? InferenceEngine.cortex_llamacpp
: model.engine,
},
timeout: false,
})
.json()
.catch(async (e) => {
@ -149,25 +169,54 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
* Do health check on cortex.cpp
* @returns
*/
healthz(): Promise<void> {
private healthz(): Promise<void> {
return ky
.get(`${CORTEX_API_URL}/healthz`, {
retry: {
limit: 10,
limit: 20,
delay: () => 500,
methods: ['get'],
},
})
.then(() => { })
.then(() => {})
}
/**
* Set default engine variant on launch
*/
private async setDefaultEngine(systemInfo: SystemInformation) {
const variant = await executeOnMain(
NODE,
'engineVariant',
systemInfo.gpuSetting
)
return (
ky
// Fallback support for legacy API
.post(
`${CORTEX_API_URL}/v1/engines/${InferenceEngine.cortex_llamacpp}/default?version=${CORTEX_ENGINE_VERSION}&variant=${variant}`,
{
json: {
version: CORTEX_ENGINE_VERSION,
variant,
},
}
)
.then(() => {})
)
}
/**
* Clean cortex processes
* @returns
*/
clean(): Promise<any> {
private clean(): Promise<any> {
return ky
.delete(`${CORTEX_API_URL}/processmanager/destroy`, {
timeout: 2000, // maximum 2 seconds
retry: {
limit: 0,
},
})
.catch(() => {
// Do nothing
@ -177,7 +226,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
/**
* Subscribe to cortex.cpp websocket events
*/
subscribeToEvents() {
private subscribeToEvents() {
this.queue.add(
() =>
new Promise<void>((resolve) => {
@ -195,33 +244,47 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
)
const percent = total > 0 ? transferred / total : 0
events.emit(DownloadTypes[data.type as keyof typeof DownloadTypes], {
modelId: data.task.id,
percent: percent,
size: {
transferred: transferred,
total: total,
},
})
events.emit(
DownloadTypes[data.type as keyof typeof DownloadTypes],
{
modelId: data.task.id,
percent: percent,
size: {
transferred: transferred,
total: total,
},
}
)
// Update models list from Hub
if (data.type === DownloadTypes.DownloadSuccess) {
// Delay for the state update from cortex.cpp
// Just to be sure
setTimeout(() => {
events.emit(ModelEvent.OnModelsUpdate, {})
events.emit(ModelEvent.OnModelsUpdate, {
fetch: true,
})
}, 500)
}
})
this.socket.onclose = (event) => {
console.log('WebSocket closed:', event)
events.emit(ModelEvent.OnModelStopped, {})
if (this.shouldReconnect) {
console.log(`Attempting to reconnect...`)
setTimeout(() => this.subscribeToEvents(), 1000)
}
}
resolve()
})
)
}
}
/// Legacy
export const getModelFilePath = async (
model: Model,
const getModelFilePath = async (
model: Model & { file_path?: string },
file: string
): Promise<string> => {
// Symlink to the model file
@ -231,6 +294,9 @@ export const getModelFilePath = async (
) {
return model.sources[0]?.url
}
if (model.file_path) {
await joinPath([await dirName(model.file_path), file])
}
return joinPath([await getJanDataFolderPath(), 'models', model.id, file])
}
///

View File

@ -0,0 +1,27 @@
import { cpuInfo } from 'cpu-instructions'

/**
 * Child-process entry point: probe the CPU for its best supported SIMD
 * instruction set and report it to the parent over the IPC channel.
 * Running the native probe in a forked process isolates the parent from
 * crashes on unsupported hardware; the parent treats an abnormal exit
 * as 'noavx'.
 */

// Probe once and reuse the flag list instead of re-querying per check.
const flags = cpuInfo.cpuInfo().map((flag) => flag.toUpperCase())

// Highest supported instruction set wins: avx512 > avx2 > avx > noavx.
const info = flags.includes('AVX512')
  ? 'avx512'
  : flags.includes('AVX2')
    ? 'avx2'
    : flags.includes('AVX')
      ? 'avx'
      : 'noavx'

// Send the result and wait for delivery confirmation before exiting,
// so the message is not lost when the process tears down.
new Promise<void>((resolve, reject) => {
  // @ts-ignore — process.send only exists in forked child processes
  process.send(info, (error: Error | null) => {
    if (error) {
      reject(error)
    } else {
      resolve()
    }
  })
})
  .then(() => process.exit(0))
  .catch((error) => {
    console.error('Failed to send info:', error)
    process.exit(1)
  })

View File

@ -1,7 +1,8 @@
import { describe, expect, it } from '@jest/globals'
import { executableCortexFile } from './execute'
import { GpuSetting } from '@janhq/core'
import { engineVariant, executableCortexFile } from './execute'
import { GpuSetting } from '@janhq/core/node'
import { cpuInfo } from 'cpu-instructions'
import { fork } from 'child_process'
let testSettings: GpuSetting = {
run_mode: 'cpu',
@ -30,6 +31,15 @@ jest.mock('cpu-instructions', () => ({
let mockCpuInfo = cpuInfo.cpuInfo as jest.Mock
mockCpuInfo.mockReturnValue([])
jest.mock('@janhq/core/node', () => ({
appResourcePath: () => '.',
log: jest.fn(),
}))
jest.mock('child_process', () => ({
fork: jest.fn(),
}))
const mockFork = fork as jest.Mock
describe('test executable cortex file', () => {
afterAll(function () {
Object.defineProperty(process, 'platform', {
@ -37,6 +47,37 @@ describe('test executable cortex file', () => {
})
})
it('executes on MacOS', () => {
const mockProcess = {
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('noavx')
}
}),
send: jest.fn(),
}
Object.defineProperty(process, 'platform', {
value: 'darwin',
})
Object.defineProperty(process, 'arch', {
value: 'arm64',
})
expect(executableCortexFile(testSettings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining('shared'),
executablePath:
originalPlatform === 'darwin'
? expect.stringContaining(`cortex-server`)
: expect.anything(),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
)
mockFork.mockReturnValue(mockProcess)
expect(engineVariant(testSettings)).resolves.toEqual('mac-arm64')
})
it('executes on MacOS', () => {
Object.defineProperty(process, 'platform', {
value: 'darwin',
@ -44,25 +85,23 @@ describe('test executable cortex file', () => {
Object.defineProperty(process, 'arch', {
value: 'arm64',
})
expect(executableCortexFile(testSettings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`arm64`),
binPath: expect.stringContaining(`bin`),
executablePath:
originalPlatform === 'darwin'
? expect.stringContaining(`cortex-server`)
: expect.anything(),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
)
const mockProcess = {
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('noavx')
}
}),
send: jest.fn(),
}
mockFork.mockReturnValue(mockProcess)
Object.defineProperty(process, 'arch', {
value: 'x64',
})
expect(executableCortexFile(testSettings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`x64`),
binPath: expect.stringContaining(`bin`),
enginePath: expect.stringContaining('shared'),
executablePath:
originalPlatform === 'darwin'
? expect.stringContaining(`cortex-server`)
@ -71,6 +110,7 @@ describe('test executable cortex file', () => {
vkVisibleDevices: '',
})
)
expect(engineVariant(testSettings)).resolves.toEqual('mac-amd64')
})
it('executes on Windows CPU', () => {
@ -81,16 +121,25 @@ describe('test executable cortex file', () => {
...testSettings,
run_mode: 'cpu',
}
mockCpuInfo.mockReturnValue(['avx'])
const mockProcess = {
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('avx')
}
}),
send: jest.fn(),
}
mockFork.mockReturnValue(mockProcess)
expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`avx`),
binPath: expect.stringContaining(`bin`),
enginePath: expect.stringContaining('shared'),
executablePath: expect.stringContaining(`cortex-server.exe`),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
)
expect(engineVariant()).resolves.toEqual('windows-amd64-avx')
})
it('executes on Windows Cuda 11', () => {
@ -117,16 +166,27 @@ describe('test executable cortex file', () => {
},
],
}
mockCpuInfo.mockReturnValue(['avx2'])
const mockProcess = {
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('avx2')
}
}),
send: jest.fn(),
}
mockFork.mockReturnValue(mockProcess)
expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`avx2-cuda-11-7`),
binPath: expect.stringContaining(`bin`),
enginePath: expect.stringContaining('shared'),
executablePath: expect.stringContaining(`cortex-server.exe`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
)
expect(engineVariant(settings)).resolves.toEqual(
'windows-amd64-avx2-cuda-11-7'
)
})
it('executes on Windows Cuda 12', () => {
@ -153,16 +213,36 @@ describe('test executable cortex file', () => {
},
],
}
mockCpuInfo.mockReturnValue(['noavx'])
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('noavx')
}
}),
send: jest.fn(),
})
expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`noavx-cuda-12-0`),
binPath: expect.stringContaining(`bin`),
enginePath: expect.stringContaining('shared'),
executablePath: expect.stringContaining(`cortex-server.exe`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
)
expect(engineVariant(settings)).resolves.toEqual(
'windows-amd64-noavx-cuda-12-0'
)
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('avx512')
}
}),
send: jest.fn(),
})
expect(engineVariant(settings)).resolves.toEqual(
'windows-amd64-avx2-cuda-12-0'
)
})
it('executes on Linux CPU', () => {
@ -173,15 +253,23 @@ describe('test executable cortex file', () => {
...testSettings,
run_mode: 'cpu',
}
mockCpuInfo.mockReturnValue(['noavx'])
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('noavx')
}
}),
send: jest.fn(),
})
expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`noavx`),
enginePath: expect.stringContaining('shared'),
executablePath: expect.stringContaining(`cortex-server`),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
)
expect(engineVariant()).resolves.toEqual('linux-amd64-noavx')
})
it('executes on Linux Cuda 11', () => {
@ -208,15 +296,25 @@ describe('test executable cortex file', () => {
},
],
}
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('avx512')
}
}),
send: jest.fn(),
})
expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`cuda-11-7`),
binPath: expect.stringContaining(`bin`),
enginePath: expect.stringContaining('shared'),
executablePath: expect.stringContaining(`cortex-server`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
)
expect(engineVariant(settings)).resolves.toBe('linux-amd64-avx2-cuda-11-7')
})
it('executes on Linux Cuda 12', () => {
@ -243,15 +341,25 @@ describe('test executable cortex file', () => {
},
],
}
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('avx2')
}
}),
send: jest.fn(),
})
expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`cuda-12-0`),
binPath: expect.stringContaining(`bin`),
enginePath: expect.stringContaining('shared'),
executablePath: expect.stringContaining(`cortex-server`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
)
expect(engineVariant(settings)).resolves.toEqual(
'linux-amd64-avx2-cuda-12-0'
)
})
// Generate test for different cpu instructions on Linux
@ -266,18 +374,27 @@ describe('test executable cortex file', () => {
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
cpuInstructions.forEach((instruction) => {
mockCpuInfo.mockReturnValue([instruction])
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})
expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(instruction),
binPath: expect.stringContaining(`bin`),
enginePath: expect.stringContaining('shared'),
executablePath: expect.stringContaining(`cortex-server`),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
)
expect(engineVariant(settings)).resolves.toEqual(
`linux-amd64-${instruction}`
)
})
})
// Generate test for different cpu instructions on Windows
@ -291,16 +408,25 @@ describe('test executable cortex file', () => {
}
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
cpuInstructions.forEach((instruction) => {
mockCpuInfo.mockReturnValue([instruction])
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})
expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(instruction),
binPath: expect.stringContaining(`bin`),
enginePath: expect.stringContaining('shared'),
executablePath: expect.stringContaining(`cortex-server.exe`),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
)
expect(engineVariant(settings)).resolves.toEqual(
`windows-amd64-${instruction}`
)
})
})
@ -331,16 +457,25 @@ describe('test executable cortex file', () => {
}
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
cpuInstructions.forEach((instruction) => {
mockCpuInfo.mockReturnValue([instruction])
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})
expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`cuda-12-0`),
binPath: expect.stringContaining(`bin`),
enginePath: expect.stringContaining('shared'),
executablePath: expect.stringContaining(`cortex-server.exe`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
)
expect(engineVariant(settings)).resolves.toEqual(
`windows-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0`
)
})
})
@ -371,16 +506,25 @@ describe('test executable cortex file', () => {
],
}
cpuInstructions.forEach((instruction) => {
mockCpuInfo.mockReturnValue([instruction])
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})
expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`cuda-12-0`),
binPath: expect.stringContaining(`bin`),
enginePath: expect.stringContaining('shared'),
executablePath: expect.stringContaining(`cortex-server`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
)
expect(engineVariant(settings)).resolves.toEqual(
`linux-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0`
)
})
})
@ -412,16 +556,23 @@ describe('test executable cortex file', () => {
],
}
cpuInstructions.forEach((instruction) => {
mockCpuInfo.mockReturnValue([instruction])
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})
expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`vulkan`),
binPath: expect.stringContaining(`bin`),
enginePath: expect.stringContaining('shared'),
executablePath: expect.stringContaining(`cortex-server`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
)
expect(engineVariant(settings)).resolves.toEqual(`linux-amd64-vulkan`)
})
})
@ -439,11 +590,17 @@ describe('test executable cortex file', () => {
...testSettings,
run_mode: 'cpu',
}
mockCpuInfo.mockReturnValue([])
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('noavx')
}
}),
send: jest.fn(),
})
expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`x64`),
binPath: expect.stringContaining(`bin`),
enginePath: expect.stringContaining('shared'),
executablePath:
originalPlatform === 'darwin'
? expect.stringContaining(`cortex-server`)

View File

@ -1,10 +1,9 @@
import { GpuSetting } from '@janhq/core'
import * as path from 'path'
import { cpuInfo } from 'cpu-instructions'
import { GpuSetting, appResourcePath, log } from '@janhq/core/node'
import { fork } from 'child_process'
export interface CortexExecutableOptions {
enginePath: string
binPath: string
executablePath: string
cudaVisibleDevices: string
vkVisibleDevices: string
@ -21,11 +20,7 @@ const gpuRunMode = (settings?: GpuSetting): string => {
if (!settings) return ''
return settings.vulkan === true
? 'vulkan'
: settings.run_mode === 'cpu'
? ''
: 'cuda'
return settings.vulkan === true || settings.run_mode === 'cpu' ? '' : 'cuda'
}
/**
@ -34,12 +29,12 @@ const gpuRunMode = (settings?: GpuSetting): string => {
*/
const os = (): string => {
return process.platform === 'win32'
? 'win'
? 'windows-amd64'
: process.platform === 'darwin'
? process.arch === 'arm64'
? 'arm64'
: 'x64'
: 'linux'
? 'mac-arm64'
: 'mac-amd64'
: 'linux-amd64'
}
/**
@ -57,7 +52,9 @@ const extension = (): '.exe' | '' => {
*/
const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
const isUsingCuda =
settings?.vulkan !== true && settings?.run_mode === 'gpu' && os() !== 'mac'
settings?.vulkan !== true &&
settings?.run_mode === 'gpu' &&
!os().includes('mac')
if (!isUsingCuda) return undefined
return settings?.cuda?.version === '11' ? '11-7' : '12-0'
@ -67,48 +64,74 @@ const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
* The CPU instructions that will be set - either 'avx512', 'avx2', 'avx', or 'noavx'.
* @returns
*/
const cpuInstructions = (): string => {
const cpuInstructions = async (): Promise<string> => {
if (process.platform === 'darwin') return ''
return cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX512')
? 'avx512'
: cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX2')
? 'avx2'
: cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX')
? 'avx'
: 'noavx'
const child = fork(path.join(__dirname, './cpuInfo.js')) // Path to the child process file
return new Promise((resolve, reject) => {
child.on('message', (cpuInfo?: string) => {
resolve(cpuInfo ?? 'noavx')
child.kill() // Kill the child process after receiving the result
})
child.on('error', (err) => {
resolve('noavx')
child.kill()
})
child.on('exit', (code) => {
if (code !== 0) {
resolve('noavx')
child.kill()
}
})
})
}
/**
* Find which executable file to run based on the current platform.
* @returns The name of the executable file to run.
* The executable options for the cortex.cpp extension.
*/
export const executableCortexFile = (
gpuSetting?: GpuSetting
): CortexExecutableOptions => {
const cpuInstruction = cpuInstructions()
let engineFolder = gpuSetting?.vulkan
? 'vulkan'
: process.platform === 'darwin'
? os()
: [
gpuRunMode(gpuSetting) !== 'cuda' ||
cpuInstruction === 'avx2' || cpuInstruction === 'avx512'
? cpuInstruction
: 'noavx',
gpuRunMode(gpuSetting),
cudaVersion(gpuSetting),
]
.filter((e) => !!e)
.join('-')
let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
let binaryName = `cortex-server${extension()}`
const binPath = path.join(__dirname, '..', 'bin')
return {
enginePath: path.join(binPath, engineFolder),
enginePath: path.join(appResourcePath(), 'shared'),
executablePath: path.join(binPath, binaryName),
binPath: binPath,
cudaVisibleDevices,
vkVisibleDevices,
}
}
/**
 * Resolve the cortex.cpp engine variant name to load for the current platform
 * and GPU settings, e.g. `windows-amd64-avx2-cuda-12-0` or `mac-arm64`.
 * @param gpuSetting - optional GPU configuration (vulkan flag, run_mode, cuda version)
 * @returns the dash-joined variant string: `<os>[-<feature>][-cuda][-<cuda-version>]`
 */
export const engineVariant = async (
gpuSetting?: GpuSetting
): Promise<string> => {
// CPU capability probe (resolves to 'noavx' on failure — see cpuInstructions)
const cpuInstruction = await cpuInstructions()
log(`[CORTEX]: CPU instruction: ${cpuInstruction}`)
let engineVariant = [
os(),
gpuSetting?.vulkan
? 'vulkan'
: gpuRunMode(gpuSetting) !== 'cuda'
? // CPU mode - support all variants
cpuInstruction
: // GPU mode - packaged CUDA variants of avx2 and noavx
cpuInstruction === 'avx2' || cpuInstruction === 'avx512'
? 'avx2'
: 'noavx',
gpuRunMode(gpuSetting),
cudaVersion(gpuSetting),
]
// Drop empty segments (e.g. no CUDA run mode / no CUDA version) before joining
.filter((e) => !!e)
.join('-')
log(`[CORTEX]: Engine variant: ${engineVariant}`)
return engineVariant
}

View File

@ -1,8 +1,7 @@
import path from 'path'
import { getJanDataFolderPath, log, SystemInformation } from '@janhq/core/node'
import { executableCortexFile } from './execute'
import { engineVariant, executableCortexFile } from './execute'
import { ProcessWatchdog } from './watchdog'
import { appResourcePath } from '@janhq/core/node'
// The HOST address to use for the Nitro subprocess
const LOCAL_PORT = '39291'
@ -20,9 +19,9 @@ function run(systemInfo?: SystemInformation): Promise<any> {
// If ngl is not set or equal to 0, run on CPU with correct instructions
systemInfo?.gpuSetting
? {
...systemInfo.gpuSetting,
run_mode: systemInfo.gpuSetting.run_mode,
}
...systemInfo.gpuSetting,
run_mode: systemInfo.gpuSetting.run_mode,
}
: undefined
)
@ -30,16 +29,13 @@ function run(systemInfo?: SystemInformation): Promise<any> {
log(`[CORTEX]:: Spawn cortex at path: ${executableOptions.executablePath}`)
log(`[CORTEX]:: Cortex engine path: ${executableOptions.enginePath}`)
addEnvPaths(path.join(appResourcePath(), 'shared'))
addEnvPaths(executableOptions.binPath)
addEnvPaths(executableOptions.enginePath)
// Add the cortex.llamacpp path to the PATH and LD_LIBRARY_PATH
// This is required for the cortex engine to run for now since dlls are not moved to the root
addEnvPaths(
path.join(executableOptions.enginePath, 'engines', 'cortex.llamacpp')
)
const dataFolderPath = getJanDataFolderPath()
if (watchdog) {
watchdog.terminate()
}
watchdog = new ProcessWatchdog(
executableOptions.executablePath,
[
@ -81,17 +77,12 @@ function dispose() {
function addEnvPaths(dest: string) {
// Add engine path to the PATH and LD_LIBRARY_PATH
if (process.platform === 'win32') {
process.env.PATH = (process.env.PATH || '').concat(
path.delimiter,
dest,
)
log(`[CORTEX] PATH: ${process.env.PATH}`)
process.env.PATH = (process.env.PATH || '').concat(path.delimiter, dest)
} else {
process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
path.delimiter,
dest,
dest
)
log(`[CORTEX] LD_LIBRARY_PATH: ${process.env.LD_LIBRARY_PATH}`)
}
}
@ -105,4 +96,5 @@ export interface CortexProcessInfo {
export default {
run,
dispose,
engineVariant,
}

View File

@ -1,7 +1,7 @@
{
"name": "@janhq/inference-openai-extension",
"productName": "OpenAI Inference Engine",
"version": "1.0.3",
"version": "1.0.4",
"description": "This extension enables OpenAI chat completion API calls",
"main": "dist/index.js",
"module": "dist/module.js",

View File

@ -97,11 +97,10 @@
"format": "api",
"settings": {},
"parameters": {
"max_tokens": 4096,
"temperature": 0.7,
"top_p": 0.95,
"temperature": 1,
"top_p": 1,
"stream": true,
"stop": [],
"max_tokens": 32768,
"frequency_penalty": 0,
"presence_penalty": 0
},
@ -125,11 +124,10 @@
"format": "api",
"settings": {},
"parameters": {
"max_tokens": 4096,
"temperature": 0.7,
"top_p": 0.95,
"temperature": 1,
"top_p": 1,
"max_tokens": 65536,
"stream": true,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},

View File

@ -76,11 +76,10 @@ export default class JanInferenceOpenAIExtension extends RemoteOAIEngine {
transformPayload = (payload: OpenAIPayloadType): OpenAIPayloadType => {
// Transform the payload for preview models
if (this.previewModels.includes(payload.model)) {
const { max_tokens, temperature, top_p, stop, ...params } = payload
const { max_tokens, stop, ...params } = payload
return {
...params,
max_completion_tokens: max_tokens,
stream: false // o1 only support stream = false
}
}
// Pass through for non-preview models

View File

@ -1,4 +1,4 @@
[
[
{
"sources": [
{
@ -13,7 +13,7 @@
"format": "api",
"settings": {},
"parameters": {
"max_tokens": 1024,
"max_tokens": 128000,
"temperature": 0.7,
"top_p": 0.95,
"frequency_penalty": 0,

View File

@ -83,6 +83,6 @@ export default class JanInferenceOpenRouterExtension extends RemoteOAIEngine {
transformPayload = (payload: PayloadType) => ({
...payload,
model: this.model,
model: payload.model !== 'open-router-auto' ? payload.model : this.model,
})
}

View File

@ -1,7 +1,7 @@
{
"name": "@janhq/model-extension",
"productName": "Model Management",
"version": "1.0.34",
"version": "1.0.35",
"description": "Model Management Extension provides model exploration and seamless downloads",
"main": "dist/index.js",
"author": "Jan <service@jan.ai>",

View File

@ -1,6 +1,6 @@
import PQueue from 'p-queue'
import ky from 'ky'
import { extractModelLoadParams, Model } from '@janhq/core'
import { extractModelLoadParams, Model } from '@janhq/core'
import { extractInferenceParams } from '@janhq/core'
/**
* cortex.cpp Model APIs interface
@ -18,6 +18,7 @@ interface ICortexAPI {
deleteModel(model: string): Promise<void>
updateModel(model: object): Promise<void>
cancelModelPull(model: string): Promise<void>
configs(body: { [key: string]: any }): Promise<void>
}
type ModelList = {
@ -52,7 +53,7 @@ export class CortexAPI implements ICortexAPI {
*/
getModels(): Promise<Model[]> {
return this.queue
.add(() => ky.get(`${API_URL}/models`).json<ModelList>())
.add(() => ky.get(`${API_URL}/v1/models`).json<ModelList>())
.then((e) =>
typeof e === 'object' ? e.data.map((e) => this.transformModel(e)) : []
)
@ -104,7 +105,7 @@ export class CortexAPI implements ICortexAPI {
*/
deleteModel(model: string): Promise<void> {
return this.queue.add(() =>
ky.delete(`${API_URL}/models/${model}`).json().then()
ky.delete(`${API_URL}/v1/models/${model}`).json().then()
)
}
@ -130,7 +131,7 @@ export class CortexAPI implements ICortexAPI {
cancelModelPull(model: string): Promise<void> {
return this.queue.add(() =>
ky
.delete(`${API_URL}/models/pull`, { json: { taskId: model } })
.delete(`${API_URL}/v1/models/pull`, { json: { taskId: model } })
.json()
.then()
)
@ -142,7 +143,7 @@ export class CortexAPI implements ICortexAPI {
*/
async getModelStatus(model: string): Promise<boolean> {
return this.queue
.add(() => ky.get(`${API_URL}/models/status/${model}`))
.add(() => ky.get(`${API_URL}/v1/models/status/${model}`))
.then((e) => true)
.catch(() => false)
}
@ -155,13 +156,24 @@ export class CortexAPI implements ICortexAPI {
return ky
.get(`${API_URL}/healthz`, {
retry: {
limit: 10,
limit: 20,
delay: () => 500,
methods: ['get'],
},
})
.then(() => {})
}
/**
 * Patch cortex.cpp server configuration (e.g. huggingface_token, proxy settings)
 * via the `/v1/configs` endpoint. The request is serialized through the queue.
 * @param body - key/value map of configuration entries to update
 * @returns a promise that resolves once the PATCH request completes
 */
configs(body: { [key: string]: any }): Promise<void> {
return this.queue.add(() =>
ky.patch(`${API_URL}/v1/configs`, { json: body }).then(() => {})
)
}
/**
* Transform model to the expected format (e.g. parameters, settings, metadata)
* @param model

View File

@ -20,11 +20,8 @@ import { deleteModelFiles } from './legacy/delete'
declare const SETTINGS: Array<any>
/**
* Extension enum
*/
enum ExtensionEnum {
downloadedModels = 'downloadedModels',
export enum Settings {
huggingfaceToken = 'hugging-face-access-token',
}
/**
@ -40,15 +37,29 @@ export default class JanModelExtension extends ModelExtension {
async onLoad() {
// Register this extension's settings UI entries
this.registerSettings(SETTINGS)
// Try get models from cortex.cpp
// NOTE(review): fire-and-forget — models register asynchronously after load
this.getModels().then((models) => {
this.registerModels(models)
})
// Configure huggingface token if available
const huggingfaceToken = await this.getSetting<string>(
Settings.huggingfaceToken,
undefined
)
// Forward the token to cortex.cpp so gated model pulls can authenticate
if (huggingfaceToken)
this.cortexAPI.configs({ huggingface_token: huggingfaceToken })
// Listen to app download events
this.handleDesktopEvents()
}
/**
 * React to a settings change: when the Hugging Face access token setting is
 * updated, push the new value to cortex.cpp's configuration immediately.
 * @param key - the setting key that changed
 * @param value - the new setting value
 */
onSettingUpdate<T>(key: string, value: T): void {
if (key === Settings.huggingfaceToken) {
this.cortexAPI.configs({ huggingface_token: value })
}
}
/**
* Called when the extension is unloaded.
* @override
@ -127,55 +138,43 @@ export default class JanModelExtension extends ModelExtension {
* @returns A Promise that resolves with an array of all models.
*/
async getModels(): Promise<Model[]> {
/**
* In this action, if return empty array right away
* it would reset app cache and app will not function properly
* should compare and try import
*/
let currentModels: Model[] = []
/**
* Legacy models should be supported
*/
let legacyModels = await scanModelsFolder()
try {
if (!localStorage.getItem(ExtensionEnum.downloadedModels)) {
// Updated from an older version than 0.5.5
// Scan through the models folder and import them (Legacy flow)
// Return models immediately
currentModels = legacyModels
} else {
currentModels = JSON.parse(
localStorage.getItem(ExtensionEnum.downloadedModels)
) as Model[]
}
} catch (e) {
currentModels = []
console.error(e)
}
/**
* Here we are filtering out the models that are not imported
* and are not using llama.cpp engine
*/
var toImportModels = currentModels.filter(
var toImportModels = legacyModels.filter(
(e) => e.engine === InferenceEngine.nitro
)
await this.cortexAPI.getModels().then((models) => {
const existingIds = models.map((e) => e.id)
toImportModels = toImportModels.filter(
(e: Model) => !existingIds.includes(e.id) && !e.settings?.vision_model
/**
* Fetch models from cortex.cpp
*/
var fetchedModels = await this.cortexAPI.getModels().catch(() => [])
// Checking if there are models to import
const existingIds = fetchedModels.map((e) => e.id)
toImportModels = toImportModels.filter(
(e: Model) => !existingIds.includes(e.id) && !e.settings?.vision_model
)
/**
* There is no model to import
* just return fetched models
*/
if (!toImportModels.length)
return fetchedModels.concat(
legacyModels.filter((e) => !fetchedModels.some((x) => x.id === e.id))
)
})
console.log('To import models:', toImportModels.length)
/**
* There are models to import
* do not return models from cortex.cpp yet
* otherwise it will reset the app cache
* */
*/
if (toImportModels.length > 0) {
// Import models
await Promise.all(
@ -193,17 +192,19 @@ export default class JanModelExtension extends ModelExtension {
]) // Copied models
: model.sources[0].url, // Symlink models,
model.name
).then((e) => {
this.updateModel({
id: model.id,
...model.settings,
...model.parameters,
} as Partial<Model>)
})
)
.then((e) => {
this.updateModel({
id: model.id,
...model.settings,
...model.parameters,
} as Partial<Model>)
})
.catch((e) => {
console.debug(e)
})
})
)
return currentModels
}
/**
@ -252,6 +253,13 @@ export default class JanModelExtension extends ModelExtension {
return this.cortexAPI.getModelStatus(model)
}
/**
 * Configure pull options such as proxy, headers, etc.
 * Delegates to the cortex.cpp `/v1/configs` endpoint; failures are logged at
 * debug level and swallowed so a misconfiguration never blocks the caller.
 * @param options - key/value map of pull-related configuration entries
 */
async configurePullOptions(options: { [key: string]: any }): Promise<any> {
return this.cortexAPI.configs(options).catch((e) => console.debug(e))
}
/**
* Handle download state from main app
*/

View File

@ -1,10 +1,12 @@
import { fs, joinPath } from '@janhq/core'
import { dirName, fs } from '@janhq/core'
import { scanModelsFolder } from './model-json'
export const deleteModelFiles = async (id: string) => {
try {
const dirPath = await joinPath(['file://models', id])
const models = await scanModelsFolder()
const dirPath = models.find((e) => e.id === id)?.file_path
// remove model folder directory
await fs.rm(dirPath)
if (dirPath) await fs.rm(await dirName(dirPath))
} catch (err) {
console.error(err)
}

View File

@ -12,7 +12,9 @@ const LocalEngines = [
* Scan through models folder and return downloaded models
* @returns
*/
export const scanModelsFolder = async (): Promise<Model[]> => {
export const scanModelsFolder = async (): Promise<
(Model & { file_path?: string })[]
> => {
const _homeDir = 'file://models'
try {
if (!(await fs.existsSync(_homeDir))) {
@ -37,7 +39,7 @@ export const scanModelsFolder = async (): Promise<Model[]> => {
const jsonPath = await getModelJsonPath(folderFullPath)
if (await fs.existsSync(jsonPath)) {
if (jsonPath && (await fs.existsSync(jsonPath))) {
// if we have the model.json file, read it
let model = await fs.readFileSync(jsonPath, 'utf-8')
@ -83,7 +85,10 @@ export const scanModelsFolder = async (): Promise<Model[]> => {
file.toLowerCase().endsWith('.gguf') || // GGUF
file.toLowerCase().endsWith('.engine') // Tensort-LLM
)
})?.length >= (model.engine === InferenceEngine.nitro_tensorrt_llm ? 1 : (model.sources?.length ?? 1))
})?.length >=
(model.engine === InferenceEngine.nitro_tensorrt_llm
? 1
: (model.sources?.length ?? 1))
)
})

View File

@ -1,7 +1,9 @@
import {
AppConfigurationEventName,
GpuSetting,
MonitoringExtension,
OperatingSystemInfo,
events,
executeOnMain,
} from '@janhq/core'
@ -37,6 +39,7 @@ export default class JanMonitoringExtension extends MonitoringExtension {
// Attempt to fetch nvidia info
await executeOnMain(NODE, 'updateNvidiaInfo')
events.emit(AppConfigurationEventName.OnConfigurationUpdate, {})
}
onSettingUpdate<T>(key: string, value: T): void {

View File

@ -259,15 +259,15 @@ const updateGpuInfo = async () =>
data.gpu_highest_vram = highestVramId
} else {
data.gpus = []
data.gpu_highest_vram = ''
data.gpu_highest_vram = undefined
}
if (!data.gpus_in_use || data.gpus_in_use.length === 0) {
data.gpus_in_use = [data.gpu_highest_vram]
data.gpus_in_use = data.gpu_highest_vram ? [data.gpu_highest_vram].filter(e => !!e) : []
}
data = await updateCudaExistence(data)
console.log(data)
console.log('[MONITORING]::Cuda info: ', data)
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
log(`[APP]::${JSON.stringify(data)}`)
resolve({})
@ -344,7 +344,7 @@ const updateCudaExistence = async (
data.cuda.version = match[1]
}
}
console.log(data)
console.log('[MONITORING]::Finalized cuda info update: ', data)
resolve()
})
})

View File

@ -1,5 +1,6 @@
import React from 'react'
import * as SliderPrimitive from '@radix-ui/react-slider'
import { twMerge } from 'tailwind-merge'
import './styles.scss'
@ -25,7 +26,7 @@ const Slider = ({
disabled,
}: Props) => (
<SliderPrimitive.Root
className="slider"
className={twMerge('slider', disabled && 'slider--disabled')}
name={name}
min={min}
max={max}

View File

@ -6,6 +6,11 @@
touch-action: none;
height: 16px;
&--disabled {
cursor: not-allowed;
opacity: 0.2;
}
&__track {
background-color: hsla(var(--slider-track-bg));
position: relative;

View File

@ -52,9 +52,8 @@ export default function RootLayout() {
<body className="font-sans antialiased">
<JotaiWrapper>
<ThemeWrapper>
<ClipboardListener>
<Search />
</ClipboardListener>
<ClipboardListener />
<Search />
</ThemeWrapper>
</JotaiWrapper>
</body>

View File

@ -4,7 +4,10 @@ import SettingComponentBuilder from '@/containers/ModelSetting/SettingComponent'
type Props = {
componentData: SettingComponentProps[]
onValueChanged: (key: string, value: string | number | boolean) => void
onValueChanged: (
key: string,
value: string | number | boolean | string[]
) => void
disabled?: boolean
}

View File

@ -63,9 +63,6 @@ describe('ErrorMessage Component', () => {
render(<ErrorMessage message={message} />)
expect(
screen.getByText('Apologies, somethings amiss!')
).toBeInTheDocument()
expect(screen.getByText('troubleshooting assistance')).toBeInTheDocument()
})

View File

@ -27,11 +27,8 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
const getErrorTitle = () => {
switch (message.error_code) {
case ErrorCode.Unknown:
return 'Apologies, somethings amiss!'
case ErrorCode.InvalidApiKey:
case ErrorCode.AuthenticationError:
case ErrorCode.InvalidRequestError:
return (
<span data-testid="invalid-API-key-error">
Invalid API key. Please check your API key from{' '}
@ -55,17 +52,17 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
)
default:
return (
<>
<p data-testid="passthrough-error-message">
{message.content[0]?.text?.value && (
<AutoLink text={message.content[0].text.value} />
)}
</>
</p>
)
}
}
return (
<div className="mt-10">
<div className="mx-auto mt-10 max-w-[700px]">
{message.status === MessageStatus.Error && (
<div
key={message.id}

View File

@ -1,10 +1,8 @@
'use client'
import { useEffect } from 'react'
import { useEffect, useMemo } from 'react'
import { motion as m } from 'framer-motion'
import { useAtom, useAtomValue } from 'jotai'
import { useAtomValue, useSetAtom } from 'jotai'
import { twMerge } from 'tailwind-merge'
@ -36,7 +34,7 @@ import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
import { reduceTransparentAtom } from '@/helpers/atoms/Setting.atom'
const BaseLayout = () => {
const [mainViewState, setMainViewState] = useAtom(mainViewStateAtom)
const setMainViewState = useSetAtom(mainViewStateAtom)
const importModelStage = useAtomValue(getImportModelStageAtom)
const reduceTransparent = useAtomValue(reduceTransparentAtom)
@ -68,24 +66,7 @@ const BaseLayout = () => {
<TopPanel />
<div className="relative top-9 flex h-[calc(100vh-(36px+36px))] w-screen">
<RibbonPanel />
<div className={twMerge('relative flex w-full')}>
<div className="w-full">
<m.div
key={mainViewState}
initial={{ opacity: 0, y: -8 }}
className="h-full"
animate={{
opacity: 1,
y: 0,
transition: {
duration: 0.5,
},
}}
>
<MainViewContainer />
</m.div>
</div>
</div>
<MainViewContainer />
<LoadingModal />
{importModelStage === 'SELECTING_MODEL' && <SelectingModelModal />}
{importModelStage === 'MODEL_SELECTED' && <ImportModelOptionModal />}

View File

@ -1,15 +1,30 @@
import { ReactNode, useCallback, useEffect, useRef } from 'react'
import { PropsWithChildren, useCallback, useEffect, useRef } from 'react'
import { ScrollArea } from '@janhq/joi'
type Props = {
children: ReactNode
}
import { useAtomValue } from 'jotai'
const ListContainer = ({ children }: Props) => {
import { activeThreadAtom } from '@/helpers/atoms/Thread.atom'
const ListContainer = ({ children }: PropsWithChildren) => {
const listRef = useRef<HTMLDivElement>(null)
const prevScrollTop = useRef(0)
const isUserManuallyScrollingUp = useRef(false)
const activeThread = useAtomValue(activeThreadAtom)
const prevActiveThread = useRef(activeThread)
// Handle active thread changes
useEffect(() => {
if (prevActiveThread.current?.id !== activeThread?.id) {
isUserManuallyScrollingUp.current = false
const scrollHeight = listRef.current?.scrollHeight ?? 0
listRef.current?.scrollTo({
top: scrollHeight,
behavior: 'instant',
})
prevActiveThread.current = activeThread // Update the previous active thread reference
}
}, [activeThread])
const handleScroll = useCallback((event: React.UIEvent<HTMLElement>) => {
const currentScrollTop = event.currentTarget.scrollTop
@ -25,6 +40,11 @@ const ListContainer = ({ children }: Props) => {
isUserManuallyScrollingUp.current = false
}
}
if (isUserManuallyScrollingUp.current === true) {
event.preventDefault()
event.stopPropagation()
}
prevScrollTop.current = currentScrollTop
}, [])

View File

@ -1,5 +1,10 @@
import { memo } from 'react'
import { motion as m } from 'framer-motion'
import { useAtomValue } from 'jotai'
import { twMerge } from 'tailwind-merge'
import { MainViewState } from '@/constants/screens'
import HubScreen from '@/screens/Hub'
@ -31,7 +36,26 @@ const MainViewContainer = () => {
break
}
return children
return (
<div className={twMerge('relative flex w-full')}>
<div className="w-full">
<m.div
key={mainViewState}
initial={{ opacity: 0, y: -8 }}
className="h-full"
animate={{
opacity: 1,
y: 0,
transition: {
duration: 0.25,
},
}}
>
{children}
</m.div>
</div>
</div>
)
}
export default MainViewContainer
export default memo(MainViewContainer)

View File

@ -2,7 +2,6 @@ import '@testing-library/jest-dom'
import React from 'react'
import { render, fireEvent } from '@testing-library/react'
import ModelConfigInput from './index'
import { Tooltip } from '@janhq/joi'
// Mocking the Tooltip component to simplify testing
jest.mock('@janhq/joi', () => ({

View File

@ -19,28 +19,30 @@ const ModelConfigInput = ({
description,
placeholder,
onValueChanged,
}: Props) => (
<div className="flex flex-col">
<div className="mb-2 flex items-center gap-x-2">
<p className="font-medium">{title}</p>
<Tooltip
trigger={
<InfoIcon
size={16}
className="flex-shrink-0 text-[hsla(var(--text-secondary))]"
/>
}
content={description}
}: Props) => {
return (
<div className="flex flex-col">
<div className="mb-2 flex items-center gap-x-2">
<p className="font-medium">{title}</p>
<Tooltip
trigger={
<InfoIcon
size={16}
className="flex-shrink-0 text-[hsla(var(--text-secondary))]"
/>
}
content={description}
/>
</div>
<TextArea
placeholder={placeholder}
onChange={(e) => onValueChanged?.(e.target.value)}
autoResize
value={value}
disabled={disabled}
/>
</div>
<TextArea
placeholder={placeholder}
onChange={(e) => onValueChanged?.(e.target.value)}
autoResize
value={value}
disabled={disabled}
/>
</div>
)
)
}
export default ModelConfigInput

View File

@ -46,8 +46,7 @@ const ModelSearch = ({ onSearchLocal }: Props) => {
errMessage = err.message
}
toaster({
title: 'Failed to get Hugging Face models',
description: errMessage,
title: errMessage,
type: 'error',
})
console.error(err)

View File

@ -8,11 +8,15 @@ import {
import Checkbox from '@/containers/Checkbox'
import ModelConfigInput from '@/containers/ModelConfigInput'
import SliderRightPanel from '@/containers/SliderRightPanel'
import TagInput from '@/containers/TagInput'
type Props = {
componentProps: SettingComponentProps[]
disabled?: boolean
onValueUpdated: (key: string, value: string | number | boolean) => void
onValueUpdated: (
key: string,
value: string | number | boolean | string[]
) => void
}
const SettingComponent: React.FC<Props> = ({
@ -53,7 +57,24 @@ const SettingComponent: React.FC<Props> = ({
name={data.key}
description={data.description}
placeholder={placeholder}
value={textValue}
value={textValue as string}
onValueChanged={(value) => onValueUpdated(data.key, value)}
/>
)
}
case 'tag': {
const { placeholder, value: textValue } =
data.controllerProps as InputComponentProps
return (
<TagInput
title={data.title}
disabled={disabled}
key={data.key}
name={data.key}
description={data.description}
placeholder={placeholder}
value={textValue as string[]}
onValueChanged={(value) => onValueUpdated(data.key, value)}
/>
)

View File

@ -6,7 +6,10 @@ import SettingComponentBuilder from './SettingComponent'
type Props = {
componentProps: SettingComponentProps[]
onValueChanged: (key: string, value: string | number | boolean) => void
onValueChanged: (
key: string,
value: string | number | boolean | string[]
) => void
disabled?: boolean
}

View File

@ -1,4 +1,4 @@
import { Fragment, PropsWithChildren, useEffect } from 'react'
import { Fragment, useEffect } from 'react'
import { AppUpdateInfo } from '@janhq/core'
import { useSetAtom } from 'jotai'
@ -8,7 +8,7 @@ import {
updateVersionErrorAtom,
} from '@/helpers/atoms/App.atom'
const AppUpdateListener = ({ children }: PropsWithChildren) => {
const AppUpdateListener = () => {
const setProgress = useSetAtom(appDownloadProgressAtom)
const setUpdateVersionError = useSetAtom(updateVersionErrorAtom)
@ -39,7 +39,7 @@ const AppUpdateListener = ({ children }: PropsWithChildren) => {
}
}, [setProgress, setUpdateVersionError])
return <Fragment>{children}</Fragment>
return <Fragment></Fragment>
}
export default AppUpdateListener

View File

@ -1,10 +1,10 @@
import { Fragment, PropsWithChildren } from 'react'
import { Fragment } from 'react'
import { useSetAtom } from 'jotai'
import { selectedTextAtom } from './Jotai'
const ClipboardListener = ({ children }: PropsWithChildren) => {
const ClipboardListener = () => {
const setSelectedText = useSetAtom(selectedTextAtom)
if (typeof window !== 'undefined') {
@ -13,7 +13,7 @@ const ClipboardListener = ({ children }: PropsWithChildren) => {
})
}
return <Fragment>{children}</Fragment>
return <Fragment></Fragment>
}
export default ClipboardListener

View File

@ -0,0 +1,64 @@
'use client'
import { PropsWithChildren, useCallback, useEffect, useState } from 'react'
import Loader from '@/containers/Loader'
import { setupCoreServices } from '@/services/coreService'
import {
isCoreExtensionInstalled,
setupBaseExtensions,
} from '@/services/extensionService'
import { extensionManager } from '@/extension'
/**
 * Gates the app UI behind core-service and extension initialization.
 * Children render only after core services are set up and extensions are
 * activated; a loader overlay is shown while base extensions are installing.
 */
export const CoreConfigurator = ({ children }: PropsWithChildren) => {
// Core services have been wired up
const [setupCore, setSetupCore] = useState(false)
// Extensions are loaded (or no extension host is present)
const [activated, setActivated] = useState(false)
// Base extensions are currently being installed — show the loader
const [settingUp, setSettingUp] = useState(false)
const setupExtensions = useCallback(async () => {
// Register all active extensions
await extensionManager.registerActive()
// Deferred so registration settles before the install check runs
setTimeout(async () => {
if (!isCoreExtensionInstalled()) {
// Missing core extensions: install them; activation happens on a later pass
setSettingUp(true)
await setupBaseExtensions()
return
}
extensionManager.load()
setSettingUp(false)
setActivated(true)
}, 500)
}, [])
// Services Setup
useEffect(() => {
setupCoreServices()
setSetupCore(true)
// Unload extensions when the configurator unmounts
return () => {
extensionManager.unload()
}
}, [])
useEffect(() => {
if (setupCore) {
// Electron
if (window && window.core?.api) {
setupExtensions()
} else {
// Host
// No extension host API available — skip extension setup entirely
setActivated(true)
}
}
}, [setupCore, setupExtensions])
return (
<>
{settingUp && <Loader description="Preparing Update..." />}
{setupCore && activated && <>{children}</>}
</>
)
}

View File

@ -1,13 +1,12 @@
'use client'
import { Fragment, ReactNode, useEffect } from 'react'
import { Fragment, useEffect } from 'react'
import { AppConfiguration, getUserHomePath } from '@janhq/core'
import { useSetAtom } from 'jotai'
import useAssistants from '@/hooks/useAssistants'
import useGetSystemResources from '@/hooks/useGetSystemResources'
import { useLoadTheme } from '@/hooks/useLoadTheme'
import useModels from '@/hooks/useModels'
import useThreads from '@/hooks/useThreads'
@ -20,21 +19,22 @@ import {
} from '@/helpers/atoms/AppConfig.atom'
import { janSettingScreenAtom } from '@/helpers/atoms/Setting.atom'
type Props = {
children: ReactNode
}
const DataLoader: React.FC<Props> = ({ children }) => {
const DataLoader: React.FC = () => {
const setJanDataFolderPath = useSetAtom(janDataFolderPathAtom)
const setQuickAskEnabled = useSetAtom(quickAskEnabledAtom)
const setJanDefaultDataFolder = useSetAtom(defaultJanDataFolderAtom)
const setJanSettingScreen = useSetAtom(janSettingScreenAtom)
const { getData: loadModels } = useModels()
useModels()
useThreads()
useAssistants()
useGetSystemResources()
useLoadTheme()
useEffect(() => {
// Load data once
loadModels()
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [])
useEffect(() => {
window.core?.api
@ -63,7 +63,7 @@ const DataLoader: React.FC<Props> = ({ children }) => {
console.debug('Load Data...')
return <Fragment>{children}</Fragment>
return <Fragment></Fragment>
}
export default DataLoader

View File

@ -1,4 +1,4 @@
import { Fragment, ReactNode } from 'react'
import { Fragment } from 'react'
import { useSetAtom } from 'jotai'
@ -13,11 +13,8 @@ import {
importHuggingFaceModelStageAtom,
importingHuggingFaceRepoDataAtom,
} from '@/helpers/atoms/HuggingFace.atom'
type Props = {
children: ReactNode
}
const DeepLinkListener: React.FC<Props> = ({ children }) => {
const DeepLinkListener: React.FC = () => {
const { getHfRepoData } = useGetHFRepoData()
const setLoadingInfo = useSetAtom(loadingModalInfoAtom)
const setImportingHuggingFaceRepoData = useSetAtom(
@ -69,7 +66,7 @@ const DeepLinkListener: React.FC<Props> = ({ children }) => {
handleDeepLinkAction(action)
})
return <Fragment>{children}</Fragment>
return <Fragment></Fragment>
}
type DeepLinkAction = {

View File

@ -1,4 +1,4 @@
import { PropsWithChildren, useCallback, useEffect } from 'react'
import { useCallback, useEffect } from 'react'
import React from 'react'
@ -23,7 +23,7 @@ import { toaster } from '../Toast'
import AppUpdateListener from './AppUpdateListener'
import ClipboardListener from './ClipboardListener'
import EventHandler from './EventHandler'
import ModelHandler from './ModelHandler'
import ModelImportListener from './ModelImportListener'
import QuickAskListener from './QuickAskListener'
@ -39,7 +39,7 @@ import {
removeDownloadingModelAtom,
} from '@/helpers/atoms/Model.atom'
const EventListenerWrapper = ({ children }: PropsWithChildren) => {
const EventListener = () => {
const setDownloadState = useSetAtom(setDownloadStateAtom)
const setInstallingExtension = useSetAtom(setInstallingExtensionAtom)
const removeInstallingExtension = useSetAtom(removeInstallingExtensionAtom)
@ -112,8 +112,8 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => {
state.downloadState = 'end'
setDownloadState(state)
removeDownloadingModel(state.modelId)
events.emit(ModelEvent.OnModelsUpdate, { fetch: true })
}
events.emit(ModelEvent.OnModelsUpdate, {})
},
[removeDownloadingModel, setDownloadState]
)
@ -156,16 +156,14 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => {
])
return (
<AppUpdateListener>
<ClipboardListener>
<ModelImportListener>
<QuickAskListener>
<EventHandler>{children}</EventHandler>
</QuickAskListener>
</ModelImportListener>
</ClipboardListener>
</AppUpdateListener>
<>
<AppUpdateListener />
<ClipboardListener />
<ModelImportListener />
<QuickAskListener />
<ModelHandler />
</>
)
}
export default EventListenerWrapper
export default EventListener

View File

@ -1,13 +1,9 @@
'use client'
import { ReactNode } from 'react'
import { PropsWithChildren } from 'react'
import { Provider, atom } from 'jotai'
type Props = {
children: ReactNode
}
export const editPromptAtom = atom<string>('')
export const currentPromptAtom = atom<string>('')
export const fileUploadAtom = atom<FileInfo[]>([])
@ -16,7 +12,7 @@ export const searchAtom = atom<string>('')
export const selectedTextAtom = atom('')
export default function JotaiWrapper({ children }: Props) {
export default function JotaiWrapper({ children }: PropsWithChildren) {
return <Provider>{children}</Provider>
}

View File

@ -1,6 +1,6 @@
'use client'
import { Fragment, ReactNode, useEffect } from 'react'
import { Fragment, useEffect } from 'react'
import { useAtom, useAtomValue, useSetAtom } from 'jotai'
@ -8,6 +8,8 @@ import { MainViewState } from '@/constants/screens'
import { useCreateNewThread } from '@/hooks/useCreateNewThread'
import { useStarterScreen } from '@/hooks/useStarterScreen'
import {
mainViewStateAtom,
showLeftPanelAtom,
@ -20,11 +22,7 @@ import {
ThreadModalAction,
} from '@/helpers/atoms/Thread.atom'
type Props = {
children: ReactNode
}
export default function KeyListener({ children }: Props) {
export default function KeyListener() {
const setShowLeftPanel = useSetAtom(showLeftPanelAtom)
const setShowRightPanel = useSetAtom(showRightPanelAtom)
const [mainViewState, setMainViewState] = useAtom(mainViewStateAtom)
@ -32,6 +30,7 @@ export default function KeyListener({ children }: Props) {
const assistants = useAtomValue(assistantsAtom)
const activeThread = useAtomValue(activeThreadAtom)
const setModalActionThread = useSetAtom(modalActionThreadAtom)
const { isShowStarterScreen } = useStarterScreen()
useEffect(() => {
const onKeyDown = (e: KeyboardEvent) => {
@ -60,7 +59,7 @@ export default function KeyListener({ children }: Props) {
return
}
if (e.code === 'KeyN' && prefixKey) {
if (e.code === 'KeyN' && prefixKey && !isShowStarterScreen) {
if (mainViewState !== MainViewState.Thread) return
requestCreateNewThread(assistants[0])
setMainViewState(MainViewState.Thread)
@ -82,6 +81,7 @@ export default function KeyListener({ children }: Props) {
}, [
activeThread,
assistants,
isShowStarterScreen,
mainViewState,
requestCreateNewThread,
setMainViewState,
@ -90,5 +90,5 @@ export default function KeyListener({ children }: Props) {
setShowRightPanel,
])
return <Fragment>{children}</Fragment>
return <Fragment></Fragment>
}

View File

@ -1,4 +1,4 @@
import { Fragment, ReactNode, useCallback, useEffect, useRef } from 'react'
import { Fragment, useCallback, useEffect, useRef } from 'react'
import {
ChatCompletionMessage,
@ -30,6 +30,7 @@ import {
getCurrentChatMessagesAtom,
addNewMessageAtom,
updateMessageAtom,
tokenSpeedAtom,
} from '@/helpers/atoms/ChatMessage.atom'
import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom'
import {
@ -43,7 +44,7 @@ import {
const maxWordForThreadTitle = 10
const defaultThreadTitle = 'New Thread'
export default function EventHandler({ children }: { children: ReactNode }) {
export default function ModelHandler() {
const messages = useAtomValue(getCurrentChatMessagesAtom)
const addNewMessage = useSetAtom(addNewMessageAtom)
const updateMessage = useSetAtom(updateMessageAtom)
@ -62,6 +63,7 @@ export default function EventHandler({ children }: { children: ReactNode }) {
const activeModelRef = useRef(activeModel)
const activeModelParams = useAtomValue(getActiveThreadModelParamsAtom)
const activeModelParamsRef = useRef(activeModelParams)
const setTokenSpeed = useSetAtom(tokenSpeedAtom)
useEffect(() => {
threadsRef.current = threads
@ -179,8 +181,37 @@ export default function EventHandler({ children }: { children: ReactNode }) {
if (message.content.length) {
setIsGeneratingResponse(false)
}
setTokenSpeed((prev) => {
const currentTimestamp = new Date().getTime() // Get current time in milliseconds
if (!prev) {
// If this is the first update, just set the lastTimestamp and return
return {
lastTimestamp: currentTimestamp,
tokenSpeed: 0,
tokenCount: 1,
message: message.id,
}
}
const timeDiffInSeconds =
(currentTimestamp - prev.lastTimestamp) / 1000 // Time difference in seconds
const totalTokenCount = prev.tokenCount + 1
const averageTokenSpeed =
totalTokenCount / (timeDiffInSeconds > 0 ? timeDiffInSeconds : 1) // Calculate average token speed
return {
...prev,
tokenSpeed: averageTokenSpeed,
tokenCount: totalTokenCount,
message: message.id,
}
})
return
} else if (message.status === MessageStatus.Error) {
} else if (
message.status === MessageStatus.Error &&
activeModelRef.current?.engine &&
isLocalEngine(activeModelRef.current.engine)
) {
;(async () => {
if (
!(await extensionManager
@ -329,5 +360,5 @@ export default function EventHandler({ children }: { children: ReactNode }) {
}
}, [onNewMessageResponse, onMessageResponseUpdate, onModelStopped])
return <Fragment>{children}</Fragment>
return <Fragment></Fragment>
}

View File

@ -1,4 +1,4 @@
import { Fragment, PropsWithChildren, useCallback, useEffect } from 'react'
import { Fragment, useCallback, useEffect } from 'react'
import {
ImportingModel,
@ -17,7 +17,7 @@ import {
updateImportingModelProgressAtom,
} from '@/helpers/atoms/Model.atom'
const ModelImportListener = ({ children }: PropsWithChildren) => {
const ModelImportListener = () => {
const updateImportingModelProgress = useSetAtom(
updateImportingModelProgressAtom
)
@ -43,7 +43,7 @@ const ModelImportListener = ({ children }: PropsWithChildren) => {
const onImportModelSuccess = useCallback(
(state: ImportingModel) => {
if (!state.modelId) return
events.emit(ModelEvent.OnModelsUpdate, {})
events.emit(ModelEvent.OnModelsUpdate, { fetch: true })
setImportingModelSuccess(state.importId, state.modelId)
},
[setImportingModelSuccess]
@ -103,7 +103,7 @@ const ModelImportListener = ({ children }: PropsWithChildren) => {
onImportModelFailed,
])
return <Fragment>{children}</Fragment>
return <Fragment></Fragment>
}
export default ModelImportListener

View File

@ -1,4 +1,4 @@
import { Fragment, ReactNode } from 'react'
import { Fragment } from 'react'
import { useSetAtom } from 'jotai'
@ -10,11 +10,7 @@ import useSendChatMessage from '@/hooks/useSendChatMessage'
import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
type Props = {
children: ReactNode
}
const QuickAskListener: React.FC<Props> = ({ children }) => {
const QuickAskListener: React.FC = () => {
const { sendChatMessage } = useSendChatMessage()
const setMainState = useSetAtom(mainViewStateAtom)
@ -27,7 +23,7 @@ const QuickAskListener: React.FC<Props> = ({ children }) => {
debounced(input)
})
return <Fragment>{children}</Fragment>
return <Fragment></Fragment>
}
export default QuickAskListener

View File

@ -45,17 +45,6 @@ describe('Responsive', () => {
})
})
it('renders children correctly', () => {
const { getByText } = render(
<Responsive>
<div>Child Content</div>
</Responsive>
)
// Check if the child content is rendered
expect(getByText('Child Content')).toBeInTheDocument()
})
it('hides left and right panels on small screens', () => {
// Simulate mobile view
window.matchMedia = jest.fn().mockImplementation((query) => ({
@ -64,11 +53,7 @@ describe('Responsive', () => {
removeListener: jest.fn(),
}))
render(
<Responsive>
<div>Child Content</div>
</Responsive>
)
render(<Responsive />)
// Check that the left and right panel states were updated to false
expect(mockSetShowLeftPanel).toHaveBeenCalledWith(false)
@ -83,11 +68,7 @@ describe('Responsive', () => {
removeListener: jest.fn(),
}))
render(
<Responsive>
<div>Child Content</div>
</Responsive>
)
render(<Responsive />)
// Change back to desktop view
window.matchMedia = jest.fn().mockImplementation((query) => ({
@ -97,11 +78,7 @@ describe('Responsive', () => {
}))
// Call the effect manually to simulate the component re-rendering
const rerender = render(
<Responsive>
<div>Child Content</div>
</Responsive>
)
const rerender = render(<Responsive />)
// Check that the last known states were restored (which were true initially)
expect(mockSetShowLeftPanel).toHaveBeenCalledWith(true)

View File

@ -1,11 +1,11 @@
import { Fragment, PropsWithChildren, useEffect, useRef } from 'react'
import { Fragment, useEffect, useRef } from 'react'
import { useMediaQuery } from '@janhq/joi'
import { useAtom } from 'jotai'
import { showLeftPanelAtom, showRightPanelAtom } from '@/helpers/atoms/App.atom'
const Responsive = ({ children }: PropsWithChildren) => {
const Responsive = () => {
const matches = useMediaQuery('(max-width: 880px)')
const [showLeftPanel, setShowLeftPanel] = useAtom(showLeftPanelAtom)
const [showRightPanel, setShowRightPanel] = useAtom(showRightPanelAtom)
@ -30,7 +30,7 @@ const Responsive = ({ children }: PropsWithChildren) => {
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [matches, setShowLeftPanel, setShowRightPanel])
return <Fragment>{children}</Fragment>
return <Fragment></Fragment>
}
export default Responsive

View File

@ -0,0 +1,20 @@
'use client'
import { useEffect } from 'react'
import { useConfigurations } from '@/hooks/useConfigurations'
import { useLoadTheme } from '@/hooks/useLoadTheme'
const SettingsHandler: React.FC = () => {
useLoadTheme()
const { configurePullOptions } = useConfigurations()
useEffect(() => {
configurePullOptions()
}, [configurePullOptions])
return <></>
}
export default SettingsHandler

View File

@ -1,93 +1,42 @@
'use client'
import { PropsWithChildren, useCallback, useEffect, useState } from 'react'
import { PropsWithChildren } from 'react'
import { Toaster } from 'react-hot-toast'
import Loader from '@/containers/Loader'
import EventListenerWrapper from '@/containers/Providers/EventListener'
import EventListener from '@/containers/Providers/EventListener'
import JotaiWrapper from '@/containers/Providers/Jotai'
import ThemeWrapper from '@/containers/Providers/Theme'
import { setupCoreServices } from '@/services/coreService'
import {
isCoreExtensionInstalled,
setupBaseExtensions,
} from '@/services/extensionService'
import Umami from '@/utils/umami'
import { CoreConfigurator } from './CoreConfigurator'
import DataLoader from './DataLoader'
import DeepLinkListener from './DeepLinkListener'
import KeyListener from './KeyListener'
import Responsive from './Responsive'
import { extensionManager } from '@/extension'
import SettingsHandler from './SettingsHandler'
const Providers = ({ children }: PropsWithChildren) => {
const [setupCore, setSetupCore] = useState(false)
const [activated, setActivated] = useState(false)
const [settingUp, setSettingUp] = useState(false)
const setupExtensions = useCallback(async () => {
// Register all active extensions
await extensionManager.registerActive()
setTimeout(async () => {
if (!isCoreExtensionInstalled()) {
setSettingUp(true)
await setupBaseExtensions()
return
}
extensionManager.load()
setSettingUp(false)
setActivated(true)
}, 500)
}, [])
// Services Setup
useEffect(() => {
setupCoreServices()
setSetupCore(true)
return () => {
extensionManager.unload()
}
}, [])
useEffect(() => {
if (setupCore) {
// Electron
if (window && window.core?.api) {
setupExtensions()
} else {
// Host
setActivated(true)
}
}
}, [setupCore, setupExtensions])
return (
<ThemeWrapper>
<JotaiWrapper>
<Umami />
{settingUp && <Loader description="Preparing Update..." />}
{setupCore && activated && (
<CoreConfigurator>
<>
<Responsive>
<KeyListener>
<EventListenerWrapper>
<DataLoader>
<DeepLinkListener>{children}</DeepLinkListener>
</DataLoader>
</EventListenerWrapper>
<Toaster />
</KeyListener>
</Responsive>
<Responsive />
<KeyListener />
<EventListener />
<DataLoader />
<SettingsHandler />
<DeepLinkListener />
<Toaster />
{children}
</>
)}
</CoreConfigurator>
</JotaiWrapper>
</ThemeWrapper>
)

View File

@ -1,8 +1,8 @@
/* eslint-disable @typescript-eslint/naming-convention */
import { memo, useCallback, useEffect, useState } from 'react'
import { memo, useCallback, useEffect, useRef, useState } from 'react'
import { Button, useClipboard } from '@janhq/joi'
import { Button, ScrollArea, useClipboard } from '@janhq/joi'
import { useAtomValue } from 'jotai'
import { FolderIcon, CheckIcon, CopyIcon } from 'lucide-react'
@ -22,6 +22,9 @@ const ServerLogs = (props: ServerLogsProps) => {
const { getLogs } = useLogs()
const serverEnabled = useAtomValue(serverEnabledAtom)
const [logs, setLogs] = useState<string[]>([])
const listRef = useRef<HTMLDivElement>(null)
const prevScrollTop = useRef(0)
const isUserManuallyScrollingUp = useRef(false)
const updateLogs = useCallback(
() =>
@ -58,13 +61,45 @@ const ServerLogs = (props: ServerLogsProps) => {
const clipboard = useClipboard({ timeout: 1000 })
const handleScroll = useCallback((event: React.UIEvent<HTMLElement>) => {
const currentScrollTop = event.currentTarget.scrollTop
if (prevScrollTop.current > currentScrollTop) {
isUserManuallyScrollingUp.current = true
} else {
const currentScrollTop = event.currentTarget.scrollTop
const scrollHeight = event.currentTarget.scrollHeight
const clientHeight = event.currentTarget.clientHeight
if (currentScrollTop + clientHeight >= scrollHeight) {
isUserManuallyScrollingUp.current = false
}
}
if (isUserManuallyScrollingUp.current === true) {
event.preventDefault()
event.stopPropagation()
}
prevScrollTop.current = currentScrollTop
}, [])
useEffect(() => {
if (isUserManuallyScrollingUp.current === true || !listRef.current) return
const scrollHeight = listRef.current?.scrollHeight ?? 0
listRef.current?.scrollTo({
top: scrollHeight,
behavior: 'instant',
})
}, [listRef.current?.scrollHeight, isUserManuallyScrollingUp, logs])
return (
<div
<ScrollArea
ref={listRef}
className={twMerge(
'p-4 pb-0',
!withCopy && 'max-w-[38vw] lg:max-w-[40vw] xl:max-w-[50vw]',
'h-[calc(100%-49px)] w-full p-4 py-0',
logs.length === 0 && 'mx-auto'
)}
onScroll={handleScroll}
>
{withCopy && (
<div className="absolute right-2 top-7">
@ -107,7 +142,7 @@ const ServerLogs = (props: ServerLogsProps) => {
)}
<div className="flex h-full w-full flex-col">
{logs.length > 0 ? (
<code className="inline-block whitespace-break-spaces text-[13px]">
<code className="inline-block max-w-[38vw] whitespace-break-spaces text-[13px] lg:max-w-[40vw] xl:max-w-[50vw]">
{logs.slice(-limit).map((log, i) => {
return (
<p key={i} className="my-2 leading-relaxed">
@ -256,7 +291,7 @@ const ServerLogs = (props: ServerLogsProps) => {
</div>
)}
</div>
</div>
</ScrollArea>
)
}

View File

@ -0,0 +1,50 @@
import React from 'react'
import { render, fireEvent } from '@testing-library/react'
import TagInput from './index' // Adjust the import path as necessary
import '@testing-library/jest-dom'
describe('TagInput Component', () => {
let props: any
beforeEach(() => {
props = {
title: 'Tags',
name: 'tag-input',
description: 'Add your tags',
placeholder: 'Enter a tag',
value: ['tag1', 'tag2'],
onValueChanged: jest.fn(),
}
})
it('renders correctly', () => {
const { getByText, getByPlaceholderText } = render(<TagInput {...props} />)
expect(getByText('Tags')).toBeInTheDocument()
expect(getByText('tag1')).toBeInTheDocument()
expect(getByText('tag2')).toBeInTheDocument()
expect(getByPlaceholderText('Enter a tag')).toBeInTheDocument()
})
it('calls onValueChanged when a new tag is added', () => {
const { getByPlaceholderText } = render(<TagInput {...props} />)
const input = getByPlaceholderText('Enter a tag')
fireEvent.change(input, { target: { value: 'tag3' } })
fireEvent.keyDown(input, { key: 'Enter', code: 'Enter' })
expect(props.onValueChanged).toHaveBeenCalledWith(
expect.arrayContaining(['tag1', 'tag2', 'tag3'])
)
})
it('calls onValueChanged when a tag is removed', () => {
const { getAllByRole } = render(<TagInput {...props} />)
const removeButton = getAllByRole('button')[0] // Click on the first remove button
fireEvent.click(removeButton)
expect(props.onValueChanged).toHaveBeenCalledWith(
expect.arrayContaining(['tag2'])
)
})
})

View File

@ -0,0 +1,149 @@
import { useEffect, useRef, useState } from 'react'
import { Badge, Input, Tooltip } from '@janhq/joi'
import { InfoIcon, XIcon } from 'lucide-react'
type Props = {
title: string
disabled?: boolean
name: string
description: string
placeholder: string
value: string[]
onValueChanged?: (e: string | number | boolean | string[]) => void
}
function TooltipBadge({
item,
value,
onValueChanged,
}: {
item: string
value: string[]
onValueChanged?: (e: string[]) => void
}) {
const textRef = useRef<HTMLSpanElement>(null)
const [isEllipsized, setIsEllipsized] = useState(false)
useEffect(() => {
if (textRef.current) {
setIsEllipsized(textRef.current.scrollWidth > textRef.current.clientWidth)
}
}, [item])
return (
<div className="relative">
{isEllipsized ? (
<Tooltip
trigger={
<div className="relative">
<Badge theme="secondary" className="text-ellipsis">
<span
ref={textRef}
className="inline-block max-w-[100px] overflow-hidden text-ellipsis whitespace-nowrap"
>
{item}
</span>
<button
type="button"
className="ml-1.5 w-3 bg-transparent"
onClick={() => {
onValueChanged &&
onValueChanged(value.filter((i) => i !== item))
}}
>
<XIcon className="w-3" />
</button>
</Badge>
</div>
}
content={item}
/>
) : (
<Badge theme="secondary" className="relative">
<span
ref={textRef}
className="max-w-[90px] overflow-hidden text-ellipsis"
>
{item}
</span>
<button
type="button"
className="ml-1.5 w-3 bg-transparent"
onClick={() => {
onValueChanged && onValueChanged(value.filter((i) => i !== item))
}}
>
<XIcon className="w-3" />
</button>
</Badge>
)}
</div>
)
}
const TagInput = ({
title,
disabled = false,
value,
description,
placeholder,
onValueChanged,
}: Props) => {
const [pendingDataPoint, setPendingDataPoint] = useState('')
const addPendingDataPoint = () => {
if (pendingDataPoint) {
const newDataPoints = new Set([...value, pendingDataPoint])
onValueChanged && onValueChanged(Array.from(newDataPoints))
setPendingDataPoint('')
}
}
return (
<div className="flex flex-col">
<div className="mb-2 flex items-center gap-x-2">
<p className="font-medium">{title}</p>
<Tooltip
trigger={
<InfoIcon
size={16}
className="flex-shrink-0 text-[hsla(var(--text-secondary))]"
/>
}
content={description}
/>
</div>
<Input
value={pendingDataPoint}
disabled={disabled}
onChange={(e) => setPendingDataPoint(e.target.value)}
placeholder={placeholder}
className="w-full"
onKeyDown={(e) => {
if (e.key === 'Enter' || e.key === 'Tab') {
e.preventDefault()
addPendingDataPoint()
}
}}
/>
{value.length > 0 && (
<div className="relative mt-2 flex min-h-[2.5rem] flex-wrap items-center gap-2">
{value.map((item, idx) => {
return (
<TooltipBadge
key={idx}
item={item}
value={value}
onValueChanged={onValueChanged}
/>
)
})}
</div>
)}
</div>
)
}
export default TagInput

View File

@ -12,14 +12,35 @@ export const janDataFolderPathAtom = atom('')
export const experimentalFeatureEnabledAtom = atomWithStorage(
EXPERIMENTAL_FEATURE,
false
false,
undefined,
{ getOnInit: true }
)
export const proxyEnabledAtom = atomWithStorage(PROXY_FEATURE_ENABLED, false)
export const proxyAtom = atomWithStorage(HTTPS_PROXY_FEATURE, '')
export const proxyEnabledAtom = atomWithStorage(
PROXY_FEATURE_ENABLED,
false,
undefined,
{ getOnInit: true }
)
export const proxyAtom = atomWithStorage(HTTPS_PROXY_FEATURE, '', undefined, {
getOnInit: true,
})
export const ignoreSslAtom = atomWithStorage(IGNORE_SSL, false)
export const vulkanEnabledAtom = atomWithStorage(VULKAN_ENABLED, false)
export const quickAskEnabledAtom = atomWithStorage(QUICK_ASK_ENABLED, false)
export const ignoreSslAtom = atomWithStorage(IGNORE_SSL, false, undefined, {
getOnInit: true,
})
export const vulkanEnabledAtom = atomWithStorage(
VULKAN_ENABLED,
false,
undefined,
{ getOnInit: true }
)
export const quickAskEnabledAtom = atomWithStorage(
QUICK_ASK_ENABLED,
false,
undefined,
{ getOnInit: true }
)
export const hostAtom = atom('http://localhost:1337/')

View File

@ -11,13 +11,22 @@ import {
updateThreadStateLastMessageAtom,
} from './Thread.atom'
import { TokenSpeed } from '@/types/token'
/**
* Stores all chat messages for all threads
*/
export const chatMessages = atom<Record<string, ThreadMessage[]>>({})
/**
* Stores the status of the messages load for each thread
*/
export const readyThreadsMessagesAtom = atom<Record<string, boolean>>({})
/**
* Store the token speed for current message
*/
export const tokenSpeedAtom = atom<TokenSpeed | undefined>(undefined)
/**
* Return the chat messages for the current active conversation
*/

View File

@ -1,3 +1,5 @@
import { atom } from 'jotai'
export const serverEnabledAtom = atom<boolean>(false)
export const LocalAPIserverModelParamsAtom = atom()

View File

@ -16,7 +16,9 @@ enum ModelStorageAtomKeys {
*/
export const downloadedModelsAtom = atomWithStorage<Model[]>(
ModelStorageAtomKeys.DownloadedModels,
[]
[],
undefined,
{ getOnInit: true }
)
/**
@ -25,7 +27,9 @@ export const downloadedModelsAtom = atomWithStorage<Model[]>(
*/
export const configuredModelsAtom = atomWithStorage<Model[]>(
ModelStorageAtomKeys.AvailableModels,
[]
[],
undefined,
{ getOnInit: true }
)
export const removeDownloadedModelAtom = atom(

View File

@ -13,10 +13,22 @@ export const REDUCE_TRANSPARENT = 'reduceTransparent'
export const SPELL_CHECKING = 'spellChecking'
export const themesOptionsAtom = atom<{ name: string; value: string }[]>([])
export const janThemesPathAtom = atom<string | undefined>(undefined)
export const selectedThemeIdAtom = atomWithStorage<string>(THEME, '')
export const selectedThemeIdAtom = atomWithStorage<string>(
THEME,
'',
undefined,
{ getOnInit: true }
)
export const themeDataAtom = atom<Theme | undefined>(undefined)
export const reduceTransparentAtom = atomWithStorage<boolean>(
REDUCE_TRANSPARENT,
false
false,
undefined,
{ getOnInit: true }
)
export const spellCheckAtom = atomWithStorage<boolean>(
SPELL_CHECKING,
false,
undefined,
{ getOnInit: true }
)
export const spellCheckAtom = atomWithStorage<boolean>(SPELL_CHECKING, false)

View File

@ -207,7 +207,9 @@ export const setThreadModelParamsAtom = atom(
*/
export const activeSettingInputBoxAtom = atomWithStorage<boolean>(
ACTIVE_SETTING_INPUT_BOX,
false
false,
undefined,
{ getOnInit: true }
)
/**

View File

@ -26,15 +26,13 @@ export const stateModelAtom = atom<ModelState>({
model: undefined,
})
const pendingModelLoadAtom = atom<boolean>(false)
export function useActiveModel() {
const [activeModel, setActiveModel] = useAtom(activeModelAtom)
const activeThread = useAtomValue(activeThreadAtom)
const [stateModel, setStateModel] = useAtom(stateModelAtom)
const downloadedModels = useAtomValue(downloadedModelsAtom)
const setLoadModelError = useSetAtom(loadModelErrorAtom)
const [pendingModelLoad, setPendingModelLoad] = useAtom(pendingModelLoadAtom)
const pendingModelLoad = useRef(false)
const isVulkanEnabled = useAtomValue(vulkanEnabledAtom)
const downloadedModelsRef = useRef<Model[]>([])
@ -55,7 +53,7 @@ export function useActiveModel() {
if (activeModel) {
await stopModel(activeModel)
}
setPendingModelLoad(true)
pendingModelLoad.current = true
let model = downloadedModelsRef?.current.find((e) => e.id === modelId)
@ -120,16 +118,16 @@ export function useActiveModel() {
undefined,
}))
if (!pendingModelLoad && abortable) {
if (!pendingModelLoad.current && abortable) {
return Promise.reject(new Error('aborted'))
}
toaster({
title: 'Failed!',
description: `Model ${model.id} failed to start.`,
description: `Model ${model.id} failed to start. ${error.message ?? ''}`,
type: 'error',
})
setLoadModelError(error)
setLoadModelError(error.message ?? error)
return Promise.reject(error)
})
}
@ -147,16 +145,10 @@ export function useActiveModel() {
.then(() => {
setActiveModel(undefined)
setStateModel({ state: 'start', loading: false, model: undefined })
setPendingModelLoad(false)
pendingModelLoad.current = false
})
},
[
activeModel,
setStateModel,
setActiveModel,
setPendingModelLoad,
stateModel,
]
[activeModel, setStateModel, setActiveModel, stateModel]
)
const stopInference = useCallback(async () => {

View File

@ -0,0 +1,41 @@
import { useCallback, useEffect } from 'react'
import { ExtensionTypeEnum, ModelExtension } from '@janhq/core'
import { useAtomValue } from 'jotai'
import { extensionManager } from '@/extension'
import {
ignoreSslAtom,
proxyAtom,
proxyEnabledAtom,
} from '@/helpers/atoms/AppConfig.atom'
export const useConfigurations = () => {
const proxyEnabled = useAtomValue(proxyEnabledAtom)
const proxyUrl = useAtomValue(proxyAtom)
const proxyIgnoreSSL = useAtomValue(ignoreSslAtom)
const configurePullOptions = useCallback(() => {
extensionManager
.get<ModelExtension>(ExtensionTypeEnum.Model)
?.configurePullOptions(
proxyEnabled
? {
proxy_url: proxyUrl,
verify_peer_ssl: !proxyIgnoreSSL,
}
: {
proxy_url: '',
verify_peer_ssl: false,
}
)
}, [proxyEnabled, proxyUrl, proxyIgnoreSSL])
useEffect(() => {
configurePullOptions()
}, [])
return {
configurePullOptions,
}
}

View File

@ -17,6 +17,14 @@ jest.mock('@janhq/core', () => ({
fs: {
rm: jest.fn(),
},
EngineManager: {
instance: jest.fn().mockReturnValue({
get: jest.fn(),
engines: {
values: jest.fn().mockReturnValue([]),
},
}),
},
}))
describe('useFactoryReset', () => {
@ -37,6 +45,7 @@ describe('useFactoryReset', () => {
getAppConfigurations: mockGetAppConfigurations,
updateAppConfiguration: mockUpdateAppConfiguration,
relaunch: mockRelaunch,
factoryReset: jest.fn(),
},
}
mockGetAppConfigurations.mockResolvedValue({
@ -72,7 +81,6 @@ describe('useFactoryReset', () => {
expect(mockSetFactoryResetState).toHaveBeenCalledWith(
FactoryResetState.ClearLocalStorage
)
expect(mockRelaunch).toHaveBeenCalled()
})
it('should keep current folder when specified', async () => {

View File

@ -1,6 +1,6 @@
import { useCallback } from 'react'
import { fs, AppConfiguration } from '@janhq/core'
import { fs, AppConfiguration, EngineManager } from '@janhq/core'
import { atom, useAtomValue, useSetAtom } from 'jotai'
import { useActiveModel } from './useActiveModel'
@ -37,6 +37,15 @@ export default function useFactoryReset() {
// 1: Stop running model
setFactoryResetState(FactoryResetState.StoppingModel)
await stopModel()
await Promise.all(
EngineManager.instance()
.engines.values()
.map(async (engine) => {
await engine.onUnload()
})
)
await new Promise((resolve) => setTimeout(resolve, 4000))
// 2: Delete the old jan data folder
@ -53,13 +62,17 @@ export default function useFactoryReset() {
await window.core?.api?.updateAppConfiguration(configuration)
}
// Perform factory reset
await window.core?.api?.factoryReset()
// 4: Clear app local storage
setFactoryResetState(FactoryResetState.ClearLocalStorage)
// reset the localStorage
localStorage.clear()
window.core = undefined
// 5: Relaunch the app
await window.core?.api?.relaunch()
window.location.reload()
},
[defaultJanDataFolder, stopModel, setFactoryResetState]
)

View File

@ -9,7 +9,6 @@ import {
OptionType,
events,
fs,
baseName,
} from '@janhq/core'
import { atom, useAtomValue, useSetAtom } from 'jotai'

View File

@ -1,5 +1,5 @@
// useModels.test.ts
import { renderHook, act } from '@testing-library/react'
import { renderHook, act, waitFor } from '@testing-library/react'
import { events, ModelEvent, ModelManager } from '@janhq/core'
import { extensionManager } from '@/extension'
@ -36,19 +36,98 @@ describe('useModels', () => {
}),
get: () => undefined,
has: () => true,
// set: () => {}
},
})
jest.spyOn(extensionManager, 'get').mockReturnValue(mockModelExtension)
act(() => {
renderHook(() => useModels())
const { result } = renderHook(() => useModels())
await act(() => {
result.current?.getData()
})
expect(mockModelExtension.getModels).toHaveBeenCalled()
})
it('should return empty on error', async () => {
const mockModelExtension = {
getModels: jest.fn().mockRejectedValue(new Error('Error')),
} as any
;(ModelManager.instance as jest.Mock).mockReturnValue({
models: {
values: () => ({
toArray: () => ({
filter: () => models,
}),
}),
get: () => undefined,
has: () => true,
},
})
jest.spyOn(extensionManager, 'get').mockReturnValue(mockModelExtension)
const { result } = renderHook(() => useModels())
await act(() => {
result.current?.getData()
})
expect(mockModelExtension.getModels()).rejects.toThrow()
})
it('should update states on models update', async () => {
const mockModelExtension = {
getModels: jest.fn().mockResolvedValue(models),
} as any
;(ModelManager.instance as jest.Mock).mockReturnValue({
models: {
values: () => ({
toArray: () => ({
filter: () => models,
}),
}),
get: () => undefined,
has: () => true,
},
})
jest.spyOn(extensionManager, 'get').mockReturnValue(mockModelExtension)
jest.spyOn(events, 'on').mockImplementationOnce((event, cb) => {
cb({ fetch: false })
})
renderHook(() => useModels())
expect(mockModelExtension.getModels).not.toHaveBeenCalled()
})
it('should update states on models update', async () => {
const mockModelExtension = {
getModels: jest.fn().mockResolvedValue(models),
} as any
;(ModelManager.instance as jest.Mock).mockReturnValue({
models: {
values: () => ({
toArray: () => ({
filter: () => models,
}),
}),
get: () => undefined,
has: () => true,
},
})
jest.spyOn(extensionManager, 'get').mockReturnValue(mockModelExtension)
jest.spyOn(events, 'on').mockImplementationOnce((event, cb) => {
cb({ fetch: true })
})
renderHook(() => useModels())
expect(mockModelExtension.getModels).toHaveBeenCalled()
})
it('should remove event listener on unmount', async () => {
const removeListenerSpy = jest.spyOn(events, 'off')

View File

@ -16,6 +16,7 @@ import { useDebouncedCallback } from 'use-debounce'
import { isLocalEngine } from '@/utils/modelEngine'
import { extensionManager } from '@/extension'
import {
configuredModelsAtom,
downloadedModelsAtom,
@ -34,7 +35,7 @@ const useModels = () => {
const getDownloadedModels = async () => {
const localModels = (await getModels()).map((e) => ({
...e,
name: ModelManager.instance().models.get(e.id)?.name ?? e.id,
name: ModelManager.instance().models.get(e.id)?.name ?? e.name ?? e.id,
metadata:
ModelManager.instance().models.get(e.id)?.metadata ?? e.metadata,
}))
@ -53,9 +54,11 @@ const useModels = () => {
setDownloadedModels(toUpdate)
let isUpdated = false
toUpdate.forEach((model) => {
if (!ModelManager.instance().models.has(model.id)) {
ModelManager.instance().models.set(model.id, model)
// eslint-disable-next-line react-hooks/exhaustive-deps
isUpdated = true
}
})
@ -75,21 +78,41 @@ const useModels = () => {
const reloadData = useDebouncedCallback(() => getData(), 300)
useEffect(() => {
// Try get data on mount
reloadData()
const updateStates = useCallback(() => {
const cachedModels = ModelManager.instance().models.values().toArray()
setDownloadedModels((downloadedModels) => [
...downloadedModels,
...cachedModels.filter(
(e) =>
!isLocalEngine(e.engine) &&
!downloadedModels.some((g: Model) => g.id === e.id)
),
])
setExtensionModels(cachedModels)
}, [setDownloadedModels, setExtensionModels])
const getModels = async (): Promise<Model[]> =>
extensionManager
.get<ModelExtension>(ExtensionTypeEnum.Model)
?.getModels()
.catch(() => []) ?? []
useEffect(() => {
// Listen for model updates
events.on(ModelEvent.OnModelsUpdate, async () => reloadData())
events.on(ModelEvent.OnModelsUpdate, async (data: { fetch?: boolean }) => {
if (data.fetch) reloadData()
else updateStates()
})
return () => {
// Remove listener on unmount
events.off(ModelEvent.OnModelsUpdate, async () => {})
}
}, [getData, reloadData])
}, [reloadData, updateStates])
return {
getData,
}
}
const getModels = async (): Promise<Model[]> =>
extensionManager.get<ModelExtension>(ExtensionTypeEnum.Model)?.getModels() ??
[]
export default useModels

Some files were not shown because too many files have changed in this diff Show More