diff --git a/.github/workflows/template-noti-discord-and-update-url-readme.yml b/.github/workflows/template-noti-discord-and-update-url-readme.yml index a53c20be5..d799dacfa 100644 --- a/.github/workflows/template-noti-discord-and-update-url-readme.yml +++ b/.github/workflows/template-noti-discord-and-update-url-readme.yml @@ -47,11 +47,11 @@ jobs: with: args: | Jan App ${{ inputs.build_reason }} build artifact version {{ VERSION }}: - - Windows: https://delta.jan.ai/nightly/jan-win-x64-{{ VERSION }}.exe - - macOS Intel: https://delta.jan.ai/nightly/jan-mac-x64-{{ VERSION }}.dmg - - macOS Apple Silicon: https://delta.jan.ai/nightly/jan-mac-arm64-{{ VERSION }}.dmg - - Linux Deb: https://delta.jan.ai/nightly/jan-linux-amd64-{{ VERSION }}.deb - - Linux AppImage: https://delta.jan.ai/nightly/jan-linux-x86_64-{{ VERSION }}.AppImage + - Windows: https://delta.jan.ai/nightly/jan-nightly-win-x64-{{ VERSION }}.exe + - macOS Intel: https://delta.jan.ai/nightly/jan-nightly-mac-x64-{{ VERSION }}.dmg + - macOS Apple Silicon: https://delta.jan.ai/nightly/jan-nightly-mac-arm64-{{ VERSION }}.dmg + - Linux Deb: https://delta.jan.ai/nightly/jan-nightly-linux-amd64-{{ VERSION }}.deb + - Linux AppImage: https://delta.jan.ai/nightly/jan-nightly-linux-x86_64-{{ VERSION }}.AppImage - Github action run: https://github.com/janhq/jan/actions/runs/{{ GITHUB_RUN_ID }} env: DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK }} \ No newline at end of file diff --git a/.gitignore b/.gitignore index f28d152d9..ab815678a 100644 --- a/.gitignore +++ b/.gitignore @@ -47,3 +47,4 @@ coverage .yarnrc test_results.html *.tsbuildinfo +electron/shared/** diff --git a/core/src/browser/core.test.ts b/core/src/browser/core.test.ts index f38cc0b40..720ea9dcf 100644 --- a/core/src/browser/core.test.ts +++ b/core/src/browser/core.test.ts @@ -3,7 +3,6 @@ import { joinPath } from './core' import { openFileExplorer } from './core' import { getJanDataFolderPath } from './core' import { abortDownload } from './core' -import { getFileSize } from './core' import { executeOnMain } from './core' describe('test core apis', () => { @@ -66,18 +65,6 @@ describe('test core apis', () => { expect(result).toBe('aborted') }) - it('should get file size', async () => { - const url = 'http://example.com/file' - globalThis.core = { - api: { - getFileSize: jest.fn().mockResolvedValue(1024), - }, - } - const result = await getFileSize(url) - expect(globalThis.core.api.getFileSize).toHaveBeenCalledWith(url) - expect(result).toBe(1024) - }) - it('should execute function on main process', async () => { const extension = 'testExtension' const method = 'testMethod' diff --git a/core/src/browser/core.ts b/core/src/browser/core.ts index b19e0b339..7058fc172 100644 --- a/core/src/browser/core.ts +++ b/core/src/browser/core.ts @@ -28,15 +28,6 @@ const downloadFile: (downloadRequest: DownloadRequest, network?: NetworkConfig) network ) => globalThis.core?.api?.downloadFile(downloadRequest, network) -/** - * Get unit in bytes for a remote file. - * - * @param url - The url of the file. - * @returns {Promise} - A promise that resolves with the file size. - */ -const getFileSize: (url: string) => Promise = (url: string) => - globalThis.core.api?.getFileSize(url) - /** * Aborts the download of a specific file. * @param {string} fileName - The name of the file whose download is to be aborted. 
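With `getFileSize` removed from the core browser API above, a caller that still needs the size of a remote file can issue a plain HEAD request itself. A minimal sketch, not part of this PR (`getRemoteFileSize` is a hypothetical helper name), mirroring the `-1` fallback of the deleted Node-side implementation:

```typescript
// Hypothetical stand-in for the removed core.api.getFileSize route:
// ask the server for headers only and read Content-Length.
const getRemoteFileSize = async (url: string): Promise<number> => {
  const response = await fetch(url, { method: 'HEAD' })
  const length = response.headers.get('content-length')
  // Keep the old contract: -1 when the server does not report a size.
  return length !== null ? Number(length) : -1
}
```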
@@ -167,7 +158,6 @@ export {
   getUserHomePath,
   systemInformation,
   showToast,
-  getFileSize,
   dirName,
   FileStat,
 }
diff --git a/core/src/browser/extension.ts b/core/src/browser/extension.ts
index d934e1c06..b7a9fca4e 100644
--- a/core/src/browser/extension.ts
+++ b/core/src/browser/extension.ts
@@ -113,7 +113,6 @@ export abstract class BaseExtension implements ExtensionType {
     for (const model of models) {
       ModelManager.instance().register(model)
     }
-    events.emit(ModelEvent.OnModelsUpdate, {})
   }

   /**
diff --git a/core/src/browser/extensions/engines/helpers/sse.ts b/core/src/browser/extensions/engines/helpers/sse.ts
index 9713256b3..084267582 100644
--- a/core/src/browser/extensions/engines/helpers/sse.ts
+++ b/core/src/browser/extensions/engines/helpers/sse.ts
@@ -38,14 +38,16 @@ export function requestInference(
           errorCode = ErrorCode.InvalidApiKey
         }
         const error = {
-          message: data.error?.message ?? 'Error occurred.',
+          message: data.error?.message ?? data.message ?? 'Error occurred.',
           code: errorCode,
         }
         subscriber.error(error)
         subscriber.complete()
         return
       }
-      if (model.parameters?.stream === false) {
+      // The stream parameter set in the model can be overridden by the one
+      // in the request body (transformed payload), so check both places
+      if (requestBody?.stream === false || model.parameters?.stream === false) {
         const data = await response.json()
         if (transformResponse) {
           subscriber.next(transformResponse(data))
diff --git a/core/src/browser/extensions/model.ts b/core/src/browser/extensions/model.ts
index e224ec5cc..9a3428988 100644
--- a/core/src/browser/extensions/model.ts
+++ b/core/src/browser/extensions/model.ts
@@ -12,6 +12,7 @@ export abstract class ModelExtension extends BaseExtension implements ModelInter
     return ExtensionTypeEnum.Model
   }

+  abstract configurePullOptions(configs: { [key: string]: any }): Promise<any>
   abstract getModels(): Promise<Model[]>
   abstract pullModel(model: string, id?: string, name?: string): Promise<void>
   abstract cancelModelPull(modelId: string): Promise<void>
diff --git a/core/src/node/api/processors/download.test.ts b/core/src/node/api/processors/download.test.ts
index 21d94165d..c4b171a7d 100644
--- a/core/src/node/api/processors/download.test.ts
+++ b/core/src/node/api/processors/download.test.ts
@@ -23,6 +23,11 @@ jest.mock('fs', () => ({
   createWriteStream: jest.fn(),
 }))

+const requestMock = jest.fn((options, callback) => {
+  callback(new Error('Test error'), null)
+})
+jest.mock('request', () => requestMock)
+
 jest.mock('request-progress', () => {
   return jest.fn().mockImplementation(() => {
     return {
@@ -54,18 +59,6 @@ describe('Downloader', () => {
   beforeEach(() => {
     jest.resetAllMocks()
   })
-  it('should handle getFileSize errors correctly', async () => {
-    const observer = jest.fn()
-    const url = 'http://example.com/file'
-
-    const downloader = new Downloader(observer)
-    const requestMock = jest.fn((options, callback) => {
-      callback(new Error('Test error'), null)
-    })
-    jest.mock('request', () => requestMock)
-
-    await expect(downloader.getFileSize(observer, url)).rejects.toThrow('Test error')
-  })

   it('should pause download correctly', () => {
     const observer = jest.fn()
diff --git a/core/src/node/api/processors/download.ts b/core/src/node/api/processors/download.ts
index ebeb7c299..709ad9687 100644
--- a/core/src/node/api/processors/download.ts
+++ b/core/src/node/api/processors/download.ts
@@ -135,25 +135,4 @@ export class Downloader implements Processor {
   pauseDownload(_observer: any, fileName: any) {
     DownloadManager.instance.networkRequests[fileName]?.pause()
   }
-
-  async getFileSize(_observer: any,
url: string): Promise { - return new Promise((resolve, reject) => { - const request = require('request') - request( - { - url, - method: 'HEAD', - }, - function (err: any, response: any) { - if (err) { - console.error('Getting file size failed:', err) - reject(err) - } else { - const size: number = response.headers['content-length'] ?? -1 - resolve(size) - } - } - ) - }) - } } diff --git a/core/src/node/api/restful/common.ts b/core/src/node/api/restful/common.ts index 39f7b8d8b..989104e03 100644 --- a/core/src/node/api/restful/common.ts +++ b/core/src/node/api/restful/common.ts @@ -1,7 +1,6 @@ import { HttpServer } from '../HttpServer' import { chatCompletions, - deleteBuilder, downloadModel, getBuilder, retrieveBuilder, @@ -14,8 +13,6 @@ import { } from './helper/builder' import { JanApiRouteConfiguration } from './helper/configuration' -import { startModel, stopModel } from './helper/startStopModel' -import { ModelSettingParams } from '../../../types' export const commonRouter = async (app: HttpServer) => { const normalizeData = (data: any) => { @@ -28,19 +25,25 @@ export const commonRouter = async (app: HttpServer) => { // Read & Delete :: Threads | Models | Assistants Object.keys(JanApiRouteConfiguration).forEach((key) => { app.get(`/${key}`, async (_req, _res) => { - if (key === 'models') { + if (key.includes('models')) { return models(_req, _res) } return getBuilder(JanApiRouteConfiguration[key]).then(normalizeData) }) - app.get(`/${key}/:id`, async (request: any) => - retrieveBuilder(JanApiRouteConfiguration[key], request.params.id) - ) + app.get(`/${key}/:id`, async (_req: any, _res: any) => { + if (key.includes('models')) { + return models(_req, _res) + } + return retrieveBuilder(JanApiRouteConfiguration[key], _req.params.id) + }) - app.delete(`/${key}/:id`, async (request: any) => - deleteBuilder(JanApiRouteConfiguration[key], request.params.id) - ) + app.delete(`/${key}/:id`, async (_req: any, _res: any) => { + if (key.includes('models')) { + return models(_req, _res) + } + return retrieveBuilder(JanApiRouteConfiguration[key], _req.params.id) + }) }) // Threads @@ -70,16 +73,9 @@ export const commonRouter = async (app: HttpServer) => { }) ) - app.put(`/models/:modelId/start`, async (request: any) => { - let settingParams: ModelSettingParams | undefined = undefined - if (Object.keys(request.body).length !== 0) { - settingParams = JSON.parse(request.body) as ModelSettingParams - } + app.post(`/models/start`, async (request: any, reply: any) => models(request, reply)) - return startModel(request.params.modelId, settingParams) - }) - - app.put(`/models/:modelId/stop`, async (request: any) => stopModel(request.params.modelId)) + app.post(`/models/stop`, async (request: any, reply: any) => models(request, reply)) // Chat Completion app.post(`/chat/completions`, async (request: any, reply: any) => chatCompletions(request, reply)) diff --git a/core/src/node/api/restful/helper/builder.test.ts b/core/src/node/api/restful/helper/builder.test.ts index f21257098..cfaee6007 100644 --- a/core/src/node/api/restful/helper/builder.test.ts +++ b/core/src/node/api/restful/helper/builder.test.ts @@ -1,17 +1,7 @@ -import { - existsSync, - readdirSync, - readFileSync, - writeFileSync, - mkdirSync, - appendFileSync, - rmdirSync, -} from 'fs' -import { join } from 'path' +import { existsSync, readdirSync, readFileSync, writeFileSync, mkdirSync, appendFileSync } from 'fs' import { getBuilder, retrieveBuilder, - deleteBuilder, getMessages, retrieveMessage, createThread, @@ -82,34 +72,6 @@ 
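The router rework shown above replaces the bespoke `PUT /models/:modelId/start` and `PUT /models/:modelId/stop` handlers with `POST /models/start` and `POST /models/stop` passthroughs to the shared `models` proxy, so the model id now travels in the JSON body instead of the URL path. A hypothetical client call, assuming the default local API server address and `/v1` prefix from Jan's docs (not code from this PR):

```typescript
// Hypothetical client for the reworked routes: the model id moves out of
// the URL path and into the request body, and the server proxies the call
// straight through to Cortex.
const startModel = (modelId: string): Promise<Response> =>
  fetch('http://localhost:1337/v1/models/start', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ model: modelId }),
  })
```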
describe('builder helper functions', () => { }) }) - describe('deleteBuilder', () => { - it('should return a message if trying to delete Jan assistant', async () => { - const result = await deleteBuilder({ ...mockConfiguration, dirName: 'assistants' }, 'jan') - expect(result).toEqual({ message: 'Cannot delete Jan assistant' }) - }) - - it('should return a message if data is not found', async () => { - ;(existsSync as jest.Mock).mockReturnValue(true) - ;(readdirSync as jest.Mock).mockReturnValue(['file1']) - ;(readFileSync as jest.Mock).mockReturnValue(JSON.stringify({ id: 'model1' })) - - const result = await deleteBuilder(mockConfiguration, 'nonexistentId') - expect(result).toEqual({ message: 'Not found' }) - }) - - it('should delete the directory and return success message', async () => { - ;(existsSync as jest.Mock).mockReturnValue(true) - ;(readdirSync as jest.Mock).mockReturnValue(['file1']) - ;(readFileSync as jest.Mock).mockReturnValue(JSON.stringify({ id: 'model1' })) - - const result = await deleteBuilder(mockConfiguration, 'model1') - expect(rmdirSync).toHaveBeenCalledWith(join('/mock/path', 'mockDir', 'model1'), { - recursive: true, - }) - expect(result).toEqual({ id: 'model1', object: 'mockObject', deleted: true }) - }) - }) - describe('getMessages', () => { it('should return an empty array if message file does not exist', async () => { ;(existsSync as jest.Mock).mockReturnValue(false) diff --git a/core/src/node/api/restful/helper/builder.ts b/core/src/node/api/restful/helper/builder.ts index c3493a8be..e081708cf 100644 --- a/core/src/node/api/restful/helper/builder.ts +++ b/core/src/node/api/restful/helper/builder.ts @@ -73,34 +73,6 @@ export const retrieveBuilder = async (configuration: RouteConfiguration, id: str return filteredData } -export const deleteBuilder = async (configuration: RouteConfiguration, id: string) => { - if (configuration.dirName === 'assistants' && id === 'jan') { - return { - message: 'Cannot delete Jan assistant', - } - } - - const directoryPath = join(getJanDataFolderPath(), configuration.dirName) - try { - const data = await retrieveBuilder(configuration, id) - if (!data) { - return { - message: 'Not found', - } - } - - const objectPath = join(directoryPath, id) - rmdirSync(objectPath, { recursive: true }) - return { - id: id, - object: configuration.delete.object, - deleted: true, - } - } catch (ex) { - console.error(ex) - } -} - export const getMessages = async (threadId: string): Promise => { const threadDirPath = join(getJanDataFolderPath(), 'threads', threadId) const messageFile = 'messages.jsonl' @@ -308,7 +280,7 @@ export const models = async (request: any, reply: any) => { 'Content-Type': 'application/json', } - const response = await fetch(`${CORTEX_API_URL}/models`, { + const response = await fetch(`${CORTEX_API_URL}/models${request.url.split('/models')[1] ?? 
""}`, { method: request.method, headers: headers, body: JSON.stringify(request.body), diff --git a/core/src/node/api/restful/helper/startStopModel.test.ts b/core/src/node/api/restful/helper/startStopModel.test.ts deleted file mode 100644 index 7c1a56cf1..000000000 --- a/core/src/node/api/restful/helper/startStopModel.test.ts +++ /dev/null @@ -1,10 +0,0 @@ -import { startModel } from './startStopModel' - -describe('startModel', () => { - it('test_startModel_error', async () => { - const modelId = 'testModelId' - const settingParams = undefined - - expect(startModel(modelId, settingParams)).resolves.toThrow() - }) -}) diff --git a/core/src/node/api/restful/helper/startStopModel.ts b/core/src/node/api/restful/helper/startStopModel.ts deleted file mode 100644 index 2e9db6d15..000000000 --- a/core/src/node/api/restful/helper/startStopModel.ts +++ /dev/null @@ -1,25 +0,0 @@ -import { ModelSettingParams } from '../../../../types' -import { CORTEX_DEFAULT_PORT, LOCAL_HOST } from './consts' - -/** - * Start a model - * @param modelId - * @param settingParams - * @returns - */ -export const startModel = async (modelId: string, settingParams?: ModelSettingParams) => { - return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/start`, { - method: 'POST', - body: JSON.stringify({ model: modelId, ...settingParams }), - }) -} - -/* - * Stop model. - */ -export const stopModel = async (modelId: string) => { - return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/stop`, { - method: 'POST', - body: JSON.stringify({ model: modelId }), - }) -} diff --git a/core/src/types/api/index.ts b/core/src/types/api/index.ts index 093314a15..dc9afcb00 100644 --- a/core/src/types/api/index.ts +++ b/core/src/types/api/index.ts @@ -27,6 +27,7 @@ export enum NativeRoute { quickAskSizeUpdated = 'quickAskSizeUpdated', ackDeepLink = 'ackDeepLink', + factoryReset = 'factoryReset' } /** @@ -65,7 +66,6 @@ export enum DownloadRoute { pauseDownload = 'pauseDownload', resumeDownload = 'resumeDownload', getDownloadProgress = 'getDownloadProgress', - getFileSize = 'getFileSize', } export enum DownloadEvent { diff --git a/core/src/types/setting/settingComponent.ts b/core/src/types/setting/settingComponent.ts index 2eae4e16f..2474f6bd4 100644 --- a/core/src/types/setting/settingComponent.ts +++ b/core/src/types/setting/settingComponent.ts @@ -12,7 +12,7 @@ export type SettingComponentProps = { export type ConfigType = 'runtime' | 'setting' -export type ControllerType = 'slider' | 'checkbox' | 'input' +export type ControllerType = 'slider' | 'checkbox' | 'input' | 'tag' export type InputType = 'password' | 'text' | 'email' | 'number' | 'tel' | 'url' @@ -22,7 +22,7 @@ export type InputAction = InputActionsTuple[number] export type InputComponentProps = { placeholder: string - value: string + value: string | string[] type?: InputType textAlign?: 'left' | 'right' inputActions?: InputAction[] diff --git a/docs/src/pages/docs/_meta.json b/docs/src/pages/docs/_meta.json index 231f6a763..8ed88963c 100644 --- a/docs/src/pages/docs/_meta.json +++ b/docs/src/pages/docs/_meta.json @@ -13,6 +13,7 @@ }, "desktop": "Desktop", "data-folder": "Jan Data Folder", + "privacy": "Privacy", "user-guides": { "title": "BASIC USAGE", "type": "separator" diff --git a/docs/src/pages/docs/privacy.mdx b/docs/src/pages/docs/privacy.mdx new file mode 100644 index 000000000..d3be5b6de --- /dev/null +++ b/docs/src/pages/docs/privacy.mdx @@ -0,0 +1,63 @@ +--- +title: Jan Privacy +description: Jan is an app that allows you to own your AI. 
We prioritize your control over your data and explain what data we collect and why.
+keywords:
+  [
+    Jan AI,
+    Jan,
+    ChatGPT alternative,
+    local AI,
+    private AI,
+    conversational AI,
+    OpenAI platform alternative,
+    no-subscription fee,
+    large language model,
+    about Jan,
+    desktop application,
+    thinking machine,
+    jan vision,
+  ]
+---
+
+# Privacy
+
+Jan is an app that allows you to own your AI. We prioritize your control over your data and explain what data we collect and why.
+
+- Jan can't see your chats with AI
+- You're free to opt out
+
+## Why and what we track
+
+To build a reliable, user-friendly AI that you own, we need to understand how Jan is used. We collect two types of data: performance data and usage data.
+
+### Performance data
+When Jan crashes, we collect technical details about what went wrong, along with basic information about the hardware you’re using:
+
+- Specific AI model in use during the crash
+- Hardware: `CPU`, `GPU`, `RAM`
+- Logs: `Date/Time`, `OS & version`, `app version`, `error codes & messages`
+
+### Usage data
+
+We track data like how often the app is opened to check:
+
+- **Active Users**: How many people use Jan daily, to measure engagement
+- **Retention Rates**: To understand whether users are finding value in Jan over time
+
+Usage data is tied to a randomly generated telemetry ID. None of our usage data can be linked to your personal identity.
+
+## What we **don’t** track
+- Your conversations with Jan. Those stay on your device.
+- Your files. We don’t scan, upload, or even look at them.
+- Anything tied to your identity.
+
+## Using Cloud Models
+
+Jan allows you to connect cloud model APIs. If you choose to use cloud-based models (e.g., GPT or Claude models), the API provider handling the model will have access to your messages as part of processing the request. Again, Jan doesn't see or store these messages - they go directly to the provider. Remember: with local models, everything stays on your device, so no one - not even us - can see your messages.
+
+## Where we store & process data
+We use [PostHog](https://posthog.com/eu) EU for analytics, ensuring all data is processed within the European Union. This setup complies with GDPR and other strict privacy regulations. PostHog lets us self-host and securely manage the data we collect. Read more [on PostHog's GDPR doc](https://posthog.com/docs/privacy/gdpr-compliance).
+
+For a detailed breakdown of the analytics data we collect, you can check out our analytics repo. If you have any questions or concerns, feel free to reach out to us at hi@jan.ai.
\ No newline at end of file diff --git a/docs/src/pages/integrations/coding/_assets/tabby-answer-engine.png b/docs/src/pages/integrations/coding/_assets/tabby-answer-engine.png new file mode 100644 index 000000000..420b609fc Binary files /dev/null and b/docs/src/pages/integrations/coding/_assets/tabby-answer-engine.png differ diff --git a/docs/src/pages/integrations/coding/_assets/tabby-chat-sidebar.png b/docs/src/pages/integrations/coding/_assets/tabby-chat-sidebar.png new file mode 100644 index 000000000..50cfbd226 Binary files /dev/null and b/docs/src/pages/integrations/coding/_assets/tabby-chat-sidebar.png differ diff --git a/docs/src/pages/integrations/coding/tabby.mdx b/docs/src/pages/integrations/coding/tabby.mdx new file mode 100644 index 000000000..6647b4eb4 --- /dev/null +++ b/docs/src/pages/integrations/coding/tabby.mdx @@ -0,0 +1,104 @@ +--- +title: Tabby +description: A step-by-step guide on integrating Jan with Tabby and VSCode, JetBrains, or other IDEs. +keywords: + [ + Jan, + Customizable Intelligence, LLM, + local AI, + privacy focus, + free and open source, + private and offline, + conversational AI, + no-subscription fee, + large language models, + Tabby integration, + VSCode integration, + JetBrains integration, + ] +--- + +import { Tabs, Steps } from 'nextra/components' + +# Tabby + +## Integrate Jan with Tabby and Your Favorite IDEs + +[Tabby](https://www.tabbyml.com/) is an open-source, self-hosted AI coding assistant. +With Tabby, teams can easily set up their own LLM-powered code completion server. + +Tabby provides integrations with VSCode, JetBrains, and other IDEs to help developers code more efficiently, +and it can be used with various LLM services, including Jan. + +To integrate Jan with Tabby, follow these steps: + + + +### Step 1: Enable the Jan API Server + +To set up Tabby with Jan's Local Server, you must activate the Jan API Server with your chosen model. + +1. Click the `Local API Server` (`<>`) button above the Settings. Jan will direct you to the **Local API Server** section. +2. Configure the server, including the **IP Port**, **Cross-Origin Resource Sharing (CORS)**, and **Verbose Server Logs**. +3. Press the **Start Server** button. + +### Step 2: Find the Model ID and Ensure the Model is Activated + +1. Go to `Settings` > `My Models`. +2. Models are listed with their **Model ID** beneath their names. +3. Click the **three dots (⋮)** button next to the model. +4. Select **Start Model** to activate the model. + +### Step 3: Installing Tabby Server + +Use the following documentation to install the Tabby server: +- [Docker](https://tabby.tabbyml.com/docs/quick-start/installation/docker/) +- [Apple Silicon](https://tabby.tabbyml.com/docs/quick-start/installation/apple/) +- [Linux](https://tabby.tabbyml.com/docs/quick-start/installation/linux/) +- [Windows](https://tabby.tabbyml.com/docs/quick-start/installation/windows/) + +Then, follow the steps to connect Jan with the Tabby server: +[Connect Jan with Tabby](https://tabby.tabbyml.com/docs/references/models-http-api/jan.ai/). + +For example, to connect Jan with Tabby, save the following configuration under `~/.tabby/config.toml`: + +```toml title="~/.tabby/config.toml" +# Chat model +[model.chat.http] +kind = "openai/chat" +model_name = "model_id" +api_endpoint = "http://localhost:1337/v1" +api_key = "" +``` + +Currently, the Jan completion and embedding API is under construction. +Once completed, you can also connect Jan with Tabby for completion and embedding tasks. 
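Before pointing Tabby at Jan, it can help to confirm that the local API server is actually reachable. A quick check, not from the Tabby docs, assuming the default `http://localhost:1337/v1` endpoint used in the config above:

```typescript
// List the models Jan exposes on its OpenAI-compatible endpoint; if this
// request fails, Tabby will not be able to reach Jan either.
const listJanModels = async (): Promise<void> => {
  const response = await fetch('http://localhost:1337/v1/models')
  if (!response.ok) throw new Error(`Jan API server returned ${response.status}`)
  console.log(await response.json())
}

listJanModels()
```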
+ +### Step 4: Installing Tabby on Your Favorite IDEs + +Refer to the following documentation to install the Tabby extension on your favorite IDEs: +- [Visual Studio Code](https://tabby.tabbyml.com/docs/extensions/installation/vscode/) +- [JetBrains IntelliJ Platform](https://tabby.tabbyml.com/docs/extensions/installation/intellij/) +- [VIM / NeoVIM](https://tabby.tabbyml.com/docs/extensions/installation/vim/) + + + +## How to Use Tabby with Jan Integration + +### Answer Engine: Chat with Your Codes and Documentation + +Tabby offers an [Answer Engine](https://tabby.tabbyml.com/docs/administration/answer-engine/) on the homepage, +which can leverage the Jan LLM and related contexts like code, documentation, and web pages to answer user questions. + +Simply open the Tabby homepage at [localhost:8080](http://localhost:8080) and ask your questions. + +![Answer Engine](./_assets/tabby-answer-engine.png) + +### IDE Chat Sidebar + +After installing the Tabby extension on your preferred IDEs, you can engage in a conversation with Jan to: + +1. Discuss your code, receive suggestions, and seek assistance. +2. Request Jan to inline edit your code, and then review and accept the proposed changes. + +![Chat Sidebar](./_assets/tabby-chat-sidebar.png) \ No newline at end of file diff --git a/electron/handlers/native.ts b/electron/handlers/native.ts index 869b9fd58..813231bd4 100644 --- a/electron/handlers/native.ts +++ b/electron/handlers/native.ts @@ -12,6 +12,9 @@ import { } from '@janhq/core/node' import { SelectFileOption } from '@janhq/core' import { menu } from '../utils/menu' +import { migrate } from '../utils/migration' +import { createUserSpace } from '../utils/path' +import { setupExtensions } from '../utils/extension' const isMac = process.platform === 'darwin' @@ -33,14 +36,28 @@ export function handleAppIPCs() { nativeTheme.themeSource = 'light' }) + /** + * Handles the "setCloseApp" IPC message by closing the main application window. + * This effectively closes the application if no other windows are open. + */ ipcMain.handle(NativeRoute.setCloseApp, () => { windowManager.mainWindow?.close() }) + /** + * Handles the "setMinimizeApp" IPC message by minimizing the main application window. + * The window will be minimized to the system's taskbar or dock. + */ ipcMain.handle(NativeRoute.setMinimizeApp, () => { windowManager.mainWindow?.minimize() }) + /** + * Handles the "setMaximizeApp" IPC message. It toggles the maximization state of the main window. + * If the window is currently maximized, it will be un-maximized (restored to its previous size). + * If the window is not maximized, it will be maximized to fill the screen. + * @param _event - The IPC event object. + */ ipcMain.handle(NativeRoute.setMaximizeApp, async (_event) => { if (windowManager.mainWindow?.isMaximized()) { windowManager.mainWindow.unmaximize() @@ -104,6 +121,11 @@ export function handleAppIPCs() { } }) + /** + * Handles the "selectDirectory" IPC message to open a dialog for selecting a directory. + * If no main window is found, logs an error and exits. + * @returns {string} The path of the selected directory, or nothing if canceled. + */ ipcMain.handle(NativeRoute.selectDirectory, async () => { const mainWindow = windowManager.mainWindow if (!mainWindow) { @@ -122,6 +144,14 @@ export function handleAppIPCs() { } }) + /** + * Handles the "selectFiles" IPC message to open a dialog for selecting files. + * Allows options for setting the dialog title, button label, and selection properties. 
+ * Logs an error if no main window is found. + * @param _event - The IPC event object. + * @param option - Options for customizing file selection dialog. + * @returns {string[]} An array of selected file paths, or nothing if canceled. + */ ipcMain.handle( NativeRoute.selectFiles, async (_event, option?: SelectFileOption) => { @@ -156,11 +186,20 @@ export function handleAppIPCs() { } ) + /** + * Handles the "hideQuickAskWindow" IPC message to hide the quick ask window. + * @returns A promise that resolves when the window is hidden. + */ ipcMain.handle( NativeRoute.hideQuickAskWindow, async (): Promise => windowManager.hideQuickAskWindow() ) + /** + * Handles the "sendQuickAskInput" IPC message to send user input to the main window. + * @param _event - The IPC event object. + * @param input - User input string to be sent. + */ ipcMain.handle( NativeRoute.sendQuickAskInput, async (_event, input: string): Promise => { @@ -171,6 +210,12 @@ export function handleAppIPCs() { } ) + /** + * Handles the "showOpenMenu" IPC message to show the context menu at given coordinates. + * Only applicable on non-Mac platforms. + * @param e - The event object. + * @param args - Contains coordinates where the menu should appear. + */ ipcMain.handle(NativeRoute.showOpenMenu, function (e, args) { if (!isMac && windowManager.mainWindow) { menu.popup({ @@ -181,23 +226,55 @@ export function handleAppIPCs() { } }) + /** + * Handles the "hideMainWindow" IPC message to hide the main application window. + * @returns A promise that resolves when the window is hidden. + */ ipcMain.handle( NativeRoute.hideMainWindow, async (): Promise => windowManager.hideMainWindow() ) + /** + * Handles the "showMainWindow" IPC message to show the main application window. + * @returns A promise that resolves when the window is shown. + */ ipcMain.handle( NativeRoute.showMainWindow, async (): Promise => windowManager.showMainWindow() ) + /** + * Handles the "quickAskSizeUpdated" IPC message to update the size of the quick ask window. + * Resizes window by the given height offset. + * @param _event - The IPC event object. + * @param heightOffset - The amount of height to increase. + * @returns A promise that resolves when the window is resized. + */ ipcMain.handle( NativeRoute.quickAskSizeUpdated, async (_event, heightOffset: number): Promise => windowManager.expandQuickAskWindow(heightOffset) ) + /** + * Handles the "ackDeepLink" IPC message to acknowledge a deep link. + * Triggers handling of deep link in the application. + * @param _event - The IPC event object. + * @returns A promise that resolves when the deep link is acknowledged. + */ ipcMain.handle(NativeRoute.ackDeepLink, async (_event): Promise => { windowManager.ackDeepLink() }) + + /** + * Handles the "factoryReset" IPC message to reset the application to its initial state. + * Clears loaded modules, recreates user space, runs migrations, and sets up extensions. + * @param _event - The IPC event object. + * @returns A promise that resolves after the reset operations are complete. 
+ */ + ipcMain.handle(NativeRoute.factoryReset, async (_event): Promise => { + ModuleManager.instance.clearImportedModules() + return createUserSpace().then(migrate).then(setupExtensions) + }) } diff --git a/electron/managers/window.ts b/electron/managers/window.ts index c9c43ea77..918036365 100644 --- a/electron/managers/window.ts +++ b/electron/managers/window.ts @@ -28,6 +28,7 @@ class WindowManager { ...mainWindowConfig, width: bounds.width, height: bounds.height, + show: false, x: bounds.x, y: bounds.y, webPreferences: { @@ -78,6 +79,10 @@ class WindowManager { windowManager.hideMainWindow() } }) + + windowManager.mainWindow?.on('ready-to-show', function () { + windowManager.mainWindow?.show() + }) } createQuickAskWindow(preloadPath: string, startUrl: string): void { diff --git a/electron/tests/e2e/thread.e2e.spec.ts b/electron/tests/e2e/thread.e2e.spec.ts index 5d7328053..dfd131988 100644 --- a/electron/tests/e2e/thread.e2e.spec.ts +++ b/electron/tests/e2e/thread.e2e.spec.ts @@ -25,7 +25,7 @@ test('Select GPT model from Hub and Chat with Invalid API Key', async ({ { timeout: TIMEOUT } ) - const APIKeyError = page.getByTestId('invalid-API-key-error') + const APIKeyError = page.getByTestId('passthrough-error-message') await expect(APIKeyError).toBeVisible({ timeout: TIMEOUT, }) diff --git a/electron/utils/migration.ts b/electron/utils/migration.ts index 80851f9de..505de0f7b 100644 --- a/electron/utils/migration.ts +++ b/electron/utils/migration.ts @@ -3,7 +3,6 @@ import { app } from 'electron' import { join } from 'path' import { rmdirSync, - readFileSync, existsSync, mkdirSync, readdirSync, diff --git a/extensions/assistant-extension/src/node/engine.ts b/extensions/assistant-extension/src/node/engine.ts deleted file mode 100644 index 05a380340..000000000 --- a/extensions/assistant-extension/src/node/engine.ts +++ /dev/null @@ -1,38 +0,0 @@ -import fs from 'fs' -import path from 'path' -import { SettingComponentProps, getJanDataFolderPath } from '@janhq/core/node' - -// Sec: Do not send engine settings over requests -// Read it manually instead -export const readEmbeddingEngine = (engineName: string) => { - if (engineName !== 'openai' && engineName !== 'groq') { - const engineSettings = fs.readFileSync( - path.join(getJanDataFolderPath(), 'engines', `${engineName}.json`), - 'utf-8' - ) - return JSON.parse(engineSettings) - } else { - const settingDirectoryPath = path.join( - getJanDataFolderPath(), - 'settings', - '@janhq', - // TODO: James - To be removed - engineName === 'openai' - ? 'inference-openai-extension' - : 'inference-groq-extension', - 'settings.json' - ) - - const content = fs.readFileSync(settingDirectoryPath, 'utf-8') - const settings: SettingComponentProps[] = JSON.parse(content) - const apiKeyId = engineName === 'openai' ? 
'openai-api-key' : 'groq-api-key' - const keySetting = settings.find((setting) => setting.key === apiKeyId) - - let apiKey = keySetting?.controllerProps.value - if (typeof apiKey !== 'string') apiKey = '' - - return { - api_key: apiKey, - } - } -} diff --git a/extensions/assistant-extension/src/node/retrieval.ts b/extensions/assistant-extension/src/node/retrieval.ts index 3386029fa..5804ff763 100644 --- a/extensions/assistant-extension/src/node/retrieval.ts +++ b/extensions/assistant-extension/src/node/retrieval.ts @@ -8,7 +8,6 @@ import { MemoryVectorStore } from 'langchain/vectorstores/memory' import { HNSWLib } from 'langchain/vectorstores/hnswlib' import { OpenAIEmbeddings } from 'langchain/embeddings/openai' -import { readEmbeddingEngine } from './engine' export class Retrieval { public chunkSize: number = 100 @@ -28,8 +27,8 @@ export class Retrieval { // declare time-weighted retriever and storage this.timeWeightedVectorStore = new MemoryVectorStore( new OpenAIEmbeddings( - { openAIApiKey: 'nitro-embedding' }, - { basePath: 'http://127.0.0.1:3928/v1' } + { openAIApiKey: 'cortex-embedding' }, + { basePath: 'http://127.0.0.1:39291/v1' } ) ) this.timeWeightedretriever = new TimeWeightedVectorStoreRetriever({ @@ -49,21 +48,11 @@ export class Retrieval { } public updateEmbeddingEngine(model: string, engine: string): void { - // Engine settings are not compatible with the current embedding model params - // Switch case manually for now - if (engine === 'nitro') { - this.embeddingModel = new OpenAIEmbeddings( - { openAIApiKey: 'nitro-embedding', model }, - // TODO: Raw settings - { basePath: 'http://127.0.0.1:3928/v1' }, - ) - } else { - // Fallback to OpenAI Settings - const settings = readEmbeddingEngine(engine) - this.embeddingModel = new OpenAIEmbeddings({ - openAIApiKey: settings.api_key, - }) - } + this.embeddingModel = new OpenAIEmbeddings( + { openAIApiKey: 'cortex-embedding', model }, + // TODO: Raw settings + { basePath: 'http://127.0.0.1:39291/v1' } + ) // update time-weighted embedding model this.timeWeightedVectorStore.embeddings = this.embeddingModel diff --git a/extensions/inference-anthropic-extension/package.json b/extensions/inference-anthropic-extension/package.json index 19c0df5e8..8115ba2df 100644 --- a/extensions/inference-anthropic-extension/package.json +++ b/extensions/inference-anthropic-extension/package.json @@ -1,7 +1,7 @@ { "name": "@janhq/inference-anthropic-extension", "productName": "Anthropic Inference Engine", - "version": "1.0.2", + "version": "1.0.3", "description": "This extension enables Anthropic chat completion API calls", "main": "dist/index.js", "module": "dist/module.js", diff --git a/extensions/inference-anthropic-extension/resources/models.json b/extensions/inference-anthropic-extension/resources/models.json index 8b4ef03fb..59e41245b 100644 --- a/extensions/inference-anthropic-extension/resources/models.json +++ b/extensions/inference-anthropic-extension/resources/models.json @@ -1,28 +1,4 @@ [ - { - "sources": [ - { - "url": "https://www.anthropic.com/" - } - ], - "id": "claude-3-opus-20240229", - "object": "model", - "name": "Claude 3 Opus 20240229", - "version": "1.1", - "description": "Claude 3 Opus is a powerful model suitables for highly complex task.", - "format": "api", - "settings": {}, - "parameters": { - "max_tokens": 4096, - "temperature": 0.7, - "stream": false - }, - "metadata": { - "author": "Anthropic", - "tags": ["General", "Big Context Length"] - }, - "engine": "anthropic" - }, { "sources": [ { @@ -47,78 +23,6 @@ }, "engine": 
"anthropic" }, - { - "sources": [ - { - "url": "https://www.anthropic.com/" - } - ], - "id": "claude-3-sonnet-20240229", - "object": "model", - "name": "Claude 3 Sonnet 20240229", - "version": "1.1", - "description": "Claude 3 Sonnet is an ideal model balance of intelligence and speed for enterprise workloads.", - "format": "api", - "settings": {}, - "parameters": { - "max_tokens": 4096, - "temperature": 0.7, - "stream": false - }, - "metadata": { - "author": "Anthropic", - "tags": ["General", "Big Context Length"] - }, - "engine": "anthropic" - }, - { - "sources": [ - { - "url": "https://www.anthropic.com/" - } - ], - "id": "claude-3-haiku-20240307", - "object": "model", - "name": "Claude 3 Haiku 20240307", - "version": "1.1", - "description": "Claude 3 Haiku is the fastest model provides near-instant responsiveness.", - "format": "api", - "settings": {}, - "parameters": { - "max_tokens": 4096, - "temperature": 0.7, - "stream": false - }, - "metadata": { - "author": "Anthropic", - "tags": ["General", "Big Context Length"] - }, - "engine": "anthropic" - }, - { - "sources": [ - { - "url": "https://www.anthropic.com/" - } - ], - "id": "claude-3-5-haiku-20241022", - "object": "model", - "name": "Claude 3.5 Haiku 20241022", - "version": "1.0", - "description": "Claude 3.5 Haiku is the fastest model provides near-instant responsiveness.", - "format": "api", - "settings": {}, - "parameters": { - "max_tokens": 8192, - "temperature": 0.7, - "stream": false - }, - "metadata": { - "author": "Anthropic", - "tags": ["General", "Big Context Length"] - }, - "engine": "anthropic" - }, { "sources": [ { @@ -143,54 +47,6 @@ }, "engine": "anthropic" }, - { - "sources": [ - { - "url": "https://www.anthropic.com/" - } - ], - "id": "claude-3-5-sonnet-20240620", - "object": "model", - "name": "Claude 3.5 Sonnet 20240620", - "version": "1.1", - "description": "Claude 3.5 Sonnet raises the industry bar for intelligence, outperforming competitor models and Claude 3 Opus on a wide range of evaluations, with the speed and cost of our mid-tier model, Claude 3 Sonnet.", - "format": "api", - "settings": {}, - "parameters": { - "max_tokens": 8192, - "temperature": 0.7, - "stream": true - }, - "metadata": { - "author": "Anthropic", - "tags": ["General", "Big Context Length"] - }, - "engine": "anthropic" - }, - { - "sources": [ - { - "url": "https://www.anthropic.com/" - } - ], - "id": "claude-3-5-sonnet-20241022", - "object": "model", - "name": "Claude 3.5 Sonnet 20241022", - "version": "1.0", - "description": "Claude 3.5 Sonnet raises the industry bar for intelligence, outperforming competitor models and Claude 3 Opus on a wide range of evaluations, with the speed and cost of our mid-tier model, Claude 3 Sonnet.", - "format": "api", - "settings": {}, - "parameters": { - "max_tokens": 8192, - "temperature": 0.7, - "stream": true - }, - "metadata": { - "author": "Anthropic", - "tags": ["General", "Big Context Length"] - }, - "engine": "anthropic" - }, { "sources": [ { diff --git a/extensions/inference-cohere-extension/src/index.ts b/extensions/inference-cohere-extension/src/index.ts index dd7f03317..2615ea893 100644 --- a/extensions/inference-cohere-extension/src/index.ts +++ b/extensions/inference-cohere-extension/src/index.ts @@ -113,6 +113,8 @@ export default class JanInferenceCohereExtension extends RemoteOAIEngine { } transformResponse = (data: any) => { - return typeof data === 'object' ? data.text : JSON.parse(data).text ?? '' + return typeof data === 'object' + ? 
data.text + : (JSON.parse(data.replace('data: ', '').trim()).text ?? '') } } diff --git a/extensions/inference-cortex-extension/bin/version.txt b/extensions/inference-cortex-extension/bin/version.txt index e6d5cb833..40ac6bb0e 100644 --- a/extensions/inference-cortex-extension/bin/version.txt +++ b/extensions/inference-cortex-extension/bin/version.txt @@ -1 +1 @@ -1.0.2 \ No newline at end of file +1.0.4-rc4 \ No newline at end of file diff --git a/extensions/inference-cortex-extension/download.bat b/extensions/inference-cortex-extension/download.bat index ecff683c3..7d9a9213a 100644 --- a/extensions/inference-cortex-extension/download.bat +++ b/extensions/inference-cortex-extension/download.bat @@ -2,23 +2,23 @@ set BIN_PATH=./bin set SHARED_PATH=./../../electron/shared set /p CORTEX_VERSION=<./bin/version.txt +set ENGINE_VERSION=0.1.40 @REM Download cortex.llamacpp binaries -set VERSION=v0.1.35 -set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.35-windows-amd64 -set CUDA_DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION% -set SUBFOLDERS=noavx-cuda-12-0 noavx-cuda-11-7 avx2-cuda-12-0 avx2-cuda-11-7 noavx avx avx2 avx512 vulkan +set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/v%ENGINE_VERSION%/cortex.llamacpp-%ENGINE_VERSION%-windows-amd64 +set CUDA_DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/v%ENGINE_VERSION% +set SUBFOLDERS=windows-amd64-noavx-cuda-12-0 windows-amd64-noavx-cuda-11-7 windows-amd64-avx2-cuda-12-0 windows-amd64-avx2-cuda-11-7 windows-amd64-noavx windows-amd64-avx windows-amd64-avx2 windows-amd64-avx512 windows-amd64-vulkan -call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-12-0/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-11-7/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-12-0/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-11-7/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %BIN_PATH%/noavx/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %BIN_PATH%/avx/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %BIN_PATH%/avx2/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %BIN_PATH%/avx512/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %BIN_PATH%/vulkan/engines/cortex.llamacpp +call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex.cpp/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx2-cuda-12-0/v%ENGINE_VERSION% +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o 
%SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx2-cuda-11-7/v%ENGINE_VERSION% +call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-noavx-cuda-12-0/v%ENGINE_VERSION% +call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-11-7.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-noavx-cuda-11-7/v%ENGINE_VERSION% +call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-noavx/v%ENGINE_VERSION% +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx/v%ENGINE_VERSION% +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx2/v%ENGINE_VERSION% +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx512/v%ENGINE_VERSION% +call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-vulkan/v%ENGINE_VERSION% call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-12-0-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH% call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-11-7-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH% @@ -28,10 +28,10 @@ del %BIN_PATH%\cortex.exe @REM Loop through each folder and move DLLs (excluding engine.dll) for %%F in (%SUBFOLDERS%) do ( - echo Processing folder: %BIN_PATH%\%%F + echo Processing folder: %SHARED_PATH%\engines\cortex.llamacpp\%%F\v%ENGINE_VERSION% @REM Move all .dll files except engine.dll - for %%D in (%BIN_PATH%\%%F\engines\cortex.llamacpp\*.dll) do ( + for %%D in (%SHARED_PATH%\engines\cortex.llamacpp\%%F\v%ENGINE_VERSION%\*.dll) do ( if /I not "%%~nxD"=="engine.dll" ( move "%%D" "%BIN_PATH%" ) diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh index 902a31e51..f62e5961b 100755 --- a/extensions/inference-cortex-extension/download.sh +++ b/extensions/inference-cortex-extension/download.sh @@ -2,9 +2,11 @@ # Read CORTEX_VERSION CORTEX_VERSION=$(cat ./bin/version.txt) -CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download" -ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35/cortex.llamacpp-0.1.35" -CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35" +ENGINE_VERSION=0.1.40 +CORTEX_RELEASE_URL="https://github.com/janhq/cortex.cpp/releases/download" +ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v${ENGINE_VERSION}/cortex.llamacpp-${ENGINE_VERSION}" +CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v${ENGINE_VERSION}" +SHARED_PATH="../../electron/shared" # Detect platform OS_TYPE=$(uname) @@ -17,17 +19,19 @@ if [ "$OS_TYPE" == "Linux" ]; then chmod +x "./bin/cortex-server" # Download engines for Linux - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "./bin/noavx/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "./bin/avx/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "./bin/avx2/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o 
"./bin/avx512/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-12-0/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-11-7/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-12-0/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-11-7/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "./bin/vulkan/engines/cortex.llamacpp" 1 - download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1 - download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-noavx/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx2/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx512/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx2-cuda-12-0/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx2-cuda-11-7/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-noavx-cuda-12-0/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-noavx-cuda-11-7/v${ENGINE_VERSION}" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-vulkan/v${ENGINE_VERSION}" 1 + download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}" 1 + download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}" 1 + mkdir -p "${SHARED_PATH}/engines/cortex.llamacpp/deps" + touch "${SHARED_PATH}/engines/cortex.llamacpp/deps/keep" elif [ "$OS_TYPE" == "Darwin" ]; then # macOS downloads @@ -38,8 +42,8 @@ elif [ "$OS_TYPE" == "Darwin" ]; then chmod +x "./bin/cortex-server" # Download engines for macOS - download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o ./bin/arm64/engines/cortex.llamacpp - download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o ./bin/x64/engines/cortex.llamacpp + download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/mac-arm64/v${ENGINE_VERSION}" + download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/mac-amd64/v${ENGINE_VERSION}" else echo "Unsupported operating system: $OS_TYPE" diff --git a/extensions/inference-cortex-extension/package.json b/extensions/inference-cortex-extension/package.json index 
d262ad5ec..c6d3f70b6 100644 --- a/extensions/inference-cortex-extension/package.json +++ b/extensions/inference-cortex-extension/package.json @@ -1,7 +1,7 @@ { "name": "@janhq/inference-cortex-extension", "productName": "Cortex Inference Engine", - "version": "1.0.21", + "version": "1.0.22", "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", "main": "dist/index.js", "node": "dist/node/index.cjs.js", diff --git a/extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json b/extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json index a2197dab2..90aa50117 100644 --- a/extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json +++ b/extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json @@ -1,8 +1,8 @@ { "sources": [ { - "url": "https://huggingface.co/cortexso/phi3/resolve/main/model.gguf", - "filename": "model.gguf" + "url": "https://huggingface.co/bartowski/Phi-3-mini-4k-instruct-GGUF/resolve/main/Phi-3-mini-4k-instruct-Q4_K_M.gguf", + "filename": "Phi-3-mini-4k-instruct-Q4_K_M.gguf" } ], "id": "phi3-3.8b", @@ -14,7 +14,7 @@ "settings": { "ctx_len": 4096, "prompt_template": "<|user|>\n{prompt}<|end|>\n<|assistant|>\n", - "llama_model_path": "model.gguf", + "llama_model_path": "Phi-3-mini-4k-instruct-Q4_K_M.gguf", "ngl": 33 }, "parameters": { diff --git a/extensions/inference-cortex-extension/resources/models/phi3-medium/model.json b/extensions/inference-cortex-extension/resources/models/phi3-medium/model.json index f7131ee98..afce04952 100644 --- a/extensions/inference-cortex-extension/resources/models/phi3-medium/model.json +++ b/extensions/inference-cortex-extension/resources/models/phi3-medium/model.json @@ -1,8 +1,8 @@ { "sources": [ { - "url": "https://huggingface.co/bartowski/Phi-3-medium-128k-instruct-GGUF/resolve/main/Phi-3-medium-128k-instruct-Q4_K_M.gguf", - "filename": "Phi-3-medium-128k-instruct-Q4_K_M.gguf" + "url": "https://huggingface.co/bartowski/Phi-3-mini-4k-instruct-GGUF/resolve/main/Phi-3-mini-4k-instruct-Q4_K_M.gguf", + "filename": "Phi-3-mini-4k-instruct-Q4_K_M.gguf" } ], "id": "phi3-medium", @@ -14,7 +14,7 @@ "settings": { "ctx_len": 128000, "prompt_template": "<|user|> {prompt}<|end|><|assistant|>", - "llama_model_path": "Phi-3-medium-128k-instruct-Q4_K_M.gguf", + "llama_model_path": "Phi-3-mini-4k-instruct-Q4_K_M.gguf", "ngl": 33 }, "parameters": { diff --git a/extensions/inference-cortex-extension/rollup.config.ts b/extensions/inference-cortex-extension/rollup.config.ts index 34ad9295d..284386869 100644 --- a/extensions/inference-cortex-extension/rollup.config.ts +++ b/extensions/inference-cortex-extension/rollup.config.ts @@ -120,6 +120,7 @@ export default [ DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson), CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'), CORTEX_SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'), + CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.40'), }), // Allow json resolution json(), diff --git a/extensions/inference-cortex-extension/src/@types/global.d.ts b/extensions/inference-cortex-extension/src/@types/global.d.ts index 48dbcd780..381a80f5e 100644 --- a/extensions/inference-cortex-extension/src/@types/global.d.ts +++ b/extensions/inference-cortex-extension/src/@types/global.d.ts @@ -1,6 +1,7 @@ declare const NODE: string declare const CORTEX_API_URL: string declare const CORTEX_SOCKET_URL: string +declare 
const CORTEX_ENGINE_VERSION: string declare const DEFAULT_SETTINGS: Array declare const MODELS: Array diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts index 44ec423da..15f7a0294 100644 --- a/extensions/inference-cortex-extension/src/index.ts +++ b/extensions/inference-cortex-extension/src/index.ts @@ -17,7 +17,10 @@ import { extractModelLoadParams, fs, events, - ModelEvent + ModelEvent, + SystemInformation, + dirName, + AppConfigurationEventName, } from '@janhq/core' import PQueue from 'p-queue' import ky from 'ky' @@ -45,6 +48,8 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { provider: string = InferenceEngine.cortex + shouldReconnect = true + /** * The URL for making inference requests. */ @@ -65,23 +70,34 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { super.onLoad() + this.queue.add(() => this.clean()) + // Run the process watchdog const systemInfo = await systemInformation() - await this.clean() - await executeOnMain(NODE, 'run', systemInfo) - + this.queue.add(() => executeOnMain(NODE, 'run', systemInfo)) this.queue.add(() => this.healthz()) - + this.queue.add(() => this.setDefaultEngine(systemInfo)) this.subscribeToEvents() window.addEventListener('beforeunload', () => { this.clean() }) + + const currentMode = systemInfo.gpuSetting?.run_mode + + events.on(AppConfigurationEventName.OnConfigurationUpdate, async () => { + const systemInfo = await systemInformation() + // Update run mode on settings update + if (systemInfo.gpuSetting?.run_mode !== currentMode) + this.queue.add(() => this.setDefaultEngine(systemInfo)) + }) } - onUnload(): void { + async onUnload() { + console.log('Clean up cortex.cpp services') + this.shouldReconnect = false this.clean() - executeOnMain(NODE, 'dispose') + await executeOnMain(NODE, 'dispose') super.onUnload() } @@ -89,7 +105,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { model: Model & { file_path?: string } ): Promise { if ( - model.engine === InferenceEngine.nitro && + (model.engine === InferenceEngine.nitro || model.settings.vision_model) && model.settings.llama_model_path ) { // Legacy chat model support @@ -105,7 +121,10 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { model.settings = settings } - if (model.engine === InferenceEngine.nitro && model.settings.mmproj) { + if ( + (model.engine === InferenceEngine.nitro || model.settings.vision_model) && + model.settings.mmproj + ) { // Legacy clip vision model support model.settings = { ...model.settings, @@ -127,6 +146,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { ? 
InferenceEngine.cortex_llamacpp : model.engine, }, + timeout: false, }) .json() .catch(async (e) => { @@ -149,25 +169,54 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { * Do health check on cortex.cpp * @returns */ - healthz(): Promise { + private healthz(): Promise { return ky .get(`${CORTEX_API_URL}/healthz`, { retry: { - limit: 10, + limit: 20, + delay: () => 500, methods: ['get'], }, }) - .then(() => { }) + .then(() => {}) + } + + /** + * Set default engine variant on launch + */ + private async setDefaultEngine(systemInfo: SystemInformation) { + const variant = await executeOnMain( + NODE, + 'engineVariant', + systemInfo.gpuSetting + ) + return ( + ky + // Fallback support for legacy API + .post( + `${CORTEX_API_URL}/v1/engines/${InferenceEngine.cortex_llamacpp}/default?version=${CORTEX_ENGINE_VERSION}&variant=${variant}`, + { + json: { + version: CORTEX_ENGINE_VERSION, + variant, + }, + } + ) + .then(() => {}) + ) } /** * Clean cortex processes * @returns */ - clean(): Promise { + private clean(): Promise { return ky .delete(`${CORTEX_API_URL}/processmanager/destroy`, { timeout: 2000, // maximum 2 seconds + retry: { + limit: 0, + }, }) .catch(() => { // Do nothing @@ -177,7 +226,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { /** * Subscribe to cortex.cpp websocket events */ - subscribeToEvents() { + private subscribeToEvents() { this.queue.add( () => new Promise((resolve) => { @@ -195,33 +244,47 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { ) const percent = total > 0 ? transferred / total : 0 - events.emit(DownloadTypes[data.type as keyof typeof DownloadTypes], { - modelId: data.task.id, - percent: percent, - size: { - transferred: transferred, - total: total, - }, - }) + events.emit( + DownloadTypes[data.type as keyof typeof DownloadTypes], + { + modelId: data.task.id, + percent: percent, + size: { + transferred: transferred, + total: total, + }, + } + ) // Update models list from Hub if (data.type === DownloadTypes.DownloadSuccess) { // Delay for the state update from cortex.cpp // Just to be sure setTimeout(() => { - events.emit(ModelEvent.OnModelsUpdate, {}) + events.emit(ModelEvent.OnModelsUpdate, { + fetch: true, + }) }, 500) } }) + + this.socket.onclose = (event) => { + console.log('WebSocket closed:', event) + events.emit(ModelEvent.OnModelStopped, {}) + if (this.shouldReconnect) { + console.log(`Attempting to reconnect...`) + setTimeout(() => this.subscribeToEvents(), 1000) + } + } + resolve() }) ) } - } /// Legacy -export const getModelFilePath = async ( - model: Model, +const getModelFilePath = async ( + model: Model & { file_path?: string }, file: string ): Promise => { // Symlink to the model file @@ -231,6 +294,9 @@ export const getModelFilePath = async ( ) { return model.sources[0]?.url } + if (model.file_path) { + await joinPath([await dirName(model.file_path), file]) + } return joinPath([await getJanDataFolderPath(), 'models', model.id, file]) } /// diff --git a/extensions/inference-cortex-extension/src/node/cpuInfo.ts b/extensions/inference-cortex-extension/src/node/cpuInfo.ts new file mode 100644 index 000000000..4366a995b --- /dev/null +++ b/extensions/inference-cortex-extension/src/node/cpuInfo.ts @@ -0,0 +1,27 @@ +import { cpuInfo } from 'cpu-instructions' + +// Check the CPU info and determine the supported instruction set +const info = cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX512') + ? 
'avx512' + : cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX2') + ? 'avx2' + : cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX') + ? 'avx' + : 'noavx' + +// Send the result and wait for confirmation before exiting +new Promise((resolve, reject) => { + // @ts-ignore + process.send(info, (error: Error | null) => { + if (error) { + reject(error) + } else { + resolve() + } + }) +}) + .then(() => process.exit(0)) + .catch((error) => { + console.error('Failed to send info:', error) + process.exit(1) + }) diff --git a/extensions/inference-cortex-extension/src/node/execute.test.ts b/extensions/inference-cortex-extension/src/node/execute.test.ts index 622eb38af..1bcefce9d 100644 --- a/extensions/inference-cortex-extension/src/node/execute.test.ts +++ b/extensions/inference-cortex-extension/src/node/execute.test.ts @@ -1,7 +1,8 @@ import { describe, expect, it } from '@jest/globals' -import { executableCortexFile } from './execute' -import { GpuSetting } from '@janhq/core' +import { engineVariant, executableCortexFile } from './execute' +import { GpuSetting } from '@janhq/core/node' import { cpuInfo } from 'cpu-instructions' +import { fork } from 'child_process' let testSettings: GpuSetting = { run_mode: 'cpu', @@ -30,6 +31,15 @@ jest.mock('cpu-instructions', () => ({ let mockCpuInfo = cpuInfo.cpuInfo as jest.Mock mockCpuInfo.mockReturnValue([]) +jest.mock('@janhq/core/node', () => ({ + appResourcePath: () => '.', + log: jest.fn(), +})) +jest.mock('child_process', () => ({ + fork: jest.fn(), +})) +const mockFork = fork as jest.Mock + describe('test executable cortex file', () => { afterAll(function () { Object.defineProperty(process, 'platform', { @@ -37,6 +47,37 @@ describe('test executable cortex file', () => { }) }) + it('executes on MacOS', () => { + const mockProcess = { + on: jest.fn((event, callback) => { + if (event === 'message') { + callback('noavx') + } + }), + send: jest.fn(), + } + Object.defineProperty(process, 'platform', { + value: 'darwin', + }) + Object.defineProperty(process, 'arch', { + value: 'arm64', + }) + expect(executableCortexFile(testSettings)).toEqual( + expect.objectContaining({ + enginePath: expect.stringContaining('shared'), + executablePath: + originalPlatform === 'darwin' + ? expect.stringContaining(`cortex-server`) + : expect.anything(), + cudaVisibleDevices: '', + vkVisibleDevices: '', + }) + ) + + mockFork.mockReturnValue(mockProcess) + expect(engineVariant(testSettings)).resolves.toEqual('mac-arm64') + }) + it('executes on MacOS', () => { Object.defineProperty(process, 'platform', { value: 'darwin', @@ -44,25 +85,23 @@ describe('test executable cortex file', () => { Object.defineProperty(process, 'arch', { value: 'arm64', }) - expect(executableCortexFile(testSettings)).toEqual( - expect.objectContaining({ - enginePath: expect.stringContaining(`arm64`), - binPath: expect.stringContaining(`bin`), - executablePath: - originalPlatform === 'darwin' - ? 
expect.stringContaining(`cortex-server`) - : expect.anything(), - cudaVisibleDevices: '', - vkVisibleDevices: '', - }) - ) + + const mockProcess = { + on: jest.fn((event, callback) => { + if (event === 'message') { + callback('noavx') + } + }), + send: jest.fn(), + } + mockFork.mockReturnValue(mockProcess) Object.defineProperty(process, 'arch', { value: 'x64', }) + expect(executableCortexFile(testSettings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`x64`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining('shared'), executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`cortex-server`) @@ -71,6 +110,7 @@ describe('test executable cortex file', () => { vkVisibleDevices: '', }) ) + expect(engineVariant(testSettings)).resolves.toEqual('mac-amd64') }) it('executes on Windows CPU', () => { @@ -81,16 +121,25 @@ describe('test executable cortex file', () => { ...testSettings, run_mode: 'cpu', } - mockCpuInfo.mockReturnValue(['avx']) + const mockProcess = { + on: jest.fn((event, callback) => { + if (event === 'message') { + callback('avx') + } + }), + send: jest.fn(), + } + mockFork.mockReturnValue(mockProcess) + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`avx`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining('shared'), executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '', vkVisibleDevices: '', }) ) + expect(engineVariant()).resolves.toEqual('windows-amd64-avx') }) it('executes on Windows Cuda 11', () => { @@ -117,16 +166,27 @@ describe('test executable cortex file', () => { }, ], } - mockCpuInfo.mockReturnValue(['avx2']) + + const mockProcess = { + on: jest.fn((event, callback) => { + if (event === 'message') { + callback('avx2') + } + }), + send: jest.fn(), + } + mockFork.mockReturnValue(mockProcess) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`avx2-cuda-11-7`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining('shared'), executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).resolves.toEqual( + 'windows-amd64-avx2-cuda-11-7' + ) }) it('executes on Windows Cuda 12', () => { @@ -153,16 +213,36 @@ describe('test executable cortex file', () => { }, ], } - mockCpuInfo.mockReturnValue(['noavx']) + mockFork.mockReturnValue({ + on: jest.fn((event, callback) => { + if (event === 'message') { + callback('noavx') + } + }), + send: jest.fn(), + }) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`noavx-cuda-12-0`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining('shared'), executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).resolves.toEqual( + 'windows-amd64-noavx-cuda-12-0' + ) + mockFork.mockReturnValue({ + on: jest.fn((event, callback) => { + if (event === 'message') { + callback('avx512') + } + }), + send: jest.fn(), + }) + expect(engineVariant(settings)).resolves.toEqual( + 'windows-amd64-avx2-cuda-12-0' + ) }) it('executes on Linux CPU', () => { @@ -173,15 +253,23 @@ describe('test executable cortex file', () => { ...testSettings, run_mode: 'cpu', } - mockCpuInfo.mockReturnValue(['noavx']) + 
mockFork.mockReturnValue({ + on: jest.fn((event, callback) => { + if (event === 'message') { + callback('noavx') + } + }), + send: jest.fn(), + }) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`noavx`), + enginePath: expect.stringContaining('shared'), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '', vkVisibleDevices: '', }) ) + expect(engineVariant()).resolves.toEqual('linux-amd64-noavx') }) it('executes on Linux Cuda 11', () => { @@ -208,15 +296,25 @@ describe('test executable cortex file', () => { }, ], } + + mockFork.mockReturnValue({ + on: jest.fn((event, callback) => { + if (event === 'message') { + callback('avx512') + } + }), + send: jest.fn(), + }) + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`cuda-11-7`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining('shared'), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).resolves.toBe('linux-amd64-avx2-cuda-11-7') }) it('executes on Linux Cuda 12', () => { @@ -243,15 +341,25 @@ describe('test executable cortex file', () => { }, ], } + mockFork.mockReturnValue({ + on: jest.fn((event, callback) => { + if (event === 'message') { + callback('avx2') + } + }), + send: jest.fn(), + }) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`cuda-12-0`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining('shared'), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).resolves.toEqual( + 'linux-amd64-avx2-cuda-12-0' + ) }) // Generate test for different cpu instructions on Linux @@ -266,18 +374,27 @@ describe('test executable cortex file', () => { const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] cpuInstructions.forEach((instruction) => { - mockCpuInfo.mockReturnValue([instruction]) + mockFork.mockReturnValue({ + on: jest.fn((event, callback) => { + if (event === 'message') { + callback(instruction) + } + }), + send: jest.fn(), + }) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(instruction), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining('shared'), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '', vkVisibleDevices: '', }) ) + expect(engineVariant(settings)).resolves.toEqual( + `linux-amd64-${instruction}` + ) }) }) // Generate test for different cpu instructions on Windows @@ -291,16 +408,25 @@ describe('test executable cortex file', () => { } const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] cpuInstructions.forEach((instruction) => { - mockCpuInfo.mockReturnValue([instruction]) + mockFork.mockReturnValue({ + on: jest.fn((event, callback) => { + if (event === 'message') { + callback(instruction) + } + }), + send: jest.fn(), + }) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(instruction), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining('shared'), executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '', vkVisibleDevices: '', }) ) + expect(engineVariant(settings)).resolves.toEqual( + `windows-amd64-${instruction}` 
+ ) }) }) @@ -331,16 +457,25 @@ describe('test executable cortex file', () => { } const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] cpuInstructions.forEach((instruction) => { - mockCpuInfo.mockReturnValue([instruction]) + mockFork.mockReturnValue({ + on: jest.fn((event, callback) => { + if (event === 'message') { + callback(instruction) + } + }), + send: jest.fn(), + }) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`cuda-12-0`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining('shared'), executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).resolves.toEqual( + `windows-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0` + ) }) }) @@ -371,16 +506,25 @@ describe('test executable cortex file', () => { ], } cpuInstructions.forEach((instruction) => { - mockCpuInfo.mockReturnValue([instruction]) + mockFork.mockReturnValue({ + on: jest.fn((event, callback) => { + if (event === 'message') { + callback(instruction) + } + }), + send: jest.fn(), + }) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`cuda-12-0`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining('shared'), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).resolves.toEqual( + `linux-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0` + ) }) }) @@ -412,16 +556,23 @@ describe('test executable cortex file', () => { ], } cpuInstructions.forEach((instruction) => { - mockCpuInfo.mockReturnValue([instruction]) + mockFork.mockReturnValue({ + on: jest.fn((event, callback) => { + if (event === 'message') { + callback(instruction) + } + }), + send: jest.fn(), + }) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`vulkan`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining('shared'), executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) ) + expect(engineVariant(settings)).resolves.toEqual(`linux-amd64-vulkan`) }) }) @@ -439,11 +590,17 @@ describe('test executable cortex file', () => { ...testSettings, run_mode: 'cpu', } - mockCpuInfo.mockReturnValue([]) + mockFork.mockReturnValue({ + on: jest.fn((event, callback) => { + if (event === 'message') { + callback('noavx') + } + }), + send: jest.fn(), + }) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`x64`), - binPath: expect.stringContaining(`bin`), + enginePath: expect.stringContaining('shared'), executablePath: originalPlatform === 'darwin' ? 
expect.stringContaining(`cortex-server`) diff --git a/extensions/inference-cortex-extension/src/node/execute.ts b/extensions/inference-cortex-extension/src/node/execute.ts index 74ffb48c6..0b091d464 100644 --- a/extensions/inference-cortex-extension/src/node/execute.ts +++ b/extensions/inference-cortex-extension/src/node/execute.ts @@ -1,10 +1,9 @@ -import { GpuSetting } from '@janhq/core' import * as path from 'path' -import { cpuInfo } from 'cpu-instructions' +import { GpuSetting, appResourcePath, log } from '@janhq/core/node' +import { fork } from 'child_process' export interface CortexExecutableOptions { enginePath: string - binPath: string executablePath: string cudaVisibleDevices: string vkVisibleDevices: string @@ -21,11 +20,7 @@ const gpuRunMode = (settings?: GpuSetting): string => { if (!settings) return '' - return settings.vulkan === true - ? 'vulkan' - : settings.run_mode === 'cpu' - ? '' - : 'cuda' + return settings.vulkan === true || settings.run_mode === 'cpu' ? '' : 'cuda' } /** @@ -34,12 +29,12 @@ const gpuRunMode = (settings?: GpuSetting): string => { */ const os = (): string => { return process.platform === 'win32' - ? 'win' + ? 'windows-amd64' : process.platform === 'darwin' ? process.arch === 'arm64' - ? 'arm64' - : 'x64' - : 'linux' + ? 'mac-arm64' + : 'mac-amd64' + : 'linux-amd64' } /** @@ -57,7 +52,9 @@ const extension = (): '.exe' | '' => { */ const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => { const isUsingCuda = - settings?.vulkan !== true && settings?.run_mode === 'gpu' && os() !== 'mac' + settings?.vulkan !== true && + settings?.run_mode === 'gpu' && + !os().includes('mac') if (!isUsingCuda) return undefined return settings?.cuda?.version === '11' ? '11-7' : '12-0' @@ -67,48 +64,74 @@ const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => { * The CPU instructions that will be set - either 'avx512', 'avx2', 'avx', or 'noavx'. * @returns */ -const cpuInstructions = (): string => { +const cpuInstructions = async (): Promise => { if (process.platform === 'darwin') return '' - return cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX512') - ? 'avx512' - : cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX2') - ? 'avx2' - : cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX') - ? 'avx' - : 'noavx' + + const child = fork(path.join(__dirname, './cpuInfo.js')) // Path to the child process file + + return new Promise((resolve, reject) => { + child.on('message', (cpuInfo?: string) => { + resolve(cpuInfo ?? 'noavx') + child.kill() // Kill the child process after receiving the result + }) + + child.on('error', (err) => { + resolve('noavx') + child.kill() + }) + + child.on('exit', (code) => { + if (code !== 0) { + resolve('noavx') + child.kill() + } + }) + }) } /** - * Find which executable file to run based on the current platform. - * @returns The name of the executable file to run. + * The executable options for the cortex.cpp extension. */ export const executableCortexFile = ( gpuSetting?: GpuSetting ): CortexExecutableOptions => { - const cpuInstruction = cpuInstructions() - let engineFolder = gpuSetting?.vulkan - ? 'vulkan' - : process.platform === 'darwin' - ? os() - : [ - gpuRunMode(gpuSetting) !== 'cuda' || - cpuInstruction === 'avx2' || cpuInstruction === 'avx512' - ? cpuInstruction - : 'noavx', - gpuRunMode(gpuSetting), - cudaVersion(gpuSetting), - ] - .filter((e) => !!e) - .join('-') let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? 
'' let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? '' let binaryName = `cortex-server${extension()}` const binPath = path.join(__dirname, '..', 'bin') return { - enginePath: path.join(binPath, engineFolder), + enginePath: path.join(appResourcePath(), 'shared'), executablePath: path.join(binPath, binaryName), - binPath: binPath, cudaVisibleDevices, vkVisibleDevices, } } + +/** + * Find which variant to run based on the current platform. + */ +export const engineVariant = async ( + gpuSetting?: GpuSetting +): Promise => { + const cpuInstruction = await cpuInstructions() + log(`[CORTEX]: CPU instruction: ${cpuInstruction}`) + let engineVariant = [ + os(), + gpuSetting?.vulkan + ? 'vulkan' + : gpuRunMode(gpuSetting) !== 'cuda' + ? // CPU mode - support all variants + cpuInstruction + : // GPU mode - packaged CUDA variants of avx2 and noavx + cpuInstruction === 'avx2' || cpuInstruction === 'avx512' + ? 'avx2' + : 'noavx', + gpuRunMode(gpuSetting), + cudaVersion(gpuSetting), + ] + .filter((e) => !!e) + .join('-') + + log(`[CORTEX]: Engine variant: ${engineVariant}`) + return engineVariant +} diff --git a/extensions/inference-cortex-extension/src/node/index.ts b/extensions/inference-cortex-extension/src/node/index.ts index 3816605d2..a13bf6028 100644 --- a/extensions/inference-cortex-extension/src/node/index.ts +++ b/extensions/inference-cortex-extension/src/node/index.ts @@ -1,8 +1,7 @@ import path from 'path' import { getJanDataFolderPath, log, SystemInformation } from '@janhq/core/node' -import { executableCortexFile } from './execute' +import { engineVariant, executableCortexFile } from './execute' import { ProcessWatchdog } from './watchdog' -import { appResourcePath } from '@janhq/core/node' // The HOST address to use for the Nitro subprocess const LOCAL_PORT = '39291' @@ -20,9 +19,9 @@ function run(systemInfo?: SystemInformation): Promise { // If ngl is not set or equal to 0, run on CPU with correct instructions systemInfo?.gpuSetting ? 
{ - ...systemInfo.gpuSetting, - run_mode: systemInfo.gpuSetting.run_mode, - } + ...systemInfo.gpuSetting, + run_mode: systemInfo.gpuSetting.run_mode, + } : undefined ) @@ -30,16 +29,13 @@ function run(systemInfo?: SystemInformation): Promise { log(`[CORTEX]:: Spawn cortex at path: ${executableOptions.executablePath}`) log(`[CORTEX]:: Cortex engine path: ${executableOptions.enginePath}`) - addEnvPaths(path.join(appResourcePath(), 'shared')) - addEnvPaths(executableOptions.binPath) addEnvPaths(executableOptions.enginePath) - // Add the cortex.llamacpp path to the PATH and LD_LIBRARY_PATH - // This is required for the cortex engine to run for now since dlls are not moved to the root - addEnvPaths( - path.join(executableOptions.enginePath, 'engines', 'cortex.llamacpp') - ) const dataFolderPath = getJanDataFolderPath() + if (watchdog) { + watchdog.terminate() + } + watchdog = new ProcessWatchdog( executableOptions.executablePath, [ @@ -81,17 +77,12 @@ function dispose() { function addEnvPaths(dest: string) { // Add engine path to the PATH and LD_LIBRARY_PATH if (process.platform === 'win32') { - process.env.PATH = (process.env.PATH || '').concat( - path.delimiter, - dest, - ) - log(`[CORTEX] PATH: ${process.env.PATH}`) + process.env.PATH = (process.env.PATH || '').concat(path.delimiter, dest) } else { process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat( path.delimiter, - dest, + dest ) - log(`[CORTEX] LD_LIBRARY_PATH: ${process.env.LD_LIBRARY_PATH}`) } } @@ -105,4 +96,5 @@ export interface CortexProcessInfo { export default { run, dispose, + engineVariant, } diff --git a/extensions/inference-openai-extension/package.json b/extensions/inference-openai-extension/package.json index 50fe12349..9700383d6 100644 --- a/extensions/inference-openai-extension/package.json +++ b/extensions/inference-openai-extension/package.json @@ -1,7 +1,7 @@ { "name": "@janhq/inference-openai-extension", "productName": "OpenAI Inference Engine", - "version": "1.0.3", + "version": "1.0.4", "description": "This extension enables OpenAI chat completion API calls", "main": "dist/index.js", "module": "dist/module.js", diff --git a/extensions/inference-openai-extension/resources/models.json b/extensions/inference-openai-extension/resources/models.json index 124e123b9..a34bc5460 100644 --- a/extensions/inference-openai-extension/resources/models.json +++ b/extensions/inference-openai-extension/resources/models.json @@ -97,11 +97,10 @@ "format": "api", "settings": {}, "parameters": { - "max_tokens": 4096, - "temperature": 0.7, - "top_p": 0.95, + "temperature": 1, + "top_p": 1, "stream": true, - "stop": [], + "max_tokens": 32768, "frequency_penalty": 0, "presence_penalty": 0 }, @@ -125,11 +124,10 @@ "format": "api", "settings": {}, "parameters": { - "max_tokens": 4096, - "temperature": 0.7, - "top_p": 0.95, + "temperature": 1, + "top_p": 1, + "max_tokens": 65536, "stream": true, - "stop": [], "frequency_penalty": 0, "presence_penalty": 0 }, diff --git a/extensions/inference-openai-extension/src/index.ts b/extensions/inference-openai-extension/src/index.ts index 64880b678..18bc4e0aa 100644 --- a/extensions/inference-openai-extension/src/index.ts +++ b/extensions/inference-openai-extension/src/index.ts @@ -76,11 +76,10 @@ export default class JanInferenceOpenAIExtension extends RemoteOAIEngine { transformPayload = (payload: OpenAIPayloadType): OpenAIPayloadType => { // Transform the payload for preview models if (this.previewModels.includes(payload.model)) { - const { max_tokens, temperature, top_p, stop, 
...params } = payload + const { max_tokens, stop, ...params } = payload return { ...params, max_completion_tokens: max_tokens, - stream: false // o1 only support stream = false } } // Pass through for non-preview models diff --git a/extensions/inference-openrouter-extension/resources/models.json b/extensions/inference-openrouter-extension/resources/models.json index d89c07e5a..31dea8734 100644 --- a/extensions/inference-openrouter-extension/resources/models.json +++ b/extensions/inference-openrouter-extension/resources/models.json @@ -1,4 +1,4 @@ - [ +[ { "sources": [ { @@ -13,7 +13,7 @@ "format": "api", "settings": {}, "parameters": { - "max_tokens": 1024, + "max_tokens": 128000, "temperature": 0.7, "top_p": 0.95, "frequency_penalty": 0, diff --git a/extensions/inference-openrouter-extension/src/index.ts b/extensions/inference-openrouter-extension/src/index.ts index 75d1188a8..1b2cd014d 100644 --- a/extensions/inference-openrouter-extension/src/index.ts +++ b/extensions/inference-openrouter-extension/src/index.ts @@ -83,6 +83,6 @@ export default class JanInferenceOpenRouterExtension extends RemoteOAIEngine { transformPayload = (payload: PayloadType) => ({ ...payload, - model: this.model, + model: payload.model !== 'open-router-auto' ? payload.model : this.model, }) } diff --git a/extensions/model-extension/package.json b/extensions/model-extension/package.json index bd834454a..ca563ff9f 100644 --- a/extensions/model-extension/package.json +++ b/extensions/model-extension/package.json @@ -1,7 +1,7 @@ { "name": "@janhq/model-extension", "productName": "Model Management", - "version": "1.0.34", + "version": "1.0.35", "description": "Model Management Extension provides model exploration and seamless downloads", "main": "dist/index.js", "author": "Jan ", diff --git a/extensions/model-extension/src/cortex.ts b/extensions/model-extension/src/cortex.ts index 7a65e8e3f..26316fbbc 100644 --- a/extensions/model-extension/src/cortex.ts +++ b/extensions/model-extension/src/cortex.ts @@ -1,6 +1,6 @@ import PQueue from 'p-queue' import ky from 'ky' -import { extractModelLoadParams, Model } from '@janhq/core' +import { extractModelLoadParams, Model } from '@janhq/core' import { extractInferenceParams } from '@janhq/core' /** * cortex.cpp Model APIs interface @@ -18,6 +18,7 @@ interface ICortexAPI { deleteModel(model: string): Promise updateModel(model: object): Promise cancelModelPull(model: string): Promise + configs(body: { [key: string]: any }): Promise } type ModelList = { @@ -52,7 +53,7 @@ export class CortexAPI implements ICortexAPI { */ getModels(): Promise { return this.queue - .add(() => ky.get(`${API_URL}/models`).json()) + .add(() => ky.get(`${API_URL}/v1/models`).json()) .then((e) => typeof e === 'object' ? 
e.data.map((e) => this.transformModel(e)) : [] ) @@ -104,7 +105,7 @@ export class CortexAPI implements ICortexAPI { */ deleteModel(model: string): Promise { return this.queue.add(() => - ky.delete(`${API_URL}/models/${model}`).json().then() + ky.delete(`${API_URL}/v1/models/${model}`).json().then() ) } @@ -130,7 +131,7 @@ export class CortexAPI implements ICortexAPI { cancelModelPull(model: string): Promise { return this.queue.add(() => ky - .delete(`${API_URL}/models/pull`, { json: { taskId: model } }) + .delete(`${API_URL}/v1/models/pull`, { json: { taskId: model } }) .json() .then() ) @@ -142,7 +143,7 @@ export class CortexAPI implements ICortexAPI { */ async getModelStatus(model: string): Promise { return this.queue - .add(() => ky.get(`${API_URL}/models/status/${model}`)) + .add(() => ky.get(`${API_URL}/v1/models/status/${model}`)) .then((e) => true) .catch(() => false) } @@ -155,13 +156,24 @@ export class CortexAPI implements ICortexAPI { return ky .get(`${API_URL}/healthz`, { retry: { - limit: 10, + limit: 20, + delay: () => 500, methods: ['get'], }, }) .then(() => {}) } + /** + * Configure model pull options + * @param body + */ + configs(body: { [key: string]: any }): Promise { + return this.queue.add(() => + ky.patch(`${API_URL}/v1/configs`, { json: body }).then(() => {}) + ) + } + /** * TRansform model to the expected format (e.g. parameters, settings, metadata) * @param model diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts index b3ad2a012..e29084bc2 100644 --- a/extensions/model-extension/src/index.ts +++ b/extensions/model-extension/src/index.ts @@ -20,11 +20,8 @@ import { deleteModelFiles } from './legacy/delete' declare const SETTINGS: Array -/** - * Extension enum - */ -enum ExtensionEnum { - downloadedModels = 'downloadedModels', +export enum Settings { + huggingfaceToken = 'hugging-face-access-token', } /** @@ -40,15 +37,29 @@ export default class JanModelExtension extends ModelExtension { async onLoad() { this.registerSettings(SETTINGS) - // Try get models from cortex.cpp - this.getModels().then((models) => { - this.registerModels(models) - }) + // Configure huggingface token if available + const huggingfaceToken = await this.getSetting( + Settings.huggingfaceToken, + undefined + ) + if (huggingfaceToken) + this.cortexAPI.configs({ huggingface_token: huggingfaceToken }) // Listen to app download events this.handleDesktopEvents() } + /** + * Subscribe to settings update and make change accordingly + * @param key + * @param value + */ + onSettingUpdate(key: string, value: T): void { + if (key === Settings.huggingfaceToken) { + this.cortexAPI.configs({ huggingface_token: value }) + } + } + /** * Called when the extension is unloaded. * @override @@ -127,55 +138,43 @@ export default class JanModelExtension extends ModelExtension { * @returns A Promise that resolves with an array of all models. 
*/ async getModels(): Promise { - /** - * In this action, if return empty array right away - * it would reset app cache and app will not function properly - * should compare and try import - */ - let currentModels: Model[] = [] - /** * Legacy models should be supported */ let legacyModels = await scanModelsFolder() - try { - if (!localStorage.getItem(ExtensionEnum.downloadedModels)) { - // Updated from an older version than 0.5.5 - // Scan through the models folder and import them (Legacy flow) - // Return models immediately - currentModels = legacyModels - } else { - currentModels = JSON.parse( - localStorage.getItem(ExtensionEnum.downloadedModels) - ) as Model[] - } - } catch (e) { - currentModels = [] - console.error(e) - } - /** * Here we are filtering out the models that are not imported * and are not using llama.cpp engine */ - var toImportModels = currentModels.filter( + var toImportModels = legacyModels.filter( (e) => e.engine === InferenceEngine.nitro ) - await this.cortexAPI.getModels().then((models) => { - const existingIds = models.map((e) => e.id) - toImportModels = toImportModels.filter( - (e: Model) => !existingIds.includes(e.id) && !e.settings?.vision_model + /** + * Fetch models from cortex.cpp + */ + var fetchedModels = await this.cortexAPI.getModels().catch(() => []) + + // Checking if there are models to import + const existingIds = fetchedModels.map((e) => e.id) + toImportModels = toImportModels.filter( + (e: Model) => !existingIds.includes(e.id) && !e.settings?.vision_model + ) + + /** + * There is no model to import + * just return fetched models + */ + if (!toImportModels.length) + return fetchedModels.concat( + legacyModels.filter((e) => !fetchedModels.some((x) => x.id === e.id)) ) - }) console.log('To import models:', toImportModels.length) /** * There are models to import - * do not return models from cortex.cpp yet - * otherwise it will reset the app cache - * */ + */ if (toImportModels.length > 0) { // Import models await Promise.all( @@ -193,17 +192,19 @@ export default class JanModelExtension extends ModelExtension { ]) // Copied models : model.sources[0].url, // Symlink models, model.name - ).then((e) => { - this.updateModel({ - id: model.id, - ...model.settings, - ...model.parameters, - } as Partial) - }) + ) + .then((e) => { + this.updateModel({ + id: model.id, + ...model.settings, + ...model.parameters, + } as Partial) + }) + .catch((e) => { + console.debug(e) + }) }) ) - - return currentModels } /** @@ -252,6 +253,13 @@ export default class JanModelExtension extends ModelExtension { return this.cortexAPI.getModelStatus(model) } + /** + * Configure pull options such as proxy, headers, etc. 
+ */ + async configurePullOptions(options: { [key: string]: any }): Promise { + return this.cortexAPI.configs(options).catch((e) => console.debug(e)) + } + /** * Handle download state from main app */ diff --git a/extensions/model-extension/src/legacy/delete.ts b/extensions/model-extension/src/legacy/delete.ts index 5288e30ee..43fa56d69 100644 --- a/extensions/model-extension/src/legacy/delete.ts +++ b/extensions/model-extension/src/legacy/delete.ts @@ -1,10 +1,12 @@ -import { fs, joinPath } from '@janhq/core' +import { dirName, fs } from '@janhq/core' +import { scanModelsFolder } from './model-json' export const deleteModelFiles = async (id: string) => { try { - const dirPath = await joinPath(['file://models', id]) + const models = await scanModelsFolder() + const dirPath = models.find((e) => e.id === id)?.file_path // remove model folder directory - await fs.rm(dirPath) + if (dirPath) await fs.rm(await dirName(dirPath)) } catch (err) { console.error(err) } diff --git a/extensions/model-extension/src/legacy/model-json.ts b/extensions/model-extension/src/legacy/model-json.ts index 3cad6014b..03560cde2 100644 --- a/extensions/model-extension/src/legacy/model-json.ts +++ b/extensions/model-extension/src/legacy/model-json.ts @@ -12,7 +12,9 @@ const LocalEngines = [ * Scan through models folder and return downloaded models * @returns */ -export const scanModelsFolder = async (): Promise => { +export const scanModelsFolder = async (): Promise< + (Model & { file_path?: string })[] +> => { const _homeDir = 'file://models' try { if (!(await fs.existsSync(_homeDir))) { @@ -37,7 +39,7 @@ export const scanModelsFolder = async (): Promise => { const jsonPath = await getModelJsonPath(folderFullPath) - if (await fs.existsSync(jsonPath)) { + if (jsonPath && (await fs.existsSync(jsonPath))) { // if we have the model.json file, read it let model = await fs.readFileSync(jsonPath, 'utf-8') @@ -83,7 +85,10 @@ export const scanModelsFolder = async (): Promise => { file.toLowerCase().endsWith('.gguf') || // GGUF file.toLowerCase().endsWith('.engine') // Tensort-LLM ) - })?.length >= (model.engine === InferenceEngine.nitro_tensorrt_llm ? 1 : (model.sources?.length ?? 1)) + })?.length >= + (model.engine === InferenceEngine.nitro_tensorrt_llm + ? 1 + : (model.sources?.length ?? 
1)) ) }) diff --git a/extensions/monitoring-extension/src/index.ts b/extensions/monitoring-extension/src/index.ts index 1d21fde77..eca71326e 100644 --- a/extensions/monitoring-extension/src/index.ts +++ b/extensions/monitoring-extension/src/index.ts @@ -1,7 +1,9 @@ import { + AppConfigurationEventName, GpuSetting, MonitoringExtension, OperatingSystemInfo, + events, executeOnMain, } from '@janhq/core' @@ -37,6 +39,7 @@ export default class JanMonitoringExtension extends MonitoringExtension { // Attempt to fetch nvidia info await executeOnMain(NODE, 'updateNvidiaInfo') + events.emit(AppConfigurationEventName.OnConfigurationUpdate, {}) } onSettingUpdate(key: string, value: T): void { diff --git a/extensions/monitoring-extension/src/node/index.ts b/extensions/monitoring-extension/src/node/index.ts index 980ee75d1..e32f85082 100644 --- a/extensions/monitoring-extension/src/node/index.ts +++ b/extensions/monitoring-extension/src/node/index.ts @@ -259,15 +259,15 @@ const updateGpuInfo = async () => data.gpu_highest_vram = highestVramId } else { data.gpus = [] - data.gpu_highest_vram = '' + data.gpu_highest_vram = undefined } if (!data.gpus_in_use || data.gpus_in_use.length === 0) { - data.gpus_in_use = [data.gpu_highest_vram] + data.gpus_in_use = data.gpu_highest_vram ? [data.gpu_highest_vram].filter(e => !!e) : [] } data = await updateCudaExistence(data) - console.log(data) + console.log('[MONITORING]::Cuda info: ', data) writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2)) log(`[APP]::${JSON.stringify(data)}`) resolve({}) @@ -344,7 +344,7 @@ const updateCudaExistence = async ( data.cuda.version = match[1] } } - console.log(data) + console.log('[MONITORING]::Finalized cuda info update: ', data) resolve() }) }) diff --git a/joi/src/core/Slider/index.tsx b/joi/src/core/Slider/index.tsx index 40e0c3977..7f8c6cb89 100644 --- a/joi/src/core/Slider/index.tsx +++ b/joi/src/core/Slider/index.tsx @@ -1,5 +1,6 @@ import React from 'react' import * as SliderPrimitive from '@radix-ui/react-slider' +import { twMerge } from 'tailwind-merge' import './styles.scss' @@ -25,7 +26,7 @@ const Slider = ({ disabled, }: Props) => ( - - - + + diff --git a/web/containers/EngineSetting/index.tsx b/web/containers/EngineSetting/index.tsx index acbd507ce..0ae2929bf 100644 --- a/web/containers/EngineSetting/index.tsx +++ b/web/containers/EngineSetting/index.tsx @@ -4,7 +4,10 @@ import SettingComponentBuilder from '@/containers/ModelSetting/SettingComponent' type Props = { componentData: SettingComponentProps[] - onValueChanged: (key: string, value: string | number | boolean) => void + onValueChanged: ( + key: string, + value: string | number | boolean | string[] + ) => void disabled?: boolean } diff --git a/web/containers/ErrorMessage/index.test.tsx b/web/containers/ErrorMessage/index.test.tsx index d2ae5aa81..306a80e32 100644 --- a/web/containers/ErrorMessage/index.test.tsx +++ b/web/containers/ErrorMessage/index.test.tsx @@ -63,9 +63,6 @@ describe('ErrorMessage Component', () => { render() - expect( - screen.getByText('Apologies, something’s amiss!') - ).toBeInTheDocument() expect(screen.getByText('troubleshooting assistance')).toBeInTheDocument() }) diff --git a/web/containers/ErrorMessage/index.tsx b/web/containers/ErrorMessage/index.tsx index 18558c1d8..532f02259 100644 --- a/web/containers/ErrorMessage/index.tsx +++ b/web/containers/ErrorMessage/index.tsx @@ -27,11 +27,8 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => { const getErrorTitle = () => { switch (message.error_code) { - case 
ErrorCode.Unknown:
-        return 'Apologies, something’s amiss!'
       case ErrorCode.InvalidApiKey:
       case ErrorCode.AuthenticationError:
-      case ErrorCode.InvalidRequestError:
         return (
           Invalid API key. Please check your API key from{' '}
         )
       default:
         return (
-          <>
+
           {message.content[0]?.text?.value && ( )}
-
+
         )
     }
   }

   return (
-
+
       {message.status === MessageStatus.Error && (
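Aside: the ErrorMessage hunk above drops the generic 'Apologies' title and the InvalidRequestError case, so only authentication-style codes keep a dedicated title. A minimal TypeScript sketch of the narrowed mapping; apiKeyHelp is a hypothetical stand-in for the stripped JSX, not Jan's actual markup.

import { ErrorCode } from '@janhq/core'

// Hypothetical stand-in for the JSX help text whose tags were lost above.
const apiKeyHelp = 'Invalid API key. Please check your API key and try again.'

// After this change, only authentication-style codes get a dedicated
// title; every other error falls through to the raw message body.
const getErrorTitle = (code?: ErrorCode): string | undefined => {
  switch (code) {
    case ErrorCode.InvalidApiKey:
    case ErrorCode.AuthenticationError:
      return apiKeyHelp
    default:
      return undefined
  }
}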
{
-  const [mainViewState, setMainViewState] = useAtom(mainViewStateAtom)
+  const setMainViewState = useSetAtom(mainViewStateAtom)
   const importModelStage = useAtomValue(getImportModelStageAtom)
   const reduceTransparent = useAtomValue(reduceTransparentAtom)
@@ -68,24 +66,7 @@ const BaseLayout = () => {
-
-
-
-
-
-
+      {importModelStage === 'SELECTING_MODEL' && }
+      {importModelStage === 'MODEL_SELECTED' && }
diff --git a/web/containers/ListContainer/index.tsx b/web/containers/ListContainer/index.tsx
index bd650e315..44e5b2527 100644
--- a/web/containers/ListContainer/index.tsx
+++ b/web/containers/ListContainer/index.tsx
@@ -1,15 +1,30 @@
-import { ReactNode, useCallback, useEffect, useRef } from 'react'
+import { PropsWithChildren, useCallback, useEffect, useRef } from 'react'

 import { ScrollArea } from '@janhq/joi'

-type Props = {
-  children: ReactNode
-}
+import { useAtomValue } from 'jotai'

-const ListContainer = ({ children }: Props) => {
+import { activeThreadAtom } from '@/helpers/atoms/Thread.atom'
+
+const ListContainer = ({ children }: PropsWithChildren) => {
   const listRef = useRef(null)
   const prevScrollTop = useRef(0)
   const isUserManuallyScrollingUp = useRef(false)
+  const activeThread = useAtomValue(activeThreadAtom)
+  const prevActiveThread = useRef(activeThread)
+
+  // Handle active thread changes
+  useEffect(() => {
+    if (prevActiveThread.current?.id !== activeThread?.id) {
+      isUserManuallyScrollingUp.current = false
+      const scrollHeight = listRef.current?.scrollHeight ?? 0
+      listRef.current?.scrollTo({
+        top: scrollHeight,
+        behavior: 'instant',
+      })
+      prevActiveThread.current = activeThread // Update the previous active thread reference
+    }
+  }, [activeThread])

   const handleScroll = useCallback((event: React.UIEvent) => {
     const currentScrollTop = event.currentTarget.scrollTop
@@ -25,6 +40,11 @@
       isUserManuallyScrollingUp.current = false
     }
   }
+
+    if (isUserManuallyScrollingUp.current === true) {
+      event.preventDefault()
+      event.stopPropagation()
+    }
     prevScrollTop.current = currentScrollTop
   }, [])
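The ListContainer hunk above pins the list to the bottom whenever the active thread changes, while a ref tracks whether the user has scrolled up so auto-follow can pause. A condensed sketch of that pattern as a standalone hook; the hook name is illustrative, not Jan API.

import { useEffect, useRef } from 'react'
import type { RefObject } from 'react'

// Scroll a list to its bottom whenever the active thread id changes,
// and reset the manual-scroll flag so auto-follow resumes.
function useScrollToBottomOnSwitch(
  listRef: RefObject<HTMLDivElement>,
  threadId?: string
) {
  const prevThreadId = useRef(threadId)
  const isUserManuallyScrollingUp = useRef(false)

  useEffect(() => {
    if (prevThreadId.current === threadId) return
    isUserManuallyScrollingUp.current = false
    const el = listRef.current
    // 'instant' skips smooth scrolling so the jump is not animated
    if (el) el.scrollTo({ top: el.scrollHeight, behavior: 'instant' })
    prevThreadId.current = threadId
  }, [listRef, threadId])

  return isUserManuallyScrollingUp
}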
diff --git a/web/containers/MainViewContainer/index.tsx b/web/containers/MainViewContainer/index.tsx
index 4f3b4986a..ba7f87fd2 100644
--- a/web/containers/MainViewContainer/index.tsx
+++ b/web/containers/MainViewContainer/index.tsx
@@ -1,5 +1,10 @@
+import { memo } from 'react'
+
+import { motion as m } from 'framer-motion'
 import { useAtomValue } from 'jotai'

+import { twMerge } from 'tailwind-merge'
+
 import { MainViewState } from '@/constants/screens'

 import HubScreen from '@/screens/Hub'
@@ -31,7 +36,26 @@ const MainViewContainer = () => {
       break
   }

-  return children
+  return (
+
+
+        {children}
+
+
+  )
 }

-export default MainViewContainer
+export default memo(MainViewContainer)
diff --git a/web/containers/ModelConfigInput/index.test.tsx b/web/containers/ModelConfigInput/index.test.tsx
index b92bdfcb2..cf9cb9da3 100644
--- a/web/containers/ModelConfigInput/index.test.tsx
+++ b/web/containers/ModelConfigInput/index.test.tsx
@@ -2,7 +2,6 @@ import '@testing-library/jest-dom'
 import React from 'react'
 import { render, fireEvent } from '@testing-library/react'
 import ModelConfigInput from './index'
-import { Tooltip } from '@janhq/joi'

 // Mocking the Tooltip component to simplify testing
 jest.mock('@janhq/joi', () => ({
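The MainViewContainer hunk memoizes the component and, judging by the framer-motion import, fades the switched-in screen; the exact JSX was stripped in transit. A minimal sketch of that combination, assuming a simple opacity fade.

import { memo, type ReactNode } from 'react'
import { motion as m } from 'framer-motion'

type Props = { children: ReactNode }

// Fade the newly selected screen in. memo() keeps this container from
// re-rendering when the parent updates without the view state changing.
const AnimatedView = ({ children }: Props) => (
  <m.div
    initial={{ opacity: 0 }}
    animate={{ opacity: 1 }}
    transition={{ duration: 0.25 }}
  >
    {children}
  </m.div>
)

export default memo(AnimatedView)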
diff --git a/web/containers/ModelConfigInput/index.tsx b/web/containers/ModelConfigInput/index.tsx
index f0e6ea1f2..e67080df2 100644
--- a/web/containers/ModelConfigInput/index.tsx
+++ b/web/containers/ModelConfigInput/index.tsx
@@ -19,28 +19,30 @@ const ModelConfigInput = ({
   title,
   description,
   placeholder,
   onValueChanged,
-}: Props) => (
-
-
-      {title}
-
-
-      }
-      content={description}
+}: Props) => {
+  return (
+
+
+      {title}
+
+
+      }
+      content={description}
+      />
+