diff --git a/.github/workflows/clean-cloudflare-page-preview-url-and-r2.yml b/.github/workflows/clean-cloudflare-page-preview-url-and-r2.yml
index 620f74714..de761ca69 100644
--- a/.github/workflows/clean-cloudflare-page-preview-url-and-r2.yml
+++ b/.github/workflows/clean-cloudflare-page-preview-url-and-r2.yml
@@ -55,10 +55,10 @@ jobs:
     steps:
       - name: install-aws-cli-action
         uses: unfor19/install-aws-cli-action@v1
-      - name: Delete object older than 7 days
+      - name: Delete object older than 10 days
         run: |
           # Get the list of objects in the 'latest' folder
-          OBJECTS=$(aws s3api list-objects --bucket ${{ secrets.CLOUDFLARE_R2_BUCKET_NAME }} --query 'Contents[?LastModified<`'$(date -d "$current_date -30 days" -u +"%Y-%m-%dT%H:%M:%SZ")'`].{Key: Key}' --endpoint-url https://${{ secrets.CLOUDFLARE_ACCOUNT_ID }}.r2.cloudflarestorage.com | jq -c .)
+          OBJECTS=$(aws s3api list-objects --bucket ${{ secrets.CLOUDFLARE_R2_BUCKET_NAME }} --query 'Contents[?LastModified<`'$(date -d "$current_date -10 days" -u +"%Y-%m-%dT%H:%M:%SZ")'`].{Key: Key}' --endpoint-url https://${{ secrets.CLOUDFLARE_ACCOUNT_ID }}.r2.cloudflarestorage.com | jq -c .)
           
           # Create a JSON file for the delete operation
           echo "{\"Objects\": $OBJECTS, \"Quiet\": false}" > delete.json
diff --git a/README.md b/README.md
index 34eecc9f3..e1f74ef23 100644
--- a/README.md
+++ b/README.md
@@ -43,31 +43,31 @@ Jan is an open-source ChatGPT alternative that runs 100% offline on your compute
   <tr style="text-align:center">
     <td style="text-align:center"><b>Stable (Recommended)</b></td>
     <td style="text-align:center">
-      <a href='https://github.com/janhq/jan/releases/download/v0.4.5/jan-win-x64-0.4.5.exe'>
+      <a href='https://github.com/janhq/jan/releases/download/v0.4.6/jan-win-x64-0.4.6.exe'>
         <img src='./docs/static/img/windows.png' style="height:14px; width: 14px" />
         <b>jan.exe</b>
       </a>
     </td>
     <td style="text-align:center">
-      <a href='https://github.com/janhq/jan/releases/download/v0.4.5/jan-mac-x64-0.4.5.dmg'>
+      <a href='https://github.com/janhq/jan/releases/download/v0.4.6/jan-mac-x64-0.4.6.dmg'>
         <img src='./docs/static/img/mac.png' style="height:15px; width: 15px" />
         <b>Intel</b>
       </a>
     </td>
     <td style="text-align:center">
-      <a href='https://github.com/janhq/jan/releases/download/v0.4.5/jan-mac-arm64-0.4.5.dmg'>
+      <a href='https://github.com/janhq/jan/releases/download/v0.4.6/jan-mac-arm64-0.4.6.dmg'>
         <img src='./docs/static/img/mac.png' style="height:15px; width: 15px" />
         <b>M1/M2</b>
       </a>
     </td>
     <td style="text-align:center">
-      <a href='https://github.com/janhq/jan/releases/download/v0.4.5/jan-linux-amd64-0.4.5.deb'>
+      <a href='https://github.com/janhq/jan/releases/download/v0.4.6/jan-linux-amd64-0.4.6.deb'>
         <img src='./docs/static/img/linux.png' style="height:14px; width: 14px" />
         <b>jan.deb</b>
       </a>
     </td>
     <td style="text-align:center">
-      <a href='https://github.com/janhq/jan/releases/download/v0.4.5/jan-linux-x86_64-0.4.5.AppImage'>
+      <a href='https://github.com/janhq/jan/releases/download/v0.4.6/jan-linux-x86_64-0.4.6.AppImage'>
         <img src='./docs/static/img/linux.png' style="height:14px; width: 14px" />
         <b>jan.AppImage</b>
       </a>
@@ -76,31 +76,31 @@ Jan is an open-source ChatGPT alternative that runs 100% offline on your compute
   <tr style="text-align:center">
     <td style="text-align:center"><b>Experimental (Nightly Build)</b></td>
     <td style="text-align:center">
-      <a href='https://delta.jan.ai/latest/jan-win-x64-0.4.5-216.exe'>
+      <a href='https://delta.jan.ai/latest/jan-win-x64-0.4.6-263.exe'>
         <img src='./docs/static/img/windows.png' style="height:14px; width: 14px" />
         <b>jan.exe</b>
       </a>
     </td>
     <td style="text-align:center">
-      <a href='https://delta.jan.ai/latest/jan-mac-x64-0.4.5-216.dmg'>
+      <a href='https://delta.jan.ai/latest/jan-mac-x64-0.4.6-263.dmg'>
         <img src='./docs/static/img/mac.png' style="height:15px; width: 15px" />
         <b>Intel</b>
       </a>
     </td>
     <td style="text-align:center">
-      <a href='https://delta.jan.ai/latest/jan-mac-arm64-0.4.5-216.dmg'>
+      <a href='https://delta.jan.ai/latest/jan-mac-arm64-0.4.6-263.dmg'>
         <img src='./docs/static/img/mac.png' style="height:15px; width: 15px" />
         <b>M1/M2</b>
       </a>
     </td>
     <td style="text-align:center">
-      <a href='https://delta.jan.ai/latest/jan-linux-amd64-0.4.5-216.deb'>
+      <a href='https://delta.jan.ai/latest/jan-linux-amd64-0.4.6-263.deb'>
         <img src='./docs/static/img/linux.png' style="height:14px; width: 14px" />
         <b>jan.deb</b>
       </a>
     </td>
     <td style="text-align:center">
-      <a href='https://delta.jan.ai/latest/jan-linux-x86_64-0.4.5-216.AppImage'>
+      <a href='https://delta.jan.ai/latest/jan-linux-x86_64-0.4.6-263.AppImage'>
         <img src='./docs/static/img/linux.png' style="height:14px; width: 14px" />
         <b>jan.AppImage</b>
       </a>
diff --git a/core/src/node/api/routes/common.ts b/core/src/node/api/routes/common.ts
index 27385e561..8887755fe 100644
--- a/core/src/node/api/routes/common.ts
+++ b/core/src/node/api/routes/common.ts
@@ -12,6 +12,8 @@ import {
 import { JanApiRouteConfiguration } from '../common/configuration'
 import { startModel, stopModel } from '../common/startStopModel'
 import { ModelSettingParams } from '../../../types'
+import { getJanDataFolderPath } from '../../utils'
+import { normalizeFilePath } from '../../path'
 
 export const commonRouter = async (app: HttpServer) => {
   // Common Routes
@@ -52,7 +54,14 @@ export const commonRouter = async (app: HttpServer) => {
   // App Routes
   app.post(`/app/${AppRoute.joinPath}`, async (request: any, reply: any) => {
     const args = JSON.parse(request.body) as any[]
-    reply.send(JSON.stringify(join(...args[0])))
+
+    const paths = args[0].map((arg: string) =>
+      typeof arg === 'string' && (arg.startsWith(`file:/`) || arg.startsWith(`file:\\`))
+        ? join(getJanDataFolderPath(), normalizeFilePath(arg))
+        : arg
+    )
+
+    reply.send(JSON.stringify(join(...paths)))
   })
 
   app.post(`/app/${AppRoute.baseName}`, async (request: any, reply: any) => {
diff --git a/core/src/node/api/routes/download.ts b/core/src/node/api/routes/download.ts
index b4e11f957..ab8c0bd37 100644
--- a/core/src/node/api/routes/download.ts
+++ b/core/src/node/api/routes/download.ts
@@ -4,55 +4,55 @@ import { DownloadManager } from '../../download'
 import { HttpServer } from '../HttpServer'
 import { createWriteStream } from 'fs'
 import { getJanDataFolderPath } from '../../utils'
-import { normalizeFilePath } from "../../path";
+import { normalizeFilePath } from '../../path'
 
 export const downloadRouter = async (app: HttpServer) => {
   app.post(`/${DownloadRoute.downloadFile}`, async (req, res) => {
-    const strictSSL = !(req.query.ignoreSSL === "true");
-    const proxy = req.query.proxy?.startsWith("http") ? req.query.proxy : undefined;
-    const body = JSON.parse(req.body as any);
+    const strictSSL = !(req.query.ignoreSSL === 'true')
+    const proxy = req.query.proxy?.startsWith('http') ? req.query.proxy : undefined
+    const body = JSON.parse(req.body as any)
     const normalizedArgs = body.map((arg: any) => {
-      if (typeof arg === "string") {
-        return join(getJanDataFolderPath(), normalizeFilePath(arg));
+      if (typeof arg === 'string' && arg.startsWith('file:')) {
+        return join(getJanDataFolderPath(), normalizeFilePath(arg))
       }
-      return arg;
-    });
+      return arg
+    })
 
-    const localPath = normalizedArgs[1];
-    const fileName = localPath.split("/").pop() ?? "";
+    const localPath = normalizedArgs[1]
+    const fileName = localPath.split('/').pop() ?? ''
 
-    const request = require("request");
-    const progress = require("request-progress");
+    const request = require('request')
+    const progress = require('request-progress')
 
-    const rq = request({ url: normalizedArgs[0], strictSSL, proxy });
+    const rq = request({ url: normalizedArgs[0], strictSSL, proxy })
     progress(rq, {})
-      .on("progress", function (state: any) {
-        console.log("download onProgress", state);
+      .on('progress', function (state: any) {
+        console.log('download onProgress', state)
       })
-      .on("error", function (err: Error) {
-        console.log("download onError", err);
+      .on('error', function (err: Error) {
+        console.log('download onError', err)
       })
-      .on("end", function () {
-        console.log("download onEnd");
+      .on('end', function () {
+        console.log('download onEnd')
       })
-      .pipe(createWriteStream(normalizedArgs[1]));
+      .pipe(createWriteStream(normalizedArgs[1]))
 
-    DownloadManager.instance.setRequest(fileName, rq);
-  });
+    DownloadManager.instance.setRequest(fileName, rq)
+  })
 
   app.post(`/${DownloadRoute.abortDownload}`, async (req, res) => {
-    const body = JSON.parse(req.body as any);
+    const body = JSON.parse(req.body as any)
     const normalizedArgs = body.map((arg: any) => {
-      if (typeof arg === "string") {
-        return join(getJanDataFolderPath(), normalizeFilePath(arg));
+      if (typeof arg === 'string' && arg.startsWith('file:')) {
+        return join(getJanDataFolderPath(), normalizeFilePath(arg))
       }
-      return arg;
-    });
+      return arg
+    })
 
-    const localPath = normalizedArgs[0];
-    const fileName = localPath.split("/").pop() ?? "";
-    const rq = DownloadManager.instance.networkRequests[fileName];
-    DownloadManager.instance.networkRequests[fileName] = undefined;
-    rq?.abort();
-  });
-};
+    const localPath = normalizedArgs[0]
+    const fileName = localPath.split('/').pop() ?? ''
+    const rq = DownloadManager.instance.networkRequests[fileName]
+    DownloadManager.instance.networkRequests[fileName] = undefined
+    rq?.abort()
+  })
+}
diff --git a/docs/blog/authors.yml b/docs/blog/authors.yml
index 1a62bd7d7..728b0a7da 100644
--- a/docs/blog/authors.yml
+++ b/docs/blog/authors.yml
@@ -4,10 +4,18 @@ dan-jan:
   url: https://github.com/dan-jan 
   image_url: https://avatars.githubusercontent.com/u/101145494?v=4
   email: daniel@jan.ai
+  
+hiro-v:
+  name: Hiro Vuong
+  title: MLE
+  url: https://github.com/hiro-v
+  image_url: https://avatars.githubusercontent.com/u/22463238?v=4
+  email: hiro@jan.ai
 
 ashley-jan:
   name: Ashley Tran
   title: Product Designer
   url: https://github.com/imtuyethan
   image_url: https://avatars.githubusercontent.com/u/89722390?v=4
-  email: ashley@jan.ai
\ No newline at end of file
+  email: ashley@jan.ai
+  
\ No newline at end of file
diff --git a/docs/docs/about/01-README.md b/docs/docs/about/01-README.md
index 3b2759513..d5d3b8dc2 100644
--- a/docs/docs/about/01-README.md
+++ b/docs/docs/about/01-README.md
@@ -110,9 +110,10 @@ Adhering to Jan's privacy preserving philosophy, our analytics philosophy is to
 
 #### What is tracked
 
-1. By default, Github tracks downloads and device metadata for all public Github repos. This helps us troubleshoot & ensure cross platform support.
-1. We use Posthog to track a single `app.opened` event without additional user metadata, in order to understand retention.
-1. Additionally, we plan to enable a `Settings` feature for users to turn off all tracking.
+1. By default, GitHub tracks downloads and device metadata for all public GitHub repositories. This helps us troubleshoot & ensure cross-platform support.
+2. We use [Umami](https://umami.is/) to collect, analyze, and understand application data while maintaining visitor privacy and data ownership. We are using the Umami Cloud in Europe to ensure GDPR compliance. Please see [Umami Privacy Policy](https://umami.is/privacy) for more details.
+3. We use Umami to track a single `app.opened` event without additional user metadata, in order to understand retention. In addition, we track `app.event` to understand app version usage.
+4. Additionally, we plan to enable a `Settings` feature for users to turn off all tracking.
 
 #### Request for help
 
diff --git a/docs/docs/developer/01-overview/04-install-and-prerequisites.md b/docs/docs/developer/01-overview/04-install-and-prerequisites.md
new file mode 100644
index 000000000..110f62e36
--- /dev/null
+++ b/docs/docs/developer/01-overview/04-install-and-prerequisites.md
@@ -0,0 +1,79 @@
+---
+title: Installation and Prerequisites
+slug: /developer/prereq
+description: Guide to install and setup Jan for development.
+keywords:
+  [
+    Jan AI,
+    Jan,
+    ChatGPT alternative,
+    local AI,
+    private AI,
+    conversational AI,
+    no-subscription fee,
+    large language model,
+    installation,
+    prerequisites,
+    developer setup,
+  ]
+---
+
+## Requirements
+
+### Hardware Requirements
+
+Ensure your system meets the following specifications to guarantee a smooth development experience:
+
+- [Hardware Requirements](../../guides/02-installation/06-hardware.md)
+
+### System Requirements
+
+Make sure your operating system meets the specific requirements for Jan development:
+
+- [Windows](../../install/windows/#system-requirements)
+- [MacOS](../../install/mac/#system-requirements)
+- [Linux](../../install/linux/#system-requirements)
+
+## Prerequisites
+
+- [Node.js](https://nodejs.org/en/) (version 20.0.0 or higher)
+- [yarn](https://yarnpkg.com/) (version 1.22.0 or higher)
+- [make](https://www.gnu.org/software/make/) (version 3.81 or higher)
+
+## Instructions
+
+1. **Clone the Repository:**
+
+```bash
+git clone https://github.com/janhq/jan
+cd jan
+git checkout -b DESIRED_BRANCH
+```
+
+2. **Install Dependencies**
+
+```bash
+yarn install
+```
+
+3. **Run Development and Use Jan Desktop**
+
+```bash
+make dev
+```
+
+This command starts the development server and opens the Jan Desktop app.
+
+## For Production Build
+
+```bash
+# Do steps 1 and 2 in the previous section
+# Build the app
+make build
+```
+
+This will build the app for MacOS (M1/M2/M3) for production (with code signing already done) and place the result in the `/electron/dist` folder.
+
+## Troubleshooting
+
+If you run into any issues due to a broken build, please check the [Stuck on a Broken Build](../../troubleshooting/stuck-on-broken-build) guide.
diff --git a/docs/docs/guides/02-installation/01-mac.md b/docs/docs/guides/02-installation/01-mac.md
index 8e67b5bed..7a3961384 100644
--- a/docs/docs/guides/02-installation/01-mac.md
+++ b/docs/docs/guides/02-installation/01-mac.md
@@ -12,11 +12,16 @@ keywords:
     conversational AI,
     no-subscription fee,
     large language model,
+    installation guide,
   ]
 ---
 
 # Installing Jan on MacOS
 
+## System Requirements
+
+Ensure that your MacOS version is 13 or higher to run Jan.
+
 ## Installation
 
 Jan is available for download via our homepage, [https://jan.ai/](https://jan.ai/).
diff --git a/docs/docs/guides/02-installation/02-windows.md b/docs/docs/guides/02-installation/02-windows.md
index b200554d2..d60ab86f7 100644
--- a/docs/docs/guides/02-installation/02-windows.md
+++ b/docs/docs/guides/02-installation/02-windows.md
@@ -12,11 +12,23 @@ keywords:
     conversational AI,
     no-subscription fee,
     large language model,
+    installation guide,
   ]
 ---
 
 # Installing Jan on Windows
 
+## System Requirements
+
+Ensure that your system meets the following requirements:
+
+- Windows 10 or higher is required to run Jan.
+
+To enable GPU support, you will need:
+
+- NVIDIA GPU with CUDA Toolkit 11.7 or higher
+- NVIDIA driver 470.63.01 or higher
+
 ## Installation
 
 Jan is available for download via our homepage, [https://jan.ai](https://jan.ai/).
@@ -59,13 +71,3 @@ To remove all user data associated with Jan, you can delete the `/jan` directory
 cd C:\Users\%USERNAME%\AppData\Roaming
 rmdir /S jan
 ```
-
-## Troubleshooting
-
-### Microsoft Defender
-
-**Error: "Microsoft Defender SmartScreen prevented an unrecognized app from starting"**
-
-Windows Defender may display the above warning when running the Jan Installer, as a standard security measure.
-
-To proceed, select the "More info" option and select the "Run Anyway" option to continue with the installation.
diff --git a/docs/docs/guides/02-installation/03-linux.md b/docs/docs/guides/02-installation/03-linux.md
index 21dfac1a9..0ec7fea60 100644
--- a/docs/docs/guides/02-installation/03-linux.md
+++ b/docs/docs/guides/02-installation/03-linux.md
@@ -12,11 +12,24 @@ keywords:
     conversational AI,
     no-subscription fee,
     large language model,
+    installation guide,
   ]
 ---
 
 # Installing Jan on Linux
 
+## System Requirements
+
+Ensure that your system meets the following requirements:
+
+- glibc 2.27 or higher (check with `ldd --version`)
+- gcc 11, g++ 11, cpp 11, or higher. Refer to this [link](https://jan.ai/guides/troubleshooting/gpu-not-used/#specific-requirements-for-linux) for more information.
+
+To enable GPU support, you will need:
+
+- NVIDIA GPU with CUDA Toolkit 11.7 or higher
+- NVIDIA driver 470.63.01 or higher
+
 ## Installation
 
 Jan is available for download via our homepage, [https://jan.ai](https://jan.ai/).
@@ -66,7 +79,6 @@ jan-linux-amd64-{version}.deb
 # AppImage
 jan-linux-x86_64-{version}.AppImage
 ```
-```
 
 ## Uninstall Jan
 
diff --git a/docs/docs/guides/04-using-models/03-integrate-with-remote-server.mdx b/docs/docs/guides/04-using-models/03-integrate-with-remote-server.mdx
index 533797fca..f0db1bd55 100644
--- a/docs/docs/guides/04-using-models/03-integrate-with-remote-server.mdx
+++ b/docs/docs/guides/04-using-models/03-integrate-with-remote-server.mdx
@@ -65,6 +65,13 @@ Navigate to the `~/jan/models` folder. Create a folder named `gpt-3.5-turbo-16k`
 }
 ```
 
+:::tip
+
+- You can find the list of available models in the [OpenAI Platform](https://platform.openai.com/docs/models/overview).
+- Please note that the `id` property needs to match the model name in the list. For example, if you want to use the [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo), you need to set the `id` property as `gpt-4-1106-preview`.
+
+:::
+
 ### 2. Configure OpenAI API Keys
 
 You can find your API keys in the [OpenAI Platform](https://platform.openai.com/api-keys) and set the OpenAI API keys in `~/jan/engines/openai.json` file.
diff --git a/docs/docs/guides/08-troubleshooting/02-somethings-amiss.mdx b/docs/docs/guides/08-troubleshooting/02-somethings-amiss.mdx
index a5669e36d..4e16e362a 100644
--- a/docs/docs/guides/08-troubleshooting/02-somethings-amiss.mdx
+++ b/docs/docs/guides/08-troubleshooting/02-somethings-amiss.mdx
@@ -45,7 +45,9 @@ This may occur due to several reasons. Please follow these steps to resolve it:
 
 5. If you are on Nvidia GPUs, please download [Cuda](https://developer.nvidia.com/cuda-downloads).
 
-6. When [checking app logs](https://jan.ai/troubleshooting/how-to-get-error-logs/), if you encounter the error log `Bind address failed at 127.0.0.1:3928`, it indicates that the port used by Nitro might already be in use. Use the following commands to check the port status:
+6. If you're using Linux, please ensure that your system meets the following requirements: gcc 11, g++ 11, cpp 11, or higher. Refer to this [link](https://jan.ai/guides/troubleshooting/gpu-not-used/#specific-requirements-for-linux) for more information.
+
+7. When [checking app logs](https://jan.ai/troubleshooting/how-to-get-error-logs/), if you encounter the error log `Bind address failed at 127.0.0.1:3928`, it indicates that the port used by Nitro might already be in use. Use the following commands to check the port status:
 
 <Tabs groupId="operating-systems">
   <TabItem value="mac" label="macOS">
diff --git a/docs/docs/guides/08-troubleshooting/06-unexpected-token.mdx b/docs/docs/guides/08-troubleshooting/06-unexpected-token.mdx
index 973001f1b..1de609ffa 100644
--- a/docs/docs/guides/08-troubleshooting/06-unexpected-token.mdx
+++ b/docs/docs/guides/08-troubleshooting/06-unexpected-token.mdx
@@ -17,4 +17,8 @@ keywords:
   ]
 ---
 
-1. You may receive an error response `Error occurred: Unexpected token '<', "<!DOCTYPE"...is not valid JSON`, when you start a chat with OpenAI models. Using a VPN may help fix the issue.
+You may receive an error response `Error occurred: Unexpected token '<', "<!DOCTYPE"...is not valid JSON`, when you start a chat with OpenAI models. 
+
+1. Check that you added an OpenAI API key. You can get an API key from OpenAI's [developer platform](https://platform.openai.com/). Alternatively, we recommend you download a local model from Jan Hub, which remains free to use and runs on your own computer!
+
+2. Using a VPN may help fix the issue.
diff --git a/docs/docs/guides/08-troubleshooting/07-undefined-issue.mdx b/docs/docs/guides/08-troubleshooting/07-undefined-issue.mdx
new file mode 100644
index 000000000..4aba6438d
--- /dev/null
+++ b/docs/docs/guides/08-troubleshooting/07-undefined-issue.mdx
@@ -0,0 +1,26 @@
+---
+title: Undefined Issue
+slug: /troubleshooting/undefined-issue
+description: Undefined issue troubleshooting guide.
+keywords:
+  [
+    Jan AI,
+    Jan,
+    ChatGPT alternative,
+    local AI,
+    private AI,
+    conversational AI,
+    no-subscription fee,
+    large language model,
+    troubleshooting,
+    undefined issue,
+  ]
+---
+
+You may encounter an "undefined" issue when using Jan. Here are some troubleshooting steps to help you resolve the issue.
+
+1. Try wiping the Jan folder and reopening the Jan app and see if the issue persists.
+2. If the issue persists, try to go to `~/jan/extensions/@janhq/inference-nitro-extensions/dist/bin/<your-os>/nitro` and run the nitro manually and see if you get any error messages.
+3. Resolve the error messages you get from the nitro and see if the issue persists.
+4. Reopen the Jan app and see if the issue is resolved.
+5. If the issue persists, please share with us the [app logs](https://jan.ai/troubleshooting/how-to-get-error-logs/) via [Jan Discord](https://discord.gg/mY69SZaMaC).
diff --git a/docs/docs/template/QA_script.md b/docs/docs/template/QA_script.md
index 05dbed2b4..bba667bcd 100644
--- a/docs/docs/template/QA_script.md
+++ b/docs/docs/template/QA_script.md
@@ -1,6 +1,6 @@
 # [Release Version] QA Script 
 
-**Release Version:**
+**Release Version:** v0.4.6
 
 **Operating System:**
 
@@ -25,10 +25,10 @@
 
 ### 3. Users uninstall app
 
-- [ ] :key: Check that the uninstallation process removes all components of the app from the system.
+- [ ] :key::warning: Check that the uninstallation process removes the app successfully from the system.
 - [ ] Clean the Jan root directory and open the app to check if it creates all the necessary folders, especially models and extensions.
 - [ ] When updating the app, check if the `/models` directory has any JSON files that change according to the update.
-- [ ] Verify if updating the app also updates extensions correctly (test functionality changes; support notifications for necessary tests with each version related to extensions update).
+- [ ] Verify if updating the app also updates extensions correctly (test functionality changes, support notifications for necessary tests with each version related to extensions update).
 
 ### 4. Users close app
 
@@ -60,49 +60,45 @@
 - [ ] :key: Ensure that the conversation thread is maintained without any loss of data upon sending multiple messages.
 - [ ] Test for the ability to send different types of messages (e.g., text, emojis, code blocks).
 - [ ] :key: Validate the scroll functionality in the chat window for lengthy conversations.
-- [ ] Check if the user can renew responses multiple times.
 - [ ] Check if the user can copy the response.
 - [ ] Check if the user can delete responses.
-- [ ] :warning: Test if the user deletes the message midway, then the assistant stops that response.
 - [ ] :key: Check the `clear message` button works.
 - [ ] :key: Check the `delete entire chat` works.
-- [ ] :warning: Check if deleting all the chat retains the system prompt.
+- [ ] Check if deleting all the chat retains the system prompt.
 - [ ] Check the output format of the AI (code blocks, JSON, markdown, ...).
 - [ ] :key: Validate that there is appropriate error handling and messaging if the assistant fails to respond.
 - [ ] Test assistant's ability to maintain context over multiple exchanges.
 - [ ] :key: Check the `create new chat` button works correctly
 - [ ] Confirm that by changing `models` mid-thread the app can still handle it.
-- [ ] Check that by changing `instructions` mid-thread the app can still handle it.
-- [ ] Check the `regenerate` button renews the response.
-- [ ] Check the `Instructions` update correctly after the user updates it midway.
+- [ ] Check the `regenerate` button renews the response (single / multiple times).
+- [ ] Check the `Instructions` update correctly after the user updates it midway (mid-thread).
 
 ### 2. Users can customize chat settings like model parameters via both the GUI & thread.json
 
-- [ ] :key: Confirm that the chat settings options are accessible via the GUI.
+- [ ] :key: Confirm that the Threads settings options are accessible.
 - [ ] Test the functionality to adjust model parameters (e.g., Temperature, Top K, Top P) from the GUI and verify they are reflected in the chat behavior.
 - [ ] :key: Ensure that changes can be saved and persisted between sessions.
 - [ ] Validate that users can access and modify the thread.json file.
 - [ ] :key: Check that changes made in thread.json are correctly applied to the chat session upon reload or restart.
-- [ ] Verify if there is a revert option to go back to previous settings after changes are made.
-- [ ] Test for user feedback or confirmation after saving changes to settings.
 - [ ] Check the maximum and minimum limits of the adjustable parameters and how they affect the assistant's responses.
 - [ ] :key: Validate user permissions for those who can change settings and persist them.
 - [ ] :key: Ensure that users switch between threads with different models, the app can handle it.
 
-### 3. Users can click on a history thread
+### 3. Model dropdown
+- [ ] :key: Model list should highlight recommended based on user RAM
+- [ ] Model size should display (for both installed and imported models)
 
+### 4. Users can click on a history thread
 - [ ] Test the ability to click on any thread in the history panel.
 - [ ] :key: Verify that clicking a thread brings up the past conversation in the main chat window.
 - [ ] :key: Ensure that the selected thread is highlighted or otherwise indicated in the history panel.
 - [ ] Confirm that the chat window displays the entire conversation from the selected history thread without any missing messages.
 - [ ] :key: Check the performance and accuracy of the history feature when dealing with a large number of threads.
 - [ ] Validate that historical threads reflect the exact state of the chat at that time, including settings.
-- [ ] :key: :warning: Test the search functionality within the history panel for quick navigation.
 - [ ] :key: Verify the ability to delete or clean old threads.
 - [ ] :key: Confirm that changing the title of the thread updates correctly.
 
-### 4. Users can config instructions for the assistant.
-
+### 5. Users can config instructions for the assistant.
 - [ ] Ensure there is a clear interface to input or change instructions for the assistant.
 - [ ] Test if the instructions set by the user are being followed by the assistant in subsequent conversations.
 - [ ] :key: Validate that changes to instructions are updated in real time and do not require a restart of the application or session.
@@ -112,6 +108,8 @@
 - [ ] Validate that instructions can be saved with descriptive names for easy retrieval.
 - [ ] :key: Check if the assistant can handle conflicting instructions and how it resolves them.
 - [ ] Ensure that instruction configurations are documented for user reference.
+- [ ] :key: RAG - Users can import documents and the system should process queries about the uploaded file, providing accurate and appropriate responses in the conversation thread.
+
 
 ## D. Hub
 
@@ -125,8 +123,7 @@
 
 - [ ] Display the best model for their RAM at the top.
 - [ ] :key: Ensure that models are labeled with RAM requirements and compatibility.
-- [ ] :key: Validate that the download function is disabled for models that exceed the user's system capabilities.
-- [ ] Test that the platform provides alternative recommendations for models not suitable due to RAM limitations.
+- [ ] :warning: Test that the platform provides alternative recommendations for models not suitable due to RAM limitations.
 - [ ] :key: Check the download model functionality and validate if the cancel download feature works correctly.
 
 ### 3. Users can download models via a HuggingFace URL (coming soon)
@@ -139,7 +136,7 @@
 
 - [ ] :key: Have clear instructions so users can do their own.
 - [ ] :key: Ensure the new model updates after restarting the app.
-- [ ] Ensure it raises clear errors for users to fix the problem while adding a new model.
+- [ ] :warning: Ensure it raises clear errors for users to fix the problem while adding a new model.
 
 ### 5. Users can use the model as they want
 
@@ -149,9 +146,13 @@
 - [ ] Check if starting another model stops the other model entirely.
 - [ ] Check the `Explore models` navigate correctly to the model panel.
 - [ ] :key: Check when deleting a model it will delete all the files on the user's computer.
-- [ ] The recommended tags should present right for the user's hardware.
+- [ ] :warning: The recommended tags should present right for the user's hardware.
 - [ ] Assess that the descriptions of models are accurate and informative.
 
+### 6. Users can Integrate With a Remote Server
+- [ ] :key: Import openAI GPT model https://jan.ai/guides/using-models/integrate-with-remote-server/ and the model displayed in Hub / Thread dropdown
+- [ ] Users can use the remote model properly
+
 ## E. System Monitor
 
 ### 1. Users can see disk and RAM utilization
@@ -181,7 +182,7 @@
 - [ ] Confirm that the application saves the theme preference and persists it across sessions.
 - [ ] Validate that all elements of the UI are compatible with the theme changes and maintain legibility and contrast.
 
-### 2. Users change the extensions
+### 2. Users change the extensions [TBU]
 
 - [ ] Confirm that the `Extensions` tab lists all available plugins.
 - [ ] :key: Test the toggle switch for each plugin to ensure it enables or disables the plugin correctly.
@@ -208,3 +209,19 @@
 - [ ] :key: Test that the application prevents the installation of incompatible or corrupt plugin files.
 - [ ] :key: Check that the user can uninstall or disable custom plugins as easily as pre-installed ones.
 - [ ] Verify that the application's performance remains stable after the installation of custom plugins.
+
+### 5. Advanced Settings
+- [ ] Attempt to test downloading model from hub using **HTTP Proxy** [guideline](https://github.com/janhq/jan/pull/1562)
+- [ ] Users can move **Jan data folder**
+- [ ] Users can click on Reset button to **factory reset** app settings to its original state & delete all usage data.
+
+## G. Local API server
+
+### 1. Local Server Usage with Server Options
+- [ ] :key: Explore API Reference: Swagger API for sending/receiving requests
+    - [ ] Use default server option
+    - [ ] Configure and use custom server options
+- [ ] Test starting/stopping the local API server with different Model/Model settings
+- [ ] Server logs captured with correct Server Options provided
+- [ ] Verify functionality of Open logs/Clear feature
+- [ ] Ensure that threads and other functions impacting the model are disabled while the local server is running
diff --git a/docs/openapi/jan.yaml b/docs/openapi/jan.yaml
index bfff0ad73..864c80fdf 100644
--- a/docs/openapi/jan.yaml
+++ b/docs/openapi/jan.yaml
@@ -67,20 +67,31 @@ paths:
       x-codeSamples:
         - lang: cURL
           source: |
-            curl http://localhost:1337/v1/chat/completions \
-              -H "Content-Type: application/json" \
+            curl -X 'POST' \
+              'http://localhost:1337/v1/chat/completions' \
+              -H 'accept: application/json' \
+              -H 'Content-Type: application/json' \
               -d '{
-                "model": "tinyllama-1.1b",
                 "messages": [
                   {
-                    "role": "system",
-                    "content": "You are a helpful assistant."
+                    "content": "You are a helpful assistant.",
+                    "role": "system"
                   },
                   {
-                    "role": "user",
-                    "content": "Hello!"
+                    "content": "Hello!",
+                    "role": "user"
                   }
-                ]
+                ],
+                "model": "tinyllama-1.1b",
+                "stream": true,
+                "max_tokens": 2048,
+                "stop": [
+                  "hello"
+                ],
+                "frequency_penalty": 0,
+                "presence_penalty": 0,
+                "temperature": 0.7,
+                "top_p": 0.95
               }'
   /models:
     get:
@@ -103,7 +114,9 @@ paths:
       x-codeSamples:
         - lang: cURL
           source: |
-            curl http://localhost:1337/v1/models
+            curl -X 'GET' \
+              'http://localhost:1337/v1/models' \
+              -H 'accept: application/json'
   "/models/download/{model_id}":
     get:
       operationId: downloadModel
@@ -131,7 +144,9 @@ paths:
       x-codeSamples:
         - lang: cURL
           source: |
-            curl -X POST http://localhost:1337/v1/models/download/{model_id}
+            curl -X 'GET' \
+              'http://localhost:1337/v1/models/download/{model_id}' \
+              -H 'accept: application/json'
   "/models/{model_id}":
     get:
       operationId: retrieveModel
@@ -162,7 +177,9 @@ paths:
       x-codeSamples:
         - lang: cURL
           source: |
-            curl http://localhost:1337/v1/models/{model_id}
+            curl -X 'GET' \
+              'http://localhost:1337/v1/models/{model_id}' \
+              -H 'accept: application/json'
     delete:
       operationId: deleteModel
       tags:
@@ -191,7 +208,9 @@ paths:
       x-codeSamples:
         - lang: cURL
           source: |
-            curl -X DELETE http://localhost:1337/v1/models/{model_id}
+            curl -X 'DELETE' \
+              'http://localhost:1337/v1/models/{model_id}' \
+              -H 'accept: application/json'
   /threads:
     post:
       operationId: createThread
diff --git a/docs/openapi/specs/assistants.yaml b/docs/openapi/specs/assistants.yaml
index d784c315a..5db1f6a97 100644
--- a/docs/openapi/specs/assistants.yaml
+++ b/docs/openapi/specs/assistants.yaml
@@ -316,4 +316,4 @@ components:
         deleted:
           type: boolean
           description: Indicates whether the assistant was successfully deleted.
-          example: true
\ No newline at end of file
+          example: true
diff --git a/docs/openapi/specs/chat.yaml b/docs/openapi/specs/chat.yaml
index b324501a8..cfa391598 100644
--- a/docs/openapi/specs/chat.yaml
+++ b/docs/openapi/specs/chat.yaml
@@ -188,4 +188,4 @@ components:
             total_tokens:
               type: integer
               example: 533
-              description: Total number of tokens used
\ No newline at end of file
+              description: Total number of tokens used
diff --git a/docs/openapi/specs/messages.yaml b/docs/openapi/specs/messages.yaml
index d9d7d87a4..6f5fe1a58 100644
--- a/docs/openapi/specs/messages.yaml
+++ b/docs/openapi/specs/messages.yaml
@@ -1,3 +1,4 @@
+---
 components:
   schemas:
     MessageObject:
@@ -75,7 +76,7 @@ components:
           example: msg_abc123
         object:
           type: string
-          description: "Type of the object, indicating it's a thread message."
+          description: Type of the object, indicating it's a thread message.
           default: thread.message
         created_at:
           type: integer
@@ -88,7 +89,7 @@ components:
           example: thread_abc123
         role:
           type: string
-          description: "Role of the sender, either 'user' or 'assistant'."
+          description: Role of the sender, either 'user' or 'assistant'.
           example: user
         content:
           type: array
@@ -97,7 +98,7 @@ components:
             properties:
               type:
                 type: string
-                description: "Type of content, e.g., 'text'."
+                description: Type of content, e.g., 'text'.
                 example: text
               text:
                 type: object
@@ -110,21 +111,21 @@ components:
                     type: array
                     items:
                       type: string
-                    description: "Annotations for the text content, if any."
+                    description: Annotations for the text content, if any.
                     example: []
         file_ids:
           type: array
           items:
             type: string
-          description: "Array of file IDs associated with the message, if any."
+          description: Array of file IDs associated with the message, if any.
           example: []
         assistant_id:
           type: string
-          description: "Identifier of the assistant involved in the message, if applicable."
+          description: Identifier of the assistant involved in the message, if applicable.
           example: null
         run_id:
           type: string
-          description: "Run ID associated with the message, if applicable."
+          description: Run ID associated with the message, if applicable.
           example: null
         metadata:
           type: object
@@ -139,7 +140,7 @@ components:
           example: msg_abc123
         object:
           type: string
-          description: "Type of the object, indicating it's a thread message."
+          description: Type of the object, indicating it's a thread message.
           example: thread.message
         created_at:
           type: integer
@@ -152,7 +153,7 @@ components:
           example: thread_abc123
         role:
           type: string
-          description: "Role of the sender, either 'user' or 'assistant'."
+          description: Role of the sender, either 'user' or 'assistant'.
           example: user
         content:
           type: array
@@ -161,7 +162,7 @@ components:
             properties:
               type:
                 type: string
-                description: "Type of content, e.g., 'text'."
+                description: Type of content, e.g., 'text'.
                 example: text
               text:
                 type: object
@@ -174,21 +175,21 @@ components:
                     type: array
                     items:
                       type: string
-                    description: "Annotations for the text content, if any."
+                    description: Annotations for the text content, if any.
                     example: []
         file_ids:
           type: array
           items:
             type: string
-          description: "Array of file IDs associated with the message, if any."
+          description: Array of file IDs associated with the message, if any.
           example: []
         assistant_id:
           type: string
-          description: "Identifier of the assistant involved in the message, if applicable."
+          description: Identifier of the assistant involved in the message, if applicable.
           example: null
         run_id:
           type: string
-          description: "Run ID associated with the message, if applicable."
+          description: Run ID associated with the message, if applicable.
           example: null
         metadata:
           type: object
@@ -199,7 +200,7 @@ components:
       properties:
         object:
           type: string
-          description: "Type of the object, indicating it's a list."
+          description: Type of the object, indicating it's a list.
           default: list
         data:
           type: array
@@ -226,7 +227,7 @@ components:
           example: msg_abc123
         object:
           type: string
-          description: "Type of the object, indicating it's a thread message."
+          description: Type of the object, indicating it's a thread message.
           example: thread.message
         created_at:
           type: integer
@@ -239,7 +240,7 @@ components:
           example: thread_abc123
         role:
           type: string
-          description: "Role of the sender, either 'user' or 'assistant'."
+          description: Role of the sender, either 'user' or 'assistant'.
           example: user
         content:
           type: array
@@ -248,7 +249,7 @@ components:
             properties:
               type:
                 type: string
-                description: "Type of content, e.g., 'text'."
+                description: Type of content, e.g., 'text'.
               text:
                 type: object
                 properties:
@@ -260,20 +261,20 @@ components:
                     type: array
                     items:
                       type: string
-                    description: "Annotations for the text content, if any."
+                    description: Annotations for the text content, if any.
         file_ids:
           type: array
           items:
             type: string
-          description: "Array of file IDs associated with the message, if any."
+          description: Array of file IDs associated with the message, if any.
           example: []
         assistant_id:
           type: string
-          description: "Identifier of the assistant involved in the message, if applicable."
+          description: Identifier of the assistant involved in the message, if applicable.
           example: null
         run_id:
           type: string
-          description: "Run ID associated with the message, if applicable."
+          description: Run ID associated with the message, if applicable.
           example: null
         metadata:
           type: object
@@ -309,4 +310,4 @@ components:
         data:
           type: array
           items:
-            $ref: "#/components/schemas/MessageFileObject"
\ No newline at end of file
+            $ref: "#/components/schemas/MessageFileObject"
diff --git a/docs/openapi/specs/models.yaml b/docs/openapi/specs/models.yaml
index 8113f3ab8..40e6abaaf 100644
--- a/docs/openapi/specs/models.yaml
+++ b/docs/openapi/specs/models.yaml
@@ -18,114 +18,82 @@ components:
     Model:
       type: object
       properties:
-        type:
+        source_url:
           type: string
-          default: model
-          description: The type of the object.
-        version:
-          type: string
-          default: "1"
-          description: The version number of the model.
+          format: uri
+          description: URL to the source of the model.
+          example: https://huggingface.co/janhq/trinity-v1.2-GGUF/resolve/main/trinity-v1.2.Q4_K_M.gguf
         id:
           type: string
-          description: Unique identifier used in chat-completions model_name, matches
+          description:
+            Unique identifier used in chat-completions model_name, matches
             folder name.
-          example: zephyr-7b
+          example: trinity-v1.2-7b
+        object:
+          type: string
+          example: model
         name:
           type: string
           description: Name of the model.
-          example: Zephyr 7B
-        owned_by:
+          example: Trinity-v1.2 7B Q4
+        version:
           type: string
-          description: Compatibility field for OpenAI.
-          default: ""
-        created:
-          type: integer
-          format: int64
-          description: Unix timestamp representing the creation time.
+          default: "1.0"
+          description: The version number of the model.
         description:
           type: string
           description: Description of the model.
-        state:
-          type: string
-          enum:
-            - null
-            - downloading
-            - ready
-            - starting
-            - stopping
-          description: Current state of the model.
+          example:
+            Trinity is an experimental model merge using the Slerp method.
+            Recommended for daily assistance purposes.
         format:
           type: string
           description: State format of the model, distinct from the engine.
-          example: ggufv3
-        source:
-          type: array
-          items:
-            type: object
-            properties:
-              url:
-                format: uri
-                description: URL to the source of the model.
-                example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
-              filename:
-                type: string
-                description: Filename of the model.
-                example: zephyr-7b-beta.Q4_K_M.gguf
+          example: gguf
         settings:
           type: object
           properties:
             ctx_len:
-              type: string
+              type: integer
               description: Context length.
-              example: "4096"
-            ngl:
+              example: 4096
+            prompt_template:
               type: string
-              description: Number of layers.
-              example: "100"
-            embedding:
-              type: string
-              description: Indicates if embedding is enabled.
-              example: "true"
-            n_parallel:
-              type: string
-              description: Number of parallel processes.
-              example: "4"
+              example: "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
           additionalProperties: false
         parameters:
           type: object
           properties:
             temperature:
-              type: string
-              description: Temperature setting for the model.
-              example: "0.7"
-            token_limit:
-              type: string
-              description: Token limit for the model.
-              example: "4096"
-            top_k:
-              type: string
-              description: Top-k setting for the model.
-              example: "0"
+              example: 0.7
             top_p:
-              type: string
-              description: Top-p setting for the model.
-              example: "1"
+              example: 0.95
             stream:
-              type: string
-              description: Indicates if streaming is enabled.
-              example: "true"
+              example: true
+            max_tokens:
+              example: 4096
+            stop:
+              example: []
+            frequency_penalty:
+              example: 0
+            presence_penalty:
+              example: 0
           additionalProperties: false
         metadata:
-          type: object
-          description: Additional metadata.
-        assets:
-          type: array
-          items:
+          author:
             type: string
-          description: List of assets related to the model.
-      required:
-        - source
+            example: Jan
+          tags:
+            example:
+              - 7B
+              - Merged
+              - Featured
+          size:
+            example: 4370000000
+          cover:
+            example: https://raw.githubusercontent.com/janhq/jan/main/models/trinity-v1.2-7b/cover.png
+        engine:
+          example: nitro
     ModelObject:
       type: object
       properties:
@@ -133,7 +101,7 @@ components:
           type: string
           description: |
             The identifier of the model.
-          example: zephyr-7b
+          example: trinity-v1.2-7b
         object:
           type: string
           description: |
@@ -153,197 +121,89 @@ components:
     GetModelResponse:
       type: object
       properties:
+        source_url:
+          type: string
+          format: uri
+          description: URL to the source of the model.
+          example: https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf
         id:
           type: string
-          description: The identifier of the model.
-          example: zephyr-7b
+          description:
+            Unique identifier used in chat-completions model_name, matches
+            folder name.
+          example: mistral-ins-7b-q4
         object:
           type: string
-          description: Type of the object, indicating it's a model.
-          default: model
-        created:
-          type: integer
-          format: int64
-          description: Unix timestamp representing the creation time of the model.
-        owned_by:
+          example: model
+        name:
           type: string
-          description: The entity that owns the model.
-          example: _
-        state:
+          description: Name of the model.
+          example: Mistral Instruct 7B Q4
+        version:
           type: string
-          enum:
-            - not_downloaded
-            - downloaded
-            - running
-            - stopped
-          description: The current state of the model.
-        source:
-          type: array
-          items:
-            type: object
-            properties:
-              url:
-                format: uri
-                description: URL to the source of the model.
-                example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
-              filename:
-                type: string
-                description: Filename of the model.
-                example: zephyr-7b-beta.Q4_K_M.gguf
-        engine_parameters:
-          type: object
-          properties:
-            pre_prompt:
-              type: string
-              description: Predefined prompt used for setting up internal configurations.
-              default: ""
-              example: Initial setup complete.
-            system_prompt:
-              type: string
-              description: Prefix used for system-level prompts.
-              default: "SYSTEM: "
-            user_prompt:
-              type: string
-              description: Prefix used for user prompts.
-              default: "USER: "
-            ai_prompt:
-              type: string
-              description: Prefix used for assistant prompts.
-              default: "ASSISTANT: "
-            ngl:
-              type: integer
-              description: Number of neural network layers loaded onto the GPU for
-                acceleration.
-              minimum: 0
-              maximum: 100
-              default: 100
-              example: 100
-            ctx_len:
-              type: integer
-              description: Context length for model operations, varies based on the specific
-                model.
-              minimum: 128
-              maximum: 4096
-              default: 4096
-              example: 4096
-            n_parallel:
-              type: integer
-              description: Number of parallel operations, relevant when continuous batching is
-                enabled.
-              minimum: 1
-              maximum: 10
-              default: 1
-              example: 4
-            cont_batching:
-              type: boolean
-              description: Indicates if continuous batching is used for processing.
-              default: false
-              example: false
-            cpu_threads:
-              type: integer
-              description: Number of threads allocated for CPU-based inference.
-              minimum: 1
-              example: 8
-            embedding:
-              type: boolean
-              description: Indicates if embedding layers are enabled in the model.
-              default: true
-              example: true
-        model_parameters:
+          default: "1.0"
+          description: The version number of the model.
+        description:
+          type: string
+          description: Description of the model.
+          example:
+            Trinity is an experimental model merge using the Slerp method.
+            Recommended for daily assistance purposes.
+        format:
+          type: string
+          description: State format of the model, distinct from the engine.
+          example: gguf
+        settings:
           type: object
           properties:
             ctx_len:
               type: integer
-              description: Maximum context length the model can handle.
-              minimum: 0
-              maximum: 4096
-              default: 4096
+              description: Context length.
               example: 4096
-            ngl:
-              type: integer
-              description: Number of layers in the neural network.
-              minimum: 1
-              maximum: 100
-              default: 100
-              example: 100
-            embedding:
-              type: boolean
-              description: Indicates if embedding layers are used.
-              default: true
-              example: true
-            n_parallel:
-              type: integer
-              description: Number of parallel processes the model can run.
-              minimum: 1
-              maximum: 10
-              default: 1
-              example: 4
+            prompt_template:
+              type: string
+              example: "[INST] {prompt} [/INST]"
+          additionalProperties: false
+        parameters:
+          type: object
+          properties:
             temperature:
-              type: number
-              description: Controls randomness in model's responses. Higher values lead to
-                more random responses.
-              minimum: 0
-              maximum: 2
-              default: 0.7
               example: 0.7
-            token_limit:
-              type: integer
-              description: Maximum number of tokens the model can generate in a single
-                response.
-              minimum: 1
-              maximum: 4096
-              default: 4096
-              example: 4096
-            top_k:
-              type: integer
-              description: Limits the model to consider only the top k most likely next tokens
-                at each step.
-              minimum: 0
-              maximum: 100
-              default: 0
-              example: 0
             top_p:
-              type: number
-              description: Nucleus sampling parameter. The model considers the smallest set of
-                tokens whose cumulative probability exceeds the top_p value.
-              minimum: 0
-              maximum: 1
-              default: 1
-              example: 1
+              example: 0.95
+            stream:
+              example: true
+            max_tokens:
+              example: 4096
+            stop:
+              example: []
+            frequency_penalty:
+              example: 0
+            presence_penalty:
+              example: 0
+          additionalProperties: false
         metadata:
-          type: object
-          properties:
-            engine:
-              type: string
-              description: The engine used by the model.
-              enum:
-                - nitro
-                - openai
-                - hf_inference
-            quantization:
-              type: string
-              description: Quantization parameter of the model.
-              example: Q3_K_L
-            size:
-              type: string
-              description: Size of the model.
-              example: 7B
-      required:
-        - id
-        - object
-        - created
-        - owned_by
-        - state
-        - source
-        - parameters
-        - metadata
+          author:
+            type: string
+            example: MistralAI
+          tags:
+            example:
+              - 7B
+              - Featured
+              - Foundation Model
+          size:
+            example: 4370000000
+          cover:
+            example: https://raw.githubusercontent.com/janhq/jan/main/models/mistral-ins-7b-q4/cover.png
+        engine:
+          example: nitro
     DeleteModelResponse:
       type: object
       properties:
         id:
           type: string
           description: The identifier of the model that was deleted.
-          example: model-zephyr-7B
+          example: mistral-ins-7b-q4
         object:
           type: string
           description: Type of the object, indicating it's a model.
diff --git a/docs/openapi/specs/threads.yaml b/docs/openapi/specs/threads.yaml
index fe00f7588..40b2463fa 100644
--- a/docs/openapi/specs/threads.yaml
+++ b/docs/openapi/specs/threads.yaml
@@ -142,7 +142,7 @@ components:
                 example: Jan
               instructions:
                 type: string
-                description: |
+                description: >
                   The instruction of assistant, defaults to "Be my grammar corrector"
               model:
                 type: object
@@ -224,4 +224,4 @@ components:
         deleted:
           type: boolean
           description: Indicates whether the thread was successfully deleted.
-          example: true
\ No newline at end of file
+          example: true
diff --git a/electron/playwright.config.ts b/electron/playwright.config.ts
index 1fa3313f2..8047b7513 100644
--- a/electron/playwright.config.ts
+++ b/electron/playwright.config.ts
@@ -1,9 +1,16 @@
 import { PlaywrightTestConfig } from '@playwright/test'
 
 const config: PlaywrightTestConfig = {
-  testDir: './tests',
+  testDir: './tests/e2e',
   retries: 0,
   globalTimeout: 300000,
+  use: {
+    screenshot: 'only-on-failure',
+    video: 'retain-on-failure',
+    trace: 'retain-on-failure',
+  },
+
+  reporter: [['html', { outputFolder: './playwright-report' }]],
 }
 
 export default config
diff --git a/electron/tests/e2e/hub.e2e.spec.ts b/electron/tests/e2e/hub.e2e.spec.ts
new file mode 100644
index 000000000..68632058e
--- /dev/null
+++ b/electron/tests/e2e/hub.e2e.spec.ts
@@ -0,0 +1,34 @@
+import {
+  page,
+  test,
+  setupElectron,
+  teardownElectron,
+  TIMEOUT,
+} from '../pages/basePage'
+import { expect } from '@playwright/test'
+
+test.beforeAll(async () => {
+  const appInfo = await setupElectron()
+  expect(appInfo.asar).toBe(true)
+  expect(appInfo.executable).toBeTruthy()
+  expect(appInfo.main).toBeTruthy()
+  expect(appInfo.name).toBe('jan')
+  expect(appInfo.packageJson).toBeTruthy()
+  expect(appInfo.packageJson.name).toBe('jan')
+  expect(appInfo.platform).toBeTruthy()
+  expect(appInfo.platform).toBe(process.platform)
+  expect(appInfo.resourcesDir).toBeTruthy()
+})
+
+test.afterAll(async () => {
+  await teardownElectron()
+})
+
+test('explores hub', async () => {
+  await page.getByTestId('Hub').first().click({ timeout: TIMEOUT })
+  // isVisible() only resolves to a boolean and its result was discarded, so
+  // the old check could never fail; assert with an auto-retrying matcher.
+  await expect(page.getByTestId('hub-container-test-id')).toBeVisible({
+    timeout: TIMEOUT,
+  })
+})
diff --git a/electron/tests/e2e/navigation.e2e.spec.ts b/electron/tests/e2e/navigation.e2e.spec.ts
new file mode 100644
index 000000000..2da59953c
--- /dev/null
+++ b/electron/tests/e2e/navigation.e2e.spec.ts
@@ -0,0 +1,38 @@
+import { expect } from '@playwright/test'
+import {
+  page,
+  setupElectron,
+  TIMEOUT,
+  test,
+  teardownElectron,
+} from '../pages/basePage'
+
+test.beforeAll(async () => {
+  await setupElectron()
+})
+
+test.afterAll(async () => {
+  await teardownElectron()
+})
+
+test('renders left navigation panel', async () => {
+  // Both navigation entries must report as enabled before navigating.
+  const systemMonitorEnabled = await page
+    .getByTestId('System Monitor')
+    .first()
+    .isEnabled({ timeout: TIMEOUT })
+  const threadEnabled = await page
+    .getByTestId('Thread')
+    .first()
+    .isEnabled({ timeout: TIMEOUT })
+  expect(systemMonitorEnabled).toBe(true)
+  expect(threadEnabled).toBe(true)
+  // Clicking the Local API Server entry should bring up its screen.
+  await page.getByTestId('Local API Server').first().click({
+    timeout: TIMEOUT,
+  })
+  const localServer = page.getByTestId('local-server-testid').first()
+  await expect(localServer).toBeVisible({
+    timeout: TIMEOUT,
+  })
+})
diff --git a/electron/tests/e2e/settings.e2e.spec.ts b/electron/tests/e2e/settings.e2e.spec.ts
new file mode 100644
index 000000000..54215d9b1
--- /dev/null
+++ b/electron/tests/e2e/settings.e2e.spec.ts
@@ -0,0 +1,23 @@
+import { expect } from '@playwright/test'
+
+import {
+  setupElectron,
+  teardownElectron,
+  test,
+  page,
+  TIMEOUT,
+} from '../pages/basePage'
+
+test.beforeAll(async () => {
+  await setupElectron()
+})
+
+test.afterAll(async () => {
+  await teardownElectron()
+})
+
+test('shows settings', async () => {
+  await page.getByTestId('Settings').first().click({ timeout: TIMEOUT })
+  const settingDescription = page.getByTestId('testid-setting-description')
+  await expect(settingDescription).toBeVisible({ timeout: TIMEOUT })
+})
diff --git a/electron/tests/hub.e2e.spec.ts b/electron/tests/hub.e2e.spec.ts
deleted file mode 100644
index cc72e037e..000000000
--- a/electron/tests/hub.e2e.spec.ts
+++ /dev/null
@@ -1,48 +0,0 @@
-import { _electron as electron } from 'playwright'
-import { ElectronApplication, Page, expect, test } from '@playwright/test'
-
-import {
-  findLatestBuild,
-  parseElectronApp,
-  stubDialog,
-} from 'electron-playwright-helpers'
-
-let electronApp: ElectronApplication
-let page: Page
-const TIMEOUT: number = parseInt(process.env.TEST_TIMEOUT || '300000')
-
-test.beforeAll(async () => {
-  process.env.CI = 'e2e'
-
-  const latestBuild = findLatestBuild('dist')
-  expect(latestBuild).toBeTruthy()
-
-  // parse the packaged Electron app and find paths and other info
-  const appInfo = parseElectronApp(latestBuild)
-  expect(appInfo).toBeTruthy()
-
-  electronApp = await electron.launch({
-    args: [appInfo.main], // main file from package.json
-    executablePath: appInfo.executable, // path to the Electron executable
-  })
-  await stubDialog(electronApp, 'showMessageBox', { response: 1 })
-
-  page = await electronApp.firstWindow({
-    timeout: TIMEOUT,
-  })
-})
-
-test.afterAll(async () => {
-  await electronApp.close()
-  await page.close()
-})
-
-test('explores hub', async () => {
-  test.setTimeout(TIMEOUT)
-  await page.getByTestId('Hub').first().click({
-    timeout: TIMEOUT,
-  })
-  await page.getByTestId('hub-container-test-id').isVisible({
-    timeout: TIMEOUT,
-  })
-})
diff --git a/electron/tests/navigation.e2e.spec.ts b/electron/tests/navigation.e2e.spec.ts
deleted file mode 100644
index 5c8721c2f..000000000
--- a/electron/tests/navigation.e2e.spec.ts
+++ /dev/null
@@ -1,61 +0,0 @@
-import { _electron as electron } from 'playwright'
-import { ElectronApplication, Page, expect, test } from '@playwright/test'
-
-import {
-  findLatestBuild,
-  parseElectronApp,
-  stubDialog,
-} from 'electron-playwright-helpers'
-
-let electronApp: ElectronApplication
-let page: Page
-const TIMEOUT: number = parseInt(process.env.TEST_TIMEOUT || '300000')
-
-test.beforeAll(async () => {
-  process.env.CI = 'e2e'
-
-  const latestBuild = findLatestBuild('dist')
-  expect(latestBuild).toBeTruthy()
-
-  // parse the packaged Electron app and find paths and other info
-  const appInfo = parseElectronApp(latestBuild)
-  expect(appInfo).toBeTruthy()
-
-  electronApp = await electron.launch({
-    args: [appInfo.main], // main file from package.json
-    executablePath: appInfo.executable, // path to the Electron executable
-  })
-  await stubDialog(electronApp, 'showMessageBox', { response: 1 })
-
-  page = await electronApp.firstWindow({
-    timeout: TIMEOUT,
-  })
-})
-
-test.afterAll(async () => {
-  await electronApp.close()
-  await page.close()
-})
-
-test('renders left navigation panel', async () => {
-  test.setTimeout(TIMEOUT)
-  const systemMonitorBtn = await page
-    .getByTestId('System Monitor')
-    .first()
-    .isEnabled({
-      timeout: TIMEOUT,
-    })
-  const settingsBtn = await page
-    .getByTestId('Thread')
-    .first()
-    .isEnabled({ timeout: TIMEOUT })
-  expect([systemMonitorBtn, settingsBtn].filter((e) => !e).length).toBe(0)
-  // Chat section should be there
-  await page.getByTestId('Local API Server').first().click({
-    timeout: TIMEOUT,
-  })
-  const localServer = await page.getByTestId('local-server-testid').first()
-  await expect(localServer).toBeVisible({
-    timeout: TIMEOUT,
-  })
-})
diff --git a/electron/tests/pages/basePage.ts b/electron/tests/pages/basePage.ts
new file mode 100644
index 000000000..5f1a6fca1
--- /dev/null
+++ b/electron/tests/pages/basePage.ts
@@ -0,0 +1,93 @@
+import {
+  expect,
+  test as base,
+  _electron as electron,
+  ElectronApplication,
+  Page,
+} from '@playwright/test'
+import {
+  findLatestBuild,
+  parseElectronApp,
+  stubDialog,
+} from 'electron-playwright-helpers'
+
+/** Timeout (ms) for e2e waits; the TEST_TIMEOUT env var overrides it. */
+export const TIMEOUT: number = parseInt(process.env.TEST_TIMEOUT || '300000')
+
+// Shared with the spec files; undefined until setupElectron() assigns them.
+export let electronApp: ElectronApplication
+export let page: Page
+
+/**
+ * Launches the build returned by findLatestBuild('dist') and stores the
+ * application and its first window in the exported `electronApp` / `page`.
+ *
+ * @returns the parsed app info so callers can reuse it.
+ */
+export async function setupElectron() {
+  process.env.CI = 'e2e'
+
+  const latestBuild = findLatestBuild('dist')
+  expect(latestBuild).toBeTruthy()
+
+  // parse the packaged Electron app and find paths and other info
+  const appInfo = parseElectronApp(latestBuild)
+  expect(appInfo).toBeTruthy()
+
+  electronApp = await electron.launch({
+    args: [appInfo.main], // main file from package.json
+    executablePath: appInfo.executable, // path to the Electron executable
+  })
+  // Stub showMessageBox so it answers with response 1 instead of blocking.
+  await stubDialog(electronApp, 'showMessageBox', { response: 1 })
+
+  page = await electronApp.firstWindow({
+    timeout: TIMEOUT,
+  })
+  // Return appInfo for future use
+  return appInfo
+}
+
+/**
+ * Closes the window and then the application started by setupElectron().
+ *
+ * Either handle may be missing when setup failed part-way (for example the
+ * app launched but no window appeared), so each close is guarded; otherwise a
+ * TypeError on `page` would skip closing the app and leave Electron running.
+ */
+export async function teardownElectron() {
+  await page?.close()
+  await electronApp?.close()
+}
+
+/**
+ * Playwright `test` extended with an automatic fixture that attaches a
+ * screenshot of the shared `page` to the report when a test does not end
+ * with its expected status.
+ */
+export const test = base.extend<{
+  attachScreenshotsToReport: void
+}>({
+  attachScreenshotsToReport: [
+    // No built-in fixture is used; Playwright still needs an object pattern.
+    // eslint-disable-next-line no-empty-pattern
+    async ({}, use, testInfo) => {
+      await use()
+
+      // After the test, we can check whether the test passed or failed.
+      const failed = testInfo.status !== testInfo.expectedStatus
+      // Skip the capture when there is no open window, so a setup failure is
+      // not masked by a second error thrown from screenshot().
+      if (failed && page && !page.isClosed()) {
+        const screenshot = await page.screenshot()
+        await testInfo.attach('screenshot', {
+          body: screenshot,
+          contentType: 'image/png',
+        })
+      }
+    },
+    { auto: true },
+  ],
+})
+
+test.setTimeout(TIMEOUT)
diff --git a/electron/tests/settings.e2e.spec.ts b/electron/tests/settings.e2e.spec.ts
deleted file mode 100644
index ad2d7b4a4..000000000
--- a/electron/tests/settings.e2e.spec.ts
+++ /dev/null
@@ -1,45 +0,0 @@
-import { _electron as electron } from 'playwright'
-import { ElectronApplication, Page, expect, test } from '@playwright/test'
-
-import {
-  findLatestBuild,
-  parseElectronApp,
-  stubDialog,
-} from 'electron-playwright-helpers'
-
-let electronApp: ElectronApplication
-let page: Page
-const TIMEOUT: number = parseInt(process.env.TEST_TIMEOUT || '300000')
-
-test.beforeAll(async () => {
-  process.env.CI = 'e2e'
-
-  const latestBuild = findLatestBuild('dist')
-  expect(latestBuild).toBeTruthy()
-
-  // parse the packaged Electron app and find paths and other info
-  const appInfo = parseElectronApp(latestBuild)
-  expect(appInfo).toBeTruthy()
-
-  electronApp = await electron.launch({
-    args: [appInfo.main], // main file from package.json
-    executablePath: appInfo.executable, // path to the Electron executable
-  })
-  await stubDialog(electronApp, 'showMessageBox', { response: 1 })
-
-  page = await electronApp.firstWindow({
-    timeout: TIMEOUT,
-  })
-})
-
-test.afterAll(async () => {
-  await electronApp.close()
-  await page.close()
-})
-
-test('shows settings', async () => {
-  test.setTimeout(TIMEOUT)
-  await page.getByTestId('Settings').first().click({ timeout: TIMEOUT })
-  const settingDescription = page.getByTestId('testid-setting-description')
-  await expect(settingDescription).toBeVisible({ timeout: TIMEOUT })
-})
diff --git a/extensions/conversational-extension/src/index.ts b/extensions/conversational-extension/src/index.ts
index 3d28a9c1d..bf8c213ad 100644
--- a/extensions/conversational-extension/src/index.ts
+++ b/extensions/conversational-extension/src/index.ts
@@ -12,7 +12,7 @@ import {
  * functionality for managing threads.
  */
 export default class JSONConversationalExtension extends ConversationalExtension {
-  private static readonly _homeDir = 'file://threads'
+  private static readonly _threadFolder = 'file://threads'
   private static readonly _threadInfoFileName = 'thread.json'
   private static readonly _threadMessagesFileName = 'messages.jsonl'
 
@@ -20,8 +20,8 @@ export default class JSONConversationalExtension extends ConversationalExtension
    * Called when the extension is loaded.
    */
   async onLoad() {
-    if (!(await fs.existsSync(JSONConversationalExtension._homeDir)))
-      await fs.mkdirSync(JSONConversationalExtension._homeDir)
+    if (!(await fs.existsSync(JSONConversationalExtension._threadFolder)))
+      await fs.mkdirSync(JSONConversationalExtension._threadFolder)
     console.debug('JSONConversationalExtension loaded')
   }
 
@@ -68,7 +68,7 @@ export default class JSONConversationalExtension extends ConversationalExtension
   async saveThread(thread: Thread): Promise<void> {
     try {
       const threadDirPath = await joinPath([
-        JSONConversationalExtension._homeDir,
+        JSONConversationalExtension._threadFolder,
         thread.id,
       ])
       const threadJsonPath = await joinPath([
@@ -92,7 +92,7 @@ export default class JSONConversationalExtension extends ConversationalExtension
    */
   async deleteThread(threadId: string): Promise<void> {
     const path = await joinPath([
-      JSONConversationalExtension._homeDir,
+      JSONConversationalExtension._threadFolder,
       `${threadId}`,
     ])
     try {
@@ -109,7 +109,7 @@ export default class JSONConversationalExtension extends ConversationalExtension
   async addNewMessage(message: ThreadMessage): Promise<void> {
     try {
       const threadDirPath = await joinPath([
-        JSONConversationalExtension._homeDir,
+        JSONConversationalExtension._threadFolder,
         message.thread_id,
       ])
       const threadMessagePath = await joinPath([
@@ -177,7 +177,7 @@ export default class JSONConversationalExtension extends ConversationalExtension
   ): Promise<void> {
     try {
       const threadDirPath = await joinPath([
-        JSONConversationalExtension._homeDir,
+        JSONConversationalExtension._threadFolder,
         threadId,
       ])
       const threadMessagePath = await joinPath([
@@ -205,7 +205,7 @@ export default class JSONConversationalExtension extends ConversationalExtension
   private async readThread(threadDirName: string): Promise<any> {
     return fs.readFileSync(
       await joinPath([
-        JSONConversationalExtension._homeDir,
+        JSONConversationalExtension._threadFolder,
         threadDirName,
         JSONConversationalExtension._threadInfoFileName,
       ]),
@@ -219,14 +219,14 @@ export default class JSONConversationalExtension extends ConversationalExtension
    */
   private async getValidThreadDirs(): Promise<string[]> {
     const fileInsideThread: string[] = await fs.readdirSync(
-      JSONConversationalExtension._homeDir
+      JSONConversationalExtension._threadFolder
     )
 
     const threadDirs: string[] = []
     for (let i = 0; i < fileInsideThread.length; i++) {
       if (fileInsideThread[i].includes('.DS_Store')) continue
       const path = await joinPath([
-        JSONConversationalExtension._homeDir,
+        JSONConversationalExtension._threadFolder,
         fileInsideThread[i],
       ])
 
@@ -246,7 +246,7 @@ export default class JSONConversationalExtension extends ConversationalExtension
   async getAllMessages(threadId: string): Promise<ThreadMessage[]> {
     try {
       const threadDirPath = await joinPath([
-        JSONConversationalExtension._homeDir,
+        JSONConversationalExtension._threadFolder,
         threadId,
       ])
 
@@ -263,22 +263,17 @@ export default class JSONConversationalExtension extends ConversationalExtension
         JSONConversationalExtension._threadMessagesFileName,
       ])
 
-      const result = await fs
-        .readFileSync(messageFilePath, 'utf-8')
-        .then((content) =>
-          content
-            .toString()
-            .split('\n')
-            .filter((line) => line !== '')
-        )
+      let readResult = await fs.readFileSync(messageFilePath, 'utf-8')
+
+      if (typeof readResult === 'object') {
+        readResult = JSON.stringify(readResult)
+      }
+
+      const result = readResult.split('\n').filter((line) => line !== '')
 
       const messages: ThreadMessage[] = []
       result.forEach((line: string) => {
-        try {
-          messages.push(JSON.parse(line) as ThreadMessage)
-        } catch (err) {
-          console.error(err)
-        }
+        messages.push(JSON.parse(line))
       })
       return messages
     } catch (err) {
diff --git a/extensions/inference-nitro-extension/rollup.config.ts b/extensions/inference-nitro-extension/rollup.config.ts
index 374a054cd..77a9fb208 100644
--- a/extensions/inference-nitro-extension/rollup.config.ts
+++ b/extensions/inference-nitro-extension/rollup.config.ts
@@ -27,6 +27,9 @@ export default [
         TROUBLESHOOTING_URL: JSON.stringify(
           "https://jan.ai/guides/troubleshooting"
         ),
+        JAN_SERVER_INFERENCE_URL: JSON.stringify(
+          "http://localhost:1337/v1/chat/completions"
+        ),
       }),
       // Allow json resolution
       json(),
diff --git a/extensions/inference-nitro-extension/src/@types/global.d.ts b/extensions/inference-nitro-extension/src/@types/global.d.ts
index bc126337f..7a4fb4805 100644
--- a/extensions/inference-nitro-extension/src/@types/global.d.ts
+++ b/extensions/inference-nitro-extension/src/@types/global.d.ts
@@ -1,6 +1,7 @@
 declare const NODE: string;
 declare const INFERENCE_URL: string;
 declare const TROUBLESHOOTING_URL: string;
+declare const JAN_SERVER_INFERENCE_URL: string;
 
 /**
  * The response from the initModel function.
diff --git a/extensions/inference-nitro-extension/src/helpers/sse.ts b/extensions/inference-nitro-extension/src/helpers/sse.ts
index c6352383d..aab260828 100644
--- a/extensions/inference-nitro-extension/src/helpers/sse.ts
+++ b/extensions/inference-nitro-extension/src/helpers/sse.ts
@@ -6,6 +6,7 @@ import { Observable } from "rxjs";
  * @returns An Observable that emits the generated response as a string.
  */
 export function requestInference(
+  inferenceUrl: string,
   recentMessages: any[],
   model: Model,
   controller?: AbortController
@@ -17,7 +18,7 @@ export function requestInference(
       stream: true,
       ...model.parameters,
     });
-    fetch(INFERENCE_URL, {
+    fetch(inferenceUrl, {
       method: "POST",
       headers: {
         "Content-Type": "application/json",
diff --git a/extensions/inference-nitro-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts
index 2b0021ba0..9e96ad93f 100644
--- a/extensions/inference-nitro-extension/src/index.ts
+++ b/extensions/inference-nitro-extension/src/index.ts
@@ -68,35 +68,48 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
    */
   private nitroProcessInfo: any = undefined;
 
+  private inferenceUrl = "";
+
   /**
    * Subscribes to events emitted by the @janhq/core package.
    */
   async onLoad() {
     if (!(await fs.existsSync(JanInferenceNitroExtension._homeDir))) {
-      await fs
-        .mkdirSync(JanInferenceNitroExtension._homeDir)
-        .catch((err: Error) => console.debug(err));
+      try {
+        await fs.mkdirSync(JanInferenceNitroExtension._homeDir);
+      } catch (e) {
+        console.debug(e);
+      }
     }
 
+    // init inference url
+    // @ts-ignore
+    const electronApi = window?.electronAPI;
+    this.inferenceUrl = INFERENCE_URL;
+    if (!electronApi) {
+      this.inferenceUrl = JAN_SERVER_INFERENCE_URL;
+    }
+    console.debug("Inference url: ", this.inferenceUrl);
+
     if (!(await fs.existsSync(JanInferenceNitroExtension._settingsDir)))
       await fs.mkdirSync(JanInferenceNitroExtension._settingsDir);
     this.writeDefaultEngineSettings();
 
     // Events subscription
     events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
-      this.onMessageRequest(data),
+      this.onMessageRequest(data)
     );
 
     events.on(ModelEvent.OnModelInit, (model: Model) =>
-      this.onModelInit(model),
+      this.onModelInit(model)
     );
 
     events.on(ModelEvent.OnModelStop, (model: Model) =>
-      this.onModelStop(model),
+      this.onModelStop(model)
     );
 
     events.on(InferenceEvent.OnInferenceStopped, () =>
-      this.onInferenceStopped(),
+      this.onInferenceStopped()
     );
 
     // Attempt to fetch nvidia info
@@ -121,7 +134,7 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
       } else {
         await fs.writeFileSync(
           engineFile,
-          JSON.stringify(this._engineSettings, null, 2),
+          JSON.stringify(this._engineSettings, null, 2)
         );
       }
     } catch (err) {
@@ -149,7 +162,7 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
 
     this.getNitroProcesHealthIntervalId = setInterval(
       () => this.periodicallyGetNitroHealth(),
-      JanInferenceNitroExtension._intervalHealthCheck,
+      JanInferenceNitroExtension._intervalHealthCheck
     );
   }
 
@@ -206,7 +219,11 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
     return new Promise(async (resolve, reject) => {
       if (!this._currentModel) return Promise.reject("No model loaded");
 
-      requestInference(data.messages ?? [], this._currentModel).subscribe({
+      requestInference(
+        this.inferenceUrl,
+        data.messages ?? [],
+        this._currentModel
+      ).subscribe({
         next: (_content: any) => {},
         complete: async () => {
           resolve(message);
@@ -251,7 +268,12 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
       ...(this._currentModel || {}),
       ...(data.model || {}),
     };
-    requestInference(data.messages ?? [], model, this.controller).subscribe({
+    requestInference(
+      this.inferenceUrl,
+      data.messages ?? [],
+      model,
+      this.controller
+    ).subscribe({
       next: (content: any) => {
         const messageContent: ThreadContent = {
           type: ContentType.Text,
diff --git a/extensions/inference-nitro-extension/src/node/execute.ts b/extensions/inference-nitro-extension/src/node/execute.ts
index ca266639c..83b5226d4 100644
--- a/extensions/inference-nitro-extension/src/node/execute.ts
+++ b/extensions/inference-nitro-extension/src/node/execute.ts
@@ -25,12 +25,12 @@ export const executableNitroFile = (): NitroExecutableOptions => {
     if (nvidiaInfo["run_mode"] === "cpu") {
       binaryFolder = path.join(binaryFolder, "win-cpu");
     } else {
-      if (nvidiaInfo["cuda"].version === "12") {
-        binaryFolder = path.join(binaryFolder, "win-cuda-12-0");
-      } else {
+      if (nvidiaInfo["cuda"].version === "11") {
         binaryFolder = path.join(binaryFolder, "win-cuda-11-7");
+      } else {
+        binaryFolder = path.join(binaryFolder, "win-cuda-12-0");
       }
-      cudaVisibleDevices = nvidiaInfo["gpu_highest_vram"];
+      cudaVisibleDevices = nvidiaInfo["gpus_in_use"].join(",");
     }
     binaryName = "nitro.exe";
   } else if (process.platform === "darwin") {
@@ -50,12 +50,12 @@ export const executableNitroFile = (): NitroExecutableOptions => {
     if (nvidiaInfo["run_mode"] === "cpu") {
       binaryFolder = path.join(binaryFolder, "linux-cpu");
     } else {
-      if (nvidiaInfo["cuda"].version === "12") {
-        binaryFolder = path.join(binaryFolder, "linux-cuda-12-0");
-      } else {
+      if (nvidiaInfo["cuda"].version === "11") {
         binaryFolder = path.join(binaryFolder, "linux-cuda-11-7");
+      } else {
+        binaryFolder = path.join(binaryFolder, "linux-cuda-12-0");
       }
-      cudaVisibleDevices = nvidiaInfo["gpu_highest_vram"];
+      cudaVisibleDevices = nvidiaInfo["gpus_in_use"].join(",");
     }
   }
   return {
diff --git a/extensions/inference-nitro-extension/src/node/nvidia.ts b/extensions/inference-nitro-extension/src/node/nvidia.ts
index 13e43290b..bed2856a1 100644
--- a/extensions/inference-nitro-extension/src/node/nvidia.ts
+++ b/extensions/inference-nitro-extension/src/node/nvidia.ts
@@ -19,6 +19,8 @@ const DEFALT_SETTINGS = {
   },
   gpus: [],
   gpu_highest_vram: "",
+  gpus_in_use: [],
+  is_initial: true,
 };
 
 /**
@@ -48,11 +50,15 @@ export interface NitroProcessInfo {
  */
 export async function updateNvidiaInfo() {
   if (process.platform !== "darwin") {
-    await Promise.all([
-      updateNvidiaDriverInfo(),
-      updateCudaExistence(),
-      updateGpuInfo(),
-    ]);
+    try {
+      // Parse only to validate: the updaters below re-read the file themselves.
+      JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
+    } catch (error) {
+      // Missing or unparseable settings file: seed it with the defaults.
+      writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(DEFALT_SETTINGS, null, 2));
+    }
+    // Await both so a rejection reaches the caller instead of going unhandled.
+    await Promise.all([updateNvidiaDriverInfo(), updateGpuInfo()]);
   }
 }
 
@@ -73,12 +79,7 @@ export async function updateNvidiaDriverInfo(): Promise<void> {
   exec(
     "nvidia-smi --query-gpu=driver_version --format=csv,noheader",
     (error, stdout) => {
-      let data;
-      try {
-        data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
-      } catch (error) {
-        data = DEFALT_SETTINGS;
-      }
+      let data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
 
       if (!error) {
         const firstLine = stdout.split("\n")[0].trim();
@@ -107,7 +108,7 @@ export function checkFileExistenceInPaths(
 /**
  * Validate cuda for linux and windows
  */
-export function updateCudaExistence() {
+export function updateCudaExistence(data: Record<string, any> = DEFALT_SETTINGS): Record<string, any> {
   let filesCuda12: string[];
   let filesCuda11: string[];
   let paths: string[];
@@ -141,19 +142,14 @@ export function updateCudaExistence() {
     cudaVersion = "12";
   }
 
-  let data;
-  try {
-    data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
-  } catch (error) {
-    data = DEFALT_SETTINGS;
-  }
-
   data["cuda"].exist = cudaExists;
   data["cuda"].version = cudaVersion;
-  if (cudaExists) {
+  console.log(data["is_initial"], data["gpus_in_use"]);
+  if (cudaExists && data["is_initial"] && data["gpus_in_use"].length > 0) {
     data.run_mode = "gpu";
   }
-  writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2));
+  data.is_initial = false;
+  return data;
 }
 
 /**
@@ -161,14 +157,9 @@ export function updateCudaExistence() {
  */
 export async function updateGpuInfo(): Promise<void> {
   exec(
-    "nvidia-smi --query-gpu=index,memory.total --format=csv,noheader,nounits",
+    "nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits",
     (error, stdout) => {
-      let data;
-      try {
-        data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
-      } catch (error) {
-        data = DEFALT_SETTINGS;
-      }
+      let data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
 
       if (!error) {
         // Get GPU info and gpu has higher memory first
@@ -178,21 +169,27 @@ export async function updateGpuInfo(): Promise<void> {
           .trim()
           .split("\n")
           .map((line) => {
-            let [id, vram] = line.split(", ");
+            let [id, vram, name] = line.split(", ");
             vram = vram.replace(/\r/g, "");
             if (parseFloat(vram) > highestVram) {
               highestVram = parseFloat(vram);
               highestVramId = id;
             }
-            return { id, vram };
+            return { id, vram, name };
           });
 
-        data["gpus"] = gpus;
-        data["gpu_highest_vram"] = highestVramId;
+        data.gpus = gpus;
+        data.gpu_highest_vram = highestVramId;
       } else {
-        data["gpus"] = [];
+        data.gpus = [];
+        data.gpu_highest_vram = "";
       }
 
+      if (!data["gpus_in_use"] || data["gpus_in_use"].length === 0) {
+        data.gpus_in_use = [data["gpu_highest_vram"]];
+      }
+
+      data = updateCudaExistence(data);
       writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2));
       Promise.resolve();
     }
diff --git a/extensions/monitoring-extension/package.json b/extensions/monitoring-extension/package.json
index 9935e536e..20d3c485f 100644
--- a/extensions/monitoring-extension/package.json
+++ b/extensions/monitoring-extension/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@janhq/monitoring-extension",
-  "version": "1.0.9",
+  "version": "1.0.10",
   "description": "This extension provides system health and OS level data",
   "main": "dist/index.js",
   "module": "dist/module.js",
@@ -26,6 +26,7 @@
     "README.md"
   ],
   "bundleDependencies": [
-    "node-os-utils"
+    "node-os-utils",
+    "@janhq/core"
   ]
 }
diff --git a/extensions/monitoring-extension/src/module.ts b/extensions/monitoring-extension/src/module.ts
index 86b553d52..2c1b14343 100644
--- a/extensions/monitoring-extension/src/module.ts
+++ b/extensions/monitoring-extension/src/module.ts
@@ -1,4 +1,14 @@
 const nodeOsUtils = require("node-os-utils");
+const getJanDataFolderPath = require("@janhq/core/node").getJanDataFolderPath;
+const path = require("path");
+const { readFileSync } = require("fs");
+const exec = require("child_process").exec;
+
+const NVIDIA_INFO_FILE = path.join(
+  getJanDataFolderPath(),
+  "settings",
+  "settings.json"
+);
 
 const getResourcesInfo = () =>
   new Promise((resolve) => {
@@ -16,18 +26,63 @@ const getResourcesInfo = () =>
   });
 
+/**
+ * Reports current CPU usage and, when the settings file selects GPU mode,
+ * per-GPU statistics queried from `nvidia-smi`.
+ *
+ * @returns a promise resolving to `{ cpu: { usage }, gpu: [...] }`; `gpu` is
+ * empty when run_mode is not "gpu" or no GPU id is configured. It rejects
+ * when the `nvidia-smi` command fails.
+ */
 const getCurrentLoad = () =>
-  new Promise((resolve) => {
+  new Promise((resolve, reject) => {
     nodeOsUtils.cpu.usage().then((cpuPercentage) => {
-      const response = {
-        cpu: {
-          usage: cpuPercentage,
-        },
+      // CPU-only defaults: used on macOS and when the settings file is unusable.
+      let data = {
+        run_mode: "cpu",
+        gpus_in_use: [],
       };
-      resolve(response);
+      if (process.platform !== "darwin") {
+        try {
+          const saved = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
+          data = { ...data, ...saved };
+        } catch (error) {
+          // Throwing here would leave the returned promise pending forever,
+          // so fall back to the CPU-only defaults instead.
+          console.error(`Failed to read ${NVIDIA_INFO_FILE}: ${error}`);
+        }
+      }
+      // The ids are interpolated into a shell command, so keep numeric indexes
+      // only; a settings file without the list is treated as "no GPU in use".
+      const gpuIds = (Array.isArray(data.gpus_in_use) ? data.gpus_in_use : [])
+        .filter((id) => /^\d+$/.test(String(id)))
+        .join(",");
+      if (data.run_mode === "gpu" && gpuIds !== "") {
+        exec(
+          `nvidia-smi --query-gpu=index,name,temperature.gpu,utilization.gpu,memory.total,memory.free,utilization.memory --format=csv,noheader,nounits --id=${gpuIds}`,
+          (error, stdout, stderr) => {
+            if (error) {
+              console.error(`exec error: ${error}`);
+              reject(error);
+              return;
+            }
+            const gpuInfo = stdout.trim().split("\n").map((line) => {
+              const [id, name, temperature, utilization, memoryTotal, memoryFree, memoryUtilization] = line.split(", ").map(item => item.replace(/\r/g, ""));
+              return { id, name, temperature, utilization, memoryTotal, memoryFree, memoryUtilization };
+            });
+            resolve({
+              cpu: { usage: cpuPercentage },
+              gpu: gpuInfo
+            });
+          }
+        );
+      } else {
+        // run_mode is not 'gpu' or no usable GPU id is configured
+        resolve({ cpu: { usage: cpuPercentage }, gpu: [] });
+      }
     });
   });
 
 module.exports = {
   getResourcesInfo,
   getCurrentLoad,
 };
diff --git a/web/containers/Layout/BottomBar/index.tsx b/web/containers/Layout/BottomBar/index.tsx
index 6e334b9ef..32dc70c70 100644
--- a/web/containers/Layout/BottomBar/index.tsx
+++ b/web/containers/Layout/BottomBar/index.tsx
@@ -26,11 +26,12 @@ import { MainViewState } from '@/constants/screens'
 import { useActiveModel } from '@/hooks/useActiveModel'
 
 import { useDownloadState } from '@/hooks/useDownloadState'
-import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels'
+
 import useGetSystemResources from '@/hooks/useGetSystemResources'
 import { useMainViewState } from '@/hooks/useMainViewState'
 
 import { serverEnabledAtom } from '@/helpers/atoms/LocalServer.atom'
+import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom'
 
 const menuLinks = [
   {
@@ -47,14 +48,22 @@ const menuLinks = [
 
 const BottomBar = () => {
   const { activeModel, stateModel } = useActiveModel()
-  const { ram, cpu } = useGetSystemResources()
+  const { ram, cpu, gpus } = useGetSystemResources()
   const progress = useAtomValue(appDownloadProgress)
-  const { downloadedModels } = useGetDownloadedModels()
+  const downloadedModels = useAtomValue(downloadedModelsAtom)
+
   const { setMainViewState } = useMainViewState()
   const { downloadStates } = useDownloadState()
   const setShowSelectModelModal = useSetAtom(showSelectModelModalAtom)
   const [serverEnabled] = useAtom(serverEnabledAtom)
 
+  // GPU memory in use as a whole percentage; 0 when the totals cannot be parsed.
+  const calculateGpuMemoryUsage = (gpu: Record<string, never>) => {
+    const total = parseInt(gpu.memoryTotal, 10)
+    const free = parseInt(gpu.memoryFree, 10)
+    if (!total || Number.isNaN(free)) return 0 // free === 0 means fully used
+    return Math.round(((total - free) / total) * 100)
+  }
   return (
     <div className="fixed bottom-0 left-16 z-20 flex h-12 w-[calc(100%-64px)] items-center justify-between border-t border-border bg-background/80 px-3">
       <div className="flex flex-shrink-0 items-center gap-x-2">
@@ -117,6 +126,17 @@ const BottomBar = () => {
           <SystemItem name="CPU:" value={`${cpu}%`} />
           <SystemItem name="Mem:" value={`${ram}%`} />
         </div>
+        {gpus.length > 0 && (
+          <div className="flex items-center gap-x-2">
+            {gpus.map((gpu, index) => (
+              <SystemItem
+                key={index}
+                name={`GPU ${gpu.id}:`}
+                value={`${gpu.utilization}% Util, ${calculateGpuMemoryUsage(gpu)}% Mem`}
+              />
+            ))}
+          </div>
+        )}
         {/* VERSION is defined by webpack, please see next.config.js */}
         <span className="text-xs text-muted-foreground">
           Jan v{VERSION ?? ''}
diff --git a/web/containers/Layout/TopBar/CommandListDownloadedModel/index.tsx b/web/containers/Layout/TopBar/CommandListDownloadedModel/index.tsx
index 3edce06eb..ac5756e9f 100644
--- a/web/containers/Layout/TopBar/CommandListDownloadedModel/index.tsx
+++ b/web/containers/Layout/TopBar/CommandListDownloadedModel/index.tsx
@@ -11,7 +11,7 @@ import {
   Badge,
 } from '@janhq/uikit'
 
-import { useAtom } from 'jotai'
+import { useAtom, useAtomValue } from 'jotai'
 import { DatabaseIcon, CpuIcon } from 'lucide-react'
 
 import { showSelectModelModalAtom } from '@/containers/Providers/KeyListener'
@@ -19,14 +19,14 @@ import { showSelectModelModalAtom } from '@/containers/Providers/KeyListener'
 import { MainViewState } from '@/constants/screens'
 
 import { useActiveModel } from '@/hooks/useActiveModel'
-import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels'
 import { useMainViewState } from '@/hooks/useMainViewState'
 
 import { serverEnabledAtom } from '@/helpers/atoms/LocalServer.atom'
+import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom'
 
 export default function CommandListDownloadedModel() {
   const { setMainViewState } = useMainViewState()
-  const { downloadedModels } = useGetDownloadedModels()
+  const downloadedModels = useAtomValue(downloadedModelsAtom)
   const { activeModel, startModel, stopModel } = useActiveModel()
   const [serverEnabled] = useAtom(serverEnabledAtom)
   const [showSelectModelModal, setShowSelectModelModal] = useAtom(
diff --git a/web/containers/Layout/TopBar/index.tsx b/web/containers/Layout/TopBar/index.tsx
index f72f5f066..206a9013d 100644
--- a/web/containers/Layout/TopBar/index.tsx
+++ b/web/containers/Layout/TopBar/index.tsx
@@ -20,7 +20,6 @@ import { MainViewState } from '@/constants/screens'
 
 import { useClickOutside } from '@/hooks/useClickOutside'
 import { useCreateNewThread } from '@/hooks/useCreateNewThread'
-import useGetAssistants, { getAssistants } from '@/hooks/useGetAssistants'
 import { useMainViewState } from '@/hooks/useMainViewState'
 
 import { usePath } from '@/hooks/usePath'
@@ -29,13 +28,14 @@ import { showRightSideBarAtom } from '@/screens/Chat/Sidebar'
 
 import { openFileTitle } from '@/utils/titleUtils'
 
+import { assistantsAtom } from '@/helpers/atoms/Assistant.atom'
 import { activeThreadAtom } from '@/helpers/atoms/Thread.atom'
 
 const TopBar = () => {
   const activeThread = useAtomValue(activeThreadAtom)
   const { mainViewState } = useMainViewState()
   const { requestCreateNewThread } = useCreateNewThread()
-  const { assistants } = useGetAssistants()
+  const assistants = useAtomValue(assistantsAtom)
   const [showRightSideBar, setShowRightSideBar] = useAtom(showRightSideBarAtom)
   const [showLeftSideBar, setShowLeftSideBar] = useAtom(showLeftSideBarAtom)
   const showing = useAtomValue(showRightSideBarAtom)
@@ -61,12 +61,7 @@ const TopBar = () => {
 
   const onCreateConversationClick = async () => {
     if (assistants.length === 0) {
-      const res = await getAssistants()
-      if (res.length === 0) {
-        alert('No assistant available')
-        return
-      }
-      requestCreateNewThread(res[0])
+      alert('No assistant available')
     } else {
       requestCreateNewThread(assistants[0])
     }
diff --git a/web/containers/Providers/DataLoader.tsx b/web/containers/Providers/DataLoader.tsx
new file mode 100644
index 000000000..2b6675d98
--- /dev/null
+++ b/web/containers/Providers/DataLoader.tsx
@@ -0,0 +1,21 @@
+'use client'
+
+import { Fragment, ReactNode } from 'react'
+
+import useAssistants from '@/hooks/useAssistants'
+import useModels from '@/hooks/useModels'
+import useThreads from '@/hooks/useThreads'
+
+type Props = {
+  children: ReactNode
+}
+// Calls the useModels, useThreads and useAssistants hooks, then renders children.
+const DataLoader: React.FC<Props> = (props) => {
+  useModels()
+  useThreads()
+  useAssistants()
+
+  return <Fragment>{props.children}</Fragment>
+}
+
+export default DataLoader
diff --git a/web/containers/Providers/EventHandler.tsx b/web/containers/Providers/EventHandler.tsx
index ec0fbfc90..f22ed1bc7 100644
--- a/web/containers/Providers/EventHandler.tsx
+++ b/web/containers/Providers/EventHandler.tsx
@@ -18,7 +18,6 @@ import {
   loadModelErrorAtom,
   stateModelAtom,
 } from '@/hooks/useActiveModel'
-import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels'
 
 import { queuedMessageAtom } from '@/hooks/useSendChatMessage'
 
@@ -29,16 +28,18 @@ import {
   addNewMessageAtom,
   updateMessageAtom,
 } from '@/helpers/atoms/ChatMessage.atom'
+import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom'
 import {
   updateThreadWaitingForResponseAtom,
   threadsAtom,
   isGeneratingResponseAtom,
+  updateThreadAtom,
 } from '@/helpers/atoms/Thread.atom'
 
 export default function EventHandler({ children }: { children: ReactNode }) {
   const addNewMessage = useSetAtom(addNewMessageAtom)
   const updateMessage = useSetAtom(updateMessageAtom)
-  const { downloadedModels } = useGetDownloadedModels()
+  const downloadedModels = useAtomValue(downloadedModelsAtom)
   const setActiveModel = useSetAtom(activeModelAtom)
   const setStateModel = useSetAtom(stateModelAtom)
   const setQueuedMessage = useSetAtom(queuedMessageAtom)
@@ -49,6 +50,7 @@ export default function EventHandler({ children }: { children: ReactNode }) {
   const modelsRef = useRef(downloadedModels)
   const threadsRef = useRef(threads)
   const setIsGeneratingResponse = useSetAtom(isGeneratingResponseAtom)
+  const updateThread = useSetAtom(updateThreadAtom)
 
   useEffect(() => {
     threadsRef.current = threads
@@ -131,6 +133,12 @@ export default function EventHandler({ children }: { children: ReactNode }) {
           ...thread.metadata,
           lastMessage: messageContent,
         }
+
+        updateThread({
+          ...thread,
+          metadata,
+        })
+
         extensionManager
           .get<ConversationalExtension>(ExtensionTypeEnum.Conversational)
           ?.saveThread({
@@ -143,7 +151,7 @@ export default function EventHandler({ children }: { children: ReactNode }) {
           ?.addNewMessage(message)
       }
     },
-    [updateMessage, updateThreadWaiting]
+    [updateMessage, updateThreadWaiting, setIsGeneratingResponse, updateThread]
   )
 
   useEffect(() => {
diff --git a/web/containers/Providers/EventListener.tsx b/web/containers/Providers/EventListener.tsx
index 62d4cacb6..5e8556f33 100644
--- a/web/containers/Providers/EventListener.tsx
+++ b/web/containers/Providers/EventListener.tsx
@@ -3,10 +3,9 @@
 import { PropsWithChildren, useEffect, useRef } from 'react'
 
 import { baseName } from '@janhq/core'
-import { useAtomValue, useSetAtom } from 'jotai'
+import { useAtom, useAtomValue, useSetAtom } from 'jotai'
 
 import { useDownloadState } from '@/hooks/useDownloadState'
-import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels'
 
 import { modelBinFileName } from '@/utils/model'
 
@@ -14,14 +13,17 @@ import EventHandler from './EventHandler'
 
 import { appDownloadProgress } from './Jotai'
 
-import { downloadingModelsAtom } from '@/helpers/atoms/Model.atom'
+import {
+  downloadedModelsAtom,
+  downloadingModelsAtom,
+} from '@/helpers/atoms/Model.atom'
 
 export default function EventListenerWrapper({ children }: PropsWithChildren) {
   const setProgress = useSetAtom(appDownloadProgress)
   const models = useAtomValue(downloadingModelsAtom)
   const modelsRef = useRef(models)
 
-  const { setDownloadedModels, downloadedModels } = useGetDownloadedModels()
+  const [downloadedModels, setDownloadedModels] = useAtom(downloadedModelsAtom)
   const {
     setDownloadState,
     setDownloadStateSuccess,
diff --git a/web/containers/Providers/index.tsx b/web/containers/Providers/index.tsx
index c8a20bca7..e7a179ec4 100644
--- a/web/containers/Providers/index.tsx
+++ b/web/containers/Providers/index.tsx
@@ -23,6 +23,8 @@ import Umami from '@/utils/umami'
 
 import Loader from '../Loader'
 
+import DataLoader from './DataLoader'
+
 import KeyListener from './KeyListener'
 
 import { extensionManager } from '@/extension'
@@ -81,7 +83,9 @@ const Providers = (props: PropsWithChildren) => {
           <KeyListener>
             <FeatureToggleWrapper>
               <EventListenerWrapper>
-                <TooltipProvider delayDuration={0}>{children}</TooltipProvider>
+                <TooltipProvider delayDuration={0}>
+                  <DataLoader>{children}</DataLoader>
+                </TooltipProvider>
                 {!isMac && <GPUDriverPrompt />}
               </EventListenerWrapper>
               <Toaster />
diff --git a/web/containers/Toast/index.tsx b/web/containers/Toast/index.tsx
index 7cffa89b9..eae340fee 100644
--- a/web/containers/Toast/index.tsx
+++ b/web/containers/Toast/index.tsx
@@ -19,8 +19,8 @@ const ErrorIcon = () => {
       xmlns="http://www.w3.org/2000/svg"
     >
       <path
-        fill-rule="evenodd"
-        clip-rule="evenodd"
+        fillRule="evenodd"
+        clipRule="evenodd"
         d="M20 10C20 15.5228 15.5228 20 10 20H0.993697C0.110179 20 -0.332289 18.9229 0.292453 18.2929L2.2495 16.3195C0.843343 14.597 1.21409e-08 12.397 1.21409e-08 10C1.21409e-08 4.47715 4.47715 0 10 0C15.5228 0 20 4.47715 20 10ZM13.2071 6.79289C13.5976 7.18342 13.5976 7.81658 13.2071 8.20711L11.4142 10L13.2071 11.7929C13.5976 12.1834 13.5976 12.8166 13.2071 13.2071C12.8166 13.5976 12.1834 13.5976 11.7929 13.2071L10 11.4142L8.20711 13.2071C7.81658 13.5976 7.18342 13.5976 6.79289 13.2071C6.40237 12.8166 6.40237 12.1834 6.79289 11.7929L8.58579 10L6.79289 8.20711C6.40237 7.81658 6.40237 7.18342 6.79289 6.79289C7.18342 6.40237 7.81658 6.40237 8.20711 6.79289L10 8.58579L11.7929 6.79289C12.1834 6.40237 12.8166 6.40237 13.2071 6.79289Z"
         fill="#EA2E4E"
       />
@@ -38,8 +38,8 @@ const WarningIcon = () => {
       xmlns="http://www.w3.org/2000/svg"
     >
       <path
-        fill-rule="evenodd"
-        clip-rule="evenodd"
+        fillRule="evenodd"
+        clipRule="evenodd"
         d="M20 10C20 15.5228 15.5228 20 10 20H0.993697C0.110179 20 -0.332289 18.9229 0.292453 18.2929L2.2495 16.3195C0.843343 14.597 1.21409e-08 12.397 1.21409e-08 10C1.21409e-08 4.47715 4.47715 0 10 0C15.5228 0 20 4.47715 20 10ZM10.99 6C10.99 5.44772 10.5446 5 9.99502 5C9.44549 5 9 5.44772 9 6V10C9 10.5523 9.44549 11 9.99502 11C10.5446 11 10.99 10.5523 10.99 10V6ZM9.99502 13C9.44549 13 9 13.4477 9 14C9 14.5523 9.44549 15 9.99502 15H10.005C10.5545 15 11 14.5523 11 14C11 13.4477 10.5545 13 10.005 13H9.99502Z"
         fill="#FACC15"
       />
@@ -57,8 +57,8 @@ const SuccessIcon = () => {
       xmlns="http://www.w3.org/2000/svg"
     >
       <path
-        fill-rule="evenodd"
-        clip-rule="evenodd"
+        fillRule="evenodd"
+        clipRule="evenodd"
         d="M20 10C20 15.5228 15.5228 20 10 20H0.993697C0.110179 20 -0.332289 18.9229 0.292453 18.2929L2.2495 16.3195C0.843343 14.597 1.21409e-08 12.397 1.21409e-08 10C1.21409e-08 4.47715 4.47715 0 10 0C15.5228 0 20 4.47715 20 10ZM13.7071 8.70711C14.0976 8.31658 14.0976 7.68342 13.7071 7.29289C13.3166 6.90237 12.6834 6.90237 12.2929 7.29289L9 10.5858L7.70711 9.2929C7.31658 8.90237 6.68342 8.90237 6.29289 9.2929C5.90237 9.68342 5.90237 10.3166 6.29289 10.7071L8.29289 12.7071C8.48043 12.8946 8.73478 13 9 13C9.26522 13 9.51957 12.8946 9.70711 12.7071L13.7071 8.70711Z"
         fill="#34D399"
       />
@@ -76,8 +76,8 @@ const DefaultIcon = () => {
       xmlns="http://www.w3.org/2000/svg"
     >
       <path
-        fill-rule="evenodd"
-        clip-rule="evenodd"
+        fillRule="evenodd"
+        clipRule="evenodd"
         d="M10 20C15.5228 20 20 15.5228 20 10C20 4.47715 15.5228 0 10 0C4.47715 0 2.11188e-08 4.47715 2.11188e-08 10C2.11188e-08 12.397 0.843343 14.597 2.2495 16.3195L0.292453 18.2929C-0.332289 18.9229 0.110179 20 0.993697 20H10ZM5.5 8C5.5 7.44772 5.94772 7 6.5 7H13.5C14.0523 7 14.5 7.44772 14.5 8C14.5 8.55229 14.0523 9 13.5 9H6.5C5.94772 9 5.5 8.55229 5.5 8ZM6.5 11C5.94772 11 5.5 11.4477 5.5 12C5.5 12.5523 5.94772 13 6.5 13H9.5C10.0523 13 10.5 12.5523 10.5 12C10.5 11.4477 10.0523 11 9.5 11H6.5Z"
         fill="#60A5FA"
       />
diff --git a/web/helpers/atoms/Assistant.atom.ts b/web/helpers/atoms/Assistant.atom.ts
new file mode 100644
index 000000000..e90923d3d
--- /dev/null
+++ b/web/helpers/atoms/Assistant.atom.ts
@@ -0,0 +1,8 @@
+import { Assistant } from '@janhq/core'
+import { atom } from 'jotai'
+
+/**
+ * Holds the list of available assistants. Starts empty and is filled by the
+ * `useAssistants` hook.
+ */
+export const assistantsAtom = atom<Assistant[]>([])
diff --git a/web/helpers/atoms/Model.atom.ts b/web/helpers/atoms/Model.atom.ts
index 6eb7f2ad6..5c9188ad7 100644
--- a/web/helpers/atoms/Model.atom.ts
+++ b/web/helpers/atoms/Model.atom.ts
@@ -24,3 +24,7 @@ export const removeDownloadingModelAtom = atom(
     )
   }
 )
+
+export const downloadedModelsAtom = atom<Model[]>([])
+
+export const configuredModelsAtom = atom<Model[]>([])
diff --git a/web/helpers/atoms/SystemBar.atom.ts b/web/helpers/atoms/SystemBar.atom.ts
index 42ef7b29f..22a7573ec 100644
--- a/web/helpers/atoms/SystemBar.atom.ts
+++ b/web/helpers/atoms/SystemBar.atom.ts
@@ -5,3 +5,5 @@ export const usedRamAtom = atom<number>(0)
 export const availableRamAtom = atom<number>(0)
 
 export const cpuUsageAtom = atom<number>(0)
+
+export const nvidiaTotalVramAtom = atom<number>(0)
diff --git a/web/hooks/useActiveModel.ts b/web/hooks/useActiveModel.ts
index 54a1fdbe0..1b61a0dd1 100644
--- a/web/hooks/useActiveModel.ts
+++ b/web/hooks/useActiveModel.ts
@@ -3,9 +3,9 @@ import { atom, useAtom, useAtomValue, useSetAtom } from 'jotai'
 
 import { toaster } from '@/containers/Toast'
 
-import { useGetDownloadedModels } from './useGetDownloadedModels'
 import { LAST_USED_MODEL_ID } from './useRecommendedModel'
 
+import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom'
 import { activeThreadAtom } from '@/helpers/atoms/Thread.atom'
 
 export const activeModelAtom = atom<Model | undefined>(undefined)
@@ -21,7 +21,7 @@ export function useActiveModel() {
   const [activeModel, setActiveModel] = useAtom(activeModelAtom)
   const activeThread = useAtomValue(activeThreadAtom)
   const [stateModel, setStateModel] = useAtom(stateModelAtom)
-  const { downloadedModels } = useGetDownloadedModels()
+  const downloadedModels = useAtomValue(downloadedModelsAtom)
   const setLoadModelError = useSetAtom(loadModelErrorAtom)
 
   const startModel = async (modelId: string) => {
diff --git a/web/hooks/useAssistants.ts b/web/hooks/useAssistants.ts
new file mode 100644
index 000000000..8f2c4a92c
--- /dev/null
+++ b/web/hooks/useAssistants.ts
@@ -0,0 +1,44 @@
+import { useEffect } from 'react'
+
+import { Assistant, AssistantExtension, ExtensionTypeEnum } from '@janhq/core'
+
+import { useSetAtom } from 'jotai'
+
+import { extensionManager } from '@/extension'
+import { assistantsAtom } from '@/helpers/atoms/Assistant.atom'
+
+/**
+ * Fetches the local assistants and stores them in `assistantsAtom`.
+ *
+ * The fetch runs in an effect keyed on the atom setter, so it is triggered
+ * when the calling component mounts.
+ */
+const useAssistants = () => {
+  const setAssistants = useSetAtom(assistantsAtom)
+
+  useEffect(() => {
+    const getAssistants = async () => {
+      try {
+        const assistants = await getLocalAssistants()
+        setAssistants(assistants)
+      } catch (err) {
+        // Log instead of leaving an unhandled promise rejection; the atom
+        // keeps its current value.
+        console.error(err)
+      }
+    }
+
+    getAssistants()
+  }, [setAssistants])
+}
+
+/**
+ * Asks the Assistant extension for its assistants; resolves to an empty list
+ * when no Assistant extension is available.
+ */
+const getLocalAssistants = async (): Promise<Assistant[]> =>
+  extensionManager
+    .get<AssistantExtension>(ExtensionTypeEnum.Assistant)
+    ?.getAssistants() ?? []
+
+export default useAssistants
diff --git a/web/hooks/useCreateNewThread.ts b/web/hooks/useCreateNewThread.ts
index ee8df22df..12a5e04ca 100644
--- a/web/hooks/useCreateNewThread.ts
+++ b/web/hooks/useCreateNewThread.ts
@@ -7,7 +7,7 @@ import {
   ThreadState,
   Model,
 } from '@janhq/core'
-import { atom, useSetAtom } from 'jotai'
+import { atom, useAtomValue, useSetAtom } from 'jotai'
 
 import { selectedModelAtom } from '@/containers/DropdownListSidebar'
 import { fileUploadAtom } from '@/containers/Providers/Jotai'
@@ -19,6 +19,7 @@ import useRecommendedModel from './useRecommendedModel'
 import useSetActiveThread from './useSetActiveThread'
 
 import { extensionManager } from '@/extension'
+
 import {
   threadsAtom,
   threadStatesAtom,
@@ -53,12 +54,21 @@ export const useCreateNewThread = () => {
 
   const { recommendedModel, downloadedModels } = useRecommendedModel()
 
+  const threads = useAtomValue(threadsAtom)
+
   const requestCreateNewThread = async (
     assistant: Assistant,
     model?: Model | undefined
   ) => {
     const defaultModel = model ?? recommendedModel ?? downloadedModels[0]
 
+    // Check the first thread's last message: if it is empty, the user cannot create a new thread
+    const lastMessage = threads[0]?.metadata?.lastMessage
+
+    if (!lastMessage && threads.length) {
+      return null
+    }
+
     const createdAt = Date.now()
     const assistantInfo: ThreadAssistantInfo = {
       assistant_id: assistant.id,
diff --git a/web/hooks/useDeleteModel.ts b/web/hooks/useDeleteModel.ts
index fa0cfb45e..d9f2b94be 100644
--- a/web/hooks/useDeleteModel.ts
+++ b/web/hooks/useDeleteModel.ts
@@ -1,13 +1,14 @@
 import { ExtensionTypeEnum, ModelExtension, Model } from '@janhq/core'
 
+import { useAtom } from 'jotai'
+
 import { toaster } from '@/containers/Toast'
 
-import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels'
-
 import { extensionManager } from '@/extension/ExtensionManager'
+import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom'
 
 export default function useDeleteModel() {
-  const { setDownloadedModels, downloadedModels } = useGetDownloadedModels()
+  const [downloadedModels, setDownloadedModels] = useAtom(downloadedModelsAtom)
 
   const deleteModel = async (model: Model) => {
     await extensionManager
diff --git a/web/hooks/useGetAssistants.ts b/web/hooks/useGetAssistants.ts
deleted file mode 100644
index 2b34bfbd1..000000000
--- a/web/hooks/useGetAssistants.ts
+++ /dev/null
@@ -1,27 +0,0 @@
-import { useEffect, useState } from 'react'
-
-import { Assistant, ExtensionTypeEnum, AssistantExtension } from '@janhq/core'
-
-import { extensionManager } from '@/extension/ExtensionManager'
-
-export const getAssistants = async (): Promise<Assistant[]> =>
-  extensionManager
-    .get<AssistantExtension>(ExtensionTypeEnum.Assistant)
-    ?.getAssistants() ?? []
-
-/**
- * Hooks for get assistants
- *
- * @returns assistants
- */
-export default function useGetAssistants() {
-  const [assistants, setAssistants] = useState<Assistant[]>([])
-
-  useEffect(() => {
-    getAssistants()
-      .then((data) => setAssistants(data))
-      .catch((err) => console.error(err))
-  }, [])
-
-  return { assistants }
-}
diff --git a/web/hooks/useGetConfiguredModels.ts b/web/hooks/useGetConfiguredModels.ts
deleted file mode 100644
index 8be052ae2..000000000
--- a/web/hooks/useGetConfiguredModels.ts
+++ /dev/null
@@ -1,30 +0,0 @@
-import { useCallback, useEffect, useState } from 'react'
-
-import { ExtensionTypeEnum, ModelExtension, Model } from '@janhq/core'
-
-import { extensionManager } from '@/extension/ExtensionManager'
-
-export function useGetConfiguredModels() {
-  const [loading, setLoading] = useState<boolean>(false)
-  const [models, setModels] = useState<Model[]>([])
-
-  const fetchModels = useCallback(async () => {
-    setLoading(true)
-    const models = await getConfiguredModels()
-    setLoading(false)
-    setModels(models)
-  }, [])
-
-  useEffect(() => {
-    fetchModels()
-  }, [fetchModels])
-
-  return { loading, models }
-}
-
-const getConfiguredModels = async (): Promise<Model[]> => {
-  const models = await extensionManager
-    .get<ModelExtension>(ExtensionTypeEnum.Model)
-    ?.getConfiguredModels()
-  return models ?? []
-}
diff --git a/web/hooks/useGetDownloadedModels.ts b/web/hooks/useGetDownloadedModels.ts
deleted file mode 100644
index bba420858..000000000
--- a/web/hooks/useGetDownloadedModels.ts
+++ /dev/null
@@ -1,27 +0,0 @@
-import { useEffect } from 'react'
-
-import { ExtensionTypeEnum, ModelExtension, Model } from '@janhq/core'
-
-import { atom, useAtom } from 'jotai'
-
-import { extensionManager } from '@/extension/ExtensionManager'
-
-export const downloadedModelsAtom = atom<Model[]>([])
-
-export function useGetDownloadedModels() {
-  const [downloadedModels, setDownloadedModels] = useAtom(downloadedModelsAtom)
-
-  useEffect(() => {
-    getDownloadedModels().then((downloadedModels) => {
-      setDownloadedModels(downloadedModels)
-    })
-    // eslint-disable-next-line react-hooks/exhaustive-deps
-  }, [])
-
-  return { downloadedModels, setDownloadedModels }
-}
-
-export const getDownloadedModels = async (): Promise<Model[]> =>
-  extensionManager
-    .get<ModelExtension>(ExtensionTypeEnum.Model)
-    ?.getDownloadedModels() ?? []
diff --git a/web/hooks/useGetSystemResources.ts b/web/hooks/useGetSystemResources.ts
index de595ad7b..3f71040d7 100644
--- a/web/hooks/useGetSystemResources.ts
+++ b/web/hooks/useGetSystemResources.ts
@@ -10,15 +10,19 @@ import {
   cpuUsageAtom,
   totalRamAtom,
   usedRamAtom,
+  nvidiaTotalVramAtom,
 } from '@/helpers/atoms/SystemBar.atom'
 
 export default function useGetSystemResources() {
   const [ram, setRam] = useState<number>(0)
   const [cpu, setCPU] = useState<number>(0)
+
+  const [gpus, setGPUs] = useState<Record<string, never>[]>([])
   const setTotalRam = useSetAtom(totalRamAtom)
   const setUsedRam = useSetAtom(usedRamAtom)
   const setAvailableRam = useSetAtom(availableRamAtom)
   const setCpuUsage = useSetAtom(cpuUsageAtom)
+  const setTotalNvidiaVram = useSetAtom(nvidiaTotalVramAtom)
 
   const getSystemResources = async () => {
     if (
@@ -48,17 +52,30 @@ export default function useGetSystemResources() {
       )
     setCPU(Math.round(currentLoadInfor?.cpu?.usage ?? 0))
     setCpuUsage(Math.round(currentLoadInfor?.cpu?.usage ?? 0))
+
+    const gpus = currentLoadInfor?.gpu ?? []
+    setGPUs(gpus)
+
+    let totalNvidiaVram = 0
+    if (gpus.length > 0) {
+      totalNvidiaVram = gpus.reduce(
+        (total: number, gpu: { memoryTotal: string }) =>
+          total + Number(gpu.memoryTotal),
+        0
+      )
+    }
+    setTotalNvidiaVram(totalNvidiaVram)
   }
 
   useEffect(() => {
     getSystemResources()
 
-    // Fetch interval - every 0.5s
+    // Fetch interval - every 5s
     // TODO: Will we really need this?
     // There is a possibility that this will be removed and replaced by the process event hook?
     const intervalId = setInterval(() => {
       getSystemResources()
-    }, 500)
+    }, 5000)
 
     // clean up interval
     return () => clearInterval(intervalId)
@@ -69,5 +86,6 @@ export default function useGetSystemResources() {
     totalRamAtom,
     ram,
     cpu,
+    gpus,
   }
 }
diff --git a/web/hooks/useModels.ts b/web/hooks/useModels.ts
new file mode 100644
index 000000000..23e098007
--- /dev/null
+++ b/web/hooks/useModels.ts
@@ -0,0 +1,65 @@
+import { useEffect } from 'react'
+
+import { ExtensionTypeEnum, Model, ModelExtension } from '@janhq/core'
+
+import { useSetAtom } from 'jotai'
+
+import { extensionManager } from '@/extension'
+import {
+  configuredModelsAtom,
+  downloadedModelsAtom,
+} from '@/helpers/atoms/Model.atom'
+
+/**
+ * Fetches the downloaded and configured models and stores them in
+ * `downloadedModelsAtom` and `configuredModelsAtom` respectively.
+ *
+ * Each list is loaded by its own effect; a failure in one is logged and does
+ * not affect the other.
+ */
+const useModels = () => {
+  const setDownloadedModels = useSetAtom(downloadedModelsAtom)
+  const setConfiguredModels = useSetAtom(configuredModelsAtom)
+
+  useEffect(() => {
+    const getDownloadedModels = async () => {
+      try {
+        const models = await getLocalDownloadedModels()
+        setDownloadedModels(models)
+      } catch (err) {
+        // Log instead of leaving an unhandled promise rejection.
+        console.error(err)
+      }
+    }
+
+    getDownloadedModels()
+  }, [setDownloadedModels])
+
+  useEffect(() => {
+    const getConfiguredModels = async () => {
+      try {
+        const models = await getLocalConfiguredModels()
+        setConfiguredModels(models)
+      } catch (err) {
+        // Log instead of leaving an unhandled promise rejection.
+        console.error(err)
+      }
+    }
+
+    getConfiguredModels()
+  }, [setConfiguredModels])
+}
+
+/** Configured models from the Model extension, or `[]` when it is unavailable. */
+const getLocalConfiguredModels = async (): Promise<Model[]> =>
+  extensionManager
+    .get<ModelExtension>(ExtensionTypeEnum.Model)
+    ?.getConfiguredModels() ?? []
+
+/** Downloaded models from the Model extension, or `[]` when it is unavailable. */
+const getLocalDownloadedModels = async (): Promise<Model[]> =>
+  extensionManager
+    .get<ModelExtension>(ExtensionTypeEnum.Model)
+    ?.getDownloadedModels() ?? []
+
+export default useModels
diff --git a/web/hooks/useRecommendedModel.ts b/web/hooks/useRecommendedModel.ts
index 427d2bf73..8122e2b77 100644
--- a/web/hooks/useRecommendedModel.ts
+++ b/web/hooks/useRecommendedModel.ts
@@ -5,9 +5,9 @@ import { Model, InferenceEngine } from '@janhq/core'
 import { atom, useAtomValue } from 'jotai'
 
 import { activeModelAtom } from './useActiveModel'
-import { getDownloadedModels } from './useGetDownloadedModels'
 
-import { activeThreadAtom, threadStatesAtom } from '@/helpers/atoms/Thread.atom'
+import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom'
+import { activeThreadAtom } from '@/helpers/atoms/Thread.atom'
 
 export const lastUsedModel = atom<Model | undefined>(undefined)
 
@@ -24,19 +24,20 @@ export const LAST_USED_MODEL_ID = 'last-used-model-id'
  */
 export default function useRecommendedModel() {
   const activeModel = useAtomValue(activeModelAtom)
-  const [downloadedModels, setDownloadedModels] = useState<Model[]>([])
+  const [sortedModels, setSortedModels] = useState<Model[]>([])
   const [recommendedModel, setRecommendedModel] = useState<Model | undefined>()
   const activeThread = useAtomValue(activeThreadAtom)
+  const downloadedModels = useAtomValue(downloadedModelsAtom)
 
   const getAndSortDownloadedModels = useCallback(async (): Promise<Model[]> => {
-    const models = (await getDownloadedModels()).sort((a, b) =>
+    const models = [...downloadedModels].sort((a, b) =>
       a.engine !== InferenceEngine.nitro && b.engine === InferenceEngine.nitro
         ? 1
         : -1
     )
-    setDownloadedModels(models)
+    setSortedModels(models)
     return models
-  }, [])
+  }, [downloadedModels])
 
   const getRecommendedModel = useCallback(async (): Promise<
     Model | undefined
@@ -98,5 +99,5 @@ export default function useRecommendedModel() {
     getRecommendedModel()
   }, [getRecommendedModel])
 
-  return { recommendedModel, downloadedModels }
+  return { recommendedModel, downloadedModels: sortedModels }
 }
diff --git a/web/hooks/useSetActiveThread.ts b/web/hooks/useSetActiveThread.ts
index f5649ccaf..6cf94d45d 100644
--- a/web/hooks/useSetActiveThread.ts
+++ b/web/hooks/useSetActiveThread.ts
@@ -1,3 +1,5 @@
+import { useCallback } from 'react'
+
 import {
   InferenceEvent,
   ExtensionTypeEnum,
@@ -6,7 +8,7 @@ import {
   ConversationalExtension,
 } from '@janhq/core'
 
-import { useAtomValue, useSetAtom } from 'jotai'
+import { useSetAtom } from 'jotai'
 
 import { loadModelErrorAtom } from './useActiveModel'
 
@@ -14,43 +16,46 @@ import { extensionManager } from '@/extension'
 import { setConvoMessagesAtom } from '@/helpers/atoms/ChatMessage.atom'
 import {
   ModelParams,
-  getActiveThreadIdAtom,
   isGeneratingResponseAtom,
   setActiveThreadIdAtom,
   setThreadModelParamsAtom,
 } from '@/helpers/atoms/Thread.atom'
 
 export default function useSetActiveThread() {
-  const activeThreadId = useAtomValue(getActiveThreadIdAtom)
   const setActiveThreadId = useSetAtom(setActiveThreadIdAtom)
   const setThreadMessage = useSetAtom(setConvoMessagesAtom)
   const setThreadModelParams = useSetAtom(setThreadModelParamsAtom)
   const setIsGeneratingResponse = useSetAtom(isGeneratingResponseAtom)
   const setLoadModelError = useSetAtom(loadModelErrorAtom)
 
-  const setActiveThread = async (thread: Thread) => {
-    if (activeThreadId === thread.id) {
-      console.debug('Thread already active')
-      return
-    }
+  const setActiveThread = useCallback(
+    async (thread: Thread) => {
+      setIsGeneratingResponse(false)
+      events.emit(InferenceEvent.OnInferenceStopped, thread.id)
 
-    setIsGeneratingResponse(false)
-    setLoadModelError(undefined)
-    events.emit(InferenceEvent.OnInferenceStopped, thread.id)
+      // load the corresponding messages
+      const messages = await getLocalThreadMessage(thread.id)
+      setThreadMessage(thread.id, messages)
 
-    // load the corresponding messages
-    const messages = await extensionManager
-      .get<ConversationalExtension>(ExtensionTypeEnum.Conversational)
-      ?.getAllMessages(thread.id)
-    setThreadMessage(thread.id, messages ?? [])
+      setActiveThreadId(thread.id)
+      const modelParams: ModelParams = {
+        ...thread.assistants[0]?.model?.parameters,
+        ...thread.assistants[0]?.model?.settings,
+      }
+      setThreadModelParams(thread.id, modelParams)
+    },
+    [
+      setActiveThreadId,
+      setThreadMessage,
+      setThreadModelParams,
+      setIsGeneratingResponse,
+    ]
+  )
 
-    setActiveThreadId(thread.id)
-    const modelParams: ModelParams = {
-      ...thread.assistants[0]?.model?.parameters,
-      ...thread.assistants[0]?.model?.settings,
-    }
-    setThreadModelParams(thread.id, modelParams)
-  }
-
-  return { activeThreadId, setActiveThread }
+  return { setActiveThread }
 }
+
+const getLocalThreadMessage = async (threadId: string) =>
+  extensionManager
+    .get<ConversationalExtension>(ExtensionTypeEnum.Conversational)
+    ?.getAllMessages(threadId) ?? []
diff --git a/web/hooks/useSettings.ts b/web/hooks/useSettings.ts
index 168e72489..289355b36 100644
--- a/web/hooks/useSettings.ts
+++ b/web/hooks/useSettings.ts
@@ -47,14 +47,17 @@ export const useSettings = () => {
   const saveSettings = async ({
     runMode,
     notify,
+    gpusInUse,
   }: {
     runMode?: string | undefined
     notify?: boolean | undefined
+    gpusInUse?: string[] | undefined
   }) => {
     const settingsFile = await joinPath(['file://settings', 'settings.json'])
     const settings = await readSettings()
     if (runMode != null) settings.run_mode = runMode
     if (notify != null) settings.notify = notify
+    if (gpusInUse != null) settings.gpus_in_use = gpusInUse
     await fs.writeFileSync(settingsFile, JSON.stringify(settings))
   }
 
diff --git a/web/hooks/useThreads.ts b/web/hooks/useThreads.ts
index b7de014cc..1ac038b26 100644
--- a/web/hooks/useThreads.ts
+++ b/web/hooks/useThreads.ts
@@ -1,3 +1,5 @@
+import { useEffect } from 'react'
+
 import {
   ExtensionTypeEnum,
   Thread,
@@ -5,14 +7,13 @@ import {
   ConversationalExtension,
 } from '@janhq/core'
 
-import { useAtomValue, useSetAtom } from 'jotai'
+import { useSetAtom } from 'jotai'
 
 import useSetActiveThread from './useSetActiveThread'
 
 import { extensionManager } from '@/extension/ExtensionManager'
 import {
   ModelParams,
-  activeThreadAtom,
   threadModelParamsAtom,
   threadStatesAtom,
   threadsAtom,
@@ -22,11 +23,10 @@ const useThreads = () => {
   const setThreadStates = useSetAtom(threadStatesAtom)
   const setThreads = useSetAtom(threadsAtom)
   const setThreadModelRuntimeParams = useSetAtom(threadModelParamsAtom)
-  const activeThread = useAtomValue(activeThreadAtom)
   const { setActiveThread } = useSetActiveThread()
 
-  const getThreads = async () => {
-    try {
+  useEffect(() => {
+    const getThreads = async () => {
       const localThreads = await getLocalThreads()
       const localThreadStates: Record<string, ThreadState> = {}
       const threadModelParams: Record<string, ModelParams> = {}
@@ -54,17 +54,19 @@ const useThreads = () => {
       setThreadStates(localThreadStates)
       setThreads(localThreads)
       setThreadModelRuntimeParams(threadModelParams)
-      if (localThreads.length && !activeThread) {
+
+      if (localThreads.length > 0) {
         setActiveThread(localThreads[0])
       }
-    } catch (error) {
-      console.error(error)
     }
-  }
 
-  return {
-    getThreads,
-  }
+    getThreads().catch((error) => console.error(error))
+  }, [
+    setActiveThread,
+    setThreadModelRuntimeParams,
+    setThreadStates,
+    setThreads,
+  ])
 }
 
 const getLocalThreads = async (): Promise<Thread[]> =>
diff --git a/web/screens/Chat/ChatBody/index.tsx b/web/screens/Chat/ChatBody/index.tsx
index 66f14d076..c67d6a538 100644
--- a/web/screens/Chat/ChatBody/index.tsx
+++ b/web/screens/Chat/ChatBody/index.tsx
@@ -11,7 +11,6 @@ import LogoMark from '@/containers/Brand/Logo/Mark'
 import { MainViewState } from '@/constants/screens'
 
 import { loadModelErrorAtom } from '@/hooks/useActiveModel'
-import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels'
 
 import { useMainViewState } from '@/hooks/useMainViewState'
 
@@ -20,10 +19,13 @@ import ChatItem from '../ChatItem'
 import ErrorMessage from '../ErrorMessage'
 
 import { getCurrentChatMessagesAtom } from '@/helpers/atoms/ChatMessage.atom'
+import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom'
 
 const ChatBody: React.FC = () => {
   const messages = useAtomValue(getCurrentChatMessagesAtom)
-  const { downloadedModels } = useGetDownloadedModels()
+
+  const downloadedModels = useAtomValue(downloadedModelsAtom)
+
   const { setMainViewState } = useMainViewState()
 
   if (downloadedModels.length === 0)
diff --git a/web/screens/Chat/CleanThreadModal/index.tsx b/web/screens/Chat/CleanThreadModal/index.tsx
new file mode 100644
index 000000000..6ef505e6f
--- /dev/null
+++ b/web/screens/Chat/CleanThreadModal/index.tsx
@@ -0,0 +1,76 @@
+import React, { useCallback } from 'react'
+
+import {
+  Button,
+  Modal,
+  ModalClose,
+  ModalContent,
+  ModalFooter,
+  ModalHeader,
+  ModalPortal,
+  ModalTitle,
+  ModalTrigger,
+} from '@janhq/uikit'
+import { Paintbrush } from 'lucide-react'
+
+import useDeleteThread from '@/hooks/useDeleteThread'
+
+type Props = {
+  threadId: string
+}
+
+/** Keeps a click inside the modal from bubbling to ancestor handlers. */
+const stopPropagation = (event: React.SyntheticEvent) =>
+  event.stopPropagation()
+
+/**
+ * "Clean thread" entry plus its confirmation modal.
+ *
+ * Choosing "Yes" calls `cleanThread(threadId)` from `useDeleteThread`. Both
+ * buttons are wrapped in `ModalClose`.
+ */
+const CleanThreadModal: React.FC<Props> = ({ threadId }) => {
+  const { cleanThread } = useDeleteThread()
+
+  const handleConfirm = useCallback(
+    (event: React.MouseEvent<HTMLButtonElement, MouseEvent>) => {
+      event.stopPropagation()
+      cleanThread(threadId)
+    },
+    [cleanThread, threadId]
+  )
+
+  return (
+    <Modal>
+      <ModalTrigger asChild onClick={stopPropagation}>
+        <div className="flex cursor-pointer items-center space-x-2 px-4 py-2 hover:bg-secondary">
+          <Paintbrush size={16} className="text-muted-foreground" />
+          <span className="text-bold text-black dark:text-muted-foreground">
+            Clean thread
+          </span>
+        </div>
+      </ModalTrigger>
+      <ModalPortal />
+      <ModalContent>
+        <ModalHeader>
+          <ModalTitle>Clean Thread</ModalTitle>
+        </ModalHeader>
+        <p>Are you sure you want to clean this thread?</p>
+        <ModalFooter>
+          <div className="flex gap-x-2">
+            <ModalClose asChild onClick={stopPropagation}>
+              <Button themes="ghost">No</Button>
+            </ModalClose>
+            <ModalClose asChild>
+              <Button themes="danger" onClick={handleConfirm} autoFocus>
+                Yes
+              </Button>
+            </ModalClose>
+          </div>
+        </ModalFooter>
+      </ModalContent>
+    </Modal>
+  )
+}
+
+export default React.memo(CleanThreadModal)
diff --git a/web/screens/Chat/DeleteThreadModal/index.tsx b/web/screens/Chat/DeleteThreadModal/index.tsx
new file mode 100644
index 000000000..edbdb09b4
--- /dev/null
+++ b/web/screens/Chat/DeleteThreadModal/index.tsx
@@ -0,0 +1,79 @@
+import React, { useCallback } from 'react'
+
+import {
+  Modal,
+  ModalTrigger,
+  ModalPortal,
+  ModalContent,
+  ModalHeader,
+  ModalTitle,
+  ModalFooter,
+  ModalClose,
+  Button,
+} from '@janhq/uikit'
+import { Trash2Icon } from 'lucide-react'
+
+import useDeleteThread from '@/hooks/useDeleteThread'
+
+type Props = {
+  threadId: string
+}
+
+/** Keeps a click inside the modal from bubbling to ancestor handlers. */
+const stopPropagation = (event: React.SyntheticEvent) =>
+  event.stopPropagation()
+
+/**
+ * "Delete thread" entry plus its confirmation modal.
+ *
+ * Choosing "Yes" calls `deleteThread(threadId)` from `useDeleteThread`. Both
+ * buttons are wrapped in `ModalClose`.
+ */
+const DeleteThreadModal: React.FC<Props> = ({ threadId }) => {
+  const { deleteThread } = useDeleteThread()
+
+  const handleConfirm = useCallback(
+    (event: React.MouseEvent<HTMLButtonElement, MouseEvent>) => {
+      event.stopPropagation()
+      deleteThread(threadId)
+    },
+    [deleteThread, threadId]
+  )
+
+  return (
+    <Modal>
+      <ModalTrigger asChild onClick={stopPropagation}>
+        <div className="flex cursor-pointer items-center space-x-2 px-4 py-2 hover:bg-secondary">
+          <Trash2Icon size={16} className="text-red-600 dark:text-red-300" />
+          <span className="text-bold text-red-600 dark:text-red-300">
+            Delete thread
+          </span>
+        </div>
+      </ModalTrigger>
+      <ModalPortal />
+      <ModalContent>
+        <ModalHeader>
+          <ModalTitle>Delete Thread</ModalTitle>
+        </ModalHeader>
+        <p>
+          Are you sure you want to delete this thread? This action cannot be
+          undone.
+        </p>
+        <ModalFooter>
+          <div className="flex gap-x-2">
+            <ModalClose asChild onClick={stopPropagation}>
+              <Button themes="ghost">No</Button>
+            </ModalClose>
+            <ModalClose asChild>
+              <Button autoFocus themes="danger" onClick={handleConfirm}>
+                Yes
+              </Button>
+            </ModalClose>
+          </div>
+        </ModalFooter>
+      </ModalContent>
+    </Modal>
+  )
+}
+
+export default React.memo(DeleteThreadModal)
diff --git a/web/screens/Chat/RequestDownloadModel/index.tsx b/web/screens/Chat/RequestDownloadModel/index.tsx
index e62dc562d..88fdadd57 100644
--- a/web/screens/Chat/RequestDownloadModel/index.tsx
+++ b/web/screens/Chat/RequestDownloadModel/index.tsx
@@ -2,15 +2,18 @@ import React, { Fragment, useCallback } from 'react'
 
 import { Button } from '@janhq/uikit'
 
+import { useAtomValue } from 'jotai'
+
 import LogoMark from '@/containers/Brand/Logo/Mark'
 
 import { MainViewState } from '@/constants/screens'
 
-import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels'
 import { useMainViewState } from '@/hooks/useMainViewState'
 
+import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom'
+
 const RequestDownloadModel: React.FC = () => {
-  const { downloadedModels } = useGetDownloadedModels()
+  const downloadedModels = useAtomValue(downloadedModelsAtom)
   const { setMainViewState } = useMainViewState()
 
   const onClick = useCallback(() => {
diff --git a/web/screens/Chat/SimpleTextMessage/index.tsx b/web/screens/Chat/SimpleTextMessage/index.tsx
index 261bb3497..9be45e7e6 100644
--- a/web/screens/Chat/SimpleTextMessage/index.tsx
+++ b/web/screens/Chat/SimpleTextMessage/index.tsx
@@ -18,7 +18,7 @@ import hljs from 'highlight.js'
 
 import { useAtomValue } from 'jotai'
 import { FolderOpenIcon } from 'lucide-react'
-import { Marked, Renderer } from 'marked'
+import { Marked, Renderer, marked as markedDefault } from 'marked'
 
 import { markedHighlight } from 'marked-highlight'
 
@@ -37,13 +37,29 @@ import MessageToolbar from '../MessageToolbar'
 
 import { getCurrentChatMessagesAtom } from '@/helpers/atoms/ChatMessage.atom'
 
+/** Reports whether marked finds a `code` or `codespan` token in `value`. */
+function isMarkdownValue(value: string): boolean {
+  const codeTokenTypes = ['code', 'codespan']
+  const seenTokenTypes = new Set<string>()
+  // The parse result is discarded; the call only feeds walkTokens.
+  markedDefault(value, {
+    walkTokens: (token) => {
+      seenTokenTypes.add(token.type)
+    },
+  })
+  return codeTokenTypes.some((type) => seenTokenTypes.has(type))
+}
+
 const SimpleTextMessage: React.FC<ThreadMessage> = (props) => {
   let text = ''
+  const isUser = props.role === ChatCompletionRole.User
+  const isSystem = props.role === ChatCompletionRole.System
+
   if (props.content && props.content.length > 0) {
     text = props.content[0]?.text?.value ?? ''
   }
+
   const clipboard = useClipboard({ timeout: 1000 })
-  const { onViewFile, onViewFileContainer } = usePath()
 
   const marked: Marked = new Marked(
     markedHighlight({
@@ -88,9 +104,8 @@ const SimpleTextMessage: React.FC<ThreadMessage> = (props) => {
     }
   )
 
+  const { onViewFile, onViewFileContainer } = usePath()
   const parsedText = marked.parse(text)
-  const isUser = props.role === ChatCompletionRole.User
-  const isSystem = props.role === ChatCompletionRole.System
   const [tokenCount, setTokenCount] = useState(0)
   const [lastTimestamp, setLastTimestamp] = useState<number | undefined>()
   const [tokenSpeed, setTokenSpeed] = useState(0)
@@ -260,16 +275,29 @@ const SimpleTextMessage: React.FC<ThreadMessage> = (props) => {
             </div>
           )}
 
-          <div
-            className={twMerge(
-              'message flex flex-grow flex-col gap-y-2 text-[15px] font-normal leading-relaxed',
-              isUser
-                ? 'whitespace-pre-wrap break-words'
-                : 'rounded-xl bg-secondary p-4'
-            )}
-            // eslint-disable-next-line @typescript-eslint/naming-convention
-            dangerouslySetInnerHTML={{ __html: parsedText }}
-          />
+          {isUser && !isMarkdownValue(text) ? (
+            <div
+              className={twMerge(
+                'message flex flex-grow flex-col gap-y-2 text-[15px] font-normal leading-relaxed',
+                isUser
+                  ? 'whitespace-pre-wrap break-words'
+                  : 'rounded-xl bg-secondary p-4'
+              )}
+            >
+              {text}
+            </div>
+          ) : (
+            <div
+              className={twMerge(
+                'message flex flex-grow flex-col gap-y-2 text-[15px] font-normal leading-relaxed',
+                isUser
+                  ? 'whitespace-pre-wrap break-words'
+                  : 'rounded-xl bg-secondary p-4'
+              )}
+              // eslint-disable-next-line @typescript-eslint/naming-convention
+              dangerouslySetInnerHTML={{ __html: parsedText }}
+            />
+          )}
         </>
       </div>
     </div>
diff --git a/web/screens/Chat/ThreadList/index.tsx b/web/screens/Chat/ThreadList/index.tsx
index b4a045b1d..2ad9a28c4 100644
--- a/web/screens/Chat/ThreadList/index.tsx
+++ b/web/screens/Chat/ThreadList/index.tsx
@@ -1,76 +1,39 @@
-import { useEffect, useState } from 'react'
+import { useCallback } from 'react'
 
-import {
-  Modal,
-  ModalTrigger,
-  ModalClose,
-  ModalFooter,
-  ModalPortal,
-  ModalContent,
-  ModalHeader,
-  ModalTitle,
-  Button,
-} from '@janhq/uikit'
+import { Thread } from '@janhq/core/'
 
 import { motion as m } from 'framer-motion'
 import { useAtomValue } from 'jotai'
-import {
-  GalleryHorizontalEndIcon,
-  MoreVerticalIcon,
-  Trash2Icon,
-  Paintbrush,
-} from 'lucide-react'
+import { GalleryHorizontalEndIcon, MoreVerticalIcon } from 'lucide-react'
 
 import { twMerge } from 'tailwind-merge'
 
-import { useCreateNewThread } from '@/hooks/useCreateNewThread'
-import useDeleteThread from '@/hooks/useDeleteThread'
-
-import useGetAssistants from '@/hooks/useGetAssistants'
-import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels'
 import useSetActiveThread from '@/hooks/useSetActiveThread'
 
-import useThreads from '@/hooks/useThreads'
-
 import { displayDate } from '@/utils/datetime'
 
+import CleanThreadModal from '../CleanThreadModal'
+
+import DeleteThreadModal from '../DeleteThreadModal'
+
 import {
-  activeThreadAtom,
+  getActiveThreadIdAtom,
   threadStatesAtom,
   threadsAtom,
 } from '@/helpers/atoms/Thread.atom'
 
 export default function ThreadList() {
-  const threads = useAtomValue(threadsAtom)
   const threadStates = useAtomValue(threadStatesAtom)
-  const { getThreads } = useThreads()
-  const { assistants } = useGetAssistants()
-  const { requestCreateNewThread } = useCreateNewThread()
-  const activeThread = useAtomValue(activeThreadAtom)
-  const { deleteThread, cleanThread } = useDeleteThread()
-  const { downloadedModels } = useGetDownloadedModels()
-  const [isThreadsReady, setIsThreadsReady] = useState(false)
+  const threads = useAtomValue(threadsAtom)
+  const activeThreadId = useAtomValue(getActiveThreadIdAtom)
+  const { setActiveThread } = useSetActiveThread()
 
-  const { activeThreadId, setActiveThread: onThreadClick } =
-    useSetActiveThread()
-
-  useEffect(() => {
-    getThreads().then(() => setIsThreadsReady(true))
-    // eslint-disable-next-line react-hooks/exhaustive-deps
-  }, [])
-
-  useEffect(() => {
-    if (
-      isThreadsReady &&
-      downloadedModels.length !== 0 &&
-      threads.length === 0 &&
-      assistants.length !== 0 &&
-      !activeThread
-    ) {
-      requestCreateNewThread(assistants[0])
-    }
-    // eslint-disable-next-line react-hooks/exhaustive-deps
-  }, [assistants, threads, downloadedModels, activeThread, isThreadsReady])
+  const onThreadClick = useCallback(
+    (thread: Thread) => {
+      setActiveThread(thread)
+    },
+    [setActiveThread]
+  )
 
   return (
     <div className="px-3 py-4">
@@ -83,133 +46,46 @@ export default function ThreadList() {
           <h2 className="font-semibold">No Thread History</h2>
         </div>
       ) : (
-        threads.map((thread, i) => {
-          const lastMessage =
-            threadStates[thread.id]?.lastMessage ?? 'No new message'
-          return (
-            <div
-              key={i}
-              className={twMerge(
-                `group/message relative mb-1 flex cursor-pointer flex-col transition-all hover:rounded-lg hover:bg-gray-100 hover:dark:bg-secondary/50`
-              )}
-              onClick={() => {
-                onThreadClick(thread)
-              }}
-            >
-              <div className="relative z-10 p-4 py-4">
-                <p className="line-clamp-1 text-xs leading-5 text-muted-foreground">
-                  {thread.updated && displayDate(thread.updated)}
-                </p>
-                <h2 className="line-clamp-1 font-bold">{thread.title}</h2>
-                <p className="mt-1 line-clamp-1 text-xs text-gray-700 group-hover/message:max-w-[160px] dark:text-gray-300">
-                  {lastMessage || 'No new message'}
-                </p>
-              </div>
-              <div
-                className={twMerge(
-                  `group/icon invisible absolute bottom-2 right-2 z-20 rounded-lg p-1 text-muted-foreground hover:bg-gray-200 group-hover/message:visible hover:dark:bg-secondary`
-                )}
-              >
-                <MoreVerticalIcon />
-                <div className="invisible absolute right-0 z-20 w-40 overflow-hidden rounded-lg border border-border bg-background shadow-lg group-hover/icon:visible">
-                  <Modal>
-                    <ModalTrigger asChild onClick={(e) => e.stopPropagation()}>
-                      <div className="flex cursor-pointer items-center space-x-2 px-4 py-2 hover:bg-secondary">
-                        <Paintbrush
-                          size={16}
-                          className="text-muted-foreground"
-                        />
-                        <span className="text-bold text-black dark:text-muted-foreground">
-                          Clean thread
-                        </span>
-                      </div>
-                    </ModalTrigger>
-                    <ModalPortal />
-                    <ModalContent>
-                      <ModalHeader>
-                        <ModalTitle>Clean Thread</ModalTitle>
-                      </ModalHeader>
-                      <p>Are you sure you want to clean this thread?</p>
-                      <ModalFooter>
-                        <div className="flex gap-x-2">
-                          <ModalClose
-                            asChild
-                            onClick={(e) => e.stopPropagation()}
-                          >
-                            <Button themes="ghost">No</Button>
-                          </ModalClose>
-                          <ModalClose asChild>
-                            <Button
-                              themes="danger"
-                              onClick={(e) => {
-                                e.stopPropagation()
-                                cleanThread(thread.id)
-                              }}
-                              autoFocus
-                            >
-                              Yes
-                            </Button>
-                          </ModalClose>
-                        </div>
-                      </ModalFooter>
-                    </ModalContent>
-                  </Modal>
-                  <Modal>
-                    <ModalTrigger asChild onClick={(e) => e.stopPropagation()}>
-                      <div className="flex cursor-pointer items-center space-x-2 px-4 py-2 hover:bg-secondary">
-                        <Trash2Icon
-                          size={16}
-                          className="text-red-600 dark:text-red-300"
-                        />
-                        <span className="text-bold text-red-600 dark:text-red-300">
-                          Delete thread
-                        </span>
-                      </div>
-                    </ModalTrigger>
-                    <ModalPortal />
-                    <ModalContent>
-                      <ModalHeader>
-                        <ModalTitle>Delete Thread</ModalTitle>
-                      </ModalHeader>
-                      <p>
-                        Are you sure you want to delete this thread? This action
-                        cannot be undone.
-                      </p>
-                      <ModalFooter>
-                        <div className="flex gap-x-2">
-                          <ModalClose
-                            asChild
-                            onClick={(e) => e.stopPropagation()}
-                          >
-                            <Button themes="ghost">No</Button>
-                          </ModalClose>
-                          <ModalClose asChild>
-                            <Button
-                              autoFocus
-                              themes="danger"
-                              onClick={(e) => {
-                                e.stopPropagation()
-                                deleteThread(thread.id)
-                              }}
-                            >
-                              Yes
-                            </Button>
-                          </ModalClose>
-                        </div>
-                      </ModalFooter>
-                    </ModalContent>
-                  </Modal>
-                </div>
-              </div>
-              {activeThreadId === thread.id && (
-                <m.div
-                  className="absolute inset-0 left-0 h-full w-full rounded-lg bg-gray-100 p-4 dark:bg-secondary/50"
-                  layoutId="active-thread"
-                />
-              )}
+        threads.map((thread) => (
+          <div
+            key={thread.id}
+            className={twMerge(
+              `group/message relative mb-1 flex cursor-pointer flex-col transition-all hover:rounded-lg hover:bg-gray-100 hover:dark:bg-secondary/50`
+            )}
+            onClick={() => {
+              onThreadClick(thread)
+            }}
+          >
+            <div className="relative z-10 p-4 py-4">
+              <p className="line-clamp-1 text-xs leading-5 text-muted-foreground">
+                {thread.updated && displayDate(thread.updated)}
+              </p>
+              <h2 className="line-clamp-1 font-bold">{thread.title}</h2>
+              <p className="mt-1 line-clamp-1 text-xs text-gray-700 group-hover/message:max-w-[160px] dark:text-gray-300">
+                {threadStates[thread.id]?.lastMessage
+                  ? threadStates[thread.id]?.lastMessage
+                  : 'No new message'}
+              </p>
             </div>
-          )
-        })
+            <div
+              className={twMerge(
+                `group/icon invisible absolute bottom-2 right-2 z-20 rounded-lg p-1 text-muted-foreground hover:bg-gray-200 group-hover/message:visible hover:dark:bg-secondary`
+              )}
+            >
+              <MoreVerticalIcon />
+              <div className="invisible absolute right-0 z-20 w-40 overflow-hidden rounded-lg border border-border bg-background shadow-lg group-hover/icon:visible">
+                <CleanThreadModal threadId={thread.id} />
+                <DeleteThreadModal threadId={thread.id} />
+              </div>
+            </div>
+            {activeThreadId === thread.id && (
+              <m.div
+                className="absolute inset-0 left-0 h-full w-full rounded-lg bg-gray-100 p-4 dark:bg-secondary/50"
+                layoutId="active-thread"
+              />
+            )}
+          </div>
+        ))
       )}
     </div>
   )
diff --git a/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx b/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx
index 3ffe2cbac..17b897d51 100644
--- a/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx
+++ b/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx
@@ -27,15 +27,18 @@ import useDownloadModel from '@/hooks/useDownloadModel'
 
 import { useDownloadState } from '@/hooks/useDownloadState'
 
-import { getAssistants } from '@/hooks/useGetAssistants'
-import { downloadedModelsAtom } from '@/hooks/useGetDownloadedModels'
 import { useMainViewState } from '@/hooks/useMainViewState'
 
 import { toGibibytes } from '@/utils/converter'
 
+import { assistantsAtom } from '@/helpers/atoms/Assistant.atom'
 import { serverEnabledAtom } from '@/helpers/atoms/LocalServer.atom'
 
-import { totalRamAtom } from '@/helpers/atoms/SystemBar.atom'
+import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom'
+import {
+  nvidiaTotalVramAtom,
+  totalRamAtom,
+} from '@/helpers/atoms/SystemBar.atom'
 
 type Props = {
   model: Model
@@ -49,7 +52,14 @@ const ExploreModelItemHeader: React.FC<Props> = ({ model, onClick, open }) => {
   const { modelDownloadStateAtom } = useDownloadState()
   const { requestCreateNewThread } = useCreateNewThread()
   const totalRam = useAtomValue(totalRamAtom)
+  const nvidiaTotalVram = useAtomValue(nvidiaTotalVramAtom)
+  // nvidiaTotalVram is reported in MB; convert it to bytes to match the unit of totalRam
+  let ram = nvidiaTotalVram * 1024 * 1024
+  if (ram === 0) {
+    ram = totalRam
+  }
   const serverEnabled = useAtomValue(serverEnabledAtom)
+  const assistants = useAtomValue(assistantsAtom)
 
   const downloadAtom = useMemo(
     () => atom((get) => get(modelDownloadStateAtom)[model.id]),
@@ -60,17 +70,23 @@ const ExploreModelItemHeader: React.FC<Props> = ({ model, onClick, open }) => {
 
   const onDownloadClick = useCallback(() => {
     downloadModel(model)
-    // eslint-disable-next-line react-hooks/exhaustive-deps
   }, [model])
 
   const isDownloaded = downloadedModels.find((md) => md.id === model.id) != null
 
   let downloadButton = (
-    <Button onClick={() => onDownloadClick()}>Download</Button>
+    <Button
+      className="z-50"
+      onClick={(e) => {
+        e.stopPropagation()
+        onDownloadClick()
+      }}
+    >
+      Download
+    </Button>
   )
 
   const onUseModelClick = useCallback(async () => {
-    const assistants = await getAssistants()
     if (assistants.length === 0) {
       alert('No assistant available')
       return
@@ -107,7 +123,7 @@ const ExploreModelItemHeader: React.FC<Props> = ({ model, onClick, open }) => {
   }
 
   const getLabel = (size: number) => {
-    if (size * 1.25 >= totalRam) {
+    if (size * 1.25 >= ram) {
       return (
         <Badge className="rounded-md" themes="danger">
           Not enough RAM
diff --git a/web/screens/ExploreModels/ModelVersionItem/index.tsx b/web/screens/ExploreModels/ModelVersionItem/index.tsx
index 50d71b161..3a9385670 100644
--- a/web/screens/ExploreModels/ModelVersionItem/index.tsx
+++ b/web/screens/ExploreModels/ModelVersionItem/index.tsx
@@ -10,9 +10,11 @@ import { MainViewState } from '@/constants/screens'
 
 import useDownloadModel from '@/hooks/useDownloadModel'
 import { useDownloadState } from '@/hooks/useDownloadState'
-import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels'
+
 import { useMainViewState } from '@/hooks/useMainViewState'
 
+import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom'
+
 type Props = {
   model: Model
   isRecommended: boolean
@@ -20,7 +22,7 @@ type Props = {
 
 const ModelVersionItem: React.FC<Props> = ({ model }) => {
   const { downloadModel } = useDownloadModel()
-  const { downloadedModels } = useGetDownloadedModels()
+  const downloadedModels = useAtomValue(downloadedModelsAtom)
   const { setMainViewState } = useMainViewState()
   const isDownloaded =
     downloadedModels.find(
diff --git a/web/screens/ExploreModels/index.tsx b/web/screens/ExploreModels/index.tsx
index 398b2db08..7002c60b7 100644
--- a/web/screens/ExploreModels/index.tsx
+++ b/web/screens/ExploreModels/index.tsx
@@ -1,4 +1,4 @@
-import { useState } from 'react'
+import { useCallback, useState } from 'react'
 
 import { openExternalUrl } from '@janhq/core'
 import {
@@ -12,24 +12,24 @@ import {
   SelectItem,
 } from '@janhq/uikit'
 
+import { useAtomValue } from 'jotai'
 import { SearchIcon } from 'lucide-react'
 
-import Loader from '@/containers/Loader'
-
-import { useGetConfiguredModels } from '@/hooks/useGetConfiguredModels'
-
-import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels'
-
 import ExploreModelList from './ExploreModelList'
 
+import {
+  configuredModelsAtom,
+  downloadedModelsAtom,
+} from '@/helpers/atoms/Model.atom'
+
 const ExploreModelsScreen = () => {
-  const { loading, models } = useGetConfiguredModels()
+  const configuredModels = useAtomValue(configuredModelsAtom)
+  const downloadedModels = useAtomValue(downloadedModelsAtom)
   const [searchValue, setsearchValue] = useState('')
-  const { downloadedModels } = useGetDownloadedModels()
   const [sortSelected, setSortSelected] = useState('All Models')
   const sortMenu = ['All Models', 'Recommended', 'Downloaded']
 
-  const filteredModels = models.filter((x) => {
+  const filteredModels = configuredModels.filter((x) => {
     if (sortSelected === 'Downloaded') {
       return (
         x.name.toLowerCase().includes(searchValue.toLowerCase()) &&
@@ -45,11 +45,9 @@ const ExploreModelsScreen = () => {
     }
   })
 
-  const onHowToImportModelClick = () => {
+  const onHowToImportModelClick = useCallback(() => {
     openExternalUrl('https://jan.ai/guides/using-models/import-manually/')
-  }
-
-  if (loading) return <Loader description="loading ..." />
+  }, [])
 
   return (
     <div
diff --git a/web/screens/Settings/Advanced/index.tsx b/web/screens/Settings/Advanced/index.tsx
index d2f7d81ee..f6c8fb4d8 100644
--- a/web/screens/Settings/Advanced/index.tsx
+++ b/web/screens/Settings/Advanced/index.tsx
@@ -33,7 +33,10 @@ const Advanced = () => {
   } = useContext(FeatureToggleContext)
   const [partialProxy, setPartialProxy] = useState<string>(proxy)
   const [gpuEnabled, setGpuEnabled] = useState<boolean>(false)
-
+  const [gpuList, setGpuList] = useState([
+    { id: 'none', vram: null, name: 'none' },
+  ])
+  const [gpusInUse, setGpusInUse] = useState<string[]>([])
   const { readSettings, saveSettings, validateSettings, setShowNotification } =
     useSettings()
 
@@ -54,6 +57,10 @@ const Advanced = () => {
     const setUseGpuIfPossible = async () => {
       const settings = await readSettings()
       setGpuEnabled(settings.run_mode === 'gpu')
+      setGpusInUse(settings.gpus_in_use || [])
+      if (settings.gpus) {
+        setGpuList(settings.gpus)
+      }
     }
     setUseGpuIfPossible()
   }, [readSettings])
@@ -69,6 +76,20 @@ const Advanced = () => {
     })
   }
 
+  const handleGPUChange = (gpuId: string) => {
+    // Toggle: drop the GPU when it is already selected, append it otherwise.
+    const wasSelected = gpusInUse.includes(gpuId)
+    let nextGpusInUse = wasSelected
+      ? gpusInUse.filter((id) => id !== gpuId)
+      : [...gpusInUse, gpuId]
+    // While GPU mode is on, the last selected GPU cannot be unchecked.
+    if (gpuEnabled && nextGpusInUse.length === 0) {
+      nextGpusInUse = [gpuId]
+    }
+    setGpusInUse(nextGpusInUse)
+    saveSettings({ gpusInUse: nextGpusInUse })
+  }
+
   return (
     <div className="block w-full">
       {/* Keyboard shortcut  */}
@@ -133,10 +154,40 @@ const Advanced = () => {
           />
         </div>
       )}
-
       {/* Directory */}
+      {gpuEnabled && (
+        <div className="mt-4">
+          <label className="block text-sm font-medium text-gray-700">
+            Select GPU(s)
+          </label>
+          <div className="mt-2 space-y-2">
+            {gpuList.map((gpu) => (
+              <div key={gpu.id}>
+                <input
+                  type="checkbox"
+                  id={`gpu-${gpu.id}`}
+                  name="gpu"
+                  value={gpu.id}
+                  checked={gpusInUse.includes(gpu.id)}
+                  onChange={() => handleGPUChange(gpu.id)}
+                />
+                <label htmlFor={`gpu-${gpu.id}`}>
+                  {' '}
+                  {gpu.name} (VRAM: {gpu.vram} MB)
+                </label>
+              </div>
+            ))}
+          </div>
+        </div>
+      )}
+      {/* Warning message */}
+      {gpuEnabled && gpusInUse.length > 1 && (
+        <p className="mt-2 italic text-red-500">
+          If enabling multi-GPU without the same GPU model or without NVLink, it
+          may affect token speed.
+        </p>
+      )}
       <DataFolder />
-
       {/* Proxy */}
       <div className="flex w-full items-start justify-between border-b border-border py-4 first:pt-0 last:border-none">
         <div className="flex-shrink-0 space-y-1.5">
diff --git a/web/screens/Settings/Models/index.tsx b/web/screens/Settings/Models/index.tsx
index 3c5a0c6e3..f8997e751 100644
--- a/web/screens/Settings/Models/index.tsx
+++ b/web/screens/Settings/Models/index.tsx
@@ -2,16 +2,17 @@ import { useState } from 'react'
 
 import { Input } from '@janhq/uikit'
 
+import { useAtomValue } from 'jotai'
 import { SearchIcon } from 'lucide-react'
 
-import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels'
-
 import RowModel from './Row'
 
+import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom'
+
 const Column = ['Name', 'Model ID', 'Size', 'Version', 'Status', '']
 
 export default function Models() {
-  const { downloadedModels } = useGetDownloadedModels()
+  const downloadedModels = useAtomValue(downloadedModelsAtom)
   const [searchValue, setsearchValue] = useState('')
 
   const filteredDownloadedModels = downloadedModels.filter((x) => {