diff --git a/.github/workflows/jan-docs.yml b/.github/workflows/jan-docs.yml
index 2993d1525..bc8473687 100644
--- a/.github/workflows/jan-docs.yml
+++ b/.github/workflows/jan-docs.yml
@@ -6,7 +6,7 @@ on:
       - main
     paths:
       - 'docs/**'
-    tags: ["v[0-9]+.[0-9]+.[0-9]+-docs"]
+      - '.github/workflows/jan-docs.yml'
   pull_request:
     branches:
       - main
@@ -24,7 +24,7 @@ jobs:
       CLOUDFLARE_PROJECT_NAME: jan
     runs-on: ubuntu-latest
     permissions:
-      contents: read
+      contents: write
       deployments: write
       pull-requests: write
     steps:
@@ -78,13 +78,13 @@ jobs:
               Preview URL: ${{ steps.deployCloudflarePages.outputs.url }}
 
       - name: Add Custome Domain file
-        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
+        if: github.event_name == 'push' && github.event.pull_request.head.repo.full_name != github.repository
         run: echo "${{ vars.DOCUSAURUS_DOMAIN }}" > ./docs/build/CNAME
 
       # Popular action to deploy to GitHub Pages:
       # Docs: https://github.com/peaceiris/actions-gh-pages#%EF%B8%8F-docusaurus
       - name: Deploy to GitHub Pages
-        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
+        if: github.event_name == 'push' && github.event.pull_request.head.repo.full_name != github.repository
         uses: peaceiris/actions-gh-pages@v3
         with:
           github_token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/jan-electron-build.yml b/.github/workflows/jan-electron-build.yml
index db585e3c1..118d6c3a0 100644
--- a/.github/workflows/jan-electron-build.yml
+++ b/.github/workflows/jan-electron-build.yml
@@ -28,12 +28,7 @@ jobs:
 
       - name: Update app version base on tag
         run: |
-          if [[ ! "${VERSION_TAG}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
-              echo "Error: Tag is not valid!"
-              exit 1
-          fi
-          jq --arg version "${VERSION_TAG#v}" '.version = $version' electron/package.json > /tmp/package.json
-          mv /tmp/package.json electron/package.json
+          make update-app-version
         env:
           VERSION_TAG: ${{ steps.tag.outputs.tag }}
 
@@ -49,25 +44,9 @@ jobs:
           p12-file-base64: ${{ secrets.CODE_SIGN_P12_BASE64 }}
           p12-password: ${{ secrets.CODE_SIGN_P12_PASSWORD }}
 
-
-      - name: Build uikit
-        run: |
-          cd uikit
-          yarn install
-          yarn build
-
-      - name: Install yarn dependencies
-        run: |
-          yarn build:core
-          yarn install
-          yarn build:plugins
-        env:
-          APP_PATH: "."
-          DEVELOPER_ID: ${{ secrets.DEVELOPER_ID }}
-
       - name: Build and publish app
         run: |
-          yarn build:publish
+          make build-and-publish
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           CSC_LINK: "/tmp/codesign.p12"
@@ -75,6 +54,8 @@ jobs:
           CSC_IDENTITY_AUTO_DISCOVERY: "true"
           APPLE_ID: ${{ secrets.APPLE_ID }}
           APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }}
+          APP_PATH: "."
+          DEVELOPER_ID: ${{ secrets.DEVELOPER_ID }}
 
   build-windows-x64:
     runs-on: windows-latest
@@ -99,34 +80,13 @@ jobs:
       - name: Update app version base on tag
         shell: bash
         run: |
-          if [[ ! "${VERSION_TAG}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
-              echo "Error: Tag is not valid!"
-              exit 1
-          fi
-          jq --arg version "${VERSION_TAG#v}" '.version = $version' electron/package.json > /tmp/package.json
-          mv /tmp/package.json electron/package.json
+          make update-app-version
         env:
           VERSION_TAG: ${{ steps.tag.outputs.tag }}
 
-      - name: Build uikit
-        run: |
-          cd uikit
-          yarn config set network-timeout 300000
-          yarn install
-          yarn build
-
-      - name: Install yarn dependencies
-        shell: powershell
-        run: |
-          yarn config set network-timeout 300000
-          yarn build:core
-          yarn install
-          $env:NITRO_VERSION = Get-Content .\plugins\inference-plugin\nitro\version.txt; echo $env:NITRO_VERSION
-          yarn build:plugins
-
       - name: Build and publish app
         run: |
-          yarn build:publish
+          make build-and-publish
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 
@@ -158,31 +118,13 @@ jobs:
 
       - name: Update app version base on tag
         run: |
-          if [[ ! "${VERSION_TAG}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
-              echo "Error: Tag is not valid!"
-              exit 1
-          fi
-          jq --arg version "${VERSION_TAG#v}" '.version = $version' electron/package.json > /tmp/package.json
-          mv /tmp/package.json electron/package.json
+          make update-app-version
         env:
           VERSION_TAG: ${{ steps.tag.outputs.tag }}
 
-      - name: Build uikit
-        run: |
-          cd uikit
-          yarn install
-          yarn build
-
-      - name: Install yarn dependencies
-        run: |
-          yarn config set network-timeout 300000
-          yarn build:core
-          yarn install
-          yarn build:plugins
-
       - name: Build and publish app
         run: |
-          yarn build:publish
+          make build-and-publish
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 
diff --git a/.github/workflows/jan-electron-linter-and-test.yml b/.github/workflows/jan-electron-linter-and-test.yml
index 92f29c368..7717afbd4 100644
--- a/.github/workflows/jan-electron-linter-and-test.yml
+++ b/.github/workflows/jan-electron-linter-and-test.yml
@@ -43,21 +43,9 @@ jobs:
         with:
           node-version: 20
 
-      - name: Build uikit
-        run: |
-          cd uikit
-          yarn install
-          yarn build
-
       - name: Linter and test
         run: |
-          yarn config set network-timeout 300000
-          yarn build:core
-          yarn install
-          yarn lint
-          yarn build:plugins
-          yarn build:test
-          yarn test
+          make test
         env:
           CSC_IDENTITY_AUTO_DISCOVERY: "false"
 
@@ -81,24 +69,10 @@ jobs:
         with:
           node-version: 20
 
-      - name: Build uikit
-        run: |
-          yarn config set network-timeout 300000
-          cd uikit
-          yarn install
-          yarn build
-
       - name: Linter and test
         shell: powershell
         run: |
-          yarn config set network-timeout 300000
-          yarn build:core
-          yarn install
-          $env:NITRO_VERSION = Get-Content .\plugins\inference-plugin\nitro\version.txt; echo $env:NITRO_VERSION
-          yarn build:plugins
-          yarn build:test
-          $env:CI="e2e"
-          yarn test
+          make test
 
   test-on-ubuntu:
     runs-on: [self-hosted, Linux, ubuntu-desktop]
@@ -118,19 +92,8 @@ jobs:
         with:
           node-version: 20
 
-      - name: Build uikit
-        run: |
-          cd uikit
-          yarn install
-          yarn build
-
       - name: Linter and test
         run: |
           export DISPLAY=$(w -h | awk 'NR==1 {print $2}')
           echo -e "Display ID: $DISPLAY"
-          yarn config set network-timeout 300000
-          yarn build:core
-          yarn install
-          yarn build:plugins
-          yarn build:test
-          yarn test
+          make test
diff --git a/Makefile b/Makefile
new file mode 100644
index 000000000..3d04b745f
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,62 @@
+# Makefile for Jan Electron App - Build, Lint, Test, and Clean
+
+# Default target, does nothing
+all:
+	@echo "Specify a target to run"
+
+# Builds the UI kit
+build-uikit:
+ifeq ($(OS),Windows_NT)
+	cd uikit && yarn config set network-timeout 300000 && yarn install && yarn build
+else
+	cd uikit && yarn install && yarn build
+endif
+# Updates electron/package.json's version from VERSION_TAG (must be vX.Y.Z); `$$` stops make from expanding shell/jq variables
+update-app-version:
+	if ! printf '%s\n' "$${VERSION_TAG}" | grep -Eq '^v[0-9]+\.[0-9]+\.[0-9]+$$'; then \
+		echo "Error: Tag is not valid!"; \
+		exit 1; \
+	fi
+	jq --arg version "$${VERSION_TAG#v}" '.version = $$version' electron/package.json > /tmp/package.json
+	mv /tmp/package.json electron/package.json
+
+# Installs yarn dependencies and builds core and plugins
+install-and-build: build-uikit
+ifeq ($(OS),Windows_NT)
+	powershell -Command "yarn config set network-timeout 300000; \
+	$$env:NITRO_VERSION = Get-Content .\\plugins\\inference-plugin\\nitro\\version.txt; \
+	Write-Output \"Nitro version: $$env:NITRO_VERSION\"; yarn build:core; yarn install; yarn build:plugins"
+else
+	yarn build:core
+	yarn install
+	yarn build:plugins
+endif
+
+dev: install-and-build
+	yarn dev
+
+# Linting
+lint: install-and-build
+	yarn lint
+
+# Testing
+test: lint
+	yarn build:test
+	yarn test
+
+# Builds and publishes the app
+build-and-publish: install-and-build
+	yarn build:publish
+
+# Build
+build: install-and-build
+	yarn build
+
+clean:
+ifeq ($(OS),Windows_NT)
+	powershell -Command "Get-ChildItem -Path . -Include node_modules, .next, dist -Recurse -Directory | Remove-Item -Recurse -Force"
+else
+	find . -name "node_modules" -type d -prune -exec rm -rf '{}' +
+	find . -name ".next" -type d -exec rm -rf '{}' +
+	find . -name "dist" -type d -exec rm -rf '{}' +
+endif
diff --git a/README.md b/README.md
index 8405eb74e..75214d940 100644
--- a/README.md
+++ b/README.md
@@ -91,11 +91,10 @@ Contributions are welcome! Please read the [CONTRIBUTING.md](CONTRIBUTING.md) fi
 
 - node >= 20.0.0
 - yarn >= 1.22.0
+- make >= 3.81
 
 ### Instructions
 
-Note: This instruction is tested on MacOS only.
-
 1. **Clone the Repository:**
 
 ```bash
@@ -104,25 +103,10 @@ Note: This instruction is tested on MacOS only.
    cd jan
 ```
 
-2. **Install dependencies:**
-
-```bash
-   yarn install
-
-   # Build core module
-   yarn build:core
-
-   # Packing base plugins
-   yarn build:plugins
-
-   # Packing uikit
-   yarn build:uikit
-```
-
-3. **Run development and Using Jan Desktop**
+2. **Run in development mode and use Jan Desktop**
 
    ```
-   yarn dev
+   make dev
    ```
 
    This will start the development server and open the desktop app.
@@ -134,19 +118,9 @@ Note: This instruction is tested on MacOS only.
 # Do step 1 and 2 in previous section
 git clone https://github.com/janhq/jan
 cd jan
-yarn install
-
-# Build core module
-yarn build:core
-
-# Package base plugins
-yarn build:plugins
-
-# Packing uikit
-yarn build:uikit
 
 # Build the app
-yarn build
+make build
 ```
 
 This will build the app MacOS m1/m2 for production (with code signing already done) and put the result in `dist` folder.
diff --git a/core/src/plugins/inference.ts b/core/src/plugins/inference.ts
index 663d0b258..6fc93ed37 100644
--- a/core/src/plugins/inference.ts
+++ b/core/src/plugins/inference.ts
@@ -1,4 +1,4 @@
-import { MessageRequest } from "../index";
+import { MessageRequest, ThreadMessage } from "../index";
 import { JanPlugin } from "../plugin";
 
 /**
@@ -16,10 +16,15 @@ export abstract class InferencePlugin extends JanPlugin {
    */
   abstract stopModel(): Promise<void>;
 
+  /**
+   * Stops the streaming inference.
+   */
+  abstract stopInference(): Promise<void>;
+
   /**
    * Processes an inference request.
    * @param data - The data for the inference request.
    * @returns The result of the inference request.
    */
-  abstract inferenceRequest(data: MessageRequest): Promise<any>;
+  abstract inferenceRequest(data: MessageRequest): Promise<ThreadMessage>;
 }
diff --git a/docs/README.md b/docs/README.md
index 9baac2b29..63d21064b 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -43,9 +43,9 @@ If you are using GitHub pages for hosting, this command is a convenient way to b
 
 ### Preview URL, Pre-release and Publishing Documentation
 
-When a PR is created, the preview URL will be automatically commented on the PR. For staging or pre-release documentation, use the following domains [https://pre-release.jan.ai/](https://pre-release.jan.ai/)
+When a PR is created, the preview URL will be automatically commented on the PR.
 
-To officially publish documentation, create a tag in the format `vx.y.z-docs` (e.g., `v0.1.1-docs`) on the `main` branch. The documentation will then be published to [https://jan.ai/](https://jan.ai/)
+The documentation is published to [https://jan.ai/](https://jan.ai/) when the PR is merged to `main`.
 
 ### Additional Plugins
 - @docusaurus/theme-live-codeblock
diff --git a/docs/docs/specs/messages.md b/docs/docs/specs/messages.md
index 66e4eeaa2..dbee33fe4 100644
--- a/docs/docs/specs/messages.md
+++ b/docs/docs/specs/messages.md
@@ -2,57 +2,114 @@
 title: Messages
 ---
 
-:::warning
+:::caution
 
-Draft Specification: functionality has not been implemented yet. 
-
-Feedback: [HackMD: Threads Spec](https://hackmd.io/BM_8o_OCQ-iLCYhunn2Aug)
+This is currently under development.
 
 :::
 
-Messages are within `threads` and capture additional metadata.
-> OpenAI Equivalent: https://platform.openai.com/docs/api-reference/messages
+## Overview
 
-## Message Object
-> OpenAI Equivalent: https://platform.openai.com/docs/api-reference/messages/object
-```json
-{
-  // Jan specific properties
-  "updatedAt": "...", // that's it I think
+`Messages` capture a conversation's content. This can include the content from LLM responses and other metadata from [chat completions](/specs/chats).
 
-  // OpenAI compatible properties: https://platform.openai.com/docs/api-reference/messages)
-  "id": "msg_dKYDWyQvtjDBi3tudL1yWKDa",
-  "object": "thread.message",
-  "created_at": 1698983503,
-  "thread_id": "thread_RGUhOuO9b2nrktrmsQ2uSR6I",
-  "role": "assistant",
-  "content": [
-    {
-      "type": "text",
-      "text": {
-        "value": "Hi! How can I help you today?",
-        "annotations": []
-      }
-    }
-  ],
-  "file_ids": [],
-  "assistant_id": "asst_ToSF7Gb04YMj8AMMm50ZLLtY",
-  "run_id": "run_BjylUJgDqYK9bOhy4yjAiMrn",
-  "metadata": {}
-}
+- Users and assistants can send multimedia messages.
+- An [OpenAI Message API](https://platform.openai.com/docs/api-reference/messages) compatible endpoint at `localhost:3000/v1/messages`.
+
+## Folder Structure
+
+Messages are saved in the `/threads/{thread_id}` folder in `messages.jsonl` files.
+
+```sh
+jan/
+    threads/
+        assistant_name_unix_timestamp/
+            ...
+            messages.jsonl
+        jan_2341243134/
+            ...
+            messages.jsonl
 ```
 
-## Messages API
-> OpenAI Equivalent: https://platform.openai.com/docs/api-reference/messages
+## `messages.jsonl`
 
+Individual messages are saved in `jsonl` format for indexing purposes.
+
+```json
+{...message_2}
+{...message_1}
+{...message_0}
+```
+
+### Examples
+
+Here's a standard example `message` sent from a user.
+
+```json
+"id": "0",                            // Sequential or UUID
+"object": "thread.message",           // Defaults to "thread.message"
+"created_at": 1698983503,
+"thread_id": "thread_asdf",           // Defaults to parent thread
+"assistant_id": "jan",                // Defaults to parent thread
+"role": "user",                  // From either "user" or "assistant"
+"content": [
+  {
+    "type": "text",
+    "text": {
+      "value": "Hi!?",
+      "annotations": []
+    }
+  }
+],
+"metadata": {},                       // Defaults to {}
+// "run_id": "...",                   // Rather than `run` id abstraction
+// "file_ids": [],
+```
+
+Here's an example `message` response from an assistant.
+
+```json
+"id": "0",                            // Sequential or UUID
+"object": "thread.message",           // Defaults to "thread.message"
+"created_at": 1698983503,
+"thread_id": "thread_asdf",           // Defaults to parent thread
+"assistant_id": "jan",                // Defaults to parent thread
+"role": "assistant",                  // From either "user" or "assistant"
+"content": [                          // Usually from Chat Completion obj
+  {
+    "type": "text",
+    "text": {
+      "value": "Hi! How can I help you today?",
+      "annotations": []
+    }
+  }
+],
+"metadata": {},                       // Defaults to {}
+// "run_id": "...",                   // KIV
+// "file_ids": [],                    // KIV
+// "usage": {}                        // KIV: saving chat completion properties https://platform.openai.com/docs/api-reference/chat/object
+```
+
+## API Reference
+
+Jan's `messages` API is compatible with [OpenAI's Messages API](https://platform.openai.com/docs/api-reference/messages), with additional methods for managing messages locally.
+
+See [Jan Messages API](https://jan.ai/api-reference#tag/Messages)
+
+<!-- TODO clean this part up into API -->
+<!--
 ### Get list message
+
 > OpenAI Equivalent: https://platform.openai.com/docs/api-reference/messages/getMessage
+
 - Example request
+
 ```shell
   curl {JAN_URL}/v1/threads/{thread_id}/messages/{message_id} \
     -H "Content-Type: application/json"
 ```
+
 - Example response
+
 ```json
 {
   "id": "msg_abc123",
@@ -75,9 +132,13 @@ Messages are within `threads` and capture additional metadata.
   "metadata": {}
 }
 ```
+
 ### Create message
+
 > OpenAI Equivalent: https://platform.openai.com/docs/api-reference/messages/createMessage
+
 - Example request
+
 ```shell
   curl -X POST {JAN_URL}/v1/threads/{thread_id}/messages \
     -H "Content-Type: application/json" \
@@ -86,93 +147,112 @@ Messages are within `threads` and capture additional metadata.
       "content": "How does AI work? Explain it in simple terms."
     }'
 ```
+
 - Example response
+
 ```json
-  {
-    "id": "msg_abc123",
-    "object": "thread.message",
-    "created_at": 1699017614,
-    "thread_id": "thread_abc123",
-    "role": "user",
-    "content": [
-      {
-        "type": "text",
-        "text": {
-          "value": "How does AI work? Explain it in simple terms.",
-          "annotations": []
-        }
+{
+  "id": "msg_abc123",
+  "object": "thread.message",
+  "created_at": 1699017614,
+  "thread_id": "thread_abc123",
+  "role": "user",
+  "content": [
+    {
+      "type": "text",
+      "text": {
+        "value": "How does AI work? Explain it in simple terms.",
+        "annotations": []
       }
-    ],
-    "file_ids": [],
-    "assistant_id": null,
-    "run_id": null,
-    "metadata": {}
-  }
+    }
+  ],
+  "file_ids": [],
+  "assistant_id": null,
+  "run_id": null,
+  "metadata": {}
+}
 ```
+
 ### Get message
+
 > OpenAI Equivalent: https://platform.openai.com/docs/api-reference/assistants/listAssistants
+
 - Example request
+
 ```shell
   curl {JAN_URL}/v1/threads/{thread_id}/messages/{message_id} \
     -H "Content-Type: application/json"
 ```
+
 - Example response
+
 ```json
-  {
-    "id": "msg_abc123",
-    "object": "thread.message",
-    "created_at": 1699017614,
-    "thread_id": "thread_abc123",
-    "role": "user",
-    "content": [
-      {
-        "type": "text",
-        "text": {
-          "value": "How does AI work? Explain it in simple terms.",
-          "annotations": []
-        }
+{
+  "id": "msg_abc123",
+  "object": "thread.message",
+  "created_at": 1699017614,
+  "thread_id": "thread_abc123",
+  "role": "user",
+  "content": [
+    {
+      "type": "text",
+      "text": {
+        "value": "How does AI work? Explain it in simple terms.",
+        "annotations": []
       }
-    ],
-    "file_ids": [],
-    "assistant_id": null,
-    "run_id": null,
-    "metadata": {}
-  }
+    }
+  ],
+  "file_ids": [],
+  "assistant_id": null,
+  "run_id": null,
+  "metadata": {}
+}
 ```
 
 ### Modify message
+
 > Jan: TODO: Do we need to modify message? Or let user create new message?
 
 # Get message file
+
 > OpenAI Equivalent: https://api.openai.com/v1/threads/{thread_id}/messages/{message_id}/files/{file_id}
+
 - Example request
+
 ```shell
   curl {JAN_URL}/v1/threads/{thread_id}/messages/{message_id}/files/{file_id} \
     -H "Content-Type: application/json"
 ```
+
 - Example response
+
 ```json
-  {
-    "id": "file-abc123",
-    "object": "thread.message.file",
-    "created_at": 1699061776,
-    "message_id": "msg_abc123"
-  }
+{
+  "id": "file-abc123",
+  "object": "thread.message.file",
+  "created_at": 1699061776,
+  "message_id": "msg_abc123"
+}
 ```
+
 # List message files
+
 > OpenAI Equivalent: https://api.openai.com/v1/threads/{thread_id}/messages/{message_id}/files
-```
+
+````
 - Example request
 ```shell
   curl {JAN_URL}/v1/threads/{thread_id}/messages/{message_id}/files/{file_id} \
     -H "Content-Type: application/json"
-```
+````
+
 - Example response
+
 ```json
-  {
-    "id": "file-abc123",
-    "object": "thread.message.file",
-    "created_at": 1699061776,
-    "message_id": "msg_abc123"
-  }
-```
\ No newline at end of file
+{
+  "id": "file-abc123",
+  "object": "thread.message.file",
+  "created_at": 1699061776,
+  "message_id": "msg_abc123"
+}
+``` -->
diff --git a/docs/docs/specs/models.md b/docs/docs/specs/models.md
index 851626431..471904bc5 100644
--- a/docs/docs/specs/models.md
+++ b/docs/docs/specs/models.md
@@ -4,7 +4,7 @@ title: Models
 
 :::caution
 
-Draft Specification: functionality has not been implemented yet.
+This is currently under development.
 
 :::
 
@@ -46,19 +46,19 @@ jan/                               # Jan root folder
 - `model.json` contains metadata and default parameters used to run a model.
 - The only required field is `source_url`.
 
-### GGUF Example
+### Example
 
 Here's a standard example `model.json` for a GGUF model.
 
 - `source_url`: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/.
 
 ```json
-"source_url": "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf",
-"type": "model",                    // Defaults to "model"
-"version": "1",                     // Defaults to 1
 "id": "zephyr-7b"                   // Defaults to foldername
+"object": "model",                    // Defaults to "model"
+"source_url": "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf",
 "name": "Zephyr 7B"                 // Defaults to foldername
 "owned_by": "you"                   // Defaults to you
+"version": "1",                     // Defaults to 1
 "created": 1231231                  // Defaults to file creation time
 "description": ""
 "state": enum[null, "downloading", "ready", "starting", "stopping", ...]
@@ -95,7 +95,7 @@ See [Jan Models API](https://jan.ai/api-reference#tag/Models)
 
 :::caution
 
-This is current under development.
+This is currently under development.
 
 :::
 
diff --git a/docs/docs/specs/threads.md b/docs/docs/specs/threads.md
index d5bc15c8f..c9d9c9d4c 100644
--- a/docs/docs/specs/threads.md
+++ b/docs/docs/specs/threads.md
@@ -2,89 +2,101 @@
 title: Threads
 ---
 
-:::warning
+:::caution
 
-Draft Specification: functionality has not been implemented yet. 
-
-Feedback: [HackMD: Threads Spec](https://hackmd.io/BM_8o_OCQ-iLCYhunn2Aug)
+This is currently under development.
 
 :::
 
-## User Stories
+## Overview
 
-_Users can chat with an assistant in a thread_
+`Threads` are conversations between an `assistant` and the user:
 
-- See [Messages Spec](./messages.md)
+- Users can tweak `model` params and `assistant` behavior within each thread.
+- Users can import and export threads.
+- An [OpenAI Thread API](https://platform.openai.com/docs/api-reference/threads) compatible endpoint at `localhost:3000/v1/threads`.
 
-_Users can change assistant and model parameters in a thread_
+## Folder Structure
 
-- Wireframes of
-
-_Users can delete all thread history_
-
-- Wireframes of settings page.
-
-## Jan Thread Object
-
-- A `Jan Thread Object` is a "representation of a conversation thread" between an `assistant` and the user
-- Objects are defined by `thread-uuid.json` files in `json` format
-- Objects are designed to be compatible with `OpenAI Thread Objects` with additional properties needed to run on our infrastructure.
-- Objects contain a `models` field, to track when the user overrides the assistant's default model parameters.
-
-| Property   | Type                                            | Description                                                                                                                                                                                    | Validation                     |
-| ---------- | ----------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------ |
-| `object`   | enum: `model`, `assistant`, `thread`, `message` | The Jan Object type                                                                                                                                                                            | Defaults to `thread`           |
-| `assistants`   | array                                           | An array of Jan Assistant Objects. Threads can "override" an assistant's parameters. Thread-level model parameters are directly saved in the `thread.models` property! (see Models spec) | Defaults to `assistant.name` |
-| `messages` | array                                           | An array of Jan Message Objects. (see Messages spec)                                                                                                                                           | Defaults to `[]`               |
-| `metadata` | map                                             | Useful for storing additional information about the object in a structured format.                                                                                                             | Defaults to `{}`               |
-
-### Generic Example
-
-```json
-// janroot/threads/jan_1700123404.json
-"assistants": ["assistant-123"],
-"messages": [
-    {...message0}, {...message1}
-],
-"metadata": {
-    "summary": "funny physics joke",
-},
-```
-
-## Filesystem
-
-- `Jan Thread Objects`'s `json` files always has the naming schema: `assistant_uuid` + `unix_time_thread_created_at. See below.
-- Threads are all saved in the `janroot/threads` folder in a flat folder structure.
-- The folder is standalone and can be easily zipped, exported, and cleared.
+- Threads are saved in the `/threads` folder.
+- Threads are organized by folders, one for each thread, and can be easily zipped, exported, and cleared.
+- Thread folders follow the naming: `assistant_id` + `thread_created_at`.
+- Thread folders also contain `messages.jsonl` files. See [messages](/specs/messages).
 
 ```sh
-janroot/
+jan/
     threads/
-        jan_1700123404.json
-        homework_helper_700120003.json
+        assistant_name_unix_timestamp/
+            thread.json
+            messages.jsonl
+        jan_2341243134/
+            thread.json
 ```
 
-## Jan API
+## `thread.json`
+
+- Each `thread` folder contains a `thread.json` file, which is a representation of a thread.
+- `thread.json` contains metadata and model parameter overrides.
+- There are no required fields.
+
+### Example
+
+Here's a standard example `thread.json` for a conversation between the user and the default Jan assistant.
+
+```json
+"id": "thread_....",                  // Defaults to foldername
+"object": "thread",                   // Defaults to "thread"
+"title": "funny physics joke",        // Defaults to ""
+"assistants": [
+  {
+    "assistant_id": "jan",            // Defaults to "jan"
+    "model": {                        // Defaults to 1 currently active model (can be changed before thread is begun)
+      "settings": {},                 // Defaults to and overrides assistant.json's "settings" (and if none, then model.json "settings")
+      "parameters": {},               // Defaults to and overrides assistant.json's "parameters" (and if none, then model.json "parameters")
+    }
+  },
+],
+"created": 1231231,                   // Defaults to file creation time
+"metadata": {},                       // Defaults to {}
+```
+
+## API Reference
+
+Jan's Threads API is compatible with [OpenAI's Threads API](https://platform.openai.com/docs/api-reference/threads), with additional methods for managing threads locally.
+
+See [Jan Threads API](https://jan.ai/api-reference#tag/Threads)
+
+<!-- TODO clean this part up into API -->
+<!--
 ### Get thread
+
 > OpenAI Equivalent: https://platform.openai.com/docs/api-reference/threads/getThread
+
 - Example request
+
 ```shell
     curl {JAN_URL}/v1/threads/{thread_id}
 ```
+
 - Example response
+
 ```json
-    {
-    "id": "thread_abc123",
-    "object": "thread",
-    "created_at": 1699014083,
-    "assistants": ["assistant-001"],
-    "metadata": {},
-    "messages": []
-    }
+{
+  "id": "thread_abc123",
+  "object": "thread",
+  "created_at": 1699014083,
+  "assistants": ["assistant-001"],
+  "metadata": {},
+  "messages": []
+}
 ```
+
 ### Create Thread
+
 > OpenAI Equivalent: https://platform.openai.com/docs/api-reference/threads/createThread
+
 - Example request
+
 ```shell
     curl -X POST {JAN_URL}/v1/threads \
     -H "Content-Type: application/json" \
@@ -99,18 +111,24 @@ janroot/
         }]
     }'
 ```
+
 - Example response
+
 ```json
-    {
-    "id": 'thread_abc123',
-    "object": 'thread',
-    "created_at": 1699014083,
-    "metadata": {}
-    }
+{
+  "id": "thread_abc123",
+  "object": "thread",
+  "created_at": 1699014083,
+  "metadata": {}
+}
 ```
+
 ### Modify Thread
+
 > OpenAI Equivalent: https://platform.openai.com/docs/api-reference/threads/modifyThread
+
 - Example request
+
 ```shell
     curl -X POST {JAN_URL}/v1/threads/{thread_id} \
     -H "Content-Type: application/json" \
@@ -125,69 +143,85 @@ janroot/
         }]
     }'
 ```
+
 - Example response
+
 ```json
-    {
-    "id": 'thread_abc123',
-    "object": 'thread',
-    "created_at": 1699014083,
-    "metadata": {}
-    }
+{
+  "id": "thread_abc123",
+  "object": "thread",
+  "created_at": 1699014083,
+  "metadata": {}
+}
 ```
 
 - https://platform.openai.com/docs/api-reference/threads/modifyThread
 
 ### Delete Thread
+
 > OpenAI Equivalent: https://platform.openai.com/docs/api-reference/threads/deleteThread
+
 - Example request
+
 ```shell
     curl -X DELETE {JAN_URL}/v1/threads/{thread_id}
 ```
+
 - Example response
+
 ```json
-    {
-    "id": "thread_abc123",
-    "object": "thread.deleted",
-    "deleted": true
-    }
+{
+  "id": "thread_abc123",
+  "object": "thread.deleted",
+  "deleted": true
+}
 ```
 
 ### List Threads
+
 > This is a Jan-only endpoint, not supported by OAI yet.
+
 - Example request
+
 ```shell
     curl {JAN_URL}/v1/threads \
     -H "Content-Type: application/json" \
 ```
+
 - Example response
+
 ```json
-    [
-        {
-            "id": "thread_abc123",
-            "object": "thread",
-            "created_at": 1699014083,
-            "assistants": ["assistant-001"],
-            "metadata": {},
-            "messages": []
-        },
-        {
-            "id": "thread_abc456",
-            "object": "thread",
-            "created_at": 1699014083,
-            "assistants": ["assistant-002", "assistant-002"],
-            "metadata": {},
-        }
-    ]
+[
+  {
+    "id": "thread_abc123",
+    "object": "thread",
+    "created_at": 1699014083,
+    "assistants": ["assistant-001"],
+    "metadata": {},
+    "messages": []
+  },
+  {
+    "id": "thread_abc456",
+    "object": "thread",
+    "created_at": 1699014083,
+    "assistants": ["assistant-002", "assistant-002"],
+    "metadata": {}
+  }
+]
 ```
 
 ### Get & Modify `Thread.Assistants`
+
 -> Can achieve this goal by calling `Modify Thread` API
 
 #### `GET v1/threads/{thread_id}/assistants`
+
 -> Can achieve this goal by calling `Get Thread` API
 
 #### `POST v1/threads/{thread_id}/assistants/{assistant_id}`
+
 -> Can achieve this goal by calling `Modify Assistant` API with `thread.assistant[]`
 
 ### List `Thread.Messages`
--> Can achieve this goal by calling `Get Thread` API
+
+-> Can achieve this goal by calling `Get Thread` API -->
diff --git a/docs/sidebars.js b/docs/sidebars.js
index 83eb50f5f..035e51bd0 100644
--- a/docs/sidebars.js
+++ b/docs/sidebars.js
@@ -67,8 +67,8 @@ const sidebars = {
           items: [
             "specs/chats",
             "specs/models",
-            // "specs/threads",
-            // "specs/messages",
+            "specs/threads",
+            "specs/messages",
             // "specs/assistants",
             // "specs/files",
             // "specs/jan",
diff --git a/docs/src/styles/tweaks.scss b/docs/src/styles/tweaks.scss
index 8c5c7a8e9..d5d016436 100644
--- a/docs/src/styles/tweaks.scss
+++ b/docs/src/styles/tweaks.scss
@@ -97,6 +97,9 @@
       @apply leading-loose;
       line-height: 1.5;
     }
+    + div {
+      margin-top: 12px;
+    }
   }
 }
 
@@ -113,3 +116,11 @@
 .table-of-contents {
   font-size: 14px;
 }
+
+.task-list-item {
+  list-style: none;
+}
+
+blockquote {
+  margin-bottom: 12px;
+}
diff --git a/plugins/inference-plugin/nitro/version.txt b/plugins/inference-plugin/nitro/version.txt
index 84aa3a7dd..a34eaa5d0 100644
--- a/plugins/inference-plugin/nitro/version.txt
+++ b/plugins/inference-plugin/nitro/version.txt
@@ -1 +1 @@
-0.1.8
\ No newline at end of file
+0.1.11
\ No newline at end of file
diff --git a/plugins/inference-plugin/package.json b/plugins/inference-plugin/package.json
index 3b7159c3c..f19c864cd 100644
--- a/plugins/inference-plugin/package.json
+++ b/plugins/inference-plugin/package.json
@@ -36,6 +36,7 @@
   "dependencies": {
     "@janhq/core": "file:../../core",
     "download-cli": "^1.1.1",
+    "electron-log": "^5.0.1",
     "fetch-retry": "^5.0.6",
     "kill-port": "^2.0.1",
     "path-browserify": "^1.0.1",
@@ -55,6 +56,7 @@
   "bundleDependencies": [
     "tcp-port-used",
     "kill-port",
-    "fetch-retry"
+    "fetch-retry",
+    "electron-log"
   ]
 }
diff --git a/plugins/inference-plugin/src/helpers/sse.ts b/plugins/inference-plugin/src/helpers/sse.ts
index f63cc027b..987751221 100644
--- a/plugins/inference-plugin/src/helpers/sse.ts
+++ b/plugins/inference-plugin/src/helpers/sse.ts
@@ -4,7 +4,7 @@ import { Observable } from "rxjs";
  * @param recentMessages - An array of recent messages to use as context for the inference.
  * @returns An Observable that emits the generated response as a string.
  */
-export function requestInference(recentMessages: any[]): Observable<string> {
+export function requestInference(recentMessages: any[], controller?: AbortController): Observable<string> {
   return new Observable((subscriber) => {
     const requestBody = JSON.stringify({
       messages: recentMessages,
@@ -20,6 +20,7 @@ export function requestInference(recentMessages: any[]): Observable<string> {
         "Access-Control-Allow-Origin": "*",
       },
       body: requestBody,
+      signal: controller?.signal
     })
       .then(async (response) => {
         const stream = response.body;
diff --git a/plugins/inference-plugin/src/index.ts b/plugins/inference-plugin/src/index.ts
index 8cabf0343..4f358dc56 100644
--- a/plugins/inference-plugin/src/index.ts
+++ b/plugins/inference-plugin/src/index.ts
@@ -28,6 +28,8 @@ import { fs } from "@janhq/core";
  * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
  */
 export default class JanInferencePlugin implements InferencePlugin {
+  controller = new AbortController();
+  isCancelled = false;
   /**
    * Returns the type of the plugin.
    * @returns {PluginType} The type of the plugin.
@@ -40,7 +42,9 @@ export default class JanInferencePlugin implements InferencePlugin {
    * Subscribes to events emitted by the @janhq/core package.
    */
   onLoad(): void {
-    events.on(EventName.OnNewMessageRequest, this.handleMessageRequest);
+    events.on(EventName.OnNewMessageRequest, (data) =>
+      JanInferencePlugin.handleMessageRequest(data, this)
+    );
   }
 
   /**
@@ -70,23 +74,32 @@ export default class JanInferencePlugin implements InferencePlugin {
     return executeOnMain(MODULE, "killSubprocess");
   }
 
+  /**
+   * Cancels the in-flight streaming inference: flags the plugin as cancelled and aborts the current controller.
+   * @returns {Promise<void>} A promise that resolves once the abort has been issued.
+   */
+  async stopInference(): Promise<void> {
+    this.isCancelled = true;
+    this.controller?.abort();
+  }
+
   /**
    * Makes a single response inference request.
    * @param {MessageRequest} data - The data for the inference request.
    * @returns {Promise<any>} A promise that resolves with the inference response.
    */
-  async inferenceRequest(data: MessageRequest): Promise<any> {
-    const message = {
-      ...data,
-      message: "",
-      user: "assistant",
+  async inferenceRequest(data: MessageRequest): Promise<ThreadMessage> {
+    const message: ThreadMessage = {
+      threadId: data.threadId,
+      content: "",
       createdAt: new Date().toISOString(),
+      status: MessageStatus.Ready,
     };
 
     return new Promise(async (resolve, reject) => {
       requestInference(data.messages ?? []).subscribe({
         next: (content) => {
-          message.message = content;
+          message.content = content;
         },
         complete: async () => {
           resolve(message);
@@ -100,9 +113,14 @@ export default class JanInferencePlugin implements InferencePlugin {
 
   /**
    * Handles a new message request by making an inference request and emitting events.
+   * Function registered in event manager, should be static to avoid binding issues.
+   * Pass instance as a reference.
    * @param {MessageRequest} data - The data for the new message request.
    */
-  private async handleMessageRequest(data: MessageRequest) {
+  private static async handleMessageRequest(
+    data: MessageRequest,
+    instance: JanInferencePlugin
+  ) {
     const message: ThreadMessage = {
       threadId: data.threadId,
       content: "",
@@ -113,7 +131,10 @@ export default class JanInferencePlugin implements InferencePlugin {
     };
     events.emit(EventName.OnNewMessageResponse, message);
 
-    requestInference(data.messages).subscribe({
+    instance.isCancelled = false;
+    instance.controller = new AbortController();
+
+    requestInference(data.messages, instance.controller).subscribe({
       next: (content) => {
         message.content = content;
         events.emit(EventName.OnMessageResponseUpdate, message);
@@ -125,7 +146,8 @@ export default class JanInferencePlugin implements InferencePlugin {
       },
       error: async (err) => {
         message.content =
-          message.content.trim() + "\n" + "Error occurred: " + err.message;
+          message.content.trim() +
+          (instance.isCancelled ? "" : "\n" + "Error occurred: " + err.message);
         message.status = MessageStatus.Ready;
         events.emit(EventName.OnMessageResponseUpdate, message);
       },
diff --git a/plugins/inference-plugin/src/module.ts b/plugins/inference-plugin/src/module.ts
index a9e60e4cd..a1a1d4ea0 100644
--- a/plugins/inference-plugin/src/module.ts
+++ b/plugins/inference-plugin/src/module.ts
@@ -5,6 +5,8 @@ const { spawn } = require("child_process");
 const tcpPortUsed = require("tcp-port-used");
 const fetchRetry = require("fetch-retry")(global.fetch);
 
+const log = require("electron-log");
+
 // The PORT to use for the Nitro subprocess
 const PORT = 3928;
 const LOCAL_HOST = "127.0.0.1";
@@ -35,6 +37,7 @@ interface InitModelResponse {
 function initModel(modelFile: string): Promise<InitModelResponse> {
   // 1. Check if the model file exists
   currentModelFile = modelFile;
+  log.info("Started to load model " + modelFile);
 
   return (
     // 1. Check if the port is used, if used, attempt to unload model / kill nitro process
@@ -42,13 +45,12 @@ function initModel(modelFile: string): Promise<InitModelResponse> {
       .then(checkAndUnloadNitro)
       // 2. Spawn the Nitro subprocess
       .then(spawnNitroProcess)
-      // 3. Wait until the port is used (Nitro http server is up)
-      .then(() => tcpPortUsed.waitUntilUsed(PORT, 300, 30000))
       // 4. Load the model into the Nitro subprocess (HTTP POST request)
       .then(loadLLMModel)
       // 5. Check if the model is loaded successfully
       .then(validateModelStatus)
       .catch((err) => {
+        log.error("error: " + (err?.message ?? JSON.stringify(err))); // Error objects stringify to "{}"
         return { error: err };
       })
   );
@@ -63,6 +65,7 @@ function loadLLMModel(): Promise<Response> {
     llama_model_path: currentModelFile,
     ctx_len: 2048,
     ngl: 100,
+    cont_batching: false,
     embedding: false, // Always enable embedding mode on
   };
 
@@ -77,6 +80,7 @@ function loadLLMModel(): Promise<Response> {
     retryDelay: 500,
   }).catch((err) => {
     console.error(err);
+    log.error("error: " + (err?.message ?? JSON.stringify(err))); // Error objects stringify to "{}"
     // Fetch error, Nitro server might not started properly
     throw new Error("Model loading failed.");
   });
@@ -112,7 +116,8 @@ async function validateModelStatus(): Promise<InitModelResponse> {
       return { error: "Model loading failed" };
     })
     .catch((err) => {
-      return { error: `Model loading failed. ${err.message}` };
+      log.error("Model loading failed" + err.toString());
+      return { error: `Model loading failed.` };
     });
 }
 
@@ -158,46 +163,53 @@ function checkAndUnloadNitro() {
  * Using child-process to spawn the process
  * Should run exactly platform specified Nitro binary version
  */
-function spawnNitroProcess() {
-  let binaryFolder = path.join(__dirname, "nitro"); // Current directory by default
-  let binaryName;
+async function spawnNitroProcess(): Promise<void> {
+  return new Promise((resolve, reject) => {
+    let binaryFolder = path.join(__dirname, "nitro"); // Current directory by default
+    let binaryName;
 
-  if (process.platform === "win32") {
-    // Todo: Need to check for CUDA support to switch between CUDA and non-CUDA binaries
-    binaryName = "win-start.bat";
-  } else if (process.platform === "darwin") {
-    // Mac OS platform
-    if (process.arch === "arm64") {
-      binaryFolder = path.join(binaryFolder, "mac-arm64");
+    if (process.platform === "win32") {
+      // Todo: Need to check for CUDA support to switch between CUDA and non-CUDA binaries
+      binaryName = "win-start.bat";
+    } else if (process.platform === "darwin") {
+      // Mac OS platform
+      if (process.arch === "arm64") {
+        binaryFolder = path.join(binaryFolder, "mac-arm64");
+      } else {
+        binaryFolder = path.join(binaryFolder, "mac-x64");
+      }
+      binaryName = "nitro";
     } else {
-      binaryFolder = path.join(binaryFolder, "mac-x64");
+      // Linux
+      // Todo: Need to check for CUDA support to switch between CUDA and non-CUDA binaries
+      binaryName = "linux-start.sh"; // For other platforms
     }
-    binaryName = "nitro";
-  } else {
-    // Linux
-    // Todo: Need to check for CUDA support to switch between CUDA and non-CUDA binaries
-    binaryName = "linux-start.sh"; // For other platforms
-  }
 
-  const binaryPath = path.join(binaryFolder, binaryName);
+    const binaryPath = path.join(binaryFolder, binaryName);
 
-  // Execute the binary
-  subprocess = spawn(binaryPath, [1, "0.0.0.0", PORT], {
-    cwd: binaryFolder,
-  });
+    // Execute the binary
+    subprocess = spawn(binaryPath, [1, "0.0.0.0", PORT], {
+      cwd: binaryFolder,
+    });
 
-  // Handle subprocess output
-  subprocess.stdout.on("data", (data) => {
-    console.log(`stdout: ${data}`);
-  });
+    // Handle subprocess output
+    subprocess.stdout.on("data", (data) => {
+      console.log(`stdout: ${data}`);
+    });
 
-  subprocess.stderr.on("data", (data) => {
-    console.error(`stderr: ${data}`);
-  });
+    subprocess.stderr.on("data", (data) => {
+      log.error("subprocess error:" + data.toString());
+      console.error(`stderr: ${data}`);
+    });
 
-  subprocess.on("close", (code) => {
-    console.log(`child process exited with code ${code}`);
-    subprocess = null;
+    subprocess.on("close", (code) => {
+      console.log(`child process exited with code ${code}`);
+      subprocess = null;
+      reject(`Nitro process exited. ${code ?? ""}`);
+    });
+    // Reject on timeout too; otherwise this promise (and initModel) may never settle.
+    const portReady = tcpPortUsed.waitUntilUsed(PORT, 300, 30000);
+    portReady.then(() => resolve(), reject);
   });
 }
 
@@ -206,11 +218,14 @@ function spawnNitroProcess() {
  * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
  */
 function validateModelVersion(): Promise<void> {
+  log.info("validateModelVersion");
   // Read the file
   return new Promise((resolve, reject) => {
     fs.open(currentModelFile, "r", (err, fd) => {
       if (err) {
+        log.error("validateModelVersion error" + JSON.stringify(err));
         console.error(err.message);
+        reject(err);
         return;
       }
 
@@ -220,7 +235,13 @@ function validateModelVersion(): Promise<void> {
       // Model version will be the 5th byte of the file
       fs.read(fd, buffer, 0, 1, 4, (err, bytesRead, buffer) => {
         if (err) {
+          log.error("validateModelVersion read error" + JSON.stringify(err));
           console.error(err.message);
+          fs.close(fd, (closeErr) => { // report only a failed close; closeErr is null on success
+            if (closeErr) log.error("validateModelVersion close error" + JSON.stringify(closeErr));
+            if (closeErr) console.error(closeErr.message);
+          });
+          reject(err);
         } else {
           // Interpret the byte as ASCII
           if (buffer[0] === 0x01) {
diff --git a/web/containers/ModalCancelDownload/index.tsx b/web/containers/ModalCancelDownload/index.tsx
index 3413d02c4..33476fda1 100644
--- a/web/containers/ModalCancelDownload/index.tsx
+++ b/web/containers/ModalCancelDownload/index.tsx
@@ -16,7 +16,6 @@ import {
 import { atom, useAtomValue } from 'jotai'
 
 import { useDownloadState } from '@/hooks/useDownloadState'
-import useGetPerformanceTag from '@/hooks/useGetPerformanceTag'
 
 import { formatDownloadPercentage } from '@/utils/converter'
 
@@ -30,7 +29,6 @@ export default function ModalCancelDownload({
   isFromList,
 }: Props) {
   const { modelDownloadStateAtom } = useDownloadState()
-  useGetPerformanceTag()
   const downloadAtom = useMemo(
     () => atom((get) => get(modelDownloadStateAtom)[suitableModel.name]),
     // eslint-disable-next-line react-hooks/exhaustive-deps
diff --git a/web/hooks/useActiveModel.ts b/web/hooks/useActiveModel.ts
index 57e3a0118..20b8aec15 100644
--- a/web/hooks/useActiveModel.ts
+++ b/web/hooks/useActiveModel.ts
@@ -49,7 +49,7 @@ export function useActiveModel() {
     console.debug('Init model: ', modelId)
     const path = join('models', model.name, modelId)
     const res = await initModel(path)
-    if (res?.error) {
+    if (res?.error && (!activeModel?.id || modelId === activeModel?.id)) {
       const errorMessage = `${res.error}`
       alert(errorMessage)
       setStateModel(() => ({
diff --git a/web/hooks/useGetPerformanceTag.ts b/web/hooks/useGetPerformanceTag.ts
index 4c9934778..435f82c4c 100644
--- a/web/hooks/useGetPerformanceTag.ts
+++ b/web/hooks/useGetPerformanceTag.ts
@@ -1,40 +1,37 @@
-import { useState } from 'react'
-
 import { ModelVersion } from '@janhq/core/lib/types'
-import { useAtomValue } from 'jotai'
 
 import { ModelPerformance, TagType } from '@/constants/tagType'
 
-import { totalRamAtom } from '@/helpers/atoms/SystemBar.atom'
-
 // Recommendation:
 // `Recommended (green)`: "Max RAM required" is 80% of users max  RAM.
 // `Slow on your device (yellow)`: Max RAM required is 80-100% of users max RAM
 // `Not enough RAM (red)`: User RAM is below "Max RAM required"
 
 export default function useGetPerformanceTag() {
-  const [performanceTag, setPerformanceTag] = useState<TagType | undefined>()
-  const totalRam = useAtomValue(totalRamAtom)
-
-  const getPerformanceForModel = async (modelVersion: ModelVersion) => {
+  async function getPerformanceForModel(
+    modelVersion: ModelVersion,
+    totalRam: number
+  ): Promise<{ title: string; performanceTag: TagType }> {
     const requiredRam = modelVersion.maxRamRequired
-    setPerformanceTag(calculateRamPerformance(requiredRam, totalRam))
+    const performanceTag = calculateRamPerformance(requiredRam, totalRam)
+
+    let title = ''
+
+    switch (performanceTag) {
+      case ModelPerformance.PerformancePositive:
+        title = 'Recommended'
+        break
+      case ModelPerformance.PerformanceNeutral:
+        title = 'Slow on your device'
+        break
+      case ModelPerformance.PerformanceNegative:
+        title = 'Not enough RAM'
+        break
+    }
+    return { title, performanceTag }
   }
 
-  let title = ''
-  switch (performanceTag) {
-    case ModelPerformance.PerformancePositive:
-      title = 'Recommended'
-      break
-    case ModelPerformance.PerformanceNeutral:
-      title = 'Slow on your device'
-      break
-    case ModelPerformance.PerformanceNegative:
-      title = 'Not enough RAM'
-      break
-  }
-
-  return { performanceTag, title, getPerformanceForModel }
+  return { getPerformanceForModel }
 }
 
 const calculateRamPerformance = (
diff --git a/web/hooks/useSendChatMessage.ts b/web/hooks/useSendChatMessage.ts
index 12f54abd0..7a3b71ea5 100644
--- a/web/hooks/useSendChatMessage.ts
+++ b/web/hooks/useSendChatMessage.ts
@@ -63,13 +63,13 @@ export default function useSendChatMessage() {
         if (
           currentConvo &&
           currentConvo.id === newMessage.threadId &&
-          result?.message &&
-          result?.message?.trim().length > 0 &&
-          result.message.split(' ').length <= 10
+          result?.content &&
+          result?.content?.trim().length > 0 &&
+          result.content.split(' ').length <= 10
         ) {
           const updatedConv = {
             ...currentConvo,
-            summary: result.message,
+            summary: result.content,
           }
           updateConversation(updatedConv)
           pluginManager
diff --git a/web/package.json b/web/package.json
index 59d142dbf..16522cace 100644
--- a/web/package.json
+++ b/web/package.json
@@ -15,8 +15,8 @@
     "@headlessui/react": "^1.7.15",
     "@heroicons/react": "^2.0.18",
     "@hookform/resolvers": "^3.3.2",
-    "@janhq/core": "./core",
-    "@janhq/uikit": "./uikit",
+    "@janhq/core": "link:./core",
+    "@janhq/uikit": "link:./uikit",
     "autoprefixer": "10.4.16",
     "class-variance-authority": "^0.7.0",
     "framer-motion": "^10.16.4",
diff --git a/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx b/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx
index fe3c9c3e9..31a9933ee 100644
--- a/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx
+++ b/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx
@@ -1,5 +1,5 @@
 /* eslint-disable react-hooks/exhaustive-deps */
-import { useCallback, useEffect, useMemo } from 'react'
+import { useCallback, useEffect, useMemo, useState } from 'react'
 
 import { ModelCatalog, ModelVersion } from '@janhq/core/lib/types'
 import { Badge, Button } from '@janhq/uikit'
@@ -20,6 +20,8 @@ import { useMainViewState } from '@/hooks/useMainViewState'
 
 import { toGigabytes } from '@/utils/converter'
 
+import { totalRamAtom } from '@/helpers/atoms/SystemBar.atom'
+
 type Props = {
   suitableModel: ModelVersion
   exploreModel: ModelCatalog
@@ -32,8 +34,12 @@ const ExploreModelItemHeader: React.FC<Props> = ({
   const { downloadModel } = useDownloadModel()
   const { downloadedModels } = useGetDownloadedModels()
   const { modelDownloadStateAtom, downloadStates } = useDownloadState()
-  const { performanceTag, title, getPerformanceForModel } =
-    useGetPerformanceTag()
+  const { getPerformanceForModel } = useGetPerformanceTag()
+  const [title, setTitle] = useState<string>('Recommended')
+  const totalRam = useAtomValue(totalRamAtom)
+  const [performanceTag, setPerformanceTag] = useState<TagType>(
+    ModelPerformance.PerformancePositive
+  )
   const downloadAtom = useMemo(
     () => atom((get) => get(modelDownloadStateAtom)[suitableModel.name]),
     [suitableModel.name]
@@ -41,9 +47,20 @@ const ExploreModelItemHeader: React.FC<Props> = ({
   const downloadState = useAtomValue(downloadAtom)
   const { setMainViewState } = useMainViewState()
 
+  // Resolves the RAM-based tag and title for a model and stores both in state.
+  // Must be a plain async callback: a curried `() => async () => ...` would only
+  // return an inner function that the effect below never invokes.
+  const calculatePerformance = useCallback(
+    async (suitableModel: ModelVersion) => {
+      const result = await getPerformanceForModel(suitableModel, totalRam)
+      setPerformanceTag(result.performanceTag)
+      setTitle(result.title)
+    },
+    [totalRam]
+  )
+
   useEffect(() => {
-    getPerformanceForModel(suitableModel)
-    // eslint-disable-next-line react-hooks/exhaustive-deps
+    calculatePerformance(suitableModel)
-  }, [suitableModel])
+  }, [suitableModel, calculatePerformance])
 
   const onDownloadClick = useCallback(() => {