diff --git a/.github/ISSUE_TEMPLATE/roadmap.md b/.github/ISSUE_TEMPLATE/roadmap.md
index dbb0dfdd5..7947f31bf 100644
--- a/.github/ISSUE_TEMPLATE/roadmap.md
+++ b/.github/ISSUE_TEMPLATE/roadmap.md
@@ -1,3 +1,12 @@
+---
+name: Roadmap
+about: Plan Roadmap items with subtasks
+title: 'roadmap: '
+labels: 'type: planning'
+assignees: ''
+
+---
+
 ## Goal
 
 ## Tasklist
diff --git a/.github/workflows/jan-electron-build-beta.yml b/.github/workflows/jan-electron-build-beta.yml
index b29038b55..9cae31d67 100644
--- a/.github/workflows/jan-electron-build-beta.yml
+++ b/.github/workflows/jan-electron-build-beta.yml
@@ -70,6 +70,8 @@ jobs:
     permissions:
       contents: write
     steps:
+      - name: Getting the repo
+        uses: actions/checkout@v3
       - name: Sync temp to latest
         run: |
           # sync temp-beta to beta by copy files that are different or new
diff --git a/.github/workflows/jan-electron-build-nightly.yml b/.github/workflows/jan-electron-build-nightly.yml
index 60720052c..e08a35169 100644
--- a/.github/workflows/jan-electron-build-nightly.yml
+++ b/.github/workflows/jan-electron-build-nightly.yml
@@ -12,6 +12,8 @@ on:
           - none
           - aws-s3
         default: none
+  pull_request_review:
+    types: [submitted]
 
 jobs:
   set-public-provider:
@@ -33,6 +35,9 @@ jobs:
           elif [ "${{ github.event_name }}" == "push" ]; then
             echo "::set-output name=public_provider::aws-s3"
             echo "::set-output name=ref::${{ github.ref }}"
+          elif [ "${{ github.event_name }}" == "pull_request_review" ]; then
+            echo "::set-output name=public_provider::none"
+            echo "::set-output name=ref::${{ github.ref }}"
           else
             echo "::set-output name=public_provider::none"
             echo "::set-output name=ref::${{ github.ref }}"
@@ -116,3 +121,24 @@ jobs:
       build_reason: Manual
       push_to_branch: dev
       new_version: ${{ needs.get-update-version.outputs.new_version }}
+
+
+  comment-pr-build-url:
+    needs: [build-macos, build-windows-x64, build-linux-x64, get-update-version, set-public-provider, sync-temp-to-latest]
+    runs-on: ubuntu-latest
+    if: github.event_name == 'pull_request_review'
+    steps:
+      - name: Set up GitHub CLI
+        run: |
+          curl -sSL https://github.com/cli/cli/releases/download/v2.33.0/gh_2.33.0_linux_amd64.tar.gz | tar xz
+          sudo cp gh_2.33.0_linux_amd64/bin/gh /usr/local/bin/
+
+      - name: Comment build URL on PR
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          PR_URL=${{ github.event.pull_request.html_url }}
+          RUN_ID=${{ github.run_id }}
+          COMMENT="This is the build for this pull request. You can download it from the Artifacts section here: [Build URL](https://github.com/${{ github.repository }}/actions/runs/${RUN_ID})."
+          gh pr comment $PR_URL --body "$COMMENT"
+
\ No newline at end of file
diff --git a/.github/workflows/publish-npm-core.yml b/.github/workflows/publish-npm-core.yml
new file mode 100644
index 000000000..b6d400957
--- /dev/null
+++ b/.github/workflows/publish-npm-core.yml
@@ -0,0 +1,53 @@
+name: Publish plugin models Package to npmjs
+on:
+  push:
+    tags: ["v[0-9]+.[0-9]+.[0-9]+-core"]
+    paths: ["core/**"]
+  pull_request:
+    paths: ["core/**"]
+jobs:
+  build-and-publish-plugins:
+    environment: production
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: "0"
+          token: ${{ secrets.PAT_SERVICE_ACCOUNT }}
+
+      - name: Install jq
+        uses: dcarbone/install-jq-action@v2.0.1
+
+      - name: Extract tag name without v prefix
+        id: get_version
+        run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_ENV && echo "::set-output name=version::${GITHUB_REF#refs/tags/v}"
+        env:
+          GITHUB_REF: ${{ github.ref }}
+
+      - name: "Get Semantic Version from tag"
+        if: github.event_name == 'push'
+        run: |
+          # Get the tag from the event
+          tag=${GITHUB_REF#refs/tags/v}
+          # remove the -core suffix
+          new_version=$(echo $tag | sed -n 's/-core//p')
+          echo $new_version
+          # Replace the old version with the new version in package.json
+          jq --arg version "$new_version" '.version = $version' core/package.json > /tmp/package.json && mv /tmp/package.json core/package.json
+
+          # Print the new version
+          echo "Updated package.json version to: $new_version"
+          cat core/package.json
+
+      # Setup .npmrc file to publish to npm
+      - uses: actions/setup-node@v3
+        with:
+          node-version: "20.x"
+          registry-url: "https://registry.npmjs.org"
+
+      - run: cd core && yarn install && yarn build
+
+      - run: cd core && yarn publish --access public
+        if: github.event_name == 'push'
+        env:
+          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
diff --git a/.github/workflows/publish-npm-joi.yml b/.github/workflows/publish-npm-joi.yml
new file mode 100644
index 000000000..c943468d8
--- /dev/null
+++ b/.github/workflows/publish-npm-joi.yml
@@ -0,0 +1,53 @@
+name: Publish plugin models Package to npmjs
+on:
+  push:
+    tags: ["v[0-9]+.[0-9]+.[0-9]+-joi"]
+    paths: ["joi/**"]
+  pull_request:
+    paths: ["joi/**"]
+jobs:
+  build-and-publish-plugins:
+    environment: production
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: "0"
+          token: ${{ secrets.PAT_SERVICE_ACCOUNT }}
+
+      - name: Install jq
+        uses: dcarbone/install-jq-action@v2.0.1
+
+      - name: Extract tag name without v prefix
+        id: get_version
+        run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_ENV && echo "::set-output name=version::${GITHUB_REF#refs/tags/v}"
+        env:
+          GITHUB_REF: ${{ github.ref }}
+
+      - name: "Get Semantic Version from tag"
+        if: github.event_name == 'push'
+        run: |
+          # Get the tag from the event
+          tag=${GITHUB_REF#refs/tags/v}
+          # remove the -joi suffix
+          new_version=$(echo $tag | sed -n 's/-joi//p')
+          echo $new_version
+          # Replace the old version with the new version in package.json
+          jq --arg version "$new_version" '.version = $version' joi/package.json > /tmp/package.json && mv /tmp/package.json joi/package.json
+
+          # Print the new version
+          echo "Updated package.json version to: $new_version"
+          cat joi/package.json
+
+      # Setup .npmrc file to publish to npm
+      - uses: actions/setup-node@v3
+        with:
+          node-version: "20.x"
+          registry-url: "https://registry.npmjs.org"
+
+      - run: cd joi && yarn install && yarn build
+
+      - run: cd joi && yarn publish --access public
+        if: github.event_name == 'push'
+        env:
+          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
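For reference, the version-rewrite step in the two workflows above is the only non-boilerplate part: it strips the `-core`/`-joi` suffix from the pushed tag and stamps the result into the package manifest before `yarn publish`. A minimal TypeScript sketch of the same transformation, assuming a Node 20 runtime and a `refs/tags/v0.5.1-core`-style ref; `applyTagVersion` and its arguments are illustrative helpers, not part of the workflow:

```ts
// version-from-tag.ts: sketch of the jq rewrite performed by the workflows above.
// Assumes GITHUB_REF like "refs/tags/v0.5.1-core"; pkgPath is a parameter.
import { readFileSync, writeFileSync } from 'fs'

function applyTagVersion(ref: string, suffix: string, pkgPath: string): string {
  const tag = ref.replace(/^refs\/tags\/v/, '') // "0.5.1-core"
  const version = tag.replace(suffix, '')       // "0.5.1"
  const pkg = JSON.parse(readFileSync(pkgPath, 'utf-8'))
  pkg.version = version                          // same effect as the jq '.version = $version'
  writeFileSync(pkgPath, JSON.stringify(pkg, null, 2))
  return version
}

// e.g. applyTagVersion(process.env.GITHUB_REF ?? '', '-core', 'core/package.json')
```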
diff --git a/.github/workflows/template-build-linux-x64.yml b/.github/workflows/template-build-linux-x64.yml
index afd5f6647..9d12c4394 100644
--- a/.github/workflows/template-build-linux-x64.yml
+++ b/.github/workflows/template-build-linux-x64.yml
@@ -111,8 +111,10 @@ jobs:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           AWS_ACCESS_KEY_ID: ${{ secrets.DELTA_AWS_ACCESS_KEY_ID }}
           AWS_SECRET_ACCESS_KEY: ${{ secrets.DELTA_AWS_SECRET_ACCESS_KEY }}
-          AWS_EC2_METADATA_DISABLED: "true"
-          AWS_MAX_ATTEMPTS: "5"
+          AWS_EC2_METADATA_DISABLED: 'true'
+          AWS_MAX_ATTEMPTS: '5'
+          POSTHOG_KEY: ${{ secrets.POSTHOG_KEY }}
+          POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
 
       - name: Build and publish app to github
         if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') && inputs.public_provider == 'github' && inputs.beta == false
@@ -122,6 +124,8 @@ jobs:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           ANALYTICS_ID: ${{ secrets.JAN_APP_UMAMI_PROJECT_API_KEY }}
           ANALYTICS_HOST: ${{ secrets.JAN_APP_UMAMI_URL }}
+          POSTHOG_KEY: ${{ secrets.POSTHOG_KEY }}
+          POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
 
       - name: Build and publish app to github
         if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') && inputs.public_provider == 'github' && inputs.beta == true
@@ -131,8 +135,10 @@ jobs:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           AWS_ACCESS_KEY_ID: ${{ secrets.DELTA_AWS_ACCESS_KEY_ID }}
           AWS_SECRET_ACCESS_KEY: ${{ secrets.DELTA_AWS_SECRET_ACCESS_KEY }}
-          AWS_EC2_METADATA_DISABLED: "true"
-          AWS_MAX_ATTEMPTS: "5"
+          AWS_EC2_METADATA_DISABLED: 'true'
+          AWS_MAX_ATTEMPTS: '5'
+          POSTHOG_KEY: ${{ secrets.POSTHOG_KEY }}
+          POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
 
       - name: Upload Artifact .deb file
         if: inputs.public_provider != 'github'
diff --git a/.github/workflows/template-build-macos.yml b/.github/workflows/template-build-macos.yml
index 256bd8c5a..b415d665d 100644
--- a/.github/workflows/template-build-macos.yml
+++ b/.github/workflows/template-build-macos.yml
@@ -140,18 +140,20 @@ jobs:
           fi
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          CSC_LINK: "/tmp/codesign.p12"
+          CSC_LINK: '/tmp/codesign.p12'
           CSC_KEY_PASSWORD: ${{ secrets.CODE_SIGN_P12_PASSWORD }}
-          CSC_IDENTITY_AUTO_DISCOVERY: "true"
+          CSC_IDENTITY_AUTO_DISCOVERY: 'true'
           APPLE_ID: ${{ secrets.APPLE_ID }}
           APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }}
-          APP_PATH: "."
+          APP_PATH: '.'
           DEVELOPER_ID: ${{ secrets.DEVELOPER_ID }}
           AWS_ACCESS_KEY_ID: ${{ secrets.DELTA_AWS_ACCESS_KEY_ID }}
           AWS_SECRET_ACCESS_KEY: ${{ secrets.DELTA_AWS_SECRET_ACCESS_KEY }}
           AWS_DEFAULT_REGION: auto
-          AWS_EC2_METADATA_DISABLED: "true"
-          AWS_MAX_ATTEMPTS: "5"
+          AWS_EC2_METADATA_DISABLED: 'true'
+          AWS_MAX_ATTEMPTS: '5'
+          POSTHOG_KEY: ${{ secrets.POSTHOG_KEY }}
+          POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
 
       - name: Build and publish app to github
         if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') && inputs.public_provider == 'github' && inputs.beta == false
@@ -159,15 +161,17 @@ jobs:
         run: |
           make build-and-publish
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          CSC_LINK: "/tmp/codesign.p12"
+          CSC_LINK: '/tmp/codesign.p12'
           CSC_KEY_PASSWORD: ${{ secrets.CODE_SIGN_P12_PASSWORD }}
-          CSC_IDENTITY_AUTO_DISCOVERY: "true"
+          CSC_IDENTITY_AUTO_DISCOVERY: 'true'
           APPLE_ID: ${{ secrets.APPLE_ID }}
           APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }}
-          APP_PATH: "."
+          APP_PATH: '.'
           DEVELOPER_ID: ${{ secrets.DEVELOPER_ID }}
           ANALYTICS_ID: ${{ secrets.JAN_APP_UMAMI_PROJECT_API_KEY }}
           ANALYTICS_HOST: ${{ secrets.JAN_APP_UMAMI_URL }}
+          POSTHOG_KEY: ${{ secrets.POSTHOG_KEY }}
+          POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
 
       - name: Build and publish app to github
         if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') && inputs.public_provider == 'github' && inputs.beta == true
@@ -175,18 +179,20 @@ jobs:
         run: |
           make build-and-publish
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          CSC_LINK: "/tmp/codesign.p12"
+          CSC_LINK: '/tmp/codesign.p12'
           CSC_KEY_PASSWORD: ${{ secrets.CODE_SIGN_P12_PASSWORD }}
-          CSC_IDENTITY_AUTO_DISCOVERY: "true"
+          CSC_IDENTITY_AUTO_DISCOVERY: 'true'
           APPLE_ID: ${{ secrets.APPLE_ID }}
           APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }}
-          APP_PATH: "."
+          APP_PATH: '.'
           DEVELOPER_ID: ${{ secrets.DEVELOPER_ID }}
           AWS_ACCESS_KEY_ID: ${{ secrets.DELTA_AWS_ACCESS_KEY_ID }}
           AWS_SECRET_ACCESS_KEY: ${{ secrets.DELTA_AWS_SECRET_ACCESS_KEY }}
           AWS_DEFAULT_REGION: auto
-          AWS_EC2_METADATA_DISABLED: "true"
-          AWS_MAX_ATTEMPTS: "5"
+          AWS_EC2_METADATA_DISABLED: 'true'
+          AWS_MAX_ATTEMPTS: '5'
+          POSTHOG_KEY: ${{ secrets.POSTHOG_KEY }}
+          POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
 
       - name: Upload Artifact
         if: inputs.public_provider != 'github'
diff --git a/.github/workflows/template-build-windows-x64.yml b/.github/workflows/template-build-windows-x64.yml
index 488366a6d..52ff22ce3 100644
--- a/.github/workflows/template-build-windows-x64.yml
+++ b/.github/workflows/template-build-windows-x64.yml
@@ -149,8 +149,10 @@ jobs:
           AWS_ACCESS_KEY_ID: ${{ secrets.DELTA_AWS_ACCESS_KEY_ID }}
           AWS_SECRET_ACCESS_KEY: ${{ secrets.DELTA_AWS_SECRET_ACCESS_KEY }}
           AWS_DEFAULT_REGION: auto
-          AWS_EC2_METADATA_DISABLED: "true"
-          AWS_MAX_ATTEMPTS: "5"
+          AWS_EC2_METADATA_DISABLED: 'true'
+          AWS_MAX_ATTEMPTS: '5'
+          POSTHOG_KEY: ${{ secrets.POSTHOG_KEY }}
+          POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
 
       - name: Build app and publish app to github
         if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') && inputs.public_provider == 'github' && inputs.beta == false
@@ -165,6 +167,8 @@ jobs:
           AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
           AZURE_CLIENT_SECRET: ${{ secrets.AZURE_CLIENT_SECRET }}
           AZURE_CERT_NAME: homebrewltd
+          POSTHOG_KEY: ${{ secrets.POSTHOG_KEY }}
+          POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
 
       - name: Build app and publish app to github
         if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') && inputs.public_provider == 'github' && inputs.beta == true
@@ -175,14 +179,16 @@ jobs:
           AWS_ACCESS_KEY_ID: ${{ secrets.DELTA_AWS_ACCESS_KEY_ID }}
           AWS_SECRET_ACCESS_KEY: ${{ secrets.DELTA_AWS_SECRET_ACCESS_KEY }}
           AWS_DEFAULT_REGION: auto
-          AWS_EC2_METADATA_DISABLED: "true"
-          AWS_MAX_ATTEMPTS: "5"
+          AWS_EC2_METADATA_DISABLED: 'true'
+          AWS_MAX_ATTEMPTS: '5'
           AZURE_KEY_VAULT_URI: ${{ secrets.AZURE_KEY_VAULT_URI }}
           AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
           AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
           AZURE_CLIENT_SECRET: ${{ secrets.AZURE_CLIENT_SECRET }}
           # AZURE_CERT_NAME: ${{ secrets.AZURE_CERT_NAME }}
           AZURE_CERT_NAME: homebrewltd
+          POSTHOG_KEY: ${{ secrets.POSTHOG_KEY }}
+          POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
 
       - name: Upload Artifact
         if: inputs.public_provider != 'github'
@@ -190,4 +196,3 @@ jobs:
         with:
           name: jan-win-x64-${{ inputs.new_version }}
           path: ./electron/dist/*.exe
-
diff --git a/README.md b/README.md
index 043960537..8052a34dc 100644
--- a/README.md
+++ b/README.md
@@ -47,7 +47,7 @@ From PCs to multi-GPU clusters, Jan & Cortex supports universal architectures:
[Hunks @@ -47,7 +47,7 @@, @@ -59,15 +59,9 @@, @@ -92,15 +86,9 @@ and @@ -125,15 +113,9 @@: the README's HTML download-table markup was lost in extraction. Recoverable content: the "MacOS" column header becomes "MacOS Universal", and in each of the three release rows the separate "Intel" and "M1/M2/M3/M4" macOS download entries are replaced by a single universal "jan.dmg" link.]
diff --git a/core/src/browser/extensions/conversational.ts b/core/src/browser/extensions/conversational.ts
index ec53fbbbf..49fedd544 100644
--- a/core/src/browser/extensions/conversational.ts
+++ b/core/src/browser/extensions/conversational.ts
@@ -1,4 +1,10 @@
-import { Thread, ThreadInterface, ThreadMessage, MessageInterface } from '../../types'
+import {
+  Thread,
+  ThreadInterface,
+  ThreadMessage,
+  MessageInterface,
+  ThreadAssistantInfo,
+} from '../../types'
 import { BaseExtension, ExtensionTypeEnum } from '../extension'
 
 /**
@@ -17,10 +23,21 @@ export abstract class ConversationalExtension
     return ExtensionTypeEnum.Conversational
   }
 
-  abstract getThreads(): Promise<Thread[]>
-  abstract saveThread(thread: Thread): Promise<void>
+  abstract listThreads(): Promise<Thread[]>
+  abstract createThread(thread: Partial<Thread>): Promise<Thread>
+  abstract modifyThread(thread: Thread): Promise<void>
   abstract deleteThread(threadId: string): Promise<void>
-  abstract addNewMessage(message: ThreadMessage): Promise<void>
-  abstract writeMessages(threadId: string, messages: ThreadMessage[]): Promise<void>
-  abstract getAllMessages(threadId: string): Promise<ThreadMessage[]>
+  abstract createMessage(message: Partial<ThreadMessage>): Promise<ThreadMessage>
+  abstract deleteMessage(threadId: string, messageId: string): Promise<void>
+  abstract listMessages(threadId: string): Promise<ThreadMessage[]>
+  abstract getThreadAssistant(threadId: string): Promise<ThreadAssistantInfo>
+  abstract createThreadAssistant(
+    threadId: string,
+    assistant: ThreadAssistantInfo
+  ): Promise<ThreadAssistantInfo>
+  abstract modifyThreadAssistant(
+    threadId: string,
+    assistant: ThreadAssistantInfo
+  ): Promise<ThreadAssistantInfo>
+  abstract modifyMessage(message: ThreadMessage): Promise<ThreadMessage>
 }
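The hunk above renames the extension's persistence surface to OpenAI-style CRUD verbs (`list*`, `create*`, `modify*`, `delete*`) and adds per-thread assistant accessors. A minimal in-memory sketch of the renamed thread/message methods, with `Thread` and `ThreadMessage` trimmed to illustrative stubs; a real extension extends `ConversationalExtension` from `@janhq/core` and persists to disk:

```ts
// Sketch only: the renamed CRUD surface backed by Maps (assumes Node 20+
// for the global crypto.randomUUID). Types are trimmed stand-ins.
type Thread = { id: string; title?: string }
type ThreadMessage = { id: string; thread_id: string }

class InMemoryConversational {
  private threads = new Map<string, Thread>()
  private messages = new Map<string, ThreadMessage[]>()

  async listThreads(): Promise<Thread[]> {
    return [...this.threads.values()]
  }

  async createThread(thread: Partial<Thread>): Promise<Thread> {
    const created: Thread = { ...thread, id: thread.id ?? crypto.randomUUID() }
    this.threads.set(created.id, created)
    return created
  }

  async modifyThread(thread: Thread): Promise<void> {
    this.threads.set(thread.id, thread)
  }

  async deleteThread(threadId: string): Promise<void> {
    this.threads.delete(threadId)
    this.messages.delete(threadId) // messages live and die with their thread
  }

  async createMessage(message: Partial<ThreadMessage>): Promise<ThreadMessage> {
    const created: ThreadMessage = {
      thread_id: message.thread_id ?? '',
      ...message,
      id: message.id ?? crypto.randomUUID(),
    }
    const list = this.messages.get(created.thread_id) ?? []
    this.messages.set(created.thread_id, [...list, created])
    return created
  }

  async listMessages(threadId: string): Promise<ThreadMessage[]> {
    return this.messages.get(threadId) ?? []
  }

  async deleteMessage(threadId: string, messageId: string): Promise<void> {
    const list = this.messages.get(threadId) ?? []
    this.messages.set(threadId, list.filter((m) => m.id !== messageId))
  }
}
```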
diff --git a/core/src/browser/extensions/engines/AIEngine.ts b/core/src/browser/extensions/engines/AIEngine.ts
index d0528b0ab..2d1bdb3c2 100644
--- a/core/src/browser/extensions/engines/AIEngine.ts
+++ b/core/src/browser/extensions/engines/AIEngine.ts
@@ -2,7 +2,6 @@ import { events } from '../../events'
 import { BaseExtension } from '../../extension'
 import { MessageRequest, Model, ModelEvent } from '../../../types'
 import { EngineManager } from './EngineManager'
-import { ModelManager } from '../../models/manager'
 
 /**
  * Base AIEngine
diff --git a/core/src/browser/extensions/engines/OAIEngine.ts b/core/src/browser/extensions/engines/OAIEngine.ts
index df51d37bb..6b4c20a19 100644
--- a/core/src/browser/extensions/engines/OAIEngine.ts
+++ b/core/src/browser/extensions/engines/OAIEngine.ts
@@ -71,7 +71,7 @@ export abstract class OAIEngine extends AIEngine {
       return
     }
 
-    const timestamp = Date.now()
+    const timestamp = Date.now() / 1000
     const message: ThreadMessage = {
       id: ulid(),
       thread_id: data.threadId,
@@ -80,8 +80,8 @@ export abstract class OAIEngine extends AIEngine {
       role: ChatCompletionRole.Assistant,
       content: [],
       status: MessageStatus.Pending,
-      created: timestamp,
-      updated: timestamp,
+      created_at: timestamp,
+      completed_at: timestamp,
       object: 'thread.message',
     }
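Two things change in the `OAIEngine` hunk: timestamps move from milliseconds to Unix seconds, and the fields adopt the OpenAI-style `created_at`/`completed_at` names. Note that `Date.now() / 1000` yields a fractional value; if integer seconds are intended (the usual OpenAI convention), flooring would be needed. A tiny sketch with illustrative helper names:

```ts
// Unix-time helpers: OpenAI-style *_at fields are integer seconds, while
// Date.now() returns milliseconds. The hunk above divides without
// truncating, so created_at becomes a float; flooring matches the
// integer-seconds convention exactly.
const nowSeconds = (): number => Math.floor(Date.now() / 1000)
const toDate = (unixSeconds: number): Date => new Date(unixSeconds * 1000)
```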
diff --git a/core/src/node/api/HttpServer.ts b/core/src/node/api/HttpServer.ts
deleted file mode 100644
index 32d597717..000000000
--- a/core/src/node/api/HttpServer.ts
+++ /dev/null
@@ -1,8 +0,0 @@
-export interface HttpServer {
-  post: (route: string, handler: (req: any, res: any) => Promise<any>) => void
-  get: (route: string, handler: (req: any, res: any) => Promise<any>) => void
-  patch: (route: string, handler: (req: any, res: any) => Promise<any>) => void
-  put: (route: string, handler: (req: any, res: any) => Promise<any>) => void
-  delete: (route: string, handler: (req: any, res: any) => Promise<any>) => void
-  register: (router: any, opts?: any) => void
-}
diff --git a/core/src/node/api/index.test.ts b/core/src/node/api/index.test.ts
deleted file mode 100644
index c35d6e792..000000000
--- a/core/src/node/api/index.test.ts
+++ /dev/null
@@ -1,7 +0,0 @@
-
-import * as restfulV1 from './restful/v1';
-
-it('should re-export from restful/v1', () => {
-  const restfulV1Exports = require('./restful/v1');
-  expect(restfulV1Exports).toBeDefined();
-})
diff --git a/core/src/node/api/index.ts b/core/src/node/api/index.ts
index ab0c51656..56becd054 100644
--- a/core/src/node/api/index.ts
+++ b/core/src/node/api/index.ts
@@ -1,3 +1 @@
-export * from './HttpServer'
-export * from './restful/v1'
 export * from './common/handler'
diff --git a/core/src/node/api/processors/app.test.ts b/core/src/node/api/processors/app.test.ts
index 5c4daef29..f09c6cb6b 100644
--- a/core/src/node/api/processors/app.test.ts
+++ b/core/src/node/api/processors/app.test.ts
@@ -2,7 +2,6 @@ jest.mock('../../helper', () => ({
   ...jest.requireActual('../../helper'),
   getJanDataFolderPath: () => './app',
 }))
-import { dirname } from 'path'
 import { App } from './app'
 
 it('should call stopServer', () => {
diff --git a/core/src/node/api/processors/app.ts b/core/src/node/api/processors/app.ts
index a0808c5ac..96af8468e 100644
--- a/core/src/node/api/processors/app.ts
+++ b/core/src/node/api/processors/app.ts
@@ -3,7 +3,6 @@ import { basename, dirname, isAbsolute, join, relative } from 'path'
 import { Processor } from './Processor'
 import {
   log as writeLog,
-  appResourcePath,
   getAppConfigurations as appConfiguration,
   updateAppConfiguration,
   normalizeFilePath,
@@ -91,8 +90,6 @@ export class App implements Processor {
       port: args?.port,
       isCorsEnabled: args?.isCorsEnabled,
       isVerboseEnabled: args?.isVerboseEnabled,
-      schemaPath: join(appResourcePath(), 'docs', 'openapi', 'jan.yaml'),
-      baseDir: join(appResourcePath(), 'docs', 'openapi'),
       prefix: args?.prefix,
     })
   }
diff --git a/core/src/node/api/restful/app/download.test.ts b/core/src/node/api/restful/app/download.test.ts
deleted file mode 100644
index b2af1bb0d..000000000
--- a/core/src/node/api/restful/app/download.test.ts
+++ /dev/null
@@ -1,62 +0,0 @@
-import { HttpServer } from '../../HttpServer'
-import { DownloadManager } from '../../../helper/download'
-
-describe('downloadRouter', () => {
-  let app: HttpServer
-
-  beforeEach(() => {
-    app = {
-      register: jest.fn(),
-      post: jest.fn(),
-      get: jest.fn(),
-      patch: jest.fn(),
-      put: jest.fn(),
-      delete: jest.fn(),
-    }
-  })
-
-  it('should return download progress for a given modelId', async () => {
-    const modelId = '123'
-    const downloadProgress = { progress: 50 }
-
-    DownloadManager.instance.downloadProgressMap[modelId] = downloadProgress as any
-
-    const req = { params: { modelId } }
-    const res = {
-      status: jest.fn(),
-      send: jest.fn(),
-    }
-
-    jest.spyOn(app, 'get').mockImplementation((path, handler) => {
-      if (path === `/download/getDownloadProgress/${modelId}`) {
-        res.status(200)
-        res.send(downloadProgress)
-      }
-    })
-
-    app.get(`/download/getDownloadProgress/${modelId}`, req as any)
-    expect(res.status).toHaveBeenCalledWith(200)
-    expect(res.send).toHaveBeenCalledWith(downloadProgress)
-  })
-
-  it('should return 404 if download progress is not found', async () => {
-    const modelId = '123'
-
-    const req = { params: { modelId } }
-    const res = {
-      status: jest.fn(),
-      send: jest.fn(),
-    }
-
-
-    jest.spyOn(app, 'get').mockImplementation((path, handler) => {
-      if (path === `/download/getDownloadProgress/${modelId}`) {
-        res.status(404)
-        res.send({ message: 'Download progress not found' })
-      }
-    })
-    app.get(`/download/getDownloadProgress/${modelId}`, req as any)
-    expect(res.status).toHaveBeenCalledWith(404)
-    expect(res.send).toHaveBeenCalledWith({ message: 'Download progress not found' })
-  })
-})
diff --git a/core/src/node/api/restful/app/download.ts b/core/src/node/api/restful/app/download.ts
deleted file mode 100644
index 5e0c83d01..000000000
--- a/core/src/node/api/restful/app/download.ts
+++ /dev/null
@@ -1,23 +0,0 @@
-import { DownloadRoute } from '../../../../types/api'
-import { DownloadManager } from '../../../helper/download'
-import { HttpServer } from '../../HttpServer'
-
-export const downloadRouter = async (app: HttpServer) => {
-  app.get(`/download/${DownloadRoute.getDownloadProgress}/:modelId`, async (req, res) => {
-    const modelId = req.params.modelId
-
-    console.debug(`Getting download progress for model ${modelId}`)
-    console.debug(
-      `All Download progress: ${JSON.stringify(DownloadManager.instance.downloadProgressMap)}`
-    )
-
-    // check if null DownloadManager.instance.downloadProgressMap
-    if (!DownloadManager.instance.downloadProgressMap[modelId]) {
-      return res.status(404).send({
-        message: 'Download progress not found',
-      })
-    } else {
-      return res.status(200).send(DownloadManager.instance.downloadProgressMap[modelId])
-    }
-  })
-}
diff --git a/core/src/node/api/restful/app/handlers.test.ts b/core/src/node/api/restful/app/handlers.test.ts
deleted file mode 100644
index 680623d86..000000000
--- a/core/src/node/api/restful/app/handlers.test.ts
+++ /dev/null
@@ -1,16 +0,0 @@
-//
-import { jest } from '@jest/globals';
-
-import { HttpServer } from '../../HttpServer';
-import { handleRequests } from './handlers';
-import { Handler, RequestHandler } from '../../common/handler';
-
-it('should initialize RequestHandler and call handle', () => {
-  const mockHandle = jest.fn();
-  jest.spyOn(RequestHandler.prototype, 'handle').mockImplementation(mockHandle);
-
-  const mockApp = { post: jest.fn() };
-  handleRequests(mockApp as unknown as HttpServer);
-
-  expect(mockHandle).toHaveBeenCalled();
-});
diff --git a/core/src/node/api/restful/app/handlers.ts b/core/src/node/api/restful/app/handlers.ts
deleted file mode 100644
index 43c3f7add..000000000
--- a/core/src/node/api/restful/app/handlers.ts
+++ /dev/null
@@ -1,13 +0,0 @@
-import { HttpServer } from '../../HttpServer'
-import { Handler, RequestHandler } from '../../common/handler'
-
-export function handleRequests(app: HttpServer) {
-  const restWrapper: Handler = (route: string, listener: (...args: any[]) => any) => {
-    app.post(`/app/${route}`, async (request: any, reply: any) => {
-      const args = JSON.parse(request.body) as any[]
-      reply.send(JSON.stringify(await listener(...args)))
-    })
-  }
-  const handler = new RequestHandler(restWrapper)
-  handler.handle()
-}
diff --git a/core/src/node/api/restful/common.test.ts b/core/src/node/api/restful/common.test.ts
deleted file mode 100644
index b40f6606f..000000000
--- a/core/src/node/api/restful/common.test.ts
+++ /dev/null
@@ -1,21 +0,0 @@
-
-import { commonRouter } from './common';
-import { JanApiRouteConfiguration } from './helper/configuration';
-
-test('commonRouter sets up routes for each key in JanApiRouteConfiguration', async () => {
-  const mockHttpServer = {
-    get: jest.fn(),
-    post: jest.fn(),
-    patch: jest.fn(),
-    put: jest.fn(),
-    delete: jest.fn(),
-  };
-  await commonRouter(mockHttpServer as any);
-
-  const expectedRoutes = Object.keys(JanApiRouteConfiguration);
-  expectedRoutes.forEach((key) => {
-    expect(mockHttpServer.get).toHaveBeenCalledWith(`/${key}`, expect.any(Function));
-    expect(mockHttpServer.get).toHaveBeenCalledWith(`/${key}/:id`, expect.any(Function));
-    expect(mockHttpServer.delete).toHaveBeenCalledWith(`/${key}/:id`, expect.any(Function));
-  });
-});
diff --git a/core/src/node/api/restful/common.ts b/core/src/node/api/restful/common.ts
deleted file mode 100644
index 989104e03..000000000
--- a/core/src/node/api/restful/common.ts
+++ /dev/null
@@ -1,82 +0,0 @@
-import { HttpServer } from '../HttpServer'
-import {
-  chatCompletions,
-  downloadModel,
-  getBuilder,
-  retrieveBuilder,
-  createMessage,
-  createThread,
-  getMessages,
-  retrieveMessage,
-  updateThread,
-  models,
-} from './helper/builder'
-
-import { JanApiRouteConfiguration } from './helper/configuration'
-
-export const commonRouter = async (app: HttpServer) => {
-  const normalizeData = (data: any) => {
-    return {
-      object: 'list',
-      data,
-    }
-  }
-  // Common Routes
-  // Read & Delete :: Threads | Models | Assistants
-  Object.keys(JanApiRouteConfiguration).forEach((key) => {
-    app.get(`/${key}`, async (_req, _res) => {
-      if (key.includes('models')) {
-        return models(_req, _res)
-      }
-      return getBuilder(JanApiRouteConfiguration[key]).then(normalizeData)
-    })
-
-    app.get(`/${key}/:id`, async (_req: any, _res: any) => {
-      if (key.includes('models')) {
-        return models(_req, _res)
-      }
-      return retrieveBuilder(JanApiRouteConfiguration[key], _req.params.id)
-    })
-
-    app.delete(`/${key}/:id`, async (_req: any, _res: any) => {
-      if (key.includes('models')) {
-        return models(_req, _res)
-      }
-      return retrieveBuilder(JanApiRouteConfiguration[key], _req.params.id)
-    })
-  })
-
-  // Threads
-  app.post(`/threads`, async (req, res) => createThread(req.body))
-
-  app.get(`/threads/:threadId/messages`, async (req, res) =>
-    getMessages(req.params.threadId).then(normalizeData)
-  )
-
-  app.get(`/threads/:threadId/messages/:messageId`, async (req, res) =>
-    retrieveMessage(req.params.threadId, req.params.messageId)
-  )
-
-  app.post(`/threads/:threadId/messages`, async (req, res) =>
-    createMessage(req.params.threadId as any, req.body as any)
-  )
-
-  app.patch(`/threads/:threadId`, async (request: any) =>
-    updateThread(request.params.threadId, request.body)
-  )
-
-  // Models
-  app.get(`/models/download/:modelId`, async (request: any) =>
-    downloadModel(request.params.modelId, {
-      ignoreSSL: request.query.ignoreSSL === 'true',
-      proxy: request.query.proxy,
-    })
-  )
-
-  app.post(`/models/start`, async (request: any, reply: any) => models(request, reply))
-
-  app.post(`/models/stop`, async (request: any, reply: any) => models(request, reply))
-
-  // Chat Completion
-  app.post(`/chat/completions`, async (request: any, reply: any) => chatCompletions(request, reply))
-}
diff --git a/core/src/node/api/restful/helper/builder.test.ts b/core/src/node/api/restful/helper/builder.test.ts
deleted file mode 100644
index cfaee6007..000000000
--- a/core/src/node/api/restful/helper/builder.test.ts
+++ /dev/null
@@ -1,251 +0,0 @@
-import { existsSync, readdirSync, readFileSync, writeFileSync, mkdirSync, appendFileSync } from 'fs'
-import {
-  getBuilder,
-  retrieveBuilder,
-  getMessages,
-  retrieveMessage,
-  createThread,
-  updateThread,
-  createMessage,
-  downloadModel,
-  chatCompletions,
-} from './builder'
-import { RouteConfiguration } from './configuration'
-
-jest.mock('fs')
-jest.mock('path')
-jest.mock('../../../helper', () => ({
-  getEngineConfiguration: jest.fn(),
-  getJanDataFolderPath: jest.fn().mockReturnValue('/mock/path'),
-}))
-jest.mock('request')
-jest.mock('request-progress')
-jest.mock('node-fetch')
-
-describe('builder helper functions', () => {
-  const mockConfiguration: RouteConfiguration = {
-    dirName: 'mockDir',
-    metadataFileName: 'metadata.json',
-    delete: {
-      object: 'mockObject',
-    },
-  }
-
-  beforeEach(() => {
-    jest.clearAllMocks()
-  })
-
-  describe('getBuilder', () => {
-    it('should return an empty array if directory does not exist', async () => {
-      ;(existsSync as jest.Mock).mockReturnValue(false)
-      const result = await getBuilder(mockConfiguration)
-      expect(result).toEqual([])
-    })
-
-    it('should return model data if directory exists', async () => {
-      ;(existsSync as jest.Mock).mockReturnValue(true)
-      ;(readdirSync as jest.Mock).mockReturnValue(['file1'])
-      ;(readFileSync as jest.Mock).mockReturnValue(JSON.stringify({ id: 'model1' }))
-
-      const result = await getBuilder(mockConfiguration)
-      expect(result).toEqual([{ id: 'model1' }])
-    })
-  })
-
-  describe('retrieveBuilder', () => {
-    it('should return undefined if no data matches the id', async () => {
-      ;(existsSync as jest.Mock).mockReturnValue(true)
-      ;(readdirSync as jest.Mock).mockReturnValue(['file1'])
-      ;(readFileSync as jest.Mock).mockReturnValue(JSON.stringify({ id: 'model1' }))
-
-      const result = await retrieveBuilder(mockConfiguration, 'nonexistentId')
-      expect(result).toBeUndefined()
-    })
-
-    it('should return the matching data', async () => {
-      ;(existsSync as jest.Mock).mockReturnValue(true)
-      ;(readdirSync as jest.Mock).mockReturnValue(['file1'])
-      ;(readFileSync as jest.Mock).mockReturnValue(JSON.stringify({ id: 'model1' }))
-
-      const result = await retrieveBuilder(mockConfiguration, 'model1')
-      expect(result).toEqual({ id: 'model1' })
-    })
-  })
-
-  describe('getMessages', () => {
-    it('should return an empty array if message file does not exist', async () => {
-      ;(existsSync as jest.Mock).mockReturnValue(false)
-
-      const result = await getMessages('thread1')
-      expect(result).toEqual([])
-    })
-
-    it('should return messages if message file exists', async () => {
-      ;(existsSync as jest.Mock).mockReturnValue(true)
-      ;(readdirSync as jest.Mock).mockReturnValue(['messages.jsonl'])
-      ;(readFileSync as jest.Mock).mockReturnValue('{"id":"msg1"}\n{"id":"msg2"}\n')
-
-      const result = await getMessages('thread1')
-      expect(result).toEqual([{ id: 'msg1' }, { id: 'msg2' }])
-    })
-  })
-
-  describe('retrieveMessage', () => {
-    it('should return a message if no messages match the id', async () => {
-      ;(existsSync as jest.Mock).mockReturnValue(true)
-      ;(readdirSync as jest.Mock).mockReturnValue(['messages.jsonl'])
-      ;(readFileSync as jest.Mock).mockReturnValue('{"id":"msg1"}\n')
-
-      const result = await retrieveMessage('thread1', 'nonexistentId')
-      expect(result).toEqual({ message: 'Not found' })
-    })
-
-    it('should return the matching message', async () => {
-      ;(existsSync as jest.Mock).mockReturnValue(true)
-      ;(readdirSync as jest.Mock).mockReturnValue(['messages.jsonl'])
-      ;(readFileSync as jest.Mock).mockReturnValue('{"id":"msg1"}\n')
-
-      const result = await retrieveMessage('thread1', 'msg1')
-      expect(result).toEqual({ id: 'msg1' })
-    })
-  })
-
-  describe('createThread', () => {
-    it('should return a message if thread has no assistants', async () => {
-      const result = await createThread({})
-      expect(result).toEqual({ message: 'Thread must have at least one assistant' })
-    })
-
-    it('should create a thread and return the updated thread', async () => {
-      ;(existsSync as jest.Mock).mockReturnValue(false)
-
-      const thread = { assistants: [{ assistant_id: 'assistant1' }] }
-      const result = await createThread(thread)
-      expect(mkdirSync).toHaveBeenCalled()
-      expect(writeFileSync).toHaveBeenCalled()
-      expect(result.id).toBeDefined()
-    })
-  })
-
-  describe('updateThread', () => {
-    it('should return a message if thread is not found', async () => {
-      ;(existsSync as jest.Mock).mockReturnValue(true)
-      ;(readdirSync as jest.Mock).mockReturnValue(['file1'])
-      ;(readFileSync as jest.Mock).mockReturnValue(JSON.stringify({ id: 'model1' }))
-
-      const result = await updateThread('nonexistentId', {})
-      expect(result).toEqual({ message: 'Thread not found' })
-    })
-
-    it('should update the thread and return the updated thread', async () => {
-      ;(existsSync as jest.Mock).mockReturnValue(true)
-      ;(readdirSync as jest.Mock).mockReturnValue(['file1'])
-      ;(readFileSync as jest.Mock).mockReturnValue(JSON.stringify({ id: 'model1' }))
-
-      const result = await updateThread('model1', { name: 'updatedName' })
-      expect(writeFileSync).toHaveBeenCalled()
-      expect(result.name).toEqual('updatedName')
-    })
-  })
-
-  describe('createMessage', () => {
-    it('should create a message and return the created message', async () => {
-      ;(existsSync as jest.Mock).mockReturnValue(false)
-      const message = { role: 'user', content: 'Hello' }
-
-      const result = (await createMessage('thread1', message)) as any
-      expect(mkdirSync).toHaveBeenCalled()
-      expect(appendFileSync).toHaveBeenCalled()
-      expect(result.id).toBeDefined()
-    })
-  })
-
-  describe('downloadModel', () => {
-    it('should return a message if model is not found', async () => {
-      ;(existsSync as jest.Mock).mockReturnValue(true)
-      ;(readdirSync as jest.Mock).mockReturnValue(['file1'])
-      ;(readFileSync as jest.Mock).mockReturnValue(JSON.stringify({ id: 'model1' }))
-
-      const result = await downloadModel('nonexistentId')
-      expect(result).toEqual({ message: 'Model not found' })
-    })
-
-    it('should start downloading the model', async () => {
-      ;(existsSync as jest.Mock).mockReturnValue(true)
-      ;(readdirSync as jest.Mock).mockReturnValue(['file1'])
-      ;(readFileSync as jest.Mock).mockReturnValue(
-        JSON.stringify({ id: 'model1', object: 'model', sources: ['http://example.com'] })
-      )
-      const result = await downloadModel('model1')
-      expect(result).toEqual({ message: 'Starting download model1' })
-    })
-  })
-
-  describe('chatCompletions', () => {
-    it('should return the error on status not ok', async () => {
-      const request = { body: { model: 'model1' } }
-      const mockSend = jest.fn()
-      const reply = {
-        code: jest.fn().mockReturnThis(),
-        send: jest.fn(),
-        headers: jest.fn().mockReturnValue({
-          send: mockSend,
-        }),
-        raw: {
-          writeHead: jest.fn(),
-          pipe: jest.fn(),
-        },
-      }
-
-      ;(existsSync as jest.Mock).mockReturnValue(true)
-      ;(readdirSync as jest.Mock).mockReturnValue(['file1'])
-      ;(readFileSync as jest.Mock).mockReturnValue(
-        JSON.stringify({ id: 'model1', engine: 'openai' })
-      )
-
-      // Mock fetch
-      const fetch = require('node-fetch')
-      fetch.mockResolvedValue({
-        status: 400,
-        headers: new Map([
-          ['content-type', 'application/json'],
-          ['x-request-id', '123456'],
-        ]),
-        body: { pipe: jest.fn() },
-        text: jest.fn().mockResolvedValue({ error: 'Mock error response' }),
-      })
-      await chatCompletions(request, reply)
-      expect(reply.code).toHaveBeenCalledWith(400)
-      expect(mockSend).toHaveBeenCalledWith(
-        expect.objectContaining({
-          error: 'Mock error response',
-        })
-      )
-    })
-
-    it('should return the chat completions', async () => {
-      const request = { body: { model: 'model1' } }
-      const reply = {
-        code: jest.fn().mockReturnThis(),
-        send: jest.fn(),
-        raw: { writeHead: jest.fn(), pipe: jest.fn() },
-      }
-
-      ;(existsSync as jest.Mock).mockReturnValue(true)
-      ;(readdirSync as jest.Mock).mockReturnValue(['file1'])
-      ;(readFileSync as jest.Mock).mockReturnValue(
-        JSON.stringify({ id: 'model1', engine: 'openai' })
-      )
-
-      // Mock fetch
-      const fetch = require('node-fetch')
-      fetch.mockResolvedValue({
-        status: 200,
-        body: { pipe: jest.fn() },
-        json: jest.fn().mockResolvedValue({ completions: ['completion1'] }),
-      })
-      await chatCompletions(request, reply)
-      expect(reply.raw.writeHead).toHaveBeenCalledWith(200, expect.any(Object))
-    })
-  })
-})
diff --git a/core/src/node/api/restful/helper/builder.ts b/core/src/node/api/restful/helper/builder.ts
deleted file mode 100644
index e081708cf..000000000
--- a/core/src/node/api/restful/helper/builder.ts
+++ /dev/null
@@ -1,340 +0,0 @@
-import {
-  existsSync,
-  readdirSync,
-  readFileSync,
-  writeFileSync,
-  mkdirSync,
-  appendFileSync,
-  createWriteStream,
-  rmdirSync,
-} from 'fs'
-import { JanApiRouteConfiguration, RouteConfiguration } from './configuration'
-import { join } from 'path'
-import { ContentType, InferenceEngine, MessageStatus, ThreadMessage } from '../../../../types'
-import { getJanDataFolderPath } from '../../../helper'
-import { CORTEX_API_URL } from './consts'
-
-// TODO: Refactor these
-export const getBuilder = async (configuration: RouteConfiguration) => {
-  const directoryPath = join(getJanDataFolderPath(), configuration.dirName)
-  try {
-    if (!existsSync(directoryPath)) {
-      console.debug('model folder not found')
-      return []
-    }
-
-    const files: string[] = readdirSync(directoryPath)
-
-    const allDirectories: string[] = []
-    for (const file of files) {
-      if (file === '.DS_Store') continue
-      allDirectories.push(file)
-    }
-
-    const results = allDirectories
-      .map((dirName) => {
-        const jsonPath = join(directoryPath, dirName, configuration.metadataFileName)
-        return readModelMetadata(jsonPath)
-      })
-      .filter((data) => !!data)
-    const modelData = results
-      .map((result: any) => {
-        try {
-          return JSON.parse(result)
-        } catch (err) {
-          console.error(err)
-        }
-      })
-      .filter((e: any) => !!e)
-
-    return modelData
-  } catch (err) {
-    console.error(err)
-    return []
-  }
-}
-
-const readModelMetadata = (path: string): string | undefined => {
-  if (existsSync(path)) {
-    return readFileSync(path, 'utf-8')
-  } else {
-    return undefined
-  }
-}
-
-export const retrieveBuilder = async (configuration: RouteConfiguration, id: string) => {
-  const data = await getBuilder(configuration)
-  const filteredData = data.filter((d: any) => d.id === id)[0]
-
-  if (!filteredData) {
-    return undefined
-  }
-
-  return filteredData
-}
-
-export const getMessages = async (threadId: string): Promise<ThreadMessage[]> => {
-  const threadDirPath = join(getJanDataFolderPath(), 'threads', threadId)
-  const messageFile = 'messages.jsonl'
-  try {
-    const files: string[] = readdirSync(threadDirPath)
-    if (!files.includes(messageFile)) {
-      console.error(`${threadDirPath} not contains message file`)
-      return []
-    }
-
-    const messageFilePath = join(threadDirPath, messageFile)
-    if (!existsSync(messageFilePath)) {
-      console.debug('message file not found')
-      return []
-    }
-
-    const lines = readFileSync(messageFilePath, 'utf-8')
-      .toString()
-      .split('\n')
-      .filter((line: any) => line !== '')
-
-    const messages: ThreadMessage[] = []
-    lines.forEach((line: string) => {
-      messages.push(JSON.parse(line) as ThreadMessage)
-    })
-    return messages
-  } catch (err) {
-    console.error(err)
-    return []
-  }
-}
-
-export const retrieveMessage = async (threadId: string, messageId: string) => {
-  const messages = await getMessages(threadId)
-  const filteredMessages = messages.filter((m) => m.id === messageId)
-  if (!filteredMessages || filteredMessages.length === 0) {
-    return {
-      message: 'Not found',
-    }
-  }
-
-  return filteredMessages[0]
-}
-
-export const createThread = async (thread: any) => {
-  const threadMetadataFileName = 'thread.json'
-  // TODO: add validation
-  if (!thread.assistants || thread.assistants.length === 0) {
-    return {
-      message: 'Thread must have at least one assistant',
-    }
-  }
-
-  const threadId = generateThreadId(thread.assistants[0].assistant_id)
-  try {
-    const updatedThread = {
-      ...thread,
-      id: threadId,
-      created: Date.now(),
-      updated: Date.now(),
-    }
-    const threadDirPath = join(getJanDataFolderPath(), 'threads', updatedThread.id)
-    const threadJsonPath = join(threadDirPath, threadMetadataFileName)
-
-    if (!existsSync(threadDirPath)) {
-      mkdirSync(threadDirPath)
-    }
-
-    await writeFileSync(threadJsonPath, JSON.stringify(updatedThread, null, 2))
-    return updatedThread
-  } catch (err) {
-    return {
-      error: err,
-    }
-  }
-}
-
-export const updateThread = async (threadId: string, thread: any) => {
-  const threadMetadataFileName = 'thread.json'
-  const currentThreadData = await retrieveBuilder(JanApiRouteConfiguration.threads, threadId)
-  if (!currentThreadData) {
-    return {
-      message: 'Thread not found',
-    }
-  }
-  // we don't want to update the id and object
-  delete thread.id
-  delete thread.object
-
-  const updatedThread = {
-    ...currentThreadData,
-    ...thread,
-    updated: Date.now(),
-  }
-  try {
-    const threadDirPath = join(getJanDataFolderPath(), 'threads', updatedThread.id)
-    const threadJsonPath = join(threadDirPath, threadMetadataFileName)
-
-    await writeFileSync(threadJsonPath, JSON.stringify(updatedThread, null, 2))
-    return updatedThread
-  } catch (err) {
-    return {
-      message: err,
-    }
-  }
-}
-
-const generateThreadId = (assistantId: string) => {
-  return `${assistantId}_${(Date.now() / 1000).toFixed(0)}`
-}
-
-export const createMessage = async (threadId: string, message: any) => {
-  const threadMessagesFileName = 'messages.jsonl'
-
-  try {
-    const { ulid } = require('ulidx')
-    const msgId = ulid()
-    const createdAt = Date.now()
-    const threadMessage: ThreadMessage = {
-      id: msgId,
-      thread_id: threadId,
-      status: MessageStatus.Ready,
-      created: createdAt,
-      updated: createdAt,
-      object: 'thread.message',
-      role: message.role,
-      content: [
-        {
-          type: ContentType.Text,
-          text: {
-            value: message.content,
-            annotations: [],
-          },
-        },
-      ],
-    }
-
-    const threadDirPath = join(getJanDataFolderPath(), 'threads', threadId)
-    const threadMessagePath = join(threadDirPath, threadMessagesFileName)
-
-    if (!existsSync(threadDirPath)) {
-      mkdirSync(threadDirPath)
-    }
-    appendFileSync(threadMessagePath, JSON.stringify(threadMessage) + '\n')
-    return threadMessage
-  } catch (err) {
-    return {
-      message: err,
-    }
-  }
-}
-
-export const downloadModel = async (
-  modelId: string,
-  network?: { proxy?: string; ignoreSSL?: boolean }
-) => {
-  const strictSSL = !network?.ignoreSSL
-  const proxy = network?.proxy?.startsWith('http') ? network.proxy : undefined
-  const model = await retrieveBuilder(JanApiRouteConfiguration.models, modelId)
-  if (!model || model.object !== 'model') {
-    return {
-      message: 'Model not found',
-    }
-  }
-
-  const directoryPath = join(getJanDataFolderPath(), 'models', modelId)
-  if (!existsSync(directoryPath)) {
-    mkdirSync(directoryPath)
-  }
-
-  // path to model binary
-  const modelBinaryPath = join(directoryPath, modelId)
-
-  const request = require('request')
-  const progress = require('request-progress')
-
-  for (const source of model.sources) {
-    const rq = request({ url: source, strictSSL, proxy })
-    progress(rq, {})
-      ?.on('progress', function (state: any) {
-        console.debug('progress', JSON.stringify(state, null, 2))
-      })
-      ?.on('error', function (err: Error) {
-        console.error('error', err)
-      })
-      ?.on('end', function () {
-        console.debug('end')
-      })
-      .pipe(createWriteStream(modelBinaryPath))
-  }
-
-  return {
-    message: `Starting download ${modelId}`,
-  }
-}
-
-/**
- * Proxy /models to cortex
- * @param request
- * @param reply
- */
-export const models = async (request: any, reply: any) => {
-  const fetch = require('node-fetch')
-  const headers: Record<string, any> = {
-    'Content-Type': 'application/json',
-  }
-
-  const response = await fetch(`${CORTEX_API_URL}/models${request.url.split('/models')[1] ?? ""}`, {
-    method: request.method,
-    headers: headers,
-    body: JSON.stringify(request.body),
-  })
-
-  if (response.status !== 200) {
-    // Forward the error response to client via reply
-    const responseBody = await response.text()
-    const responseHeaders = Object.fromEntries(response.headers)
-    reply.code(response.status).headers(responseHeaders).send(responseBody)
-  } else {
-    reply.raw.writeHead(200, {
-      'Content-Type': 'application/json',
-      'Cache-Control': 'no-cache',
-      'Connection': 'keep-alive',
-      'Access-Control-Allow-Origin': '*',
-    })
-    response.body.pipe(reply.raw)
-  }
-}
-
-/**
- * Proxy chat completions
- * @param request
- * @param reply
- */
-export const chatCompletions = async (request: any, reply: any) => {
-  const headers: Record<string, any> = {
-    'Content-Type': 'application/json',
-  }
-
-  // add engine for new cortex cpp engine
-  if (request.body.engine === InferenceEngine.nitro) {
-    request.body.engine = InferenceEngine.cortex_llamacpp
-  }
-
-  const fetch = require('node-fetch')
-  const response = await fetch(`${CORTEX_API_URL}/chat/completions`, {
-    method: 'POST',
-    headers: headers,
-    body: JSON.stringify(request.body),
-  })
-  if (response.status !== 200) {
-    // Forward the error response to client via reply
-    const responseBody = await response.text()
-    const responseHeaders = Object.fromEntries(response.headers)
-    reply.code(response.status).headers(responseHeaders).send(responseBody)
-  } else {
-    reply.raw.writeHead(200, {
-      'Content-Type': request.body.stream === true ? 'text/event-stream' : 'application/json',
-      'Cache-Control': 'no-cache',
-      'Connection': 'keep-alive',
-      'Access-Control-Allow-Origin': '*',
-    })
-    response.body.pipe(reply.raw)
-  }
-}
diff --git a/core/src/node/api/restful/helper/configuration.test.ts b/core/src/node/api/restful/helper/configuration.test.ts
deleted file mode 100644
index ae002312a..000000000
--- a/core/src/node/api/restful/helper/configuration.test.ts
+++ /dev/null
@@ -1,24 +0,0 @@
-import { JanApiRouteConfiguration } from './configuration'
-
-describe('JanApiRouteConfiguration', () => {
-  it('should have the correct models configuration', () => {
-    const modelsConfig = JanApiRouteConfiguration.models;
-    expect(modelsConfig.dirName).toBe('models');
-    expect(modelsConfig.metadataFileName).toBe('model.json');
-    expect(modelsConfig.delete.object).toBe('model');
-  });
-
-  it('should have the correct assistants configuration', () => {
-    const assistantsConfig = JanApiRouteConfiguration.assistants;
-    expect(assistantsConfig.dirName).toBe('assistants');
-    expect(assistantsConfig.metadataFileName).toBe('assistant.json');
-    expect(assistantsConfig.delete.object).toBe('assistant');
-  });
-
-  it('should have the correct threads configuration', () => {
-    const threadsConfig = JanApiRouteConfiguration.threads;
-    expect(threadsConfig.dirName).toBe('threads');
-    expect(threadsConfig.metadataFileName).toBe('thread.json');
-    expect(threadsConfig.delete.object).toBe('thread');
-  });
-});
\ No newline at end of file
diff --git a/core/src/node/api/restful/helper/configuration.ts b/core/src/node/api/restful/helper/configuration.ts
deleted file mode 100644
index 88e5ffb61..000000000
--- a/core/src/node/api/restful/helper/configuration.ts
+++ /dev/null
@@ -1,31 +0,0 @@
-export const JanApiRouteConfiguration: Record<string, RouteConfiguration> = {
-  models: {
-    dirName: 'models',
-    metadataFileName: 'model.json',
-    delete: {
-      object: 'model',
-    },
-  },
-  assistants: {
-    dirName: 'assistants',
-    metadataFileName: 'assistant.json',
-    delete: {
-      object: 'assistant',
-    },
-  },
-  threads: {
-    dirName: 'threads',
-    metadataFileName: 'thread.json',
-    delete: {
-      object: 'thread',
-    },
-  },
-}
-
-export type RouteConfiguration = {
-  dirName: string
-  metadataFileName: string
-  delete: {
-    object: string
-  }
-}
diff --git a/core/src/node/api/restful/helper/consts.test.ts b/core/src/node/api/restful/helper/consts.test.ts
deleted file mode 100644
index 524f0cbeb..000000000
--- a/core/src/node/api/restful/helper/consts.test.ts
+++ /dev/null
@@ -1,5 +0,0 @@
-import { CORTEX_DEFAULT_PORT } from './consts'
-
-it('should test CORTEX_DEFAULT_PORT', () => {
-  expect(CORTEX_DEFAULT_PORT).toBe(39291)
-})
diff --git a/core/src/node/api/restful/helper/consts.ts b/core/src/node/api/restful/helper/consts.ts
deleted file mode 100644
index 412d304ee..000000000
--- a/core/src/node/api/restful/helper/consts.ts
+++ /dev/null
@@ -1,7 +0,0 @@
-export const CORTEX_DEFAULT_PORT = 39291
-
-export const LOCAL_HOST = '127.0.0.1'
-
-export const SUPPORTED_MODEL_FORMAT = '.gguf'
-
-export const CORTEX_API_URL = `http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1`
diff --git a/core/src/node/api/restful/v1.test.ts b/core/src/node/api/restful/v1.test.ts
deleted file mode 100644
index 8e22496e9..000000000
--- a/core/src/node/api/restful/v1.test.ts
+++ /dev/null
@@ -1,16 +0,0 @@
-
-import { v1Router } from './v1';
-import { commonRouter } from './common';
-
-test('should define v1Router function', () => {
-  expect(v1Router).toBeDefined();
-});
-
-test('should register commonRouter', () => {
-  const mockApp = {
-    register: jest.fn(),
-  };
-  v1Router(mockApp as any);
-  expect(mockApp.register).toHaveBeenCalledWith(commonRouter);
-});
-
diff --git a/core/src/node/api/restful/v1.ts b/core/src/node/api/restful/v1.ts
deleted file mode 100644
index 9d57de841..000000000
--- a/core/src/node/api/restful/v1.ts
+++ /dev/null
@@ -1,16 +0,0 @@
-import { HttpServer } from '../HttpServer'
-import { commonRouter } from './common'
-
-export const v1Router = async (app: HttpServer) => {
-  // MARK: Public API Routes
-  app.register(commonRouter)
-
-  // MARK: Internal Application Routes
-  // DEPRECATED: Vulnerability possible issues
-  // handleRequests(app)
-
-  // Expanded route for tracking download progress
-  // TODO: Replace by Observer Wrapper (ZeroMQ / Vanilla Websocket)
-  // DEPRECATED: Jan FE Docker deploy is deprecated
-  // app.register(downloadRouter)
-}
diff --git a/core/src/node/helper/path.ts b/core/src/node/helper/path.ts
index 4efbea463..5f6386640 100644
--- a/core/src/node/helper/path.ts
+++ b/core/src/node/helper/path.ts
@@ -1,5 +1,4 @@
-import { join, resolve } from 'path'
-import { getJanDataFolderPath } from './config'
+import { join } from 'path'
 
 /**
  * Normalize file path
@@ -34,4 +33,5 @@ export function appResourcePath() {
 
   // server
   return join(global.core.appPath(), '../../..')
-}
\ No newline at end of file
+}
+
diff --git a/core/src/types/assistant/assistantEntity.ts b/core/src/types/assistant/assistantEntity.ts
index 27592e26b..42617a4b5 100644
--- a/core/src/types/assistant/assistantEntity.ts
+++ b/core/src/types/assistant/assistantEntity.ts
@@ -36,3 +36,10 @@ export type Assistant = {
   /** Represents the metadata of the object. */
   metadata?: Record<string, unknown>
 }
+
+export interface CodeInterpreterTool {
+  /**
+   * The type of tool being defined: `code_interpreter`
+   */
+  type: 'code_interpreter'
+}
diff --git a/core/src/types/message/messageEntity.ts b/core/src/types/message/messageEntity.ts
index 26bcad1a7..302b824ee 100644
--- a/core/src/types/message/messageEntity.ts
+++ b/core/src/types/message/messageEntity.ts
@@ -1,3 +1,4 @@
+import { CodeInterpreterTool } from '../assistant'
 import { ChatCompletionMessage, ChatCompletionRole } from '../inference'
 import { ModelInfo } from '../model'
 import { Thread } from '../thread'
@@ -15,6 +16,10 @@ export type ThreadMessage = {
   thread_id: string
   /** The assistant id of this thread. **/
   assistant_id?: string
+  /**
+   * A list of files attached to the message, and the tools they were added to.
+   */
+  attachments?: Array<Attachment> | null
   /** The role of the author of this message. **/
   role: ChatCompletionRole
   /** The content of this message. **/
@@ -22,9 +27,9 @@
   /** The status of this message. **/
   status: MessageStatus
   /** The timestamp indicating when this message was created. Represented in Unix time. **/
-  created: number
+  created_at: number
   /** The timestamp indicating when this message was updated. Represented in Unix time. **/
-  updated: number
+  completed_at: number
   /** The additional metadata of this message. **/
   metadata?: Record<string, unknown>
@@ -52,6 +57,11 @@ export type MessageRequest = {
    */
   assistantId?: string
 
+  /**
+   * A list of files attached to the message, and the tools they were added to.
+   */
+  attachments: Array<Attachment> | null
+
   /** Messages for constructing a chat completion request **/
   messages?: ChatCompletionMessage[]
@@ -97,8 +107,7 @@ export enum ErrorCode {
  */
 export enum ContentType {
   Text = 'text',
-  Image = 'image',
-  Pdf = 'pdf',
+  Image = 'image_url',
 }
 
 /**
@@ -108,8 +117,15 @@ export enum ContentType {
 export type ContentValue = {
   value: string
   annotations: string[]
-  name?: string
-  size?: number
+}
+
+/**
+ * The `ImageContentValue` type defines the shape of a content value object of image type
+ * @data_transfer_object
+ */
+export type ImageContentValue = {
+  detail?: string
+  url?: string
 }
 
 /**
@@ -118,5 +134,37 @@ export type ContentValue = {
  */
 export type ThreadContent = {
   type: ContentType
-  text: ContentValue
+  text?: ContentValue
+  image_url?: ImageContentValue
+}
+
+export interface Attachment {
+  /**
+   * The ID of the file to attach to the message.
+   */
+  file_id?: string
+
+  /**
+   * The tools to add this file to.
+   */
+  tools?: Array<CodeInterpreterTool | Attachment.AssistantToolsFileSearchTypeOnly>
+}
+
+export namespace Attachment {
+  export interface AssistantToolsFileSearchTypeOnly {
+    /**
+     * The type of tool being defined: `file_search`
+     */
+    type: 'file_search'
+  }
+}
+
+/**
+ * On an incomplete message, details about why the message is incomplete.
+ */
+export interface IncompleteDetails {
+  /**
+   * The reason the message is incomplete.
+   */
+  reason: 'content_filter' | 'max_tokens' | 'run_cancelled' | 'run_expired' | 'run_failed'
+}
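Putting the new `messageEntity` shape together: seconds-based `created_at`/`completed_at`, the `image_url` content variant, and `attachments` routed to tools. A purely illustrative object under the new type; the ids, URLs, and values below are made up, and the string literals stand in for the `ContentType`/`MessageStatus` enum members:

```ts
// Illustrative only: a ThreadMessage under the new shape from the hunks above.
const now = Math.floor(Date.now() / 1000) // Unix seconds, not milliseconds

const message = {
  id: '01J3XAC0EXAMPLE',            // hypothetical ulid
  thread_id: 'thread-1',
  object: 'thread.message',
  role: 'assistant',
  status: 'ready',
  created_at: now,                  // was `created` (ms) before this change
  completed_at: now,                // was `updated` (ms) before this change
  content: [
    { type: 'text', text: { value: 'Here is the chart.', annotations: [] } },
    { type: 'image_url', image_url: { url: 'file://chart.png', detail: 'auto' } },
  ],
  attachments: [{ file_id: 'file-123', tools: [{ type: 'code_interpreter' }] }],
}
```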
diff --git a/core/src/types/message/messageInterface.ts b/core/src/types/message/messageInterface.ts
index f6579da88..1ea04298a 100644
--- a/core/src/types/message/messageInterface.ts
+++ b/core/src/types/message/messageInterface.ts
@@ -11,20 +11,20 @@ export interface MessageInterface {
    * @param {ThreadMessage} message - The message to be added.
    * @returns {Promise<void>} A promise that resolves when the message has been added.
    */
-  addNewMessage(message: ThreadMessage): Promise<void>
-
-  /**
-   * Writes an array of messages to a specific thread.
-   * @param {string} threadId - The ID of the thread to write the messages to.
-   * @param {ThreadMessage[]} messages - The array of messages to be written.
-   * @returns {Promise<void>} A promise that resolves when the messages have been written.
-   */
-  writeMessages(threadId: string, messages: ThreadMessage[]): Promise<void>
+  createMessage(message: ThreadMessage): Promise<ThreadMessage>
 
   /**
    * Retrieves all messages from a specific thread.
    * @param {string} threadId - The ID of the thread to retrieve the messages from.
    * @returns {Promise<ThreadMessage[]>} A promise that resolves to an array of messages from the thread.
    */
-  getAllMessages(threadId: string): Promise<ThreadMessage[]>
+  listMessages(threadId: string): Promise<ThreadMessage[]>
+
+  /**
+   * Deletes a specific message from a thread.
+   * @param {string} threadId - The ID of the thread from which the message will be deleted.
+   * @param {string} messageId - The ID of the message to be deleted.
+   * @returns {Promise<void>} A promise that resolves when the message has been successfully deleted.
+   */
+  deleteMessage(threadId: string, messageId: string): Promise<void>
 }
diff --git a/core/src/types/thread/threadInterface.ts b/core/src/types/thread/threadInterface.ts
index 792c8c8a5..4a78812c6 100644
--- a/core/src/types/thread/threadInterface.ts
+++ b/core/src/types/thread/threadInterface.ts
@@ -11,15 +11,23 @@ export interface ThreadInterface {
    * @abstract
    * @returns {Promise<Thread[]>} A promise that resolves to an array of threads.
    */
-  getThreads(): Promise<Thread[]>
+  listThreads(): Promise<Thread[]>
 
   /**
-   * Saves a thread.
+   * Creates a thread.
    * @abstract
    * @param {Thread} thread - The thread to save.
    * @returns {Promise<Thread>} A promise that resolves when the thread is saved.
    */
-  saveThread(thread: Thread): Promise<void>
+  createThread(thread: Thread): Promise<Thread>
+
+  /**
+   * Modifies a thread.
+   * @abstract
+   * @param {Thread} thread - The thread to modify.
+   * @returns {Promise<Thread>} A promise that resolves when the thread is updated.
+   */
+  modifyThread(thread: Thread): Promise<Thread>
 
   /**
    * Deletes a thread.
diff --git a/core/tsconfig.json b/core/tsconfig.json
index b30d65851..bf79a8720 100644
--- a/core/tsconfig.json
+++ b/core/tsconfig.json
@@ -13,7 +13,8 @@
     "declarationDir": "dist/types",
     "outDir": "dist/lib",
     "importHelpers": true,
-    "types": ["@types/jest"]
+    "types": ["@types/jest"],
+    "resolveJsonModule": true
   },
   "include": ["src"],
   "exclude": ["**/*.test.ts"]
diff --git a/docs/openapi/.gitkeep b/docs/openapi/.gitkeep
deleted file mode 100644
index e69de29bb..000000000
diff --git a/docs/openapi/jan.json b/docs/openapi/jan.json
deleted file mode 100644
index 844a8f7ce..000000000
--- a/docs/openapi/jan.json
+++ /dev/null
@@ -1,2397 +0,0 @@
-{
-  "openapi": "3.0.0",
-  "info": {
-    "title": "API Reference",
-    "description": "# Introduction\nJan API is compatible with the [OpenAI API](https://platform.openai.com/docs/api-reference).\n",
-    "version": "0.1.8",
-    "contact": {
-      "name": "Jan Discord",
-      "url": "https://discord.gg/7EcEz7MrvA"
-    },
-    "license": {
-      "name": "AGPLv3",
-      "url": "https://github.com/janhq/nitro/blob/main/LICENSE"
-    }
-  },
-  "servers": [
-    {
-      "url": "http://localhost:1337/v1"
-    }
-  ],
-  "tags": [
-    {
-      "name": "Models",
-      "description": "List and describe the various models available in the API."
-    },
-    {
-      "name": "Chat",
-      "description": "Given a list of messages comprising a conversation, the model will return a response.\n"
-    },
-    {
-      "name": "Messages",
-      "description": "Messages capture a conversation's content. This can include the content from LLM responses and other metadata from [chat completions](/specs/chats).\n"
-    },
-    {
-      "name": "Threads"
-    },
-    {
-      "name": "Assistants",
-      "description": "Configures and utilizes different AI assistants for varied tasks"
-    }
-  ],
-  "x-tagGroups": [
-    {
-      "name": "Endpoints",
-      "tags": ["Models", "Chat"]
-    },
-    {
-      "name": "Chat",
-      "tags": ["Assistants", "Messages", "Threads"]
-    }
-  ],
-  "paths": {
-    "/chat/completions": {
-      "post": {
-        "operationId": "createChatCompletion",
-        "tags": ["Chat"],
-        "summary": "Create chat completion\n",
-        "description": "Creates a model response for the given chat conversation. Equivalent to OpenAI's create chat completion. \n",
-        "requestBody": {
-          "content": {
-            "application/json": {
-              "schema": {
-                "$ref": "#/components/schemas/ChatCompletionRequest"
-              }
-            }
-          }
-        },
-        "responses": {
-          "200": {
-            "description": "OK",
-            "content": {
-              "application/json": {
-                "schema": {
-                  "$ref": "#/components/schemas/ChatCompletionResponse"
-                }
-              }
-            }
-          }
-        }
-      }
-    },
-    "/models": {
-      "get": {
-        "operationId": "listModels",
-        "tags": ["Models"],
-        "summary": "List models",
-        "description": "Lists the currently available models, and provides basic information about each one such as the owner and availability. Equivalent to OpenAI's list model. \n",
\n", - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ListModelsResponse" - } - } - } - } - } - } - }, - "/models/download/{model_id}": { - "get": { - "operationId": "downloadModel", - "tags": ["Models"], - "summary": "Download a specific model.", - "description": "Download a model.\n", - "parameters": [ - { - "in": "path", - "name": "model_id", - "required": true, - "schema": { - "type": "string", - "example": "mistral-ins-7b-q4" - }, - "description": "The ID of the model to use for this request.\n" - } - ], - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/DownloadModelResponse" - } - } - } - } - } - } - }, - "/models/{model_id}": { - "get": { - "operationId": "retrieveModel", - "tags": ["Models"], - "summary": "Retrieve model", - "description": "Get a model instance, providing basic information about the model such as the owner and permissioning. Equivalent to OpenAI's retrieve model. \n", - "parameters": [ - { - "in": "path", - "name": "model_id", - "required": true, - "schema": { - "type": "string", - "example": "mistral-ins-7b-q4" - }, - "description": "The ID of the model to use for this request.\n" - } - ], - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/GetModelResponse" - } - } - } - } - } - }, - "delete": { - "operationId": "deleteModel", - "tags": ["Models"], - "summary": "Delete model", - "description": "Delete a model. Equivalent to OpenAI's delete model. \n", - "parameters": [ - { - "in": "path", - "name": "model_id", - "required": true, - "schema": { - "type": "string", - "example": "mistral-ins-7b-q4" - }, - "description": "The model id to delete\n" - } - ], - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/DeleteModelResponse" - } - } - } - } - } - } - }, - "/threads": { - "post": { - "operationId": "createThread", - "tags": ["Threads"], - "summary": "Create thread", - "description": "Create a thread. Equivalent to OpenAI's create thread. 
\n", - "requestBody": { - "required": false, - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/CreateThreadObject" - } - } - } - }, - "responses": { - "200": { - "description": "Thread created successfully", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/CreateThreadResponse" - } - } - } - } - } - }, - "get": { - "operationId": "listThreads", - "tags": ["Threads"], - "summary": "List threads", - "description": "Retrieves a list of all threads available in the system.\n", - "responses": { - "200": { - "description": "List of threads retrieved successfully", - "content": { - "application/json": { - "schema": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ThreadObject" - }, - "example": [ - { - "id": "thread_abc123", - "object": "thread", - "created_at": 1699014083, - "assistants": ["assistant-001"], - "metadata": {}, - "messages": [] - }, - { - "id": "thread_abc456", - "object": "thread", - "created_at": 1699014083, - "assistants": ["assistant-002", "assistant-003"], - "metadata": {} - } - ] - } - } - } - } - } - } - }, - "/threads/{thread_id}": { - "get": { - "operationId": "getThread", - "tags": ["Threads"], - "summary": "Retrieve thread", - "description": "Retrieves detailed information about a specific thread using its thread_id. Equivalent to OpenAI's retrieve thread. \n", - "parameters": [ - { - "in": "path", - "name": "thread_id", - "required": true, - "schema": { - "type": "string" - }, - "description": "The ID of the thread to retrieve.\n" - } - ], - "responses": { - "200": { - "description": "Thread details retrieved successfully", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/GetThreadResponse" - } - } - } - } - } - }, - "patch": { - "operationId": "modifyThread", - "tags": ["Threads"], - "summary": "Modify thread", - "description": "Modifies a thread. Equivalent to OpenAI's modify thread. \n", - "parameters": [ - { - "in": "path", - "name": "thread_id", - "required": true, - "schema": { - "type": "string" - }, - "description": "The ID of the thread to be modified.\n" - } - ], - "requestBody": { - "required": true, - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "title": { - "type": "string", - "description": "Set the title of the thread", - "items": { - "$ref": "#/components/schemas/ThreadMessageObject" - } - } - } - } - } - } - }, - "responses": { - "200": { - "description": "Thread modified successfully", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ModifyThreadResponse" - } - } - } - } - } - }, - "delete": { - "operationId": "deleteThread", - "tags": ["Threads"], - "summary": "Delete thread", - "description": "Delete a thread. Equivalent to OpenAI's delete thread. \n", - "parameters": [ - { - "in": "path", - "name": "thread_id", - "required": true, - "schema": { - "type": "string" - }, - "description": "The ID of the thread to be deleted.\n" - } - ], - "responses": { - "200": { - "description": "Thread deleted successfully", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/DeleteThreadResponse" - } - } - } - } - } - } - }, - "/assistants": { - "get": { - "operationId": "listAssistants", - "tags": ["Assistants"], - "summary": "List assistants", - "description": "Return a list of assistants. Equivalent to OpenAI's list assistants. 
\n", - "responses": { - "200": { - "description": "List of assistants retrieved successfully", - "content": { - "application/json": { - "schema": { - "type": "array", - "items": { - "type": "object", - "properties": { - "id": { - "type": "string" - }, - "object": { - "type": "string" - }, - "version": { - "type": "integer" - }, - "created_at": { - "type": "integer" - }, - "name": { - "type": "string" - }, - "description": { - "type": "string" - }, - "avatar": { - "type": "string", - "format": "uri" - }, - "models": { - "type": "array", - "items": { - "type": "object", - "properties": { - "model_id": { - "type": "string" - } - } - } - }, - "instructions": { - "type": "string" - }, - "events": { - "type": "object", - "properties": { - "in": { - "type": "array", - "items": {} - }, - "out": { - "type": "array", - "items": {} - } - } - }, - "metadata": { - "type": "object" - }, - "x-codeSamples": { - "type": "object", - "properties": { - "cURL": { - "type": "object", - "properties": { - "lang": { - "type": "string", - "example": "cURL" - }, - "source": { - "type": "string", - "example": "curl http://localhost:1337/v1/assistants \\\n -H \"Content-Type: application/json\"\n" - } - } - }, - "JavaScript": { - "type": "object", - "properties": { - "lang": { - "type": "string", - "example": "JavaScript" - }, - "source": { - "type": "string", - "example": "fetch('http://localhost:1337/v1/assistants', {\n method: 'GET',\n headers: {\n 'Content-Type': 'application/json'\n }\n})\n" - } - } - }, - "Node.js": { - "type": "object", - "properties": { - "lang": { - "type": "string", - "example": "Node.js" - }, - "source": { - "type": "string", - "example": "const fetch = require('node-fetch');\n\nfetch('http://localhost:1337/v1/assistants', {\n method: 'GET',\n headers: {\n 'Content-Type': 'application/json'\n }\n})\n" - } - } - }, - "Python": { - "type": "object", - "properties": { - "lang": { - "type": "string", - "example": "Python" - }, - "source": { - "type": "string", - "example": "import requests\n\nurl = 'http://localhost:1337/v1/assistants'\nheaders = {'Content-Type': 'application/json'}\n\nresponse = requests.get(url, headers=headers)\n" - } - } - } - } - } - } - } - } - } - } - } - } - } - }, - "/assistants/{assistant_id}": { - "get": { - "operationId": "getAssistant", - "tags": ["Assistants"], - "summary": "Retrieve assistant", - "description": "Retrieves an assistant. Equivalent to OpenAI's retrieve assistants. \n", - "parameters": [ - { - "in": "path", - "name": "assistant_id", - "required": true, - "schema": { - "type": "string", - "example": "jan" - }, - "description": "The ID of the assistant to retrieve.\n" - } - ], - "responses": { - "200": { - "description": "string", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/RetrieveAssistantResponse" - } - } - } - } - } - } - }, - "/threads/{thread_id}/messages": { - "get": { - "operationId": "listMessages", - "tags": ["Messages"], - "summary": "List messages", - "description": "Retrieves all messages from the given thread. Equivalent to OpenAI's list messages. 
\n", - "parameters": [ - { - "in": "path", - "name": "thread_id", - "required": true, - "schema": { - "type": "string" - }, - "description": "The ID of the thread from which to retrieve messages.\n" - } - ], - "responses": { - "200": { - "description": "List of messages retrieved successfully", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ListMessagesResponse" - } - } - } - } - } - }, - "post": { - "operationId": "createMessage", - "tags": ["Messages"], - "summary": "Create message", - "description": "Create a message. Equivalent to OpenAI's list messages. \n", - "parameters": [ - { - "in": "path", - "name": "thread_id", - "required": true, - "schema": { - "type": "string" - }, - "description": "The ID of the thread to which the message will be posted.\n" - } - ], - "requestBody": { - "required": true, - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "role": { - "type": "string", - "description": "Role of the sender, either 'user' or 'assistant'.\n", - "example": "user", - "enum": ["user", "assistant"] - }, - "content": { - "type": "string", - "description": "Text content of the message.\n", - "example": "How does AI work? Explain it in simple terms." - } - }, - "required": ["role", "content"] - } - } - } - }, - "responses": { - "200": { - "description": "Message created successfully", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/CreateMessageResponse" - } - } - } - } - } - } - }, - "/threads/{thread_id}/messages/{message_id}": { - "get": { - "operationId": "retrieveMessage", - "tags": ["Messages"], - "summary": "Retrieve message", - "description": "Retrieve a specific message from a thread using its thread_id and message_id. Equivalent to OpenAI's retrieve messages. \n", - "parameters": [ - { - "in": "path", - "name": "thread_id", - "required": true, - "schema": { - "type": "string" - }, - "description": "The ID of the thread containing the message.\n" - }, - { - "in": "path", - "name": "message_id", - "required": true, - "schema": { - "type": "string" - }, - "description": "The ID of the message to retrieve.\n" - } - ], - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/GetMessageResponse" - } - } - } - } - } - } - } - }, - "x-webhooks": { - "ModelObject": { - "post": { - "summary": "The model object", - "description": "Describe a model offering that can be used with the API. Equivalent to OpenAI's model object. \n", - "operationId": "ModelObject", - "tags": ["Models"], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ModelObject" - } - } - } - } - } - }, - "AssistantObject": { - "post": { - "summary": "The assistant object", - "description": "Build assistants that can call models and use tools to perform tasks. Equivalent to OpenAI's assistants object. \n", - "operationId": "AssistantObjects", - "tags": ["Assistants"], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/AssistantObject" - } - } - } - } - } - }, - "MessageObject": { - "post": { - "summary": "The message object", - "description": "Information about a message in the thread. Equivalent to OpenAI's message object. 
\n", - "operationId": "MessageObject", - "tags": ["Messages"], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": null - } - } - } - } - } - }, - "ThreadObject": { - "post": { - "summary": "The thread object", - "description": "Represents a thread that contains messages. Equivalent to OpenAI's thread object. ", - "operationId": "ThreadObject", - "tags": ["Threads"], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": null - } - } - } - } - } - } - }, - "components": { - "schemas": { - "ThreadObject": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the thread, defaults to foldername.\n", - "example": "thread_...." - }, - "object": { - "type": "string", - "description": "Type of the object, defaults to thread.\n", - "example": "thread" - }, - "title": { - "type": "string", - "description": "A brief summary or description of the thread, defaults to an empty string.\n", - "example": "funny physics joke" - }, - "assistants": { - "type": "array", - "description": "", - "items": { - "properties": { - "assistant_id": { - "type": "string", - "description": "The identifier of assistant, defaults to \"jan\"\n", - "example": "jan" - }, - "model": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "", - "example": "..." - }, - "settings": { - "type": "object", - "description": "Defaults to and overrides assistant.json's \"settings\" (and if none, then model.json \"settings\")\n" - }, - "parameters": { - "type": "object", - "description": "Defaults to and overrides assistant.json's \"parameters\" (and if none, then model.json \"parameters\")\n" - } - } - } - } - } - }, - "created": { - "type": "integer", - "format": "int64", - "description": "Unix timestamp representing the creation time of the thread, defaults to file creation time.\n", - "example": 1231231 - }, - "metadata": { - "type": "object", - "description": "Metadata associated with the thread, defaults to an empty object.\n", - "example": {} - } - } - }, - "GetThreadResponse": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the thread.", - "example": "thread_abc123" - }, - "object": { - "type": "string", - "description": "Type of the object", - "example": "thread" - }, - "created_at": { - "type": "integer", - "format": "int64", - "description": "Unix timestamp representing the creation time of the thread.", - "example": 1699014083 - }, - "assistants": { - "type": "array", - "items": { - "type": "string" - }, - "description": "List of assistants involved in the thread.", - "example": ["assistant-001"] - }, - "metadata": { - "type": "object", - "description": "Metadata associated with the thread.", - "example": {} - }, - "messages": { - "type": "array", - "items": { - "type": "string" - }, - "description": "List of messages within the thread.", - "example": [] - } - } - }, - "CreateThreadResponse": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the newly created thread.", - "example": "thread_abc123" - }, - "object": { - "type": "string", - "description": "Type of the object, indicating it's a thread.", - "example": "thread" - }, - "created_at": { - "type": "integer", - "format": "int64", - "description": "Unix timestamp representing the creation time of the thread.", - "example": 1699014083 - }, - "metadata": { - "type": "object", - "description": "Metadata associated with the 
newly created thread.", - "example": {} - } - } - }, - "CreateThreadObject": { - "type": "object", - "properties": { - "object": { - "type": "string", - "description": "Type of the object, indicating it's a thread.", - "example": "thread" - }, - "title": { - "type": "string", - "description": "A brief summary or description of the thread, defaults to an empty string.\n", - "example": "funny physics joke" - }, - "assistants": { - "type": "array", - "description": "assistant involved in the thread", - "items": { - "properties": { - "assistant_id": { - "type": "string", - "description": "The identifier of assistant, defaults to \"jan\"\n", - "example": "jan" - }, - "assistant_name": { - "type": "string", - "description": "The name of assistant, defaults to \"Jan\"\n", - "example": "Jan" - }, - "instructions": { - "type": "string", - "description": "The instruction of assistant, defaults to \"Be my grammar corrector\"\n" - }, - "model": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "Model id", - "example": "mistral-ins-7b-q4" - }, - "settings": { - "type": "object", - "description": "Defaults to and overrides assistant.json's \"settings\" (and if none, then model.json \"settings\")\n" - }, - "parameters": { - "type": "object", - "description": "Defaults to and overrides assistant.json's \"parameters\" (and if none, then model.json \"parameters\")\n" - }, - "engine": { - "type": "string", - "description": "Engine id", - "example": "nitro" - } - } - } - } - } - }, - "metadata": { - "type": "object", - "description": "Metadata associated with the thread, defaults to an empty object.\n" - } - } - }, - "ThreadMessageObject": { - "type": "object", - "properties": { - "role": { - "type": "string", - "description": "\"Role of the sender, either 'user' or 'assistant'.\"\n", - "enum": ["user", "assistant"] - }, - "content": { - "type": "string", - "description": "\"Text content of the message.\"\n" - }, - "file_ids": { - "type": "array", - "items": { - "type": "string" - }, - "description": "\"Array of file IDs associated with the message, if any.\"\n" - } - } - }, - "ModifyThreadResponse": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "\"The identifier of the modified thread.\"\n", - "example": "thread_abc123" - }, - "object": { - "type": "string", - "description": "Type of the object, indicating it's a thread.", - "example": "thread" - }, - "created_at": { - "type": "integer", - "format": "int64", - "description": "Unix timestamp representing the creation time of the thread.", - "example": 1699014083 - }, - "metadata": { - "type": "object", - "description": "Metadata associated with the modified thread.", - "example": {} - } - } - }, - "DeleteThreadResponse": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the deleted thread.", - "example": "thread_abc123" - }, - "object": { - "type": "string", - "description": "Type of the object, indicating the thread has been deleted.", - "example": "thread.deleted" - }, - "deleted": { - "type": "boolean", - "description": "Indicates whether the thread was successfully deleted.", - "example": true - } - } - }, - "ListModelsResponse": { - "type": "object", - "properties": { - "object": { - "type": "string", - "enum": ["list"] - }, - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Model" - } - } - }, - "required": ["object", "data"] - }, - "Model": { - "type": "object", - "properties": { - "source_url": 
{ - "type": "string", - "format": "uri", - "description": "URL to the source of the model.", - "example": "https://huggingface.co/janhq/trinity-v1.2-GGUF/resolve/main/trinity-v1.2.Q4_K_M.gguf" - }, - "id": { - "type": "string", - "description": "Unique identifier used in chat-completions model_name, matches folder name.", - "example": "trinity-v1.2-7b" - }, - "object": { - "type": "string", - "example": "model" - }, - "name": { - "type": "string", - "description": "Name of the model.", - "example": "Trinity-v1.2 7B Q4" - }, - "version": { - "type": "string", - "default": "1.0", - "description": "The version number of the model." - }, - "description": { - "type": "string", - "description": "Description of the model.", - "example": "Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes." - }, - "format": { - "type": "string", - "description": "State format of the model, distinct from the engine.", - "example": "gguf" - }, - "settings": { - "type": "object", - "properties": { - "ctx_len": { - "type": "integer", - "description": "Context length.", - "example": 4096 - }, - "prompt_template": { - "type": "string", - "example": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant" - } - }, - "additionalProperties": false - }, - "parameters": { - "type": "object", - "properties": { - "temperature": { - "example": 0.7 - }, - "top_p": { - "example": 0.95 - }, - "stream": { - "example": true - }, - "max_tokens": { - "example": 4096 - }, - "stop": { - "example": [] - }, - "frequency_penalty": { - "example": 0 - }, - "presence_penalty": { - "example": 0 - } - }, - "additionalProperties": false - }, - "metadata": { - "type": "object", - "properties": { - "author": { - "type": "string", - "example": "Jan" - }, - "tags": { - "type": "array", - "items": { - "type": "string" - }, - "example": ["7B", "Merged", "Featured"] - }, - "size": { - "type": "integer", - "example": 4370000000 - }, - "cover": { - "type": "string", - "format": "uri", - "example": "https://raw.githubusercontent.com/janhq/jan/main/models/trinity-v1.2-7b/cover.png" - } - }, - "additionalProperties": false - }, - "engine": { - "example": "nitro" - } - } - }, - "ModelObject": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the model.\n", - "example": "trinity-v1.2-7b" - }, - "object": { - "type": "string", - "description": "The type of the object, indicating it's a model.\n", - "default": "model" - }, - "created": { - "type": "integer", - "format": "int64", - "description": "Unix timestamp representing the creation time of the model.\n", - "example": 1253935178 - }, - "owned_by": { - "type": "string", - "description": "The entity that owns the model.\n", - "example": "_" - } - } - }, - "GetModelResponse": { - "type": "object", - "properties": { - "source_url": { - "type": "string", - "format": "uri", - "description": "URL to the source of the model.", - "example": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf" - }, - "id": { - "type": "string", - "description": "Unique identifier used in chat-completions model_name, matches folder name.", - "example": "mistral-ins-7b-q4" - }, - "object": { - "type": "string", - "example": "model" - }, - "name": { - "type": "string", - "description": "Name of the model.", - "example": "Mistral Instruct 7B Q4" - }, - "version": { - "type": "string", - "default": "1.0", - 
"description": "The version number of the model." - }, - "description": { - "type": "string", - "description": "Description of the model.", - "example": "Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes." - }, - "format": { - "type": "string", - "description": "State format of the model, distinct from the engine.", - "example": "gguf" - }, - "settings": { - "type": "object", - "properties": { - "ctx_len": { - "type": "integer", - "description": "Context length.", - "example": 4096 - }, - "prompt_template": { - "type": "string", - "example": "[INST] {prompt} [/INST]" - } - }, - "additionalProperties": false - }, - "parameters": { - "type": "object", - "properties": { - "temperature": { - "example": 0.7 - }, - "top_p": { - "example": 0.95 - }, - "stream": { - "example": true - }, - "max_tokens": { - "example": 4096 - }, - "stop": { - "example": [] - }, - "frequency_penalty": { - "example": 0 - }, - "presence_penalty": { - "example": 0 - } - }, - "additionalProperties": false - }, - "metadata": { - "type": "object", - "properties": { - "author": { - "type": "string", - "example": "MistralAI" - }, - "tags": { - "type": "array", - "items": { - "type": "string" - }, - "example": ["7B", "Featured", "Foundation Model"] - }, - "size": { - "example": 4370000000, - "type": "integer" - }, - "cover": { - "example": "https://raw.githubusercontent.com/janhq/jan/main/models/mistral-ins-7b-q4/cover.png", - "type": "string" - } - }, - "additionalProperties": false - }, - "engine": { - "example": "nitro" - } - } - }, - "DeleteModelResponse": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the model that was deleted.", - "example": "mistral-ins-7b-q4" - }, - "object": { - "type": "string", - "description": "Type of the object, indicating it's a model.", - "default": "model" - }, - "deleted": { - "type": "boolean", - "description": "Indicates whether the model was successfully deleted.", - "example": true - } - } - }, - "StartModelResponse": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the model that was started.", - "example": "model-zephyr-7B" - }, - "object": { - "type": "string", - "description": "Type of the object, indicating it's a model.", - "default": "model" - }, - "state": { - "type": "string", - "description": "The current state of the model after the start operation.", - "example": "running" - } - }, - "required": ["id", "object", "state"] - }, - "StopModelResponse": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the model that was started.", - "example": "model-zephyr-7B" - }, - "object": { - "type": "string", - "description": "Type of the object, indicating it's a model.", - "default": "model" - }, - "state": { - "type": "string", - "description": "The current state of the model after the start operation.", - "example": "stopped" - } - }, - "required": ["id", "object", "state"] - }, - "DownloadModelResponse": { - "type": "object", - "properties": { - "message": { - "type": "string", - "description": "Message indicates Jan starting download corresponding model.", - "example": "Starting download mistral-ins-7b-q4" - } - } - }, - "MessageObject": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "Sequential or UUID identifier of the message.\n", - "example": 0 - }, - "object": { - "type": "string", - "description": "Type of the 
object, defaults to 'thread.message'.\n", - "example": "thread.message" - }, - "created_at": { - "type": "integer", - "format": "int64", - "description": "Unix timestamp representing the creation time of the message.\n" - }, - "thread_id": { - "type": "string", - "description": "Identifier of the thread to which this message belongs. Defaults to parent thread.\n", - "example": "thread_asdf" - }, - "assistant_id": { - "type": "string", - "description": "Identifier of the assistant involved in the message. Defaults to parent thread.\n", - "example": "jan" - }, - "role": { - "type": "string", - "enum": ["user", "assistant"], - "description": "Role of the sender, either 'user' or 'assistant'.\n" - }, - "content": { - "type": "array", - "items": { - "type": "object", - "properties": { - "type": { - "type": "string", - "description": "Type of content, e.g., 'text'.\n" - }, - "text": { - "type": "object", - "properties": { - "value": { - "type": "string", - "description": "Text content of the message.\n", - "example": "Hi!?" - }, - "annotations": { - "type": "array", - "items": { - "type": "string" - }, - "description": "Annotations for the text content, if any.\n", - "example": [] - } - } - } - } - } - }, - "metadata": { - "type": "object", - "description": "Metadata associated with the message, defaults to an empty object.\n", - "example": {} - } - } - }, - "GetMessageResponse": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the message.", - "example": "msg_abc123" - }, - "object": { - "type": "string", - "description": "Type of the object, indicating it's a thread message.", - "default": "thread.message" - }, - "created_at": { - "type": "integer", - "format": "int64", - "description": "Unix timestamp representing the creation time of the message.", - "example": 1699017614 - }, - "thread_id": { - "type": "string", - "description": "Identifier of the thread to which this message belongs.", - "example": "thread_abc123" - }, - "role": { - "type": "string", - "description": "Role of the sender, either 'user' or 'assistant'.", - "example": "user" - }, - "content": { - "type": "array", - "items": { - "type": "object", - "properties": { - "type": { - "type": "string", - "description": "Type of content, e.g., 'text'.", - "example": "text" - }, - "text": { - "type": "object", - "properties": { - "value": { - "type": "string", - "description": "Text content of the message.", - "example": "How does AI work? Explain it in simple terms." 
- }, - "annotations": { - "type": "array", - "items": { - "type": "string" - }, - "description": "Annotations for the text content, if any.", - "example": [] - } - } - } - } - } - }, - "file_ids": { - "type": "array", - "items": { - "type": "string" - }, - "description": "Array of file IDs associated with the message, if any.", - "example": [] - }, - "assistant_id": { - "type": "string", - "description": "Identifier of the assistant involved in the message, if applicable.", - "example": null - }, - "run_id": { - "type": "string", - "description": "Run ID associated with the message, if applicable.", - "example": null - }, - "metadata": { - "type": "object", - "description": "Metadata associated with the message.", - "example": {} - } - } - }, - "CreateMessageResponse": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the created message.", - "example": "msg_abc123" - }, - "object": { - "type": "string", - "description": "Type of the object, indicating it's a thread message.", - "example": "thread.message" - }, - "created_at": { - "type": "integer", - "format": "int64", - "description": "Unix timestamp representing the creation time of the message.", - "example": 1699017614 - }, - "thread_id": { - "type": "string", - "description": "Identifier of the thread to which this message belongs.", - "example": "thread_abc123" - }, - "role": { - "type": "string", - "description": "Role of the sender, either 'user' or 'assistant'.", - "example": "user" - }, - "content": { - "type": "array", - "items": { - "type": "object", - "properties": { - "type": { - "type": "string", - "description": "Type of content, e.g., 'text'.", - "example": "text" - }, - "text": { - "type": "object", - "properties": { - "value": { - "type": "string", - "description": "Text content of the message.", - "example": "How does AI work? Explain it in simple terms." 
- }, - "annotations": { - "type": "array", - "items": { - "type": "string" - }, - "description": "Annotations for the text content, if any.", - "example": [] - } - } - } - } - } - }, - "file_ids": { - "type": "array", - "items": { - "type": "string" - }, - "description": "Array of file IDs associated with the message, if any.", - "example": [] - }, - "assistant_id": { - "type": "string", - "description": "Identifier of the assistant involved in the message, if applicable.", - "example": null - }, - "run_id": { - "type": "string", - "description": "Run ID associated with the message, if applicable.", - "example": null - }, - "metadata": { - "type": "object", - "description": "Metadata associated with the message.", - "example": {} - } - } - }, - "ListMessagesResponse": { - "type": "object", - "properties": { - "object": { - "type": "string", - "description": "Type of the object, indicating it's a list.", - "default": "list" - }, - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ListMessageObject" - } - }, - "first_id": { - "type": "string", - "description": "Identifier of the first message in the list.", - "example": "msg_abc123" - }, - "last_id": { - "type": "string", - "description": "Identifier of the last message in the list.", - "example": "msg_abc456" - }, - "has_more": { - "type": "boolean", - "description": "Indicates whether there are more messages to retrieve.", - "example": false - } - } - }, - "ListMessageObject": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the message.", - "example": "msg_abc123" - }, - "object": { - "type": "string", - "description": "Type of the object, indicating it's a thread message.", - "example": "thread.message" - }, - "created_at": { - "type": "integer", - "format": "int64", - "description": "Unix timestamp representing the creation time of the message.", - "example": 1699017614 - }, - "thread_id": { - "type": "string", - "description": "Identifier of the thread to which this message belongs.", - "example": "thread_abc123" - }, - "role": { - "type": "string", - "description": "Role of the sender, either 'user' or 'assistant'.", - "example": "user" - }, - "content": { - "type": "array", - "items": { - "type": "object", - "properties": { - "type": { - "type": "string", - "description": "Type of content, e.g., 'text'." - }, - "text": { - "type": "object", - "properties": { - "value": { - "type": "string", - "description": "Text content of the message.", - "example": "How does AI work? Explain it in simple terms." - }, - "annotations": { - "type": "array", - "items": { - "type": "string" - }, - "description": "Annotations for the text content, if any." 
- } - } - } - } - } - }, - "file_ids": { - "type": "array", - "items": { - "type": "string" - }, - "description": "Array of file IDs associated with the message, if any.", - "example": [] - }, - "assistant_id": { - "type": "string", - "description": "Identifier of the assistant involved in the message, if applicable.", - "example": null - }, - "run_id": { - "type": "string", - "description": "Run ID associated with the message, if applicable.", - "example": null - }, - "metadata": { - "type": "object", - "description": "Metadata associated with the message.", - "example": {} - } - } - }, - "MessageFileObject": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the file.", - "example": "file-abc123" - }, - "object": { - "type": "string", - "description": "Type of the object, indicating it's a thread message file.", - "example": "thread.message.file" - }, - "created_at": { - "type": "integer", - "format": "int64", - "description": "Unix timestamp representing the creation time of the file.", - "example": 1699061776 - }, - "message_id": { - "type": "string", - "description": "Identifier of the message to which this file is associated.", - "example": "msg_abc123" - } - } - }, - "ListMessageFilesResponse": { - "type": "object", - "properties": { - "object": { - "type": "string", - "description": "Type of the object, indicating it's a list.", - "default": "list" - }, - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MessageFileObject" - } - } - } - }, - "ChatObject": { - "type": "object", - "properties": { - "messages": { - "type": "array", - "description": "Contains input data or prompts for the model to process.\n", - "items": { - "type": "object", - "properties": { - "content": { - "type": "string" - }, - "role": { - "type": "string" - } - } - }, - "example": [ - { - "content": "Hello there :wave:", - "role": "assistant" - }, - { - "content": "Can you write a long story", - "role": "user" - } - ] - }, - "stream": { - "type": "boolean", - "default": true, - "description": "Enables continuous output generation, allowing for streaming of model responses." - }, - "model": { - "type": "string", - "example": "gpt-3.5-turbo", - "description": "Specifies the model being used for inference or processing tasks." - }, - "max_tokens": { - "type": "number", - "default": 2048, - "description": "The maximum number of tokens the model will generate in a single response." - }, - "stop": { - "type": "array", - "items": { - "type": "string" - }, - "description": "Defines specific tokens or phrases at which the model will stop generating further output.", - "example": ["hello"] - }, - "frequency_penalty": { - "type": "number", - "default": 0, - "description": "Adjusts the likelihood of the model repeating words or phrases in its output." - }, - "presence_penalty": { - "type": "number", - "default": 0, - "description": "Influences the generation of new and varied concepts in the model's output." - }, - "temperature": { - "type": "number", - "default": 0.7, - "description": "Controls the randomness of the model's output." - }, - "top_p": { - "type": "number", - "default": 0.95, - "description": "Set probability threshold for more relevant outputs." - }, - "cache_prompt": { - "type": "boolean", - "default": true, - "description": "Optimize performance in repeated or similar requests." 
- } - } - }, - "ChatCompletionRequest": { - "type": "object", - "properties": { - "messages": { - "type": "array", - "description": "Contains input data or prompts for the model to process.\n", - "items": { - "type": "object", - "properties": { - "content": { - "type": "string" - }, - "role": { - "type": "string" - } - } - }, - "example": [ - { - "content": "You are a helpful assistant.", - "role": "system" - }, - { - "content": "Hello!", - "role": "user" - } - ] - }, - "model": { - "type": "string", - "example": "tinyllama-1.1b", - "description": "Specifies the model being used for inference or processing tasks.\n" - }, - "stream": { - "type": "boolean", - "default": true, - "description": "Enables continuous output generation, allowing for streaming of model responses.\n" - }, - "max_tokens": { - "type": "number", - "default": 2048, - "description": "The maximum number of tokens the model will generate in a single response.\n" - }, - "stop": { - "type": "array", - "items": { - "type": "string" - }, - "description": "Defines specific tokens or phrases at which the model will stop generating further output.\n", - "example": ["hello"] - }, - "frequency_penalty": { - "type": "number", - "default": 0, - "description": "Adjusts the likelihood of the model repeating words or phrases in its output.\n" - }, - "presence_penalty": { - "type": "number", - "default": 0, - "description": "Influences the generation of new and varied concepts in the model's output.\n" - }, - "temperature": { - "type": "number", - "default": 0.7, - "description": "Controls the randomness of the model's output.\n" - }, - "top_p": { - "type": "number", - "default": 0.95, - "description": "Set probability threshold for more relevant outputs.\n" - } - } - }, - "ChatCompletionResponse": { - "type": "object", - "description": "Description of the response structure", - "properties": { - "choices": { - "type": "array", - "description": "Array of choice objects", - "items": { - "type": "object", - "properties": { - "finish_reason": { - "type": "string", - "nullable": true, - "example": null, - "description": "Reason for finishing the response, if applicable" - }, - "index": { - "type": "integer", - "example": 0, - "description": "Index of the choice" - }, - "message": { - "type": "object", - "properties": { - "content": { - "type": "string", - "example": "Hello user. 
What can I help you with?", - "description": "Content of the message" - }, - "role": { - "type": "string", - "example": "assistant", - "description": "Role of the sender" - } - } - } - } - } - }, - "created": { - "type": "integer", - "example": 1700193928, - "description": "Timestamp of when the response was created" - }, - "id": { - "type": "string", - "example": "ebwd2niJvJB1Q2Whyvkz", - "description": "Unique identifier of the response" - }, - "model": { - "type": "string", - "nullable": true, - "example": "_", - "description": "Model used for generating the response" - }, - "object": { - "type": "string", - "example": "chat.completion", - "description": "Type of the response object" - }, - "system_fingerprint": { - "type": "string", - "nullable": true, - "example": "_", - "description": "System fingerprint" - }, - "usage": { - "type": "object", - "description": "Information about the usage of tokens", - "properties": { - "completion_tokens": { - "type": "integer", - "example": 500, - "description": "Number of tokens used for completion" - }, - "prompt_tokens": { - "type": "integer", - "example": 33, - "description": "Number of tokens used in the prompt" - }, - "total_tokens": { - "type": "integer", - "example": 533, - "description": "Total number of tokens used" - } - } - } - } - }, - "AssistantObject": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the assistant.", - "example": "asst_abc123" - }, - "object": { - "type": "string", - "description": "Type of the object, indicating it's an assistant.", - "default": "assistant" - }, - "version": { - "type": "integer", - "description": "Version number of the assistant.", - "example": 1 - }, - "created_at": { - "type": "integer", - "format": "int64", - "description": "Unix timestamp representing the creation time of the assistant.", - "example": 1698984975 - }, - "name": { - "type": "string", - "description": "Name of the assistant.", - "example": "Math Tutor" - }, - "description": { - "type": "string", - "description": "Description of the assistant. Can be null.", - "example": null - }, - "avatar": { - "type": "string", - "description": "URL of the assistant's avatar. Jan-specific property.", - "example": "https://pic.png" - }, - "models": { - "type": "array", - "description": "List of models associated with the assistant. Jan-specific property.", - "items": { - "type": "object", - "properties": { - "model_id": { - "type": "string", - "example": "model_0" - } - } - } - }, - "instructions": { - "type": "string", - "description": "A system prompt for the assistant.", - "example": "Be concise" - }, - "events": { - "type": "object", - "description": "Event subscription settings for the assistant.", - "properties": { - "in": { - "type": "array", - "items": { - "type": "string" - } - }, - "out": { - "type": "array", - "items": { - "type": "string" - } - } - } - }, - "metadata": { - "type": "object", - "description": "Metadata associated with the assistant." 
- } - } - }, - "ListAssistantsResponse": { - "type": "object" - }, - "CreateAssistantResponse": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the assistant.", - "example": "asst_abc123" - }, - "object": { - "type": "string", - "description": "Type of the object, indicating it's an assistant.", - "default": "assistant" - }, - "version": { - "type": "integer", - "description": "Version number of the assistant.", - "example": 1 - }, - "created_at": { - "type": "integer", - "format": "int64", - "description": "Unix timestamp representing the creation time of the assistant.", - "example": 1698984975 - }, - "name": { - "type": "string", - "description": "Name of the assistant.", - "example": "Math Tutor" - }, - "description": { - "type": "string", - "description": "Description of the assistant. Can be null.", - "example": null - }, - "avatar": { - "type": "string", - "description": "URL of the assistant's avatar. Jan-specific property.", - "example": "https://pic.png" - }, - "models": { - "type": "array", - "description": "List of models associated with the assistant. Jan-specific property.", - "items": { - "type": "object", - "properties": { - "model_id": { - "type": "string", - "example": "model_0" - } - } - } - }, - "instructions": { - "type": "string", - "description": "A system prompt for the assistant.", - "example": "Be concise" - }, - "events": { - "type": "object", - "description": "Event subscription settings for the assistant.", - "properties": { - "in": { - "type": "array", - "items": { - "type": "string" - } - }, - "out": { - "type": "array", - "items": { - "type": "string" - } - } - } - }, - "metadata": { - "type": "object", - "description": "Metadata associated with the assistant." - } - } - }, - "RetrieveAssistantResponse": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the assistant.", - "example": "asst_abc123" - }, - "object": { - "type": "string", - "description": "Type of the object, indicating it's an assistant.", - "default": "assistant" - }, - "version": { - "type": "integer", - "description": "Version number of the assistant.", - "example": 1 - }, - "created_at": { - "type": "integer", - "format": "int64", - "description": "Unix timestamp representing the creation time of the assistant.", - "example": 1698984975 - }, - "name": { - "type": "string", - "description": "Name of the assistant.", - "example": "Math Tutor" - }, - "description": { - "type": "string", - "description": "Description of the assistant. Can be null.", - "example": null - }, - "avatar": { - "type": "string", - "description": "URL of the assistant's avatar. Jan-specific property.", - "example": "https://pic.png" - }, - "models": { - "type": "array", - "description": "List of models associated with the assistant. Jan-specific property.", - "items": { - "type": "object", - "properties": { - "model_id": { - "type": "string", - "example": "model_0" - } - } - } - }, - "instructions": { - "type": "string", - "description": "A system prompt for the assistant.", - "example": "Be concise" - }, - "events": { - "type": "object", - "description": "Event subscription settings for the assistant.", - "properties": { - "in": { - "type": "array", - "items": { - "type": "string" - } - }, - "out": { - "type": "array", - "items": { - "type": "string" - } - } - } - }, - "metadata": { - "type": "object", - "description": "Metadata associated with the assistant." 
- } - } - }, - "ModifyAssistantObject": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the assistant.", - "example": "asst_abc123" - }, - "object": { - "type": "string", - "description": "Type of the object, indicating it's an assistant.", - "default": "assistant" - }, - "version": { - "type": "integer", - "description": "Version number of the assistant.", - "example": 1 - }, - "created_at": { - "type": "integer", - "format": "int64", - "description": "Unix timestamp representing the creation time of the assistant.", - "example": 1698984975 - }, - "name": { - "type": "string", - "description": "Name of the assistant.", - "example": "Math Tutor" - }, - "description": { - "type": "string", - "description": "Description of the assistant. Can be null.", - "example": null - }, - "avatar": { - "type": "string", - "description": "URL of the assistant's avatar. Jan-specific property.", - "example": "https://pic.png" - }, - "models": { - "type": "array", - "description": "List of models associated with the assistant. Jan-specific property.", - "items": { - "type": "object", - "properties": { - "model_id": { - "type": "string", - "example": "model_0" - } - } - } - }, - "instructions": { - "type": "string", - "description": "A system prompt for the assistant.", - "example": "Be concise" - }, - "events": { - "type": "object", - "description": "Event subscription settings for the assistant.", - "properties": { - "in": { - "type": "array", - "items": { - "type": "string" - } - }, - "out": { - "type": "array", - "items": { - "type": "string" - } - } - } - }, - "metadata": { - "type": "object", - "description": "Metadata associated with the assistant." - } - } - }, - "ModifyAssistantResponse": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the assistant.", - "example": "asst_abc123" - }, - "object": { - "type": "string", - "description": "Type of the object, indicating it's an assistant.", - "default": "assistant" - }, - "version": { - "type": "integer", - "description": "Version number of the assistant.", - "example": 1 - }, - "created_at": { - "type": "integer", - "format": "int64", - "description": "Unix timestamp representing the creation time of the assistant.", - "example": 1698984975 - }, - "name": { - "type": "string", - "description": "Name of the assistant.", - "example": "Physics Tutor" - }, - "description": { - "type": "string", - "description": "Description of the assistant. Can be null.", - "example": null - }, - "avatar": { - "type": "string", - "description": "URL of the assistant's avatar. Jan-specific property.", - "example": "https://pic.png" - }, - "models": { - "type": "array", - "description": "List of models associated with the assistant. Jan-specific property.", - "items": { - "type": "object", - "properties": { - "model_id": { - "type": "string", - "example": "model_0" - } - } - } - }, - "instructions": { - "type": "string", - "description": "A system prompt for the assistant.", - "example": "Be concise!" - }, - "events": { - "type": "object", - "description": "Event subscription settings for the assistant.", - "properties": { - "in": { - "type": "array", - "items": { - "type": "string" - } - }, - "out": { - "type": "array", - "items": { - "type": "string" - } - } - } - }, - "metadata": { - "type": "object", - "description": "Metadata associated with the assistant." 
- } - } - }, - "DeleteAssistantResponse": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The identifier of the deleted assistant.", - "example": "asst_abc123" - }, - "object": { - "type": "string", - "description": "Type of the object, indicating the assistant has been deleted.", - "example": "assistant.deleted" - }, - "deleted": { - "type": "boolean", - "description": "Indicates whether the assistant was successfully deleted.", - "example": true - } - } - } - } - } -} diff --git a/docs/openapi/jan.yaml b/docs/openapi/jan.yaml deleted file mode 100644 index 35fd43175..000000000 --- a/docs/openapi/jan.yaml +++ /dev/null @@ -1,1043 +0,0 @@ ---- -openapi: 3.0.0 -info: - title: API Reference - description: > - # Introduction - - Jan API is compatible with the [OpenAI API](https://platform.openai.com/docs/api-reference). -version: 0.1.8 -contact: - name: Jan Discord - url: https://discord.gg/7EcEz7MrvA -license: - name: AGPLv3 - url: https://github.com/janhq/nitro/blob/main/LICENSE -servers: - - url: /v1 -tags: - - name: Models - description: List and describe the various models available in the API. - - name: Chat - description: > - Given a list of messages comprising a conversation, the model will - return a response. - - name: Messages - description: > - Messages capture a conversation's content. This can include the - content from LLM responses and other metadata from [chat - completions](/specs/chats). - - name: Threads - - name: Assistants - description: Configures and utilizes different AI assistants for varied tasks -x-tagGroups: - - name: Endpoints - tags: - - Models - - Chat - - name: Chat - tags: - - Assistants - - Messages - - Threads -paths: - /chat/completions: - post: - operationId: createChatCompletion - tags: - - Chat - summary: | - Create chat completion - description: > - Creates a model response for the given chat conversation. - Equivalent to OpenAI's create chat completion. 
- requestBody: - content: - application/json: - schema: - $ref: specs/chat.yaml#/components/schemas/ChatCompletionRequest - responses: - '200': - description: OK - content: - application/json: - schema: - $ref: specs/chat.yaml#/components/schemas/ChatCompletionResponse - x-codeSamples: - - lang: cURL - source: | - curl -X 'POST' \ - 'http://localhost:1337/v1/chat/completions' \ - -H 'accept: application/json' \ - -H 'Content-Type: application/json' \ - -d '{ - "messages": [ - { - "content": "You are a helpful assistant.", - "role": "system" - }, - { - "content": "Hello!", - "role": "user" - } - ], - "model": "tinyllama-1.1b", - "stream": true, - "max_tokens": 2048, - "stop": [ - "hello" - ], - "frequency_penalty": 0, - "presence_penalty": 0, - "temperature": 0.7, - "top_p": 0.95 - }' - - lang: JavaScript - source: |- - const data = { - messages: [ - { - content: 'You are a helpful assistant.', - role: 'system' - }, - { - content: 'Hello!', - role: 'user' - } - ], - model: 'tinyllama-1.1b', - stream: true, - max_tokens: 2048, - stop: ['hello'], - frequency_penalty: 0, - presence_penalty: 0, - temperature: 0.7, - top_p: 0.95 - }; - - fetch('http://localhost:1337/v1/chat/completions', { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'Accept': 'application/json' - }, - body: JSON.stringify(data) - }) - .then(response => response.json()) - .then(data => console.log(data)); - - lang: Node.js - source: |- - const fetch = require('node-fetch'); - - const data = { - messages: [ - { - content: 'You are a helpful assistant.', - role: 'system' - }, - { - content: 'Hello!', - role: 'user' - } - ], - model: 'tinyllama-1.1b', - stream: true, - max_tokens: 2048, - stop: ['hello'], - frequency_penalty: 0, - presence_penalty: 0, - temperature: 0.7, - top_p: 0.95 - }; - - fetch('http://localhost:1337/v1/chat/completions', { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'Accept': 'application/json' - }, - body: JSON.stringify(data) - }) - .then(response => response.json()) - .then(data => console.log(data)); - - lang: Python - source: >- - import requests - - import json - - - data = { - "messages": [ - { - "content": "You are a helpful assistant.", - "role": "system" - }, - { - "content": "Hello!", - "role": "user" - } - ], - "model": "tinyllama-1.1b", - "stream": true, - "max_tokens": 2048, - "stop": [ - "hello" - ], - "frequency_penalty": 0, - "presence_penalty": 0, - "temperature": 0.7, - "top_p": 0.95 - } - - - response = requests.post('http://localhost:1337/v1/chat/completions', json=data) - - print(response.json()) - /models: - get: - operationId: listModels - tags: - - Models - summary: List models - description: > - Lists the currently available models, and provides basic - information about each one such as the owner and availability. - Equivalent to OpenAI's list model. 
- responses: - '200': - description: OK - content: - application/json: - schema: - $ref: specs/models.yaml#/components/schemas/ListModelsResponse - x-codeSamples: - - lang: cURL - source: |- - curl -X 'GET' \ - 'http://localhost:1337/v1/models' \ - -H 'accept: application/json' - - lang: JavaScript - source: |- - const response = await fetch('http://localhost:1337/v1/models', { - method: 'GET', - headers: {Accept: 'application/json'} - }); - const data = await response.json(); - - lang: Node.js - source: |- - const fetch = require('node-fetch'); - - const url = 'http://localhost:1337/v1/models'; - const options = { - method: 'GET', - headers: { Accept: 'application/json' } - }; - - fetch(url, options) - .then(res => res.json()) - .then(json => console.log(json)); - - lang: Python - source: |- - import requests - - url = 'http://localhost:1337/v1/models' - headers = {'Accept': 'application/json'} - response = requests.get(url, headers=headers) - data = response.json() - '/models/download/{model_id}': - get: - operationId: downloadModel - tags: - - Models - summary: Download a specific model. - description: | - Download a model. - parameters: - - in: path - name: model_id - required: true - schema: - type: string - example: mistral-ins-7b-q4 - description: | - The ID of the model to use for this request. - responses: - '200': - description: OK - content: - application/json: - schema: - $ref: specs/models.yaml#/components/schemas/DownloadModelResponse - x-codeSamples: - - lang: cURL - source: |- - curl -X 'GET' \ - 'http://localhost:1337/v1/models/download/{model_id}' \ - -H 'accept: application/json' - - lang: JavaScript - source: >- - const response = await - fetch('http://localhost:1337/v1/models/download/{model_id}', { - method: 'GET', - headers: {accept: 'application/json'} - }); - - - const data = await response.json(); - - lang: Node.js - source: |- - const fetch = require('node-fetch'); - - fetch('http://localhost:1337/v1/models/download/{model_id}', { - method: 'GET', - headers: {accept: 'application/json'} - }) - .then(res => res.json()) - .then(data => console.log(data)); - - lang: Python - source: >- - import requests - - - response = requests.get('http://localhost:1337/v1/models/download/{model_id}', headers={'accept': 'application/json'}) - - data = response.json() - '/models/{model_id}': - get: - operationId: retrieveModel - tags: - - Models - summary: Retrieve model - description: > - Get a model instance, providing basic information about the model - such as the owner and permissioning. - Equivalent to OpenAI's retrieve model. - parameters: - - in: path - name: model_id - required: true - schema: - type: string - example: mistral-ins-7b-q4 - description: | - The ID of the model to use for this request. 
- responses: - '200': - description: OK - content: - application/json: - schema: - $ref: specs/models.yaml#/components/schemas/GetModelResponse - x-codeSamples: - - lang: cURL - source: |- - curl -X 'GET' \ - 'http://localhost:1337/v1/models/{model_id}' \ - -H 'accept: application/json' - - lang: JavaScript - source: |- - const fetch = require('node-fetch'); - - const modelId = 'mistral-ins-7b-q4'; - - fetch(`http://localhost:1337/v1/models/${modelId}`, { - method: 'GET', - headers: {'accept': 'application/json'} - }) - .then(res => res.json()) - .then(json => console.log(json)); - - lang: Node.js - source: |- - const fetch = require('node-fetch'); - - const modelId = 'mistral-ins-7b-q4'; - - fetch(`http://localhost:1337/v1/models/${modelId}`, { - method: 'GET', - headers: {'accept': 'application/json'} - }) - .then(res => res.json()) - .then(json => console.log(json)); - - lang: Python - source: >- - import requests - - - model_id = 'mistral-ins-7b-q4' - - - response = requests.get(f'http://localhost:1337/v1/models/{model_id}', headers={'accept': 'application/json'}) - - print(response.json()) - delete: - operationId: deleteModel - tags: - - Models - summary: Delete model - description: > - Delete a model. - Equivalent to OpenAI's delete model. - parameters: - - in: path - name: model_id - required: true - schema: - type: string - example: mistral-ins-7b-q4 - description: | - The model id to delete - responses: - '200': - description: OK - content: - application/json: - schema: - $ref: specs/models.yaml#/components/schemas/DeleteModelResponse - x-codeSamples: - - lang: cURL - source: |- - curl -X 'DELETE' \ - 'http://localhost:1337/v1/models/{model_id}' \ - -H 'accept: application/json' - - lang: JavaScript - source: |- - const fetch = require('node-fetch'); - - const modelId = 'mistral-ins-7b-q4'; - - fetch(`http://localhost:1337/v1/models/${modelId}`, { - method: 'DELETE', - headers: { 'accept': 'application/json' } - }) - .then(res => res.json()) - .then(json => console.log(json)); - - lang: Node.js - source: |- - const fetch = require('node-fetch'); - - const modelId = 'mistral-ins-7b-q4'; - - fetch(`http://localhost:1337/v1/models/${modelId}`, { - method: 'DELETE', - headers: { 'accept': 'application/json' } - }) - .then(res => res.json()) - .then(json => console.log(json)); - - lang: Python - source: >- - import requests - - - model_id = 'mistral-ins-7b-q4' - - - response = requests.delete(f'http://localhost:1337/v1/models/{model_id}', headers={'accept': 'application/json'}) - /threads: - post: - operationId: createThread - tags: - - Threads - summary: Create thread - description: > - Create a thread. - Equivalent to OpenAI's create thread. - requestBody: - required: false - content: - application/json: - schema: - $ref: specs/threads.yaml#/components/schemas/CreateThreadObject - responses: - '200': - description: Thread created successfully - content: - application/json: - schema: - $ref: specs/threads.yaml#/components/schemas/CreateThreadResponse - x-codeSamples: - - lang: cURL - source: | - curl -X POST http://localhost:1337/v1/threads \ - -H "Content-Type: application/json" \ - -d '{ - "messages": [{ - "role": "user", - "content": "Hello, what is AI?", - "file_ids": ["file-abc123"] - }, { - "role": "user", - "content": "How does AI work? Explain it in simple terms." 
- }] - }' - - lang: JavaScript - source: |- - const fetch = require('node-fetch'); - - fetch('http://localhost:1337/v1/threads', { - method: 'POST', - headers: { - 'Content-Type': 'application/json' - }, - body: JSON.stringify({ - messages: [ - { - role: 'user', - content: 'Hello, what is AI?', - file_ids: ['file-abc123'] - }, - { - role: 'user', - content: 'How does AI work? Explain it in simple terms.' - } - ] - }) - }); - - lang: Node.js - source: |- - const fetch = require('node-fetch'); - - fetch('http://localhost:1337/v1/threads', { - method: 'POST', - headers: { - 'Content-Type': 'application/json' - }, - body: JSON.stringify({ - messages: [ - { - role: 'user', - content: 'Hello, what is AI?', - file_ids: ['file-abc123'] - }, - { - role: 'user', - content: 'How does AI work? Explain it in simple terms.' - } - ] - }) - }); - - lang: Python - source: |- - import requests - - url = 'http://localhost:1337/v1/threads' - payload = { - 'messages': [ - { - 'role': 'user', - 'content': 'Hello, what is AI?', - 'file_ids': ['file-abc123'] - }, - { - 'role': 'user', - 'content': 'How does AI work? Explain it in simple terms.' - } - ] - } - - response = requests.post(url, json=payload) - print(response.text) - get: - operationId: listThreads - tags: - - Threads - summary: List threads - description: | - Retrieves a list of all threads available in the system. - responses: - '200': - description: List of threads retrieved successfully - content: - application/json: - schema: - type: array - items: - $ref: specs/threads.yaml#/components/schemas/ThreadObject - example: - - id: thread_abc123 - object: thread - created_at: 1699014083 - assistants: - - assistant-001 - metadata: {} - messages: [] - - id: thread_abc456 - object: thread - created_at: 1699014083 - assistants: - - assistant-002 - - assistant-003 - metadata: {} - x-codeSamples: - - lang: cURL - source: |- - curl http://localhost:1337/v1/threads \ - -H "Content-Type: application/json" - - lang: JavaScript - source: |- - const fetch = require('node-fetch'); - - fetch('http://localhost:1337/v1/threads', { - method: 'GET', - headers: {'Content-Type': 'application/json'} - }).then(res => res.json()) - .then(json => console.log(json)); - - lang: Node.js - source: |- - const fetch = require('node-fetch'); - - fetch('http://localhost:1337/v1/threads', { - method: 'GET', - headers: {'Content-Type': 'application/json'} - }).then(res => res.json()) - .then(json => console.log(json)); - - lang: Python - source: |- - import requests - - url = 'http://localhost:1337/v1/threads' - headers = {'Content-Type': 'application/json'} - - response = requests.get(url, headers=headers) - print(response.json()) - '/threads/{thread_id}': - get: - operationId: getThread - tags: - - Threads - summary: Retrieve thread - description: > - Retrieves detailed information about a specific thread using its - thread_id. - Equivalent to OpenAI's retrieve thread. - parameters: - - in: path - name: thread_id - required: true - schema: - type: string - description: | - The ID of the thread to retrieve. - responses: - '200': - description: Thread details retrieved successfully - content: - application/json: - schema: - $ref: specs/threads.yaml#/components/schemas/GetThreadResponse - x-codeSamples: - - lang: cURL - source: | - curl http://localhost:1337/v1/threads/{thread_id} - patch: - operationId: modifyThread - tags: - - Threads - summary: Modify thread - description: > - Modifies a thread. - Equivalent to OpenAI's modify thread. 
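The create and list operations above pair naturally: create a thread, then confirm it shows up in the listing. A minimal sketch under the same assumptions as the earlier example (local server on port 1337, simplified response shapes):

// Sketch: create a thread, then list all threads.
type ThreadSummary = { id: string; object: string; created_at: number }

async function createAndListThreads(): Promise<ThreadSummary[]> {
  const base = 'http://localhost:1337/v1'
  // POST /threads accepts an optional CreateThreadObject body.
  const created = (await fetch(`${base}/threads`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      messages: [{ role: 'user', content: 'Hello, what is AI?' }],
    }),
  }).then((r) => r.json())) as ThreadSummary
  console.log('created thread:', created.id)

  // GET /threads returns an array of ThreadObject entries.
  return (await fetch(`${base}/threads`).then((r) => r.json())) as ThreadSummary[]
}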
- parameters: - - in: path - name: thread_id - required: true - schema: - type: string - description: | - The ID of the thread to be modified. - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - title: - type: string - description: Set the title of the thread - items: - $ref: specs/threads.yaml#/components/schemas/ThreadMessageObject - responses: - '200': - description: Thread modified successfully - content: - application/json: - schema: - $ref: specs/threads.yaml#/components/schemas/ModifyThreadResponse - x-codeSamples: - - lang: cURL - source: | - curl -X POST http://localhost:1337/v1/threads/{thread_id} \ - -H "Content-Type: application/json" \ - -d '{ - "messages": [{ - "role": "user", - "content": "Hello, what is AI?", - "file_ids": ["file-abc123"] - }, { - "role": "user", - "content": "How does AI work? Explain it in simple terms." - }] - }' - delete: - operationId: deleteThread - tags: - - Threads - summary: Delete thread - description: > - Delete a thread. - Equivalent to OpenAI's delete thread. - parameters: - - in: path - name: thread_id - required: true - schema: - type: string - description: | - The ID of the thread to be deleted. - responses: - '200': - description: Thread deleted successfully - content: - application/json: - schema: - $ref: specs/threads.yaml#/components/schemas/DeleteThreadResponse - x-codeSamples: - - lang: cURL - source: | - curl -X DELETE http://localhost:1337/v1/threads/{thread_id} - /assistants: - get: - operationId: listAssistants - tags: - - Assistants - summary: List assistants - description: > - Return a list of assistants. - Equivalent to OpenAI's list assistants. - responses: - '200': - description: List of assistants retrieved successfully - content: - application/json: - schema: - type: array - example: - - id: asst_abc123 - object: assistant - version: 1 - created_at: 1698984975 - name: Math Tutor - description: null - avatar: https://pic.png - models: - - model_id: model_0 - instructions: Be concise - events: - in: [] - out: [] - metadata: {} - - id: asst_abc456 - object: assistant - version: 1 - created_at: 1698984975 - name: Physics Tutor - description: null - avatar: https://pic.png - models: - - model_id: model_1 - instructions: Be concise! - events: - in: [] - out: [] - metadata: {} - x-codeSamples: - - lang: cURL - source: |- - curl http://localhost:1337/v1/assistants \ - -H "Content-Type: application/json" - - lang: JavaScript - source: |- - fetch('http://localhost:1337/v1/assistants', { - method: 'GET', - headers: { - 'Content-Type': 'application/json' - } - }) - - lang: Node.js - source: |- - const fetch = require('node-fetch'); - - fetch('http://localhost:1337/v1/assistants', { - method: 'GET', - headers: { - 'Content-Type': 'application/json' - } - }) - - lang: Python - source: |- - import requests - - url = 'http://localhost:1337/v1/assistants' - headers = {'Content-Type': 'application/json'} - - response = requests.get(url, headers=headers) - '/assistants/{assistant_id}': - get: - operationId: getAssistant - tags: - - Assistants - summary: Retrieve assistant - description: > - Retrieves an assistant. - Equivalent to OpenAI's retrieve assistants. - parameters: - - in: path - name: assistant_id - required: true - schema: - type: string - example: jan - description: | - The ID of the assistant to retrieve. 
- responses: - '200': - description: null - content: - application/json: - schema: - $ref: specs/assistants.yaml#/components/schemas/RetrieveAssistantResponse - x-codeSamples: - - lang: cURL - source: |- - curl http://localhost:1337/v1/assistants/{assistant_id} \ - -H "Content-Type: application/json" - - lang: JavaScript - source: |- - const fetch = require('node-fetch'); - - let assistantId = 'abc123'; - - fetch(`http://localhost:1337/v1/assistants/${assistantId}`, { - method: 'GET', - headers: { - 'Content-Type': 'application/json' - } - }) - - lang: Node.js - source: |- - const fetch = require('node-fetch'); - - let assistantId = 'abc123'; - - fetch(`http://localhost:1337/v1/assistants/${assistantId}`, { - method: 'GET', - headers: { - 'Content-Type': 'application/json' - } - }) - - lang: Python - source: >- - import requests - - - assistant_id = 'abc123' - - - response = requests.get(f'http://localhost:1337/v1/assistants/{assistant_id}', headers={'Content-Type': 'application/json'}) - '/threads/{thread_id}/messages': - get: - operationId: listMessages - tags: - - Messages - summary: List messages - description: > - Retrieves all messages from the given thread. - Equivalent to OpenAI's list messages. - parameters: - - in: path - name: thread_id - required: true - schema: - type: string - description: | - The ID of the thread from which to retrieve messages. - responses: - '200': - description: List of messages retrieved successfully - content: - application/json: - schema: - $ref: specs/messages.yaml#/components/schemas/ListMessagesResponse - x-codeSamples: - - lang: cURL - source: | - curl http://localhost:1337/v1/threads/{thread_id}/messages \ - -H "Content-Type: application/json" - post: - operationId: createMessage - tags: - - Messages - summary: Create message - description: > - Create a message. - Equivalent to OpenAI's list messages. - parameters: - - in: path - name: thread_id - required: true - schema: - type: string - description: | - The ID of the thread to which the message will be posted. - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - role: - type: string - description: | - Role of the sender, either 'user' or 'assistant'. - example: user - enum: - - user - - assistant - content: - type: string - description: | - Text content of the message. - example: How does AI work? Explain it in simple terms. - required: - - role - - content - responses: - '200': - description: Message created successfully - content: - application/json: - schema: - $ref: specs/messages.yaml#/components/schemas/CreateMessageResponse - x-codeSamples: - - lang: cURL - source: | - curl -X POST http://localhost:1337/v1/threads/{thread_id}/messages \ - -H "Content-Type: application/json" \ - -d '{ - "role": "user", - "content": "How does AI work? Explain it in simple terms." - }' - '/threads/{thread_id}/messages/{message_id}': - get: - operationId: retrieveMessage - tags: - - Messages - summary: Retrieve message - description: > - Retrieve a specific message from a thread using its thread_id and - message_id. - Equivalent to OpenAI's retrieve messages. - parameters: - - in: path - name: thread_id - required: true - schema: - type: string - description: | - The ID of the thread containing the message. - - in: path - name: message_id - required: true - schema: - type: string - description: | - The ID of the message to retrieve. 
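The message operations compose the same way: post a user message into a thread, then read the thread's messages back. A minimal sketch, again assuming a local server on port 1337; the `data` field comes from the ListMessagesResponse schema defined later in this diff:

// Sketch: add a message to a thread, then list the thread's messages.
type MessageSummary = { id: string; role: 'user' | 'assistant' }

async function askInThread(
  threadId: string,
  content: string
): Promise<MessageSummary[]> {
  const base = 'http://localhost:1337/v1'
  // createMessage requires `role` and `content`.
  await fetch(`${base}/threads/${threadId}/messages`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ role: 'user', content }),
  })
  // listMessages returns a ListMessagesResponse whose `data` holds the messages.
  const list = (await fetch(`${base}/threads/${threadId}/messages`).then((r) =>
    r.json()
  )) as { data: MessageSummary[] }
  return list.data
}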
- responses: - '200': - description: OK - content: - application/json: - schema: - $ref: specs/messages.yaml#/components/schemas/GetMessageResponse - x-codeSamples: - - lang: cURL - source: > - curl http://localhost:1337/v1/threads/{thread_id}/messages/{message_id} - \ - -H "Content-Type: application/json" -x-webhooks: - ModelObject: - post: - summary: The model object - description: > - Describe a model offering that can be used with the API. - Equivalent to OpenAI's model object. - operationId: ModelObject - tags: - - Models - requestBody: - content: - application/json: - schema: - $ref: specs/models.yaml#/components/schemas/ModelObject - AssistantObject: - post: - summary: The assistant object - description: > - Build assistants that can call models and use tools to perform - tasks. Equivalent - to OpenAI's assistants object. - operationId: AssistantObjects - tags: - - Assistants - requestBody: - content: - application/json: - schema: - $ref: specs/assistants.yaml#/components/schemas/AssistantObject - MessageObject: - post: - summary: The message object - description: > - Information about a message in the thread. - Equivalent to OpenAI's message object. - operationId: MessageObject - tags: - - Messages - requestBody: - content: - application/json: - schema: - $ref: specs/messages.yaml#/components/schemas/MessageObject - ThreadObject: - post: - summary: The thread object - description: Represents a thread that contains messages. - Equivalent to OpenAI's thread object. - operationId: ThreadObject - tags: - - Threads - requestBody: - content: - application/json: - schema: - $ref: specs/threads.yaml#/components/schemas/ThreadObject diff --git a/docs/openapi/specs/assistants.yaml b/docs/openapi/specs/assistants.yaml deleted file mode 100644 index 5db1f6a97..000000000 --- a/docs/openapi/specs/assistants.yaml +++ /dev/null @@ -1,319 +0,0 @@ ---- -components: - schemas: - AssistantObject: - type: object - properties: - id: - type: string - description: The identifier of the assistant. - example: asst_abc123 - object: - type: string - description: Type of the object, indicating it's an assistant. - default: assistant - version: - type: integer - description: Version number of the assistant. - example: 1 - created_at: - type: integer - format: int64 - description: Unix timestamp representing the creation time of the assistant. - example: 1698984975 - name: - type: string - description: Name of the assistant. - example: Math Tutor - description: - type: string - description: Description of the assistant. Can be null. - example: null - avatar: - type: string - description: URL of the assistant's avatar. Jan-specific property. - example: https://pic.png - models: - type: array - description: List of models associated with the assistant. Jan-specific property. - items: - type: object - properties: - model_id: - type: string - example: model_0 - instructions: - type: string - description: A system prompt for the assistant. - example: Be concise - events: - type: object - description: Event subscription settings for the assistant. - properties: - in: - type: array - items: - type: string - out: - type: array - items: - type: string - metadata: - type: object - description: Metadata associated with the assistant. - ListAssistantsResponse: null - CreateAssistantResponse: - type: object - properties: - id: - type: string - description: The identifier of the assistant. - example: asst_abc123 - object: - type: string - description: Type of the object, indicating it's an assistant. 
- default: assistant - version: - type: integer - description: Version number of the assistant. - example: 1 - created_at: - type: integer - format: int64 - description: Unix timestamp representing the creation time of the assistant. - example: 1698984975 - name: - type: string - description: Name of the assistant. - example: Math Tutor - description: - type: string - description: Description of the assistant. Can be null. - example: null - avatar: - type: string - description: URL of the assistant's avatar. Jan-specific property. - example: https://pic.png - models: - type: array - description: List of models associated with the assistant. Jan-specific property. - items: - type: object - properties: - model_id: - type: string - example: model_0 - instructions: - type: string - description: A system prompt for the assistant. - example: Be concise - events: - type: object - description: Event subscription settings for the assistant. - properties: - in: - type: array - items: - type: string - out: - type: array - items: - type: string - metadata: - type: object - description: Metadata associated with the assistant. - RetrieveAssistantResponse: - type: object - properties: - id: - type: string - description: The identifier of the assistant. - example: asst_abc123 - object: - type: string - description: Type of the object, indicating it's an assistant. - default: assistant - version: - type: integer - description: Version number of the assistant. - example: 1 - created_at: - type: integer - format: int64 - description: Unix timestamp representing the creation time of the assistant. - example: 1698984975 - name: - type: string - description: Name of the assistant. - example: Math Tutor - description: - type: string - description: Description of the assistant. Can be null. - example: null - avatar: - type: string - description: URL of the assistant's avatar. Jan-specific property. - example: https://pic.png - models: - type: array - description: List of models associated with the assistant. Jan-specific property. - items: - type: object - properties: - model_id: - type: string - example: model_0 - instructions: - type: string - description: A system prompt for the assistant. - example: Be concise - events: - type: object - description: Event subscription settings for the assistant. - properties: - in: - type: array - items: - type: string - out: - type: array - items: - type: string - metadata: - type: object - description: Metadata associated with the assistant. - ModifyAssistantObject: - type: object - properties: - id: - type: string - description: The identifier of the assistant. - example: asst_abc123 - object: - type: string - description: Type of the object, indicating it's an assistant. - default: assistant - version: - type: integer - description: Version number of the assistant. - example: 1 - created_at: - type: integer - format: int64 - description: Unix timestamp representing the creation time of the assistant. - example: 1698984975 - name: - type: string - description: Name of the assistant. - example: Math Tutor - description: - type: string - description: Description of the assistant. Can be null. - example: null - avatar: - type: string - description: URL of the assistant's avatar. Jan-specific property. - example: https://pic.png - models: - type: array - description: List of models associated with the assistant. Jan-specific property. 
-          items:
-            type: object
-            properties:
-              model_id:
-                type: string
-                example: model_0
-              instructions:
-                type: string
-                description: A system prompt for the assistant.
-                example: Be concise
-        events:
-          type: object
-          description: Event subscription settings for the assistant.
-          properties:
-            in:
-              type: array
-              items:
-                type: string
-            out:
-              type: array
-              items:
-                type: string
-        metadata:
-          type: object
-          description: Metadata associated with the assistant.
-    ModifyAssistantResponse:
-      type: object
-      properties:
-        id:
-          type: string
-          description: The identifier of the assistant.
-          example: asst_abc123
-        object:
-          type: string
-          description: Type of the object, indicating it's an assistant.
-          default: assistant
-        version:
-          type: integer
-          description: Version number of the assistant.
-          example: 1
-        created_at:
-          type: integer
-          format: int64
-          description: Unix timestamp representing the creation time of the assistant.
-          example: 1698984975
-        name:
-          type: string
-          description: Name of the assistant.
-          example: Physics Tutor
-        description:
-          type: string
-          description: Description of the assistant. Can be null.
-          example: null
-        avatar:
-          type: string
-          description: URL of the assistant's avatar. Jan-specific property.
-          example: https://pic.png
-        models:
-          type: array
-          description: List of models associated with the assistant. Jan-specific property.
-          items:
-            type: object
-            properties:
-              model_id:
-                type: string
-                example: model_0
-              instructions:
-                type: string
-                description: A system prompt for the assistant.
-                example: Be concise!
-        events:
-          type: object
-          description: Event subscription settings for the assistant.
-          properties:
-            in:
-              type: array
-              items:
-                type: string
-            out:
-              type: array
-              items:
-                type: string
-        metadata:
-          type: object
-          description: Metadata associated with the assistant.
-    DeleteAssistantResponse:
-      type: object
-      properties:
-        id:
-          type: string
-          description: The identifier of the deleted assistant.
-          example: asst_abc123
-        object:
-          type: string
-          description: Type of the object, indicating the assistant has been deleted.
-          example: assistant.deleted
-        deleted:
-          type: boolean
-          description: Indicates whether the assistant was successfully deleted.
-          example: true
diff --git a/docs/openapi/specs/chat.yaml b/docs/openapi/specs/chat.yaml
deleted file mode 100644
index c9358d796..000000000
--- a/docs/openapi/specs/chat.yaml
+++ /dev/null
@@ -1,196 +0,0 @@
----
-components:
-  schemas:
-    ChatObject:
-      type: object
-      properties:
-        messages:
-          type: array
-          description: |
-            Contains input data or prompts for the model to process.
-          example:
-            - content: 'Hello there :wave:'
-              role: assistant
-            - content: Can you write a long story
-              role: user
-        stream:
-          type: boolean
-          default: true
-          description:
-            Enables continuous output generation, allowing for streaming of
-            model responses.
-        model:
-          type: string
-          example: gpt-3.5-turbo
-          description: Specifies the model being used for inference or processing tasks.
-        max_tokens:
-          type: number
-          default: 2048
-          description:
-            The maximum number of tokens the model will generate in a single
-            response.
-        stop:
-          type: array
-          example:
-            - hello
-          description:
-            Defines specific tokens or phrases at which the model will stop
-            generating further output.
-        frequency_penalty:
-          type: number
-          default: 0
-          description:
-            Adjusts the likelihood of the model repeating words or phrases in
-            its output.
-        presence_penalty:
-          type: number
-          default: 0
-          description:
-            Influences the generation of new and varied concepts in the model's
-            output.
-        temperature:
-          type: number
-          default: 0.7
-          minimum: 0
-          maximum: 1
-          description: Controls the randomness of the model's output.
-        top_p:
-          type: number
-          default: 0.95
-          minimum: 0
-          maximum: 1
-          description: Sets the probability threshold (nucleus sampling) that keeps outputs relevant.
-        cache_prompt:
-          type: boolean
-          default: true
-          description: Reuses cached prompt processing to speed up repeated or similar requests.
-    ChatCompletionRequest:
-      type: object
-      properties:
-        messages:
-          type: array
-          description: |
-            Contains input data or prompts for the model to process.
-          example:
-            - content: You are a helpful assistant.
-              role: system
-            - content: Hello!
-              role: user
-        model:
-          type: string
-          example: tinyllama-1.1b
-          description: |
-            Specifies the model being used for inference or processing tasks.
-        stream:
-          type: boolean
-          default: true
-          description: >
-            Enables continuous output generation, allowing for streaming of
-            model responses.
-        max_tokens:
-          type: number
-          default: 2048
-          description: >
-            The maximum number of tokens the model will generate in a single
-            response.
-        stop:
-          type: array
-          example:
-            - hello
-          description: >
-            Defines specific tokens or phrases at which the model will stop
-            generating further output.
-        frequency_penalty:
-          type: number
-          default: 0
-          description: >
-            Adjusts the likelihood of the model repeating words or phrases in
-            its output.
-        presence_penalty:
-          type: number
-          default: 0
-          description: >
-            Influences the generation of new and varied concepts in the model's
-            output.
-        temperature:
-          type: number
-          default: 0.7
-          minimum: 0
-          maximum: 1
-          description: |
-            Controls the randomness of the model's output.
-        top_p:
-          type: number
-          default: 0.95
-          minimum: 0
-          maximum: 1
-          description: |
-            Sets the probability threshold (nucleus sampling) that keeps outputs relevant.
-    ChatCompletionResponse:
-      type: object
-      description: Description of the response structure
-      properties:
-        choices:
-          type: array
-          description: Array of choice objects
-          items:
-            type: object
-            properties:
-              finish_reason:
-                type: string
-                nullable: true
-                example: null
-                description: Reason for finishing the response, if applicable
-              index:
-                type: integer
-                example: 0
-                description: Index of the choice
-              message:
-                type: object
-                properties:
-                  content:
-                    type: string
-                    example: Hello user. What can I help you with?
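The ChatCompletionRequest fields above translate directly into a request payload. A hedged sketch: the /v1/chat/completions path itself is documented elsewhere in this spec and is assumed here, and `stream` is set to false (the schema defaults to true) so the response arrives as a single JSON body:

// Sketch: a non-streaming chat completion using the documented request fields.
async function chat(prompt: string): Promise<string> {
  const res = await fetch('http://localhost:1337/v1/chat/completions', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model: 'tinyllama-1.1b',
      stream: false, // schema default is true; false keeps this example simple
      max_tokens: 2048,
      temperature: 0.7,
      top_p: 0.95,
      messages: [
        { role: 'system', content: 'You are a helpful assistant.' },
        { role: 'user', content: prompt },
      ],
    }),
  })
  // Per ChatCompletionResponse, the reply text lives at choices[0].message.content.
  const body = (await res.json()) as {
    choices: { message: { content: string } }[]
  }
  return body.choices[0].message.content
}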
- description: Content of the message - role: - type: string - example: assistant - description: Role of the sender - created: - type: integer - example: 1700193928 - description: Timestamp of when the response was created - id: - type: string - example: ebwd2niJvJB1Q2Whyvkz - description: Unique identifier of the response - model: - type: string - nullable: true - example: _ - description: Model used for generating the response - object: - type: string - example: chat.completion - description: Type of the response object - system_fingerprint: - type: string - nullable: true - example: _ - description: System fingerprint - usage: - type: object - description: Information about the usage of tokens - properties: - completion_tokens: - type: integer - example: 500 - description: Number of tokens used for completion - prompt_tokens: - type: integer - example: 33 - description: Number of tokens used in the prompt - total_tokens: - type: integer - example: 533 - description: Total number of tokens used diff --git a/docs/openapi/specs/messages.yaml b/docs/openapi/specs/messages.yaml deleted file mode 100644 index 22d82b787..000000000 --- a/docs/openapi/specs/messages.yaml +++ /dev/null @@ -1,313 +0,0 @@ ---- -components: - schemas: - MessageObject: - type: object - properties: - id: - type: string - description: | - Sequential or UUID identifier of the message. - example: 0 - object: - type: string - description: | - Type of the object, defaults to 'thread.message'. - example: thread.message - created_at: - type: integer - format: int64 - description: | - Unix timestamp representing the creation time of the message. - thread_id: - type: string - description: > - Identifier of the thread to which this message belongs. Defaults to - parent thread. - example: thread_asdf - assistant_id: - type: string - description: > - Identifier of the assistant involved in the message. Defaults to - parent thread. - example: jan - role: - type: string - enum: - - user - - assistant - description: | - Role of the sender, either 'user' or 'assistant'. - content: - type: array - items: - type: object - properties: - type: - type: string - description: | - Type of content, e.g., 'text'. - text: - type: object - properties: - value: - type: string - description: | - Text content of the message. - example: Hi!? - annotations: - type: array - items: - type: string - description: | - Annotations for the text content, if any. - example: [] - metadata: - type: object - description: | - Metadata associated with the message, defaults to an empty object. - example: {} - GetMessageResponse: - type: object - properties: - id: - type: string - description: The identifier of the message. - example: msg_abc123 - object: - type: string - description: Type of the object, indicating it's a thread message. - default: thread.message - created_at: - type: integer - format: int64 - description: Unix timestamp representing the creation time of the message. - example: 1699017614 - thread_id: - type: string - description: Identifier of the thread to which this message belongs. - example: thread_abc123 - role: - type: string - description: Role of the sender, either 'user' or 'assistant'. - example: user - content: - type: array - items: - type: object - properties: - type: - type: string - description: Type of content, e.g., 'text'. - example: text - text: - type: object - properties: - value: - type: string - description: Text content of the message. - example: How does AI work? Explain it in simple terms. 
- annotations: - type: array - items: - type: string - description: Annotations for the text content, if any. - example: [] - file_ids: - type: array - items: - type: string - description: Array of file IDs associated with the message, if any. - example: [] - assistant_id: - type: string - description: Identifier of the assistant involved in the message, if applicable. - example: null - run_id: - type: string - description: Run ID associated with the message, if applicable. - example: null - metadata: - type: object - description: Metadata associated with the message. - example: {} - CreateMessageResponse: - type: object - properties: - id: - type: string - description: The identifier of the created message. - example: msg_abc123 - object: - type: string - description: Type of the object, indicating it's a thread message. - example: thread.message - created_at: - type: integer - format: int64 - description: Unix timestamp representing the creation time of the message. - example: 1699017614 - thread_id: - type: string - description: Identifier of the thread to which this message belongs. - example: thread_abc123 - role: - type: string - description: Role of the sender, either 'user' or 'assistant'. - example: user - content: - type: array - items: - type: object - properties: - type: - type: string - description: Type of content, e.g., 'text'. - example: text - text: - type: object - properties: - value: - type: string - description: Text content of the message. - example: How does AI work? Explain it in simple terms. - annotations: - type: array - items: - type: string - description: Annotations for the text content, if any. - example: [] - file_ids: - type: array - items: - type: string - description: Array of file IDs associated with the message, if any. - example: [] - assistant_id: - type: string - description: Identifier of the assistant involved in the message, if applicable. - example: null - run_id: - type: string - description: Run ID associated with the message, if applicable. - example: null - metadata: - type: object - description: Metadata associated with the message. - example: {} - ListMessagesResponse: - type: object - properties: - object: - type: string - description: Type of the object, indicating it's a list. - default: list - data: - type: array - items: - $ref: '#/components/schemas/ListMessageObject' - first_id: - type: string - description: Identifier of the first message in the list. - example: msg_abc123 - last_id: - type: string - description: Identifier of the last message in the list. - example: msg_abc456 - has_more: - type: boolean - description: Indicates whether there are more messages to retrieve. - example: false - ListMessageObject: - type: object - properties: - id: - type: string - description: The identifier of the message. - example: msg_abc123 - object: - type: string - description: Type of the object, indicating it's a thread message. - example: thread.message - created_at: - type: integer - format: int64 - description: Unix timestamp representing the creation time of the message. - example: 1699017614 - thread_id: - type: string - description: Identifier of the thread to which this message belongs. - example: thread_abc123 - role: - type: string - description: Role of the sender, either 'user' or 'assistant'. - example: user - content: - type: array - items: - type: object - properties: - type: - type: string - description: Type of content, e.g., 'text'. 
- text: - type: object - properties: - value: - type: string - description: Text content of the message. - example: How does AI work? Explain it in simple terms. - annotations: - type: array - items: - type: string - description: Annotations for the text content, if any. - file_ids: - type: array - items: - type: string - description: Array of file IDs associated with the message, if any. - example: [] - assistant_id: - type: string - description: Identifier of the assistant involved in the message, if applicable. - example: null - run_id: - type: string - description: Run ID associated with the message, if applicable. - example: null - metadata: - type: object - description: Metadata associated with the message. - example: {} - MessageFileObject: - type: object - properties: - id: - type: string - description: The identifier of the file. - example: file-abc123 - object: - type: string - description: Type of the object, indicating it's a thread message file. - example: thread.message.file - created_at: - type: integer - format: int64 - description: Unix timestamp representing the creation time of the file. - example: 1699061776 - message_id: - type: string - description: Identifier of the message to which this file is associated. - example: msg_abc123 - ListMessageFilesResponse: - type: object - properties: - object: - type: string - description: Type of the object, indicating it's a list. - default: list - data: - type: array - items: - $ref: '#/components/schemas/MessageFileObject' diff --git a/docs/openapi/specs/models.yaml b/docs/openapi/specs/models.yaml deleted file mode 100644 index ff2040bb5..000000000 --- a/docs/openapi/specs/models.yaml +++ /dev/null @@ -1,259 +0,0 @@ ---- -components: - schemas: - ListModelsResponse: - type: object - properties: - object: - type: string - enum: - - list - data: - type: array - items: - $ref: '#/components/schemas/Model' - required: - - object - - data - Model: - type: object - properties: - source_url: - type: string - format: uri - description: URL to the source of the model. - example: https://huggingface.co/janhq/trinity-v1.2-GGUF/resolve/main/trinity-v1.2.Q4_K_M.gguf - id: - type: string - description: - Unique identifier used in chat-completions model_name, matches - folder name. - example: trinity-v1.2-7b - object: - type: string - example: model - name: - type: string - description: Name of the model. - example: Trinity-v1.2 7B Q4 - version: - type: string - default: '1.0' - description: The version number of the model. - description: - type: string - description: Description of the model. - example: - Trinity is an experimental model merge using the Slerp method. - Recommended for daily assistance purposes. - format: - type: string - description: State format of the model, distinct from the engine. - example: gguf - settings: - type: object - properties: - ctx_len: - type: integer - description: Context length. 
-            example: 4096
-          prompt_template:
-            type: string
-            example: "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
-          additionalProperties: false
-        parameters:
-          type: object
-          properties:
-            temperature:
-              example: 0.7
-            top_p:
-              example: 0.95
-            stream:
-              example: true
-            max_tokens:
-              example: 4096
-            stop:
-              example: []
-            frequency_penalty:
-              example: 0
-            presence_penalty:
-              example: 0
-          additionalProperties: false
-        metadata:
-          type: object
-          properties:
-            author:
-              type: string
-              example: Jan
-            tags:
-              example:
-                - 7B
-                - Merged
-                - Featured
-            size:
-              example: 4370000000
-            cover:
-              example: https://raw.githubusercontent.com/janhq/jan/main/models/trinity-v1.2-7b/cover.png
-        engine:
-          example: nitro
-    ModelObject:
-      type: object
-      properties:
-        id:
-          type: string
-          description: |
-            The identifier of the model.
-          example: trinity-v1.2-7b
-        object:
-          type: string
-          description: |
-            The type of the object, indicating it's a model.
-          default: model
-        created:
-          type: integer
-          format: int64
-          description: |
-            Unix timestamp representing the creation time of the model.
-          example: 1253935178
-        owned_by:
-          type: string
-          description: |
-            The entity that owns the model.
-          example: _
-    GetModelResponse:
-      type: object
-      properties:
-        source_url:
-          type: string
-          format: uri
-          description: URL to the source of the model.
-          example: https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf
-        id:
-          type: string
-          description:
-            Unique identifier used in chat-completions model_name, matches
-            folder name.
-          example: mistral-ins-7b-q4
-        object:
-          type: string
-          example: model
-        name:
-          type: string
-          description: Name of the model.
-          example: Mistral Instruct 7B Q4
-        version:
-          type: string
-          default: '1.0'
-          description: The version number of the model.
-        description:
-          type: string
-          description: Description of the model.
-          example:
-            Mistral Instruct 7B is a model fine-tuned for instruction
-            following. Recommended for daily assistance purposes.
-        format:
-          type: string
-          description: State format of the model, distinct from the engine.
-          example: gguf
-        settings:
-          type: object
-          properties:
-            ctx_len:
-              type: integer
-              description: Context length.
-              example: 4096
-            prompt_template:
-              type: string
-              example: '[INST] {prompt} [/INST]'
-          additionalProperties: false
-        parameters:
-          type: object
-          properties:
-            temperature:
-              example: 0.7
-            top_p:
-              example: 0.95
-            stream:
-              example: true
-            max_tokens:
-              example: 4096
-            stop:
-              example: []
-            frequency_penalty:
-              example: 0
-            presence_penalty:
-              example: 0
-          additionalProperties: false
-        metadata:
-          type: object
-          properties:
-            author:
-              type: string
-              example: MistralAI
-            tags:
-              example:
-                - 7B
-                - Featured
-                - Foundation Model
-            size:
-              example: 4370000000
-            cover:
-              example: https://raw.githubusercontent.com/janhq/jan/main/models/mistral-ins-7b-q4/cover.png
-        engine:
-          example: nitro
-    DeleteModelResponse:
-      type: object
-      properties:
-        id:
-          type: string
-          description: The identifier of the model that was deleted.
-          example: mistral-ins-7b-q4
-        object:
-          type: string
-          description: Type of the object, indicating it's a model.
-          default: model
-        deleted:
-          type: boolean
-          description: Indicates whether the model was successfully deleted.
-          example: true
-    StartModelResponse:
-      type: object
-      properties:
-        id:
-          type: string
-          description: The identifier of the model that was started.
-          example: model-zephyr-7B
-        object:
-          type: string
-          description: Type of the object, indicating it's a model.
-          default: model
-        state:
-          type: string
-          description: The current state of the model after the start operation.
-          example: running
-      required:
-        - id
-        - object
-        - state
-    StopModelResponse:
-      type: object
-      properties:
-        id:
-          type: string
-          description: The identifier of the model that was stopped.
-          example: model-zephyr-7B
-        object:
-          type: string
-          description: Type of the object, indicating it's a model.
-          default: model
-        state:
-          type: string
-          description: The current state of the model after the stop operation.
-          example: stopped
-      required:
-        - id
-        - object
-        - state
-    DownloadModelResponse:
-      type: object
-      properties:
-        message:
-          type: string
-          description: Message indicating that Jan has started downloading the corresponding model.
-          example: Starting download mistral-ins-7b-q4
diff --git a/docs/openapi/specs/threads.yaml b/docs/openapi/specs/threads.yaml
deleted file mode 100644
index 285fcc82d..000000000
--- a/docs/openapi/specs/threads.yaml
+++ /dev/null
@@ -1,227 +0,0 @@
----
-components:
-  schemas:
-    ThreadObject:
-      type: object
-      properties:
-        id:
-          type: string
-          description: |
-            The identifier of the thread; defaults to the folder name.
-          example: thread_....
-        object:
-          type: string
-          description: |
-            Type of the object, defaults to thread.
-          example: thread
-        title:
-          type: string
-          description: >
-            A brief summary or description of the thread, defaults to an empty
-            string.
-          example: funny physics joke
-        assistants:
-          type: array
-          description: List of assistants participating in the thread.
-          items:
-            properties:
-              assistant_id:
-                type: string
-                description: |
-                  The identifier of the assistant; defaults to "jan".
-                example: jan
-              model:
-                type: object
-                properties:
-                  id:
-                    type: string
-                    description: The identifier of the model.
-                    example: ...
-                  settings:
-                    type: object
-                    description: >
-                      Defaults to and overrides assistant.json's "settings" (and if none,
-                      then model.json "settings")
-                  parameters:
-                    type: object
-                    description: >
-                      Defaults to and overrides assistant.json's "parameters" (and if
-                      none, then model.json "parameters")
-        created:
-          type: integer
-          format: int64
-          description: >
-            Unix timestamp representing the creation time of the thread,
-            defaults to file creation time.
-          example: 1231231
-        metadata:
-          type: object
-          description: |
-            Metadata associated with the thread, defaults to an empty object.
-          example: {}
-    GetThreadResponse:
-      type: object
-      properties:
-        id:
-          type: string
-          description: The identifier of the thread.
-          example: thread_abc123
-        object:
-          type: string
-          description: Type of the object, indicating it's a thread.
-          example: thread
-        created_at:
-          type: integer
-          format: int64
-          description: Unix timestamp representing the creation time of the thread.
-          example: 1699014083
-        assistants:
-          type: array
-          items:
-            type: string
-          description: List of assistants involved in the thread.
-          example:
-            - assistant-001
-        metadata:
-          type: object
-          description: Metadata associated with the thread.
-          example: {}
-        messages:
-          type: array
-          items:
-            type: string
-          description: List of messages within the thread.
-          example: []
-    CreateThreadResponse:
-      type: object
-      properties:
-        id:
-          type: string
-          description: The identifier of the newly created thread.
-          example: thread_abc123
-        object:
-          type: string
-          description: Type of the object, indicating it's a thread.
-          example: thread
-        created_at:
-          type: integer
-          format: int64
-          description: Unix timestamp representing the creation time of the thread.
-          example: 1699014083
-        metadata:
-          type: object
-          description: Metadata associated with the newly created thread.
- example: {} - CreateThreadObject: - type: object - properties: - object: - type: string - description: Type of the object, indicating it's a thread. - example: thread - title: - type: string - description: > - A brief summary or description of the thread, defaults to an empty - string. - example: funny physics joke - assistants: - type: array - description: assistant involved in the thread - items: - properties: - assistant_id: - type: string - description: | - The identifier of assistant, defaults to "jan" - example: jan - assistant_name: - type: string - description: | - The name of assistant, defaults to "Jan" - example: Jan - instructions: - type: string - description: > - The instruction of assistant, defaults to "Be my grammar corrector" - model: - type: object - properties: - id: - type: string - description: Model id - example: mistral-ins-7b-q4 - settings: - type: object - description: > - Defaults to and overrides assistant.json's "settings" (and if none, - then model.json "settings") - parameters: - type: object - description: > - Defaults to and overrides assistant.json's "parameters" (and if - none, then model.json "parameters") - engine: - type: string - description: Engine id - example: nitro - metadata: - type: object - description: | - Metadata associated with the thread, defaults to an empty object. - ThreadMessageObject: - type: object - properties: - role: - type: string - description: | - "Role of the sender, either 'user' or 'assistant'." - enum: - - user - - assistant - content: - type: string - description: | - "Text content of the message." - file_ids: - type: array - items: - type: string - description: | - "Array of file IDs associated with the message, if any." - ModifyThreadResponse: - type: object - properties: - id: - type: string - description: | - "The identifier of the modified thread." - example: thread_abc123 - object: - type: string - description: Type of the object, indicating it's a thread. - example: thread - created_at: - type: integer - format: int64 - description: Unix timestamp representing the creation time of the thread. - example: 1699014083 - metadata: - type: object - description: Metadata associated with the modified thread. - example: {} - DeleteThreadResponse: - type: object - properties: - id: - type: string - description: The identifier of the deleted thread. - example: thread_abc123 - object: - type: string - description: Type of the object, indicating the thread has been deleted. - example: thread.deleted - deleted: - type: boolean - description: Indicates whether the thread was successfully deleted. 
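For reference, the CreateThreadObject schema above assembles into a payload like the following sketch; values are taken from the schema's own examples, and field coverage is deliberately partial:

// Sketch: a CreateThreadObject payload for POST /v1/threads.
const newThread = {
  object: 'thread',
  title: 'funny physics joke',
  assistants: [
    {
      assistant_id: 'jan',
      assistant_name: 'Jan',
      model: {
        id: 'mistral-ins-7b-q4',
        engine: 'nitro',
        settings: {}, // overrides assistant.json / model.json "settings"
        parameters: {}, // overrides assistant.json / model.json "parameters"
      },
    },
  ],
  metadata: {},
}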
- example: true diff --git a/docs/openapi/version.txt b/docs/openapi/version.txt deleted file mode 100644 index 5656be624..000000000 --- a/docs/openapi/version.txt +++ /dev/null @@ -1 +0,0 @@ -v1.23.2 \ No newline at end of file diff --git a/docs/src/components/Download/CardDownload.tsx b/docs/src/components/Download/CardDownload.tsx index f75543d62..f61f9f462 100644 --- a/docs/src/components/Download/CardDownload.tsx +++ b/docs/src/components/Download/CardDownload.tsx @@ -18,17 +18,12 @@ type SystemType = { const systemsTemplate: SystemType[] = [ { - name: 'Mac M1, M2, M3', - label: 'Apple Silicon', + name: 'Mac ', + label: 'Universal', logo: FaApple, - fileFormat: '{appname}-mac-arm64-{tag}.dmg', - }, - { - name: 'Mac (Intel)', - label: 'Apple Intel', - logo: FaApple, - fileFormat: '{appname}-mac-x64-{tag}.dmg', + fileFormat: '{appname}-mac-universal-{tag}.dmg', }, + { name: 'Windows', label: 'Standard (64-bit)', diff --git a/docs/src/components/DropdownDownload/index.tsx b/docs/src/components/DropdownDownload/index.tsx index 87461122e..26e0f49d6 100644 --- a/docs/src/components/DropdownDownload/index.tsx +++ b/docs/src/components/DropdownDownload/index.tsx @@ -24,14 +24,9 @@ type GpuInfo = { const systemsTemplate: SystemType[] = [ { - name: 'Download for Mac (M1/M2/M3)', + name: 'Download for Mac', logo: FaApple, - fileFormat: '{appname}-mac-arm64-{tag}.dmg', - }, - { - name: 'Download for Mac (Intel)', - logo: FaApple, - fileFormat: '{appname}-mac-x64-{tag}.dmg', + fileFormat: '{appname}-mac-universal-{tag}.dmg', }, { name: 'Download for Windows', @@ -66,27 +61,20 @@ const DropdownDownload = ({ lastRelease }: Props) => { type: '', }) - const changeDefaultSystem = useCallback( - async (systems: SystemType[]) => { - const userAgent = navigator.userAgent - if (userAgent.includes('Windows')) { - // windows user - setDefaultSystem(systems[2]) - } else if (userAgent.includes('Linux')) { - // linux user - setDefaultSystem(systems[3]) - } else if (userAgent.includes('Mac OS')) { - if (gpuInfo.type === 'Apple Silicon') { - setDefaultSystem(systems[0]) - } else { - setDefaultSystem(systems[1]) - } - } else { - setDefaultSystem(systems[1]) - } - }, - [gpuInfo.type] - ) + const changeDefaultSystem = useCallback(async (systems: SystemType[]) => { + const userAgent = navigator.userAgent + if (userAgent.includes('Windows')) { + // windows user + setDefaultSystem(systems[2]) + } else if (userAgent.includes('Linux')) { + // linux user + setDefaultSystem(systems[3]) + } else if (userAgent.includes('Mac OS')) { + setDefaultSystem(systems[0]) + } else { + setDefaultSystem(systems[1]) + } + }, []) function getUnmaskedInfo(gl: WebGLRenderingContext): { renderer: string diff --git a/electron/docs/openapi/.gitkeep b/electron/docs/openapi/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/electron/package.json b/electron/package.json index c4609e7af..f8611734a 100644 --- a/electron/package.json +++ b/electron/package.json @@ -15,7 +15,6 @@ "build/**/*.{js,map}", "pre-install", "themes", - "docs/**/*", "scripts/**/*", "icons/**/*", "themes", diff --git a/electron/tests/config/fixtures.ts b/electron/tests/config/fixtures.ts index bc3f8a7d1..f61eddfae 100644 --- a/electron/tests/config/fixtures.ts +++ b/electron/tests/config/fixtures.ts @@ -108,7 +108,7 @@ export const test = base.extend< }) test.beforeAll(async () => { - await rmSync(path.join(__dirname, '../../test-data'), { + rmSync(path.join(__dirname, '../../test-data'), { recursive: true, force: true, }) @@ -122,6 +122,5 @@ 
test.beforeAll(async () => { }) test.afterAll(async () => { - // temporally disabling this due to the config for parallel testing WIP // teardownElectron() }) diff --git a/electron/tests/e2e/navigation.e2e.spec.ts b/electron/tests/e2e/navigation.e2e.spec.ts index b599a951c..1b463d381 100644 --- a/electron/tests/e2e/navigation.e2e.spec.ts +++ b/electron/tests/e2e/navigation.e2e.spec.ts @@ -2,11 +2,8 @@ import { expect } from '@playwright/test' import { page, test, TIMEOUT } from '../config/fixtures' test('renders left navigation panel', async () => { - const settingsBtn = await page - .getByTestId('Thread') - .first() - .isEnabled({ timeout: TIMEOUT }) - expect([settingsBtn].filter((e) => !e).length).toBe(0) + const threadBtn = page.getByTestId('Thread').first() + await expect(threadBtn).toBeVisible({ timeout: TIMEOUT }) // Chat section should be there await page.getByTestId('Local API Server').first().click({ timeout: TIMEOUT, diff --git a/electron/tests/e2e/thread.e2e.spec.ts b/electron/tests/e2e/thread.e2e.spec.ts index dfd131988..312cb1f46 100644 --- a/electron/tests/e2e/thread.e2e.spec.ts +++ b/electron/tests/e2e/thread.e2e.spec.ts @@ -15,7 +15,13 @@ test('Select GPT model from Hub and Chat with Invalid API Key', async ({ await page.getByTestId('txt-input-chat').fill('dummy value') - await page.getByTestId('btn-send-chat').click() + const denyButton = page.locator('[data-testid="btn-deny-product-analytics"]') + + if ((await denyButton.count()) > 0) { + await denyButton.click({ force: true }) + } else { + await page.getByTestId('btn-send-chat').click({ force: true }) + } await page.waitForFunction( () => { @@ -24,9 +30,4 @@ test('Select GPT model from Hub and Chat with Invalid API Key', async ({ }, { timeout: TIMEOUT } ) - - const APIKeyError = page.getByTestId('passthrough-error-message') - await expect(APIKeyError).toBeVisible({ - timeout: TIMEOUT, - }) }) diff --git a/extensions/assistant-extension/src/index.ts b/extensions/assistant-extension/src/index.ts index 6705483d6..621d8e216 100644 --- a/extensions/assistant-extension/src/index.ts +++ b/extensions/assistant-extension/src/index.ts @@ -127,7 +127,7 @@ export default class JanAssistantExtension extends AssistantExtension { thread_location: undefined, id: 'jan', object: 'assistant', - created_at: Date.now(), + created_at: Date.now() / 1000, name: 'Jan', description: 'A default assistant that can use all downloaded models', model: '*', @@ -141,7 +141,7 @@ export default class JanAssistantExtension extends AssistantExtension { top_k: 2, chunk_size: 1024, chunk_overlap: 64, - retrieval_template: `Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. + retrieval_template: `Use the following pieces of context to answer the question at the end. 
---------------- CONTEXT: {CONTEXT} ---------------- diff --git a/extensions/assistant-extension/src/node/index.ts b/extensions/assistant-extension/src/node/index.ts index 83a4a1983..11e8f49c4 100644 --- a/extensions/assistant-extension/src/node/index.ts +++ b/extensions/assistant-extension/src/node/index.ts @@ -9,13 +9,14 @@ export function toolRetrievalUpdateTextSplitter( retrieval.updateTextSplitter(chunkSize, chunkOverlap) } export async function toolRetrievalIngestNewDocument( + thread: string, file: string, model: string, engine: string, useTimeWeighted: boolean ) { - const filePath = path.join(getJanDataFolderPath(), normalizeFilePath(file)) - const threadPath = path.dirname(filePath.replace('files', '')) + const threadPath = path.join(getJanDataFolderPath(), 'threads', thread) + const filePath = path.join(getJanDataFolderPath(), 'files', file) retrieval.updateEmbeddingEngine(model, engine) return retrieval .ingestAgentKnowledge(filePath, `${threadPath}/memory`, useTimeWeighted) diff --git a/extensions/assistant-extension/src/tools/retrieval.ts b/extensions/assistant-extension/src/tools/retrieval.ts index 763192287..b1a0c3cba 100644 --- a/extensions/assistant-extension/src/tools/retrieval.ts +++ b/extensions/assistant-extension/src/tools/retrieval.ts @@ -35,6 +35,7 @@ export class RetrievalTool extends InferenceTool { await executeOnMain( NODE, 'toolRetrievalIngestNewDocument', + data.thread?.id, docFile, data.model?.id, data.model?.engine, diff --git a/extensions/conversational-extension/package.json b/extensions/conversational-extension/package.json index 036fcfab2..ea3006449 100644 --- a/extensions/conversational-extension/package.json +++ b/extensions/conversational-extension/package.json @@ -18,12 +18,14 @@ "devDependencies": { "cpx": "^1.5.0", "rimraf": "^3.0.2", + "ts-loader": "^9.5.0", "webpack": "^5.88.2", - "webpack-cli": "^5.1.4", - "ts-loader": "^9.5.0" + "webpack-cli": "^5.1.4" }, "dependencies": { - "@janhq/core": "file:../../core" + "@janhq/core": "file:../../core", + "ky": "^1.7.2", + "p-queue": "^8.0.1" }, "engines": { "node": ">=18.0.0" diff --git a/extensions/conversational-extension/src/@types/global.d.ts b/extensions/conversational-extension/src/@types/global.d.ts new file mode 100644 index 000000000..757b5eebf --- /dev/null +++ b/extensions/conversational-extension/src/@types/global.d.ts @@ -0,0 +1,14 @@ +export {} +declare global { + declare const API_URL: string + declare const SOCKET_URL: string + + interface Core { + api: APIFunctions + events: EventEmitter + } + interface Window { + core?: Core | undefined + electronAPI?: any | undefined + } +} diff --git a/extensions/conversational-extension/src/Conversational.test.ts b/extensions/conversational-extension/src/Conversational.test.ts deleted file mode 100644 index 3d1d6fc60..000000000 --- a/extensions/conversational-extension/src/Conversational.test.ts +++ /dev/null @@ -1,408 +0,0 @@ -/** - * @jest-environment jsdom - */ -jest.mock('@janhq/core', () => ({ - ...jest.requireActual('@janhq/core/node'), - fs: { - existsSync: jest.fn(), - mkdir: jest.fn(), - writeFileSync: jest.fn(), - readdirSync: jest.fn(), - readFileSync: jest.fn(), - appendFileSync: jest.fn(), - rm: jest.fn(), - writeBlob: jest.fn(), - joinPath: jest.fn(), - fileStat: jest.fn(), - }, - joinPath: jest.fn(), - ConversationalExtension: jest.fn(), -})) - -import { fs } from '@janhq/core' - -import JSONConversationalExtension from '.' 
- -describe('JSONConversationalExtension Tests', () => { - let extension: JSONConversationalExtension - - beforeEach(() => { - // @ts-ignore - extension = new JSONConversationalExtension() - }) - - it('should create thread folder on load if it does not exist', async () => { - // @ts-ignore - jest.spyOn(fs, 'existsSync').mockResolvedValue(false) - const mkdirSpy = jest.spyOn(fs, 'mkdir').mockResolvedValue({}) - - await extension.onLoad() - - expect(mkdirSpy).toHaveBeenCalledWith('file://threads') - }) - - it('should log message on unload', () => { - const consoleSpy = jest.spyOn(console, 'debug').mockImplementation() - - extension.onUnload() - - expect(consoleSpy).toHaveBeenCalledWith( - 'JSONConversationalExtension unloaded' - ) - }) - - it('should return sorted threads', async () => { - jest - .spyOn(extension, 'getValidThreadDirs') - .mockResolvedValue(['dir1', 'dir2']) - jest - .spyOn(extension, 'readThread') - .mockResolvedValueOnce({ updated: '2023-01-01' }) - .mockResolvedValueOnce({ updated: '2023-01-02' }) - - const threads = await extension.getThreads() - - expect(threads).toEqual([ - { updated: '2023-01-02' }, - { updated: '2023-01-01' }, - ]) - }) - - it('should ignore broken threads', async () => { - jest - .spyOn(extension, 'getValidThreadDirs') - .mockResolvedValue(['dir1', 'dir2']) - jest - .spyOn(extension, 'readThread') - .mockResolvedValueOnce(JSON.stringify({ updated: '2023-01-01' })) - .mockResolvedValueOnce('this_is_an_invalid_json_content') - - const threads = await extension.getThreads() - - expect(threads).toEqual([{ updated: '2023-01-01' }]) - }) - - it('should save thread', async () => { - // @ts-ignore - jest.spyOn(fs, 'existsSync').mockResolvedValue(false) - const mkdirSpy = jest.spyOn(fs, 'mkdir').mockResolvedValue({}) - const writeFileSyncSpy = jest - .spyOn(fs, 'writeFileSync') - .mockResolvedValue({}) - - const thread = { id: '1', updated: '2023-01-01' } as any - await extension.saveThread(thread) - - expect(mkdirSpy).toHaveBeenCalled() - expect(writeFileSyncSpy).toHaveBeenCalled() - }) - - it('should delete thread', async () => { - const rmSpy = jest.spyOn(fs, 'rm').mockResolvedValue({}) - - await extension.deleteThread('1') - - expect(rmSpy).toHaveBeenCalled() - }) - - it('should add new message', async () => { - // @ts-ignore - jest.spyOn(fs, 'existsSync').mockResolvedValue(false) - const mkdirSpy = jest.spyOn(fs, 'mkdir').mockResolvedValue({}) - const appendFileSyncSpy = jest - .spyOn(fs, 'appendFileSync') - .mockResolvedValue({}) - - const message = { - thread_id: '1', - content: [{ type: 'text', text: { annotations: [] } }], - } as any - await extension.addNewMessage(message) - - expect(mkdirSpy).toHaveBeenCalled() - expect(appendFileSyncSpy).toHaveBeenCalled() - }) - - it('should store image', async () => { - const writeBlobSpy = jest.spyOn(fs, 'writeBlob').mockResolvedValue({}) - - await extension.storeImage( - 'data:image/png;base64,abcd', - 'path/to/image.png' - ) - - expect(writeBlobSpy).toHaveBeenCalled() - }) - - it('should store file', async () => { - const writeBlobSpy = jest.spyOn(fs, 'writeBlob').mockResolvedValue({}) - - await extension.storeFile( - 'data:application/pdf;base64,abcd', - 'path/to/file.pdf' - ) - - expect(writeBlobSpy).toHaveBeenCalled() - }) - - it('should write messages', async () => { - // @ts-ignore - jest.spyOn(fs, 'existsSync').mockResolvedValue(false) - const mkdirSpy = jest.spyOn(fs, 'mkdir').mockResolvedValue({}) - const writeFileSyncSpy = jest - .spyOn(fs, 'writeFileSync') - .mockResolvedValue({}) - - const messages 
= [{ id: '1', thread_id: '1', content: [] }] as any - await extension.writeMessages('1', messages) - - expect(mkdirSpy).toHaveBeenCalled() - expect(writeFileSyncSpy).toHaveBeenCalled() - }) - - it('should get all messages on string response', async () => { - jest.spyOn(fs, 'readdirSync').mockResolvedValue(['messages.jsonl']) - jest.spyOn(fs, 'readFileSync').mockResolvedValue('{"id":"1"}\n{"id":"2"}\n') - - const messages = await extension.getAllMessages('1') - - expect(messages).toEqual([{ id: '1' }, { id: '2' }]) - }) - - it('should get all messages on object response', async () => { - jest.spyOn(fs, 'readdirSync').mockResolvedValue(['messages.jsonl']) - jest.spyOn(fs, 'readFileSync').mockResolvedValue({ id: 1 }) - - const messages = await extension.getAllMessages('1') - - expect(messages).toEqual([{ id: 1 }]) - }) - - it('get all messages return empty on error', async () => { - jest.spyOn(fs, 'readdirSync').mockRejectedValue(['messages.jsonl']) - - const messages = await extension.getAllMessages('1') - - expect(messages).toEqual([]) - }) - - it('return empty messages on no messages file', async () => { - jest.spyOn(fs, 'readdirSync').mockResolvedValue([]) - - const messages = await extension.getAllMessages('1') - - expect(messages).toEqual([]) - }) - - it('should ignore error message', async () => { - jest.spyOn(fs, 'readdirSync').mockResolvedValue(['messages.jsonl']) - jest - .spyOn(fs, 'readFileSync') - .mockResolvedValue('{"id":"1"}\nyolo\n{"id":"2"}\n') - - const messages = await extension.getAllMessages('1') - - expect(messages).toEqual([{ id: '1' }, { id: '2' }]) - }) - - it('should create thread folder on load if it does not exist', async () => { - // @ts-ignore - jest.spyOn(fs, 'existsSync').mockResolvedValue(false) - const mkdirSpy = jest.spyOn(fs, 'mkdir').mockResolvedValue({}) - - await extension.onLoad() - - expect(mkdirSpy).toHaveBeenCalledWith('file://threads') - }) - - it('should log message on unload', () => { - const consoleSpy = jest.spyOn(console, 'debug').mockImplementation() - - extension.onUnload() - - expect(consoleSpy).toHaveBeenCalledWith( - 'JSONConversationalExtension unloaded' - ) - }) - - it('should return sorted threads', async () => { - jest - .spyOn(extension, 'getValidThreadDirs') - .mockResolvedValue(['dir1', 'dir2']) - jest - .spyOn(extension, 'readThread') - .mockResolvedValueOnce({ updated: '2023-01-01' }) - .mockResolvedValueOnce({ updated: '2023-01-02' }) - - const threads = await extension.getThreads() - - expect(threads).toEqual([ - { updated: '2023-01-02' }, - { updated: '2023-01-01' }, - ]) - }) - - it('should ignore broken threads', async () => { - jest - .spyOn(extension, 'getValidThreadDirs') - .mockResolvedValue(['dir1', 'dir2']) - jest - .spyOn(extension, 'readThread') - .mockResolvedValueOnce(JSON.stringify({ updated: '2023-01-01' })) - .mockResolvedValueOnce('this_is_an_invalid_json_content') - - const threads = await extension.getThreads() - - expect(threads).toEqual([{ updated: '2023-01-01' }]) - }) - - it('should save thread', async () => { - // @ts-ignore - jest.spyOn(fs, 'existsSync').mockResolvedValue(false) - const mkdirSpy = jest.spyOn(fs, 'mkdir').mockResolvedValue({}) - const writeFileSyncSpy = jest - .spyOn(fs, 'writeFileSync') - .mockResolvedValue({}) - - const thread = { id: '1', updated: '2023-01-01' } as any - await extension.saveThread(thread) - - expect(mkdirSpy).toHaveBeenCalled() - expect(writeFileSyncSpy).toHaveBeenCalled() - }) - - it('should delete thread', async () => { - const rmSpy = jest.spyOn(fs, 
'rm').mockResolvedValue({}) - - await extension.deleteThread('1') - - expect(rmSpy).toHaveBeenCalled() - }) - - it('should add new message', async () => { - // @ts-ignore - jest.spyOn(fs, 'existsSync').mockResolvedValue(false) - const mkdirSpy = jest.spyOn(fs, 'mkdir').mockResolvedValue({}) - const appendFileSyncSpy = jest - .spyOn(fs, 'appendFileSync') - .mockResolvedValue({}) - - const message = { - thread_id: '1', - content: [{ type: 'text', text: { annotations: [] } }], - } as any - await extension.addNewMessage(message) - - expect(mkdirSpy).toHaveBeenCalled() - expect(appendFileSyncSpy).toHaveBeenCalled() - }) - - it('should add new image message', async () => { - jest - .spyOn(fs, 'existsSync') - // @ts-ignore - .mockResolvedValueOnce(false) - // @ts-ignore - .mockResolvedValueOnce(false) - // @ts-ignore - .mockResolvedValueOnce(true) - const mkdirSpy = jest.spyOn(fs, 'mkdir').mockResolvedValue({}) - const appendFileSyncSpy = jest - .spyOn(fs, 'appendFileSync') - .mockResolvedValue({}) - jest.spyOn(fs, 'writeBlob').mockResolvedValue({}) - - const message = { - thread_id: '1', - content: [ - { type: 'image', text: { annotations: ['data:image;base64,hehe'] } }, - ], - } as any - await extension.addNewMessage(message) - - expect(mkdirSpy).toHaveBeenCalled() - expect(appendFileSyncSpy).toHaveBeenCalled() - }) - - it('should add new pdf message', async () => { - jest - .spyOn(fs, 'existsSync') - // @ts-ignore - .mockResolvedValueOnce(false) - // @ts-ignore - .mockResolvedValueOnce(false) - // @ts-ignore - .mockResolvedValueOnce(true) - const mkdirSpy = jest.spyOn(fs, 'mkdir').mockResolvedValue({}) - const appendFileSyncSpy = jest - .spyOn(fs, 'appendFileSync') - .mockResolvedValue({}) - jest.spyOn(fs, 'writeBlob').mockResolvedValue({}) - - const message = { - thread_id: '1', - content: [ - { type: 'pdf', text: { annotations: ['data:pdf;base64,hehe'] } }, - ], - } as any - await extension.addNewMessage(message) - - expect(mkdirSpy).toHaveBeenCalled() - expect(appendFileSyncSpy).toHaveBeenCalled() - }) - - it('should store image', async () => { - const writeBlobSpy = jest.spyOn(fs, 'writeBlob').mockResolvedValue({}) - - await extension.storeImage( - 'data:image/png;base64,abcd', - 'path/to/image.png' - ) - - expect(writeBlobSpy).toHaveBeenCalled() - }) - - it('should store file', async () => { - const writeBlobSpy = jest.spyOn(fs, 'writeBlob').mockResolvedValue({}) - - await extension.storeFile( - 'data:application/pdf;base64,abcd', - 'path/to/file.pdf' - ) - - expect(writeBlobSpy).toHaveBeenCalled() - }) -}) - -describe('test readThread', () => { - let extension: JSONConversationalExtension - - beforeEach(() => { - // @ts-ignore - extension = new JSONConversationalExtension() - }) - - it('should read thread', async () => { - jest - .spyOn(fs, 'readFileSync') - .mockResolvedValue(JSON.stringify({ id: '1' })) - const thread = await extension.readThread('1') - expect(thread).toEqual(`{"id":"1"}`) - }) - - it('getValidThreadDirs should return valid thread directories', async () => { - jest - .spyOn(fs, 'readdirSync') - .mockResolvedValueOnce(['1', '2', '3']) - .mockResolvedValueOnce(['thread.json']) - .mockResolvedValueOnce(['thread.json']) - .mockResolvedValueOnce([]) - // @ts-ignore - jest.spyOn(fs, 'existsSync').mockResolvedValue(true) - jest.spyOn(fs, 'fileStat').mockResolvedValue({ - isDirectory: true, - } as any) - const validThreadDirs = await extension.getValidThreadDirs() - expect(validThreadDirs).toEqual(['1', '2']) - }) -}) diff --git 
a/extensions/conversational-extension/src/index.ts b/extensions/conversational-extension/src/index.ts index b34f09181..b3530e5d5 100644 --- a/extensions/conversational-extension/src/index.ts +++ b/extensions/conversational-extension/src/index.ts @@ -1,90 +1,71 @@ import { - fs, - joinPath, ConversationalExtension, Thread, + ThreadAssistantInfo, ThreadMessage, } from '@janhq/core' -import { safelyParseJSON } from './jsonUtil' +import ky from 'ky' +import PQueue from 'p-queue' + +type ThreadList = { + data: Thread[] +} + +type MessageList = { + data: ThreadMessage[] +} /** * JSONConversationalExtension is a ConversationalExtension implementation that provides * functionality for managing threads. */ -export default class JSONConversationalExtension extends ConversationalExtension { - private static readonly _threadFolder = 'file://threads' - private static readonly _threadInfoFileName = 'thread.json' - private static readonly _threadMessagesFileName = 'messages.jsonl' +export default class CortexConversationalExtension extends ConversationalExtension { + queue = new PQueue({ concurrency: 1 }) /** * Called when the extension is loaded. */ async onLoad() { - if (!(await fs.existsSync(JSONConversationalExtension._threadFolder))) { - await fs.mkdir(JSONConversationalExtension._threadFolder) - } + this.queue.add(() => this.healthz()) } /** * Called when the extension is unloaded. */ - onUnload() { - console.debug('JSONConversationalExtension unloaded') - } + onUnload() {} /** * Returns a Promise that resolves to an array of Conversation objects. */ - async getThreads(): Promise<Thread[]> { - try { - const threadDirs = await this.getValidThreadDirs() - - const promises = threadDirs.map((dirName) => this.readThread(dirName)) - const promiseResults = await Promise.allSettled(promises) - const convos = promiseResults - .map((result) => { - if (result.status === 'fulfilled') { - return typeof result.value === 'object' - ? result.value - : safelyParseJSON(result.value) - } - return undefined - }) - .filter((convo) => !!convo) - convos.sort( - (a, b) => new Date(b.updated).getTime() - new Date(a.updated).getTime() - ) - - return convos - } catch (error) { - console.error(error) - return [] - } + async listThreads(): Promise<Thread[]> { + return this.queue.add(() => + ky + .get(`${API_URL}/v1/threads`) + .json<ThreadList>() + .then((e) => e.data) + ) as Promise<Thread[]> } /** * Saves a Thread object to a json file. * @param thread The Thread object to save. */ - async saveThread(thread: Thread): Promise<void> { - try { - const threadDirPath = await joinPath([ - JSONConversationalExtension._threadFolder, - thread.id, - ]) - const threadJsonPath = await joinPath([ - threadDirPath, - JSONConversationalExtension._threadInfoFileName, - ]) - if (!(await fs.existsSync(threadDirPath))) { - await fs.mkdir(threadDirPath) - } + async createThread(thread: Thread): Promise<Thread> { + return this.queue.add(() => + ky.post(`${API_URL}/v1/threads`, { json: thread }).json() + ) as Promise<Thread> + } - - await fs.writeFileSync(threadJsonPath, JSON.stringify(thread, null, 2)) - } catch (err) { - console.error(err) - Promise.reject(err) - } + /** + * Modifies an existing thread. + * @param thread The Thread object with updated values. + */ + async modifyThread(thread: Thread): Promise<void> { + return this.queue + .add(() => + ky.post(`${API_URL}/v1/threads/${thread.id}`, { json: thread }) + ) + .then() } /** @@ -92,189 +73,126 @@ export default class JSONConversationalExtension extends ConversationalExtension * @param threadId The ID of the thread to delete.
*/ async deleteThread(threadId: string): Promise<void> { - const path = await joinPath([ - JSONConversationalExtension._threadFolder, - `${threadId}`, - ]) - try { - await fs.rm(path) - } catch (err) { - console.error(err) - } + return this.queue + .add(() => ky.delete(`${API_URL}/v1/threads/${threadId}`)) + .then() } - async addNewMessage(message: ThreadMessage): Promise<void> { - try { - const threadDirPath = await joinPath([ - JSONConversationalExtension._threadFolder, - message.thread_id, - ]) - const threadMessagePath = await joinPath([ - threadDirPath, - JSONConversationalExtension._threadMessagesFileName, - ]) - if (!(await fs.existsSync(threadDirPath))) await fs.mkdir(threadDirPath) - - if (message.content[0]?.type === 'image') { - const filesPath = await joinPath([threadDirPath, 'files']) - if (!(await fs.existsSync(filesPath))) await fs.mkdir(filesPath) - - const imagePath = await joinPath([filesPath, `${message.id}.png`]) - const base64 = message.content[0].text.annotations[0] - await this.storeImage(base64, imagePath) - if ((await fs.existsSync(imagePath)) && message.content?.length) { - // Use file path instead of blob - message.content[0].text.annotations[0] = `threads/${message.thread_id}/files/${message.id}.png` - } - } - - if (message.content[0]?.type === 'pdf') { - const filesPath = await joinPath([threadDirPath, 'files']) - if (!(await fs.existsSync(filesPath))) await fs.mkdir(filesPath) - - const filePath = await joinPath([filesPath, `${message.id}.pdf`]) - const blob = message.content[0].text.annotations[0] - await this.storeFile(blob, filePath) - - if ((await fs.existsSync(filePath)) && message.content?.length) { - // Use file path instead of blob - message.content[0].text.annotations[0] = `threads/${message.thread_id}/files/${message.id}.pdf` - } - } - await fs.appendFileSync(threadMessagePath, JSON.stringify(message) + '\n') - Promise.resolve() - } catch (err) { - Promise.reject(err) - } + /** + * Adds a new message to a specified thread. + * @param message The ThreadMessage object to be added. + * @returns A Promise that resolves when the message has been added. + */ + async createMessage(message: ThreadMessage): Promise<ThreadMessage> { + return this.queue.add(() => + ky + .post(`${API_URL}/v1/threads/${message.thread_id}/messages`, { + json: message, + }) + .json() + ) as Promise<ThreadMessage> } - async storeImage(base64: string, filePath: string): Promise<void> { - const base64Data = base64.replace(/^data:image\/\w+;base64,/, '') - - try { - await fs.writeBlob(filePath, base64Data) - } catch (err) { - console.error(err) - } + /** + * Modifies a message in a thread. + * @param message The ThreadMessage object with updated values. + * @returns A Promise that resolves to the updated ThreadMessage. + */ + async modifyMessage(message: ThreadMessage): Promise<ThreadMessage> { + return this.queue.add(() => + ky + .post( + `${API_URL}/v1/threads/${message.thread_id}/messages/${message.id}`, + { + json: message, + } + ) + .json() + ) as Promise<ThreadMessage> } - async storeFile(base64: string, filePath: string): Promise<void> { - const base64Data = base64.replace(/^data:application\/pdf;base64,/, '') - try { - await fs.writeBlob(filePath, base64Data) - } catch (err) { - console.error(err) - } + /** + * Deletes a specific message from a thread. + * @param threadId The ID of the thread containing the message. + * @param messageId The ID of the message to be deleted. + * @returns A Promise that resolves when the message has been successfully deleted.
+ */ + async deleteMessage(threadId: string, messageId: string): Promise<void> { + return this.queue + .add(() => + ky.delete(`${API_URL}/v1/threads/${threadId}/messages/${messageId}`) + ) + .then() } - async writeMessages( + /** + * Retrieves all messages for a specified thread. + * @param threadId The ID of the thread to get messages from. + * @returns A Promise that resolves to an array of ThreadMessage objects. + */ + async listMessages(threadId: string): Promise<ThreadMessage[]> { + return this.queue.add(() => + ky + .get(`${API_URL}/v1/threads/${threadId}/messages?order=asc`) + .json<MessageList>() + .then((e) => e.data) + ) as Promise<ThreadMessage[]> + } + + /** + * Retrieves the assistant information for a specified thread. + * @param threadId The ID of the thread for which to retrieve assistant information. + * @returns A Promise that resolves to a ThreadAssistantInfo object containing + * the details of the assistant associated with the specified thread. + */ + async getThreadAssistant(threadId: string): Promise<ThreadAssistantInfo> { + return this.queue.add(() => + ky.get(`${API_URL}/v1/assistants/${threadId}`).json() + ) as Promise<ThreadAssistantInfo> + } + /** + * Creates a new assistant for the specified thread. + * @param threadId The ID of the thread for which the assistant is being created. + * @param assistant The information about the assistant to be created. + * @returns A Promise that resolves to the newly created ThreadAssistantInfo object. + */ + async createThreadAssistant( threadId: string, - messages: ThreadMessage[] - ): Promise<void> { - try { - const threadDirPath = await joinPath([ - JSONConversationalExtension._threadFolder, - threadId, - ]) - const threadMessagePath = await joinPath([ - threadDirPath, - JSONConversationalExtension._threadMessagesFileName, - ]) - if (!(await fs.existsSync(threadDirPath))) await fs.mkdir(threadDirPath) - await fs.writeFileSync( - threadMessagePath, - messages.map((msg) => JSON.stringify(msg)).join('\n') + - (messages.length ? '\n' : '') - ) - Promise.resolve() - } catch (err) { - Promise.reject(err) - } + assistant: ThreadAssistantInfo + ): Promise<ThreadAssistantInfo> { + return this.queue.add(() => + ky + .post(`${API_URL}/v1/assistants/${threadId}`, { json: assistant }) + .json() + ) as Promise<ThreadAssistantInfo> } /** - * A promise builder for reading a thread from a file. - * @param threadDirName the thread dir we are reading from. - * @returns data of the thread + * Modifies an existing assistant for the specified thread. + * @param threadId The ID of the thread for which the assistant is being modified. + * @param assistant The updated information for the assistant. + * @returns A Promise that resolves to the updated ThreadAssistantInfo object. */ - async readThread(threadDirName: string): Promise<any> { - return fs.readFileSync( - await joinPath([ - JSONConversationalExtension._threadFolder, - threadDirName, - JSONConversationalExtension._threadInfoFileName, - ]), - 'utf-8' - ) + async modifyThreadAssistant( + threadId: string, + assistant: ThreadAssistantInfo + ): Promise<ThreadAssistantInfo> { + return this.queue.add(() => + ky + .patch(`${API_URL}/v1/assistants/${threadId}`, { json: assistant }) + .json() + ) as Promise<ThreadAssistantInfo> } /** - * Returns a Promise that resolves to an array of thread directories.
- * @private + * Performs a health check on cortex.cpp. + * @returns A Promise that resolves once the server responds. */ - async getValidThreadDirs(): Promise<string[]> { - const fileInsideThread: string[] = await fs.readdirSync( - JSONConversationalExtension._threadFolder - ) - - const threadDirs: string[] = [] - for (let i = 0; i < fileInsideThread.length; i++) { - const path = await joinPath([ - JSONConversationalExtension._threadFolder, - fileInsideThread[i], - ]) - if (!(await fs.fileStat(path))?.isDirectory) continue - - const isHavingThreadInfo = (await fs.readdirSync(path)).includes( - JSONConversationalExtension._threadInfoFileName - ) - if (!isHavingThreadInfo) { - console.debug(`Ignore ${path} because it does not have thread info`) - continue - } - - threadDirs.push(fileInsideThread[i]) - } - return threadDirs - } - - async getAllMessages(threadId: string): Promise<ThreadMessage[]> { - try { - const threadDirPath = await joinPath([ - JSONConversationalExtension._threadFolder, - threadId, - ]) - - const files: string[] = await fs.readdirSync(threadDirPath) - if ( - !files.includes(JSONConversationalExtension._threadMessagesFileName) - ) { - console.debug(`${threadDirPath} not contains message file`) - return [] - } - - const messageFilePath = await joinPath([ - threadDirPath, - JSONConversationalExtension._threadMessagesFileName, - ]) - - let readResult = await fs.readFileSync(messageFilePath, 'utf-8') - - if (typeof readResult === 'object') { - readResult = JSON.stringify(readResult) - } - - const result = readResult.split('\n').filter((line) => line !== '') - - const messages: ThreadMessage[] = [] - result.forEach((line: string) => { - const message = safelyParseJSON(line) - if (message) messages.push(safelyParseJSON(line)) + healthz(): Promise<void> { + return ky + .get(`${API_URL}/healthz`, { + retry: { limit: 20, delay: () => 500, methods: ['get'] }, }) - return messages - } catch (err) { - console.error(err) - return [] - } + .then(() => {}) } } diff --git a/extensions/conversational-extension/src/jsonUtil.ts b/extensions/conversational-extension/src/jsonUtil.ts deleted file mode 100644 index 7f83cadce..000000000 --- a/extensions/conversational-extension/src/jsonUtil.ts +++ /dev/null @@ -1,14 +0,0 @@ -// Note about performance -// The v8 JavaScript engine used by Node.js cannot optimise functions which contain a try/catch block. -// v8 4.5 and above can optimise try/catch -export function safelyParseJSON(json) { - // This function cannot be optimised, it's best to - // keep it small! - var parsed - try { - parsed = JSON.parse(json) - } catch (e) { - return undefined - } - return parsed // Could be undefined!
-} diff --git a/extensions/conversational-extension/webpack.config.js b/extensions/conversational-extension/webpack.config.js index e4a0b2179..0448af421 100644 --- a/extensions/conversational-extension/webpack.config.js +++ b/extensions/conversational-extension/webpack.config.js @@ -17,7 +17,12 @@ module.exports = { filename: 'index.js', // Adjust the output file name as needed library: { type: 'module' }, // Specify ESM output format }, - plugins: [new webpack.DefinePlugin({})], + plugins: [ + new webpack.DefinePlugin({ + API_URL: JSON.stringify('http://127.0.0.1:39291'), + SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'), + }), + ], resolve: { extensions: ['.ts', '.js'], }, diff --git a/extensions/inference-cortex-extension/bin/version.txt b/extensions/inference-cortex-extension/bin/version.txt index a6a3a43c3..cf13dd42b 100644 --- a/extensions/inference-cortex-extension/bin/version.txt +++ b/extensions/inference-cortex-extension/bin/version.txt @@ -1 +1 @@ -1.0.4 \ No newline at end of file +1.0.5-rc2 diff --git a/extensions/inference-cortex-extension/download.bat b/extensions/inference-cortex-extension/download.bat index 7d9a9213a..0e7eef20e 100644 --- a/extensions/inference-cortex-extension/download.bat +++ b/extensions/inference-cortex-extension/download.bat @@ -2,7 +2,7 @@ set BIN_PATH=./bin set SHARED_PATH=./../../electron/shared set /p CORTEX_VERSION=<./bin/version.txt -set ENGINE_VERSION=0.1.40 +set ENGINE_VERSION=0.1.42 @REM Download cortex.llamacpp binaries set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/v%ENGINE_VERSION%/cortex.llamacpp-%ENGINE_VERSION%-windows-amd64 @@ -38,4 +38,4 @@ for %%F in (%SUBFOLDERS%) do ( ) ) -echo DLL files moved successfully. \ No newline at end of file +echo DLL files moved successfully. 
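A note on the pattern behind the new `CortexConversationalExtension` above: every request is funneled through a `p-queue` instance with `concurrency: 1`, so calls to the Cortex server run strictly one at a time, and `onLoad` enqueues the `healthz` probe first, which means no thread or message request fires until cortex.cpp answers. Below is a minimal self-contained sketch of that startup gate; the `API_URL` value, endpoint paths, and retry options are taken from the diff, while the standalone `listThreads` helper is illustrative rather than the extension's actual shape.

```typescript
import ky from 'ky'
import PQueue from 'p-queue'

// In the extension itself, API_URL is injected at build time by webpack's
// DefinePlugin (see the webpack.config.js hunk above); it is hard-coded here.
const API_URL = 'http://127.0.0.1:39291'

// concurrency: 1 turns the queue into a strict FIFO pipeline.
const queue = new PQueue({ concurrency: 1 })

// Enqueued first: GET /healthz, retried up to 20 times at 500 ms intervals.
// Every task added later waits behind it, so the server is known to be up
// before any API call goes out.
queue.add(() =>
  ky.get(`${API_URL}/healthz`, {
    retry: { limit: 20, delay: () => 500, methods: ['get'] },
  })
)

// Because of the queue, this request only fires after the health check settles.
async function listThreads(): Promise<unknown[]> {
  return queue.add(() =>
    ky
      .get(`${API_URL}/v1/threads`)
      .json<{ data: unknown[] }>()
      .then((res) => res.data)
  ) as Promise<unknown[]>
}

listThreads().then((threads) => console.log('threads:', threads))
```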
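Related: `API_URL` appears in the extension source as a bare identifier with no import. The `webpack.config.js` change above is what makes that work: `DefinePlugin` performs textual substitution at build time, splicing the configured value into the bundle as raw source text. A stripped-down sketch of the mechanism, with entry and output fields omitted and the values copied from the diff (the `.ts` file name is illustrative; the project uses `webpack.config.js`):

```typescript
import webpack from 'webpack'

const config: webpack.Configuration = {
  plugins: [
    new webpack.DefinePlugin({
      // JSON.stringify matters here: DefinePlugin inserts the value verbatim
      // as code, so a string must carry its own quotes to remain a string
      // literal after substitution.
      API_URL: JSON.stringify('http://127.0.0.1:39291'),
      SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
    }),
  ],
}

export default config
```

After this substitution, every occurrence of `API_URL` in the bundled output is the literal `'http://127.0.0.1:39291'`; on the TypeScript side this is typically paired with an ambient declaration such as `declare const API_URL: string`, which is assumed here and not shown in the diff.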
diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh index f62e5961b..b0f3b36e3 100755 --- a/extensions/inference-cortex-extension/download.sh +++ b/extensions/inference-cortex-extension/download.sh @@ -2,7 +2,7 @@ # Read CORTEX_VERSION CORTEX_VERSION=$(cat ./bin/version.txt) -ENGINE_VERSION=0.1.40 +ENGINE_VERSION=0.1.42 CORTEX_RELEASE_URL="https://github.com/janhq/cortex.cpp/releases/download" ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v${ENGINE_VERSION}/cortex.llamacpp-${ENGINE_VERSION}" CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v${ENGINE_VERSION}" diff --git a/extensions/inference-cortex-extension/rollup.config.ts b/extensions/inference-cortex-extension/rollup.config.ts index 8fa61e91d..266281a75 100644 --- a/extensions/inference-cortex-extension/rollup.config.ts +++ b/extensions/inference-cortex-extension/rollup.config.ts @@ -120,7 +120,7 @@ export default [ SETTINGS: JSON.stringify(defaultSettingJson), CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'), CORTEX_SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'), - CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.40'), + CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.42'), }), // Allow json resolution json(), diff --git a/extensions/inference-groq-extension/resources/models.json b/extensions/inference-groq-extension/resources/models.json index 04b60bfdd..b4b013dad 100644 --- a/extensions/inference-groq-extension/resources/models.json +++ b/extensions/inference-groq-extension/resources/models.json @@ -61,37 +61,6 @@ }, "engine": "groq" }, - { - "sources": [ - { - "url": "https://groq.com" - } - ], - "id": "llama-3.1-70b-versatile", - "object": "model", - "name": "Groq Llama 3.1 70b Versatile", - "version": "1.1", - "description": "Groq Llama 3.1 70b Versatile with supercharged speed!", - "format": "api", - "settings": {}, - "parameters": { - "max_tokens": 8000, - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Meta", - "tags": [ - "General", - "Big Context Length" - ] - }, - "engine": "groq" - }, { "sources": [ { diff --git a/extensions/inference-openai-extension/package.json b/extensions/inference-openai-extension/package.json index 9700383d6..d5b2a1d7a 100644 --- a/extensions/inference-openai-extension/package.json +++ b/extensions/inference-openai-extension/package.json @@ -1,7 +1,7 @@ { "name": "@janhq/inference-openai-extension", "productName": "OpenAI Inference Engine", - "version": "1.0.4", + "version": "1.0.5", "description": "This extension enables OpenAI chat completion API calls", "main": "dist/index.js", "module": "dist/module.js", diff --git a/extensions/inference-openai-extension/resources/models.json b/extensions/inference-openai-extension/resources/models.json index a34bc5460..fc6896882 100644 --- a/extensions/inference-openai-extension/resources/models.json +++ b/extensions/inference-openai-extension/resources/models.json @@ -67,7 +67,9 @@ "version": "1.1", "description": "OpenAI GPT 4o is a new flagship model with fast speed and high quality", "format": "api", - "settings": {}, + "settings": { + "vision_model": true + }, "parameters": { "max_tokens": 4096, "temperature": 0.7, @@ -97,10 +99,10 @@ "format": "api", "settings": {}, "parameters": { + "max_tokens": 32768, "temperature": 1, "top_p": 1, "stream": true, - "max_tokens": 32768, "frequency_penalty": 0, "presence_penalty": 0 }, @@ 
-124,9 +126,9 @@ "format": "api", "settings": {}, "parameters": { + "max_tokens": 65536, "temperature": 1, "top_p": 1, - "max_tokens": 65536, "stream": true, "frequency_penalty": 0, "presence_penalty": 0 diff --git a/extensions/inference-openai-extension/src/index.ts b/extensions/inference-openai-extension/src/index.ts index 18bc4e0aa..2612ed815 100644 --- a/extensions/inference-openai-extension/src/index.ts +++ b/extensions/inference-openai-extension/src/index.ts @@ -74,6 +74,11 @@ export default class JanInferenceOpenAIExtension extends RemoteOAIEngine { * @returns */ transformPayload = (payload: OpenAIPayloadType): OpenAIPayloadType => { + // Remove empty stop words + if (payload.stop?.length === 0) { + const { stop, ...params } = payload + payload = params + } // Transform the payload for preview models if (this.previewModels.includes(payload.model)) { const { max_tokens, stop, ...params } = payload diff --git a/joi/package.json b/joi/package.json index 576c33d72..8de32f820 100644 --- a/joi/package.json +++ b/joi/package.json @@ -5,7 +5,6 @@ "module": "dist/esm/index.js", "types": "dist/index.d.ts", "description": "A collection of UI component", - "private": true, "files": [ "dist" ], diff --git a/package.json b/package.json index 255dda6c7..8b24f14ba 100644 --- a/package.json +++ b/package.json @@ -27,12 +27,12 @@ "pre-install:linux": "find extensions -type f -path \"**/*.tgz\" -exec cp {} pre-install \\;", "pre-install:win32": "powershell -Command \"Get-ChildItem -Path \"extensions\" -Recurse -File -Filter \"*.tgz\" | ForEach-Object { Copy-Item -Path $_.FullName -Destination \"pre-install\" }\"", "pre-install": "run-script-os", - "copy:assets": "cpx \"pre-install/*.tgz\" \"electron/pre-install/\" && cpx \"themes/**\" \"electron/themes\" && cpx \"docs/openapi/**\" \"electron/docs/openapi\"", + "copy:assets": "cpx \"pre-install/*.tgz\" \"electron/pre-install/\" && cpx \"themes/**\" \"electron/themes\"", "dev:electron": "yarn copy:assets && yarn workspace jan dev", "dev:web": "yarn workspace @janhq/web dev", - "dev:server": "yarn copy:assets && yarn workspace @janhq/server dev", + "dev:server": "yarn workspace @janhq/server dev", "dev": "turbo run dev --parallel --filter=!@janhq/server", - "build:server": "yarn copy:assets && cd server && yarn install && yarn run build", + "build:server": "cd server && yarn install && yarn run build", "build:core": "cd core && yarn install && yarn run build", "build:web": "yarn workspace @janhq/web build && cpx \"web/out/**\" \"electron/renderer/\"", "build:electron": "yarn copy:assets && yarn workspace jan build", diff --git a/server/cortex.json b/server/cortex.json new file mode 100644 index 000000000..917cff354 --- /dev/null +++ b/server/cortex.json @@ -0,0 +1,6308 @@ +{ + "openapi": "3.0.0", + "paths": { + "/assistants": { + "post": { + "operationId": "AssistantsController_create", + "summary": "Create assistant", + "description": "Creates a new assistant.", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateAssistantDto" + } + } + } + }, + "responses": { + "201": { + "description": "The assistant has been successfully created." + } + }, + "tags": ["Assistants"] + }, + "get": { + "operationId": "AssistantsController_findAll", + "summary": "List assistants", + "description": "Returns a list of assistants.", + "parameters": [ + { + "name": "limit", + "required": false, + "in": "query", + "description": "A limit on the number of objects to be returned. 
Limit can range between 1 and 100, and the default is 20.", + "schema": { + "type": "number" + } + }, + { + "name": "order", + "required": false, + "in": "query", + "description": "Sort order by the created_at timestamp of the objects. asc for ascending order and desc for descending order.", + "schema": { + "type": "string" + } + }, + { + "name": "after", + "required": false, + "in": "query", + "description": "A cursor for use in pagination. after is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list.", + "schema": { + "type": "string" + } + }, + { + "name": "before", + "required": false, + "in": "query", + "description": "A cursor for use in pagination. before is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list.", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Ok", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/AssistantEntity" + } + } + } + } + } + }, + "tags": ["Assistants"] + } + }, + "/assistants/{id}": { + "get": { + "operationId": "AssistantsController_findOne", + "summary": "Get assistant", + "description": "Retrieves a specific assistant defined by an assistant's `id`.", + "parameters": [ + { + "name": "id", + "required": true, + "in": "path", + "description": "The unique identifier of the assistant.", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Ok", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/AssistantEntity" + } + } + } + } + }, + "tags": ["Assistants"] + }, + "delete": { + "operationId": "AssistantsController_remove", + "summary": "Delete assistant", + "description": "Deletes a specific assistant defined by an assistant's `id`.", + "parameters": [ + { + "name": "id", + "required": true, + "in": "path", + "description": "The unique identifier of the assistant.", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "The assistant has been successfully deleted.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DeleteAssistantResponseDto" + } + } + } + } + }, + "tags": ["Assistants"] + } + }, + "/healthz": { + "get": { + "operationId": "HealthController_check", + "summary": "Check health", + "description": "Performs a comprehensive check of the application's health status.", + "parameters": [], + "responses": { + "200": { + "description": "Ok", + "content": { + "application/json": {} + } + } + }, + "tags": ["Server"] + } + }, + "/processManager/destroy": { + "delete": { + "operationId": "Terminate server process", + "summary": "Terminate server", + "description": "Initiates the shutdown process for the server, ensuring that all active connections are gracefully closed and any ongoing processes are properly terminated.", + "parameters": [], + "responses": { + "200": { + "description": "Ok", + "content": { + "application/json": {} + } + } + }, + "tags": ["Server"] + } + }, + "/embeddings": { + "post": { + "summary": "Create embeddings", + "description": "Creates an embedding vector representing the input text.", + "requestBody": { + "required": true, + 
"content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "input": { + "oneOf": [ + { + "type": "string", + "description": "The string that will be turned into an embedding." + }, + { + "type": "array", + "description": "The array of strings that will be turned into an embedding.", + "items": { + "type": "string" + } + }, + { + "type": "array", + "description": "The array of integers that will be turned into an embedding.", + "items": { + "type": "integer" + } + }, + { + "type": "array", + "description": "The array of arrays containing integers that will be turned into an embedding.", + "items": { + "type": "array", + "items": { + "type": "integer" + } + } + } + ], + "description": "Input text to embed, encoded as a string or array of tokens. Cannot be empty." + }, + "model": { + "type": "string", + "description": "ID of the model to use.", + "example": "text-embedding-ada-002" + }, + "encoding_format": { + "type": "string", + "description": "The format to return the embeddings in.", + "enum": ["float", "base64"], + "default": "float" + } + }, + "required": ["input", "model"] + } + } + } + }, + "responses": { + "200": { + "description": "A list of embedding vectors", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "index": { + "type": "integer", + "description": "The index of the embedding in the list of embeddings." + }, + "embedding": { + "type": "array", + "items": { + "type": "number" + }, + "description": "The embedding vector, which is a list of floats." + }, + "object": { + "type": "string", + "description": "The object type, which is always 'embedding'.", + "example": "embedding" + } + } + } + } + } + } + } + } + } + }, + "tags": ["Embeddings"] + } + }, + "/chat/completions": { + "post": { + "operationId": "ChatController_create", + "summary": "Create chat completion", + "description": "Creates a model response for the given conversation. 
The following parameters are not working for the `TensorRT-LLM` engine:\n- `frequency_penalty`\n- `presence_penalty`\n- `top_p`", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateChatCompletionDto" + } + } + } + }, + "responses": { + "200": { + "description": "Ok", + "content": { + "application/json": { + "schema": { + "oneOf": [ + { + "title": "Chat Completion Response", + "$ref": "#/components/schemas/ChatCompletionResponseDto" + }, + { + "title": "Chat Completion Chunk Response", + "$ref": "#/components/schemas/ChatCompletionChunkResponseDto" + } + ] + } + } + } + } + }, + "tags": ["Chat"] + } + }, + "/models/pull": { + "post": { + "operationId": "ModelsController_pullModel", + "summary": "Pull a model", + "description": "Pull a model from a remote source.", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PullModelRequest" + } + } + } + }, + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "task": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "items": { + "type": "array", + "items": { + "type": "object", + "properties": { + "bytes": { + "type": "integer" + }, + "checksum": { + "type": "string" + }, + "downloadUrl": { + "type": "string" + }, + "downloadedBytes": { + "type": "integer" + }, + "id": { + "type": "string" + }, + "localPath": { + "type": "string" + } + } + } + }, + "type": { + "type": "string" + } + } + } + } + }, + "example": { + "message": "Model start downloading!", + "task": { + "id": "TheBloke:Mistral-7B-Instruct-v0.1-GGUF:mistral-7b-instruct-v0.1.Q3_K_L.gguf", + "items": [ + { + "bytes": 3822024352, + "checksum": "N/A", + "downloadUrl": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q3_K_L.gguf", + "downloadedBytes": 0, + "id": "TheBloke:Mistral-7B-Instruct-v0.1-GGUF:mistral-7b-instruct-v0.1.Q3_K_L.gguf", + "localPath": "/Users/user_name/cortexcpp/models/huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/mistral-7b-instruct-v0.1.Q3_K_L.gguf" + } + ], + "type": "Model" + } + } + } + } + }, + "400": { + "description": "Bad request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SimpleErrorResponse" + } + } + } + } + }, + "tags": ["Pulling Models"] + }, + "delete": { + "tags": ["Pulling Models"], + "summary": "Stop model download", + "description": "Stops the download of a model with the corresponding taskId provided in the request body", + "operationId": "ModelsController_stopModelDownload", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "taskId": { + "type": "string", + "description": "The unique identifier of the download task to be stopped" + } + }, + "required": ["taskId"] + } + } + } + }, + "responses": { + "200": { + "description": "Download stopped successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "example": "Download stopped successfully" + }, + "taskId": { + "type": "string", + "example": "task-123456" + } + } + } + } + } + }, + "400": { + "description": "Bad request", + "content": { + "application/json": { + "schema": { + "type": "object", + 
"properties": { + "error": { + "type": "string", + "example": "Invalid taskId" + } + } + } + } + } + }, + "404": { + "description": "Task not found", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "Download task not found" + } + } + } + } + } + }, + "500": { + "description": "Internal server error", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "An unexpected error occurred" + } + } + } + } + } + } + } + } + }, + "/models/add": { + "post": { + "operationId": "ModelsController_addModel", + "summary": "Add a remote model", + "description": "Add a new remote model configuration to the system.", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/AddModelRequest" + } + } + } + }, + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "model": { + "type": "object", + "properties": { + "model": { + "type": "string" + }, + "engine": { + "type": "string" + }, + "version": { + "type": "string" + } + } + } + } + }, + "example": { + "message": "Model added successfully!", + "model": { + "model": "claude-3-5-sonnet-20241022", + "engine": "anthropic", + "version": "2023-06-01" + } + } + } + } + }, + "400": { + "description": "Bad request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SimpleErrorResponse" + } + } + } + } + }, + "tags": ["Pulling Models"] + } + }, + "/models": { + "get": { + "operationId": "ModelsController_findAll", + "summary": "List models", + "description": "Lists the currently available models, and provides basic information about each one such as the owner and availability. [Equivalent to OpenAI's list model](https://platform.openai.com/docs/api-reference/models/list).", + "parameters": [], + "responses": { + "200": { + "description": "Ok", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ListModelsResponseDto" + } + } + } + } + }, + "tags": ["Running Models"] + } + }, + "/models/start": { + "post": { + "operationId": "ModelsController_startModel", + "summary": "Start model", + "description": "Load a model into memory. Note: Request body parameters will override those loaded from model.yml", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ModelStartDto" + }, + "example": { + "model": "llama3:8b-gguf-q6-k" + } + } + } + }, + "responses": { + "200": { + "description": "The model has been successfully started.", + "content": { + "application/json": { + "example": { + "message": "Started successfully!" + } + } + } + } + }, + "tags": ["Running Models"] + } + }, + "/models/stop": { + "post": { + "operationId": "ModelsController_stopModel", + "summary": "Stop model", + "description": "Unload model from memory", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ModelStopDto" + }, + "example": { + "model": "llama3:8b-gguf-q6-k" + } + } + } + }, + "responses": { + "200": { + "description": "The model has been successfully started.", + "content": { + "application/json": { + "example": { + "message": "Stopped successfully!" 
+ } + } + } + } + }, + "tags": ["Running Models"] + } + }, + "/models/{id}": { + "get": { + "operationId": "ModelsController_findOne", + "summary": "Get model", + "description": "Retrieves a model instance, providing basic information about the model such as the owner and permissions. [Equivalent to OpenAI's list model](https://platform.openai.com/docs/api-reference/models/retrieve).", + "parameters": [ + { + "name": "id", + "required": true, + "in": "path", + "description": "The unique identifier of the model.", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Ok", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ModelDto" + } + } + } + } + }, + "tags": ["Running Models"] + }, + "delete": { + "operationId": "ModelsController_remove", + "summary": "Delete model", + "description": "Deletes a model. [Equivalent to OpenAI's delete model](https://platform.openai.com/docs/api-reference/models/delete).", + "parameters": [ + { + "name": "id", + "required": true, + "in": "path", + "description": "The unique identifier of the model.", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "The model has been successfully deleted.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DeleteModelResponseDto" + } + } + } + } + }, + "tags": ["Running Models"] + } + }, + "/models/{model}": { + "patch": { + "operationId": "ModelsController_update", + "summary": "Update model", + "description": "Updates a model instance defined by a model's `id`.", + "parameters": [ + { + "name": "model", + "required": true, + "in": "path", + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UpdateModelDto" + } + } + } + }, + "responses": { + "200": { + "description": "The model has been successfully updated.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UpdateModelDto" + } + } + } + } + }, + "tags": ["Running Models"] + } + }, + "/models/import": { + "post": { + "operationId": "ModelsController_importModel", + "summary": "Import model", + "description": "Imports a model from a specified path.", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ImportModelRequest" + }, + "example": { + "model": "model-id", + "modelPath": "/path/to/gguf", + "name": "model display name", + "option": "symlink" + } + } + } + }, + "responses": { + "200": { + "description": "Model is imported successfully!", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ImportModelResponse" + }, + "example": { + "message": "Model is imported successfully!", + "modelHandle": "model-id", + "result": "OK" + } + } + } + } + }, + "tags": ["Pulling Models"] + } + }, + "/models/sources": { + "post": { + "summary": "Add a model source", + "description": "User can add a Huggingface Organization or Repository", + "requestBody": { + "required": false, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "source": { + "type": "string", + "description": "The url of model source to add", + "example": "https://huggingface.co/cortexso/tinyllama" + } + } + } + } + } + }, + "responses": { + "200": { + "description": "Successful installation", + "content": { + "application/json": { + "schema": { + "type": "object", + 
"properties": { + "message": { + "type": "string", + "example": "Added model source" + } + } + } + } + } + } + }, + "tags": ["Pulling Models"] + }, + "delete": { + "summary": "Remove a model source", + "description": "User can remove a Huggingface Organization or Repository", + "requestBody": { + "required": false, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "source": { + "type": "string", + "description": "The url of model source to remove", + "example": "https://huggingface.co/cortexso/tinyllama" + } + } + } + } + } + }, + "responses": { + "200": { + "description": "Successful uninstallation", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "description": "Removed model source successfully!", + "example": "Removed model source successfully!" + } + } + } + } + } + }, + "400": { + "description": "Bad request", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "description": "Error message describing the issue with the request" + } + } + } + } + } + } + }, + "tags": ["Pulling Models"] + } + }, + "/threads": { + "post": { + "summary": "Create Thread", + "description": "Creates a new thread with optional metadata.", + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "metadata": { + "type": "object", + "properties": { + "title": { + "type": "string", + "description": "Title of the thread" + } + }, + "description": "Optional metadata for the thread" + } + } + }, + "example": { + "metadata": { + "title": "New Thread" + } + } + } + }, + "required": false + }, + "responses": { + "200": { + "description": "Thread created successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "created_at": { + "type": "integer", + "description": "Unix timestamp of when the thread was created" + }, + "id": { + "type": "string", + "description": "Unique identifier for the thread" + }, + "metadata": { + "type": "object", + "properties": { + "title": { + "type": "string", + "description": "Title of the thread" + } + }, + "description": "Metadata associated with the thread" + }, + "object": { + "type": "string", + "description": "Type of object, always 'thread'" + } + }, + "required": ["created_at", "id", "object"] + }, + "example": { + "created_at": 1734020845, + "id": "0001KNP3QDX314435VAEGW1Z2X", + "metadata": { + "title": "New Thread" + }, + "object": "thread" + } + } + } + } + }, + "tags": ["Threads"] + }, + "get": { + "summary": "List Threads", + "description": "Returns a list of threads with their metadata.", + "responses": { + "200": { + "description": "List of threads retrieved successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "object": { + "type": "string", + "description": "Type of the list response, always 'list'" + }, + "data": { + "type": "array", + "description": "Array of thread objects", + "items": { + "type": "object", + "properties": { + "created_at": { + "type": "integer", + "description": "Unix timestamp of when the thread was created" + }, + "id": { + "type": "string", + "description": "Unique identifier for the thread" + }, + "metadata": { + "type": "object", + "properties": { + "title": { + "type": "string", + "description": "Title of the thread" + }, + "lastMessage": { + "type": "string", + "description": "Content of the last 
message in the thread" + } + }, + "description": "Metadata associated with the thread" + }, + "object": { + "type": "string", + "description": "Type of object, always 'thread'" + } + }, + "required": ["created_at", "id", "object"] + } + } + }, + "required": ["object", "data"] + }, + "example": { + "data": [ + { + "created_at": 1734020845, + "id": "0001KNP3QDX314435VAEGW1Z2X", + "metadata": { + "title": "New Thread" + }, + "object": "thread" + }, + { + "created_at": 1734020803, + "id": "0001KNP3P3DAQSDVEQGRBTCTNJ", + "metadata": { + "title": "" + }, + "object": "thread" + } + ], + "object": "list" + } + } + } + } + }, + "tags": ["Threads"] + } + }, + "/threads/{id}": { + "get": { + "summary": "Retrieve Thread", + "description": "Retrieves a specific thread by its ID.", + "parameters": [ + { + "name": "id", + "in": "path", + "required": true, + "description": "The ID of the thread to retrieve", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Thread retrieved successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "created_at": { + "type": "integer", + "description": "Unix timestamp of when the thread was created" + }, + "id": { + "type": "string", + "description": "Unique identifier for the thread" + }, + "metadata": { + "type": "object", + "properties": { + "lastMessage": { + "type": "string", + "description": "Content of the last message in the thread" + }, + "title": { + "type": "string", + "description": "Title of the thread" + } + }, + "description": "Metadata associated with the thread" + }, + "object": { + "type": "string", + "description": "Type of object, always 'thread'" + } + }, + "required": ["created_at", "id", "object"] + }, + "example": { + "created_at": 1732370026, + "id": "jan_1732370027", + "metadata": { + "lastMessage": "Based on the context, I'm not sure how to build a unique experience quickly and easily. The text mentions that there are some concerns about Android apps providing consistent experiences for different users, which makes me skeptical about building one.\n\nSpecifically, it says:\n\n* \"Might not pass CTS\" (Computer Science Technology standards)\n* \"Might not comply with CDD\" (Consumer Development Division standards)\n\nThis suggests that building a unique experience for all users could be challenging or impossible. 
Therefore, I don't know how to build a unique experience quickly and easily.\n\nWould you like me to try again?", + "title": "hello" + }, + "object": "thread" + } + } + } + } + }, + "tags": ["Threads"] + }, + "patch": { + "summary": "Modify Thread", + "description": "Updates a specific thread's metadata.", + "parameters": [ + { + "name": "id", + "in": "path", + "required": true, + "description": "The ID of the thread to modify", + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "metadata": { + "type": "object", + "properties": { + "title": { + "type": "string", + "description": "New title for the thread" + } + }, + "description": "Metadata to update" + } + } + }, + "example": { + "metadata": { + "title": "my title" + } + } + } + } + }, + "responses": { + "200": { + "description": "Thread modified successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "created_at": { + "type": "integer", + "description": "Unix timestamp of when the thread was created" + }, + "id": { + "type": "string", + "description": "Unique identifier for the thread" + }, + "metadata": { + "type": "object", + "properties": { + "title": { + "type": "string", + "description": "Updated title of the thread" + } + }, + "description": "Updated metadata for the thread" + }, + "object": { + "type": "string", + "description": "Type of object, always 'thread'" + } + }, + "required": ["created_at", "id", "object"] + }, + "example": { + "created_at": 1733301054, + "id": "0001KN04SY7D75K0MPTXMXCH39", + "metadata": { + "title": "my title" + }, + "object": "thread" + } + } + } + } + }, + "tags": ["Threads"] + }, + "delete": { + "summary": "Delete Thread", + "description": "Deletes a specific thread by its ID.", + "parameters": [ + { + "name": "id", + "in": "path", + "required": true, + "description": "The ID of the thread to delete", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Thread deleted successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "deleted": { + "type": "boolean", + "description": "Indicates if the thread was successfully deleted" + }, + "id": { + "type": "string", + "description": "ID of the deleted thread" + }, + "object": { + "type": "string", + "description": "Type of object, always 'thread.deleted'" + } + }, + "required": ["deleted", "id", "object"] + }, + "example": { + "deleted": true, + "id": "jan_1732370027", + "object": "thread.deleted" + } + } + } + } + }, + "tags": ["Threads"] + } + }, + "/threads/{thread_id}/messages": { + "post": { + "summary": "Create Message", + "description": "Creates a new message in a thread.", + "parameters": [ + { + "name": "thread_id", + "in": "path", + "required": true, + "description": "The ID of the thread to create the message in", + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "role": { + "type": "string", + "description": "Role of the message sender", + "enum": ["user", "assistant"] + }, + "content": { + "type": "string", + "description": "The content of the message" + } + }, + "required": ["role", "content"] + }, + "example": { + "role": "user", + "content": "Hello, world!" 
+ } + } + } + }, + "responses": { + "200": { + "description": "Message created successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for the message" + }, + "object": { + "type": "string", + "description": "Type of object, always 'thread.message'" + }, + "created_at": { + "type": "integer", + "description": "Unix timestamp of when the message was created" + }, + "completed_at": { + "type": "integer", + "description": "Unix timestamp of when the message was completed" + }, + "thread_id": { + "type": "string", + "description": "ID of the thread this message belongs to" + }, + "role": { + "type": "string", + "description": "Role of the message sender", + "enum": ["user", "assistant"] + }, + "status": { + "type": "string", + "description": "Status of the message", + "enum": ["completed"] + }, + "content": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "Type of content", + "enum": ["text"] + }, + "text": { + "type": "object", + "properties": { + "value": { + "type": "string", + "description": "The message text" + }, + "annotations": { + "type": "array", + "description": "Array of annotations for the text" + } + } + } + } + } + }, + "metadata": { + "type": "object", + "description": "Additional metadata for the message" + } + }, + "required": [ + "id", + "object", + "created_at", + "completed_at", + "thread_id", + "role", + "status", + "content" + ] + }, + "example": { + "completed_at": 1734023130, + "content": [ + { + "text": { + "annotations": [], + "value": "Hello, world!" + }, + "type": "text" + } + ], + "created_at": 1734023130, + "id": "0001KNP5YT00GW0X476W5TVBFE", + "metadata": {}, + "object": "thread.message", + "role": "user", + "status": "completed", + "thread_id": "jan_1732370027" + } + } + } + } + }, + "tags": ["Messages"] + }, + "get": { + "summary": "List Messages", + "description": "Retrieves a list of messages in a thread with optional pagination and filtering.", + "parameters": [ + { + "name": "thread_id", + "in": "path", + "required": true, + "description": "The ID of the thread to list messages from", + "schema": { + "type": "string" + } + }, + { + "name": "limit", + "in": "query", + "required": false, + "description": "Maximum number of messages to return", + "schema": { + "type": "integer" + } + }, + { + "name": "order", + "in": "query", + "required": false, + "description": "Sort order of messages", + "schema": { + "type": "string", + "enum": ["asc", "desc"] + } + }, + { + "name": "after", + "in": "query", + "required": false, + "description": "Cursor for fetching messages after this message ID", + "schema": { + "type": "string" + } + }, + { + "name": "before", + "in": "query", + "required": false, + "description": "Cursor for fetching messages before this message ID", + "schema": { + "type": "string" + } + }, + { + "name": "run_id", + "in": "query", + "required": false, + "description": "Filter messages by run ID", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Messages retrieved successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "object": { + "type": "string", + "description": "Type of the list response, always 'list'" + }, + "data": { + "type": "array", + "description": "Array of message objects", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + 
"description": "Unique identifier for the message" + }, + "object": { + "type": "string", + "description": "Type of object, always 'thread.message'" + }, + "created_at": { + "type": "integer", + "description": "Unix timestamp of when the message was created" + }, + "thread_id": { + "type": "string", + "description": "ID of the thread this message belongs to" + }, + "role": { + "type": "string", + "description": "Role of the message sender", + "enum": ["assistant", "user"] + }, + "status": { + "type": "string", + "description": "Status of the message", + "enum": ["completed"] + }, + "content": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "Type of content", + "enum": ["text"] + }, + "text": { + "type": "object", + "properties": { + "value": { + "type": "string", + "description": "The message text" + }, + "annotations": { + "type": "array", + "description": "Array of annotations for the text" + } + } + } + } + } + }, + "metadata": { + "type": "object", + "description": "Additional metadata for the message" + }, + "attachments": { + "type": "array", + "items": { + "type": "object", + "properties": { + "file_id": { + "type": "string", + "description": "ID of the attached file" + }, + "tools": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "Type of tool used" + } + } + } + } + } + } + } + }, + "required": [ + "id", + "object", + "created_at", + "thread_id", + "role", + "content" + ] + } + } + }, + "required": ["object", "data"] + }, + "example": { + "data": [ + { + "content": [ + { + "text": { + "annotations": [], + "value": "Based on the context, I'm not sure how to build a unique experience quickly and easily..." 
+ }, + "type": "text" + } + ], + "created_at": 1732633637, + "id": "01JDMG6CG6DD4B3RQN82QD8Q7P", + "metadata": {}, + "object": "thread.message", + "role": "assistant", + "status": "completed", + "thread_id": "jan_1732370027" + } + ], + "object": "list" + } + } + } + } + }, + "tags": ["Messages"] + } + }, + "/threads/{thread_id}/messages/{message_id}": { + "get": { + "summary": "Retrieve Message", + "description": "Retrieves a specific message from a thread by its ID.", + "parameters": [ + { + "name": "thread_id", + "in": "path", + "required": true, + "description": "The ID of the thread containing the message", + "schema": { + "type": "string" + } + }, + { + "name": "message_id", + "in": "path", + "required": true, + "description": "The ID of the message to retrieve", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Message retrieved successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for the message" + }, + "object": { + "type": "string", + "description": "Type of object, always 'thread.message'" + }, + "created_at": { + "type": "integer", + "description": "Unix timestamp of when the message was created" + }, + "thread_id": { + "type": "string", + "description": "ID of the thread this message belongs to" + }, + "role": { + "type": "string", + "description": "Role of the message sender", + "enum": ["assistant", "user"] + }, + "status": { + "type": "string", + "description": "Status of the message", + "enum": ["completed"] + }, + "content": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "Type of content", + "enum": ["text"] + }, + "text": { + "type": "object", + "properties": { + "value": { + "type": "string", + "description": "The message text" + }, + "annotations": { + "type": "array", + "description": "Array of annotations for the text" + } + } + } + } + } + }, + "metadata": { + "type": "object", + "description": "Additional metadata for the message" + }, + "attachments": { + "type": "array", + "items": { + "type": "object", + "properties": { + "file_id": { + "type": "string", + "description": "ID of the attached file" + }, + "tools": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "Type of tool used" + } + } + } + } + } + } + } + }, + "required": [ + "id", + "object", + "created_at", + "thread_id", + "role", + "content" + ] + }, + "example": { + "attachments": [ + { + "file_id": "01JDMG617BHMPW859VE18BPQ7Y", + "tools": [ + { + "type": "file_search" + } + ] + } + ], + "content": [ + { + "text": { + "annotations": [], + "value": "summary this" + }, + "type": "text" + } + ], + "created_at": 1732633625, + "id": "01JDMG617BHMPW859VE18BPQ7Y", + "metadata": {}, + "object": "thread.message", + "role": "user", + "status": "completed", + "thread_id": "jan_1732370027" + } + } + } + } + }, + "tags": ["Messages"] + }, + "patch": { + "summary": "Modify Message", + "description": "Modifies a specific message's content or metadata in a thread.", + "parameters": [ + { + "name": "thread_id", + "in": "path", + "required": true, + "description": "The ID of the thread containing the message", + "schema": { + "type": "string" + } + }, + { + "name": "message_id", + "in": "path", + "required": true, + "description": "The ID of the message to modify", + "schema": { + "type": "string" + } + } + ], + 
"requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "content": { + "type": "object", + "description": "New content for the message" + }, + "metadata": { + "type": "object", + "description": "Updated metadata for the message", + "additionalProperties": true + } + } + }, + "example": { + "content": {}, + "metadata": { + "test": 1 + } + } + } + } + }, + "responses": { + "200": { + "description": "Message modified successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for the message" + }, + "object": { + "type": "string", + "description": "Type of object, always 'thread.message'" + }, + "created_at": { + "type": "integer", + "description": "Unix timestamp of when the message was created" + }, + "completed_at": { + "type": "integer", + "description": "Unix timestamp of when the message was completed" + }, + "thread_id": { + "type": "string", + "description": "ID of the thread this message belongs to" + }, + "role": { + "type": "string", + "description": "Role of the message sender", + "enum": ["user", "assistant"] + }, + "status": { + "type": "string", + "description": "Status of the message", + "enum": ["completed"] + }, + "content": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "Type of content", + "enum": ["text"] + }, + "text": { + "type": "object", + "properties": { + "value": { + "type": "string", + "description": "The message text" + }, + "annotations": { + "type": "array", + "description": "Array of annotations for the text" + } + } + } + } + } + }, + "metadata": { + "type": "object", + "description": "Additional metadata for the message", + "additionalProperties": true + } + }, + "required": [ + "id", + "object", + "created_at", + "completed_at", + "thread_id", + "role", + "status", + "content" + ] + }, + "example": { + "completed_at": 1734023130, + "content": [ + { + "text": { + "annotations": [], + "value": "Hello, world!" 
+ }, + "type": "text" + } + ], + "created_at": 1734023130, + "id": "0001KNP5YT00GW0X476W5TVBFE", + "metadata": { + "test": 1 + }, + "object": "thread.message", + "role": "user", + "status": "completed", + "thread_id": "jan_1732370027" + } + } + } + } + }, + "tags": ["Messages"] + }, + "delete": { + "summary": "Delete Message", + "description": "Deletes a specific message from a thread.", + "parameters": [ + { + "name": "thread_id", + "in": "path", + "required": true, + "description": "The ID of the thread containing the message", + "schema": { + "type": "string" + } + }, + { + "name": "message_id", + "in": "path", + "required": true, + "description": "The ID of the message to delete", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Message deleted successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "deleted": { + "type": "boolean", + "description": "Indicates if the message was successfully deleted" + }, + "id": { + "type": "string", + "description": "ID of the deleted message" + }, + "object": { + "type": "string", + "description": "Type of object, always 'thread.message.deleted'" + } + }, + "required": ["deleted", "id", "object"] + }, + "example": { + "deleted": true, + "id": "01JDCMZPBGDP276D6Z2QN2MJMX", + "object": "thread.message.deleted" + } + } + } + } + }, + "tags": ["Messages"] + } + }, + "/system": { + "delete": { + "operationId": "SystemController_delete", + "summary": "Stop api server", + "description": "Stops the Cortex API endpoint server for the detached mode.", + "parameters": [], + "responses": { + "200": { + "description": "" + } + }, + "tags": ["System"] + }, + "get": { + "operationId": "SystemController_get", + "summary": "Get health status", + "description": "Retrieves the health status of your Cortex's system.", + "parameters": [], + "responses": { + "200": { + "description": "Ok" + } + }, + "tags": ["System"] + } + }, + "/system/events/download": { + "get": { + "operationId": "SystemController_downloadEvent", + "summary": "Get download status", + "description": "Retrieves the model's download status.", + "parameters": [], + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "type": "object" + } + } + } + } + }, + "tags": ["System"] + } + }, + "/system/events/model": { + "get": { + "operationId": "SystemController_modelEvent", + "summary": "Get model status", + "description": "Retrieves all the available model statuses within Cortex.", + "parameters": [], + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "type": "object" + } + } + } + } + }, + "tags": ["System"] + } + }, + "/system/events/resources": { + "get": { + "operationId": "SystemController_resourcesEvent", + "summary": "Get resources status", + "description": "Retrieves the resources status of the system.", + "parameters": [], + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "type": "object" + } + } + } + } + }, + "tags": ["System"] + } + }, + "/engines/{name}": { + "get": { + "operationId": "EnginesController_listInstalledEngines", + "summary": "List installed engines", + "description": "List installed engines for a particular engine type.", + "parameters": [ + { + "name": "name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "default": "llama-cpp" + }, + "description": 
"The type of engine" + } + ], + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "engine": { + "type": "string", + "example": "llama-cpp" + }, + "name": { + "type": "string", + "example": "mac-arm64" + }, + "version": { + "type": "string", + "example": "0.1.35-28.10.24" + } + } + } + }, + "example": [ + { + "engine": "llama-cpp", + "name": "mac-arm64", + "version": "0.1.35-28.10.24" + }, + { + "engine": "llama-cpp", + "name": "linux-amd64-avx", + "version": "0.1.35-27.10.24" + } + ] + } + } + } + }, + "tags": ["Engines"] + } + }, + "/engines/{name}/releases": { + "get": { + "summary": "List released engines", + "description": "List released engines of a specific engine type.", + "parameters": [ + { + "name": "name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "default": "llama-cpp" + }, + "description": "The type of engine" + } + ], + "responses": { + "200": { + "description": "Successful installation", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "draft": { + "type": "boolean", + "example": false + }, + "name": { + "type": "string", + "example": "v0.1.39-20.11.24" + }, + "prerelease": { + "type": "boolean", + "example": true + }, + "published_at": { + "type": "string", + "format": "date-time", + "example": "2024-11-20T17:39:40Z" + }, + "url": { + "type": "string", + "example": "https://api.github.com/repos/janhq/cortex.llamacpp/releases/186479804" + } + } + } + } + } + } + } + }, + "tags": ["Engines"] + } + }, + "/engines/{name}/releases/latest": { + "get": { + "summary": "Get latest release", + "description": "Return variants for the latest engine release of a specific engine type.", + "parameters": [ + { + "name": "name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "default": "llama-cpp" + }, + "description": "The type of engine" + } + ], + "responses": { + "200": { + "description": "Successful installation", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "created_at": { + "type": "string", + "format": "date-time", + "example": "2024-11-15T10:39:39Z" + }, + "download_count": { + "type": "integer", + "example": 76 + }, + "name": { + "type": "string", + "example": "0.1.39-linux-amd64-avx-cuda-11-7" + }, + "size": { + "type": "integer", + "example": 151215080 + } + } + } + } + } + } + } + }, + "tags": ["Engines"] + } + }, + "/engines/{name}/install": { + "post": { + "summary": "Install an engine", + "description": "Install an engine of a specific type, with optional version and variant. 
If none are provided, the latest version and most suitable variant will be installed.", + "parameters": [ + { + "name": "name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "enum": [ + "llama-cpp", + "onnxruntime", + "tensorrt-llm", + "openai", + "anthropic" + ], + "default": "llama-cpp" + }, + "description": "The type of engine" + } + ], + "requestBody": { + "required": false, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "version": { + "type": "string", + "description": "The version of the engine to install (optional)", + "example": "v0.1.39" + }, + "variant": { + "type": "string", + "description": "The variant of the engine to install (optional)", + "example": "mac-arm64" + }, + "type": { + "type": "string", + "description": "The type of connection: remote or local", + "example": "remote" + }, + "url": { + "type": "string", + "description": "The URL of the API endpoint for the remote engine", + "example": "https://api.openai.com" + }, + "api_key": { + "type": "string", + "description": "The API key used to authenticate with the remote engine", + "example": "" + }, + "metadata": { + "type": "object", + "properties": { + "get_models_url": { + "type": "string", + "description": "The URL to get models", + "example": "https://api.openai.com/v1/models" + } + } + } + } + } + } + } + }, + "responses": { + "200": { + "description": "Successful installation", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "example": "Engine starts installing!" + } + } + } + } + } + } + }, + "tags": ["Engines"] + }, + "delete": { + "summary": "Uninstall an engine", + "description": "Uninstalls an engine based on engine, version, and variant. If version and variant are not provided, all versions and variants of the engine will be uninstalled.", + "parameters": [ + { + "name": "name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "enum": [ + "llama-cpp", + "onnxruntime", + "tensorrt-llm", + "openai", + "anthropic" + ], + "default": "llama-cpp" + }, + "description": "The type of engine" + } + ], + "requestBody": { + "required": false, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "version": { + "type": "string", + "description": "The version of the engine to uninstall (optional)", + "example": "v0.1.39" + }, + "variant": { + "type": "string", + "description": "The variant of the engine to uninstall (optional)", + "example": "mac-arm64" + } + } + } + } + } + }, + "responses": { + "200": { + "description": "Successful uninstallation", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "description": "Confirmation message for a successful uninstallation", + "example": "Engine llama-cpp uninstalled successfully!"
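Note that both install and uninstall are registered under the same `/engines/{name}/install` path, distinguished only by HTTP method. A minimal sketch under the usual local-server assumption:

```python
import requests

BASE = "http://127.0.0.1:39281/v1"  # assumed local Cortex endpoint

# Kick off an engine installation; the body is optional, so omitting it
# installs the latest version and the most suitable variant.
resp = requests.post(
    f"{BASE}/engines/llama-cpp/install",
    json={"version": "v0.1.39", "variant": "mac-arm64"},
)
resp.raise_for_status()
print(resp.json()["message"])  # "Engine starts installing!"

# Uninstalling uses DELETE on the same path; with no body, every
# installed version and variant of the engine is removed.
resp = requests.delete(f"{BASE}/engines/llama-cpp/install")
resp.raise_for_status()
```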
+ } + } + } + } + } + }, + "400": { + "description": "Bad request", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "description": "Error message describing the issue with the request" + } + } + } + } + } + } + }, + "tags": ["Engines"] + } + }, + "/engines/{name}/update": { + "post": { + "summary": "Update engine", + "description": "Updates the specified engine type using the engine variant currently set as default.", + "parameters": [ + { + "name": "name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "default": "llama-cpp" + }, + "description": "The name of the engine to update" + } + ], + "responses": { + "200": { + "description": "Engine updated successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "example": "Engine updated successfully" + } + } + } + } + } + } + }, + "tags": ["Engines"] + } + }, + "/engines/{name}/default": { + "get": { + "summary": "Get default engine variant", + "description": "Retrieves the default engine variant for the specified engine type.", + "parameters": [ + { + "name": "name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "default": "llama-cpp" + }, + "description": "The type of engine" + } + ], + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "engine": { + "type": "string", + "example": "llama-cpp" + }, + "name": { + "type": "string", + "example": "mac-arm64" + }, + "version": { + "type": "string", + "example": "0.1.35-28.10.24" + } + } + } + } + } + } + }, + "tags": ["Engines"] + }, + "post": { + "summary": "Set default engine variant", + "description": "Sets the default engine variant for the specified engine type.", + "parameters": [ + { + "name": "name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "default": "llama-cpp" + }, + "description": "The type of engine" + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "required": ["version", "variant"], + "properties": { + "version": { + "type": "string", + "description": "The version of the engine variant", + "example": "0.1.34" + }, + "variant": { + "type": "string", + "description": "The variant of the engine", + "example": "mac-arm64" + } + } + } + } + } + }, + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "example": "Default engine variant set successfully" + } + } + } + } + } + } + }, + "tags": ["Engines"] + } + }, + "/engines/{name}/load": { + "post": { + "summary": "Load engine", + "description": "Loads the specified engine type.", + "parameters": [ + { + "name": "name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "enum": [ + "llama-cpp", + "onnxruntime", + "tensorrt-llm", + "openai", + "anthropic" + ], + "default": "llama-cpp" + }, + "description": "The name of the engine to load" + } + ], + "responses": { + "200": { + "description": "Engine loaded successfully", + "content": { + "application/json": { + "schema": { + "type": "object", +
"properties": { + "message": { + "type": "string", + "example": "Engine loaded successfully" + } + } + } + } + } + } + }, + "tags": ["Engines"] + }, + "delete": { + "summary": "Unload engine", + "description": "Unloads the specified engine type.", + "parameters": [ + { + "name": "name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "default": "llama-cpp" + }, + "description": "The name of the engine to update" + } + ], + "responses": { + "200": { + "description": "Engine unloaded successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "example": "Engine unloaded successfully" + } + } + } + } + } + } + }, + "tags": ["Engines"] + } + }, + "/hardware": { + "get": { + "summary": "Get hardware information", + "description": "Retrieves detailed information about the system's hardware configuration, including CPU, GPU(s), operating system, power status, RAM, and storage.", + "responses": { + "200": { + "description": "Hardware information retrieved successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "cpu": { + "$ref": "#/components/schemas/CPUDto" + }, + "gpus": { + "type": "array", + "items": { + "$ref": "#/components/schemas/GPUDto" + } + }, + "os": { + "$ref": "#/components/schemas/OperatingSystemDto" + }, + "power": { + "$ref": "#/components/schemas/PowerDto" + }, + "ram": { + "$ref": "#/components/schemas/RAMDto" + }, + "storage": { + "$ref": "#/components/schemas/StorageDto" + } + } + } + } + } + } + }, + "tags": ["Hardware"] + } + }, + "/hardware/activate": { + "post": { + "summary": "Activate GPUs", + "description": "Activates the specified GPUs based on their indices provided in the request body.", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "gpus": { + "type": "array", + "items": { + "type": "integer" + }, + "example": [0, 1, 2], + "description": "An array of GPU indices to activate." + } + }, + "required": ["gpus"] + } + } + } + }, + "responses": { + "200": { + "description": "The hardware configuration has been activated.", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "example": "The hardware configuration has been activated.", + "description": "Confirmation message indicating successful activation." + }, + "activated_gpus": { + "type": "array", + "items": { + "type": "integer" + }, + "example": [0, 1, 2], + "description": "List of GPU indices that were activated." + } + } + } + } + } + }, + "400": { + "description": "Bad Request", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "example": "Invalid GPU index provided", + "description": "Error message indicating what went wrong." 
+ } + } + } + } + } + } + }, + "tags": ["Hardware"] + } + }, + "/files": { + "post": { + "summary": "Upload a File", + "description": "Uploads a file to the Cortex server.", + "requestBody": { + "required": true, + "content": { + "multipart/form-data": { + "schema": { + "type": "object", + "properties": { + "file": { + "type": "string", + "format": "binary" + }, + "purpose": { + "type": "string", + "enum": ["assistants"], + "description": "The intended purpose of the uploaded file" + } + }, + "required": ["file", "purpose"] + } + } + } + }, + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "bytes": { + "type": "integer", + "example": 3211109 + }, + "created_at": { + "type": "integer", + "example": 1733942093 + }, + "filename": { + "type": "string", + "example": "Enterprise_Application_Infrastructure_v2_20140903_toCTC_v1.0.pdf" + }, + "id": { + "type": "string", + "example": "file-0001KNKPTDDAQSDVEQGRBTCTNJ" + }, + "object": { + "type": "string", + "example": "file" + }, + "purpose": { + "type": "string", + "example": "assistants" + } + } + } + } + } + } + }, + "tags": ["Files"] + }, + "get": { + "summary": "List files", + "description": "Lists all files that have been uploaded to the Cortex server.", + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "bytes": { + "type": "integer", + "example": 3211109 + }, + "created_at": { + "type": "integer", + "example": 1733942093 + }, + "filename": { + "type": "string", + "example": "Enterprise_Application_Infrastructure_v2_20140903_toCTC_v1.0.pdf" + }, + "id": { + "type": "string", + "example": "file-0001KNKPTDDAQSDVEQGRBTCTNJ" + }, + "object": { + "type": "string", + "example": "file" + }, + "purpose": { + "type": "string", + "example": "assistants" + } + } + } + }, + "object": { + "type": "string", + "example": "list" + } + } + } + } + } + } + }, + "tags": ["Files"] + } + }, + "/files/{id}": { + "get": { + "summary": "Retrieve File", + "description": "Retrieves a file by its ID.", + "parameters": [ + { + "name": "id", + "in": "path", + "required": true, + "description": "The ID of the file to retrieve", + "schema": { + "type": "string" + } + }, + { + "name": "thread", + "in": "query", + "required": false, + "description": "Optional thread identifier", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Successfully retrieved file", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "bytes": { + "type": "integer", + "example": 3211109 + }, + "created_at": { + "type": "integer", + "example": 1733942093 + }, + "filename": { + "type": "string", + "example": "Enterprise_Application_Infrastructure_v2_20140903_toCTC_v1.0.pdf" + }, + "id": { + "type": "string", + "example": "file-0001KNKPTDDAQSDVEQGRBTCTNJ" + }, + "object": { + "type": "string", + "example": "file" + }, + "purpose": { + "type": "string", + "example": "assistants" + } + } + } + } + } + } + }, + "tags": ["Files"] + }, + "delete": { + "summary": "Delete File", + "description": "Deletes a file by its ID.", + "parameters": [ + { + "name": "id", + "in": "path", + "required": true, + "description": "The ID of the file to delete", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "File 
successfully deleted", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "deleted": { + "type": "boolean", + "description": "Indicates if the file was successfully deleted" + }, + "id": { + "type": "string", + "description": "The ID of the deleted file" + }, + "object": { + "type": "string", + "description": "Type of object, always 'file'" + } + }, + "required": ["deleted", "id", "object"] + }, + "example": { + "deleted": true, + "id": "file-0001KNP26FC62D620DGYNG2R8H", + "object": "file" + } + } + } + }, + "400": { + "description": "File not found or invalid request", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "description": "Error message describing the issue" + } + }, + "required": ["message"] + }, + "example": { + "message": "File not found: file-0001KNP26FC62D620DGYNG2R8H" + } + } + } + } + }, + "tags": ["Files"] + } + }, + "/files/{id}/content": { + "get": { + "summary": "Get File Content", + "description": "Retrieves the content of a file by its ID.", + "parameters": [ + { + "name": "id", + "in": "path", + "required": true, + "description": "The ID of the file to retrieve content from", + "schema": { + "type": "string" + } + }, + { + "name": "thread", + "in": "query", + "required": false, + "description": "Optional thread identifier", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "File content retrieved successfully", + "content": { + "*/*": { + "schema": { + "type": "string", + "format": "binary", + "description": "The raw content of the file" + } + } + } + }, + "400": { + "description": "File not found or invalid request", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "description": "Error message describing the issue" + } + }, + "required": ["message"] + } + } + } + } + }, + "tags": ["Files"] + } + }, + "/configs": { + "get": { + "summary": "Get Configurations", + "description": "Retrieves the current configuration settings of the Cortex server.", + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "allowed_origins": { + "type": "array", + "items": { + "type": "string" + }, + "example": ["http://127.0.0.1:39281", "https://cortex.so"] + }, + "cors": { + "type": "boolean", + "example": false + }, + "proxy_username": { + "type": "string", + "example": "username" + }, + "proxy_password": { + "type": "string", + "example": "password" + }, + "proxy_url": { + "type": "string", + "example": "http://proxy.example.com:8080" + }, + "verify_proxy_ssl": { + "type": "boolean", + "description": "test", + "example": false + }, + "verify_proxy_host_ssl": { + "type": "boolean", + "example": false + }, + "verify_peer_ssl": { + "type": "boolean", + "example": false + }, + "verify_host_ssl": { + "type": "boolean", + "example": false + }, + "no_proxy": { + "type": "string", + "example": "localhost" + }, + "huggingface_token": { + "type": "string", + "example": "your_token" + } + } + }, + "example": { + "allowed_origins": [ + "http://127.0.0.1:39281", + "https://cortex.so" + ], + "cors": false, + "proxy_username": "username", + "proxy_password": "password", + "proxy_url": "http://proxy.example.com:8080", + "verify_proxy_ssl": false, + "verify_proxy_host_ssl": false, + "verify_peer_ssl": false, + "verify_host_ssl": false, + "no_proxy": 
"localhost", + "huggingface_token": "your_token" + } + } + } + } + }, + "tags": ["Configurations"] + }, + "patch": { + "tags": ["Configurations"], + "summary": "Update configuration settings", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "cors": { + "type": "boolean", + "description": "Indicates whether CORS is enabled.", + "example": false + }, + "allowed_origins": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of allowed origins.", + "example": ["http://127.0.0.1:39281", "https://cortex.so"] + }, + "proxy_username": { + "type": "string", + "description": "Username for the proxy server.", + "example": "username" + }, + "proxy_password": { + "type": "string", + "description": "Password for the proxy server.", + "example": "password" + }, + "proxy_url": { + "type": "string", + "description": "URL for the proxy server.", + "example": "http://proxy.example.com:8080" + }, + "verify_proxy_ssl": { + "type": "boolean", + "description": "Indicates whether to verify the SSL certificate of the proxy server.", + "example": false + }, + "verify_proxy_host_ssl": { + "type": "boolean", + "description": "Indicates whether to verify the SSL certificate of the proxy server host.", + "example": false + }, + "verify_peer_ssl": { + "type": "boolean", + "description": "Indicates whether to verify the SSL certificate of the peer.", + "example": false + }, + "verify_host_ssl": { + "type": "boolean", + "description": "Indicates whether to verify the SSL certificate of the host.", + "example": false + }, + "no_proxy": { + "type": "string", + "description": "List of hosts that should not be proxied.", + "example": "localhost" + }, + "huggingface_token": { + "type": "string", + "description": "HuggingFace token to pull models.", + "example": "your_token" + } + } + } + } + } + }, + "responses": { + "200": { + "description": "Configuration updated successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "config": { + "type": "object", + "properties": { + "allowed_origins": { + "type": "array", + "items": { + "type": "string" + }, + "example": [ + "http://127.0.0.1:39281", + "https://cortex.so" + ] + }, + "cors": { + "type": "boolean", + "example": false + }, + "proxy_username": { + "type": "string", + "example": "username" + }, + "proxy_password": { + "type": "string", + "example": "password" + }, + "proxy_url": { + "type": "string", + "example": "http://proxy.example.com:8080" + }, + "verify_proxy_ssl": { + "type": "boolean", + "example": false + }, + "verify_proxy_host_ssl": { + "type": "boolean", + "example": false + }, + "verify_peer_ssl": { + "type": "boolean", + "example": false + }, + "verify_host_ssl": { + "type": "boolean", + "example": false + }, + "no_proxy": { + "type": "string", + "example": "localhost" + }, + "huggingface_token": { + "type": "string", + "example": "your_token" + } + } + }, + "message": { + "type": "string", + "example": "Configuration updated successfully" + } + } + } + } + } + } + } + } + } + }, + "info": { + "title": "Cortex API", + "description": "Cortex API enables API commands for seamless interaction with LLMs.", + "version": "1.0", + "contact": {} + }, + "tags": [ + { + "name": "Chat", + "description": "This endpoint initiates interaction with a Large Language Models (LLM)." + }, + { + "name": "Embeddings", + "description": "This endpoint create embeddings for a given input text or tokens." 
+ }, + { + "name": "Assistants", + "description": "These endpoints manage the lifecycle of an Assistant within a conversation thread." + }, + { + "name": "Pulling Models", + "description": "These endpoints handle downloading and importing models." + }, + { + "name": "Running Models", + "description": "These endpoints support a range of operations that allow users to effectively control and interact with their models." + }, + { + "name": "Server", + "description": "These endpoints manage the lifecycle of the Server, including health checks and shutdown." + }, + { + "name": "Configurations", + "description": "These endpoints manage the configuration of the Cortex server." + }, + { + "name": "Messages", + "description": "These endpoints manage the retrieval and storage of conversation content, including responses from LLMs and other metadata related to chat interactions." + }, + { + "name": "Threads", + "description": "These endpoints handle the creation, retrieval, updating, and deletion of conversation threads." + }, + { + "name": "Engines", + "description": "Endpoints for managing the available engines within Cortex." + }, + { + "name": "Files", + "description": "Endpoints for managing the files within Cortex." + }, + { + "name": "Hardware", + "description": "Endpoints for managing the available hardware within Cortex." + }, + { + "name": "System", + "description": "Endpoints for stopping the Cortex API server, checking its status, and fetching system events." + } + ], + "x-tagGroups": [ + { + "name": "CORTEX", + "tags": [ + "Chat", + "Embeddings", + "Engines", + "Files", + "Hardware", + "Events", + "Threads", + "Messages", + "Pulling Models", + "Running Models", + "Processes", + "Status", + "Server", + "Configurations" + ] + } + ], + "servers": [ + { + "url": "/v1" + } + ], + "components": { + "schemas": { + "CreateAssistantDto": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The unique identifier of the assistant.", + "example": "jan", + "default": "jan" + }, + "avatar": { + "type": "string", + "description": "The avatar of the assistant.", + "example": "", + "default": "" + }, + "name": { + "type": "string", + "description": "The name of the assistant.", + "example": "Jan", + "default": "Jan" + }, + "description": { + "type": "string", + "description": "The description of the assistant.", + "example": "A default assistant that can use all downloaded models", + "default": "A default assistant that can use all downloaded models" + }, + "model": { + "type": "string", + "description": "The model of the assistant." + }, + "instructions": { + "type": "string", + "description": "The instructions for the assistant.", + "example": "", + "default": "" + }, + "tools": { + "description": "The tools associated with the assistant.", + "example": [], + "default": [], + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["function"] + }, + "function": { + "$ref": "#/components/schemas/Function" + } + }, + "required": ["type", "function"] + } + }, + "metadata": { + "type": "object", + "nullable": true, + "description": "The metadata of the assistant."
+ }, + "top_p": { + "type": "number", + "description": "Top p.", + "example": "0.7", + "default": "0.7" + }, + "temperature": { + "type": "number", + "description": "Temperature.", + "example": "0.7", + "default": "0.7" + } + }, + "required": [ + "id", + "name", + "description", + "model", + "instructions", + "tools", + "metadata" + ] + }, + "AssistantEntity": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "avatar": { + "type": "string" + }, + "object": { + "type": "string" + }, + "created_at": { + "type": "number" + }, + "name": { + "type": "string", + "nullable": true + }, + "description": { + "type": "string", + "nullable": true + }, + "model": { + "type": "string" + }, + "instructions": { + "type": "string", + "nullable": true + }, + "tools": { + "type": "array" + }, + "metadata": { + "type": "object", + "nullable": true + }, + "top_p": { + "type": "number", + "nullable": true + }, + "temperature": { + "type": "number", + "nullable": true + }, + "response_format": { + "type": "object", + "nullable": true + }, + "tool_resources": { + "type": "object", + "nullable": true + } + }, + "required": [ + "id", + "object", + "created_at", + "name", + "description", + "model", + "instructions", + "tools", + "metadata", + "top_p", + "temperature", + "response_format", + "tool_resources" + ] + }, + "DeleteAssistantResponseDto": { + "type": "object", + "properties": { + "id": { + "type": "string", + "example": "assistant_123", + "description": "The identifier of the assistant that was deleted." + }, + "object": { + "type": "string", + "example": "assistant", + "description": "Type of the object, indicating it's a assistant.", + "default": "assistant" + }, + "deleted": { + "type": "boolean", + "example": true, + "description": "Indicates whether the assistant was successfully deleted." + } + }, + "required": ["id", "object", "deleted"] + }, + "Message": { + "type": "object", + "discriminator": { + "propertyName": "role", + "mapping": { + "system": "SystemMessage", + "user": "UserMessage", + "assistant": "AssistantMessage", + "tool": "ToolMessage", + "function": "FunctionMessage" + } + }, + "properties": { + "role": { + "type": "string", + "enum": ["system", "user", "assistant", "tool"] + }, + "name": { + "type": "string", + "description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role." + } + }, + "required": ["role"] + }, + "SystemMessage": { + "allOf": [ + { + "type": "object", + "properties": { + "role": { + "type": "string", + "description": "The role of the messages author, in this case `system`." + }, + "content": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/TextContentPart" + } + } + ] + }, + "name": { + "type": "string", + "description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role." + } + }, + "required": ["content", "role"] + } + ] + }, + "UserMessage": { + "allOf": [ + { + "type": "object", + "properties": { + "role": { + "type": "string", + "description": "The role of the messages author, in this case `user`." 
+ }, + "content": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "object", + "anyOf": [ + { + "type": "object", + "title": "Text Content Part", + "description": "Text Content", + "$ref": "#/components/schemas/TextContentPart" + }, + { + "type": "object", + "title": "Image Content Part", + "description": "Image Content", + "$ref": "#/components/schemas/ImageContentPart" + }, + { + "type": "object", + "title": "Audio Content Part", + "description": "Audio Content", + "$ref": "#/components/schemas/AudioContentPart" + } + ] + } + } + ] + }, + "name": { + "type": "string", + "description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role." + } + }, + "required": ["content", "role"] + } + ] + }, + "AssistantMessage": { + "allOf": [ + { + "type": "object", + "properties": { + "role": { + "type": "string", + "description": "The role of the messages author, in this case `assistant`." + }, + "content": { + "description": "The contents of the assistant message. Required unless `tool_calls` or `function_call` is specified.", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "object", + "anyOf": [ + { + "$ref": "#/components/schemas/TextContentPart" + }, + { + "$ref": "#/components/schemas/RefusalContentPart" + } + ] + } + } + ] + }, + "name": { + "type": "string", + "description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role." + }, + "refusal": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] + }, + "audio": { + "type": "object", + "anyOf": [ + { + "$ref": "#/components/schemas/Audio" + }, + { + "type": "null" + } + ] + }, + "tool_calls": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolCall" + } + }, + "function_call": { + "deprecated": true, + "anyOf": [ + { + "$ref": "#/components/schemas/FunctionCall" + }, + { + "type": "null" + } + ] + } + } + } + ] + }, + "ToolMessage": { + "allOf": [ + { + "type": "object", + "properties": { + "role": { + "type": "string", + "description": "The role of the messages author, in this case `tool`." + }, + "content": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "object", + "anyOf": [ + { + "$ref": "#/components/schemas/TextContentPart" + } + ] + } + } + ] + }, + "tool_call_id": { + "type": "string" + } + }, + "required": ["content", "tool_call_id"] + } + ] + }, + "FunctionMessage": { + "allOf": [ + { + "$ref": "#/components/schemas/Message" + } + ], + "deprecated": true + }, + "TextContentPart": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["text"] + }, + "text": { + "type": "string" + } + }, + "required": ["type", "text"] + }, + "ImageContentPart": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["image_url"] + }, + "image_url": { + "$ref": "#/components/schemas/ImageUrl" + } + }, + "required": ["type", "image_url"] + }, + "AudioContentPart": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "The type of the content part. Always `input_audio`." 
+ }, + "input_audio": { + "$ref": "#/components/schemas/InputAudio" + } + }, + "required": ["type", "input_audio"] + }, + "RefusalContentPart": { + "type": "object", + "properties": { + "type": { + "type": "string" + }, + "refusal": { + "type": "string" + } + }, + "required": ["type", "refusal"] + }, + "ImageUrl": { + "type": "object", + "properties": { + "url": { + "type": "string", + "description": "Either a URL of the image or the base64 encoded image data." + }, + "detail": { + "type": "string", + "default": "auto", + "description": "Specifies the detail level of the image. Defaults to `auto`." + } + }, + "required": ["url"] + }, + "InputAudio": { + "type": "object", + "properties": { + "data": { + "type": "string", + "description": "Base64 encoded audio data." + }, + "format": { + "type": "string", + "enum": ["wav", "mp3"], + "description": "The format of the encoded audio data. Currently supports `wav` and `mp3`." + } + }, + "required": ["data", "format"] + }, + "Audio": { + "type": "object", + "description": "Data about a previous audio response from the model.", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for a previous audio response from the model." + } + }, + "required": ["id"] + }, + "ToolCall": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "type": { + "type": "string" + }, + "function": { + "$ref": "#/components/schemas/FunctionCall" + } + }, + "required": ["id", "type", "function"] + }, + "FunctionCall": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "arguments": { + "type": "string" + } + }, + "required": ["name", "arguments"] + }, + "CreateChatCompletionDto": { + "type": "object", + "properties": { + "messages": { + "description": "Array of chat messages to be used for generating the chat completion. Depending on the model you use, different message types (modalities) are supported, like text, images, and audio. Currently, cortex only support text modalities.", + "type": "array", + "items": { + "anyOf": [ + { + "title": "System Message", + "description": "System Message", + "$ref": "#/components/schemas/SystemMessage" + }, + { + "title": "User Message", + "description": "User Message", + "$ref": "#/components/schemas/UserMessage" + }, + { + "title": "Assistant Message", + "description": "Assistant Message", + "$ref": "#/components/schemas/AssistantMessage" + }, + { + "title": "Tool Message", + "description": "Tool Message", + "$ref": "#/components/schemas/ToolMessage" + } + ] + } + }, + "model": { + "type": "string", + "description": "The unique identifier of the model.", + "example": "mistral" + }, + "stream": { + "type": "boolean", + "description": "Determines the format for output generation. If set to `true`, the output is generated continuously, allowing for real-time streaming of responses. If set to `false`, the output is delivered in a single JSON file.", + "example": true + }, + "max_tokens": { + "type": "number", + "description": "Sets the upper limit on the number of tokens the model can generate in a single output. This value is now deprecated in favor of `max_completion_tokens`.", + "example": 4096 + }, + "max_completion_tokens": { + "type": "number", + "description": "Sets the upper limit on the number of tokens the model can generate in a single output." 
+ }, + "stop": { + "description": "Defines specific tokens or phrases that signal the model to stop producing further output.", + "example": ["End"], + "type": "array", + "items": { + "type": "string" + } + }, + "frequency_penalty": { + "type": "number", + "description": "Modifies the likelihood of the model repeating the same words or phrases within a single output.", + "example": 0.2 + }, + "presence_penalty": { + "type": "number", + "description": "Reduces the likelihood of repeating tokens, promoting novelty in the output.", + "example": 0.6 + }, + "temperature": { + "type": "number", + "description": "Influences the randomness of the model's output.", + "example": 0.8 + }, + "top_p": { + "type": "number", + "description": "Sets probability threshold for more relevant outputs.", + "example": 0.95 + }, + "modalities": { + "type": "array", + "items": { + "type": "string", + "enum": ["text", "audio"] + }, + "description": "Specifies the modalities (types of input) supported by the model. Currently, cortex only support text modalities. We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/janhq/cortex.cpp/issues/1582).", + "example": ["text"] + }, + "audio": { + "description": "Parameters for audio output. Required when audio output is requested with `modalities: ['audio']`. We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/janhq/cortex.cpp/issues/1582).", + "type": "object", + "properties": { + "voice": { + "type": "string", + "description": "The voice of the generated audio." + }, + "format": { + "type": "string", + "description": "Specifies the output audio format. Must be one of `wav`, `mp3`, `flac`, `opus`, or `pcm16`.", + "enum": ["mp3", "wav", "flac", "opus", "pcm16"] + } + }, + "required": ["voice", "format"] + }, + "store": { + "type": "boolean", + "description": "Whether or not to store the output of this chat completion request for use in our model distillation or evals products. We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/janhq/cortex.cpp/issues/1582).", + "default": false, + "example": false + }, + "metadata": { + "type": "object", + "description": "Developer-defined tags and values used for filtering completions in the dashboard. We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/janhq/cortex.cpp/issues/1582).", + "example": { + "type": "conversation" + } + }, + "logit_bias": { + "type": "object", + "description": "Modify the likelihood of specified tokens appearing in the completion. \n\nAccepts a JSON object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. 
The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token.", + "example": { + "15496": -100, + "51561": -100 + }, + "default": null + }, + "logprobs": { + "type": "boolean", + "description": "Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message.", + "example": false, + "default": false + }, + "top_logprobs": { + "type": "number", + "description": "An integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. `logprobs` must be set to `true` if this parameter is used." + }, + "n": { + "type": "number", + "description": "How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep n as 1 to minimize costs.", + "example": 1, + "default": 1 + }, + "response_format": { + "type": "object", + "description": "An object specifying the format that the model must output. Setting to { \"type\": \"json_object\" } enables JSON mode, which guarantees the message the model generates is valid JSON. We are actively working on this feature to make cortex a fully OpenAI-compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/janhq/cortex.cpp/issues/1582).", + "properties": { + "type": { + "type": "string", + "description": "The format of the generated output. Must be one of `text`, `json_schema` or `json_object`.", + "enum": ["text", "json_object", "json_schema"] + } + }, + "required": ["type"] + }, + "seed": { + "type": "number", + "description": "This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed, and you should refer to the system_fingerprint response parameter to monitor changes in the backend.", + "example": 123, + "default": null + }, + "service_tier": { + "type": "string", + "description": "Specifies the latency tier to use for processing the request. This parameter is relevant for customers subscribed to the scale tier service:\n\n - If set to 'auto', and the Project is Scale tier enabled, the system will utilize scale tier credits until they are exhausted.\n- If set to 'auto', and the Project is not Scale tier enabled, the request will be processed using the default service tier with a lower uptime SLA and no latency guarantee.\n- If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarantee.\nWhen not set, the default behavior is 'auto'.\nWhen this parameter is set, the response body will include the service_tier utilized.\n\n We are actively working on this feature to make cortex a fully OpenAI-compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/janhq/cortex.cpp/issues/1582)." + }, + "stream_options": { + "type": "object", + "default": null, + "description": "Options for streaming response. Only set this when you set `stream: true`.", + "properties": { + "include_usage": { + "type": "boolean", + "description": "If set, an additional chunk will be streamed before the data: `[DONE]` message. 
The `usage` field on this chunk shows the token usage statistics for the entire request, and the `choices` field will always be an empty array. All other chunks will also include a `usage` field, but with a null value.", + "example": false, + "default": false + } + } + }, + "tools": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["function"] + }, + "function": { + "$ref": "#/components/schemas/Function" + } + }, + "required": ["type", "function"] + } + }, + "tool_choice": { + "anyOf": [ + { + "type": "string", + "enum": ["none", "auto", "required"] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["function"] + }, + "function": { + "type": "object", + "properties": { + "name": { + "type": "string" + } + }, + "required": ["name"] + } + }, + "required": ["type", "function"] + } + ] + }, + "parallel_tool_calls": { + "type": "boolean", + "description": "Whether to enable parallel function calling during tool use. Cortex supports parallel tool calls by default.", + "example": true, + "default": true + }, + "user": { + "type": "string", + "description": "A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. We are actively working on this feature to make cortex a fully OpenAI-compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/janhq/cortex.cpp/issues/1582)." + }, + "dynatemp_range": { + "type": "number", + "description": "Dynamic temperature range. This parameter is only supported by the `llama-cpp` engine." + }, + "dynatemp_exponent": { + "type": "number", + "description": "Dynamic temperature exponent. This parameter is only supported by the `llama-cpp` engine." + }, + "top_k": { + "type": "integer", + "description": "The number of most likely tokens to consider at each step. This parameter is only supported by the `llama-cpp` engine." + }, + "min_p": { + "type": "number", + "description": "Minimum probability threshold for token sampling. This parameter is only supported by the `llama-cpp` engine." + }, + "tfs_z": { + "type": "number", + "description": "The z-score used for tail-free sampling. This parameter is only supported by the `llama-cpp` engine." + }, + "typ_p": { + "type": "number", + "description": "The cumulative probability threshold used for Typical token sampling. This parameter is only supported by the `llama-cpp` engine." + }, + "repeat_last_n": { + "type": "integer", + "description": "Number of previous tokens to penalize for repeating. This parameter is only supported by the `llama-cpp` engine." + }, + "repeat_penalty": { + "type": "number", + "description": "Penalty for repeating tokens. This parameter is only supported by the `llama-cpp` engine." + }, + "mirostat": { + "type": "boolean", + "description": "Enables or disables Mirostat sampling (true or false). This parameter is only supported by the `llama-cpp` engine." + }, + "mirostat_tau": { + "type": "number", + "description": "Target entropy value for Mirostat sampling. This parameter is only supported by the `llama-cpp` engine." + }, + "mirostat_eta": { + "type": "number", + "description": "Learning rate for Mirostat sampling. This parameter is only supported by the `llama-cpp` engine." + }, + "penalize_nl": { + "type": "boolean", + "description": "Penalizes newline tokens (true or false). This parameter is only supported by the `llama-cpp` engine." + }, + "ignore_eos": { + "type": "boolean", + "description": "Ignores the end-of-sequence token (true or false). 
This parameter is only supported by the `llama-cpp` engine." + }, + "n_probs": { + "type": "integer", + "description": "Number of probabilities to return. This parameter is only supported by the `llama-cpp` engine." + }, + "min_keep": { + "type": "integer", + "description": "Minimum number of tokens to keep. This parameter is only supported by the `llama-cpp` engine." + } + }, + "required": ["messages", "model"] + }, + "Function": { + "type": "object", + "properties": { + "description": { + "type": "string" + }, + "name": { + "type": "string", + "pattern": "^[a-zA-Z0-9_-]{1,64}$" + }, + "parameters": { + "type": "object" + }, + "strict": { + "type": "boolean", + "default": false + } + }, + "required": ["name"] + }, + "MessageDto": { + "type": "object", + "properties": { + "content": { + "type": "string", + "description": "The textual content of the chat message or completion generated by the model." + }, + "role": { + "type": "string", + "description": "The role of the participant in the chat, such as 'user' or 'system', indicating who is the sender of the message." + } + }, + "required": ["content", "role"] + }, + "ChoiceDto": { + "type": "object", + "properties": { + "finish_reason": { + "type": "string", + "description": "The reason the chat completion ended, typically indicating whether the model completed the text naturally or was cut off." + }, + "index": { + "type": "number", + "description": "The index of the completion relative to other generated completions, useful for identifying its order in a batch request." + }, + "message": { + "description": "An object representing the message details involved in the chat completion, encapsulated within a MessageDto.", + "allOf": [ + { + "$ref": "#/components/schemas/MessageDto" + } + ] + } + }, + "required": ["finish_reason", "index", "message"] + }, + "UsageDto": { + "type": "object", + "properties": { + "completion_tokens": { + "type": "number", + "description": "The number of tokens used in the completion part of the response generated by the model." + }, + "prompt_tokens": { + "type": "number", + "description": "The number of tokens used in the prompt part of the chat input, which is provided to the model." + }, + "total_tokens": { + "type": "number", + "description": "The total number of tokens used in both the prompt and the completion, summarizing the entire token count of the chat operation." + } + }, + "required": ["completion_tokens", "prompt_tokens", "total_tokens"] + }, + "ChatCompletionResponseDto": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "A unique identifier for the chat completion." + }, + "choices": { + "type": "array", + "description": "A list of chat completion choices. Can be more than one if n is greater than 1.", + "items": { + "type": "object", + "properties": { + "finish_reason": { + "type": "string", + "description": "The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, tool_calls if the model called a tool, or function_call (deprecated) if the model called a function." + }, + "index": { + "type": "integer", + "description": "The index of the choice in the list of choices." + }, + "message": { + "type": "object", + "properties": { + "content": { + "type": ["string", "null"], + "description": "The contents of the message." 
+ }, + "refusal": { + "type": ["string", "null"], + "description": "The refusal message generated by the model." + }, + "tool_calls": { + "type": "array", + "description": "The tool calls generated by the model, such as function calls.", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The ID of the tool call." + }, + "type": { + "type": "string", + "description": "The type of the tool. Currently, only function is supported." + }, + "function": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The name of the function to call." + }, + "arguments": { + "type": "string", + "description": "The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function." + } + }, + "required": ["name", "arguments"] + } + }, + "required": ["id", "type", "function"] + } + }, + "role": { + "type": "string", + "description": "The role of the author of this message." + }, + "function_call": { + "type": "object", + "deprecated": true, + "description": "Deprecated and replaced by tool_calls. The name and arguments of a function that should be called, as generated by the model.", + "properties": { + "arguments": { + "type": "string", + "description": "The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function." + }, + "name": { + "type": "string", + "description": "The name of the function to call." + } + }, + "required": ["arguments", "name"] + }, + "audio": { + "type": "object", + "description": "If the audio output modality is requested, this object contains data about the audio response from the model.", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for this audio response." + }, + "expires_at": { + "type": "integer", + "description": "The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations." + }, + "data": { + "type": "string", + "description": "Base64 encoded audio bytes generated by the model, in the format specified in the request." + }, + "transcript": { + "type": "string", + "description": "Transcript of the audio generated by the model." + } + }, + "required": ["id", "expires_at", "data", "transcript"] + } + }, + "required": ["role"] + }, + "logprobs": { + "type": "object", + "description": "Log probability information for the choice.", + "properties": { + "content": { + "type": ["array", "null"], + "description": "A list of message content tokens with log probability information.", + "items": { + "type": "object", + "properties": { + "token": { + "type": "string", + "description": "The token." + }, + "logprob": { + "type": "number", + "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." + }, + "bytes": { + "type": ["array", "null"], + "description": "A list of integers representing the UTF-8 bytes representation of the token. 
Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." + } + }, + "required": ["token", "logprob"] + } + }, + "top_logprobs": { + "type": "array", + "description": "List of the most likely tokens and their log probability, at this token position. In rare cases, there may be fewer than the number of requested top_logprobs returned.", + "items": { + "type": "object", + "properties": { + "token": { + "type": "string", + "description": "The token." + }, + "logprob": { + "type": "number", + "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." + }, + "bytes": { + "type": ["array", "null"], + "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." + } + }, + "required": ["token", "logprob"] + } + }, + "refusal": { + "type": ["array", "null"], + "description": "A list of message refusal tokens with log probability information.", + "items": { + "type": "object", + "properties": { + "token": { + "type": "string", + "description": "The token." + }, + "logprob": { + "type": "number", + "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." + }, + "bytes": { + "type": ["array", "null"], + "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." + } + }, + "required": ["token", "logprob"] + } + } + } + } + }, + "required": ["finish_reason", "index", "message"] + } + }, + "created": { + "type": "integer", + "description": "The Unix timestamp (in seconds) of when the chat completion was created." + }, + "model": { + "type": "string", + "description": "The model used for the chat completion." + }, + "service_tier": { + "type": ["string", "null"], + "description": "The service tier used for processing the request. This field is only included if the service_tier parameter is specified in the request." + }, + "system_fingerprint": { + "type": "string", + "description": "This fingerprint represents the backend configuration that the model runs with. Can be used in conjunction with the seed request parameter to understand when backend changes have been made that might impact determinism." + }, + "object": { + "type": "string", + "description": "The object type, which is always chat.completion." + }, + "usage": { + "type": "object", + "description": "Usage statistics for the completion request.", + "properties": { + "completion_tokens": { + "type": "integer", + "description": "Number of tokens in the generated completion." + }, + "prompt_tokens": { + "type": "integer", + "description": "Number of tokens in the prompt." + }, + "total_tokens": { + "type": "integer", + "description": "Total number of tokens used in the request (prompt + completion)." 
+ }, + "completion_tokens_details": { + "type": "object", + "description": "Breakdown of tokens used in a completion.", + "properties": { + "audio_tokens": { + "type": "integer", + "description": "Audio input tokens generated by the model." + }, + "reasoning_tokens": { + "type": "integer", + "description": "Tokens generated by the model for reasoning." + } + }, + "required": ["audio_tokens", "reasoning_tokens"] + }, + "prompt_tokens_details": { + "type": "object", + "description": "Breakdown of tokens used in the prompt.", + "properties": { + "audio_tokens": { + "type": "integer", + "description": "Audio input tokens present in the prompt." + }, + "cached_tokens": { + "type": "integer", + "description": "Cached tokens present in the prompt." + } + }, + "required": ["audio_tokens", "cached_tokens"] + } + }, + "required": [ + "completion_tokens", + "prompt_tokens", + "total_tokens", + "completion_tokens_details", + "prompt_tokens_details" + ] + } + }, + "required": [ + "id", + "choices", + "created", + "model", + "system_fingerprint", + "object", + "usage" + ] + }, + "ChatCompletionChunkResponseDto": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "A unique identifier for the chat completion. Each chunk has the same ID." + }, + "choices": { + "type": "array", + "description": "A list of chat completion choices. Can contain more than one element if n is greater than 1. Can also be empty for the last chunk if you set stream_options: {\"include_usage\": true}.", + "items": { + "type": "object", + "properties": { + "delta": { + "type": "object", + "description": "A chat completion delta generated by streamed model responses.", + "properties": { + "content": { + "type": ["string", "null"], + "description": "The contents of the chunk message." + }, + "function_call": { + "type": "object", + "description": "Deprecated and replaced by tool_calls. The name and arguments of a function that should be called, as generated by the model.", + "deprecated": true + }, + "tool_calls": { + "type": "array", + "description": "The tool calls generated by the model.", + "items": { + "type": "object", + "properties": { + "index": { + "type": "integer", + "description": "The index of the tool call in the list of tool calls." + }, + "id": { + "type": "string", + "description": "The ID of the tool call." + }, + "type": { + "type": "string", + "description": "The type of the tool. Currently, only function is supported." + }, + "function": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The name of the function to call." + }, + "arguments": { + "type": "string", + "description": "The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function." + } + }, + "required": ["name", "arguments"] + } + }, + "required": ["index", "id", "type", "function"] + } + }, + "role": { + "type": "string", + "description": "The role of the author of this message." + }, + "refusal": { + "type": ["string", "null"], + "description": "The refusal message generated by the model." 
+ } + } + }, + "logprobs": { + "type": "object", + "description": "Log probability information for the choice.", + "properties": { + "content": { + "type": ["array", "null"], + "description": "A list of message content tokens with log probability information.", + "items": { + "type": "object", + "properties": { + "token": { + "type": "string", + "description": "The token." + }, + "logprob": { + "type": "number", + "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." + }, + "bytes": { + "type": ["array", "null"], + "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." + } + }, + "required": ["token", "logprob"] + } + }, + "top_logprobs": { + "type": "array", + "description": "List of the most likely tokens and their log probability, at this token position. In rare cases, there may be fewer than the number of requested top_logprobs returned.", + "items": { + "type": "object", + "properties": { + "token": { + "type": "string", + "description": "The token." + }, + "logprob": { + "type": "number", + "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." + }, + "bytes": { + "type": ["array", "null"], + "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." + } + }, + "required": ["token", "logprob"] + } + }, + "refusal": { + "type": ["array", "null"], + "description": "A list of message refusal tokens with log probability information.", + "items": { + "type": "object", + "properties": { + "token": { + "type": "string", + "description": "The token." + }, + "logprob": { + "type": "number", + "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." + }, + "bytes": { + "type": ["array", "null"], + "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." + } + }, + "required": ["token", "logprob"] + } + } + } + }, + "finish_reason": { + "type": ["string", "null"], + "description": "The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, tool_calls if the model called a tool, or function_call (deprecated) if the model called a function." + }, + "index": { + "type": "integer", + "description": "The index of the choice in the list of choices." 
+ } + }, + "required": ["delta", "index"] + } + }, + "created": { + "type": "integer", + "description": "The Unix timestamp (in seconds) of when the chat completion was created. Each chunk has the same timestamp." + }, + "model": { + "type": "string", + "description": "The model used to generate the completion." + }, + "service_tier": { + "type": ["string", "null"], + "description": "The service tier used for processing the request. This field is only included if the service_tier parameter is specified in the request." + }, + "system_fingerprint": { + "type": "string", + "description": "This fingerprint represents the backend configuration that the model runs with. Can be used in conjunction with the seed request parameter to understand when backend changes have been made that might impact determinism." + }, + "object": { + "type": "string", + "description": "The object type, which is always chat.completion.chunk." + }, + "usage": { + "type": "object", + "description": "An optional field that will only be present when you set stream_options: {\"include_usage\": true} in your request. When present, it contains a null value except for the last chunk which contains the token usage statistics for the entire request.", + "properties": { + "completion_tokens": { + "type": "integer", + "description": "Number of tokens in the generated completion." + }, + "prompt_tokens": { + "type": "integer", + "description": "Number of tokens in the prompt." + }, + "total_tokens": { + "type": "integer", + "description": "Total number of tokens used in the request (prompt + completion)." + } + }, + "required": ["completion_tokens", "prompt_tokens", "total_tokens"] + } + }, + "required": [ + "id", + "choices", + "created", + "model", + "system_fingerprint", + "object" + ] + }, + "CreateEmbeddingsDto": { + "type": "object", + "properties": { + "model": { + "type": "string", + "example": "mistral", + "description": "The name of the embedding model to be used." + }, + "input": { + "example": ["Hello World"], + "description": "The text or token array(s) to be embedded. This can be a single string, an array of strings, or an array of token arrays to embed multiple inputs in one request.", + "type": "array", + "items": { + "type": "string" + } + }, + "encoding_format": { + "type": "string", + "example": "float", + "description": "Specifies the format for the embeddings. Supported formats include `float` and `int`. This field is optional." + }, + "dimensions": { + "type": "number", + "example": 3, + "description": "Defines the number of dimensions for the output embeddings. This feature is supported by certain models only. This field is optional." + } + }, + "required": ["model", "input"] + }, + "EmbeddingsResponseDto": { + "type": "object", + "properties": { + "object": { + "type": "string", + "description": "Type of the result object." + }, + "model": { + "type": "string", + "description": "Identifier of the model utilized for generating embeddings." + }, + "embedding": { + "description": "The embedding vector represented as an array of floating-point numbers. 
", + "type": "array", + "items": { + "type": "number" + } + }, + "usage": { + "description": "Details of token usage, including prompt_tokens and total_tokens.", + "allOf": [ + { + "$ref": "#/components/schemas/UsageDto" + } + ] + } + }, + "required": ["object", "model", "embedding", "usage"] + }, + "PullModelRequest": { + "type": "object", + "required": ["model"], + "properties": { + "model": { + "type": "string", + "description": "The identifier or URL of the model to use. It can be a model ID on Cortexso (https://huggingface.co/cortexso) or a HuggingFace URL pointing to the model file. For example: 'gpt2' or 'https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/blob/main/mistral-7b-instruct-v0.1.Q2_K.gguf'", + "examples": [ + "tinyllama:gguf", + "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/blob/main/mistral-7b-instruct-v0.1.Q2_K.gguf" + ] + }, + "id": { + "type": "string", + "description": "The id which will be used to register the model.", + "examples": "my-custom-model-id" + }, + "name": { + "type": "string", + "description": "The name which will be used to overwrite the model name.", + "examples": "my-custom-model-name" + } + } + }, + "PullModelResponse": { + "type": "object", + "properties": { + "message": { + "type": "string", + "example": "Model start downloading!" + } + } + }, + "AddModelRequest": { + "type": "object", + "required": [ + "model", + "engine", + "version", + "inference_params", + "TransformReq", + "TransformResp", + "metadata" + ], + "properties": { + "model": { + "type": "string", + "description": "The identifier of the model." + }, + "api_key_template": { + "type": "string", + "description": "Template for the API key header." + }, + "engine": { + "type": "string", + "description": "The engine used for the model." + }, + "version": { + "type": "string", + "description": "The version of the model." 
+ }, + "inference_params": { + "type": "object", + "properties": { + "temperature": { + "type": "number" + }, + "top_p": { + "type": "number" + }, + "frequency_penalty": { + "type": "number" + }, + "presence_penalty": { + "type": "number" + }, + "max_tokens": { + "type": "integer" + }, + "stream": { + "type": "boolean" + } + } + }, + "TransformReq": { + "type": "object", + "properties": { + "get_models": { + "type": "object" + }, + "chat_completions": { + "type": "object", + "properties": { + "url": { + "type": "string" + }, + "template": { + "type": "string" + } + } + }, + "embeddings": { + "type": "object" + } + } + }, + "TransformResp": { + "type": "object", + "properties": { + "chat_completions": { + "type": "object", + "properties": { + "template": { + "type": "string" + } + } + }, + "embeddings": { + "type": "object" + } + } + }, + "metadata": { + "type": "object", + "properties": { + "author": { + "type": "string" + }, + "description": { + "type": "string" + }, + "end_point": { + "type": "string" + }, + "logo": { + "type": "string" + }, + "api_key_url": { + "type": "string" + } + } + } + } + }, + "CreateModelDto": { + "type": "object", + "properties": { + "model": { + "type": "string", + "description": "The unique identifier of the model.", + "example": "mistral" + }, + "name": { + "type": "string", + "description": "The name of the model.", + "example": "mistral" + }, + "files": { + "description": "The URL sources from which the model downloaded or accessed.", + "example": ["https://huggingface.co/cortexso/mistral/tree/gguf"], + "oneOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "$ref": "#/components/schemas/ModelArtifactDto" + } + ] + }, + "prompt_template": { + "type": "string", + "description": "A predefined text or framework that guides the AI model's response generation.", + "example": "\n You are an expert in {subject}. Provide a detailed and thorough explanation on the topic of {topic}." + }, + "stop": { + "description": "Defines specific tokens or phrases that signal the model to stop producing further output.", + "example": ["End"], + "type": "array", + "items": { + "type": "string" + } + }, + "max_tokens": { + "type": "number", + "description": "Sets the upper limit on the number of tokens the model can generate in a single output.", + "example": 4096 + }, + "top_p": { + "type": "number", + "description": "Sets probability threshold for more relevant outputs.", + "example": 0.9 + }, + "temperature": { + "type": "number", + "description": "Influences the randomness of the model's output.", + "example": 0.7 + }, + "frequency_penalty": { + "type": "number", + "description": "Modifies the likelihood of the model repeating the same words or phrases within a single output.", + "example": 0.5 + }, + "presence_penalty": { + "type": "number", + "description": "Reduces the likelihood of repeating tokens, promoting novelty in the output.", + "example": 0.6 + }, + "stream": { + "type": "boolean", + "description": "Determines the format for output generation. If set to `true`, the output is generated continuously, allowing for real-time streaming of responses. 
If set to `false`, the output is delivered in a single JSON file.", + "example": true + }, + "ctx_len": { + "type": "number", + "description": "Sets the maximum input the model can use to generate a response; it varies with the model used.", + "example": 4096 + }, + "ngl": { + "type": "number", + "description": "Determines GPU layer usage.", + "example": 32 + }, + "n_parallel": { + "type": "number", + "minimum": 1, + "description": "Number of parallel processing units to use.", + "example": 1 + }, + "cpu_threads": { + "type": "number", + "minimum": 1, + "description": "Determines CPU inference threads, limited by hardware and OS.", + "example": 10 + }, + "engine": { + "type": "string", + "description": "The engine used to run the model.", + "example": "llamacpp" + }, + "owned_by": { + "type": "string", + "description": "The owner of the model.", + "example": "", + "default": "" + } + }, + "required": ["model", "files"] + }, + "StartModelSuccessDto": { + "type": "object", + "properties": { + "message": { + "type": "string", + "description": "The success or error message displayed when a model is successfully loaded or fails to load." + }, + "modelId": { + "type": "string", + "description": "The unique identifier of the model." + } + }, + "required": ["message", "modelId"] + }, + "ModelStartDto": { + "type": "object", + "properties": { + "model": { + "type": "string", + "example": "llama3:8b-gguf-q6-k", + "description": "A downloaded model name." + }, + "ctx_len": { + "type": "number", + "description": "The context length for model operations varies; the maximum depends on the specific model used.", + "example": 4096 + }, + "ngl": { + "type": "number", + "description": "Determines GPU layer usage.", + "example": 32 + }, + "n_parallel": { + "type": "number", + "minimum": 1, + "description": "Number of parallel processing units to use.", + "example": 1 + }, + "cache_type": { + "type": "string", + "description": "KV cache type: f16, q8_0, or q4_0. The default is f16.", + "example": "f16" + }, + "caching_enabled": { + "type": "boolean", + "description": "Whether to enable prompt caching.", + "example": true + }, + "model_path": { + "type": "string", + "description": "Local path to the LLM model file.", + "example": "/tmp/model.gguf" + }, + "mmproj": { + "type": "string", + "description": "Local path to the mmproj model file.", + "example": "/tmp/model.gguf" + } + }, + "required": ["model"] + }, + "ModelStopDto": { + "type": "object", + "properties": { + "model": { + "type": "string", + "example": "llama3:8b-gguf-q6-k", + "description": "A downloaded model name." + } + }, + "required": ["model"] + }, + "ImportModelRequest": { + "type": "object", + "properties": { + "model": { + "type": "string", + "description": "The unique identifier of the model." + }, + "modelPath": { + "type": "string", + "description": "The file path to the model." + }, + "name": { + "type": "string", + "description": "The display name of the model." + }, + "option": { + "type": "string", + "description": "Import options such as symlink or copy.", + "enum": ["symlink", "copy"] + } + }, + "required": ["model", "modelPath"] + }, + "ImportModelResponse": { + "type": "object", + "properties": { + "message": { + "type": "string", + "description": "Success message." + }, + "modelHandle": { + "type": "string", + "description": "The unique identifier of the imported model."
+ }, + "result": { + "type": "string", + "description": "Result status.", + "example": "OK" + } + }, + "required": ["message", "modelHandle", "result"] + }, + "CommonResponseDto": { + "type": "object", + "properties": { + "message": { + "type": "string", + "description": "The response success or error message." + } + }, + "required": ["message"] + }, + "EngineUninstallationResponseDto": { + "type": "object", + "properties": { + "message": { + "type": "string", + "example": "Engine my_engine uninstalled successfully!", + "enum": [ + "Engine onnxruntime uninstalled successfully!", + "Engine llama-cpp uninstalled successfully!", + "Engine tensorrt-llm uninstalled successfully!" + ] + } + } + }, + "SimpleErrorResponse": { + "type": "object", + "properties": { + "message": { + "type": "string" + } + } + }, + "EngineInstallationResponseDto": { + "type": "object", + "properties": { + "message": { + "type": "string", + "example": "Engine my_engine installed successfully!", + "enum": [ + "Engine onnxruntime installed successfully!", + "Engine llama-cpp installed successfully!", + "Engine tensorrt-llm installed successfully!" + ] + } + } + }, + "EngineList": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Engine" + } + }, + "object": { + "type": "string", + "example": "list" + }, + "result": { + "type": "string", + "example": "OK" + } + }, + "required": ["data", "object", "result"] + }, + "Engine": { + "type": "object", + "properties": { + "description": { + "type": "string", + "example": "This extension enables chat completion API calls using the Onnx engine" + }, + "name": { + "type": "string", + "example": "onnxruntime" + }, + "productName": { + "type": "string", + "example": "onnxruntime" + }, + "status": { + "type": "string", + "example": "Incompatible" + }, + "variant": { + "type": "string", + "example": "mac-arm64" + }, + "version": { + "type": "string", + "example": "0.1.34" + } + }, + "required": ["description", "name", "productName", "status"] + }, + "CpuModeDto": { + "type": "object", + "properties": { + "ram": { + "type": "number", + "example": 1024 + } + } + }, + "GpuModeDto": { + "type": "object", + "properties": { + "ram": { + "type": "number", + "example": 1024 + }, + "vram": { + "type": "number", + "example": 1024 + }, + "ngl": { + "type": "number", + "example": 30 + }, + "context_length": { + "type": "number", + "example": 4096 + }, + "recommend_ngl": { + "type": "number", + "example": 33 + } + } + }, + "RecommendDto": { + "type": "object", + "properties": { + "cpu_mode": { + "type": "object", + "$ref": "#/components/schemas/CpuModeDto" + }, + "gpu_mode": { + "type": "array", + "items": { + "$ref": "#/components/schemas/GPUDto" + } + } + } + }, + "ModelDto": { + "type": "object", + "properties": { + "id": { + "type": "string", + "example": "mistral", + "description": "The model identifier, which can be referenced in the API endpoints." + }, + "prompt_template": { + "type": "string", + "example": "You are an expert in {subject}. Provide a detailed and thorough explanation on the topic of {topic}.", + "description": "A predefined text or framework that guides the AI model's response generation." 
+ }, + "stop": { + "example": ["End"], + "description": "Defines specific tokens or phrases that signal the model to stop producing further output.", + "type": "array", + "items": { + "type": "string" + } + }, + "max_tokens": { + "type": "number", + "example": 4096, + "description": "Sets the upper limit on the number of tokens the model can generate in a single output." + }, + "temperature": { + "type": "number", + "example": 0.7, + "description": "Influences the randomness of the model's output." + }, + "top_p": { + "type": "number", + "example": 0.95, + "description": "Sets probability threshold for more relevant outputs" + }, + "stream": { + "type": "boolean", + "example": true, + "description": "Determines the format for output generation. If set to `true`, the output is generated continuously, allowing for real-time streaming of responses. If set to `false`, the output is delivered in a single JSON file." + }, + "frequency_penalty": { + "type": "number", + "example": 0, + "description": "Modifies the likelihood of the model repeating the same words or phrases within a single output." + }, + "presence_penalty": { + "type": "number", + "example": 0, + "description": "Reduces the likelihood of repeating tokens, promoting novelty in the output." + }, + "ngl": { + "type": "number", + "description": "Determines GPU layer usage.", + "example": 32 + }, + "ctx_len": { + "type": "number", + "description": "The context length for model operations varies; the maximum depends on the specific model used.", + "example": 4096 + }, + "cpu_threads": { + "type": "number", + "description": "Determines CPU inference threads, limited by hardware and OS.", + "example": 10 + }, + "pre_prompt": { + "type": "string", + "description": "The prompt to use for internal configuration", + "example": "You are an assistant with expert knowledge in {subject}. Please provide a detailed and accurate response to the following query: {query}. Ensure that your response is clear, concise, and informative." 
+ }, + "n_batch": { + "type": "number", + "description": "The batch size for prompt eval step", + "example": 512 + }, + "caching_enabled": { + "type": "boolean", + "description": "To enable prompt caching or not", + "example": true + }, + "grp_attn_n": { + "type": "number", + "description": "Group attention factor in self-extend", + "example": 1 + }, + "grp_attn_w": { + "type": "number", + "description": "Group attention width in self-extend", + "example": 512 + }, + "mlock": { + "type": "boolean", + "description": "Prevent system swapping of the model to disk in macOS", + "example": false + }, + "grammar_file": { + "type": "string", + "description": "You can constrain the sampling using GBNF grammars by providing path to a grammar file" + }, + "flash_attn": { + "type": "boolean", + "description": "To enable Flash Attention, default is true", + "example": true + }, + "cache_type": { + "type": "string", + "description": "KV cache type: f16, q8_0, q4_0, default is f16", + "example": "f16" + }, + "use_mmap": { + "type": "boolean", + "description": "To enable mmap, default is true", + "example": true + }, + "size": { + "type": "number", + "description": "The model file size in bytes", + "example": 1073741824 + }, + "engine": { + "type": "string", + "description": "The engine to use.", + "example": "llamacpp" + }, + "recommendation": { + "type": "object", + "$ref": "#/components/schemas/RecommendDto" + } + }, + "required": ["id"] + }, + "ListModelsResponseDto": { + "type": "object", + "properties": { + "object": { + "type": "string", + "example": "list", + "enum": ["list"] + }, + "data": { + "description": "List of models", + "type": "array", + "items": { + "$ref": "#/components/schemas/ModelDto" + } + } + }, + "required": ["object", "data"] + }, + "UpdateModelDto": { + "type": "object", + "properties": { + "files": { + "type": "array", + "description": "List of file paths associated with the model. 
Can be relative or absolute.", + "items": { + "type": "string", + "example": "models\\cortex.so\\tinyllama\\1b-gguf\\model.gguf" + } + }, + "stop": { + "type": "array", + "description": "Tokens that signal the end of generation.", + "items": { + "type": "string" + }, + "example": [""] + }, + "stream": { + "type": "boolean", + "description": "Whether to stream the output as it is generated.", + "example": true + }, + "top_p": { + "type": "number", + "description": "Controls nucleus sampling; the model considers the results of the tokens with top_p probability mass.", + "example": 0.95 + }, + "temperature": { + "type": "number", + "description": "Controls randomness in token selection; lower values make the output more deterministic.", + "example": 0.7 + }, + "frequency_penalty": { + "type": "number", + "description": "Penalizes repeated tokens based on their frequency.", + "example": 0 + }, + "presence_penalty": { + "type": "number", + "description": "Penalizes tokens that have already appeared in the output.", + "example": 0 + }, + "max_tokens": { + "type": "integer", + "description": "Maximum number of tokens to generate.", + "example": 4096 + }, + "seed": { + "type": "integer", + "description": "Seed for random number generation to ensure reproducibility; -1 for random seed.", + "example": -1 + }, + "dynatemp_range": { + "type": "number", + "description": "Range for dynamic temperature adjustment.", + "example": 0 + }, + "dynatemp_exponent": { + "type": "number", + "description": "Exponent for dynamic temperature adjustment.", + "example": 1 + }, + "top_k": { + "type": "integer", + "description": "Limits the sampling pool to the top_k most probable tokens.", + "example": 40 + }, + "min_p": { + "type": "number", + "description": "Minimum probability threshold for token selection.", + "example": 0.05 + }, + "tfs_z": { + "type": "number", + "description": "Parameter z for tail-free sampling; 1.0 disables it.", + "example": 1 + }, + "typ_p": { + "type": "number", + "description": "Controls typical sampling; similar to top_p but focuses on local token distribution.", + "example": 1 + }, + "repeat_last_n": { + "type": "integer", + "description": "Number of recent tokens to consider for repetition penalty.", + "example": 64 + }, + "repeat_penalty": { + "type": "number", + "description": "Penalty applied to repeated tokens.", + "example": 1 + }, + "mirostat": { + "type": "boolean", + "description": "Enables or disables Mirostat sampling.", + "example": false + }, + "mirostat_tau": { + "type": "number", + "description": "Target entropy for Mirostat sampling.", + "example": 5 + }, + "mirostat_eta": { + "type": "number", + "description": "Learning rate for Mirostat sampling.", + "example": 0.1 + }, + "penalize_nl": { + "type": "boolean", + "description": "Whether to penalize newline tokens.", + "example": false + }, + "ignore_eos": { + "type": "boolean", + "description": "Whether to ignore end-of-sequence tokens during generation.", + "example": false + }, + "n_probs": { + "type": "integer", + "description": "Number of token probabilities to return for each generated token.", + "example": 0 + }, + "min_keep": { + "type": "integer", + "description": "Minimum number of tokens to keep in the buffer.", + "example": 0 + }, + "engine": { + "type": "string", + "description": "The engine used to run the model.", + "example": "llama-cpp" + }, + "prompt_template": { + "type": "string", + "description": "Template used for formatting prompts.", + "example": 
"\n\n<|system|>\n{system_message}\n\n\n\n\n<|user|>\n{prompt}\n\n\n<|assistant|>\n\n" + }, + "ctx_len": { + "type": "integer", + "description": "Context length for the model.", + "example": 4096 + }, + "n_parallel": { + "type": "integer", + "description": "Number of parallel threads for execution.", + "example": 1 + }, + "ngl": { + "type": "integer", + "description": "Number of GPU layers.", + "example": 33 + }, + "api_key_template": { + "type": "string", + "description": "Template for the API key header." + }, + "version": { + "type": "string", + "description": "The version of the model." + }, + "inference_params": { + "type": "object", + "properties": { + "temperature": { + "type": "number" + }, + "top_p": { + "type": "number" + }, + "frequency_penalty": { + "type": "number" + }, + "presence_penalty": { + "type": "number" + }, + "max_tokens": { + "type": "integer" + }, + "stream": { + "type": "boolean" + } + } + } + } + }, + "DeleteModelResponseDto": { + "type": "object", + "properties": { + "id": { + "type": "string", + "example": "mistral-ins-7b-q4", + "description": "The identifier of the model that was deleted." + }, + "object": { + "type": "string", + "example": "model", + "description": "Type of the object, indicating it's a model.", + "default": "model" + }, + "deleted": { + "type": "boolean", + "example": true, + "description": "Indicates whether the model was successfully deleted." + } + }, + "required": ["id", "object", "deleted"] + }, + "CreateThreadAssistantDto": { + "type": "object", + "properties": { + "id": { + "type": "string", + "example": "thread_123", + "description": "The unique identifier of the assistant." + }, + "avatar": { + "type": "string", + "example": "https://example.com/avatar.png", + "description": "URL of the assistant's avatar image." + }, + "name": { + "type": "string", + "example": "Virtual Helper", + "description": "The name of the assistant." + }, + "model": { + "type": "string", + "example": "mistral", + "description": "The model's unique identifier and settings." + }, + "instructions": { + "type": "string", + "example": "Assist with customer queries and provide information based on the company database.", + "description": "The assistant's specific instructions." + }, + "tools": { + "type": "array", + "example": [ + { + "name": "Knowledge Retrieval", + "settings": { + "source": "internal", + "endpoint": "https://api.example.com/knowledge" + } + } + ], + "description": "The thread's tool(Knowledge Retrieval) configurations." + }, + "description": { + "type": "string", + "nullable": true, + "example": "This assistant helps with customer support by retrieving relevant information.", + "description": "The description of the assistant." + }, + "metadata": { + "type": "object", + "nullable": true, + "example": { + "department": "support", + "version": "1.0" + }, + "description": "Additional metadata for the assistant." + }, + "object": { + "type": "string", + "example": "assistant", + "description": "The object type, always \"assistant\"." + }, + "temperature": { + "type": "number", + "nullable": true, + "example": 0.7, + "description": "Sampling temperature for the assistant." + }, + "top_p": { + "type": "number", + "nullable": true, + "example": 0.9, + "description": "Top-p sampling value for the assistant." + }, + "created_at": { + "type": "number", + "example": 1622470423, + "description": "Timestamp of when the assistant was created." 
+ }, + "response_format": { + "type": "object", + "example": { + "format": "json" + }, + "description": "The response format option for the assistant." + }, + "tool_resources": { + "type": "object", + "example": { + "resources": ["database1", "database2"] + }, + "description": "Tool resources for the assistant." + } + }, + "required": [ + "id", + "name", + "model", + "instructions", + "tools", + "description", + "metadata", + "object", + "created_at" + ] + }, + "CreateThreadDto": { + "type": "object", + "properties": { + "assistants": { + "description": "The details of the thread's settings.", + "type": "array", + "items": { + "$ref": "#/components/schemas/CreateThreadAssistantDto" + } + } + }, + "required": ["assistants"] + }, + "ContentDto": { + "type": "object", + "properties": { + "type": { + "type": "string", + "example": "text", + "description": "Type of content, e.g., \"text\"." + }, + "text": { + "type": "object", + "example": { + "value": "How does AI work? Explain it in simple terms.", + "annotations": [] + }, + "description": "Text content of the message along with any annotations." + } + }, + "required": ["type", "text"] + }, + "GetMessageResponseDto": { + "type": "object", + "properties": { + "id": { + "type": "string", + "example": "msg_abc123", + "description": "The identifier of the message." + }, + "object": { + "type": "string", + "example": "thread.message", + "description": "Type of the object, indicating it's a thread message.", + "default": "thread.message" + }, + "created_at": { + "type": "integer", + "example": 1699017614, + "description": "Unix timestamp representing the creation time of the message." + }, + "thread_id": { + "type": "string", + "example": "thread_abc123", + "description": "Identifier of the thread to which this message belongs." + }, + "role": { + "type": "string", + "example": "user", + "description": "Role of the sender, either 'user' or 'assistant'." + }, + "content": { + "description": "Array of content objects detailing the message content.", + "type": "array", + "items": { + "$ref": "#/components/schemas/ContentDto" + } + }, + "file_ids": { + "example": [], + "description": "Array of file IDs associated with the message, if any.", + "type": "array", + "items": { + "type": "string" + } + }, + "assistant_id": { + "type": "string", + "nullable": true, + "example": null, + "description": "Identifier of the assistant involved in the message, if applicable." + }, + "run_id": { + "type": "string", + "nullable": true, + "example": null, + "description": "Run ID associated with the message, if applicable." + }, + "metadata": { + "type": "object", + "example": {}, + "description": "Metadata associated with the message." + } + }, + "required": [ + "id", + "object", + "created_at", + "thread_id", + "role", + "content", + "file_ids", + "assistant_id", + "run_id", + "metadata" + ] + }, + "ListMessageObjectDto": { + "type": "object", + "properties": { + "id": { + "type": "string", + "example": "msg_abc123", + "description": "The identifier of the message." + }, + "object": { + "type": "string", + "example": "thread.message", + "description": "Type of the object, indicating it's a thread message." + }, + "created_at": { + "type": "integer", + "example": 1699017614, + "description": "Unix timestamp representing the creation time of the message." + }, + "thread_id": { + "type": "string", + "example": "thread_abc123", + "description": "Identifier of the thread to which this message belongs." 
+ }, + "role": { + "type": "string", + "example": "user", + "description": "Role of the sender, either 'user' or 'assistant'." + }, + "file_ids": { + "description": "Array of file IDs associated with the message, if any.", + "example": [], + "type": "array", + "items": { + "type": "string" + } + }, + "assistant_id": { + "type": "string", + "nullable": true, + "description": "Identifier of the assistant involved in the message, if applicable.", + "example": null + }, + "run_id": { + "type": "string", + "nullable": true, + "description": "Run ID associated with the message, if applicable.", + "example": null + }, + "metadata": { + "type": "object", + "example": {}, + "description": "Metadata associated with the message." + } + }, + "required": [ + "id", + "object", + "created_at", + "thread_id", + "role", + "file_ids", + "assistant_id", + "run_id", + "metadata" + ] + }, + "ListMessagesResponseDto": { + "type": "object", + "properties": { + "object": { + "type": "string", + "example": "list", + "description": "Type of the object, indicating it's a list." + }, + "data": { + "description": "Array of message objects.", + "type": "array", + "items": { + "$ref": "#/components/schemas/ListMessageObjectDto" + } + }, + "first_id": { + "type": "string", + "example": "msg_abc123", + "description": "Identifier of the first message in the list." + }, + "last_id": { + "type": "string", + "example": "msg_abc456", + "description": "Identifier of the last message in the list." + }, + "has_more": { + "type": "boolean", + "example": false, + "description": "Indicates whether there are more messages to retrieve." + } + }, + "required": ["object", "data", "first_id", "last_id", "has_more"] + }, + "CreateMessageDto": { + "type": "object", + "properties": { + "role": { + "type": "object", + "example": "user", + "description": "The role of the entity that is creating the message. Allowed values include:\n - user: Indicates the message is sent by an actual user and should be used in most cases to represent user-generated messages.\n - assistant: Indicates the message is generated by the assistant. Use this value to insert messages from the assistant into the conversation." + }, + "content": { + "type": "string", + "example": "Tell me a joke", + "description": "The text contents of the message." + } + }, + "required": ["role", "content"] + }, + "UpdateMessageDto": { + "type": "object", + "properties": {} + }, + "DeleteMessageDto": { + "type": "object", + "properties": { + "id": { + "type": "string", + "example": "message_123", + "description": "The identifier of the message that was deleted." + }, + "object": { + "type": "string", + "example": "message", + "description": "Type of the object, indicating it's a message.", + "default": "message" + }, + "deleted": { + "type": "boolean", + "example": true, + "description": "Indicates whether the message was successfully deleted." + } + }, + "required": ["id", "object", "deleted"] + }, + "GetThreadResponseDto": { + "type": "object", + "properties": { + "id": { + "type": "string", + "example": "thread_abc123", + "description": "The identifier of the thread." + }, + "object": { + "type": "string", + "example": "thread", + "description": "Type of the object" + }, + "created_at": { + "type": "integer", + "example": 1699014083, + "description": "Unix timestamp representing the creation time of the thread." 
+ }, + "assistants": { + "example": ["assistant-001"], + "description": "List of assistants involved in the thread.", + "type": "array", + "items": { + "type": "string" + } + }, + "metadata": { + "type": "object", + "example": {}, + "description": "Metadata associated with the thread." + }, + "messages": { + "example": [], + "description": "List of messages within the thread.", + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "id", + "object", + "created_at", + "assistants", + "metadata", + "messages" + ] + }, + "UpdateThreadDto": { + "type": "object", + "properties": {} + }, + "DeleteThreadResponseDto": { + "type": "object", + "properties": { + "id": { + "type": "string", + "example": "thread_123", + "description": "The identifier of the thread that was deleted." + }, + "object": { + "type": "string", + "example": "thread", + "description": "Type of the object, indicating it's a thread.", + "default": "thread" + }, + "deleted": { + "type": "boolean", + "example": true, + "description": "Indicates whether the thread was successfully deleted." + } + }, + "required": ["id", "object", "deleted"] + }, + "CPUDto": { + "type": "object", + "properties": { + "arch": { + "type": "string", + "example": "amd64", + "description": "The architecture of the CPU." + }, + "cores": { + "type": "integer", + "example": 8, + "description": "The number of CPU cores available." + }, + "instructions": { + "type": "array", + "items": { + "type": "string" + }, + "example": [ + "fpu", + "mmx", + "sse", + "sse2", + "sse3", + "ssse3", + "sse4_1", + "sse4_2", + "pclmulqdq", + "avx", + "avx2", + "aes", + "f16c" + ], + "description": "A list of supported CPU instruction sets." + }, + "model": { + "type": "string", + "example": "AMD Ryzen Threadripper PRO 5955WX 16-Cores", + "description": "The model name of the CPU." + } + }, + "required": ["arch", "cores", "instructions", "model"] + }, + "GPUDto": { + "type": "object", + "properties": { + "activated": { + "type": "boolean", + "example": true, + "description": "Indicates if the GPU is currently activated." + }, + "additional_information": { + "type": "object", + "properties": { + "compute_cap": { + "type": "string", + "example": "8.6", + "description": "The compute capability of the GPU." + }, + "driver_version": { + "type": "string", + "example": "535.183", + "description": "The version of the installed driver." + } + }, + "required": ["compute_cap", "driver_version"] + }, + "free_vram": { + "type": "integer", + "example": 23983, + "description": "The amount of free VRAM in MB." + }, + "id": { + "type": "string", + "example": "0", + "description": "Unique identifier for the GPU." + }, + "name": { + "type": "string", + "example": "NVIDIA GeForce RTX 3090", + "description": "The name of the GPU model." + }, + "total_vram": { + "type": "integer", + "example": 24576, + "description": "The total VRAM available in MB." + }, + "uuid": { + "type": "string", + "example": "GPU-5206045b-2a1c-1e7d-6c60-d7c367d02376", + "description": "The universally unique identifier for the GPU." + }, + "version": { + "type": "string", + "example": "12.2", + "description": "The version of the GPU." + } + }, + "required": [ + "activated", + "additional_information", + "free_vram", + "id", + "name", + "total_vram", + "uuid", + "version" + ] + }, + "OperatingSystemDto": { + "type": "object", + "properties": { + "name": { + "type": "string", + "example": "Ubuntu 24.04.1 LTS", + "description": "The name of the operating system." 
+ }, + "version": { + "type": "string", + "example": "24.04.1 LTS (Noble Numbat)", + "description": "The version of the operating system." + } + }, + "required": ["name", "version"] + }, + "PowerDto": { + "type": "object", + "properties": { + "battery_life": { + "type": "integer", + "example": 0, + "description": "The percentage of battery life remaining." + }, + "charging_status": { + "type": "string", + "example": "", + "description": "The charging status of the device." + }, + "is_power_saving": { + "type": "boolean", + "example": false, + "description": "Indicates if the power-saving mode is enabled." + } + }, + "required": ["battery_life", "charging_status", "is_power_saving"] + }, + "RAMDto": { + "type": "object", + "properties": { + "available": { + "type": "integer", + "example": 11100, + "description": "The amount of available RAM in MB." + }, + "total": { + "type": "integer", + "example": 15991, + "description": "The total RAM in MB." + }, + "type": { + "type": "string", + "example": "", + "description": "The type of RAM." + } + }, + "required": ["available", "total", "type"] + }, + "StorageDto": { + "type": "object", + "properties": { + "available": { + "type": "integer", + "example": 0, + "description": "The amount of available storage in MB." + }, + "total": { + "type": "integer", + "example": 0, + "description": "The total storage in MB." + }, + "type": { + "type": "string", + "example": "", + "description": "The type of storage." + } + }, + "required": ["available", "total", "type"] + } + } + } +} diff --git a/server/index.ts b/server/index.ts index e8a6eea78..4008d7008 100644 --- a/server/index.ts +++ b/server/index.ts @@ -1,9 +1,9 @@ import fastify from 'fastify' import dotenv from 'dotenv' -import { v1Router, log, getJanExtensionsPath } from '@janhq/core/node' -import { join } from 'path' +import { log } from '@janhq/core/node' import tcpPortUsed from 'tcp-port-used' import { Logger } from './helpers/logger' +import CORTEX_SCHEMA from './cortex.json' // Load environment variables dotenv.config() @@ -66,34 +66,29 @@ export const startServer = async (configs?: ServerConfig): Promise => { // Initialize Fastify server with logging server = fastify({ - logger: new Logger(), + loggerInstance: new Logger(), // Set body limit to 100MB - Default is 1MB // According to OpenAI - a batch input file can be up to 100 MB in size // Whisper endpoints accept up to 25MB // Vision endpoints accept up to 4MB - bodyLimit: 104_857_600 + bodyLimit: 104_857_600, }) // Register CORS if enabled if (corsEnabled) await server.register(require('@fastify/cors'), {}) + CORTEX_SCHEMA.servers[0].url = configs?.prefix ?? '/v1' // Register Swagger for API documentation await server.register(require('@fastify/swagger'), { mode: 'static', specification: { - path: configs?.schemaPath ?? './../docs/openapi/jan.yaml', - baseDir: configs?.baseDir ?? './../docs/openapi', - postProcessor: function (swaggerObject: any) { - swaggerObject.servers[0].url = configs?.prefix ?? '/v1' - return swaggerObject - }, + document: CORTEX_SCHEMA, }, }) // Register Swagger UI await server.register(require('@fastify/swagger-ui'), { routePrefix: '/', - baseDir: configs?.baseDir ?? 
join(__dirname, '../..', './docs/openapi'), uiConfig: { docExpansion: 'full', deepLinking: false, @@ -102,26 +97,12 @@ export const startServer = async (configs?: ServerConfig): Promise => { transformSpecificationClone: true, }) - // Register static file serving for extensions - // TODO: Watch extension files changes and reload - await server.register( - (childContext: any, _: any, done: any) => { - childContext.register(require('@fastify/static'), { - root: getJanExtensionsPath(), - wildcard: false, - }) + server.register(require('@fastify/http-proxy'), { + upstream: 'http://127.0.0.1:39291/v1', + prefix: configs?.prefix ?? '/v1', + http2: false, + }) - done() - }, - { prefix: 'extensions' } - ) - - // Register proxy middleware - if (configs?.storageAdataper) - server.addHook('preHandler', configs.storageAdataper) - - // Register API routes - await server.register(v1Router, { prefix: configs?.prefix ?? '/v1' }) // Start listening for requests await server .listen({ diff --git a/server/main.ts b/server/main.ts index 71fb11106..b69197d15 100644 --- a/server/main.ts +++ b/server/main.ts @@ -1,7 +1,6 @@ -import { s3 } from './middleware/s3' import { setup } from './helpers/setup' import { startServer as start } from './index' /** * Setup extensions and start the server */ -setup().then(() => start({ storageAdataper: s3 })) +setup().then(() => start()) diff --git a/server/middleware/s3.ts b/server/middleware/s3.ts deleted file mode 100644 index 3024285a3..000000000 --- a/server/middleware/s3.ts +++ /dev/null @@ -1,70 +0,0 @@ -import { join } from 'path' - -// Middleware to intercept requests and proxy if certain conditions are met -const config = { - endpoint: process.env.AWS_ENDPOINT, - region: process.env.AWS_REGION, - credentials: { - accessKeyId: process.env.AWS_ACCESS_KEY_ID, - secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY, - }, -} - -const S3_BUCKET_NAME = process.env.S3_BUCKET_NAME - -const fs = require('@cyclic.sh/s3fs')(S3_BUCKET_NAME, config) -const PROXY_PREFIX = '/v1/fs' -const PROXY_ROUTES = ['/threads', '/messages'] - -export const s3 = (req: any, reply: any, done: any) => { - // Proxy FS requests to S3 using S3FS - if (req.url.startsWith(PROXY_PREFIX)) { - const route = req.url.split('/').pop() - const args = parseRequestArgs(req) - - // Proxy matched requests to the s3fs module - if (args.length && PROXY_ROUTES.some((route) => args[0].includes(route))) { - try { - // Handle customized route - // S3FS does not handle appendFileSync - if (route === 'appendFileSync') { - let result = handAppendFileSync(args) - - reply.status(200).send(result) - return - } - // Reroute the other requests to the s3fs module - const result = fs[route](...args) - reply.status(200).send(result) - return - } catch (ex) { - console.error(ex) - } - } - } - // Let other requests go through - done() -} - -const parseRequestArgs = (req: Request) => { - const { - getJanDataFolderPath, - normalizeFilePath, - } = require('@janhq/core/node') - - return JSON.parse(req.body as any).map((arg: any) => - typeof arg === 'string' && - (arg.startsWith(`file:/`) || arg.startsWith(`file:\\`)) - ? 
join(getJanDataFolderPath(), normalizeFilePath(arg)) - : arg - ) -} - -const handAppendFileSync = (args: any[]) => { - if (fs.existsSync(args[0])) { - const data = fs.readFileSync(args[0], 'utf-8') - return fs.writeFileSync(args[0], data + args[1]) - } else { - return fs.writeFileSync(args[0], args[1]) - } -} diff --git a/server/package.json b/server/package.json index b2c237c61..634c91fe0 100644 --- a/server/package.json +++ b/server/package.json @@ -8,7 +8,8 @@ "homepage": "https://jan.ai", "description": "Use offline LLMs with your own data. Run open source models like Llama2 or Falcon on your internal computers/servers.", "files": [ - "build/**" + "build/**", + "cortex.json" ], "scripts": { "lint": "eslint . --ext \".js,.jsx,.ts,.tsx\"", @@ -19,14 +20,15 @@ "dependencies": { "@alumna/reflect": "^1.1.3", "@cyclic.sh/s3fs": "^1.2.9", - "@fastify/cors": "^8.4.2", + "@fastify/cors": "^10.0.1", + "@fastify/http-proxy": "^10.0.0", "@fastify/static": "^6.12.0", - "@fastify/swagger": "^8.13.0", - "@fastify/swagger-ui": "2.0.1", + "@fastify/swagger": "^9.4.0", + "@fastify/swagger-ui": "5.2.0", "@janhq/core": "link:./core", "@npmcli/arborist": "^7.3.1", "dotenv": "^16.3.1", - "fastify": "^4.24.3", + "fastify": "^5.2.0", "fetch-retry": "^5.0.6", "node-fetch": "2", "request": "^2.88.2", diff --git a/server/tsconfig.json b/server/tsconfig.json index dd27b8932..d70798403 100644 --- a/server/tsconfig.json +++ b/server/tsconfig.json @@ -15,7 +15,8 @@ "paths": { "*": ["node_modules/*"] }, "typeRoots": ["node_modules/@types"], "ignoreDeprecations": "5.0", - "declaration": true + "declaration": true, + "resolveJsonModule": true }, // "sourceMap": true, diff --git a/web/containers/CenterPanelContainer/index.tsx b/web/containers/CenterPanelContainer/index.tsx index 9ce81f184..b3df8face 100644 --- a/web/containers/CenterPanelContainer/index.tsx +++ b/web/containers/CenterPanelContainer/index.tsx @@ -1,15 +1,45 @@ import { PropsWithChildren } from 'react' +import { useMediaQuery } from '@janhq/joi' import { useAtomValue } from 'jotai' import { twMerge } from 'tailwind-merge' +import { MainViewState } from '@/constants/screens' + +import { LEFT_PANEL_WIDTH } from '../LeftPanelContainer' + +import { RIGHT_PANEL_WIDTH } from '../RightPanelContainer' + +import { + mainViewStateAtom, + showLeftPanelAtom, + showRightPanelAtom, +} from '@/helpers/atoms/App.atom' import { reduceTransparentAtom } from '@/helpers/atoms/Setting.atom' -const CenterPanelContainer = ({ children }: PropsWithChildren) => { +type Props = { + isShowStarterScreen?: boolean +} & PropsWithChildren + +const CenterPanelContainer = ({ children, isShowStarterScreen }: Props) => { const reduceTransparent = useAtomValue(reduceTransparentAtom) + const matches = useMediaQuery('(max-width: 880px)') + const showLeftPanel = useAtomValue(showLeftPanelAtom) + const showRightPanel = useAtomValue(showRightPanelAtom) + const mainViewState = useAtomValue(mainViewStateAtom) + return ( -
+
{ beforeEach(() => { jest.clearAllMocks() - ;(useAtomValue as jest.Mock).mockReturnValue([]) - ;(useSetAtom as jest.Mock).mockReturnValue(mockSetMainState) - ;(useSetAtom as jest.Mock).mockReturnValue(mockSetSelectedSettingScreen) - ;(useSetAtom as jest.Mock).mockReturnValue(mockSetModalTroubleShooting) - ;(useSendChatMessage as jest.Mock).mockReturnValue({ - resendChatMessage: mockResendChatMessage, - }) + ; (useAtomValue as jest.Mock).mockReturnValue([]) + ; (useSetAtom as jest.Mock).mockReturnValue(mockSetMainState) + ; (useSetAtom as jest.Mock).mockReturnValue(mockSetSelectedSettingScreen) + ; (useSetAtom as jest.Mock).mockReturnValue(mockSetModalTroubleShooting) + ; (useSendChatMessage as jest.Mock).mockReturnValue({ + resendChatMessage: mockResendChatMessage, + }) }) it('renders error message with InvalidApiKey correctly', () => { const message: ThreadMessage = { id: '1', - status: MessageStatus.Error, - error_code: ErrorCode.InvalidApiKey, + metadata: { + error: MessageStatus.Error, + error_code: ErrorCode.InvalidApiKey, + }, + status: "completed", content: [{ text: { value: 'Invalid API Key' } }], } as ThreadMessage @@ -56,8 +59,11 @@ describe('ErrorMessage Component', () => { it('renders general error message correctly', () => { const message: ThreadMessage = { id: '1', - status: MessageStatus.Error, - error_code: ErrorCode.Unknown, + status: "completed", + metadata: { + error: MessageStatus.Error, + error_code: ErrorCode.Unknown + }, content: [{ text: { value: 'Unknown error occurred' } }], } as ThreadMessage @@ -69,9 +75,11 @@ describe('ErrorMessage Component', () => { it('opens troubleshooting modal when link is clicked', () => { const message: ThreadMessage = { id: '1', - status: MessageStatus.Error, - error_code: ErrorCode.Unknown, - content: [{ text: { value: 'Unknown error occurred' } }], + status: "completed", + metadata: { + error: MessageStatus.Error, + error_code: ErrorCode.Unknown, + }, content: [{ text: { value: 'Unknown error occurred' } }], } as ThreadMessage render() diff --git a/web/containers/ErrorMessage/index.tsx b/web/containers/ErrorMessage/index.tsx index b2f6bc23a..e0705e6b6 100644 --- a/web/containers/ErrorMessage/index.tsx +++ b/web/containers/ErrorMessage/index.tsx @@ -14,41 +14,65 @@ import ModalTroubleShooting, { import { MainViewState } from '@/constants/screens' +import { isLocalEngine } from '@/utils/modelEngine' + import { mainViewStateAtom } from '@/helpers/atoms/App.atom' +import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom' import { selectedSettingAtom } from '@/helpers/atoms/Setting.atom' -import { activeThreadAtom } from '@/helpers/atoms/Thread.atom' const ErrorMessage = ({ message }: { message: ThreadMessage }) => { const setModalTroubleShooting = useSetAtom(modalTroubleShootingAtom) const setMainState = useSetAtom(mainViewStateAtom) const setSelectedSettingScreen = useSetAtom(selectedSettingAtom) - const activeThread = useAtomValue(activeThreadAtom) + const activeAssistant = useAtomValue(activeAssistantAtom) + + const defaultDesc = () => { + return ( + <> +

+ {`Something's wrong.`} Access  + setModalTroubleShooting(true)} + > + troubleshooting assistance + +  now. +

+ + + ) + } + + const getEngine = () => { + const engineName = activeAssistant?.model?.engine + return engineName ? EngineManager.instance().get(engineName) : null + } const getErrorTitle = () => { - switch (message.error_code) { + const engine = getEngine() + + switch (message.metadata?.error_code) { case ErrorCode.InvalidApiKey: case ErrorCode.AuthenticationError: return ( - - Invalid API key. Please check your API key from{' '} - {' '} - and try again. - + }} + > + Settings + {' '} + and try again. + + {defaultDesc()} + ) default: return ( @@ -56,8 +80,20 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => { data-testid="passthrough-error-message" className="first-letter:uppercase" > - {message.content[0]?.text?.value && ( - + {message.content[0]?.text?.value === 'Failed to fetch' && + engine && + !isLocalEngine(String(engine?.name)) ? ( + + No internet connection.
Switch to an on-device model or + check your connection.
+ ) : ( + <> + {message?.content[0]?.text?.value && ( + + )} + {defaultDesc()} + )}

) @@ -65,24 +101,13 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => { } return ( -
- {message.status === MessageStatus.Error && ( +
+ {!!message.metadata?.error && (
{getErrorTitle()} -

- {`Something's wrong.`} Access  - setModalTroubleShooting(true)} - > - troubleshooting assistance - -  now. -

-
)}
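The ErrorMessage hunk above moves error state off `message.status` and into `message.metadata` (an `error` string plus an `error_code`), so a message can land with `status: "completed"` and still carry an error payload. A minimal reading-side sketch, assuming only the metadata shape introduced in this diff (the `getMessageError` helper is illustrative, not an `@janhq/core` API):

```ts
import { ErrorCode, ThreadMessage } from '@janhq/core'

// Error payload shape this diff stores on message.metadata.
type MessageErrorMeta = {
  error?: string
  error_code?: ErrorCode
}

// Hypothetical helper: pull the error payload off a message,
// returning undefined for messages that completed cleanly.
const getMessageError = (
  message: ThreadMessage
): MessageErrorMeta | undefined => {
  const meta = message.metadata as MessageErrorMeta | undefined
  return meta?.error
    ? { error: meta.error, error_code: meta.error_code }
    : undefined
}

// Mirrors the render guard in the component above: the error block
// is shown only when the metadata carries an error.
const shouldRenderError = (message: ThreadMessage) =>
  getMessageError(message) !== undefined
```

This is why the tests above assert on `metadata.error` / `metadata.error_code` rather than `MessageStatus.Error`, and why `deleteMessageAtom` later in this diff filters on `!e.metadata?.error`.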
diff --git a/web/containers/Layout/BottomPanel/index.tsx b/web/containers/Layout/BottomPanel/index.tsx index cc0efd805..69894c9e3 100644 --- a/web/containers/Layout/BottomPanel/index.tsx +++ b/web/containers/Layout/BottomPanel/index.tsx @@ -35,7 +35,7 @@ const BottomPanel = () => { return (
{ theme="icon" onClick={() => { setMainViewState(MainViewState.Settings) - setSelectedSetting('Appearance') + setSelectedSetting('Preferences') }} > diff --git a/web/containers/Layout/index.tsx b/web/containers/Layout/index.tsx index e787163d4..29fda70de 100644 --- a/web/containers/Layout/index.tsx +++ b/web/containers/Layout/index.tsx @@ -1,9 +1,11 @@ 'use client' -import { useEffect, useMemo } from 'react' +import { useEffect, useState } from 'react' -import { useAtomValue, useSetAtom } from 'jotai' +import { Button } from '@janhq/joi' +import { useAtom, useAtomValue, useSetAtom } from 'jotai' +import posthog from 'posthog-js' import { twMerge } from 'tailwind-merge' import BottomPanel from '@/containers/Layout/BottomPanel' @@ -31,12 +33,72 @@ import MainViewContainer from '../MainViewContainer' import InstallingExtensionModal from './BottomPanel/InstallingExtension/InstallingExtensionModal' import { mainViewStateAtom } from '@/helpers/atoms/App.atom' -import { reduceTransparentAtom } from '@/helpers/atoms/Setting.atom' +import { + productAnalyticAtom, + productAnalyticPromptAtom, + reduceTransparentAtom, +} from '@/helpers/atoms/Setting.atom' const BaseLayout = () => { const setMainViewState = useSetAtom(mainViewStateAtom) const importModelStage = useAtomValue(getImportModelStageAtom) const reduceTransparent = useAtomValue(reduceTransparentAtom) + const [productAnalytic, setProductAnalytic] = useAtom(productAnalyticAtom) + const [productAnalyticPrompt, setProductAnalyticPrompt] = useAtom( + productAnalyticPromptAtom + ) + const [showProductAnalyticPrompt, setShowProductAnalyticPrompt] = + useState(false) + + useEffect(() => { + const timer = setTimeout(() => { + if (productAnalyticPrompt) { + setShowProductAnalyticPrompt(true) + } + return () => clearTimeout(timer) + }, 3000) // 3 seconds delay + + return () => clearTimeout(timer) // Cleanup timer on unmount + }, [productAnalyticPrompt]) + + useEffect(() => { + if (productAnalytic) { + posthog.init(POSTHOG_KEY, { + api_host: POSTHOG_HOST, + autocapture: false, + capture_pageview: false, + capture_pageleave: false, + disable_session_recording: true, + person_profiles: 'always', + persistence: 'localStorage', + opt_out_capturing_by_default: true, + // eslint-disable-next-line @typescript-eslint/naming-convention + sanitize_properties: function (properties) { + const denylist = [ + '$pathname', + '$initial_pathname', + '$current_url', + '$initial_current_url', + '$host', + '$initial_host', + '$initial_person_info', + ] + + denylist.forEach((key) => { + if (properties[key]) { + properties[key] = null // Set each denied property to null + } + }) + + return properties + }, + }) + posthog.opt_in_capturing() + posthog.register({ app_version: VERSION }) + } else { + posthog.opt_out_capturing() + } + }, [productAnalytic]) useEffect(() => { if (localStorage.getItem(SUCCESS_SET_NEW_DESTINATION) === 'true') { @@ -54,6 +116,17 @@ const BaseLayout = () => { ) }, [setMainViewState]) + const handleProductAnalytics = (isAllowed: boolean) => { + setProductAnalytic(isAllowed) + setProductAnalyticPrompt(false) + setShowProductAnalyticPrompt(false) + if (isAllowed) { + posthog.opt_in_capturing() + } else { + posthog.opt_out_capturing() + } + } + return (
{ + {showProductAnalyticPrompt && ( +
+
+ + + + + + + + + + +
Help Us Improve Jan
+
+

+ To improve Jan, we collect anonymous data to understand feature + usage. Your chats and personal information are never tracked. You + can change this anytime in  + {`Settings > Privacy.`} +

+

+ Would you like to help us improve Jan? +

+
+ + +
+
+ )}
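The Layout change above makes PostHog strictly opt-in and nulls out URL- and path-like properties before any event leaves the client. A condensed sketch of that initialization, reusing the exact `posthog-js` options from the hunk (`POSTHOG_KEY`, `POSTHOG_HOST`, and `VERSION` are injected build-time globals in this app; the `initOptInAnalytics` wrapper itself is illustrative):

```ts
import posthog from 'posthog-js'

// Build-time globals assumed to be injected by the bundler.
declare const POSTHOG_KEY: string
declare const POSTHOG_HOST: string
declare const VERSION: string

// Properties that could leak local paths or URLs; nulled before sending.
const DENYLIST = [
  '$pathname',
  '$initial_pathname',
  '$current_url',
  '$initial_current_url',
  '$host',
  '$initial_host',
  '$initial_person_info',
]

export const initOptInAnalytics = (enabled: boolean) => {
  if (!enabled) {
    posthog.opt_out_capturing()
    return
  }
  posthog.init(POSTHOG_KEY, {
    api_host: POSTHOG_HOST,
    autocapture: false,
    capture_pageview: false,
    capture_pageleave: false,
    disable_session_recording: true,
    person_profiles: 'always',
    persistence: 'localStorage',
    opt_out_capturing_by_default: true,
    // Null denylisted keys rather than deleting them, matching the hunk above.
    sanitize_properties: (properties) => {
      DENYLIST.forEach((key) => {
        if (properties[key]) properties[key] = null
      })
      return properties
    },
  })
  posthog.opt_in_capturing()
  posthog.register({ app_version: VERSION })
}
```

Initializing with `opt_out_capturing_by_default: true` and calling `opt_in_capturing()` only after explicit consent means nothing is captured for users who never answer the prompt.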
diff --git a/web/containers/LeftPanelContainer/index.tsx b/web/containers/LeftPanelContainer/index.tsx index 3991757f3..c6665a037 100644 --- a/web/containers/LeftPanelContainer/index.tsx +++ b/web/containers/LeftPanelContainer/index.tsx @@ -17,7 +17,7 @@ import { reduceTransparentAtom } from '@/helpers/atoms/Setting.atom' type Props = PropsWithChildren const DEFAULT_LEFT_PANEL_WIDTH = 200 -const LEFT_PANEL_WIDTH = 'leftPanelWidth' +export const LEFT_PANEL_WIDTH = 'leftPanelWidth' const LeftPanelContainer = ({ children }: Props) => { const [leftPanelRef, setLeftPanelRef] = useState(null) @@ -106,7 +106,7 @@ const LeftPanelContainer = ({ children }: Props) => {
{ } return ( -
+
{ const { downloadModel } = useDownloadModel() + const [modelDropdownState, setModelDropdownState] = useAtom( + modelDropdownStateAtom + ) const [searchFilter, setSearchFilter] = useState('local') const [searchText, setSearchText] = useState('') - const [open, setOpen] = useState(false) + const [open, setOpen] = useState(modelDropdownState) const activeThread = useAtomValue(activeThreadAtom) + const activeAssistant = useAtomValue(activeAssistantAtom) const downloadingModels = useAtomValue(getDownloadingModelAtom) const [toggle, setToggle] = useState(null) const [selectedModel, setSelectedModel] = useAtom(selectedModelAtom) @@ -82,22 +90,38 @@ const ModelDropdown = ({ const [dropdownOptions, setDropdownOptions] = useState( null ) + const downloadStates = useAtomValue(modelDownloadStateAtom) const setThreadModelParams = useSetAtom(setThreadModelParamsAtom) const { updateModelParameter } = useUpdateModelParameters() const searchInputRef = useRef(null) const configuredModels = useAtomValue(configuredModelsAtom) - const featuredModel = configuredModels.filter((x) => - x.metadata?.tags?.includes('Featured') + + const featuredModel = configuredModels.filter( + (x) => + manualRecommendationModel.includes(x.id) && + x.metadata?.tags?.includes('Featured') && + x.metadata?.size < 5000000000 ) const { updateThreadMetadata } = useCreateNewThread() - useClickOutside(() => setOpen(false), null, [dropdownOptions, toggle]) + useClickOutside(() => handleChangeStateOpen(false), null, [ + dropdownOptions, + toggle, + ]) const [showEngineListModel, setShowEngineListModel] = useAtom( showEngineListModelAtom ) + const handleChangeStateOpen = useCallback( + (state: boolean) => { + setOpen(state) + setModelDropdownState(state) + }, + [setModelDropdownState] + ) + const isModelSupportRagAndTools = useCallback((model: Model) => { return ( model?.engine === InferenceEngine.openai || @@ -143,6 +167,12 @@ const ModelDropdown = ({ [configuredModels, searchText, searchFilter, downloadedModels] ) + useEffect(() => { + if (modelDropdownState && chatInputMode) { + setOpen(modelDropdownState) + } + }, [chatInputMode, modelDropdownState]) + useEffect(() => { if (open && searchInputRef.current) { searchInputRef.current.focus() @@ -151,17 +181,24 @@ const ModelDropdown = ({ useEffect(() => { if (!activeThread) return - const modelId = activeThread?.assistants?.[0]?.model?.id + const modelId = activeAssistant?.model?.id let model = downloadedModels.find((model) => model.id === modelId) if (!model) { - model = recommendedModel + model = undefined } setSelectedModel(model) - }, [recommendedModel, activeThread, downloadedModels, setSelectedModel]) + }, [ + recommendedModel, + activeThread, + downloadedModels, + setSelectedModel, + activeAssistant?.model?.id, + ]) const onClickModelItem = useCallback( async (modelId: string) => { + if (!activeAssistant) return const model = downloadedModels.find((m) => m.id === modelId) setSelectedModel(model) setOpen(false) @@ -172,14 +209,14 @@ const ModelDropdown = ({ ...activeThread, assistants: [ { - ...activeThread.assistants[0], + ...activeAssistant, tools: [ { type: 'retrieval', enabled: isModelSupportRagAndTools(model as Model), settings: { - ...(activeThread.assistants[0].tools && - activeThread.assistants[0].tools[0]?.settings), + ...(activeAssistant.tools && + activeAssistant.tools[0]?.settings), }, }, ], @@ -192,8 +229,12 @@ const ModelDropdown = ({ model?.settings.ctx_len ?? 8192 ) const overriddenParameters = { - ctx_len: Math.min(8192, model?.settings.ctx_len ?? 
8192), - max_tokens: defaultContextLength, + ctx_len: !isLocalEngine(model?.engine) + ? undefined + : defaultContextLength, + max_tokens: !isLocalEngine(model?.engine) + ? (model?.parameters.max_tokens ?? 8192) + : defaultContextLength, } const modelParams = { @@ -215,13 +256,14 @@ const ModelDropdown = ({ } }, [ + activeAssistant, downloadedModels, - activeThread, setSelectedModel, + activeThread, + updateThreadMetadata, isModelSupportRagAndTools, setThreadModelParams, updateModelParameter, - updateThreadMetadata, ] ) @@ -329,14 +371,21 @@ const ModelDropdown = ({ 'inline-block max-w-[200px] cursor-pointer overflow-hidden text-ellipsis whitespace-nowrap', open && 'border border-transparent' )} - onClick={() => setOpen(!open)} + onClick={() => handleChangeStateOpen(!open)} > - {selectedModel?.name} + + {selectedModel?.name || 'Select Model'} + ) : ( ('') export const currentPromptAtom = atom('') -export const fileUploadAtom = atom([]) +export const fileUploadAtom = atom() export const searchAtom = atom('') @@ -15,10 +17,3 @@ export const selectedTextAtom = atom('') export default function JotaiWrapper({ children }: PropsWithChildren) { return {children} } - -export type FileType = 'image' | 'pdf' - -export type FileInfo = { - file: File - type: FileType -} diff --git a/web/containers/Providers/ModelHandler.tsx b/web/containers/Providers/ModelHandler.tsx index 373c0aebd..ad7f5c974 100644 --- a/web/containers/Providers/ModelHandler.tsx +++ b/web/containers/Providers/ModelHandler.tsx @@ -31,6 +31,7 @@ import { addNewMessageAtom, updateMessageAtom, tokenSpeedAtom, + deleteMessageAtom, } from '@/helpers/atoms/ChatMessage.atom' import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom' import { @@ -49,6 +50,7 @@ export default function ModelHandler() { const addNewMessage = useSetAtom(addNewMessageAtom) const updateMessage = useSetAtom(updateMessageAtom) const downloadedModels = useAtomValue(downloadedModelsAtom) + const deleteMessage = useSetAtom(deleteMessageAtom) const activeModel = useAtomValue(activeModelAtom) const setActiveModel = useSetAtom(activeModelAtom) const setStateModel = useSetAtom(stateModelAtom) @@ -86,7 +88,7 @@ export default function ModelHandler() { }, [activeModelParams]) const onNewMessageResponse = useCallback( - (message: ThreadMessage) => { + async (message: ThreadMessage) => { if (message.type === MessageRequestType.Thread) { addNewMessage(message) } @@ -154,12 +156,15 @@ export default function ModelHandler() { ...thread, title: cleanedMessageContent, - metadata: thread.metadata, + metadata: { + ...thread.metadata, + title: cleanedMessageContent, + }, } extensionManager .get(ExtensionTypeEnum.Conversational) - ?.saveThread({ + ?.modifyThread({ ...updatedThread, }) .then(() => { @@ -233,7 +238,9 @@ export default function ModelHandler() { const thread = threadsRef.current?.find((e) => e.id == message.thread_id) if (!thread) return + const messageContent = message.content[0]?.text?.value + const metadata = { ...thread.metadata, ...(messageContent && { lastMessage: messageContent }), @@ -246,15 +253,31 @@ export default function ModelHandler() { extensionManager .get(ExtensionTypeEnum.Conversational) - ?.saveThread({ + ?.modifyThread({ ...thread, metadata, }) - // If this is not the summary of the Thread, don't need to add it to the Thread - extensionManager - .get(ExtensionTypeEnum.Conversational) - ?.addNewMessage(message) + if (message.status === MessageStatus.Error) { + message.metadata = { + ...message.metadata, + error: message.content[0]?.text?.value, + 
error_code: message.error_code, + } + } + ;(async () => { + const updatedMessage = await extensionManager + .get(ExtensionTypeEnum.Conversational) + ?.createMessage(message) + .catch(() => undefined) + if (updatedMessage) { + deleteMessage(message.id) + addNewMessage(updatedMessage) + setTokenSpeed((prev) => + prev ? { ...prev, message: updatedMessage.id } : undefined + ) + } + })() // Attempt to generate the title of the Thread when needed generateThreadTitle(message, thread) @@ -279,7 +302,9 @@ export default function ModelHandler() { const generateThreadTitle = (message: ThreadMessage, thread: Thread) => { // If this is the first ever prompt in the thread - if (thread.title?.trim() !== defaultThreadTitle) { + if ( + (thread.title ?? thread.metadata?.title)?.trim() !== defaultThreadTitle + ) { return } @@ -292,11 +317,14 @@ export default function ModelHandler() { const updatedThread: Thread = { ...thread, title: (thread.metadata?.lastMessage as string) || defaultThreadTitle, - metadata: thread.metadata, + metadata: { + ...thread.metadata, + title: (thread.metadata?.lastMessage as string) || defaultThreadTitle, + }, } return extensionManager .get(ExtensionTypeEnum.Conversational) - ?.saveThread({ + ?.modifyThread({ ...updatedThread, }) .then(() => { @@ -313,7 +341,7 @@ export default function ModelHandler() { if (!threadMessages || threadMessages.length === 0) return - const summarizeFirstPrompt = `Summarize in a ${maxWordForThreadTitle}-word Title. Give the title only. "${threadMessages[0].content[0].text.value}"` + const summarizeFirstPrompt = `Summarize in a ${maxWordForThreadTitle}-word Title. Give the title only. "${threadMessages[0]?.content[0]?.text?.value}"` // Prompt: Given this query from user {query}, return to me the summary in 10 words as the title const msgId = ulid() @@ -330,6 +358,7 @@ export default function ModelHandler() { id: msgId, threadId: message.thread_id, type: MessageRequestType.Summary, + attachments: [], messages, model: { ...activeModelRef.current, diff --git a/web/containers/Providers/Responsive.tsx b/web/containers/Providers/Responsive.tsx index cb7bd4c1c..f73fdc970 100644 --- a/web/containers/Providers/Responsive.tsx +++ b/web/containers/Providers/Responsive.tsx @@ -11,15 +11,14 @@ const Responsive = () => { const [showRightPanel, setShowRightPanel] = useAtom(showRightPanelAtom) // Refs to store the last known state of the panels - const lastLeftPanelState = useRef(true) - const lastRightPanelState = useRef(true) + const lastLeftPanelState = useRef(showLeftPanel) + const lastRightPanelState = useRef(showRightPanel) useEffect(() => { if (matches) { // Store the last known state before closing the panels lastLeftPanelState.current = showLeftPanel lastRightPanelState.current = showRightPanel - setShowLeftPanel(false) setShowRightPanel(false) } else { diff --git a/web/containers/RightPanelContainer/index.tsx b/web/containers/RightPanelContainer/index.tsx index 133130017..27d339bb7 100644 --- a/web/containers/RightPanelContainer/index.tsx +++ b/web/containers/RightPanelContainer/index.tsx @@ -16,13 +16,13 @@ import { reduceTransparentAtom } from '@/helpers/atoms/Setting.atom' type Props = PropsWithChildren -const DEFAULT_RIGTH_PANEL_WIDTH = 280 -const RIGHT_PANEL_WIDTH = 'rightPanelWidth' +const DEFAULT_RIGHT_PANEL_WIDTH = 280 +export const RIGHT_PANEL_WIDTH = 'rightPanelWidth' const RightPanelContainer = ({ children }: Props) => { const [isResizing, setIsResizing] = useState(false) const [threadRightPanelWidth, setRightPanelWidth] = useState( - 
Number(localStorage.getItem(RIGHT_PANEL_WIDTH)) || DEFAULT_RIGTH_PANEL_WIDTH + Number(localStorage.getItem(RIGHT_PANEL_WIDTH)) || DEFAULT_RIGHT_PANEL_WIDTH ) const [rightPanelRef, setRightPanelRef] = useState( null @@ -55,11 +55,11 @@ const RightPanelContainer = ({ children }: Props) => { mouseMoveEvent.clientX < 200 ) { - setRightPanelWidth(DEFAULT_RIGTH_PANEL_WIDTH) + setRightPanelWidth(DEFAULT_RIGHT_PANEL_WIDTH) setIsResizing(false) localStorage.setItem( RIGHT_PANEL_WIDTH, - String(DEFAULT_RIGTH_PANEL_WIDTH) + String(DEFAULT_RIGHT_PANEL_WIDTH) ) setShowRightPanel(false) } else { @@ -77,8 +77,8 @@ const RightPanelContainer = ({ children }: Props) => { useEffect(() => { if (localStorage.getItem(RIGHT_PANEL_WIDTH) === null) { - setRightPanelWidth(DEFAULT_RIGTH_PANEL_WIDTH) - localStorage.setItem(RIGHT_PANEL_WIDTH, String(DEFAULT_RIGTH_PANEL_WIDTH)) + setRightPanelWidth(DEFAULT_RIGHT_PANEL_WIDTH) + localStorage.setItem(RIGHT_PANEL_WIDTH, String(DEFAULT_RIGHT_PANEL_WIDTH)) } window.addEventListener('mousemove', resize) window.addEventListener('mouseup', stopResizing) @@ -109,7 +109,7 @@ const RightPanelContainer = ({ children }: Props) => {
(MainViewState.Thread) export const defaultJanDataFolderAtom = atom('') +const SHOW_RIGHT_PANEL = 'showRightPanel' + // Store panel atom export const showLeftPanelAtom = atom(true) -export const showRightPanelAtom = atom(true) + +export const showRightPanelAtom = atomWithStorage( + SHOW_RIGHT_PANEL, + false, + undefined, + { getOnInit: true } +) + export const showSystemMonitorPanelAtom = atom(false) export const appDownloadProgressAtom = atom(-1) export const updateVersionErrorAtom = atom(undefined) diff --git a/web/helpers/atoms/Assistant.atom.ts b/web/helpers/atoms/Assistant.atom.ts index d44703cf4..cb50a0553 100644 --- a/web/helpers/atoms/Assistant.atom.ts +++ b/web/helpers/atoms/Assistant.atom.ts @@ -1,4 +1,12 @@ -import { Assistant } from '@janhq/core' +import { Assistant, ThreadAssistantInfo } from '@janhq/core' import { atom } from 'jotai' +import { atomWithStorage } from 'jotai/utils' export const assistantsAtom = atom([]) + +/** + * Get the current active assistant + */ +export const activeAssistantAtom = atomWithStorage< + ThreadAssistantInfo | undefined +>('activeAssistant', undefined, undefined, { getOnInit: true }) diff --git a/web/helpers/atoms/ChatMessage.atom.ts b/web/helpers/atoms/ChatMessage.atom.ts index 1f6099a2e..5df44e031 100644 --- a/web/helpers/atoms/ChatMessage.atom.ts +++ b/web/helpers/atoms/ChatMessage.atom.ts @@ -6,6 +6,8 @@ import { } from '@janhq/core' import { atom } from 'jotai' +import { atomWithStorage } from 'jotai/utils' + import { getActiveThreadIdAtom, updateThreadStateLastMessageAtom, @@ -13,15 +15,32 @@ import { import { TokenSpeed } from '@/types/token' +const CHAT_MESSAGE_NAME = 'chatMessages' /** * Stores all chat messages for all threads */ -export const chatMessages = atom>({}) +export const chatMessagesStorage = atomWithStorage< + Record +>(CHAT_MESSAGE_NAME, {}, undefined, { getOnInit: true }) + +export const cachedMessages = atom>() +/** + * Retrieve chat messages for all threads + */ +export const chatMessages = atom( + (get) => get(cachedMessages) ?? get(chatMessagesStorage), + (_get, set, newValue: Record) => { + set(cachedMessages, newValue) + ;(() => set(chatMessagesStorage, newValue))() + } +) /** * Stores the status of the messages load for each thread */ -export const readyThreadsMessagesAtom = atom>({}) +export const readyThreadsMessagesAtom = atomWithStorage< + Record +>('currentThreadMessages', {}, undefined, { getOnInit: true }) /** * Store the token speed for current message @@ -34,6 +53,7 @@ export const getCurrentChatMessagesAtom = atom((get) => { const activeThreadId = get(getActiveThreadIdAtom) if (!activeThreadId) return [] const messages = get(chatMessages)[activeThreadId] + if (!Array.isArray(messages)) return [] return messages ?? 
[] }) @@ -121,7 +141,7 @@ export const deleteMessageAtom = atom(null, (get, set, id: string) => { if (threadId) { // Should also delete error messages to clear out the error state newData[threadId] = newData[threadId].filter( - (e) => e.id !== id && e.status !== MessageStatus.Error + (e) => e.id !== id && !e.metadata?.error ) set(chatMessages, newData) diff --git a/web/helpers/atoms/Model.atom.test.ts b/web/helpers/atoms/Model.atom.test.ts index 923f24df4..b4eb87e7a 100644 --- a/web/helpers/atoms/Model.atom.test.ts +++ b/web/helpers/atoms/Model.atom.test.ts @@ -58,7 +58,9 @@ describe('Model.atom.ts', () => { setAtom.current({ id: '1' } as any) }) expect(getAtom.current).toEqual([{ id: '1' }]) - reset.current([]) + act(() => { + reset.current([]) + }) }) }) @@ -83,7 +85,9 @@ describe('Model.atom.ts', () => { removeAtom.current('1') }) expect(getAtom.current).toEqual([]) - reset.current([]) + act(() => { + reset.current([]) + }) }) }) @@ -113,7 +117,9 @@ describe('Model.atom.ts', () => { removeAtom.current('1') }) expect(getAtom.current).toEqual([]) - reset.current([]) + act(() => { + reset.current([]) + }) }) }) diff --git a/web/helpers/atoms/Setting.atom.ts b/web/helpers/atoms/Setting.atom.ts index 5f220be46..905c88d0f 100644 --- a/web/helpers/atoms/Setting.atom.ts +++ b/web/helpers/atoms/Setting.atom.ts @@ -11,9 +11,12 @@ export const janSettingScreenAtom = atom([]) export const THEME = 'themeAppearance' export const REDUCE_TRANSPARENT = 'reduceTransparent' export const SPELL_CHECKING = 'spellChecking' +export const PRODUCT_ANALYTIC = 'productAnalytic' +export const PRODUCT_ANALYTIC_PROMPT = 'productAnalyticPrompt' export const THEME_DATA = 'themeData' export const THEME_OPTIONS = 'themeOptions' export const THEME_PATH = 'themePath' +export const CHAT_WIDTH = 'chatWidth' export const themesOptionsAtom = atomWithStorage< { name: string; value: string }[] >(THEME_OPTIONS, [], undefined, { getOnInit: true }) @@ -47,3 +50,21 @@ export const spellCheckAtom = atomWithStorage( undefined, { getOnInit: true } ) +export const productAnalyticAtom = atomWithStorage( + PRODUCT_ANALYTIC, + false, + undefined, + { getOnInit: true } +) +export const productAnalyticPromptAtom = atomWithStorage( + PRODUCT_ANALYTIC_PROMPT, + true, + undefined, + { getOnInit: true } +) +export const chatWidthAtom = atomWithStorage( + CHAT_WIDTH, + 'full', + undefined, + { getOnInit: true } +) diff --git a/web/helpers/atoms/Thread.atom.ts b/web/helpers/atoms/Thread.atom.ts index e0ea433ce..7fb6f3c60 100644 --- a/web/helpers/atoms/Thread.atom.ts +++ b/web/helpers/atoms/Thread.atom.ts @@ -207,7 +207,7 @@ export const setThreadModelParamsAtom = atom( */ export const activeSettingInputBoxAtom = atomWithStorage( ACTIVE_SETTING_INPUT_BOX, - false, + true, undefined, { getOnInit: true } ) diff --git a/web/hooks/useActiveModel.ts b/web/hooks/useActiveModel.ts index 63513bee2..ed704dd61 100644 --- a/web/hooks/useActiveModel.ts +++ b/web/hooks/useActiveModel.ts @@ -8,8 +8,8 @@ import { toaster } from '@/containers/Toast' import { LAST_USED_MODEL_ID } from './useRecommendedModel' import { vulkanEnabledAtom } from '@/helpers/atoms/AppConfig.atom' +import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom' import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom' -import { activeThreadAtom } from '@/helpers/atoms/Thread.atom' export const activeModelAtom = atom(undefined) export const loadModelErrorAtom = atom(undefined) @@ -28,12 +28,12 @@ export const stateModelAtom = atom({ export function useActiveModel() { const 
[activeModel, setActiveModel] = useAtom(activeModelAtom) - const activeThread = useAtomValue(activeThreadAtom) const [stateModel, setStateModel] = useAtom(stateModelAtom) const downloadedModels = useAtomValue(downloadedModelsAtom) const setLoadModelError = useSetAtom(loadModelErrorAtom) const pendingModelLoad = useRef(false) const isVulkanEnabled = useAtomValue(vulkanEnabledAtom) + const activeAssistant = useAtomValue(activeAssistantAtom) const downloadedModelsRef = useRef([]) @@ -79,12 +79,12 @@ export function useActiveModel() { } /// Apply thread model settings - if (activeThread?.assistants[0]?.model.id === modelId) { + if (activeAssistant?.model.id === modelId) { model = { ...model, settings: { ...model.settings, - ...activeThread.assistants[0].model.settings, + ...activeAssistant?.model.settings, }, } } diff --git a/web/hooks/useCreateNewThread.test.ts b/web/hooks/useCreateNewThread.test.ts index 25589c098..d98983830 100644 --- a/web/hooks/useCreateNewThread.test.ts +++ b/web/hooks/useCreateNewThread.test.ts @@ -67,7 +67,7 @@ describe('useCreateNewThread', () => { } as any) }) - expect(mockSetAtom).toHaveBeenCalledTimes(6) // Check if all the necessary atoms were set + expect(mockSetAtom).toHaveBeenCalledTimes(1) expect(extensionManager.get).toHaveBeenCalled() }) @@ -104,7 +104,7 @@ describe('useCreateNewThread', () => { await result.current.requestCreateNewThread({ id: 'assistant1', name: 'Assistant 1', - instructions: "Hello Jan Assistant", + instructions: 'Hello Jan Assistant', model: { id: 'model1', parameters: [], @@ -113,16 +113,8 @@ describe('useCreateNewThread', () => { } as any) }) - expect(mockSetAtom).toHaveBeenCalledTimes(6) // Check if all the necessary atoms were set + expect(mockSetAtom).toHaveBeenCalledTimes(1) // Check if all the necessary atoms were set expect(extensionManager.get).toHaveBeenCalled() - expect(mockSetAtom).toHaveBeenNthCalledWith( - 2, - expect.objectContaining({ - assistants: expect.arrayContaining([ - expect.objectContaining({ instructions: 'Hello Jan Assistant' }), - ]), - }) - ) }) it('should create a new thread with previous instructions', async () => { @@ -166,16 +158,8 @@ describe('useCreateNewThread', () => { } as any) }) - expect(mockSetAtom).toHaveBeenCalledTimes(6) // Check if all the necessary atoms were set + expect(mockSetAtom).toHaveBeenCalledTimes(1) // Check if all the necessary atoms were set expect(extensionManager.get).toHaveBeenCalled() - expect(mockSetAtom).toHaveBeenNthCalledWith( - 2, - expect.objectContaining({ - assistants: expect.arrayContaining([ - expect.objectContaining({ instructions: 'Hello Jan' }), - ]), - }) - ) }) it('should show a warning toast if trying to create an empty thread', async () => { @@ -212,13 +196,12 @@ describe('useCreateNewThread', () => { const { result } = renderHook(() => useCreateNewThread()) - const mockThread = { id: 'thread1', title: 'Test Thread' } + const mockThread = { id: 'thread1', title: 'Test Thread', assistants: [{}] } await act(async () => { await result.current.updateThreadMetadata(mockThread as any) }) expect(mockUpdateThread).toHaveBeenCalledWith(mockThread) - expect(extensionManager.get).toHaveBeenCalled() }) }) diff --git a/web/hooks/useCreateNewThread.ts b/web/hooks/useCreateNewThread.ts index 999c887cb..c4c77d0a4 100644 --- a/web/hooks/useCreateNewThread.ts +++ b/web/hooks/useCreateNewThread.ts @@ -1,7 +1,6 @@ import { useCallback } from 'react' import { - Assistant, ConversationalExtension, ExtensionTypeEnum, Thread, @@ -9,24 +8,28 @@ import { ThreadState, AssistantTool, 
Model, + Assistant, } from '@janhq/core' -import { atom, useAtomValue, useSetAtom } from 'jotai' +import { atom, useAtom, useAtomValue, useSetAtom } from 'jotai' + +import { useDebouncedCallback } from 'use-debounce' import { copyOverInstructionEnabledAtom } from '@/containers/CopyInstruction' import { fileUploadAtom } from '@/containers/Providers/Jotai' import { toaster } from '@/containers/Toast' -import { generateThreadId } from '@/utils/thread' +import { isLocalEngine } from '@/utils/modelEngine' import { useActiveModel } from './useActiveModel' -import useRecommendedModel from './useRecommendedModel' +import useRecommendedModel from './useRecommendedModel' import useSetActiveThread from './useSetActiveThread' import { extensionManager } from '@/extension' import { experimentalFeatureEnabledAtom } from '@/helpers/atoms/AppConfig.atom' +import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom' import { selectedModelAtom } from '@/helpers/atoms/Model.atom' import { threadsAtom, @@ -34,7 +37,6 @@ import { updateThreadAtom, setThreadModelParamsAtom, isGeneratingResponseAtom, - activeThreadAtom, } from '@/helpers/atoms/Thread.atom' const createNewThreadAtom = atom(null, (get, set, newThread: Thread) => { @@ -64,25 +66,25 @@ export const useCreateNewThread = () => { const copyOverInstructionEnabled = useAtomValue( copyOverInstructionEnabledAtom ) - const activeThread = useAtomValue(activeThreadAtom) + const [activeAssistant, setActiveAssistant] = useAtom(activeAssistantAtom) const experimentalEnabled = useAtomValue(experimentalFeatureEnabledAtom) const setIsGeneratingResponse = useSetAtom(isGeneratingResponseAtom) - const { recommendedModel, downloadedModels } = useRecommendedModel() - const threads = useAtomValue(threadsAtom) const { stopInference } = useActiveModel() + const { recommendedModel } = useRecommendedModel() + const requestCreateNewThread = async ( - assistant: Assistant, + assistant: (ThreadAssistantInfo & { id: string; name: string }) | Assistant, model?: Model | undefined ) => { // Stop generating if any setIsGeneratingResponse(false) stopInference() - const defaultModel = model ?? recommendedModel ?? downloadedModels[0] + const defaultModel = model || recommendedModel if (!model) { // if we have model, which means user wants to create new thread from Model hub. Allow them. @@ -113,18 +115,22 @@ export const useCreateNewThread = () => { ) const overriddenSettings = { - ctx_len: defaultContextLength, + ctx_len: !isLocalEngine(defaultModel?.engine) + ? undefined + : defaultContextLength, } // Use ctx length by default const overriddenParameters = { - max_tokens: defaultContextLength, + max_tokens: !isLocalEngine(defaultModel?.engine) + ? (defaultModel?.parameters.token_limit ?? 8192) + : defaultContextLength, } const createdAt = Date.now() let instructions: string | undefined = assistant.instructions if (copyOverInstructionEnabled) { - instructions = activeThread?.assistants[0]?.instructions ?? undefined + instructions = activeAssistant?.instructions ?? 
undefined } const assistantInfo: ThreadAssistantInfo = { assistant_id: assistant.id, @@ -139,46 +145,97 @@ export const useCreateNewThread = () => { instructions, } - const threadId = generateThreadId(assistant.id) - const thread: Thread = { - id: threadId, + const thread: Partial = { object: 'thread', title: 'New Thread', assistants: [assistantInfo], created: createdAt, updated: createdAt, + metadata: { + title: 'New Thread', + }, } // add the new thread on top of the thread list to the state //TODO: Why do we have thread list then thread states? Should combine them - createNewThread(thread) + try { + const createdThread = await persistNewThread(thread, assistantInfo) + if (!createdThread) throw 'Thread created failed.' + createNewThread(createdThread) - setSelectedModel(defaultModel) - setThreadModelParams(thread.id, { - ...defaultModel?.settings, - ...defaultModel?.parameters, - ...overriddenSettings, - }) + setSelectedModel(defaultModel) + setThreadModelParams(createdThread.id, { + ...defaultModel?.settings, + ...defaultModel?.parameters, + ...overriddenSettings, + }) - // Delete the file upload state - setFileUpload([]) - // Update thread metadata - await updateThreadMetadata(thread) - - setActiveThread(thread) + // Delete the file upload state + setFileUpload(undefined) + setActiveThread(createdThread) + } catch (ex) { + return toaster({ + title: 'Thread created failed.', + description: `To avoid piling up empty threads, please reuse previous one before creating new.`, + type: 'error', + }) + } } + const updateThreadExtension = (thread: Thread) => { + return extensionManager + .get(ExtensionTypeEnum.Conversational) + ?.modifyThread(thread) + } + + const updateAssistantExtension = ( + threadId: string, + assistant: ThreadAssistantInfo + ) => { + return extensionManager + .get(ExtensionTypeEnum.Conversational) + ?.modifyThreadAssistant(threadId, assistant) + } + + const updateThreadCallback = useDebouncedCallback(updateThreadExtension, 300) + const updateAssistantCallback = useDebouncedCallback( + updateAssistantExtension, + 300 + ) + const updateThreadMetadata = useCallback( async (thread: Thread) => { updateThread(thread) - await extensionManager - .get(ExtensionTypeEnum.Conversational) - ?.saveThread(thread) + setActiveAssistant(thread.assistants[0]) + updateThreadCallback(thread) + updateAssistantCallback(thread.id, thread.assistants[0]) }, - [updateThread] + [ + updateThread, + setActiveAssistant, + updateThreadCallback, + updateAssistantCallback, + ] ) + const persistNewThread = async ( + thread: Partial, + assistantInfo: ThreadAssistantInfo + ): Promise => { + return await extensionManager + .get(ExtensionTypeEnum.Conversational) + ?.createThread(thread) + .then(async (thread) => { + await extensionManager + .get(ExtensionTypeEnum.Conversational) + ?.createThreadAssistant(thread.id, assistantInfo) + .catch(console.error) + return thread + }) + .catch(() => undefined) + } + return { requestCreateNewThread, updateThreadMetadata, diff --git a/web/hooks/useDeleteThread.test.ts b/web/hooks/useDeleteThread.test.ts index d3a6138d0..50b0c7511 100644 --- a/web/hooks/useDeleteThread.test.ts +++ b/web/hooks/useDeleteThread.test.ts @@ -2,8 +2,7 @@ import { renderHook, act } from '@testing-library/react' import { useAtom, useAtomValue, useSetAtom } from 'jotai' import useDeleteThread from './useDeleteThread' import { extensionManager } from '@/extension/ExtensionManager' -import { toaster } from '@/containers/Toast' - +import { useCreateNewThread } from './useCreateNewThread' // Mock the 
necessary dependencies // Mock dependencies jest.mock('jotai', () => ({ @@ -12,6 +11,7 @@ jest.mock('jotai', () => ({ useAtom: jest.fn(), atom: jest.fn(), })) +jest.mock('./useCreateNewThread') jest.mock('@/extension/ExtensionManager') jest.mock('@/containers/Toast') @@ -27,8 +27,13 @@ describe('useDeleteThread', () => { ] const mockSetThreads = jest.fn() ;(useAtom as jest.Mock).mockReturnValue([mockThreads, mockSetThreads]) + ;(useSetAtom as jest.Mock).mockReturnValue(() => {}) + ;(useCreateNewThread as jest.Mock).mockReturnValue({}) + + const mockDeleteThread = jest.fn().mockImplementation(() => ({ + catch: () => jest.fn, + })) - const mockDeleteThread = jest.fn() extensionManager.get = jest.fn().mockReturnValue({ deleteThread: mockDeleteThread, }) @@ -50,12 +55,17 @@ describe('useDeleteThread', () => { const mockCleanMessages = jest.fn() ;(useSetAtom as jest.Mock).mockReturnValue(() => mockCleanMessages) ;(useAtomValue as jest.Mock).mockReturnValue(['thread 1']) + const mockCreateNewThread = jest.fn() + ;(useCreateNewThread as jest.Mock).mockReturnValue({ + requestCreateNewThread: mockCreateNewThread, + }) - const mockWriteMessages = jest.fn() const mockSaveThread = jest.fn() + const mockDeleteThread = jest.fn().mockResolvedValue({}) extensionManager.get = jest.fn().mockReturnValue({ - writeMessages: mockWriteMessages, saveThread: mockSaveThread, + getThreadAssistant: jest.fn().mockResolvedValue({}), + deleteThread: mockDeleteThread, }) const { result } = renderHook(() => useDeleteThread()) @@ -64,20 +74,18 @@ describe('useDeleteThread', () => { await result.current.cleanThread('thread1') }) - expect(mockWriteMessages).toHaveBeenCalled() - expect(mockSaveThread).toHaveBeenCalledWith( - expect.objectContaining({ - id: 'thread1', - title: 'New Thread', - metadata: expect.objectContaining({ lastMessage: undefined }), - }) - ) + expect(mockDeleteThread).toHaveBeenCalled() + expect(mockCreateNewThread).toHaveBeenCalled() }) it('should handle errors when deleting a thread', async () => { const mockThreads = [{ id: 'thread1', title: 'Thread 1' }] const mockSetThreads = jest.fn() ;(useAtom as jest.Mock).mockReturnValue([mockThreads, mockSetThreads]) + const mockCreateNewThread = jest.fn() + ;(useCreateNewThread as jest.Mock).mockReturnValue({ + requestCreateNewThread: mockCreateNewThread, + }) const mockDeleteThread = jest .fn() @@ -98,8 +106,6 @@ describe('useDeleteThread', () => { expect(mockDeleteThread).toHaveBeenCalledWith('thread1') expect(consoleErrorSpy).toHaveBeenCalledWith(expect.any(Error)) - expect(mockSetThreads).not.toHaveBeenCalled() - expect(toaster).not.toHaveBeenCalled() consoleErrorSpy.mockRestore() }) diff --git a/web/hooks/useDeleteThread.ts b/web/hooks/useDeleteThread.ts index 69e51228f..7b98a4ea5 100644 --- a/web/hooks/useDeleteThread.ts +++ b/web/hooks/useDeleteThread.ts @@ -1,13 +1,6 @@ import { useCallback } from 'react' -import { - ChatCompletionRole, - ExtensionTypeEnum, - ConversationalExtension, - fs, - joinPath, - Thread, -} from '@janhq/core' +import { ExtensionTypeEnum, ConversationalExtension } from '@janhq/core' import { useAtom, useAtomValue, useSetAtom } from 'jotai' @@ -15,89 +8,63 @@ import { currentPromptAtom } from '@/containers/Providers/Jotai' import { toaster } from '@/containers/Toast' +import { useCreateNewThread } from './useCreateNewThread' + import { extensionManager } from '@/extension/ExtensionManager' -import { janDataFolderPathAtom } from '@/helpers/atoms/AppConfig.atom' -import { - chatMessages, - cleanChatMessageAtom as 
cleanChatMessagesAtom, - deleteChatMessageAtom as deleteChatMessagesAtom, -} from '@/helpers/atoms/ChatMessage.atom' +import { assistantsAtom } from '@/helpers/atoms/Assistant.atom' +import { deleteChatMessageAtom as deleteChatMessagesAtom } from '@/helpers/atoms/ChatMessage.atom' +import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom' import { threadsAtom, setActiveThreadIdAtom, deleteThreadStateAtom, - updateThreadStateLastMessageAtom, - updateThreadAtom, } from '@/helpers/atoms/Thread.atom' export default function useDeleteThread() { const [threads, setThreads] = useAtom(threadsAtom) - const messages = useAtomValue(chatMessages) - const janDataFolderPath = useAtomValue(janDataFolderPathAtom) + const { requestCreateNewThread } = useCreateNewThread() + const assistants = useAtomValue(assistantsAtom) + const models = useAtomValue(downloadedModelsAtom) const setCurrentPrompt = useSetAtom(currentPromptAtom) const setActiveThreadId = useSetAtom(setActiveThreadIdAtom) const deleteMessages = useSetAtom(deleteChatMessagesAtom) - const cleanMessages = useSetAtom(cleanChatMessagesAtom) const deleteThreadState = useSetAtom(deleteThreadStateAtom) - const updateThreadLastMessage = useSetAtom(updateThreadStateLastMessageAtom) - const updateThread = useSetAtom(updateThreadAtom) const cleanThread = useCallback( async (threadId: string) => { - cleanMessages(threadId) const thread = threads.find((c) => c.id === threadId) if (!thread) return + const assistantInfo = await extensionManager + .get(ExtensionTypeEnum.Conversational) + ?.getThreadAssistant(thread.id) - const updatedMessages = (messages[threadId] ?? []).filter( - (msg) => msg.role === ChatCompletionRole.System + if (!assistantInfo) return + const model = models.find((c) => c.id === assistantInfo?.model?.id) + + requestCreateNewThread( + { + ...assistantInfo, + id: assistants[0].id, + name: assistants[0].name, + }, + model + ? { + ...model, + parameters: assistantInfo?.model?.parameters ?? {}, + settings: assistantInfo?.model?.settings ?? 
{}, + } + : undefined ) - - // remove files - try { - const threadFolderPath = await joinPath([ - janDataFolderPath, - 'threads', - threadId, - ]) - const threadFilesPath = await joinPath([threadFolderPath, 'files']) - const threadMemoryPath = await joinPath([threadFolderPath, 'memory']) - await fs.rm(threadFilesPath) - await fs.rm(threadMemoryPath) - } catch (err) { - console.warn('Error deleting thread files', err) - } - + // Delete this thread await extensionManager .get(ExtensionTypeEnum.Conversational) - ?.writeMessages(threadId, updatedMessages) - - thread.metadata = { - ...thread.metadata, - } - - const updatedThread: Thread = { - ...thread, - title: 'New Thread', - metadata: { ...thread.metadata, lastMessage: undefined }, - } - - await extensionManager - .get(ExtensionTypeEnum.Conversational) - ?.saveThread(updatedThread) - updateThreadLastMessage(threadId, undefined) - updateThread(updatedThread) + ?.deleteThread(threadId) + .catch(console.error) }, - [ - cleanMessages, - threads, - messages, - updateThreadLastMessage, - updateThread, - janDataFolderPath, - ] + [assistants, models, requestCreateNewThread, threads] ) const deleteThread = async (threadId: string) => { @@ -105,30 +72,27 @@ export default function useDeleteThread() { alert('No active thread') return } - try { - await extensionManager - .get(ExtensionTypeEnum.Conversational) - ?.deleteThread(threadId) - const availableThreads = threads.filter((c) => c.id !== threadId) - setThreads(availableThreads) + await extensionManager + .get(ExtensionTypeEnum.Conversational) + ?.deleteThread(threadId) + .catch(console.error) + const availableThreads = threads.filter((c) => c.id !== threadId) + setThreads(availableThreads) - // delete the thread state - deleteThreadState(threadId) + // delete the thread state + deleteThreadState(threadId) - deleteMessages(threadId) - setCurrentPrompt('') - toaster({ - title: 'Thread successfully deleted.', - description: `Thread ${threadId} has been successfully deleted.`, - type: 'success', - }) - if (availableThreads.length > 0) { - setActiveThreadId(availableThreads[0].id) - } else { - setActiveThreadId(undefined) - } - } catch (err) { - console.error(err) + deleteMessages(threadId) + setCurrentPrompt('') + toaster({ + title: 'Thread successfully deleted.', + description: `Thread ${threadId} has been successfully deleted.`, + type: 'success', + }) + if (availableThreads.length > 0) { + setActiveThreadId(availableThreads[0].id) + } else { + setActiveThreadId(undefined) } } diff --git a/web/hooks/useDropModelBinaries.test.ts b/web/hooks/useDropModelBinaries.test.ts index dad8c6178..7ca5a479e 100644 --- a/web/hooks/useDropModelBinaries.test.ts +++ b/web/hooks/useDropModelBinaries.test.ts @@ -1,3 +1,6 @@ +/** + * @jest-environment jsdom + */ // useDropModelBinaries.test.ts import { renderHook, act } from '@testing-library/react' @@ -18,6 +21,7 @@ jest.mock('jotai', () => ({ jest.mock('uuid') jest.mock('@/utils/file') jest.mock('@/containers/Toast') +jest.mock("@uppy/core") describe('useDropModelBinaries', () => { const mockSetImportingModels = jest.fn() diff --git a/web/hooks/usePath.ts b/web/hooks/usePath.ts index b732926a6..464ff0b58 100644 --- a/web/hooks/usePath.ts +++ b/web/hooks/usePath.ts @@ -1,7 +1,10 @@ import { openFileExplorer, joinPath, baseName } from '@janhq/core' import { useAtomValue } from 'jotai' +import { getFileInfo } from '@/utils/file' + import { janDataFolderPathAtom } from '@/helpers/atoms/AppConfig.atom' +import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom' 
import { selectedModelAtom } from '@/helpers/atoms/Model.atom' import { activeThreadAtom } from '@/helpers/atoms/Thread.atom' @@ -9,13 +12,14 @@ export const usePath = () => { const janDataFolderPath = useAtomValue(janDataFolderPathAtom) const activeThread = useAtomValue(activeThreadAtom) const selectedModel = useAtomValue(selectedModelAtom) + const activeAssistant = useAtomValue(activeAssistantAtom) const onRevealInFinder = async (type: string) => { // TODO: this logic should be refactored. if (type !== 'Model' && !activeThread) return let filePath = undefined - const assistantId = activeThread?.assistants[0]?.assistant_id + const assistantId = activeAssistant?.assistant_id switch (type) { case 'Engine': case 'Thread': @@ -45,13 +49,23 @@ export const usePath = () => { const onViewFile = async (id: string) => { if (!activeThread) return - let filePath = undefined - id = await baseName(id) - filePath = await joinPath(['threads', `${activeThread.id}/files`, `${id}`]) - if (!filePath) return - const fullPath = await joinPath([janDataFolderPath, filePath]) - openFileExplorer(fullPath) + + // New ID System + if (!id.startsWith('file-')) { + const threadFilePath = await joinPath([ + janDataFolderPath, + 'threads', + `${activeThread.id}/files`, + id, + ]) + openFileExplorer(threadFilePath) + } else { + id = id.split('.')[0] + const fileName = (await getFileInfo(id)).filename + const filesPath = await joinPath([janDataFolderPath, 'files', fileName]) + openFileExplorer(filesPath) + } } const onViewFileContainer = async () => { diff --git a/web/hooks/useRecommendedModel.ts b/web/hooks/useRecommendedModel.ts index d5bf0aba7..03bcc4a30 100644 --- a/web/hooks/useRecommendedModel.ts +++ b/web/hooks/useRecommendedModel.ts @@ -6,6 +6,7 @@ import { atom, useAtomValue } from 'jotai' import { activeModelAtom } from './useActiveModel' +import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom' import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom' import { activeThreadAtom } from '@/helpers/atoms/Thread.atom' @@ -28,6 +29,7 @@ export default function useRecommendedModel() { const [recommendedModel, setRecommendedModel] = useState() const activeThread = useAtomValue(activeThreadAtom) const downloadedModels = useAtomValue(downloadedModelsAtom) + const activeAssistant = useAtomValue(activeAssistantAtom) const getAndSortDownloadedModels = useCallback(async (): Promise => { const models = downloadedModels.sort((a, b) => @@ -45,8 +47,8 @@ export default function useRecommendedModel() { > => { const models = await getAndSortDownloadedModels() - if (!activeThread) return - const modelId = activeThread.assistants[0]?.model.id + if (!activeThread || !activeAssistant) return + const modelId = activeAssistant.model.id const model = models.find((model) => model.id === modelId) if (model) { @@ -65,6 +67,7 @@ export default function useRecommendedModel() { if (models.length === 0) { // if we have no downloaded models, then can't recommend anything console.debug("No downloaded models, can't recommend anything") + setRecommendedModel(undefined) return } diff --git a/web/hooks/useSendChatMessage.ts b/web/hooks/useSendChatMessage.ts index dc9a52f1b..66b031849 100644 --- a/web/hooks/useSendChatMessage.ts +++ b/web/hooks/useSendChatMessage.ts @@ -10,10 +10,12 @@ import { ConversationalExtension, EngineManager, ToolManager, + ThreadAssistantInfo, } from '@janhq/core' import { extractInferenceParams, extractModelLoadParams } from '@janhq/core' import { atom, useAtom, useAtomValue, useSetAtom } from 'jotai' 
+import { modelDropdownStateAtom } from '@/containers/ModelDropdown' import { currentPromptAtom, editPromptAtom, @@ -28,6 +30,7 @@ import { ThreadMessageBuilder } from '@/utils/threadMessageBuilder' import { useActiveModel } from './useActiveModel' import { extensionManager } from '@/extension/ExtensionManager' +import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom' import { addNewMessageAtom, deleteMessageAtom, @@ -48,6 +51,7 @@ export const reloadModelAtom = atom(false) export default function useSendChatMessage() { const activeThread = useAtomValue(activeThreadAtom) + const activeAssistant = useAtomValue(activeAssistantAtom) const addNewMessage = useSetAtom(addNewMessageAtom) const updateThread = useSetAtom(updateThreadAtom) const updateThreadWaiting = useSetAtom(updateThreadWaitingForResponseAtom) @@ -68,7 +72,9 @@ export default function useSendChatMessage() { const [fileUpload, setFileUpload] = useAtom(fileUploadAtom) const setIsGeneratingResponse = useSetAtom(isGeneratingResponseAtom) const activeThreadRef = useRef() + const activeAssistantRef = useRef() const setTokenSpeed = useSetAtom(tokenSpeedAtom) + const setModelDropdownState = useSetAtom(modelDropdownStateAtom) const selectedModelRef = useRef() @@ -84,36 +90,42 @@ export default function useSendChatMessage() { selectedModelRef.current = selectedModel }, [selectedModel]) - const resendChatMessage = async (currentMessage: ThreadMessage) => { + useEffect(() => { + activeAssistantRef.current = activeAssistant + }, [activeAssistant]) + + const resendChatMessage = async () => { // Delete last response before regenerating - const newConvoData = currentMessages - let toSendMessage = currentMessage + const newConvoData = Array.from(currentMessages) + let toSendMessage = newConvoData.pop() - do { - deleteMessage(currentMessage.id) - const msg = newConvoData.pop() - if (!msg) break - toSendMessage = msg - deleteMessage(toSendMessage.id ?? '') - } while (toSendMessage.role !== ChatCompletionRole.User) - - if (activeThreadRef.current) { + while (toSendMessage && toSendMessage?.role !== ChatCompletionRole.User) { await extensionManager .get(ExtensionTypeEnum.Conversational) - ?.writeMessages(activeThreadRef.current.id, newConvoData) + ?.deleteMessage(toSendMessage.thread_id, toSendMessage.id) + .catch(console.error) + deleteMessage(toSendMessage.id ?? '') + toSendMessage = newConvoData.pop() } - sendChatMessage(toSendMessage.content[0]?.text.value) + if (toSendMessage?.content[0]?.text?.value) + sendChatMessage(toSendMessage.content[0].text.value, true) } const sendChatMessage = async ( message: string, + isResend: boolean = false, messages?: ThreadMessage[] ) => { if (!message || message.trim().length === 0) return - if (!activeThreadRef.current) { - console.error('No active thread') + if (!activeThreadRef.current || !activeAssistantRef.current) { + console.error('No active thread or assistant') + return + } + + if (selectedModelRef.current?.id === undefined) { + setModelDropdownState(true) return } @@ -129,21 +141,19 @@ export default function useSendChatMessage() { setCurrentPrompt('') setEditPrompt('') - let base64Blob = fileUpload[0] - ? await getBase64(fileUpload[0].file) - : undefined + let base64Blob = fileUpload ? await getBase64(fileUpload.file) : undefined - if (base64Blob && fileUpload[0]?.type === 'image') { + if (base64Blob && fileUpload?.type === 'image') { // Compress image base64Blob = await compressImage(base64Blob, 512) } const modelRequest = - selectedModelRef?.current ?? 
activeThreadRef.current.assistants[0].model + selectedModelRef?.current ?? activeAssistantRef.current?.model // Fallback support for previous broken threads - if (activeThreadRef.current?.assistants[0]?.model?.id === '*') { - activeThreadRef.current.assistants[0].model = { + if (activeAssistantRef.current?.model?.id === '*') { + activeAssistantRef.current.model = { id: modelRequest.id, settings: modelRequest.settings, parameters: modelRequest.parameters, @@ -163,46 +173,50 @@ export default function useSendChatMessage() { }, activeThreadRef.current, messages ?? currentMessages - ).addSystemMessage(activeThreadRef.current.assistants[0].instructions) + ).addSystemMessage(activeAssistantRef.current?.instructions) - requestBuilder.pushMessage(prompt, base64Blob, fileUpload[0]?.type) + if (!isResend) { + requestBuilder.pushMessage(prompt, base64Blob, fileUpload) - // Build Thread Message to persist - const threadMessageBuilder = new ThreadMessageBuilder( - requestBuilder - ).pushMessage(prompt, base64Blob, fileUpload) + // Build Thread Message to persist + const threadMessageBuilder = new ThreadMessageBuilder( + requestBuilder + ).pushMessage(prompt, base64Blob, fileUpload) - const newMessage = threadMessageBuilder.build() + const newMessage = threadMessageBuilder.build() - // Push to states - addNewMessage(newMessage) + // Update thread state + const updatedThread: Thread = { + ...activeThreadRef.current, + updated: newMessage.created_at, + metadata: { + ...activeThreadRef.current.metadata, + lastMessage: prompt, + }, + } + updateThread(updatedThread) - // Update thread state - const updatedThread: Thread = { - ...activeThreadRef.current, - updated: newMessage.created, - metadata: { - ...activeThreadRef.current.metadata, - lastMessage: prompt, - }, + // Add message + const createdMessage = await extensionManager + .get(ExtensionTypeEnum.Conversational) + ?.createMessage(newMessage) + .catch(() => undefined) + + if (!createdMessage) return + + // Push to states + addNewMessage(createdMessage) } - updateThread(updatedThread) - - // Add message - await extensionManager - .get(ExtensionTypeEnum.Conversational) - ?.addNewMessage(newMessage) // Start Model if not started const modelId = - selectedModelRef.current?.id ?? - activeThreadRef.current.assistants[0].model.id + selectedModelRef.current?.id ?? activeAssistantRef.current?.model.id if (base64Blob) { - setFileUpload([]) + setFileUpload(undefined) } - if (modelRef.current?.id !== modelId) { + if (modelRef.current?.id !== modelId && modelId) { const error = await startModel(modelId).catch((error: Error) => error) if (error) { updateThreadWaiting(activeThreadRef.current.id, false) @@ -214,9 +228,7 @@ export default function useSendChatMessage() { // Process message request with Assistants tools const request = await ToolManager.instance().process( requestBuilder.build(), - activeThreadRef.current.assistants?.flatMap( - (assistant) => assistant.tools ?? [] - ) ?? [] + activeAssistantRef?.current.tools ?? 
[] ) // Request for inference diff --git a/web/hooks/useSetActiveThread.ts b/web/hooks/useSetActiveThread.ts index 6b306224d..8c7ed5361 100644 --- a/web/hooks/useSetActiveThread.ts +++ b/web/hooks/useSetActiveThread.ts @@ -1,12 +1,10 @@ import { ExtensionTypeEnum, Thread, ConversationalExtension } from '@janhq/core' -import { useAtomValue, useSetAtom } from 'jotai' +import { useSetAtom } from 'jotai' import { extensionManager } from '@/extension' -import { - readyThreadsMessagesAtom, - setConvoMessagesAtom, -} from '@/helpers/atoms/ChatMessage.atom' +import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom' +import { setConvoMessagesAtom } from '@/helpers/atoms/ChatMessage.atom' import { setActiveThreadIdAtom, setThreadModelParamsAtom, @@ -17,21 +15,27 @@ export default function useSetActiveThread() { const setActiveThreadId = useSetAtom(setActiveThreadIdAtom) const setThreadMessage = useSetAtom(setConvoMessagesAtom) const setThreadModelParams = useSetAtom(setThreadModelParamsAtom) - const readyMessageThreads = useAtomValue(readyThreadsMessagesAtom) + const setActiveAssistant = useSetAtom(activeAssistantAtom) const setActiveThread = async (thread: Thread) => { - // Load local messages only if there are no messages in the state - if (!readyMessageThreads[thread?.id]) { - const messages = await getLocalThreadMessage(thread?.id) - setThreadMessage(thread?.id, messages) - } + if (!thread?.id) return setActiveThreadId(thread?.id) - const modelParams: ModelParams = { - ...thread?.assistants[0]?.model?.parameters, - ...thread?.assistants[0]?.model?.settings, + + try { + const assistantInfo = await getThreadAssistant(thread.id) + setActiveAssistant(assistantInfo) + // Load local messages only if there are no messages in the state + const messages = await getLocalThreadMessage(thread.id).catch(() => []) + const modelParams: ModelParams = { + ...assistantInfo?.model?.parameters, + ...assistantInfo?.model?.settings, + } + setThreadModelParams(thread?.id, modelParams) + setThreadMessage(thread.id, messages) + } catch (e) { + console.error(e) } - setThreadModelParams(thread?.id, modelParams) } return { setActiveThread } @@ -40,4 +44,9 @@ export default function useSetActiveThread() { const getLocalThreadMessage = async (threadId: string) => extensionManager .get(ExtensionTypeEnum.Conversational) - ?.getAllMessages(threadId) ?? [] + ?.listMessages(threadId) ?? 
[] + +const getThreadAssistant = async (threadId: string) => + extensionManager + .get(ExtensionTypeEnum.Conversational) + ?.getThreadAssistant(threadId) diff --git a/web/hooks/useThread.test.ts b/web/hooks/useThread.test.ts index a40c709be..4db7f87ac 100644 --- a/web/hooks/useThread.test.ts +++ b/web/hooks/useThread.test.ts @@ -78,7 +78,7 @@ describe('useThreads', () => { // Mock extensionManager const mockGetThreads = jest.fn().mockResolvedValue(mockThreads) ;(extensionManager.get as jest.Mock).mockReturnValue({ - getThreads: mockGetThreads, + listThreads: mockGetThreads, }) const { result } = renderHook(() => useThreads()) @@ -119,7 +119,7 @@ describe('useThreads', () => { it('should handle empty threads', async () => { // Mock empty threads ;(extensionManager.get as jest.Mock).mockReturnValue({ - getThreads: jest.fn().mockResolvedValue([]), + listThreads: jest.fn().mockResolvedValue([]), }) const mockSetThreadStates = jest.fn() diff --git a/web/hooks/useThreads.ts b/web/hooks/useThreads.ts index 9366101c3..1e3b428a9 100644 --- a/web/hooks/useThreads.ts +++ b/web/hooks/useThreads.ts @@ -68,6 +68,6 @@ const useThreads = () => { const getLocalThreads = async (): Promise => (await extensionManager .get(ExtensionTypeEnum.Conversational) - ?.getThreads()) ?? [] + ?.listThreads()) ?? [] export default useThreads diff --git a/web/hooks/useUpdateModelParameters.test.ts b/web/hooks/useUpdateModelParameters.test.ts index bc60aa631..6c7ceb8b0 100644 --- a/web/hooks/useUpdateModelParameters.test.ts +++ b/web/hooks/useUpdateModelParameters.test.ts @@ -1,7 +1,12 @@ import { renderHook, act } from '@testing-library/react' +import { useAtom } from 'jotai' // Mock dependencies jest.mock('ulidx') jest.mock('@/extension') +jest.mock('jotai', () => ({ + ...jest.requireActual('jotai'), + useAtom: jest.fn(), +})) import useUpdateModelParameters from './useUpdateModelParameters' import { extensionManager } from '@/extension' @@ -13,7 +18,8 @@ let model: any = { } let extension: any = { - saveThread: jest.fn(), + modifyThread: jest.fn(), + modifyThreadAssistant: jest.fn(), } const mockThread: any = { @@ -35,6 +41,7 @@ const mockThread: any = { describe('useUpdateModelParameters', () => { beforeAll(() => { jest.clearAllMocks() + jest.useFakeTimers() jest.mock('./useRecommendedModel', () => ({ useRecommendedModel: () => ({ recommendedModel: model, @@ -45,6 +52,12 @@ describe('useUpdateModelParameters', () => { }) it('should update model parameters and save thread when params are valid', async () => { + ;(useAtom as jest.Mock).mockReturnValue([ + { + id: 'assistant-1', + }, + jest.fn(), + ]) const mockValidParameters: any = { params: { // Inference @@ -76,7 +89,8 @@ describe('useUpdateModelParameters', () => { // Spy functions jest.spyOn(extensionManager, 'get').mockReturnValue(extension) - jest.spyOn(extension, 'saveThread').mockReturnValue({}) + jest.spyOn(extension, 'modifyThread').mockReturnValue({}) + jest.spyOn(extension, 'modifyThreadAssistant').mockReturnValue({}) const { result } = renderHook(() => useUpdateModelParameters()) @@ -84,44 +98,46 @@ describe('useUpdateModelParameters', () => { await result.current.updateModelParameter(mockThread, mockValidParameters) }) + jest.runAllTimers() + // Check if the model parameters are valid before persisting - expect(extension.saveThread).toHaveBeenCalledWith({ - assistants: [ - { - model: { - parameters: { - stop: ['', ''], - temperature: 0.5, - token_limit: 1000, - top_k: 0.7, - top_p: 0.1, - stream: true, - max_tokens: 1000, - frequency_penalty: 0.3, - 
presence_penalty: 0.2, - }, - settings: { - ctx_len: 1024, - ngl: 12, - embedding: true, - n_parallel: 2, - cpu_threads: 4, - prompt_template: 'template', - llama_model_path: 'path', - mmproj: 'mmproj', - }, - }, + expect(extension.modifyThreadAssistant).toHaveBeenCalledWith('thread-1', { + id: 'assistant-1', + model: { + parameters: { + stop: ['', ''], + temperature: 0.5, + token_limit: 1000, + top_k: 0.7, + top_p: 0.1, + stream: true, + max_tokens: 1000, + frequency_penalty: 0.3, + presence_penalty: 0.2, }, - ], - created: 0, - id: 'thread-1', - object: 'thread', - title: 'New Thread', - updated: 0, + settings: { + ctx_len: 1024, + ngl: 12, + embedding: true, + n_parallel: 2, + cpu_threads: 4, + prompt_template: 'template', + llama_model_path: 'path', + mmproj: 'mmproj', + }, + id: 'model-1', + engine: 'nitro', + }, }) }) it('should not update invalid model parameters', async () => { + ;(useAtom as jest.Mock).mockReturnValue([ + { + id: 'assistant-1', + }, + jest.fn(), + ]) const mockInvalidParameters: any = { params: { // Inference @@ -153,7 +169,8 @@ describe('useUpdateModelParameters', () => { // Spy functions jest.spyOn(extensionManager, 'get').mockReturnValue(extension) - jest.spyOn(extension, 'saveThread').mockReturnValue({}) + jest.spyOn(extension, 'modifyThread').mockReturnValue({}) + jest.spyOn(extension, 'modifyThreadAssistant').mockReturnValue({}) const { result } = renderHook(() => useUpdateModelParameters()) @@ -164,36 +181,38 @@ describe('useUpdateModelParameters', () => { ) }) + jest.runAllTimers() + // Check if the model parameters are valid before persisting - expect(extension.saveThread).toHaveBeenCalledWith({ - assistants: [ - { - model: { - parameters: { - max_tokens: 1000, - token_limit: 1000, - }, - settings: { - cpu_threads: 4, - ctx_len: 1024, - prompt_template: 'template', - llama_model_path: 'path', - mmproj: 'mmproj', - n_parallel: 2, - ngl: 12, - }, - }, + expect(extension.modifyThreadAssistant).toHaveBeenCalledWith('thread-1', { + id: 'assistant-1', + model: { + engine: 'nitro', + id: 'model-1', + parameters: { + token_limit: 1000, + max_tokens: 1000, }, - ], - created: 0, - id: 'thread-1', - object: 'thread', - title: 'New Thread', - updated: 0, + settings: { + cpu_threads: 4, + ctx_len: 1024, + prompt_template: 'template', + llama_model_path: 'path', + mmproj: 'mmproj', + n_parallel: 2, + ngl: 12, + }, + }, }) }) it('should update valid model parameters only', async () => { + ;(useAtom as jest.Mock).mockReturnValue([ + { + id: 'assistant-1', + }, + jest.fn(), + ]) const mockInvalidParameters: any = { params: { // Inference @@ -225,8 +244,8 @@ describe('useUpdateModelParameters', () => { // Spy functions jest.spyOn(extensionManager, 'get').mockReturnValue(extension) - jest.spyOn(extension, 'saveThread').mockReturnValue({}) - + jest.spyOn(extension, 'modifyThread').mockReturnValue({}) + jest.spyOn(extension, 'modifyThreadAssistant').mockReturnValue({}) const { result } = renderHook(() => useUpdateModelParameters()) await act(async () => { @@ -235,80 +254,33 @@ describe('useUpdateModelParameters', () => { mockInvalidParameters ) }) + jest.runAllTimers() // Check if the model parameters are valid before persisting - expect(extension.saveThread).toHaveBeenCalledWith({ - assistants: [ - { - model: { - parameters: { - stop: [''], - top_k: 0.7, - top_p: 0.1, - stream: true, - token_limit: 100, - max_tokens: 1000, - presence_penalty: 0.2, - }, - settings: { - ctx_len: 1024, - ngl: 0, - n_parallel: 2, - cpu_threads: 4, - prompt_template: 'template', - 
llama_model_path: 'path', - mmproj: 'mmproj', - }, - }, + expect(extension.modifyThreadAssistant).toHaveBeenCalledWith('thread-1', { + id: 'assistant-1', + model: { + engine: 'nitro', + id: 'model-1', + parameters: { + stop: [''], + top_k: 0.7, + top_p: 0.1, + stream: true, + token_limit: 100, + max_tokens: 1000, + presence_penalty: 0.2, + }, + settings: { + ctx_len: 1024, + ngl: 0, + n_parallel: 2, + cpu_threads: 4, + prompt_template: 'template', + llama_model_path: 'path', + mmproj: 'mmproj', }, - ], - created: 0, - id: 'thread-1', - object: 'thread', - title: 'New Thread', - updated: 0, - }) - }) - - it('should handle missing modelId and engine gracefully', async () => { - const mockParametersWithoutModelIdAndEngine: any = { - params: { - stop: ['', ''], - temperature: 0.5, }, - } - - // Spy functions - jest.spyOn(extensionManager, 'get').mockReturnValue(extension) - jest.spyOn(extension, 'saveThread').mockReturnValue({}) - - const { result } = renderHook(() => useUpdateModelParameters()) - - await act(async () => { - await result.current.updateModelParameter( - mockThread, - mockParametersWithoutModelIdAndEngine - ) - }) - - // Check if the model parameters are valid before persisting - expect(extension.saveThread).toHaveBeenCalledWith({ - assistants: [ - { - model: { - parameters: { - stop: ['', ''], - temperature: 0.5, - }, - settings: {}, - }, - }, - ], - created: 0, - id: 'thread-1', - object: 'thread', - title: 'New Thread', - updated: 0, }) }) }) diff --git a/web/hooks/useUpdateModelParameters.ts b/web/hooks/useUpdateModelParameters.ts index 6eb7c3c5a..dab2f6e28 100644 --- a/web/hooks/useUpdateModelParameters.ts +++ b/web/hooks/useUpdateModelParameters.ts @@ -12,7 +12,10 @@ import { import { useAtom, useAtomValue, useSetAtom } from 'jotai' +import { useDebouncedCallback } from 'use-debounce' + import { extensionManager } from '@/extension' +import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom' import { selectedModelAtom } from '@/helpers/atoms/Model.atom' import { getActiveThreadModelParamsAtom, @@ -29,11 +32,28 @@ export type UpdateModelParameter = { export default function useUpdateModelParameters() { const activeModelParams = useAtomValue(getActiveThreadModelParamsAtom) + const [activeAssistant, setActiveAssistant] = useAtom(activeAssistantAtom) const [selectedModel] = useAtom(selectedModelAtom) const setThreadModelParams = useSetAtom(setThreadModelParamsAtom) + const updateAssistantExtension = ( + threadId: string, + assistant: ThreadAssistantInfo + ) => { + return extensionManager + .get(ExtensionTypeEnum.Conversational) + ?.modifyThreadAssistant(threadId, assistant) + } + + const updateAssistantCallback = useDebouncedCallback( + updateAssistantExtension, + 300 + ) + const updateModelParameter = useCallback( async (thread: Thread, settings: UpdateModelParameter) => { + if (!activeAssistant) return + const toUpdateSettings = processStopWords(settings.params ?? {}) const updatedModelParams = settings.modelId ? toUpdateSettings @@ -48,30 +68,34 @@ export default function useUpdateModelParameters() { setThreadModelParams(thread.id, updatedModelParams) const runtimeParams = extractInferenceParams(updatedModelParams) const settingParams = extractModelLoadParams(updatedModelParams) - - const assistants = thread.assistants.map( - (assistant: ThreadAssistantInfo) => { - assistant.model.parameters = runtimeParams - assistant.model.settings = settingParams - if (selectedModel) { - assistant.model.id = settings.modelId ?? 
selectedModel?.id - assistant.model.engine = settings.engine ?? selectedModel?.engine - } - return assistant - } - ) - - // update thread - const updatedThread: Thread = { - ...thread, - assistants, + const assistantInfo = { + ...activeAssistant, + model: { + ...activeAssistant?.model, + parameters: runtimeParams, + settings: settingParams, + id: settings.modelId ?? selectedModel?.id ?? activeAssistant.model.id, + engine: + settings.engine ?? + selectedModel?.engine ?? + activeAssistant.model.engine, + }, } + setActiveAssistant(assistantInfo) - await extensionManager - .get(ExtensionTypeEnum.Conversational) - ?.saveThread(updatedThread) + updateAssistantCallback(thread.id, assistantInfo) }, - [activeModelParams, selectedModel, setThreadModelParams] + [ + activeAssistant, + selectedModel?.parameters, + selectedModel?.settings, + selectedModel?.id, + selectedModel?.engine, + activeModelParams, + setThreadModelParams, + setActiveAssistant, + updateAssistantCallback, + ] ) const processStopWords = (params: ModelParams): ModelParams => { diff --git a/web/jest.config.js b/web/jest.config.js index f78007532..27e8d0bda 100644 --- a/web/jest.config.js +++ b/web/jest.config.js @@ -37,5 +37,5 @@ const config = { // module.exports = createJestConfig(config) module.exports = async () => ({ ...(await createJestConfig(config)()), - transformIgnorePatterns: ['/node_modules/(?!(layerr)/)'], + transformIgnorePatterns: ['/node_modules/(?!(layerr|nanoid|@uppy|preact)/)'], }) diff --git a/web/next.config.js b/web/next.config.js index 48ea0703e..b6da1780c 100644 --- a/web/next.config.js +++ b/web/next.config.js @@ -31,9 +31,11 @@ const nextConfig = { new webpack.DefinePlugin({ VERSION: JSON.stringify(packageJson.version), ANALYTICS_ID: JSON.stringify(process.env.ANALYTICS_ID), + POSTHOG_KEY: JSON.stringify(process.env.POSTHOG_KEY), + POSTHOG_HOST: JSON.stringify(process.env.POSTHOG_HOST), ANALYTICS_HOST: JSON.stringify(process.env.ANALYTICS_HOST), API_BASE_URL: JSON.stringify( - process.env.API_BASE_URL ?? 'http://localhost:1337' + process.env.API_BASE_URL ?? 
'http://127.0.0.1:39291' ), isMac: process.platform === 'darwin', isWindows: process.platform === 'win32', diff --git a/web/package.json b/web/package.json index e0f855882..db57facb5 100644 --- a/web/package.json +++ b/web/package.json @@ -1,6 +1,6 @@ { "name": "@janhq/web", - "version": "0.5.10", + "version": "0.5.11", "private": true, "homepage": "./", "scripts": { @@ -17,6 +17,9 @@ "@janhq/core": "link:./core", "@janhq/joi": "link:./joi", "@tanstack/react-virtual": "^3.10.9", + "@uppy/core": "^4.3.0", + "@uppy/react": "^4.0.4", + "@uppy/xhr-upload": "^4.2.3", "autoprefixer": "10.4.16", "class-variance-authority": "^0.7.0", "framer-motion": "^10.16.4", @@ -30,6 +33,7 @@ "next-themes": "^0.2.1", "postcss": "8.4.31", "postcss-url": "10.1.3", + "posthog-js": "^1.194.6", "react": "18.2.0", "react-circular-progressbar": "^2.1.0", "react-dom": "18.2.0", @@ -41,7 +45,6 @@ "rehype-highlight": "^7.0.1", "rehype-highlight-code-lines": "^1.0.4", "rehype-katex": "^7.0.1", - "rehype-raw": "^7.0.0", "remark-math": "^6.0.0", "sass": "^1.69.4", "slate": "latest", diff --git a/web/public/images/compact-width-dark.png b/web/public/images/compact-width-dark.png new file mode 100644 index 000000000..12c4cfa24 Binary files /dev/null and b/web/public/images/compact-width-dark.png differ diff --git a/web/public/images/compact-width.png b/web/public/images/compact-width.png new file mode 100644 index 000000000..2404a34c6 Binary files /dev/null and b/web/public/images/compact-width.png differ diff --git a/web/public/images/full-width-dark.png b/web/public/images/full-width-dark.png new file mode 100644 index 000000000..93e3df9c4 Binary files /dev/null and b/web/public/images/full-width-dark.png differ diff --git a/web/public/images/full-width.png b/web/public/images/full-width.png new file mode 100644 index 000000000..47d151132 Binary files /dev/null and b/web/public/images/full-width.png differ diff --git a/web/screens/Hub/ModelList/ModelHeader/index.tsx b/web/screens/Hub/ModelList/ModelHeader/index.tsx index da98e41e3..9a939aa44 100644 --- a/web/screens/Hub/ModelList/ModelHeader/index.tsx +++ b/web/screens/Hub/ModelList/ModelHeader/index.tsx @@ -71,7 +71,6 @@ const ModelItemHeader = ({ model, onClick, open }: Props) => { let downloadButton = (
-
-
-          Server Options
-
+
+
+          Server Options
+
+          <Select
+            onChange={(e) => setHost(e)}
+            disabled={serverEnabled}
+            options={hostOptions}
+            block
+          />
-          <Input
-            value={port}
-            onChange={(e) => {
-              handleChangePort(e.target.value)
-            }}
-            maxLength={5}
-            disabled={serverEnabled}
-          />
-          {errorRangePort && (
-            {`The port range should be from 0 to 65536`}
-          )}
-            }
-            disabled={!serverEnabled}
-            content="Settings cannot be modified while the server is running"
-          />
+          <Input
+            type="number"
+            value={port}
+            onChange={(e) => {
+              handleChangePort(e.target.value)
+            }}
+            maxLength={5}
+            disabled={serverEnabled}
+          />
+          {errorRangePort && (
+            {`The port range should be from 0 to 65535`}
+          )}
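The `errorRangePort` flag in this hunk implies a bounds check on the port field. A minimal sketch of that validation, assuming a helper named `isValidPort` (the name and exact handling are not from this diff; valid TCP ports run 0 to 65535):

```ts
// Hypothetical port guard behind `errorRangePort`; not Jan's actual code.
const isValidPort = (value: string): boolean => {
  const port = Number(value)
  return Number.isInteger(port) && port >= 0 && port <= 65535
}

// e.g. inside handleChangePort: setErrorRangePort(!isValidPort(value))
```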
diff --git a/web/screens/Settings/Advanced/index.test.tsx b/web/screens/Settings/Advanced/index.test.tsx index e34626f6e..6141fb44c 100644 --- a/web/screens/Settings/Advanced/index.test.tsx +++ b/web/screens/Settings/Advanced/index.test.tsx @@ -91,20 +91,6 @@ describe('Advanced', () => { expect(experimentalToggle).not.toBeChecked() }) - it('clears logs', async () => { - const jestMock = jest.fn() - jest.spyOn(toast, 'toaster').mockImplementation(jestMock) - - render() - let clearLogsButton - await waitFor(() => { - clearLogsButton = screen.getByTestId(/clear-logs/i) - fireEvent.click(clearLogsButton) - }) - expect(clearLogsButton).toBeInTheDocument() - expect(jestMock).toHaveBeenCalled() - }) - it('toggles proxy enabled', async () => { render() let proxyToggle diff --git a/web/screens/Settings/Advanced/index.tsx b/web/screens/Settings/Advanced/index.tsx index 8d791694c..52aafba83 100644 --- a/web/screens/Settings/Advanced/index.tsx +++ b/web/screens/Settings/Advanced/index.tsx @@ -2,11 +2,10 @@ import { useEffect, useState, useCallback, ChangeEvent } from 'react' -import { openExternalUrl, fs, AppConfiguration } from '@janhq/core' +import { openExternalUrl, AppConfiguration } from '@janhq/core' import { ScrollArea, - Button, Switch, Input, Tooltip, @@ -180,24 +179,6 @@ const Advanced = () => { setUseGpuIfPossible() }, [readSettings, setGpuList, setGpuEnabled, setGpusInUse, setVulkanEnabled]) - /** - * Clear logs - * @returns - */ - const clearLogs = async () => { - try { - await fs.rm(`file://logs`) - } catch (err) { - console.error('Error clearing logs: ', err) - } - - toaster({ - title: 'Logs cleared', - description: 'All logs have been cleared.', - type: 'success', - }) - } - /** * Handle GPU Change * @param gpuId @@ -436,7 +417,7 @@ const Advanced = () => { )} {/* Vulkan for AMD GPU/ APU and Intel Arc GPU */} - {!isMac && experimentalEnabled && ( + {!isMac && gpuList.length > 0 && experimentalEnabled && (
@@ -447,7 +428,7 @@ const Advanced = () => { model performance (reload needed).

-
+
updateVulkanEnabled(e.target.checked)} @@ -542,25 +523,6 @@ const Advanced = () => {
)} - {/* Clear log */} -
-
-
-
Clear logs
-
-

- Clear all logs from Jan app. -

-
- -
- {/* Factory Reset */}
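The Vulkan switch above is now rendered only when at least one GPU is actually detected, on top of the existing platform and experimental-feature checks. A sketch of the combined guard as a standalone predicate (the function name is illustrative, not from the diff):

```ts
// Mirrors the new render condition `!isMac && gpuList.length > 0 && experimentalEnabled`.
const shouldShowVulkanToggle = (
  isMac: boolean,
  gpuList: unknown[],
  experimentalEnabled: boolean
): boolean => !isMac && gpuList.length > 0 && experimentalEnabled
```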
diff --git a/web/screens/Settings/Appearance/index.tsx b/web/screens/Settings/Appearance/index.tsx index 2dd7d96a2..606a69769 100644 --- a/web/screens/Settings/Appearance/index.tsx +++ b/web/screens/Settings/Appearance/index.tsx @@ -6,7 +6,10 @@ import { fs, joinPath } from '@janhq/core' import { Button, Select, Switch } from '@janhq/joi' import { useAtom, useAtomValue } from 'jotai' +import { twMerge } from 'tailwind-merge' + import { + chatWidthAtom, janThemesPathAtom, reduceTransparentAtom, selectedThemeIdAtom, @@ -18,13 +21,29 @@ import { export default function AppearanceOptions() { const [selectedIdTheme, setSelectedIdTheme] = useAtom(selectedThemeIdAtom) const themeOptions = useAtomValue(themesOptionsAtom) - const { setTheme } = useTheme() + const { setTheme, theme } = useTheme() const janThemesPath = useAtomValue(janThemesPathAtom) const [themeData, setThemeData] = useAtom(themeDataAtom) const [reduceTransparent, setReduceTransparent] = useAtom( reduceTransparentAtom ) const [spellCheck, setSpellCheck] = useAtom(spellCheckAtom) + const [chatWidth, setChatWidth] = useAtom(chatWidthAtom) + + const chatWidthOption = [ + { + name: 'Full Width', + value: 'full', + img: 'images/full-width.png', + darkImg: 'images/full-width-dark.png', + }, + { + name: 'Compact Width', + value: 'compact', + img: 'images/compact-width.png', + darkImg: 'images/compact-width-dark.png', + }, + ] const handleClickTheme = useCallback( async (e: string) => { @@ -91,6 +110,72 @@ export default function AppearanceOptions() {
)} +
+
+
+
Chat Width
+
+

+ Choose the width of the chat area to customize your conversation + view +

+
+
+
+ {chatWidthOption.map((option) => { + return ( +
+ +
+ ) + })} +
+
+
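The Appearance screen now reads `chatWidthAtom` from `Setting.atom`, which is not shown in this diff. A plausible definition, assuming the preference is persisted with Jotai's `atomWithStorage` (the storage key is a guess):

```ts
import { atomWithStorage } from 'jotai/utils'

// Persisted chat-width preference; 'full' and 'compact' match the two
// options rendered by AppearanceOptions above.
export const chatWidthAtom = atomWithStorage<'full' | 'compact'>(
  'chatWidth', // assumed storage key
  'full'
)
```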
diff --git a/web/screens/Settings/MyModels/MyModelList/index.tsx b/web/screens/Settings/MyModels/MyModelList/index.tsx index 2e87f3080..3c5ec500f 100644 --- a/web/screens/Settings/MyModels/MyModelList/index.tsx +++ b/web/screens/Settings/MyModels/MyModelList/index.tsx @@ -133,10 +133,7 @@ const MyModelList = ({ model }: Props) => {
{ onModelActionClick(model.id) @@ -172,8 +169,7 @@ const MyModelList = ({ model }: Props) => {
{ setTimeout(async () => { diff --git a/web/screens/Settings/Privacy/index.test.tsx b/web/screens/Settings/Privacy/index.test.tsx new file mode 100644 index 000000000..66fa5d855 --- /dev/null +++ b/web/screens/Settings/Privacy/index.test.tsx @@ -0,0 +1,82 @@ +/** + * @jest-environment jsdom + */ + +import React from 'react' +import { render, screen, fireEvent, waitFor } from '@testing-library/react' +import '@testing-library/jest-dom' +import Privacy from '.' + +class ResizeObserverMock { + observe() {} + unobserve() {} + disconnect() {} +} + +global.ResizeObserver = ResizeObserverMock +global.window.core = { + api: { + getAppConfigurations: () => jest.fn(), + updateAppConfiguration: () => jest.fn(), + relaunch: () => jest.fn(), + }, +} + +const setSettingsMock = jest.fn() + +// Mock useSettings hook +jest.mock('@/hooks/useSettings', () => ({ + __esModule: true, + useSettings: () => ({ + readSettings: () => ({ + run_mode: 'gpu', + experimental: false, + proxy: false, + gpus: [{ name: 'gpu-1' }, { name: 'gpu-2' }], + gpus_in_use: ['0'], + quick_ask: false, + }), + setSettings: setSettingsMock, + }), +})) + +import * as toast from '@/containers/Toast' + +jest.mock('@/containers/Toast') + +jest.mock('@janhq/core', () => ({ + __esModule: true, + ...jest.requireActual('@janhq/core'), + fs: { + rm: jest.fn(), + }, +})) + +// Simulate a full Privacy settings screen +// @ts-ignore +global.isMac = false +// @ts-ignore +global.isWindows = true + +describe('Privacy', () => { + it('renders the component', async () => { + render() + await waitFor(() => { + expect(screen.getByText('Clear logs')).toBeInTheDocument() + }) + }) + + it('clears logs', async () => { + const jestMock = jest.fn() + jest.spyOn(toast, 'toaster').mockImplementation(jestMock) + + render() + let clearLogsButton + await waitFor(() => { + clearLogsButton = screen.getByTestId(/clear-logs/i) + fireEvent.click(clearLogsButton) + }) + expect(clearLogsButton).toBeInTheDocument() + expect(jestMock).toHaveBeenCalled() + }) +}) diff --git a/web/screens/Settings/Privacy/index.tsx b/web/screens/Settings/Privacy/index.tsx new file mode 100644 index 000000000..3034d8b2f --- /dev/null +++ b/web/screens/Settings/Privacy/index.tsx @@ -0,0 +1,150 @@ +import { fs } from '@janhq/core' +import { Button, Input, ScrollArea, Switch } from '@janhq/joi' +import { useAtom, useAtomValue } from 'jotai' +import { FolderOpenIcon } from 'lucide-react' + +import posthog from 'posthog-js' + +import { toaster } from '@/containers/Toast' + +import { usePath } from '@/hooks/usePath' + +import { janDataFolderPathAtom } from '@/helpers/atoms/AppConfig.atom' +import { productAnalyticAtom } from '@/helpers/atoms/Setting.atom' + +const Privacy = () => { + /** + * Clear logs + * @returns + */ + const clearLogs = async () => { + try { + await fs.rm(`file://logs`) + } catch (err) { + console.error('Error clearing logs: ', err) + } + + toaster({ + title: 'Logs cleared', + description: 'All logs have been cleared.', + type: 'success', + }) + } + + const janDataFolderPath = useAtomValue(janDataFolderPathAtom) + const { onRevealInFinder } = usePath() + const [productAnalytic, setProductAnalytic] = useAtom(productAnalyticAtom) + + return ( + +
+

+ We prioritize your control over your data. Learn more about our  + + Privacy Policy. + +

+
+

+ To make Jan better, we need to understand how it’s used - but only if + you choose to help. You can change your Jan Analytics settings + anytime. +

+
+

+ {`Your choice to opt in or out doesn't change our core privacy promises:`} +

+
+            • Your chats are never read
+            • No personal information is collected
+            • No accounts or logins required
+            • We don’t access your files
+            • Your chat history and settings stay on your device
+
+
+ {/* Analytic */} +
+
+
+
Analytics
+
+

+ By opting in, you help us make Jan better by sharing anonymous + data, like feature usage and user counts. Your chats and personal + information are never collected. +

+
+
+ { + if (e.target.checked) { + posthog.opt_in_capturing() + } else { + posthog.capture('user_opt_out', { timestamp: new Date() }) + posthog.opt_out_capturing() + } + setProductAnalytic(e.target.checked) + }} + /> +
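The analytics switch above drives PostHog consent directly. Pulled out of the component, the handler reduces to the sketch below; the wrapper function is illustrative, but `opt_in_capturing`, `opt_out_capturing`, and `capture` are standard `posthog-js` calls:

```ts
import posthog from 'posthog-js'

// Opt-in/opt-out flow behind the Analytics switch.
const setAnalyticsConsent = (enabled: boolean) => {
  if (enabled) {
    posthog.opt_in_capturing()
  } else {
    // Record the opt-out event once, then stop all further capturing.
    posthog.capture('user_opt_out', { timestamp: new Date() })
    posthog.opt_out_capturing()
  }
}
```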
+
+ + {/* Logs */} + +
+
+
+
Logs
+
+

+ Open App Logs and Cortex Logs +

+
+
+
+ + onRevealInFinder('Logs')} + /> +
+
+
+ + {/* Clear log */} +
+
+
+
Clear logs
+
+

+ Clear all logs from the Jan app. +

+
+ +
+
+
+ ) +} + +export default Privacy diff --git a/web/screens/Settings/SettingDetail/index.tsx b/web/screens/Settings/SettingDetail/index.tsx index 85feafbb3..993303955 100644 --- a/web/screens/Settings/SettingDetail/index.tsx +++ b/web/screens/Settings/SettingDetail/index.tsx @@ -6,6 +6,7 @@ import ExtensionCatalog from '@/screens/Settings/CoreExtensions' import ExtensionSetting from '@/screens/Settings/ExtensionSetting' import Hotkeys from '@/screens/Settings/Hotkeys' import MyModels from '@/screens/Settings/MyModels' +import Privacy from '@/screens/Settings/Privacy' import { selectedSettingAtom } from '@/helpers/atoms/Setting.atom' @@ -16,12 +17,15 @@ const SettingDetail = () => { case 'Extensions': return - case 'Appearance': + case 'Preferences': return case 'Keyboard Shortcuts': return + case 'Privacy': + return + case 'Advanced Settings': return diff --git a/web/screens/Settings/index.tsx b/web/screens/Settings/index.tsx index a90a37915..5003babcd 100644 --- a/web/screens/Settings/index.tsx +++ b/web/screens/Settings/index.tsx @@ -13,8 +13,9 @@ import { selectedSettingAtom } from '@/helpers/atoms/Setting.atom' export const SettingScreenList = [ 'My Models', - 'Appearance', + 'Preferences', 'Keyboard Shortcuts', + 'Privacy', 'Advanced Settings', 'Extensions', ] as const diff --git a/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.test.tsx b/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.test.tsx index 96ff6f559..9b4e67ffb 100644 --- a/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.test.tsx +++ b/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.test.tsx @@ -7,6 +7,8 @@ import { useAtomValue, useSetAtom } from 'jotai' import { useActiveModel } from '@/hooks/useActiveModel' import { useCreateNewThread } from '@/hooks/useCreateNewThread' import AssistantSetting from './index' +import { activeThreadAtom } from '@/helpers/atoms/Thread.atom' +import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom' jest.mock('jotai', () => { const originalModule = jest.requireActual('jotai') @@ -68,6 +70,7 @@ describe('AssistantSetting Component', () => { beforeEach(() => { jest.clearAllMocks() + jest.useFakeTimers() }) test('renders AssistantSetting component with proper data', async () => { @@ -75,7 +78,14 @@ describe('AssistantSetting Component', () => { ;(useSetAtom as jest.Mock).mockImplementationOnce( () => setEngineParamsUpdate ) - ;(useAtomValue as jest.Mock).mockImplementationOnce(() => mockActiveThread) + ;(useAtomValue as jest.Mock).mockImplementation((atom) => { + switch (atom) { + case activeThreadAtom: + return mockActiveThread + case activeAssistantAtom: + return {} + } + }) const updateThreadMetadata = jest.fn() ;(useActiveModel as jest.Mock).mockReturnValueOnce({ stopModel: jest.fn() }) ;(useCreateNewThread as jest.Mock).mockReturnValueOnce({ @@ -98,7 +108,14 @@ describe('AssistantSetting Component', () => { const setEngineParamsUpdate = jest.fn() const updateThreadMetadata = jest.fn() const stopModel = jest.fn() - ;(useAtomValue as jest.Mock).mockImplementationOnce(() => mockActiveThread) + ;(useAtomValue as jest.Mock).mockImplementation((atom) => { + switch (atom) { + case activeThreadAtom: + return mockActiveThread + case activeAssistantAtom: + return {} + } + }) ;(useSetAtom as jest.Mock).mockImplementation(() => setEngineParamsUpdate) ;(useActiveModel as jest.Mock).mockReturnValueOnce({ stopModel }) ;(useCreateNewThread as jest.Mock).mockReturnValueOnce({ diff --git 
a/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.tsx b/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.tsx index 95c905dde..19ec3328a 100644 --- a/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.tsx +++ b/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.tsx @@ -8,6 +8,7 @@ import { useCreateNewThread } from '@/hooks/useCreateNewThread' import SettingComponentBuilder from '../../../../containers/ModelSetting/SettingComponent' +import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom' import { activeThreadAtom, engineParamsUpdateAtom, @@ -19,13 +20,14 @@ type Props = { const AssistantSetting: React.FC = ({ componentData }) => { const activeThread = useAtomValue(activeThreadAtom) + const activeAssistant = useAtomValue(activeAssistantAtom) const { updateThreadMetadata } = useCreateNewThread() const { stopModel } = useActiveModel() const setEngineParamsUpdate = useSetAtom(engineParamsUpdateAtom) const onValueChanged = useCallback( (key: string, value: string | number | boolean | string[]) => { - if (!activeThread) return + if (!activeThread || !activeAssistant) return const shouldReloadModel = componentData.find((x) => x.key === key)?.requireModelReload ?? false if (shouldReloadModel) { @@ -34,40 +36,40 @@ const AssistantSetting: React.FC = ({ componentData }) => { } if ( - activeThread.assistants[0].tools && + activeAssistant?.tools && (key === 'chunk_overlap' || key === 'chunk_size') ) { if ( - activeThread.assistants[0].tools[0]?.settings?.chunk_size < - activeThread.assistants[0].tools[0]?.settings?.chunk_overlap + activeAssistant.tools[0]?.settings?.chunk_size < + activeAssistant.tools[0]?.settings?.chunk_overlap ) { - activeThread.assistants[0].tools[0].settings.chunk_overlap = - activeThread.assistants[0].tools[0].settings.chunk_size + activeAssistant.tools[0].settings.chunk_overlap = + activeAssistant.tools[0].settings.chunk_size } if ( key === 'chunk_size' && - value < activeThread.assistants[0].tools[0].settings?.chunk_overlap + value < activeAssistant.tools[0].settings?.chunk_overlap ) { - activeThread.assistants[0].tools[0].settings.chunk_overlap = value + activeAssistant.tools[0].settings.chunk_overlap = value } else if ( key === 'chunk_overlap' && - value > activeThread.assistants[0].tools[0].settings?.chunk_size + value > activeAssistant.tools[0].settings?.chunk_size ) { - activeThread.assistants[0].tools[0].settings.chunk_size = value + activeAssistant.tools[0].settings.chunk_size = value } } updateThreadMetadata({ ...activeThread, assistants: [ { - ...activeThread.assistants[0], + ...activeAssistant, tools: [ { type: 'retrieval', enabled: true, settings: { - ...(activeThread.assistants[0].tools && - activeThread.assistants[0].tools[0]?.settings), + ...(activeAssistant.tools && + activeAssistant.tools[0]?.settings), [key]: value, }, }, @@ -77,6 +79,7 @@ const AssistantSetting: React.FC = ({ componentData }) => { }) }, [ + activeAssistant, activeThread, componentData, setEngineParamsUpdate, diff --git a/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx b/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx index 44d1748ed..041c37b18 100644 --- a/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx +++ b/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx @@ -27,6 +27,7 @@ import { modelDownloadStateAtom } from '@/hooks/useDownloadState' import { useStarterScreen } from '@/hooks/useStarterScreen' import { 
formatDownloadPercentage, toGibibytes } from '@/utils/converter' +import { manualRecommendationModel } from '@/utils/model' import { getLogoEngine, getTitleByEngine, @@ -40,7 +41,11 @@ import { } from '@/helpers/atoms/Model.atom' import { selectedSettingAtom } from '@/helpers/atoms/Setting.atom' -const OnDeviceStarterScreen = () => { +type Props = { + isShowStarterScreen?: boolean +} + +const OnDeviceStarterScreen = ({ isShowStarterScreen }: Props) => { const { extensionHasSettings } = useStarterScreen() const [searchValue, setSearchValue] = useState('') const [isOpen, setIsOpen] = useState(Boolean(searchValue.length)) @@ -52,15 +57,16 @@ const OnDeviceStarterScreen = () => { const configuredModels = useAtomValue(configuredModelsAtom) const setMainViewState = useSetAtom(mainViewStateAtom) - const recommendModel = ['llama3.2-1b-instruct', 'llama3.2-3b-instruct'] - const featuredModel = configuredModels.filter((x) => { const manualRecommendModel = configuredModels.filter((x) => - recommendModel.includes(x.id) + manualRecommendationModel.includes(x.id) ) if (manualRecommendModel.length === 2) { - return x.id === recommendModel[0] || x.id === recommendModel[1] + return ( + x.id === manualRecommendationModel[0] || + x.id === manualRecommendationModel[1] + ) } else { return ( x.metadata?.tags?.includes('Featured') && x.metadata?.size < 5000000000 @@ -103,7 +109,7 @@ const OnDeviceStarterScreen = () => { const [visibleRows, setVisibleRows] = useState(1) return ( - +
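The featured-model logic above prefers the two hand-picked entries from `manualRecommendationModel` and only falls back to `Featured`-tagged models under 5 GB when both are not available. As a pure function (the helper name and minimal types are mine, not the component's):

```ts
type HubModel = { id: string; metadata?: { tags?: string[]; size?: number } }

// Sketch of the featured-model selection in OnDeviceStarterScreen.
const pickFeaturedModels = (
  configuredModels: HubModel[],
  manualRecommendationModel: string[]
): HubModel[] => {
  const manual = configuredModels.filter((m) =>
    manualRecommendationModel.includes(m.id)
  )
  if (manual.length === 2) return manual
  return configuredModels.filter(
    (m) =>
      m.metadata?.tags?.includes('Featured') &&
      (m.metadata?.size ?? Infinity) < 5_000_000_000
  )
}
```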
diff --git a/web/screens/Thread/ThreadCenterPanel/ChatBody/index.tsx b/web/screens/Thread/ThreadCenterPanel/ChatBody/index.tsx index 9077a351a..c2f7935f6 100644 --- a/web/screens/Thread/ThreadCenterPanel/ChatBody/index.tsx +++ b/web/screens/Thread/ThreadCenterPanel/ChatBody/index.tsx @@ -1,4 +1,4 @@ -import { memo, useEffect, useMemo, useRef, useState } from 'react' +import { memo, useCallback, useEffect, useMemo, useRef, useState } from 'react' import { ThreadMessage } from '@janhq/core' import { useVirtualizer } from '@tanstack/react-virtual' @@ -57,7 +57,10 @@ const ChatBody = memo( loadModelError?: string }) => { // The scrollable element for your list - const parentRef = useRef(null) + const parentRef = useRef(null) + const prevScrollTop = useRef(0) + const isUserManuallyScrollingUp = useRef(false) + const currentThread = useAtomValue(activeThreadAtom) const count = useMemo( () => (messages?.length ?? 0) + (loadModelError ? 1 : 0), @@ -71,28 +74,60 @@ const ChatBody = memo( estimateSize: () => 35, overscan: 5, }) + useEffect(() => { - if (count > 0 && messages && virtualizer) { - virtualizer.scrollToIndex(count - 1) + // Delay the scroll until the DOM is updated + if (parentRef.current) { + requestAnimationFrame(() => { + if (parentRef.current) { + parentRef.current.scrollTo({ top: parentRef.current.scrollHeight }) + virtualizer.scrollToIndex(count - 1) + } + }) } - }, [count, virtualizer, messages, loadModelError]) + }, [count, currentThread?.id, virtualizer]) const items = virtualizer.getVirtualItems() + virtualizer.shouldAdjustScrollPositionOnItemSizeChange = ( item, _, instance ) => { + if (isUserManuallyScrollingUp.current === true) return false return ( // item.start < (instance.scrollOffset ?? 0) && instance.scrollDirection !== 'backward' ) } + const handleScroll = useCallback((event: React.UIEvent) => { + const currentScrollTop = event.currentTarget.scrollTop + + if (prevScrollTop.current > currentScrollTop) { + isUserManuallyScrollingUp.current = true + } else { + const currentScrollTop = event.currentTarget.scrollTop + const scrollHeight = event.currentTarget.scrollHeight + const clientHeight = event.currentTarget.clientHeight + + if (currentScrollTop + clientHeight >= scrollHeight) { + isUserManuallyScrollingUp.current = false + } + } + + if (isUserManuallyScrollingUp.current === true) { + event.preventDefault() + event.stopPropagation() + } + prevScrollTop.current = currentScrollTop + }, []) + return (
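The scroll handling added here is the interesting part: auto-stick to the bottom is disabled as soon as the user scrolls up, and re-enabled once they return to the bottom. The same logic, distilled into a framework-free tracker for clarity (a restatement of the hunk above, not additional Jan code):

```ts
// Returns true while the user has manually scrolled away from the bottom.
const createScrollTracker = () => {
  let prevScrollTop = 0
  let userScrolledUp = false

  return (scrollTop: number, scrollHeight: number, clientHeight: number) => {
    if (prevScrollTop > scrollTop) {
      userScrolledUp = true // scrolled up: stop auto-sticking
    } else if (scrollTop + clientHeight >= scrollHeight) {
      userScrolledUp = false // back at the bottom: resume auto-stick
    }
    prevScrollTop = scrollTop
    return userScrolledUp
  }
}
```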
{ - const [editor] = useState(() => withHistory(withReact(createEditor()))) + const editor = useMemo(() => withHistory(withReact(createEditor())), []) const currentLanguage = useRef('plaintext') const hasStartBackticks = useRef(false) const hasEndBackticks = useRef(false) @@ -79,6 +79,8 @@ const RichTextEditor = ({ const messages = useAtomValue(getCurrentChatMessagesAtom) const { sendChatMessage } = useSendChatMessage() const { stopInference } = useActiveModel() + const selectedModel = useAtomValue(selectedModelAtom) + const largeContentThreshold = 1000 // The decorate function identifies code blocks and marks the ranges const decorate = useCallback( @@ -132,97 +134,9 @@ const RichTextEditor = ({ }) } - if (Editor.isBlock(editor, node) && node.type === 'paragraph') { - node.children.forEach((child: { text: any }, childIndex: number) => { - const text = child.text - - const codeBlockStartRegex = /```(\w*)/g - const matches = [...currentPrompt.matchAll(codeBlockStartRegex)] - - if (matches.length % 2 !== 0) { - hasEndBackticks.current = false - } - - // Match code block start and end - const lang = text.match(/^```(\w*)$/) - const endMatch = text.match(/^```$/) - - if (lang) { - // If it's the start of a code block, store the language - currentLanguage.current = lang[1] || 'plaintext' - } else if (endMatch) { - // Reset language when code block ends - currentLanguage.current = 'plaintext' - } else if ( - hasStartBackticks.current && - hasEndBackticks.current && - currentLanguage.current !== 'plaintext' - ) { - // Highlight entire code line if in a code block - - const codeContent = text.trim() // Remove leading spaces for highlighting - - let highlighted = '' - highlighted = hljs.highlightAuto(codeContent).value - try { - highlighted = hljs.highlight(codeContent, { - language: - currentLanguage.current.length > 1 - ? currentLanguage.current - : 'plaintext', - }).value - } catch (err) { - highlighted = hljs.highlight(codeContent, { - language: 'javascript', - }).value - } - - const parser = new DOMParser() - const doc = parser.parseFromString(highlighted, 'text/html') - - let slateTextIndex = 0 - - doc.body.childNodes.forEach((childNode) => { - const childText = childNode.textContent || '' - - const length = childText.length - const className = - childNode.nodeType === Node.ELEMENT_NODE - ? 
(childNode as HTMLElement).className - : '' - - ranges.push({ - anchor: { - path: [...path, childIndex], - offset: slateTextIndex, - }, - focus: { - path: [...path, childIndex], - offset: slateTextIndex + length, - }, - type: 'code', - code: true, - language: currentLanguage.current, - className, - }) - - slateTextIndex += length - }) - } else { - currentLanguage.current = 'plaintext' - ranges.push({ - anchor: { path: [...path, childIndex], offset: 0 }, - focus: { path: [...path, childIndex], offset: text.length }, - type: 'paragraph', // Treat as a paragraph - code: false, - }) - } - }) - } - return ranges }, - [currentPrompt, editor] + [editor] ) // RenderLeaf applies the decoration styles @@ -312,11 +226,17 @@ const RichTextEditor = ({ const handleKeyDown = useCallback( (event: React.KeyboardEvent) => { - if (event.key === 'Enter' && !event.shiftKey) { + if ( + event.key === 'Enter' && + !event.shiftKey && + event.nativeEvent.isComposing === false + ) { event.preventDefault() if (messages[messages.length - 1]?.status !== MessageStatus.Pending) { sendChatMessage(currentPrompt) - resetEditor() + if (selectedModel) { + resetEditor() + } } else onStopInferenceClick() } }, @@ -324,6 +244,16 @@ const RichTextEditor = ({ [currentPrompt, editor, messages] ) + const handlePaste = (event: ClipboardEvent) => { + const clipboardData = event.clipboardData || (window as any).clipboardData + const pastedData = clipboardData.getData('text') + + if (pastedData.length > largeContentThreshold) { + event.preventDefault() // Prevent the default paste behavior + Transforms.insertText(editor, pastedData) // Insert the content directly into the editor + } + } + return ( { + // Skip decorate if content exceeds threshold + if ( + currentPrompt.length > largeContentThreshold || + !currentPrompt.length + ) + return [] + return decorate(entry) + }} renderLeaf={renderLeaf} // Pass the renderLeaf function onKeyDown={handleKeyDown} + onPaste={handlePaste} // Add the custom paste handler className={twMerge( className, disabled && diff --git a/web/screens/Thread/ThreadCenterPanel/ChatInput/index.tsx b/web/screens/Thread/ThreadCenterPanel/ChatInput/index.tsx index 5662cd0c0..198196484 100644 --- a/web/screens/Thread/ThreadCenterPanel/ChatInput/index.tsx +++ b/web/screens/Thread/ThreadCenterPanel/ChatInput/index.tsx @@ -24,6 +24,7 @@ import { useActiveModel } from '@/hooks/useActiveModel' import useSendChatMessage from '@/hooks/useSendChatMessage' +import { uploader } from '@/utils/file' import { isLocalEngine } from '@/utils/modelEngine' import FileUploadPreview from '../FileUploadPreview' @@ -33,6 +34,7 @@ import RichTextEditor from './RichTextEditor' import { showRightPanelAtom } from '@/helpers/atoms/App.atom' import { experimentalFeatureEnabledAtom } from '@/helpers/atoms/AppConfig.atom' +import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom' import { getCurrentChatMessagesAtom } from '@/helpers/atoms/ChatMessage.atom' import { selectedModelAtom } from '@/helpers/atoms/Model.atom' import { spellCheckAtom } from '@/helpers/atoms/Setting.atom' @@ -67,8 +69,10 @@ const ChatInput = () => { const experimentalFeature = useAtomValue(experimentalFeatureEnabledAtom) const isGeneratingResponse = useAtomValue(isGeneratingResponseAtom) const threadStates = useAtomValue(threadStatesAtom) + const activeAssistant = useAtomValue(activeAssistantAtom) const { stopInference } = useActiveModel() + const upload = uploader() const [activeTabThreadRightPanel, setActiveTabThreadRightPanel] = useAtom( 
activeTabThreadRightPanelAtom ) @@ -86,13 +90,19 @@ const ChatInput = () => { } }, [activeThreadId]) + useEffect(() => { + if (!selectedModel && !activeSettingInputBox) { + setActiveSettingInputBox(true) + } + }, [activeSettingInputBox, selectedModel, setActiveSettingInputBox]) + const onStopInferenceClick = async () => { stopInference() } - const isModelSupportRagAndTools = - selectedModel?.engine === InferenceEngine.openai || - isLocalEngine(selectedModel?.engine as InferenceEngine) + const isModelSupportRagAndTools = isLocalEngine( + selectedModel?.engine as InferenceEngine + ) /** * Handles the change event of the extension file input element by setting the file name state. @@ -102,19 +112,27 @@ const ChatInput = () => { const handleFileChange = (event: React.ChangeEvent) => { const file = event.target.files?.[0] if (!file) return - setFileUpload([{ file: file, type: 'pdf' }]) + upload.addFile(file) + upload.upload().then((data) => { + setFileUpload({ + file: file, + type: 'pdf', + id: data?.successful?.[0]?.response?.body?.id, + name: data?.successful?.[0]?.response?.body?.filename, + }) + }) } const handleImageChange = (event: React.ChangeEvent) => { const file = event.target.files?.[0] if (!file) return - setFileUpload([{ file: file, type: 'image' }]) + setFileUpload({ file: file, type: 'image' }) } const renderPreview = (fileUpload: any) => { - if (fileUpload.length > 0) { - if (fileUpload[0].type === 'image') { - return + if (fileUpload) { + if (fileUpload.type === 'image') { + return } else { return } @@ -130,7 +148,7 @@ const ChatInput = () => { 'relative mb-1 max-h-[400px] resize-none rounded-lg border border-[hsla(var(--app-border))] p-3 pr-20', 'focus-within:outline-none focus-visible:outline-0 focus-visible:ring-1 focus-visible:ring-[hsla(var(--primary-bg))] focus-visible:ring-offset-0', 'overflow-y-auto', - fileUpload.length && 'rounded-t-none', + fileUpload && 'rounded-t-none', experimentalFeature && 'pl-10', activeSettingInputBox && 'pb-14 pr-16' )} @@ -152,10 +170,10 @@ const ChatInput = () => { className="absolute left-3 top-2.5" onClick={(e) => { if ( - fileUpload.length > 0 || - (activeThread?.assistants[0].tools && - !activeThread?.assistants[0].tools[0]?.enabled && - !activeThread?.assistants[0].model.settings?.vision_model) + !!fileUpload || + (activeAssistant?.tools && + !activeAssistant?.tools[0]?.enabled && + !activeAssistant?.model.settings?.vision_model) ) { e.stopPropagation() } else { @@ -170,27 +188,24 @@ const ChatInput = () => { } disabled={ - isModelSupportRagAndTools && - activeThread?.assistants[0].tools && - activeThread?.assistants[0].tools[0]?.enabled + !isModelSupportRagAndTools || + (activeAssistant?.tools && activeAssistant?.tools[0]?.enabled) } content={ <> - {fileUpload.length > 0 || - (activeThread?.assistants[0].tools && - !activeThread?.assistants[0].tools[0]?.enabled && - !activeThread?.assistants[0].model.settings - ?.vision_model && ( + {!!fileUpload || + (activeAssistant?.tools && + !activeAssistant?.tools[0]?.enabled && + !activeAssistant?.model.settings?.vision_model && ( <> - {fileUpload.length !== 0 && ( + {!!fileUpload && ( Currently, we only support 1 attachment at the same time. )} - {activeThread?.assistants[0].tools && - activeThread?.assistants[0].tools[0]?.enabled === - false && + {activeAssistant?.tools && + activeAssistant?.tools[0]?.enabled === false && isModelSupportRagAndTools && ( Turn on Retrieval in Tools settings to use this @@ -221,14 +236,13 @@ const ChatInput = () => {
{ - if ( - activeThread?.assistants[0].model.settings?.vision_model - ) { + if (activeAssistant?.model.settings?.vision_model) { imageInputRef.current?.click() setShowAttacmentMenus(false) } @@ -239,9 +253,7 @@ const ChatInput = () => {
} content="This feature only supports multimodal models." - disabled={ - activeThread?.assistants[0].model.settings?.vision_model - } + disabled={activeAssistant?.model.settings?.vision_model} /> {
{ - fileInputRef.current?.click() - setShowAttacmentMenus(false) + if (isModelSupportRagAndTools) { + fileInputRef.current?.click() + setShowAttacmentMenus(false) + } }} > @@ -261,8 +277,8 @@ const ChatInput = () => {
} content={ - (!activeThread?.assistants[0].tools || - !activeThread?.assistants[0].tools[0]?.enabled) && ( + (!activeAssistant?.tools || + !activeAssistant?.tools[0]?.enabled) && ( Turn on Retrieval in Assistant Settings to use this feature. @@ -291,6 +307,7 @@ const ChatInput = () => {
    )} + {messages[messages.length - 1]?.status !== MessageStatus.Pending && !isGeneratingResponse && !isStreamingResponse ? ( @@ -334,55 +351,53 @@ const ChatInput = () => {
    - {activeSettingInputBox && ( -
    -
    - - +
+ + { + // TODO @faisal: should be refactored later for a better experience between the tab and toggle button + if (showRightPanel && activeTabThreadRightPanel !== 'model') { + setShowRightPanel(true) + setActiveTabThreadRightPanel('model') } - onClick={() => { - // TODO @faisal: should be refactor later and better experience beetwen tab and toggle button - if (showRightPanel && activeTabThreadRightPanel !== 'model') { - setShowRightPanel(true) - setActiveTabThreadRightPanel('model') - } - if (showRightPanel && activeTabThreadRightPanel === 'model') { - setShowRightPanel(false) - setActiveTabThreadRightPanel(undefined) - } - if (activeTabThreadRightPanel === undefined) { - setShowRightPanel(true) - setActiveTabThreadRightPanel('model') - } - if ( - !showRightPanel && - activeTabThreadRightPanel !== 'model' - ) { - setShowRightPanel(true) - setActiveTabThreadRightPanel('model') - } - }} - > - - -
    + if (showRightPanel && activeTabThreadRightPanel === 'model') { + setShowRightPanel(false) + setActiveTabThreadRightPanel(undefined) + } + if (activeTabThreadRightPanel === undefined) { + setShowRightPanel(true) + setActiveTabThreadRightPanel('model') + } + if (!showRightPanel && activeTabThreadRightPanel !== 'model') { + setShowRightPanel(true) + setActiveTabThreadRightPanel('model') + } + }} + > + +
    +
    + {selectedModel && ( -
    - )} + )} +
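`handleFileChange` now pushes PDFs through an `uploader()` helper from `@/utils/file`, which this diff does not show; given the `@uppy/core` and `@uppy/xhr-upload` dependencies added in `web/package.json`, it is presumably a thin Uppy wrapper along these lines (the endpoint is a placeholder, not Jan's real route):

```ts
import Uppy from '@uppy/core'
import XHRUpload from '@uppy/xhr-upload'

// Assumed shape of uploader() from '@/utils/file'.
export const uploader = () => {
  const uppy = new Uppy()
  uppy.use(XHRUpload, {
    endpoint: 'http://127.0.0.1:39291/v1/files', // placeholder endpoint
    fieldName: 'file',
  })
  return uppy
}
```

The caller in this hunk then does `upload.addFile(file)` followed by `upload.upload()`, and reads the stored file's `id` and `filename` back from `data.successful[0].response.body`.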
    ((message, ref) => { const [content, setContent] = useState(message.content) const [status, setStatus] = useState(message.status) const [errorMessage, setErrorMessage] = useState( - message.isCurrentMessage && message.status === MessageStatus.Error - ? message - : undefined + message.isCurrentMessage && !!message?.metadata?.error ? message : undefined ) function onMessageUpdate(data: ThreadMessage) { @@ -52,16 +50,18 @@ const ChatItem = forwardRef((message, ref) => { return ( <> - {status !== MessageStatus.Error && content?.length > 0 && ( -
    - -
    - )} + {status !== MessageStatus.Error && + !message.metadata?.error && + content?.length > 0 && ( +
    + +
    + )} {errorMessage && !message.loadModelError && ( )} diff --git a/web/screens/Thread/ThreadCenterPanel/EditChatInput/index.tsx b/web/screens/Thread/ThreadCenterPanel/EditChatInput/index.tsx index ea22e3a58..88f6a72b0 100644 --- a/web/screens/Thread/ThreadCenterPanel/EditChatInput/index.tsx +++ b/web/screens/Thread/ThreadCenterPanel/EditChatInput/index.tsx @@ -72,7 +72,8 @@ const EditChatInput: React.FC = ({ message }) => { }, [editPrompt]) useEffect(() => { - setEditPrompt(message.content[0]?.text?.value) + if (message.content?.[0]?.text?.value) + setEditPrompt(message.content[0].text.value) // eslint-disable-next-line react-hooks/exhaustive-deps }, []) @@ -80,19 +81,17 @@ const EditChatInput: React.FC = ({ message }) => { setEditMessage('') const messageIdx = messages.findIndex((msg) => msg.id === message.id) const newMessages = messages.slice(0, messageIdx) - if (activeThread) { - setMessages(activeThread.id, newMessages) - await extensionManager - .get(ExtensionTypeEnum.Conversational) - ?.writeMessages( - activeThread.id, - // Remove all of the messages below this - newMessages - ) - .then(() => { - sendChatMessage(editPrompt, newMessages) - }) - } + const toDeleteMessages = messages.slice(messageIdx) + const threadId = messages[0].thread_id + await Promise.all( + toDeleteMessages.map(async (message) => + extensionManager + .get(ExtensionTypeEnum.Conversational) + ?.deleteMessage(message.thread_id, message.id) + ) + ).catch(console.error) + setMessages(threadId, newMessages) + sendChatMessage(editPrompt, false, newMessages) } const onKeyDown = async (e: React.KeyboardEvent) => { diff --git a/web/screens/Thread/ThreadCenterPanel/FileUploadPreview/index.tsx b/web/screens/Thread/ThreadCenterPanel/FileUploadPreview/index.tsx index 348e915e6..0e4872e10 100644 --- a/web/screens/Thread/ThreadCenterPanel/FileUploadPreview/index.tsx +++ b/web/screens/Thread/ThreadCenterPanel/FileUploadPreview/index.tsx @@ -15,31 +15,33 @@ const FileUploadPreview = () => { const setCurrentPrompt = useSetAtom(currentPromptAtom) const onDeleteClick = () => { - setFileUpload([]) + setFileUpload(undefined) setCurrentPrompt('') } return (
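The edit flow above no longer rewrites the whole message list; it deletes every message from the edited one onward through the Conversational extension and then resends the prompt. Condensed into a helper (the function name is mine, the extension call is from the hunk):

```ts
import {
  ConversationalExtension,
  ExtensionTypeEnum,
  ThreadMessage,
} from '@janhq/core'

import { extensionManager } from '@/extension'

// Delete `messageId` and everything after it; returns the surviving messages.
const deleteFromMessage = async (
  messages: ThreadMessage[],
  messageId: string
): Promise<ThreadMessage[]> => {
  const idx = messages.findIndex((m) => m.id === messageId)
  if (idx < 0) return messages
  await Promise.all(
    messages
      .slice(idx)
      .map((m) =>
        extensionManager
          .get<ConversationalExtension>(ExtensionTypeEnum.Conversational)
          ?.deleteMessage(m.thread_id, m.id)
      )
  ).catch(console.error)
  return messages.slice(0, idx)
}
```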
    -
    - + {!!fileUpload && ( +
    + -
    -
    - {fileUpload[0].file.name.replaceAll(/[-._]/g, ' ')} -
    -

    - {toGibibytes(fileUpload[0].file.size)} -

    -
    +
    +
    + {fileUpload?.file.name.replaceAll(/[-._]/g, ' ')} +
    +

    + {toGibibytes(fileUpload?.file.size)} +

    +
    -
    - +
    + +
    -
    + )}
    ) } diff --git a/web/screens/Thread/ThreadCenterPanel/ImageUploadPreview/index.tsx b/web/screens/Thread/ThreadCenterPanel/ImageUploadPreview/index.tsx index b43b80830..7fa9e417a 100644 --- a/web/screens/Thread/ThreadCenterPanel/ImageUploadPreview/index.tsx +++ b/web/screens/Thread/ThreadCenterPanel/ImageUploadPreview/index.tsx @@ -29,7 +29,7 @@ const ImageUploadPreview: React.FC = ({ file }) => { } const onDeleteClick = () => { - setFileUpload([]) + setFileUpload(undefined) setCurrentPrompt('') } diff --git a/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx b/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx index d6fed4804..204ec40fb 100644 --- a/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx +++ b/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx @@ -10,15 +10,15 @@ import { MainViewState } from '@/constants/screens' import { loadModelErrorAtom } from '@/hooks/useActiveModel' import { mainViewStateAtom } from '@/helpers/atoms/App.atom' +import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom' import { selectedSettingAtom } from '@/helpers/atoms/Setting.atom' -import { activeThreadAtom } from '@/helpers/atoms/Thread.atom' const LoadModelError = () => { const setModalTroubleShooting = useSetAtom(modalTroubleShootingAtom) const loadModelError = useAtomValue(loadModelErrorAtom) const setMainState = useSetAtom(mainViewStateAtom) const setSelectedSettingScreen = useSetAtom(selectedSettingAtom) - const activeThread = useAtomValue(activeThreadAtom) + const activeAssistant = useAtomValue(activeAssistantAtom) const ErrorMessage = () => { if ( @@ -33,9 +33,9 @@ const LoadModelError = () => { className="cursor-pointer font-medium text-[hsla(var(--app-link))]" onClick={() => { setMainState(MainViewState.Settings) - if (activeThread?.assistants[0]?.model.engine) { + if (activeAssistant?.model.engine) { const engine = EngineManager.instance().get( - activeThread.assistants[0].model.engine + activeAssistant.model.engine ) engine?.name && setSelectedSettingScreen(engine.name) } diff --git a/web/screens/Thread/ThreadCenterPanel/MessageToolbar/index.tsx b/web/screens/Thread/ThreadCenterPanel/MessageToolbar/index.tsx index c4a97a6b9..a7b59216a 100644 --- a/web/screens/Thread/ThreadCenterPanel/MessageToolbar/index.tsx +++ b/web/screens/Thread/ThreadCenterPanel/MessageToolbar/index.tsx @@ -55,15 +55,11 @@ const MessageToolbar = ({ message }: { message: ThreadMessage }) => { .slice(-1)[0] if (thread) { - // Should also delete error messages to clear out the error state + // TODO: Should also delete error messages to clear out the error state await extensionManager .get(ExtensionTypeEnum.Conversational) - ?.writeMessages( - thread.id, - messages.filter( - (msg) => msg.id !== message.id && msg.status !== MessageStatus.Error - ) - ) + ?.deleteMessage(thread.id, message.id) + .catch(console.error) const updatedThread: Thread = { ...thread, @@ -74,7 +70,7 @@ const MessageToolbar = ({ message }: { message: ThreadMessage }) => { )[ messages.filter((msg) => msg.role === ChatCompletionRole.Assistant) .length - 1 - ]?.content[0]?.text.value, + ]?.content[0]?.text?.value, }, } @@ -89,10 +85,6 @@ const MessageToolbar = ({ message }: { message: ThreadMessage }) => { setEditMessage(message.id ?? 
'') } - const onRegenerateClick = async () => { - resendChatMessage(message) - } - if (message.status === MessageStatus.Pending) return null return ( @@ -117,12 +109,11 @@ const MessageToolbar = ({ message }: { message: ThreadMessage }) => { )} {message.id === messages[messages.length - 1]?.id && - messages[messages.length - 1].status !== MessageStatus.Error && - messages[messages.length - 1].content[0]?.type !== - ContentType.Pdf && ( + !messages[messages.length - 1]?.metadata?.error && + !messages[messages.length - 1].attachments?.length && (
    | undefined }) => { - const { onViewFile, onViewFileContainer } = usePath() + const { onViewFile } = usePath() return (
    onViewFile(`${id}.pdf`)} /> - - -
    - } - content={{openFileTitle()}} - /> +
    -
    - {name?.replaceAll(/[-._]/g, ' ')} +
    + {metadata && 'filename' in metadata + ? (metadata.filename as string) + : id}
    -

    - {toGibibytes(Number(size))} +

    + {metadata && 'size' in metadata + ? toGibibytes(Number(metadata.size)) + : id}

    diff --git a/web/screens/Thread/ThreadCenterPanel/TextMessage/ImageMessage.tsx b/web/screens/Thread/ThreadCenterPanel/TextMessage/ImageMessage.tsx index 117f259c0..14041721b 100644 --- a/web/screens/Thread/ThreadCenterPanel/TextMessage/ImageMessage.tsx +++ b/web/screens/Thread/ThreadCenterPanel/TextMessage/ImageMessage.tsx @@ -1,43 +1,11 @@ -import { memo, useMemo } from 'react' - -import { ThreadContent } from '@janhq/core' -import { Tooltip } from '@janhq/joi' - -import { FolderOpenIcon } from 'lucide-react' - -import { usePath } from '@/hooks/usePath' - -import { openFileTitle } from '@/utils/titleUtils' +import { memo } from 'react' import { RelativeImage } from '../TextMessage/RelativeImage' -const ImageMessage = ({ content }: { content: ThreadContent }) => { - const { onViewFile, onViewFileContainer } = usePath() - - const annotation = useMemo( - () => content?.text?.annotations[0] ?? '', - [content] - ) - +const ImageMessage = ({ image }: { image: string }) => { return ( -
    -
    - onViewFile(annotation)} - /> -
    - - -
    - } - content={{openFileTitle()}} - /> +
    +
    ) } diff --git a/web/screens/Thread/ThreadCenterPanel/TextMessage/MarkdownTextMessage.tsx b/web/screens/Thread/ThreadCenterPanel/TextMessage/MarkdownTextMessage.tsx index 6b416f152..f0a23e416 100644 --- a/web/screens/Thread/ThreadCenterPanel/TextMessage/MarkdownTextMessage.tsx +++ b/web/screens/Thread/ThreadCenterPanel/TextMessage/MarkdownTextMessage.tsx @@ -7,8 +7,9 @@ import Markdown from 'react-markdown' import rehypeHighlight from 'rehype-highlight' import rehypeHighlightCodeLines from 'rehype-highlight-code-lines' + import rehypeKatex from 'rehype-katex' -import rehypeRaw from 'rehype-raw' + import remarkMath from 'remark-math' import 'katex/dist/katex.min.css' @@ -18,9 +19,15 @@ import { useClipboard } from '@/hooks/useClipboard' import { getLanguageFromExtension } from '@/utils/codeLanguageExtension' export const MarkdownTextMessage = memo( - ({ text }: { id: string; text: string }) => { + ({ text, isUser }: { id: string; text: string; isUser: boolean }) => { const clipboard = useClipboard({ timeout: 1000 }) + // Escapes headings + function preprocessMarkdown(text: string): string { + if (!isUser) return text + return text.replace(/^#{1,6} /gm, (match) => `\\${match}`) + } + function extractCodeLines(node: { children: { children: any[] }[] }) { const codeLines: any[] = [] @@ -198,14 +205,12 @@ export const MarkdownTextMessage = memo( remarkPlugins={[remarkMath]} rehypePlugins={[ [rehypeKatex, { throwOnError: false }], - rehypeRaw, rehypeHighlight, [rehypeHighlightCodeLines, { showLineNumbers: true }], wrapCodeBlocksWithoutVisit, ]} - skipHtml={true} > - {text} + {preprocessMarkdown(text)} ) diff --git a/web/screens/Thread/ThreadCenterPanel/TextMessage/RelativeImage.tsx b/web/screens/Thread/ThreadCenterPanel/TextMessage/RelativeImage.tsx index 72d2a9365..bfd13b0ef 100644 --- a/web/screens/Thread/ThreadCenterPanel/TextMessage/RelativeImage.tsx +++ b/web/screens/Thread/ThreadCenterPanel/TextMessage/RelativeImage.tsx @@ -7,7 +7,7 @@ export const RelativeImage = ({ onClick, }: { src: string - onClick: () => void + onClick?: () => void }) => { const [path, setPath] = useState('') @@ -17,9 +17,12 @@ export const RelativeImage = ({ }) }, []) return ( -