From 9d64e9bef41f6ecdc38ad6262092e1b4af4d9ec6 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Mon, 24 Mar 2025 11:53:30 +0700
Subject: [PATCH] chore: merge release/v0.5.16 into dev (#4833)

* chore: bump cortex 1.0.11-rc10

* chore: bump to latest cortex release

* feat: Cortex API Authorization

* chore: correct CI CD repo name

* chore: correct new menloresearch repo name

* feat: rotate api token for each run (#4820)

* feat: rotate api token for each run

* chore: correct github repo url

* chore: correct github api url

* chore: should not filter out models first launch

* chore: bump cortex release

* chore: should get hardware information on launch (#4821)

* chore: should have an option to not revalidate hardware information

* chore: cortex.cpp gpu activation could cause a race condition (#4825)

* fix: jan beta logo displayed in jan release (#4828)

---------

Co-authored-by: David <davidpt.janai@gmail.com>
Co-authored-by: Nguyen Ngoc Minh <91668012+Minh141120@users.noreply.github.com>
---
 .github/workflows/jan-electron-build.yml      |   2 +-
 .../nightly-integrate-cortex-cpp.yml          |  98 ++++----
 .../workflows/template-build-linux-x64.yml    |   2 +-
 .github/workflows/template-build-macos.yml    |   2 +-
 .../workflows/template-build-windows-x64.yml  |   2 +-
 core/src/types/api/index.ts                   |   2 +
 electron/handlers/native.ts                   |   7 +
 electron/main.ts                              |   5 +
 .../assistant-extension/src/node/retrieval.ts |  12 +-
 .../conversational-extension/src/index.ts     | 113 ++++++---
 .../engine-management-extension/src/index.ts  | 194 +++++++++------
 .../package.json                              |   2 -
 .../src/index.ts                              |  45 +++-
 .../bin/version.txt                           |   2 +-
 .../rolldown.config.mjs                       |   2 +-
 .../inference-cortex-extension/src/index.ts   | 141 +++++++----
 .../src/node/index.ts                         |   5 +-
 .../model-extension/rolldown.config.mjs       |   2 +-
 .../model-extension/src/@types/global.d.ts    |   2 +-
 extensions/model-extension/src/index.ts       | 186 +++++++++-----
 server/cortex.json                            | 232 ------------------
 server/index.ts                               |  36 ++-
 web/containers/Providers/DataLoader.tsx       |   3 +
 web/hooks/useHardwareManagement.ts            |   4 +-
 web/screens/Hub/index.tsx                     |   2 +-
 web/screens/Settings/Hardware/index.tsx       |  10 +-
 26 files changed, 562 insertions(+), 551 deletions(-)

diff --git a/.github/workflows/jan-electron-build.yml b/.github/workflows/jan-electron-build.yml
index c868434e7..7d69a5c12 100644
--- a/.github/workflows/jan-electron-build.yml
+++ b/.github/workflows/jan-electron-build.yml
@@ -88,4 +88,4 @@ jobs:
         #   config-name: my-config.yml
         #   disable-autolabeler: true
         env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
\ No newline at end of file
diff --git a/.github/workflows/nightly-integrate-cortex-cpp.yml b/.github/workflows/nightly-integrate-cortex-cpp.yml
index 52b042aeb..066fbd28e 100644
--- a/.github/workflows/nightly-integrate-cortex-cpp.yml
+++ b/.github/workflows/nightly-integrate-cortex-cpp.yml
@@ -18,67 +18,67 @@ jobs:
       pr_created: ${{ steps.check-update.outputs.pr_created }}
 
     steps:
-      - name: Checkout repository
-        uses: actions/checkout@v3
-        with:
-          submodules: recursive
-          ref: dev
-          fetch-depth: 0
-          token: ${{ secrets.PAT_SERVICE_ACCOUNT }}
+    - name: Checkout repository
+      uses: actions/checkout@v3
+      with:
+        submodules: recursive
+        ref: dev
+        fetch-depth: 0
+        token: ${{ secrets.PAT_SERVICE_ACCOUNT }}
 
-      - name: Configure Git
-        run: |
-          git config --global user.name 'github-actions[bot]'
-          git config --global user.email 'github-actions[bot]@users.noreply.github.com'
+    - name: Configure Git
+      run: |
+        git config --global user.name 'github-actions[bot]'
+        git config --global user.email 'github-actions[bot]@users.noreply.github.com'
 
-      - name: Update submodule to latest release
-        id: check-update
-        env:
-          GITHUB_TOKEN: ${{ secrets.PAT_SERVICE_ACCOUNT }}
-        run: |
-          curl -s https://api.github.com/repos/menloresearch/cortex/releases > /tmp/github_api_releases.json
-          latest_prerelease_name=$(cat /tmp/github_api_releases.json | jq -r '.[] | select(.prerelease) | .name' | head -n 1)
+    - name: Update submodule to latest release
+      id: check-update
+      env:
+        GITHUB_TOKEN: ${{ secrets.PAT_SERVICE_ACCOUNT }}
+      run: |
+        curl -s https://api.github.com/repos/menloresearch/cortex/releases > /tmp/github_api_releases.json
+        latest_prerelease_name=$(cat /tmp/github_api_releases.json | jq -r '.[] | select(.prerelease) | .name' | head -n 1)
 
-          get_asset_count() {
-            local version_name=$1
-            cat /tmp/github_api_releases.json | jq -r --arg version_name "$version_name" '.[] | select(.name == $version_name) | .assets | length'
-          }
+        get_asset_count() {
+          local version_name=$1
+          cat /tmp/github_api_releases.json | jq -r --arg version_name "$version_name" '.[] | select(.name == $version_name) | .assets | length'
+        }
 
-          cortex_cpp_version_file_path="extensions/inference-nitro-extension/bin/version.txt"
-          current_version_name=$(cat "$cortex_cpp_version_file_path" | head -n 1)
+        cortex_cpp_version_file_path="extensions/inference-cortex-extension/bin/version.txt"  # version.txt lives in the cortex extension
+        current_version_name=$(cat "$cortex_cpp_version_file_path" | head -n 1)
 
-          current_version_asset_count=$(get_asset_count "$current_version_name")
-          latest_prerelease_asset_count=$(get_asset_count "$latest_prerelease_name")
+        current_version_asset_count=$(get_asset_count "$current_version_name")
+        latest_prerelease_asset_count=$(get_asset_count "$latest_prerelease_name")
 
-          if [ "$current_version_name" = "$latest_prerelease_name" ]; then
-            echo "cortex cpp remote repo doesn't have update today, skip update cortex.cpp for today nightly build"
-            echo "::set-output name=pr_created::false"
-            exit 0
-          fi
+        if [ "$current_version_name" = "$latest_prerelease_name" ]; then
+          echo "cortex cpp remote repo doesn't have update today, skip update cortex.cpp for today nightly build"
+          echo "pr_created=false" >> "$GITHUB_OUTPUT"  # ::set-output is deprecated; step outputs go through $GITHUB_OUTPUT
+          exit 0
+        fi
 
-          if [ "$current_version_asset_count" != "$latest_prerelease_asset_count" ]; then
-            echo "Latest prerelease version has different number of assets, somethink went wrong, skip update cortex.cpp for today nightly build"
-            echo "::set-output name=pr_created::false"
-            exit 1
-          fi
+        if [ "$current_version_asset_count" != "$latest_prerelease_asset_count" ]; then
+          echo "Latest prerelease version has different number of assets, something went wrong, skip update cortex.cpp for today nightly build"
+          echo "pr_created=false" >> "$GITHUB_OUTPUT"
+          exit 1
+        fi
 
-          echo $latest_prerelease_name > $cortex_cpp_version_file_path
-          echo "Updated version from $current_version_name to $latest_prerelease_name."
-          echo "::set-output name=pr_created::true"
+        echo "$latest_prerelease_name" > "$cortex_cpp_version_file_path"
+        echo "Updated version from $current_version_name to $latest_prerelease_name."
+        echo "pr_created=true" >> "$GITHUB_OUTPUT"
 
-          git add -f $cortex_cpp_version_file_path
-          git commit -m "Update cortex cpp nightly to version $latest_prerelease_name"
-          branch_name="update-nightly-$(date +'%Y-%m-%d-%H-%M')"
-          git checkout -b $branch_name
-          git push origin $branch_name
+        git add -f "$cortex_cpp_version_file_path"
+        git commit -m "Update cortex cpp nightly to version $latest_prerelease_name"
+        branch_name="update-nightly-$(date +'%Y-%m-%d-%H-%M')"
+        git checkout -b "$branch_name"
+        git push origin "$branch_name"
 
-          pr_title="Update cortex cpp nightly to version $latest_prerelease_name"
-          pr_body="This PR updates the Update cortex cpp nightly to version $latest_prerelease_name"
+        pr_title="Update cortex cpp nightly to version $latest_prerelease_name"
+        pr_body="This PR updates cortex cpp nightly to version $latest_prerelease_name"
 
-          gh pr create --title "$pr_title" --body "$pr_body" --head $branch_name --base dev --reviewer Van-QA
+        gh pr create --title "$pr_title" --body "$pr_body" --head "$branch_name" --base dev --reviewer Van-QA
 
-          pr_number=$(gh pr list --head $branch_name --json number --jq '.[0].number')
-          echo "::set-output name=pr_number::$pr_number"
+        pr_number=$(gh pr list --head "$branch_name" --json number --jq '.[0].number')
+        echo "pr_number=$pr_number" >> "$GITHUB_OUTPUT"
 
   check-and-merge-pr:
     needs: update-submodule
diff --git a/.github/workflows/template-build-linux-x64.yml b/.github/workflows/template-build-linux-x64.yml
index 3f0a2c412..58b566931 100644
--- a/.github/workflows/template-build-linux-x64.yml
+++ b/.github/workflows/template-build-linux-x64.yml
@@ -184,4 +184,4 @@ jobs:
         uses: actions/upload-artifact@v4
         with:
           name: jan-linux-amd64-${{ inputs.new_version }}-AppImage
-          path: ./electron/dist/*.AppImage
+          path: ./electron/dist/*.AppImage
\ No newline at end of file
diff --git a/.github/workflows/template-build-macos.yml b/.github/workflows/template-build-macos.yml
index fb4660c3f..a5e5cc724 100644
--- a/.github/workflows/template-build-macos.yml
+++ b/.github/workflows/template-build-macos.yml
@@ -230,4 +230,4 @@ jobs:
         uses: actions/upload-artifact@v4
         with:
           name: jan-mac-universal-${{ inputs.new_version }}
-          path: ./electron/dist/*.dmg
+          path: ./electron/dist/*.dmg
\ No newline at end of file
diff --git a/.github/workflows/template-build-windows-x64.yml b/.github/workflows/template-build-windows-x64.yml
index 1032918e1..9be028e15 100644
--- a/.github/workflows/template-build-windows-x64.yml
+++ b/.github/workflows/template-build-windows-x64.yml
@@ -226,4 +226,4 @@ jobs:
         uses: actions/upload-artifact@v4
         with:
           name: jan-win-x64-${{ inputs.new_version }}
-          path: ./electron/dist/*.exe
+          path: ./electron/dist/*.exe
\ No newline at end of file
diff --git a/core/src/types/api/index.ts b/core/src/types/api/index.ts
index a663f2674..2f33b72e4 100644
--- a/core/src/types/api/index.ts
+++ b/core/src/types/api/index.ts
@@ -33,6 +33,8 @@ export enum NativeRoute {
   stopServer = 'stopServer',
 
   appUpdateDownload = 'appUpdateDownload',
+
+  appToken = 'appToken',
 }
 
 /**
diff --git a/electron/handlers/native.ts b/electron/handlers/native.ts
index 7afeed285..f8f70c302 100644
--- a/electron/handlers/native.ts
+++ b/electron/handlers/native.ts
@@ -317,4 +317,11 @@ export function handleAppIPCs() {
     const { stopServer } = require('@janhq/server')
     return stopServer()
   })
+
+  /**
+   * Handles the "appToken" IPC message by returning process.env.appToken, or 'cortex.cpp' when it is unset.
+   */
+  ipcMain.handle(NativeRoute.appToken, async (_event): Promise<string> => {
+    return process.env.appToken ?? 'cortex.cpp'
+  })
 }
diff --git a/electron/main.ts b/electron/main.ts
index 19192bd99..59e72ca24 100644
--- a/electron/main.ts
+++ b/electron/main.ts
@@ -29,6 +29,7 @@ import { trayManager } from './managers/tray'
 import { logSystemInfo } from './utils/system'
 import { registerGlobalShortcuts } from './utils/shortcut'
 import { registerLogger } from './utils/logger'
+import { randomBytes } from 'crypto'
 
 const preloadPath = join(__dirname, 'preload.js')
 const preloadQuickAskPath = join(__dirname, 'preload.quickask.js')
@@ -56,6 +57,10 @@ const createMainWindow = () => {
   windowManager.createMainWindow(preloadPath, startUrl)
 }
 
+// Generate a random token for the app and expose it through process.env.appToken.
+// This token is used for authentication when making requests to the cortex.cpp server.
+process.env.appToken = randomBytes(16).toString('hex')
+
 app
   .whenReady()
   .then(() => {
diff --git a/extensions/assistant-extension/src/node/retrieval.ts b/extensions/assistant-extension/src/node/retrieval.ts
index 2db2bb4fb..05fa67d54 100644
--- a/extensions/assistant-extension/src/node/retrieval.ts
+++ b/extensions/assistant-extension/src/node/retrieval.ts
@@ -23,11 +23,16 @@ export class Retrieval {
 
   constructor(chunkSize: number = 4000, chunkOverlap: number = 200) {
     this.updateTextSplitter(chunkSize, chunkOverlap)
+    this.initialize()
+  }
+
+  private async initialize() {
+    const apiKey = process.env.appToken ?? 'cortex.cpp' // Node-side module: no `window` here, so read the token from process.env
 
     // declare time-weighted retriever and storage
     this.timeWeightedVectorStore = new MemoryVectorStore(
       new OpenAIEmbeddings(
-        { openAIApiKey: 'cortex-embedding' },
+        { openAIApiKey: apiKey },
         { basePath: `${CORTEX_API_URL}/v1` }
       )
     )
@@ -47,9 +52,10 @@ export class Retrieval {
     })
   }
 
-  public updateEmbeddingEngine(model: string, engine: string): void {
+  public async updateEmbeddingEngine(model: string, engine: string) {
+    const apiKey = process.env.appToken ?? 'cortex.cpp' // Node-side module: no `window` here, so read the token from process.env
     this.embeddingModel = new OpenAIEmbeddings(
-      { openAIApiKey: 'cortex-embedding', model },
+      { openAIApiKey: apiKey, model },
       // TODO: Raw settings
       { basePath: `${CORTEX_API_URL}/v1` }
     )
diff --git a/extensions/conversational-extension/src/index.ts b/extensions/conversational-extension/src/index.ts
index eeb4fcf38..791385fc9 100644
--- a/extensions/conversational-extension/src/index.ts
+++ b/extensions/conversational-extension/src/index.ts
@@ -4,7 +4,7 @@ import {
   ThreadAssistantInfo,
   ThreadMessage,
 } from '@janhq/core'
-import ky from 'ky'
+import ky, { KyInstance } from 'ky'
 import PQueue from 'p-queue'
 
 type ThreadList = {
@@ -22,6 +22,22 @@ type MessageList = {
 export default class CortexConversationalExtension extends ConversationalExtension {
   queue = new PQueue({ concurrency: 1 })
 
+  api?: KyInstance
+  /**
+   * Lazily builds and caches a ky client rooted at API_URL that sends the app token as a Bearer Authorization header.
+   * @returns The cached client; the token falls back to 'cortex.cpp' when the appToken call yields null or undefined.
+   */
+  async apiInstance(): Promise<KyInstance> {
+    if(this.api) return this.api
+    const apiKey = (await window.core?.api.appToken()) ?? 'cortex.cpp'
+    this.api = ky.extend({
+      prefixUrl: API_URL,
+      headers: {
+        Authorization: `Bearer ${apiKey}`,
+      },
+    })
+    return this.api
+  }
   /**
    * Called when the extension is loaded.
    */
@@ -39,10 +55,12 @@ export default class CortexConversationalExtension extends ConversationalExtensi
    */
   async listThreads(): Promise<Thread[]> {
     return this.queue.add(() =>
-      ky
-        .get(`${API_URL}/v1/threads?limit=-1`)
-        .json<ThreadList>()
-        .then((e) => e.data)
+      this.apiInstance().then((api) =>
+        api
+          .get('v1/threads?limit=-1')
+          .json<ThreadList>()
+          .then((e) => e.data)
+      )
     ) as Promise<Thread[]>
   }
 
@@ -52,7 +70,9 @@ export default class CortexConversationalExtension extends ConversationalExtensi
    */
   async createThread(thread: Thread): Promise<Thread> {
     return this.queue.add(() =>
-      ky.post(`${API_URL}/v1/threads`, { json: thread }).json<Thread>()
+      this.apiInstance().then((api) =>
+        api.post('v1/threads', { json: thread }).json<Thread>()
+      )
     ) as Promise<Thread>
   }
 
@@ -63,7 +83,9 @@ export default class CortexConversationalExtension extends ConversationalExtensi
   async modifyThread(thread: Thread): Promise<void> {
     return this.queue
       .add(() =>
-        ky.patch(`${API_URL}/v1/threads/${thread.id}`, { json: thread })
+        this.apiInstance().then((api) =>
+          api.patch(`v1/threads/${thread.id}`, { json: thread })
+        )
       )
       .then()
   }
@@ -74,7 +96,9 @@ export default class CortexConversationalExtension extends ConversationalExtensi
    */
   async deleteThread(threadId: string): Promise<void> {
     return this.queue
-      .add(() => ky.delete(`${API_URL}/v1/threads/${threadId}`))
+      .add(() =>
+        this.apiInstance().then((api) => api.delete(`v1/threads/${threadId}`))
+      )
       .then()
   }
 
@@ -85,11 +109,13 @@ export default class CortexConversationalExtension extends ConversationalExtensi
    */
   async createMessage(message: ThreadMessage): Promise<ThreadMessage> {
     return this.queue.add(() =>
-      ky
-        .post(`${API_URL}/v1/threads/${message.thread_id}/messages`, {
-          json: message,
-        })
-        .json<ThreadMessage>()
+      this.apiInstance().then((api) =>
+        api
+          .post(`v1/threads/${message.thread_id}/messages`, {
+            json: message,
+          })
+          .json<ThreadMessage>()
+      )
     ) as Promise<ThreadMessage>
   }
 
@@ -100,14 +126,13 @@ export default class CortexConversationalExtension extends ConversationalExtensi
    */
   async modifyMessage(message: ThreadMessage): Promise<ThreadMessage> {
     return this.queue.add(() =>
-      ky
-        .patch(
-          `${API_URL}/v1/threads/${message.thread_id}/messages/${message.id}`,
-          {
+      this.apiInstance().then((api) =>
+        api
+          .patch(`v1/threads/${message.thread_id}/messages/${message.id}`, {
             json: message,
-          }
-        )
-        .json<ThreadMessage>()
+          })
+          .json<ThreadMessage>()
+      )
     ) as Promise<ThreadMessage>
   }
 
@@ -120,7 +145,9 @@ export default class CortexConversationalExtension extends ConversationalExtensi
   async deleteMessage(threadId: string, messageId: string): Promise<void> {
     return this.queue
       .add(() =>
-        ky.delete(`${API_URL}/v1/threads/${threadId}/messages/${messageId}`)
+        this.apiInstance().then((api) =>
+          api.delete(`v1/threads/${threadId}/messages/${messageId}`)
+        )
       )
       .then()
   }
@@ -132,10 +159,12 @@ export default class CortexConversationalExtension extends ConversationalExtensi
    */
   async listMessages(threadId: string): Promise<ThreadMessage[]> {
     return this.queue.add(() =>
-      ky
-        .get(`${API_URL}/v1/threads/${threadId}/messages?order=asc&limit=-1`)
-        .json<MessageList>()
-        .then((e) => e.data)
+      this.apiInstance().then((api) =>
+        api
+          .get(`v1/threads/${threadId}/messages?order=asc&limit=-1`)
+          .json<MessageList>()
+          .then((e) => e.data)
+      )
     ) as Promise<ThreadMessage[]>
   }
 
@@ -147,9 +176,11 @@ export default class CortexConversationalExtension extends ConversationalExtensi
    */
   async getThreadAssistant(threadId: string): Promise<ThreadAssistantInfo> {
     return this.queue.add(() =>
-      ky
-        .get(`${API_URL}/v1/assistants/${threadId}?limit=-1`)
-        .json<ThreadAssistantInfo>()
+      this.apiInstance().then((api) =>
+        api
+          .get(`v1/assistants/${threadId}?limit=-1`)
+          .json<ThreadAssistantInfo>()
+      )
     ) as Promise<ThreadAssistantInfo>
   }
   /**
@@ -163,9 +194,11 @@ export default class CortexConversationalExtension extends ConversationalExtensi
     assistant: ThreadAssistantInfo
   ): Promise<ThreadAssistantInfo> {
     return this.queue.add(() =>
-      ky
-        .post(`${API_URL}/v1/assistants/${threadId}`, { json: assistant })
-        .json<ThreadAssistantInfo>()
+      this.apiInstance().then((api) =>
+        api
+          .post(`v1/assistants/${threadId}`, { json: assistant })
+          .json<ThreadAssistantInfo>()
+      )
     ) as Promise<ThreadAssistantInfo>
   }
 
@@ -180,9 +213,11 @@ export default class CortexConversationalExtension extends ConversationalExtensi
     assistant: ThreadAssistantInfo
   ): Promise<ThreadAssistantInfo> {
     return this.queue.add(() =>
-      ky
-        .patch(`${API_URL}/v1/assistants/${threadId}`, { json: assistant })
-        .json<ThreadAssistantInfo>()
+      this.apiInstance().then((api) =>
+        api
+          .patch(`v1/assistants/${threadId}`, { json: assistant })
+          .json<ThreadAssistantInfo>()
+      )
     ) as Promise<ThreadAssistantInfo>
   }
 
@@ -191,10 +226,12 @@ export default class CortexConversationalExtension extends ConversationalExtensi
    * @returns
    */
   async healthz(): Promise<void> {
-    return ky
-      .get(`${API_URL}/healthz`, {
-        retry: { limit: 20, delay: () => 500, methods: ['get'] },
-      })
+    return this.apiInstance()
+      .then((api) =>
+        api.get('healthz', {
+          retry: { limit: 20, delay: () => 500, methods: ['get'] },
+        })
+      )
       .then(() => {})
   }
 }
diff --git a/extensions/engine-management-extension/src/index.ts b/extensions/engine-management-extension/src/index.ts
index 1a5b004f7..7d0c9f9c4 100644
--- a/extensions/engine-management-extension/src/index.ts
+++ b/extensions/engine-management-extension/src/index.ts
@@ -15,7 +15,7 @@ import {
   ModelEvent,
   EngineEvent,
 } from '@janhq/core'
-import ky, { HTTPError } from 'ky'
+import ky, { HTTPError, KyInstance } from 'ky'
 import PQueue from 'p-queue'
 import { EngineError } from './error'
 import { getJanDataFolderPath } from '@janhq/core'
@@ -31,6 +31,22 @@ interface ModelList {
 export default class JanEngineManagementExtension extends EngineManagementExtension {
   queue = new PQueue({ concurrency: 1 })
 
+  api?: KyInstance
+  /**
+   * Lazily builds and caches a ky client rooted at API_URL that sends the app token as a Bearer Authorization header.
+   * @returns The cached client; the token falls back to 'cortex.cpp' when the appToken call yields null or undefined.
+   */
+  async apiInstance(): Promise<KyInstance> {
+    if(this.api) return this.api
+    const apiKey = (await window.core?.api.appToken()) ?? 'cortex.cpp'
+    this.api = ky.extend({
+      prefixUrl: API_URL,
+      headers: {
+        Authorization: `Bearer ${apiKey}`,
+      },
+    })
+    return this.api
+  }
   /**
    * Called when the extension is loaded.
    */
@@ -59,10 +75,12 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
    */
   async getEngines(): Promise<Engines> {
     return this.queue.add(() =>
-      ky
-        .get(`${API_URL}/v1/engines`)
-        .json<Engines>()
-        .then((e) => e)
+      this.apiInstance().then((api) =>
+        api
+          .get('v1/engines')
+          .json<Engines>()
+          .then((e) => e)
+      )
     ) as Promise<Engines>
   }
 
@@ -70,12 +88,15 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
    * @returns A Promise that resolves to an object of list engines.
    */
   async getRemoteModels(name: string): Promise<any> {
-    return ky
-      .get(`${API_URL}/v1/models/remote/${name}`)
-      .json<ModelList>()
-      .catch(() => ({
-        data: [],
-      })) as Promise<ModelList>
+    return this.apiInstance().then(
+      (api) =>
+        api
+          .get(`v1/models/remote/${name}`)
+          .json<ModelList>()
+          .catch(() => ({
+            data: [],
+          })) as Promise<ModelList>
+    )
   }
 
   /**
@@ -84,10 +105,12 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
    */
   async getInstalledEngines(name: InferenceEngine): Promise<EngineVariant[]> {
     return this.queue.add(() =>
-      ky
-        .get(`${API_URL}/v1/engines/${name}`)
-        .json<EngineVariant[]>()
-        .then((e) => e)
+      this.apiInstance().then((api) =>
+        api
+          .get(`v1/engines/${name}`)
+          .json<EngineVariant[]>()
+          .then((e) => e)
+      )
     ) as Promise<EngineVariant[]>
   }
 
@@ -103,12 +126,14 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
     platform?: string
   ) {
     return this.queue.add(() =>
-      ky
-        .get(`${API_URL}/v1/engines/${name}/releases/${version}`)
-        .json<EngineReleased[]>()
-        .then((e) =>
-          platform ? e.filter((r) => r.name.includes(platform)) : e
-        )
+      this.apiInstance().then((api) =>
+        api
+          .get(`v1/engines/${name}/releases/${version}`)
+          .json<EngineReleased[]>()
+          .then((e) =>
+            platform ? e.filter((r) => r.name.includes(platform)) : e
+          )
+      )
     ) as Promise<EngineReleased[]>
   }
 
@@ -119,12 +144,14 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
    */
   async getLatestReleasedEngine(name: InferenceEngine, platform?: string) {
     return this.queue.add(() =>
-      ky
-        .get(`${API_URL}/v1/engines/${name}/releases/latest`)
-        .json<EngineReleased[]>()
-        .then((e) =>
-          platform ? e.filter((r) => r.name.includes(platform)) : e
-        )
+      this.apiInstance().then((api) =>
+        api
+          .get(`v1/engines/${name}/releases/latest`)
+          .json<EngineReleased[]>()
+          .then((e) =>
+            platform ? e.filter((r) => r.name.includes(platform)) : e
+          )
+      )
     ) as Promise<EngineReleased[]>
   }
 
@@ -134,9 +161,11 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
    */
   async installEngine(name: string, engineConfig: EngineConfig) {
     return this.queue.add(() =>
-      ky
-        .post(`${API_URL}/v1/engines/${name}/install`, { json: engineConfig })
-        .then((e) => e)
+      this.apiInstance().then((api) =>
+        api
+          .post(`v1/engines/${name}/install`, { json: engineConfig })
+          .then((e) => e)
+      )
     ) as Promise<{ messages: string }>
   }
 
@@ -167,15 +196,17 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
       engineConfig.metadata.header_template = DEFAULT_REQUEST_HEADERS_TRANSFORM
 
     return this.queue.add(() =>
-      ky.post(`${API_URL}/v1/engines`, { json: engineConfig }).then((e) => {
-        if (persistModels && engineConfig.metadata?.get_models_url) {
-          // Pull /models from remote models endpoint
-          return this.populateRemoteModels(engineConfig)
-            .then(() => e)
-            .catch(() => e)
-        }
-        return e
-      })
+      this.apiInstance().then((api) =>
+        api.post('v1/engines', { json: engineConfig }).then((e) => {
+          if (persistModels && engineConfig.metadata?.get_models_url) {
+            // Pull /models from remote models endpoint
+            return this.populateRemoteModels(engineConfig)
+              .then(() => e)
+              .catch(() => e)
+          }
+          return e
+        })
+      )
     ) as Promise<{ messages: string }>
   }
 
@@ -185,9 +216,11 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
    */
   async uninstallEngine(name: InferenceEngine, engineConfig: EngineConfig) {
     return this.queue.add(() =>
-      ky
-        .delete(`${API_URL}/v1/engines/${name}/install`, { json: engineConfig })
-        .then((e) => e)
+      this.apiInstance().then((api) =>
+        api
+          .delete(`v1/engines/${name}/install`, { json: engineConfig })
+          .then((e) => e)
+      )
     ) as Promise<{ messages: string }>
   }
 
@@ -196,25 +229,27 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
    * @param model - Remote model object.
    */
   async addRemoteModel(model: Model) {
-    return this.queue
-      .add(() =>
-        ky
-          .post(`${API_URL}/v1/models/add`, {
-            json: {
-              inference_params: {
-                max_tokens: 4096,
-                temperature: 0.7,
-                top_p: 0.95,
-                stream: true,
-                frequency_penalty: 0,
-                presence_penalty: 0,
+    return this.queue.add(() =>
+      this.apiInstance()
+        .then((api) =>
+          api
+            .post('v1/models/add', {
+              json: {
+                inference_params: {
+                  max_tokens: 4096,
+                  temperature: 0.7,
+                  top_p: 0.95,
+                  stream: true,
+                  frequency_penalty: 0,
+                  presence_penalty: 0,
+                },
+                ...model,
               },
-              ...model,
-            },
-          })
-          .then((e) => e)
-      )
-      .then(() => {})
+            })
+            .then((e) => e)
+        )
+        .then(() => {})
+    )
   }
 
   /**
@@ -223,10 +258,12 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
    */
   async getDefaultEngineVariant(name: InferenceEngine) {
     return this.queue.add(() =>
-      ky
-        .get(`${API_URL}/v1/engines/${name}/default`)
-        .json<{ messages: string }>()
-        .then((e) => e)
+      this.apiInstance().then((api) =>
+        api
+          .get(`v1/engines/${name}/default`)
+          .json<{ messages: string }>()
+          .then((e) => e)
+      )
     ) as Promise<DefaultEngineVariant>
   }
 
@@ -240,9 +277,11 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
     engineConfig: EngineConfig
   ) {
     return this.queue.add(() =>
-      ky
-        .post(`${API_URL}/v1/engines/${name}/default`, { json: engineConfig })
-        .then((e) => e)
+      this.apiInstance().then((api) =>
+        api
+          .post(`v1/engines/${name}/default`, { json: engineConfig })
+          .then((e) => e)
+      )
     ) as Promise<{ messages: string }>
   }
 
@@ -251,9 +290,11 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
    */
   async updateEngine(name: InferenceEngine, engineConfig?: EngineConfig) {
     return this.queue.add(() =>
-      ky
-        .post(`${API_URL}/v1/engines/${name}/update`, { json: engineConfig })
-        .then((e) => e)
+      this.apiInstance().then((api) =>
+        api
+          .post(`v1/engines/${name}/update`, { json: engineConfig })
+          .then((e) => e)
+      )
     ) as Promise<{ messages: string }>
   }
 
@@ -262,10 +303,12 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
    * @returns
    */
   async healthz(): Promise<void> {
-    return ky
-      .get(`${API_URL}/healthz`, {
-        retry: { limit: 20, delay: () => 500, methods: ['get'] },
-      })
+    return this.apiInstance()
+      .then((api) =>
+        api.get('healthz', {
+          retry: { limit: 20, delay: () => 500, methods: ['get'] },
+        })
+      )
       .then(() => {
         this.queue.concurrency = Infinity
       })
@@ -390,7 +433,6 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
     const version = await this.getSetting<string>('version', '0.0.0')
     const engines = await this.getEngines()
     if (version < VERSION) {
-
       console.log('Migrating engine settings...')
       // Migrate engine settings
       await Promise.all(
@@ -398,7 +440,7 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
           const { id, ...data } = engine
 
           data.api_key = engines[id]?.api_key
-          return this.updateEngine(id,{
+          return this.updateEngine(id, {
             ...data,
           }).catch(console.error)
         })
diff --git a/extensions/hardware-management-extension/package.json b/extensions/hardware-management-extension/package.json
index ec98c7440..396404df9 100644
--- a/extensions/hardware-management-extension/package.json
+++ b/extensions/hardware-management-extension/package.json
@@ -29,12 +29,10 @@
   },
   "dependencies": {
     "@janhq/core": "../../core/package.tgz",
-    "cpu-instructions": "^0.0.13",
     "ky": "^1.7.2",
     "p-queue": "^8.0.1"
   },
   "bundledDependencies": [
-    "cpu-instructions",
     "@janhq/core"
   ],
   "hardwares": {
diff --git a/extensions/hardware-management-extension/src/index.ts b/extensions/hardware-management-extension/src/index.ts
index f64c2eea8..edd98a7ae 100644
--- a/extensions/hardware-management-extension/src/index.ts
+++ b/extensions/hardware-management-extension/src/index.ts
@@ -1,5 +1,5 @@
 import { HardwareManagementExtension, HardwareInformation } from '@janhq/core'
-import ky from 'ky'
+import ky, { KyInstance } from 'ky'
 import PQueue from 'p-queue'
 
 /**
@@ -17,6 +17,23 @@ export default class JSONHardwareManagementExtension extends HardwareManagementE
     this.queue.add(() => this.healthz())
   }
 
+  api?: KyInstance
+  /**
+   * Returns the shared ky client, creating it on first use.
+   * @returns ky instance prefixed with API_URL that sends an `Authorization: Bearer` header
+   */
+  async apiInstance(): Promise<KyInstance> {
+    if (!this.api) {
+      // App token exposed on window.core; 'cortex.cpp' is used when it is missing
+      const token = (await window.core?.api.appToken()) ?? 'cortex.cpp'
+      this.api = ky.extend({
+        prefixUrl: API_URL,
+        headers: { Authorization: `Bearer ${token}` },
+      })
+    }
+    return this.api
+  }
+
   /**
    * Called when the extension is unloaded.
    */
@@ -27,11 +44,13 @@ export default class JSONHardwareManagementExtension extends HardwareManagementE
    * @returns
    */
   async healthz(): Promise<void> {
-    return ky
-      .get(`${API_URL}/healthz`, {
-        retry: { limit: 20, delay: () => 500, methods: ['get'] },
-      })
-      .then(() => {})
+    return this.apiInstance().then((api) =>
+      api
+        .get('healthz', {
+          retry: { limit: 20, delay: () => 500, methods: ['get'] },
+        })
+        .then(() => {})
+    )
   }
 
   /**
@@ -39,10 +58,12 @@ export default class JSONHardwareManagementExtension extends HardwareManagementE
    */
   async getHardware(): Promise<HardwareInformation> {
     return this.queue.add(() =>
-      ky
-        .get(`${API_URL}/v1/hardware`)
-        .json<HardwareInformation>()
-        .then((e) => e)
+      this.apiInstance().then((api) =>
+        api
+          .get('v1/hardware')
+          .json<HardwareInformation>()
+          .then((e) => e)
+      )
     ) as Promise<HardwareInformation>
   }
 
@@ -54,7 +75,9 @@ export default class JSONHardwareManagementExtension extends HardwareManagementE
     activated_gpus: number[]
   }> {
     return this.queue.add(() =>
-      ky.post(`${API_URL}/v1/hardware/activate`, { json: data }).then((e) => e)
+      this.apiInstance().then((api) =>
+        api.post('v1/hardware/activate', { json: data }).then((e) => e)
+      )
     ) as Promise<{
       message: string
       activated_gpus: number[]
diff --git a/extensions/inference-cortex-extension/bin/version.txt b/extensions/inference-cortex-extension/bin/version.txt
index 90ca20c81..492b167a6 100644
--- a/extensions/inference-cortex-extension/bin/version.txt
+++ b/extensions/inference-cortex-extension/bin/version.txt
@@ -1 +1 @@
-1.0.11-rc9
+1.0.12
\ No newline at end of file
diff --git a/extensions/inference-cortex-extension/rolldown.config.mjs b/extensions/inference-cortex-extension/rolldown.config.mjs
index 3bad30b58..ef4c56c7b 100644
--- a/extensions/inference-cortex-extension/rolldown.config.mjs
+++ b/extensions/inference-cortex-extension/rolldown.config.mjs
@@ -24,7 +24,7 @@ export default defineConfig([
   },
   {
     input: 'src/node/index.ts',
-    external: ['@janhq/core/node', 'cpu-instructions'],
+    external: ['@janhq/core/node'],
     output: {
       format: 'cjs',
       file: 'dist/node/index.cjs.js',
diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts
index 5e1adf322..7ed51f9c2 100644
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@@ -17,7 +17,7 @@ import {
   ModelEvent,
 } from '@janhq/core'
 import PQueue from 'p-queue'
-import ky from 'ky'
+import ky, { KyInstance } from 'ky'
 
 /**
  * Event subscription types of Downloader
@@ -75,8 +75,35 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
 
   abortControllers = new Map<string, AbortController>()
 
+  api?: KyInstance
   /**
-   * Subscribes to events emitted by the @janhq/core package.
+   * Get the API instance
+   * @returns
+   */
+  async apiInstance(): Promise<KyInstance> {
+    // Build the authorized ky client on first use, then keep returning it
+    if (!this.api) {
+      const token = (await window.core?.api.appToken()) ?? 'cortex.cpp'
+      this.api = ky.extend({
+        prefixUrl: CORTEX_API_URL,
+        headers: { Authorization: `Bearer ${token}` },
+      })
+    }
+    return this.api
+  }
+
+  /**
+   * Authorization headers for the API requests.
+   * @returns headers with the app token, or the `cortex.cpp` default key when it is unavailable
+   */
+  async headers(): Promise<HeadersInit> {
+    // Awaiting the optional chain keeps a Promise result even when window.core is absent
+    const token = (await window.core?.api.appToken()) ?? 'cortex.cpp'
+    return { Authorization: `Bearer ${token}` }
+  }
+
+  /**
+   * Called when the extension is loaded.
    */
   async onLoad() {
     super.onLoad()
@@ -153,45 +180,49 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
     this.abortControllers.set(model.id, controller)
 
     return await this.queue.add(() =>
-      ky
-        .post(`${CORTEX_API_URL}/v1/models/start`, {
-          json: {
-            ...extractModelLoadParams(model.settings),
-            model: model.id,
-            engine:
-              model.engine === InferenceEngine.nitro // Legacy model cache
-                ? InferenceEngine.cortex_llamacpp
-                : model.engine,
-            cont_batching: this.cont_batching,
-            n_parallel: this.n_parallel,
-            caching_enabled: this.caching_enabled,
-            flash_attn: this.flash_attn,
-            cache_type: this.cache_type,
-            use_mmap: this.use_mmap,
-            ...(this.cpu_threads ? { cpu_threads: this.cpu_threads } : {}),
-          },
-          timeout: false,
-          signal,
-        })
-        .json()
-        .catch(async (e) => {
-          throw (await e.response?.json()) ?? e
-        })
-        .finally(() => this.abortControllers.delete(model.id))
-        .then()
+      this.apiInstance().then((api) =>
+        api
+          .post('v1/models/start', {
+            json: {
+              ...extractModelLoadParams(model.settings),
+              model: model.id,
+              engine:
+                model.engine === InferenceEngine.nitro // Legacy model cache
+                  ? InferenceEngine.cortex_llamacpp
+                  : model.engine,
+              cont_batching: this.cont_batching,
+              n_parallel: this.n_parallel,
+              caching_enabled: this.caching_enabled,
+              flash_attn: this.flash_attn,
+              cache_type: this.cache_type,
+              use_mmap: this.use_mmap,
+              ...(this.cpu_threads ? { cpu_threads: this.cpu_threads } : {}),
+            },
+            timeout: false,
+            signal,
+          })
+          .json()
+          .catch(async (e) => {
+            throw (await e.response?.json()) ?? e
+          })
+          .finally(() => this.abortControllers.delete(model.id))
+          .then()
+      )
     )
   }
 
   override async unloadModel(model: Model): Promise<void> {
-    return ky
-      .post(`${CORTEX_API_URL}/v1/models/stop`, {
-        json: { model: model.id },
-      })
-      .json()
-      .finally(() => {
-        this.abortControllers.get(model.id)?.abort()
-      })
-      .then()
+    return this.apiInstance().then((api) =>
+      api
+        .post('v1/models/stop', {
+          json: { model: model.id },
+        })
+        .json()
+        .finally(() => {
+          this.abortControllers.get(model.id)?.abort()
+        })
+        .then()
+    )
   }
 
   /**
@@ -199,15 +230,17 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
    * @returns
    */
   private async healthz(): Promise<void> {
-    return ky
-      .get(`${CORTEX_API_URL}/healthz`, {
-        retry: {
-          limit: 20,
-          delay: () => 500,
-          methods: ['get'],
-        },
-      })
-      .then(() => {})
+    return this.apiInstance().then((api) =>
+      api
+        .get('healthz', {
+          retry: {
+            limit: 20,
+            delay: () => 500,
+            methods: ['get'],
+          },
+        })
+        .then(() => {})
+    )
   }
 
   /**
@@ -215,13 +248,15 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
    * @returns
    */
   private async clean(): Promise<any> {
-    return ky
-      .delete(`${CORTEX_API_URL}/processmanager/destroy`, {
-        timeout: 2000, // maximum 2 seconds
-        retry: {
-          limit: 0,
-        },
-      })
+    return this.apiInstance()
+      .then((api) =>
+        api.delete('processmanager/destroy', {
+          timeout: 2000, // maximum 2 seconds
+          retry: {
+            limit: 0,
+          },
+        })
+      )
       .catch(() => {
         // Do nothing
       })
diff --git a/extensions/inference-cortex-extension/src/node/index.ts b/extensions/inference-cortex-extension/src/node/index.ts
index 87a1c7096..d82225745 100644
--- a/extensions/inference-cortex-extension/src/node/index.ts
+++ b/extensions/inference-cortex-extension/src/node/index.ts
@@ -44,8 +44,9 @@ function run(): Promise<any> {
         `${path.join(dataFolderPath, '.janrc')}`,
         '--data_folder_path',
         dataFolderPath,
-        '--loglevel',
-        'INFO',
+        'config',
+        '--api_keys',
+        process.env.appToken ?? 'cortex.cpp',
       ],
       {
         env: {
diff --git a/extensions/model-extension/rolldown.config.mjs b/extensions/model-extension/rolldown.config.mjs
index e6a0e8add..54ea654ff 100644
--- a/extensions/model-extension/rolldown.config.mjs
+++ b/extensions/model-extension/rolldown.config.mjs
@@ -11,7 +11,7 @@ export default defineConfig({
   platform: 'browser',
   define: {
     SETTINGS: JSON.stringify(settingJson),
-    API_URL: JSON.stringify(`http://127.0.0.1:${process.env.CORTEX_API_PORT ?? "39291"}`),
+    CORTEX_API_URL: JSON.stringify(`http://127.0.0.1:${process.env.CORTEX_API_PORT ?? "39291"}`),
     DEFAULT_MODEL_SOURCES: JSON.stringify(modelSources),
   },
 })
diff --git a/extensions/model-extension/src/@types/global.d.ts b/extensions/model-extension/src/@types/global.d.ts
index 27b23fa30..e4d269cdb 100644
--- a/extensions/model-extension/src/@types/global.d.ts
+++ b/extensions/model-extension/src/@types/global.d.ts
@@ -1,5 +1,5 @@
 declare const NODE: string
-declare const API_URL: string
+declare const CORTEX_API_URL: string
 declare const SETTINGS: SettingComponentProps[]
 declare const DEFAULT_MODEL_SOURCES: any
 
diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts
index 4add0b6fa..4362ab9a5 100644
--- a/extensions/model-extension/src/index.ts
+++ b/extensions/model-extension/src/index.ts
@@ -13,7 +13,7 @@ import {
 import { scanModelsFolder } from './legacy/model-json'
 import { deleteModelFiles } from './legacy/delete'
 import PQueue from 'p-queue'
-import ky from 'ky'
+import ky, { KyInstance } from 'ky'
 
 /**
  * cortex.cpp setting keys
@@ -32,9 +32,25 @@ type Data<T> = {
  */
 export default class JanModelExtension extends ModelExtension {
   queue = new PQueue({ concurrency: 1 })
+
+  api?: KyInstance
+  /**
+   * Lazily creates the authorized ky client and reuses it on later calls.
+   * @returns ky instance prefixed with CORTEX_API_URL that sends an `Authorization: Bearer` header
+   */
+  async apiInstance(): Promise<KyInstance> {
+    if (!this.api) {
+      // Prefer the token from window.core; otherwise use the 'cortex.cpp' default
+      const bearer = (await window.core?.api.appToken()) ?? 'cortex.cpp'
+      this.api = ky.extend({
+        prefixUrl: CORTEX_API_URL,
+        headers: { Authorization: `Bearer ${bearer}` },
+      })
+    }
+    return this.api
+  }
   /**
    * Called when the extension is loaded.
-   * @override
    */
   async onLoad() {
     this.queue.add(() => this.healthz())
@@ -82,13 +98,15 @@ export default class JanModelExtension extends ModelExtension {
      * Sending POST to /models/pull/{id} endpoint to pull the model
      */
     return this.queue.add(() =>
-      ky
-        .post(`${API_URL}/v1/models/pull`, { json: { model, id, name } })
-        .json()
-        .catch(async (e) => {
-          throw (await e.response?.json()) ?? e
-        })
-        .then()
+      this.apiInstance().then((api) =>
+        api
+          .post('v1/models/pull', { json: { model, id, name }, timeout: false })
+          .json()
+          .catch(async (e) => {
+            throw (await e.response?.json()) ?? e
+          })
+          .then()
+      )
     )
   }
 
@@ -103,10 +121,12 @@ export default class JanModelExtension extends ModelExtension {
      * Sending DELETE to /models/pull/{id} endpoint to cancel a model pull
      */
     return this.queue.add(() =>
-      ky
-        .delete(`${API_URL}/v1/models/pull`, { json: { taskId: model } })
-        .json()
-        .then()
+      this.apiInstance().then((api) =>
+        api
+          .delete('v1/models/pull', { json: { taskId: model } })
+          .json()
+          .then()
+      )
     )
   }
 
@@ -117,7 +137,11 @@ export default class JanModelExtension extends ModelExtension {
    */
   async deleteModel(model: string): Promise<void> {
     return this.queue
-      .add(() => ky.delete(`${API_URL}/v1/models/${model}`).json().then())
+      .add(() =>
+        this.apiInstance().then((api) =>
+          api.delete(`v1/models/${model}`).json().then()
+        )
+      )
       .catch((e) => console.debug(e))
       .finally(async () => {
         // Delete legacy model files
@@ -219,10 +243,15 @@ export default class JanModelExtension extends ModelExtension {
   async updateModel(model: Partial<Model>): Promise<Model> {
     return this.queue
       .add(() =>
-        ky
-          .patch(`${API_URL}/v1/models/${model.id}`, { json: { ...model } })
-          .json()
-          .then()
+        this.apiInstance().then((api) =>
+          api
+            .patch(`v1/models/${model.id}`, {
+              json: { ...model },
+              timeout: false,
+            })
+            .json()
+            .then()
+        )
       )
       .then(() => this.getModel(model.id))
   }
@@ -233,10 +262,12 @@ export default class JanModelExtension extends ModelExtension {
    */
   async getModel(model: string): Promise<Model> {
     return this.queue.add(() =>
-      ky
-        .get(`${API_URL}/v1/models/${model}`)
-        .json()
-        .then((e) => this.transformModel(e))
+      this.apiInstance().then((api) =>
+        api
+          .get(`v1/models/${model}`)
+          .json()
+          .then((e) => this.transformModel(e))
+      )
     ) as Promise<Model>
   }
 
@@ -252,13 +283,16 @@ export default class JanModelExtension extends ModelExtension {
     option?: OptionType
   ): Promise<void> {
     return this.queue.add(() =>
-      ky
-        .post(`${API_URL}/v1/models/import`, {
-          json: { model, modelPath, name, option },
-        })
-        .json()
-        .catch((e) => console.debug(e)) // Ignore error
-        .then()
+      this.apiInstance().then((api) =>
+        api
+          .post('v1/models/import', {
+            json: { model, modelPath, name, option },
+            timeout: false,
+          })
+          .json()
+          .catch((e) => console.debug(e)) // Ignore error
+          .then()
+      )
     )
   }
 
@@ -269,7 +303,11 @@ export default class JanModelExtension extends ModelExtension {
    */
   async getSources(): Promise<ModelSource[]> {
     const sources = await this.queue
-      .add(() => ky.get(`${API_URL}/v1/models/sources`).json<Data<ModelSource>>())
+      .add(() =>
+        this.apiInstance().then((api) =>
+          api.get('v1/models/sources').json<Data<ModelSource>>()
+        )
+      )
       .then((e) => (typeof e === 'object' ? (e.data as ModelSource[]) : []))
       .catch(() => [])
     return sources.concat(
@@ -283,11 +321,13 @@ export default class JanModelExtension extends ModelExtension {
    */
   async addSource(source: string): Promise<any> {
     return this.queue.add(() =>
-      ky.post(`${API_URL}/v1/models/sources`, {
-        json: {
-          source,
-        },
-      })
+      this.apiInstance().then((api) =>
+        api.post('v1/models/sources', {
+          json: {
+            source,
+          },
+        })
+      )
     )
   }
 
@@ -297,11 +337,14 @@ export default class JanModelExtension extends ModelExtension {
    */
   async deleteSource(source: string): Promise<any> {
     return this.queue.add(() =>
-      ky.delete(`${API_URL}/v1/models/sources`, {
-        json: {
-          source,
-        },
-      })
+      this.apiInstance().then((api) =>
+        api.delete('v1/models/sources', {
+          json: {
+            source,
+          },
+          timeout: false,
+        })
+      )
     )
   }
   // END - Model Sources
@@ -312,7 +355,9 @@ export default class JanModelExtension extends ModelExtension {
    */
   async isModelLoaded(model: string): Promise<boolean> {
     return this.queue
-      .add(() => ky.get(`${API_URL}/v1/models/status/${model}`))
+      .add(() =>
+        this.apiInstance().then((api) => api.get(`v1/models/status/${model}`))
+      )
       .then((e) => true)
       .catch(() => false)
   }
@@ -324,14 +369,18 @@ export default class JanModelExtension extends ModelExtension {
     return this.updateCortexConfig(options).catch((e) => console.debug(e))
   }
 
-   /**
+  /**
    * Fetches models list from cortex.cpp
    * @param model
    * @returns
    */
-   async fetchModels(): Promise<Model[]> {
+  async fetchModels(): Promise<Model[]> {
     return this.queue
-      .add(() => ky.get(`${API_URL}/v1/models?limit=-1`).json<Data<Model>>())
+      .add(() =>
+        this.apiInstance().then((api) =>
+          api.get('v1/models?limit=-1').json<Data<Model>>()
+        )
+      )
       .then((e) =>
         typeof e === 'object' ? e.data.map((e) => this.transformModel(e)) : []
       )
@@ -371,7 +420,9 @@ export default class JanModelExtension extends ModelExtension {
   }): Promise<void> {
     return this.queue
       .add(() =>
-        ky.patch(`${API_URL}/v1/configs`, { json: body }).then(() => {})
+        this.apiInstance().then((api) =>
+          api.patch('v1/configs', { json: body }).then(() => {})
+        )
       )
       .catch((e) => console.debug(e))
   }
@@ -381,14 +432,16 @@ export default class JanModelExtension extends ModelExtension {
    * @returns
    */
   private healthz(): Promise<void> {
-    return ky
-      .get(`${API_URL}/healthz`, {
-        retry: {
-          limit: 20,
-          delay: () => 500,
-          methods: ['get'],
-        },
-      })
+    return this.apiInstance()
+      .then((api) =>
+        api.get('healthz', {
+          retry: {
+            limit: 20,
+            delay: () => 500,
+            methods: ['get'],
+          },
+        })
+      )
       .then(() => {
         this.queue.concurrency = Infinity
       })
@@ -401,17 +454,22 @@ export default class JanModelExtension extends ModelExtension {
     const models = await this.fetchModels()
 
     return this.queue.add(() =>
-      ky
-        .get(`${API_URL}/v1/models/hub?author=cortexso&tag=cortex.cpp`)
-        .json<Data<string>>()
-        .then((e) => {
-          e.data?.forEach((model) => {
-            if (
-              !models.some((e) => 'modelSource' in e && e.modelSource === model)
-            )
-              this.addSource(model).catch((e) => console.debug(e))
-          })
-        })
+      this.apiInstance()
+        .then((api) =>
+          api
+            .get('v1/models/hub?author=cortexso&tag=cortex.cpp')
+            .json<Data<string>>()
+            .then((e) => {
+              e.data?.forEach((model) => {
+                if (
+                  !models.some(
+                    (e) => 'modelSource' in e && e.modelSource === model
+                  )
+                )
+                  this.addSource(model).catch((e) => console.debug(e))
+              })
+            })
+        )
         .catch((e) => console.debug(e))
     )
   }
diff --git a/server/cortex.json b/server/cortex.json
index ecf94952b..56daf32cc 100644
--- a/server/cortex.json
+++ b/server/cortex.json
@@ -3633,238 +3633,6 @@
         },
         "tags": ["Files"]
       }
-    },
-    "/configs": {
-      "get": {
-        "summary": "Get Configurations",
-        "description": "Retrieves the current configuration settings of the Cortex server.",
-        "responses": {
-          "200": {
-            "description": "Successful response",
-            "content": {
-              "application/json": {
-                "schema": {
-                  "type": "object",
-                  "properties": {
-                    "allowed_origins": {
-                      "type": "array",
-                      "items": {
-                        "type": "string"
-                      },
-                      "example": ["http://127.0.0.1:39281", "https://cortex.so"]
-                    },
-                    "cors": {
-                      "type": "boolean",
-                      "example": false
-                    },
-                    "proxy_username": {
-                      "type": "string",
-                      "example": "username"
-                    },
-                    "proxy_password": {
-                      "type": "string",
-                      "example": "password"
-                    },
-                    "proxy_url": {
-                      "type": "string",
-                      "example": "http://proxy.example.com:8080"
-                    },
-                    "verify_proxy_ssl": {
-                      "type": "boolean",
-                      "description": "test",
-                      "example": false
-                    },
-                    "verify_proxy_host_ssl": {
-                      "type": "boolean",
-                      "example": false
-                    },
-                    "verify_peer_ssl": {
-                      "type": "boolean",
-                      "example": false
-                    },
-                    "verify_host_ssl": {
-                      "type": "boolean",
-                      "example": false
-                    },
-                    "no_proxy": {
-                      "type": "string",
-                      "example": "localhost"
-                    },
-                    "huggingface_token": {
-                      "type": "string",
-                      "example": "your_token"
-                    }
-                  }
-                },
-                "example": {
-                  "allowed_origins": [
-                    "http://127.0.0.1:39281",
-                    "https://cortex.so"
-                  ],
-                  "cors": false,
-                  "proxy_username": "username",
-                  "proxy_password": "password",
-                  "proxy_url": "http://proxy.example.com:8080",
-                  "verify_proxy_ssl": false,
-                  "verify_proxy_host_ssl": false,
-                  "verify_peer_ssl": false,
-                  "verify_host_ssl": false,
-                  "no_proxy": "localhost",
-                  "huggingface_token": "your_token"
-                }
-              }
-            }
-          }
-        },
-        "tags": ["Configurations"]
-      },
-      "patch": {
-        "tags": ["Configurations"],
-        "summary": "Update configuration settings",
-        "requestBody": {
-          "required": true,
-          "content": {
-            "application/json": {
-              "schema": {
-                "type": "object",
-                "properties": {
-                  "cors": {
-                    "type": "boolean",
-                    "description": "Indicates whether CORS is enabled.",
-                    "example": false
-                  },
-                  "allowed_origins": {
-                    "type": "array",
-                    "items": {
-                      "type": "string"
-                    },
-                    "description": "List of allowed origins.",
-                    "example": ["http://127.0.0.1:39281", "https://cortex.so"]
-                  },
-                  "proxy_username": {
-                    "type": "string",
-                    "description": "Username for the proxy server.",
-                    "example": "username"
-                  },
-                  "proxy_password": {
-                    "type": "string",
-                    "description": "Password for the proxy server.",
-                    "example": "password"
-                  },
-                  "proxy_url": {
-                    "type": "string",
-                    "description": "URL for the proxy server.",
-                    "example": "http://proxy.example.com:8080"
-                  },
-                  "verify_proxy_ssl": {
-                    "type": "boolean",
-                    "description": "Indicates whether to verify the SSL certificate of the proxy server.",
-                    "example": false
-                  },
-                  "verify_proxy_host_ssl": {
-                    "type": "boolean",
-                    "description": "Indicates whether to verify the SSL certificate of the proxy server host.",
-                    "example": false
-                  },
-                  "verify_peer_ssl": {
-                    "type": "boolean",
-                    "description": "Indicates whether to verify the SSL certificate of the peer.",
-                    "example": false
-                  },
-                  "verify_host_ssl": {
-                    "type": "boolean",
-                    "description": "Indicates whether to verify the SSL certificate of the host.",
-                    "example": false
-                  },
-                  "no_proxy": {
-                    "type": "string",
-                    "description": "List of hosts that should not be proxied.",
-                    "example": "localhost"
-                  },
-                  "huggingface_token": {
-                    "type": "string",
-                    "description": "HuggingFace token to pull models.",
-                    "example": "your_token"
-                  }
-                }
-              }
-            }
-          }
-        },
-        "responses": {
-          "200": {
-            "description": "Configuration updated successfully",
-            "content": {
-              "application/json": {
-                "schema": {
-                  "type": "object",
-                  "properties": {
-                    "config": {
-                      "type": "object",
-                      "properties": {
-                        "allowed_origins": {
-                          "type": "array",
-                          "items": {
-                            "type": "string"
-                          },
-                          "example": [
-                            "http://127.0.0.1:39281",
-                            "https://cortex.so"
-                          ]
-                        },
-                        "cors": {
-                          "type": "boolean",
-                          "example": false
-                        },
-                        "proxy_username": {
-                          "type": "string",
-                          "example": "username"
-                        },
-                        "proxy_password": {
-                          "type": "string",
-                          "example": "password"
-                        },
-                        "proxy_url": {
-                          "type": "string",
-                          "example": "http://proxy.example.com:8080"
-                        },
-                        "verify_proxy_ssl": {
-                          "type": "boolean",
-                          "example": false
-                        },
-                        "verify_proxy_host_ssl": {
-                          "type": "boolean",
-                          "example": false
-                        },
-                        "verify_peer_ssl": {
-                          "type": "boolean",
-                          "example": false
-                        },
-                        "verify_host_ssl": {
-                          "type": "boolean",
-                          "example": false
-                        },
-                        "no_proxy": {
-                          "type": "string",
-                          "example": "localhost"
-                        },
-                        "huggingface_token": {
-                          "type": "string",
-                          "example": "your_token"
-                        }
-                      }
-                    },
-                    "message": {
-                      "type": "string",
-                      "example": "Configuration updated successfully"
-                    }
-                  }
-                }
-              }
-            }
-          }
-        }
-      }
     }
   },
   "info": {
diff --git a/server/index.ts b/server/index.ts
index cee3e4df2..4ea927199 100644
--- a/server/index.ts
+++ b/server/index.ts
@@ -86,6 +86,14 @@ export const startServer = async (configs?: ServerConfig): Promise<boolean> => {
       },
     })
 
+    // Inject the app token into proxied requests; '/configs' requests keep the caller's headers.
+    const rewriteRequestHeaders = (req: any, headers: any) => {
+      if (req.url.includes('/configs')) return headers
+      // Fall back to the default key instead of sending the literal "Bearer undefined"
+      const token = process.env.appToken ?? 'cortex.cpp'
+      return { ...headers, authorization: `Bearer ${token}` }
+    }
+
     // Register Swagger UI
     await server.register(require('@fastify/swagger-ui'), {
       routePrefix: '/',
@@ -102,24 +110,36 @@ export const startServer = async (configs?: ServerConfig): Promise<boolean> => {
       upstream: `${CORTEX_API_URL}/v1`,
       prefix: configs?.prefix ?? '/v1',
       http2: false,
-    })
-
-    server.register(proxy, {
-      upstream: `${CORTEX_API_URL}/system`,
-      prefix:'/system',
-      http2: false,
+      replyOptions: {
+        rewriteRequestHeaders,
+      },
     })
 
     server.register(proxy, {
       upstream: `${CORTEX_API_URL}/processManager`,
-      prefix:'/processManager',
+      prefix: '/processManager',
       http2: false,
+      replyOptions: {
+        rewriteRequestHeaders,
+      },
+    })
+
+    server.register(proxy, {
+      upstream: `${CORTEX_API_URL}/system`,
+      prefix: '/system',
+      http2: false,
+      replyOptions: {
+        rewriteRequestHeaders,
+      },
     })
 
     server.register(proxy, {
       upstream: `${CORTEX_API_URL}/healthz`,
-      prefix:'/healthz',
+      prefix: '/healthz',
       http2: false,
+      replyOptions: {
+        rewriteRequestHeaders,
+      },
     })
 
     // Start listening for requests
diff --git a/web/containers/Providers/DataLoader.tsx b/web/containers/Providers/DataLoader.tsx
index 832e47d1a..1174150f4 100644
--- a/web/containers/Providers/DataLoader.tsx
+++ b/web/containers/Providers/DataLoader.tsx
@@ -15,6 +15,7 @@ import { useDebouncedCallback } from 'use-debounce'
 import useAssistants from '@/hooks/useAssistants'
 import { useGetEngines } from '@/hooks/useEngineManagement'
 import useGetSystemResources from '@/hooks/useGetSystemResources'
+import { useGetHardwareInfo } from '@/hooks/useHardwareManagement'
 import useModels from '@/hooks/useModels'
 import useThreads from '@/hooks/useThreads'
 
@@ -34,6 +35,7 @@ const DataLoader: React.FC = () => {
   const setJanSettingScreen = useSetAtom(janSettingScreenAtom)
   const { getData: loadModels } = useModels()
   const { mutate } = useGetEngines()
+  const { mutate: getHardwareInfo } = useGetHardwareInfo(false)
 
   useThreads()
   useAssistants()
@@ -42,6 +44,7 @@ const DataLoader: React.FC = () => {
   useEffect(() => {
     // Load data once
     loadModels()
+    getHardwareInfo()
     // eslint-disable-next-line react-hooks/exhaustive-deps
   }, [])
   const reloadData = useDebouncedCallback(() => {
diff --git a/web/hooks/useHardwareManagement.ts b/web/hooks/useHardwareManagement.ts
index d39b3c1fc..90dbdb2b5 100644
--- a/web/hooks/useHardwareManagement.ts
+++ b/web/hooks/useHardwareManagement.ts
@@ -32,7 +32,7 @@ const getExtension = () =>
 /**
  * @returns A Promise that resolves to an object of list engines.
  */
-export function useGetHardwareInfo() {
+export function useGetHardwareInfo(updatePeriodically: boolean = true) {
   const setCpuUsage = useSetAtom(cpuUsageAtom)
   const setUsedRam = useSetAtom(usedRamAtom)
   const setTotalRam = useSetAtom(totalRamAtom)
@@ -56,7 +56,7 @@ export function useGetHardwareInfo() {
     {
       revalidateOnFocus: false,
       revalidateOnReconnect: false,
-      refreshInterval: 2000,
+      refreshInterval: updatePeriodically ? 2000 : undefined,
     }
   )
 
diff --git a/web/screens/Hub/index.tsx b/web/screens/Hub/index.tsx
index d98ba7bbe..ff6bf3a77 100644
--- a/web/screens/Hub/index.tsx
+++ b/web/screens/Hub/index.tsx
@@ -80,7 +80,7 @@ const filterOptions = [
   },
 ]
 
-const hubCompatibleAtom = atom(true)
+const hubCompatibleAtom = atom(false)
 
 const HubScreen = () => {
   const { sources } = useGetModelSources()
diff --git a/web/screens/Settings/Hardware/index.tsx b/web/screens/Settings/Hardware/index.tsx
index e0c25971b..41ef50073 100644
--- a/web/screens/Settings/Hardware/index.tsx
+++ b/web/screens/Settings/Hardware/index.tsx
@@ -6,13 +6,14 @@ import { useState } from 'react'
 import { DragDropContext, Draggable, Droppable } from '@hello-pangea/dnd'
 
 import { Progress, ScrollArea, Switch } from '@janhq/joi'
-import { useAtom, useAtomValue } from 'jotai'
+import { useAtom, useAtomValue, useSetAtom } from 'jotai'
 import { atomWithStorage } from 'jotai/utils'
 
 import { ChevronDownIcon, GripVerticalIcon } from 'lucide-react'
 
 import { twMerge } from 'tailwind-merge'
 
+import { activeModelAtom } from '@/hooks/useActiveModel'
 import {
   useGetHardwareInfo,
   setActiveGpus,
@@ -47,6 +48,7 @@ const Hardware = () => {
   const ramUtilitized = useAtomValue(ramUtilitizedAtom)
   const showScrollBar = useAtomValue(showScrollBarAtom)
   const [gpus, setGpus] = useAtom(gpusAtom)
+  const setActiveModel = useSetAtom(activeModelAtom)
 
   const [orderGpus, setOrderGpus] = useAtom(orderGpusAtom)
 
@@ -70,11 +72,15 @@ const Hardware = () => {
         .filter((gpu: any) => gpu.activated)
         .map((gpu: any) => Number(gpu.id))
       await setActiveGpus({ gpus: activeGpuIds })
+      setActiveModel(undefined)
       mutate()
-      window.location.reload()
     } catch (error) {
       console.error('Failed to update active GPUs:', error)
     }
+    setIsActivatingGpu((prev) => {
+      prev.delete(id)
+      return new Set(prev)
+    })
   }
 
   const handleDragEnd = (result: any) => {