diff --git a/extensions/conversational-extension/src/index.ts b/extensions/conversational-extension/src/index.ts
index 5a74c460a..791385fc9 100644
--- a/extensions/conversational-extension/src/index.ts
+++ b/extensions/conversational-extension/src/index.ts
@@ -22,22 +22,26 @@ type MessageList = {
 export default class CortexConversationalExtension extends ConversationalExtension {
   queue = new PQueue({ concurrency: 1 })
 
+  api?: KyInstance
   /**
-   * Extended API instance for making requests to the Cortex API.
+   * Get the cached API client, creating it with the app token on first use
    * @returns
    */
-  api: KyInstance
+  async apiInstance(): Promise<KyInstance> {
+    if (this.api) return this.api
+    const apiKey = (await window.core?.api.appToken()) ?? 'cortex.cpp'
+    this.api = this.api ?? ky.extend({ // keep any client set while awaiting
+      prefixUrl: API_URL,
+      headers: {
+        Authorization: `Bearer ${apiKey}`,
+      },
+    })
+    return this.api
+  }
   /**
    * Called when the extension is loaded.
    */
   async onLoad() {
-    const apiKey = await window.core?.api.appToken() ?? 'cortex.cpp'
-    this.api = ky.extend({
-        prefixUrl: API_URL,
-        headers: {
-          Authorization: `Bearer ${apiKey}`,
-        },
-      })
     this.queue.add(() => this.healthz())
   }
 
@@ -51,10 +55,12 @@ export default class CortexConversationalExtension extends ConversationalExtensi
    */
   async listThreads(): Promise<Thread[]> {
     return this.queue.add(() =>
-      this.api
-        .get('v1/threads?limit=-1')
-        .json<ThreadList>()
-        .then((e) => e.data)
+      this.apiInstance().then((api) =>
+        api
+          .get('v1/threads?limit=-1')
+          .json<ThreadList>()
+          .then((e) => e.data)
+      )
     ) as Promise<Thread[]>
   }
 
@@ -64,7 +70,9 @@ export default class CortexConversationalExtension extends ConversationalExtensi
    */
   async createThread(thread: Thread): Promise<Thread> {
     return this.queue.add(() =>
-      this.api.post('v1/threads', { json: thread }).json<Thread>()
+      this.apiInstance().then((api) =>
+        api.post('v1/threads', { json: thread }).json<Thread>()
+      )
     ) as Promise<Thread>
   }
 
@@ -75,7 +83,9 @@ export default class CortexConversationalExtension extends ConversationalExtensi
   async modifyThread(thread: Thread): Promise<void> {
     return this.queue
       .add(() =>
-        this.api.patch(`v1/threads/${thread.id}`, { json: thread })
+        this.apiInstance().then((api) =>
+          api.patch(`v1/threads/${thread.id}`, { json: thread })
+        )
       )
       .then()
   }
@@ -86,7 +96,9 @@ export default class CortexConversationalExtension extends ConversationalExtensi
    */
   async deleteThread(threadId: string): Promise<void> {
     return this.queue
-      .add(() => this.api.delete(`v1/threads/${threadId}`))
+      .add(() =>
+        this.apiInstance().then((api) => api.delete(`v1/threads/${threadId}`))
+      )
       .then()
   }
 
@@ -97,11 +109,13 @@ export default class CortexConversationalExtension extends ConversationalExtensi
    */
   async createMessage(message: ThreadMessage): Promise<ThreadMessage> {
     return this.queue.add(() =>
-      this.api
-        .post(`v1/threads/${message.thread_id}/messages`, {
-          json: message,
-        })
-        .json<ThreadMessage>()
+      this.apiInstance().then((api) =>
+        api
+          .post(`v1/threads/${message.thread_id}/messages`, {
+            json: message,
+          })
+          .json<ThreadMessage>()
+      )
     ) as Promise<ThreadMessage>
   }
 
@@ -112,14 +126,13 @@ export default class CortexConversationalExtension extends ConversationalExtensi
    */
   async modifyMessage(message: ThreadMessage): Promise<ThreadMessage> {
     return this.queue.add(() =>
-      this.api
-        .patch(
-          `v1/threads/${message.thread_id}/messages/${message.id}`,
-          {
+      this.apiInstance().then((api) =>
+        api
+          .patch(`v1/threads/${message.thread_id}/messages/${message.id}`, {
             json: message,
-          }
-        )
-        .json<ThreadMessage>()
+          })
+          .json<ThreadMessage>()
+      )
     ) as Promise<ThreadMessage>
   }
 
@@ -132,7 +145,9 @@ export default class CortexConversationalExtension extends ConversationalExtensi
   async deleteMessage(threadId: string, messageId: string): Promise<void> {
     return this.queue
       .add(() =>
-        this.api.delete(`v1/threads/${threadId}/messages/${messageId}`)
+        this.apiInstance().then((api) =>
+          api.delete(`v1/threads/${threadId}/messages/${messageId}`)
+        )
       )
       .then()
   }
@@ -144,10 +159,12 @@ export default class CortexConversationalExtension extends ConversationalExtensi
    */
   async listMessages(threadId: string): Promise<ThreadMessage[]> {
     return this.queue.add(() =>
-      this.api
-        .get(`v1/threads/${threadId}/messages?order=asc&limit=-1`)
-        .json<MessageList>()
-        .then((e) => e.data)
+      this.apiInstance().then((api) =>
+        api
+          .get(`v1/threads/${threadId}/messages?order=asc&limit=-1`)
+          .json<MessageList>()
+          .then((e) => e.data)
+      )
     ) as Promise<ThreadMessage[]>
   }
 
@@ -159,9 +176,11 @@ export default class CortexConversationalExtension extends ConversationalExtensi
    */
   async getThreadAssistant(threadId: string): Promise<ThreadAssistantInfo> {
     return this.queue.add(() =>
-      this.api
-        .get(`v1/assistants/${threadId}?limit=-1`)
-        .json<ThreadAssistantInfo>()
+      this.apiInstance().then((api) =>
+        api
+          .get(`v1/assistants/${threadId}?limit=-1`)
+          .json<ThreadAssistantInfo>()
+      )
     ) as Promise<ThreadAssistantInfo>
   }
   /**
@@ -175,9 +194,11 @@ export default class CortexConversationalExtension extends ConversationalExtensi
     assistant: ThreadAssistantInfo
   ): Promise<ThreadAssistantInfo> {
     return this.queue.add(() =>
-      this.api
-        .post(`v1/assistants/${threadId}`, { json: assistant })
-        .json<ThreadAssistantInfo>()
+      this.apiInstance().then((api) =>
+        api
+          .post(`v1/assistants/${threadId}`, { json: assistant })
+          .json<ThreadAssistantInfo>()
+      )
     ) as Promise<ThreadAssistantInfo>
   }
 
@@ -192,9 +213,11 @@ export default class CortexConversationalExtension extends ConversationalExtensi
     assistant: ThreadAssistantInfo
   ): Promise<ThreadAssistantInfo> {
     return this.queue.add(() =>
-      this.api
-        .patch(`v1/assistants/${threadId}`, { json: assistant })
-        .json<ThreadAssistantInfo>()
+      this.apiInstance().then((api) =>
+        api
+          .patch(`v1/assistants/${threadId}`, { json: assistant })
+          .json<ThreadAssistantInfo>()
+      )
     ) as Promise<ThreadAssistantInfo>
   }
 
@@ -203,10 +226,12 @@ export default class CortexConversationalExtension extends ConversationalExtensi
    * @returns
    */
   async healthz(): Promise<void> {
-    return this.api
-      .get('healthz', {
-        retry: { limit: 20, delay: () => 500, methods: ['get'] },
-      })
+    return this.apiInstance()
+      .then((api) =>
+        api.get('healthz', {
+          retry: { limit: 20, delay: () => 500, methods: ['get'] },
+        })
+      )
       .then(() => {})
   }
 }
diff --git a/extensions/engine-management-extension/src/index.ts b/extensions/engine-management-extension/src/index.ts
index c6137b4b5..7d0c9f9c4 100644
--- a/extensions/engine-management-extension/src/index.ts
+++ b/extensions/engine-management-extension/src/index.ts
@@ -31,15 +31,13 @@ interface ModelList {
 export default class JanEngineManagementExtension extends EngineManagementExtension {
   queue = new PQueue({ concurrency: 1 })
 
+  api?: KyInstance
   /**
-   * Extended API instance for making requests to the Cortex API.
+   * Get the cached API client, creating it with the app token on first use
    * @returns
    */
-  api: KyInstance
-  /**
-   * Called when the extension is loaded.
-   */
-  async onLoad() {
+  async apiInstance(): Promise<KyInstance> {
+    if(this.api) return this.api
     const apiKey = (await window.core?.api.appToken()) ?? 'cortex.cpp'
     this.api = ky.extend({
       prefixUrl: API_URL,
@@ -47,6 +45,12 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
         Authorization: `Bearer ${apiKey}`,
       },
     })
+    return this.api
+  }
+  /**
+   * Called when the extension is loaded.
+   */
+  async onLoad() {
     // Symlink Engines Directory
     await executeOnMain(NODE, 'symlinkEngines')
     // Run Healthcheck
@@ -71,10 +75,12 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
    */
   async getEngines(): Promise<Engines> {
     return this.queue.add(() =>
-      this.api
-        .get('v1/engines')
-        .json<Engines>()
-        .then((e) => e)
+      this.apiInstance().then((api) =>
+        api
+          .get('v1/engines')
+          .json<Engines>()
+          .then((e) => e)
+      )
     ) as Promise<Engines>
   }
 
@@ -82,12 +88,15 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
    * @returns A Promise that resolves to an object of list engines.
    */
   async getRemoteModels(name: string): Promise<any> {
-    return this.api
-      .get(`v1/models/remote/${name}`)
-      .json<ModelList>()
-      .catch(() => ({
-        data: [],
-      })) as Promise<ModelList>
+    return this.apiInstance().then(
+      (api) =>
+        api
+          .get(`v1/models/remote/${name}`)
+          .json<ModelList>()
+          .catch(() => ({
+            data: [],
+          })) as Promise<ModelList>
+    )
   }
 
   /**
@@ -96,10 +105,12 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
    */
   async getInstalledEngines(name: InferenceEngine): Promise<EngineVariant[]> {
     return this.queue.add(() =>
-      this.api
-        .get(`v1/engines/${name}`)
-        .json<EngineVariant[]>()
-        .then((e) => e)
+      this.apiInstance().then((api) =>
+        api
+          .get(`v1/engines/${name}`)
+          .json<EngineVariant[]>()
+          .then((e) => e)
+      )
     ) as Promise<EngineVariant[]>
   }
 
@@ -115,12 +126,14 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
     platform?: string
   ) {
     return this.queue.add(() =>
-      this.api
-        .get(`v1/engines/${name}/releases/${version}`)
-        .json<EngineReleased[]>()
-        .then((e) =>
-          platform ? e.filter((r) => r.name.includes(platform)) : e
-        )
+      this.apiInstance().then((api) =>
+        api
+          .get(`v1/engines/${name}/releases/${version}`)
+          .json<EngineReleased[]>()
+          .then((e) =>
+            platform ? e.filter((r) => r.name.includes(platform)) : e
+          )
+      )
     ) as Promise<EngineReleased[]>
   }
 
@@ -131,12 +144,14 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
    */
   async getLatestReleasedEngine(name: InferenceEngine, platform?: string) {
     return this.queue.add(() =>
-      this.api
-        .get(`v1/engines/${name}/releases/latest`)
-        .json<EngineReleased[]>()
-        .then((e) =>
-          platform ? e.filter((r) => r.name.includes(platform)) : e
-        )
+      this.apiInstance().then((api) =>
+        api
+          .get(`v1/engines/${name}/releases/latest`)
+          .json<EngineReleased[]>()
+          .then((e) =>
+            platform ? e.filter((r) => r.name.includes(platform)) : e
+          )
+      )
     ) as Promise<EngineReleased[]>
   }
 
@@ -146,9 +161,11 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
    */
   async installEngine(name: string, engineConfig: EngineConfig) {
     return this.queue.add(() =>
-      this.api
-        .post(`v1/engines/${name}/install`, { json: engineConfig })
-        .then((e) => e)
+      this.apiInstance().then((api) =>
+        api
+          .post(`v1/engines/${name}/install`, { json: engineConfig })
+          .then((e) => e)
+      )
     ) as Promise<{ messages: string }>
   }
 
@@ -179,9 +196,8 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
       engineConfig.metadata.header_template = DEFAULT_REQUEST_HEADERS_TRANSFORM
 
     return this.queue.add(() =>
-      this.api
-        .post('v1/engines', { json: engineConfig })
-        .then((e) => {
+      this.apiInstance().then((api) =>
+        api.post('v1/engines', { json: engineConfig }).then((e) => {
           if (persistModels && engineConfig.metadata?.get_models_url) {
             // Pull /models from remote models endpoint
             return this.populateRemoteModels(engineConfig)
@@ -190,6 +206,7 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
           }
           return e
         })
+      )
     ) as Promise<{ messages: string }>
   }
 
@@ -199,9 +216,11 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
    */
   async uninstallEngine(name: InferenceEngine, engineConfig: EngineConfig) {
     return this.queue.add(() =>
-      this.api
-        .delete(`v1/engines/${name}/install`, { json: engineConfig })
-        .then((e) => e)
+      this.apiInstance().then((api) =>
+        api
+          .delete(`v1/engines/${name}/install`, { json: engineConfig })
+          .then((e) => e)
+      )
     ) as Promise<{ messages: string }>
   }
 
@@ -210,25 +229,27 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
    * @param model - Remote model object.
    */
   async addRemoteModel(model: Model) {
-    return this.queue
-      .add(() =>
-        this.api
-          .post('v1/models/add', {
-            json: {
-              inference_params: {
-                max_tokens: 4096,
-                temperature: 0.7,
-                top_p: 0.95,
-                stream: true,
-                frequency_penalty: 0,
-                presence_penalty: 0,
+    return this.queue.add(() =>
+      this.apiInstance()
+        .then((api) =>
+          api
+            .post('v1/models/add', {
+              json: {
+                inference_params: {
+                  max_tokens: 4096,
+                  temperature: 0.7,
+                  top_p: 0.95,
+                  stream: true,
+                  frequency_penalty: 0,
+                  presence_penalty: 0,
+                },
+                ...model,
               },
-              ...model,
-            },
-          })
-          .then((e) => e)
-      )
-      .then(() => {})
+            })
+            .then((e) => e)
+        )
+        .then(() => {})
+    )
   }
 
   /**
@@ -237,10 +258,12 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
    */
   async getDefaultEngineVariant(name: InferenceEngine) {
     return this.queue.add(() =>
-      this.api
-        .get(`v1/engines/${name}/default`)
-        .json<{ messages: string }>()
-        .then((e) => e)
+      this.apiInstance().then((api) =>
+        api
+          .get(`v1/engines/${name}/default`)
+          .json<{ messages: string }>()
+          .then((e) => e)
+      )
     ) as Promise<DefaultEngineVariant>
   }
 
@@ -254,9 +277,11 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
     engineConfig: EngineConfig
   ) {
     return this.queue.add(() =>
-      this.api
-        .post(`v1/engines/${name}/default`, { json: engineConfig })
-        .then((e) => e)
+      this.apiInstance().then((api) =>
+        api
+          .post(`v1/engines/${name}/default`, { json: engineConfig })
+          .then((e) => e)
+      )
     ) as Promise<{ messages: string }>
   }
 
@@ -265,9 +290,11 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
    */
   async updateEngine(name: InferenceEngine, engineConfig?: EngineConfig) {
     return this.queue.add(() =>
-      this.api
-        .post(`v1/engines/${name}/update`, { json: engineConfig })
-        .then((e) => e)
+      this.apiInstance().then((api) =>
+        api
+          .post(`v1/engines/${name}/update`, { json: engineConfig })
+          .then((e) => e)
+      )
     ) as Promise<{ messages: string }>
   }
 
@@ -276,10 +303,12 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
    * @returns
    */
   async healthz(): Promise<void> {
-    return this.api
-      .get('healthz', {
-        retry: { limit: 20, delay: () => 500, methods: ['get'] },
-      })
+    return this.apiInstance()
+      .then((api) =>
+        api.get('healthz', {
+          retry: { limit: 20, delay: () => 500, methods: ['get'] },
+        })
+      )
       .then(() => {
         this.queue.concurrency = Infinity
       })
diff --git a/extensions/hardware-management-extension/src/index.ts b/extensions/hardware-management-extension/src/index.ts
index 472b65c07..edd98a7ae 100644
--- a/extensions/hardware-management-extension/src/index.ts
+++ b/extensions/hardware-management-extension/src/index.ts
@@ -17,18 +17,21 @@ export default class JSONHardwareManagementExtension extends HardwareManagementE
     this.queue.add(() => this.healthz())
   }
 
+  api?: KyInstance
   /**
    * Get the API instance
    * @returns
    */
   async apiInstance(): Promise<KyInstance> {
+    if (this.api) return this.api // reuse the client once it has been built
     const apiKey = (await window.core?.api.appToken()) ?? 'cortex.cpp'
-    return ky.extend({
+    this.api = this.api ?? ky.extend({ // keep any client set while awaiting
       prefixUrl: API_URL,
       headers: {
         Authorization: `Bearer ${apiKey}`,
       },
     })
+    return this.api
   }
 
   /**
diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts
index 595885264..7ed51f9c2 100644
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@@ -75,7 +75,22 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
 
   abortControllers = new Map<string, AbortController>()
 
-  api!: KyInstance
+  api?: KyInstance
+  /**
+   * Get the cached API client, creating it with the app token on first use
+   * @returns the ky instance bound to CORTEX_API_URL with a Bearer auth header
+   */
+  async apiInstance(): Promise<KyInstance> {
+    if (this.api) return this.api
+    const apiKey = (await window.core?.api.appToken()) ?? 'cortex.cpp'
+    this.api = this.api ?? ky.extend({ // keep any client set while awaiting
+      prefixUrl: CORTEX_API_URL,
+      headers: {
+        Authorization: `Bearer ${apiKey}`,
+      },
+    })
+    return this.api
+  }
 
   /**
    * Authorization headers for the API requests.
@@ -92,13 +107,6 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
    */
   async onLoad() {
     super.onLoad()
-    const apiKey = (await window.core?.api.appToken()) ?? 'cortex.cpp'
-    this.api = ky.extend({
-      prefixUrl: CORTEX_API_URL,
-      headers: {
-        Authorization: `Bearer ${apiKey}`,
-      },
-    })
 
     // Register Settings
     this.registerSettings(SETTINGS)
@@ -172,45 +180,49 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
     this.abortControllers.set(model.id, controller)
 
     return await this.queue.add(() =>
-      this.api
-        .post('v1/models/start', {
-          json: {
-            ...extractModelLoadParams(model.settings),
-            model: model.id,
-            engine:
-              model.engine === InferenceEngine.nitro // Legacy model cache
-                ? InferenceEngine.cortex_llamacpp
-                : model.engine,
-            cont_batching: this.cont_batching,
-            n_parallel: this.n_parallel,
-            caching_enabled: this.caching_enabled,
-            flash_attn: this.flash_attn,
-            cache_type: this.cache_type,
-            use_mmap: this.use_mmap,
-            ...(this.cpu_threads ? { cpu_threads: this.cpu_threads } : {}),
-          },
-          timeout: false,
-          signal,
-        })
-        .json()
-        .catch(async (e) => {
-          throw (await e.response?.json()) ?? e
-        })
-        .finally(() => this.abortControllers.delete(model.id))
-        .then()
+      this.apiInstance().then((api) =>
+        api
+          .post('v1/models/start', {
+            json: {
+              ...extractModelLoadParams(model.settings),
+              model: model.id,
+              engine:
+                model.engine === InferenceEngine.nitro // Legacy model cache
+                  ? InferenceEngine.cortex_llamacpp
+                  : model.engine,
+              cont_batching: this.cont_batching,
+              n_parallel: this.n_parallel,
+              caching_enabled: this.caching_enabled,
+              flash_attn: this.flash_attn,
+              cache_type: this.cache_type,
+              use_mmap: this.use_mmap,
+              ...(this.cpu_threads ? { cpu_threads: this.cpu_threads } : {}),
+            },
+            timeout: false,
+            signal,
+          })
+          .json()
+          .catch(async (e) => {
+            throw (await e.response?.json()) ?? e
+          })
+          .finally(() => this.abortControllers.delete(model.id))
+          .then()
+      )
     )
   }
 
   override async unloadModel(model: Model): Promise<void> {
-    return this.api
-      .post('v1/models/stop', {
-        json: { model: model.id },
-      })
-      .json()
-      .finally(() => {
-        this.abortControllers.get(model.id)?.abort()
-      })
-      .then()
+    return this.apiInstance().then((api) =>
+      api
+        .post('v1/models/stop', {
+          json: { model: model.id },
+        })
+        .json()
+        .finally(() => {
+          this.abortControllers.get(model.id)?.abort()
+        })
+        .then()
+    )
   }
 
   /**
@@ -218,15 +230,17 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
    * @returns
    */
   private async healthz(): Promise<void> {
-    return this.api
-      .get('healthz', {
-        retry: {
-          limit: 20,
-          delay: () => 500,
-          methods: ['get'],
-        },
-      })
-      .then(() => {})
+    return this.apiInstance().then((api) =>
+      api
+        .get('healthz', {
+          retry: {
+            limit: 20,
+            delay: () => 500,
+            methods: ['get'],
+          },
+        })
+        .then(() => {})
+    )
   }
 
   /**
@@ -234,13 +248,15 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
    * @returns
    */
   private async clean(): Promise<any> {
-    return this.api
-      .delete('processmanager/destroy', {
-        timeout: 2000, // maximum 2 seconds
-        retry: {
-          limit: 0,
-        },
-      })
+    return this.apiInstance()
+      .then((api) =>
+        api.delete('processmanager/destroy', {
+          timeout: 2000, // maximum 2 seconds
+          retry: {
+            limit: 0,
+          },
+        })
+      )
       .catch(() => {
         // Do nothing
       })
diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts
index 8618103db..4362ab9a5 100644
--- a/extensions/model-extension/src/index.ts
+++ b/extensions/model-extension/src/index.ts
@@ -33,15 +33,13 @@ type Data<T> = {
 export default class JanModelExtension extends ModelExtension {
   queue = new PQueue({ concurrency: 1 })
 
+  api?: KyInstance
   /**
-   * Extended API instance for making requests to the Cortex API.
+   * Get the cached API client, creating it with the app token on first use
    * @returns
    */
-  api: KyInstance
-  /**
-   * Called when the extension is loaded.
-   */
-  async onLoad() {
+  async apiInstance(): Promise<KyInstance> {
+    if(this.api) return this.api
     const apiKey = (await window.core?.api.appToken()) ?? 'cortex.cpp'
     this.api = ky.extend({
       prefixUrl: CORTEX_API_URL,
@@ -49,6 +47,12 @@ export default class JanModelExtension extends ModelExtension {
         Authorization: `Bearer ${apiKey}`,
       },
     })
+    return this.api
+  }
+  /**
+   * Called when the extension is loaded.
+   */
+  async onLoad() {
     this.queue.add(() => this.healthz())
 
     this.registerSettings(SETTINGS)
@@ -94,13 +98,15 @@ export default class JanModelExtension extends ModelExtension {
      * Sending POST to /models/pull/{id} endpoint to pull the model
      */
     return this.queue.add(() =>
-      this.api
-        .post('v1/models/pull', { json: { model, id, name }, timeout: false })
-        .json()
-        .catch(async (e) => {
-          throw (await e.response?.json()) ?? e
-        })
-        .then()
+      this.apiInstance().then((api) =>
+        api
+          .post('v1/models/pull', { json: { model, id, name }, timeout: false })
+          .json()
+          .catch(async (e) => {
+            throw (await e.response?.json()) ?? e
+          })
+          .then()
+      )
     )
   }
 
@@ -115,10 +121,12 @@ export default class JanModelExtension extends ModelExtension {
      * Sending DELETE to /models/pull/{id} endpoint to cancel a model pull
      */
     return this.queue.add(() =>
-      this.api
-        .delete('v1/models/pull', { json: { taskId: model } })
-        .json()
-        .then()
+      this.apiInstance().then((api) =>
+        api
+          .delete('v1/models/pull', { json: { taskId: model } })
+          .json()
+          .then()
+      )
     )
   }
 
@@ -129,7 +137,11 @@ export default class JanModelExtension extends ModelExtension {
    */
   async deleteModel(model: string): Promise<void> {
     return this.queue
-      .add(() => this.api.delete(`v1/models/${model}`).json().then())
+      .add(() =>
+        this.apiInstance().then((api) =>
+          api.delete(`v1/models/${model}`).json().then()
+        )
+      )
       .catch((e) => console.debug(e))
       .finally(async () => {
         // Delete legacy model files
@@ -231,13 +243,15 @@ export default class JanModelExtension extends ModelExtension {
   async updateModel(model: Partial<Model>): Promise<Model> {
     return this.queue
       .add(() =>
-        this.api
-          .patch(`v1/models/${model.id}`, {
-            json: { ...model },
-            timeout: false,
-          })
-          .json()
-          .then()
+        this.apiInstance().then((api) =>
+          api
+            .patch(`v1/models/${model.id}`, {
+              json: { ...model },
+              timeout: false,
+            })
+            .json()
+            .then()
+        )
       )
       .then(() => this.getModel(model.id))
   }
@@ -248,10 +262,12 @@ export default class JanModelExtension extends ModelExtension {
    */
   async getModel(model: string): Promise<Model> {
     return this.queue.add(() =>
-      this.api
-        .get(`v1/models/${model}`)
-        .json()
-        .then((e) => this.transformModel(e))
+      this.apiInstance().then((api) =>
+        api
+          .get(`v1/models/${model}`)
+          .json()
+          .then((e) => this.transformModel(e))
+      )
     ) as Promise<Model>
   }
 
@@ -267,14 +283,16 @@ export default class JanModelExtension extends ModelExtension {
     option?: OptionType
   ): Promise<void> {
     return this.queue.add(() =>
-      this.api
-        .post('v1/models/import', {
-          json: { model, modelPath, name, option },
-          timeout: false,
-        })
-        .json()
-        .catch((e) => console.debug(e)) // Ignore error
-        .then()
+      this.apiInstance().then((api) =>
+        api
+          .post('v1/models/import', {
+            json: { model, modelPath, name, option },
+            timeout: false,
+          })
+          .json()
+          .catch((e) => console.debug(e)) // Ignore error
+          .then()
+      )
     )
   }
 
@@ -285,7 +303,11 @@ export default class JanModelExtension extends ModelExtension {
    */
   async getSources(): Promise<ModelSource[]> {
     const sources = await this.queue
-      .add(() => this.api.get('v1/models/sources').json<Data<ModelSource>>())
+      .add(() =>
+        this.apiInstance().then((api) =>
+          api.get('v1/models/sources').json<Data<ModelSource>>()
+        )
+      )
       .then((e) => (typeof e === 'object' ? (e.data as ModelSource[]) : []))
       .catch(() => [])
     return sources.concat(
@@ -299,11 +321,13 @@ export default class JanModelExtension extends ModelExtension {
    */
   async addSource(source: string): Promise<any> {
     return this.queue.add(() =>
-      this.api.post('v1/models/sources', {
-        json: {
-          source,
-        },
-      })
+      this.apiInstance().then((api) =>
+        api.post('v1/models/sources', {
+          json: {
+            source,
+          },
+        })
+      )
     )
   }
 
@@ -313,12 +337,14 @@ export default class JanModelExtension extends ModelExtension {
    */
   async deleteSource(source: string): Promise<any> {
     return this.queue.add(() =>
-      this.api.delete('v1/models/sources', {
-        json: {
-          source,
-        },
-        timeout: false,
-      })
+      this.apiInstance().then((api) =>
+        api.delete('v1/models/sources', {
+          json: {
+            source,
+          },
+          timeout: false,
+        })
+      )
     )
   }
   // END - Model Sources
@@ -329,7 +355,9 @@ export default class JanModelExtension extends ModelExtension {
    */
   async isModelLoaded(model: string): Promise<boolean> {
     return this.queue
-      .add(() => this.api.get(`v1/models/status/${model}`))
+      .add(() =>
+        this.apiInstance().then((api) => api.get(`v1/models/status/${model}`))
+      )
       .then((e) => true)
       .catch(() => false)
   }
@@ -348,7 +376,11 @@ export default class JanModelExtension extends ModelExtension {
    */
   async fetchModels(): Promise<Model[]> {
     return this.queue
-      .add(() => this.api.get('v1/models?limit=-1').json<Data<Model>>())
+      .add(() =>
+        this.apiInstance().then((api) =>
+          api.get('v1/models?limit=-1').json<Data<Model>>()
+        )
+      )
       .then((e) =>
         typeof e === 'object' ? e.data.map((e) => this.transformModel(e)) : []
       )
@@ -387,7 +419,11 @@ export default class JanModelExtension extends ModelExtension {
     [key: string]: any
   }): Promise<void> {
     return this.queue
-      .add(() => this.api.patch('v1/configs', { json: body }).then(() => {}))
+      .add(() =>
+        this.apiInstance().then((api) =>
+          api.patch('v1/configs', { json: body }).then(() => {})
+        )
+      )
       .catch((e) => console.debug(e))
   }
 
@@ -396,14 +432,16 @@ export default class JanModelExtension extends ModelExtension {
    * @returns
    */
   private healthz(): Promise<void> {
-    return this.api
-      .get('healthz', {
-        retry: {
-          limit: 20,
-          delay: () => 500,
-          methods: ['get'],
-        },
-      })
+    return this.apiInstance()
+      .then((api) =>
+        api.get('healthz', {
+          retry: {
+            limit: 20,
+            delay: () => 500,
+            methods: ['get'],
+          },
+        })
+      )
       .then(() => {
         this.queue.concurrency = Infinity
       })
@@ -416,17 +454,22 @@ export default class JanModelExtension extends ModelExtension {
     const models = await this.fetchModels()
 
     return this.queue.add(() =>
-      this.api
-        .get('v1/models/hub?author=cortexso&tag=cortex.cpp')
-        .json<Data<string>>()
-        .then((e) => {
-          e.data?.forEach((model) => {
-            if (
-              !models.some((e) => 'modelSource' in e && e.modelSource === model)
-            )
-              this.addSource(model).catch((e) => console.debug(e))
-          })
-        })
+      this.apiInstance()
+        .then((api) =>
+          api
+            .get('v1/models/hub?author=cortexso&tag=cortex.cpp')
+            .json<Data<string>>()
+            .then((e) => {
+              e.data?.forEach((model) => {
+                if (
+                  !models.some(
+                    (e) => 'modelSource' in e && e.modelSource === model
+                  )
+                )
+                  this.addSource(model).catch((e) => console.debug(e))
+              })
+            })
+        )
         .catch((e) => console.debug(e))
     )
   }
diff --git a/web/screens/Settings/Hardware/index.tsx b/web/screens/Settings/Hardware/index.tsx
index e0c25971b..41ef50073 100644
--- a/web/screens/Settings/Hardware/index.tsx
+++ b/web/screens/Settings/Hardware/index.tsx
@@ -6,13 +6,14 @@ import { useState } from 'react'
 import { DragDropContext, Draggable, Droppable } from '@hello-pangea/dnd'
 
 import { Progress, ScrollArea, Switch } from '@janhq/joi'
-import { useAtom, useAtomValue } from 'jotai'
+import { useAtom, useAtomValue, useSetAtom } from 'jotai'
 import { atomWithStorage } from 'jotai/utils'
 
 import { ChevronDownIcon, GripVerticalIcon } from 'lucide-react'
 
 import { twMerge } from 'tailwind-merge'
 
+import { activeModelAtom } from '@/hooks/useActiveModel'
 import {
   useGetHardwareInfo,
   setActiveGpus,
@@ -47,6 +48,7 @@ const Hardware = () => {
   const ramUtilitized = useAtomValue(ramUtilitizedAtom)
   const showScrollBar = useAtomValue(showScrollBarAtom)
   const [gpus, setGpus] = useAtom(gpusAtom)
+  const setActiveModel = useSetAtom(activeModelAtom)
 
   const [orderGpus, setOrderGpus] = useAtom(orderGpusAtom)
 
@@ -70,11 +72,15 @@ const Hardware = () => {
         .filter((gpu: any) => gpu.activated)
         .map((gpu: any) => Number(gpu.id))
       await setActiveGpus({ gpus: activeGpuIds })
+      setActiveModel(undefined)
       mutate()
-      window.location.reload()
     } catch (error) {
       console.error('Failed to update active GPUs:', error)
     }
+    setIsActivatingGpu((prev) => {
+      prev.delete(id)
+      return new Set(prev)
+    })
   }
 
   const handleDragEnd = (result: any) => {