diff --git a/core/src/browser/extensions/engines/AIEngine.ts b/core/src/browser/extensions/engines/AIEngine.ts
index e4054e58e..90ce0543c 100644
--- a/core/src/browser/extensions/engines/AIEngine.ts
+++ b/core/src/browser/extensions/engines/AIEngine.ts
@@ -23,37 +23,37 @@ export interface InputAudio {
 }
 
 export interface ToolFunction {
-  name: string; // Required: a-z, A-Z, 0-9, _, -, max length 64
-  description?: string;
-  parameters?: Record<string, unknown>; // JSON Schema object
-  strict?: boolean | null; // Defaults to false
+  name: string // Required: a-z, A-Z, 0-9, _, -, max length 64
+  description?: string
+  parameters?: Record<string, unknown> // JSON Schema object
+  strict?: boolean | null // Defaults to false
 }
 
 export interface Tool {
-  type: 'function'; // Currently, only 'function' is supported
-  function: ToolFunction;
+  type: 'function' // Currently, only 'function' is supported
+  function: ToolFunction
 }
 
 export interface ToolCallOptions {
-  tools?: Tool[];
+  tools?: Tool[]
 }
 
 // A specific tool choice to force the model to call
 export interface ToolCallSpec {
-  type: 'function';
+  type: 'function'
   function: {
-    name: string;
-  };
+    name: string
+  }
 }
 
 // tool_choice may be one of several modes or a specific call
-export type ToolChoice = 'none' | 'auto' | 'required' | ToolCallSpec;
+export type ToolChoice = 'none' | 'auto' | 'required' | ToolCallSpec
 
 export interface chatCompletionRequest {
-  model: string; // Model ID, though for local it might be implicit via sessionInfo
-  messages: chatCompletionRequestMessage[];
-  tools?:  Tool[];
-  tool_choice?: ToolChoice;
+  model: string // Model ID, though for local it might be implicit via sessionInfo
+  messages: chatCompletionRequestMessage[]
+  tools?: Tool[]
+  tool_choice?: ToolChoice
   // Core sampling parameters
   temperature?: number | null
   dynatemp_range?: number | null
@@ -168,7 +168,7 @@ export type listResult = modelInfo[]
 export interface SessionInfo {
   pid: number // opaque handle for unload/chat
   port: number // llama-server output port (corrected from portid)
-  model_id: string, //name of the model
+  model_id: string //name of the model
   model_path: string // path of the loaded model
   api_key: string
 }
@@ -242,7 +242,8 @@ export abstract class AIEngine extends BaseExtension {
    * Sends a chat request to the model
    */
   abstract chat(
-    opts: chatCompletionRequest
+    opts: chatCompletionRequest,
+    abortController?: AbortController
   ): Promise<chatCompletion | AsyncIterable<chatCompletionChunk>>
 
   /**
@@ -261,8 +262,8 @@ export abstract class AIEngine extends BaseExtension {
   abstract abortImport(modelId: string): Promise<void>
 
   /**
-    * Get currently loaded models
-  */
+   * Get currently loaded models
+   */
   abstract getLoadedModels(): Promise<string[]>
 
   /**
diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts
index 4fc0dcafb..47f0e631a 100644
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@@ -118,12 +118,45 @@ export default class llamacpp_extension extends AIEngine {
   private activeSessions: Map<number, SessionInfo> = new Map()
   private providerPath!: string
   private apiSecret: string = 'JustAskNow'
+  private pendingDownloads: Map<string, Promise<void>> = new Map()
 
   override async onLoad(): Promise<void> {
     super.onLoad() // Calls registerEngine() from AIEngine
 
     let settings = structuredClone(SETTINGS) // Clone to modify settings definition before registration
 
+    // This makes the settings (including the backend options and initial value) available to the Jan UI.
+    this.registerSettings(settings)
+
+    // 5. Load all settings into this.config from the registered settings.
+    // This populates `this.config` with the *persisted* user settings, falling back
+    // to the *default* values specified in the settings definitions (which might have been
+    // updated in step 3 to reflect the best available backend).
+    let loadedConfig: any = {}
+    // Iterate over the cloned 'settings' array because its 'controllerProps.value'
+    // might have been updated in step 3 to define the UI default.
+    // 'getSetting' will retrieve the actual persisted user value if it exists, falling back
+    // to the 'defaultValue' passed (which is the 'controllerProps.value' from the cloned settings array).
+    for (const item of settings) {
+      const defaultValue = item.controllerProps.value
+      // Use the potentially updated default value from the settings array as the fallback for getSetting
+      loadedConfig[item.key] = await this.getSetting<typeof defaultValue>(
+        item.key,
+        defaultValue
+      )
+    }
+    this.config = loadedConfig as LlamacppConfig
+
+    // This sets the base directory where model files for this provider are stored.
+    this.providerPath = await joinPath([
+      await getJanDataFolderPath(),
+      this.providerId,
+    ])
+
+    this.configureBackends()
+  }
+
+  async configureBackends(): Promise<void> {
     // 1. Fetch available backends early
     // This is necessary to populate the backend version dropdown in settings
     // and to determine the best available backend for auto-update/default selection.
@@ -226,6 +259,8 @@ export default class llamacpp_extension extends AIEngine {
       )
     }
 
+    let settings = structuredClone(SETTINGS) // Clone to modify settings definition before registration
+
     // 3. Update the 'version_backend' setting definition in the cloned settings array
     // This prepares the settings object that will be registered, influencing the UI default value.
     const backendSettingIndex = settings.findIndex(
@@ -274,28 +309,6 @@ export default class llamacpp_extension extends AIEngine {
       // Cannot proceed if this critical setting is missing
       throw new Error('Critical setting "version_backend" not found.')
     }
-
-    // This makes the settings (including the backend options and initial value) available to the Jan UI.
-    this.registerSettings(settings)
-
-    // 5. Load all settings into this.config from the registered settings.
-    // This populates `this.config` with the *persisted* user settings, falling back
-    // to the *default* values specified in the settings definitions (which might have been
-    // updated in step 3 to reflect the best available backend).
-    let loadedConfig: any = {}
-    // Iterate over the cloned 'settings' array because its 'controllerProps.value'
-    // might have been updated in step 3 to define the UI default.
-    // 'getSetting' will retrieve the actual persisted user value if it exists, falling back
-    // to the 'defaultValue' passed (which is the 'controllerProps.value' from the cloned settings array).
-    for (const item of settings) {
-      const defaultValue = item.controllerProps.value
-      // Use the potentially updated default value from the settings array as the fallback for getSetting
-      loadedConfig[item.key] = await this.getSetting<typeof defaultValue>(
-        item.key,
-        defaultValue
-      )
-    }
-    this.config = loadedConfig as LlamacppConfig
     // At this point, this.config.version_backend holds the value that will be used
     // UNLESS auto-update logic overrides it for the current session.
 
@@ -328,7 +341,7 @@ export default class llamacpp_extension extends AIEngine {
             `Auto-updating effective backend for this session from ${this.config.version_backend} to ${bestAvailableBackendString} (best available)`
           )
           try {
-            await downloadBackend(bestBackend, bestVersion)
+            await this.ensureBackendReady(bestBackend, bestVersion)
             effectiveBackendString = bestAvailableBackendString
             this.config.version_backend = effectiveBackendString
             this.getSettings().then((settings) => {
@@ -435,7 +448,7 @@ export default class llamacpp_extension extends AIEngine {
             // downloadBackend is called again here to ensure the *currently active* backend
             // is present, regardless of whether it was set by user config or auto-update.
             // This call will do nothing if it was already downloaded during auto-update.
-            await downloadBackend(selectedBackend, selectedVersion)
+            await this.ensureBackendReady(selectedBackend, selectedVersion)
             console.log(
               `Successfully installed effective backend: ${finalBackendToInstall}`
             )
@@ -461,13 +474,8 @@ export default class llamacpp_extension extends AIEngine {
     } else {
       console.warn('No backend selected or available in config to install.')
     }
-
-    // This sets the base directory where model files for this provider are stored.
-    this.providerPath = await joinPath([
-      await getJanDataFolderPath(),
-      this.providerId,
-    ])
   }
+
   async getProviderPath(): Promise<string> {
     if (!this.providerPath) {
       this.providerPath = await joinPath([
@@ -500,10 +508,7 @@ export default class llamacpp_extension extends AIEngine {
       const [version, backend] = valueStr.split('/')
 
       const closure = async () => {
-        const isInstalled = await isBackendInstalled(backend, version)
-        if (!isInstalled) {
-          await downloadBackend(backend, version)
-        }
+        await this.ensureBackendReady(backend, version)
       }
       closure()
     }
@@ -781,6 +786,9 @@ export default class llamacpp_extension extends AIEngine {
       )
     }
 
+    // Ensure backend is downloaded and ready before proceeding
+    await this.ensureBackendReady(backend, version)
+
     const janDataFolderPath = await getJanDataFolderPath()
     const modelConfigPath = await joinPath([
       this.providerPath,
@@ -923,15 +931,49 @@ export default class llamacpp_extension extends AIEngine {
     return `${this.provider}/${cleanModelId}`
   }
 
+  /**
+   * Resolves once `isBackendInstalled` reports the backend as installed or its
+   * download has finished (a failed download rejects); concurrent callers for
+   * the same version/backend key share a single download.
+   */
+  private async ensureBackendReady(
+    backend: string,
+    version: string
+  ): Promise<void> {
+    const backendKey = `${version}/${backend}`
+    // Nothing to do when the backend is already installed.
+    if (await isBackendInstalled(backend, version)) return
+
+    // A download for this key is already running: wait for it instead of
+    // starting a second one.
+    const inFlight = this.pendingDownloads.get(backendKey)
+    if (inFlight !== undefined) {
+      console.log(`Backend ${backendKey} download already in progress, waiting...`)
+      await inFlight
+      return
+    }
+
+    // Start the download and keep it registered until it settles, whether it
+    // succeeds or fails.
+    console.log(`Backend ${backendKey} not installed, downloading...`)
+    const download = downloadBackend(backend, version).finally(() => {
+      this.pendingDownloads.delete(backendKey)
+    })
+    this.pendingDownloads.set(backendKey, download)
+    await download
+    console.log(`Backend ${backendKey} download completed`)
+  }
+
   private async *handleStreamingResponse(
     url: string,
     headers: HeadersInit,
-    body: string
+    body: string,
+    abortController?: AbortController
   ): AsyncIterable<chatCompletionChunk> {
     const response = await fetch(url, {
       method: 'POST',
       headers,
       body,
+      signal: abortController?.signal,
     })
     if (!response.ok) {
       const errorData = await response.json().catch(() => null)
@@ -1035,7 +1077,7 @@ export default class llamacpp_extension extends AIEngine {
 
     const body = JSON.stringify(opts)
     if (opts.stream) {
-      return this.handleStreamingResponse(url, headers, body)
+      return this.handleStreamingResponse(url, headers, body, abortController)
     }
     // Handle non-streaming response
     const response = await fetch(url, {
diff --git a/web-app/src/containers/ChatInput.tsx b/web-app/src/containers/ChatInput.tsx
index 64aa9af57..7a2ed912c 100644
--- a/web-app/src/containers/ChatInput.tsx
+++ b/web-app/src/containers/ChatInput.tsx
@@ -34,7 +34,6 @@ import DropdownModelProvider from '@/containers/DropdownModelProvider'
 import { ModelLoader } from '@/containers/loaders/ModelLoader'
 import DropdownToolsAvailable from '@/containers/DropdownToolsAvailable'
 import { getConnectedServers } from '@/services/mcp'
-import { stopAllModels } from '@/services/models'
 
 type ChatInputProps = {
   className?: string
@@ -162,7 +161,6 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
   const stopStreaming = useCallback(
     (threadId: string) => {
       abortControllers[threadId]?.abort()
-      stopAllModels()
     },
     [abortControllers]
   )
diff --git a/web-app/src/containers/DownloadManegement.tsx b/web-app/src/containers/DownloadManegement.tsx
index acbc66bc2..77b03aedf 100644
--- a/web-app/src/containers/DownloadManegement.tsx
+++ b/web-app/src/containers/DownloadManegement.tsx
@@ -183,7 +183,7 @@ export function DownloadManagement() {
       toast.success(t('common:toast.downloadComplete.title'), {
         id: 'download-complete',
         description: t('common:toast.downloadComplete.description', {
-          modelId: state.modelId,
+          item: state.modelId,
         }),
       })
     },
diff --git a/web-app/src/hooks/useChat.ts b/web-app/src/hooks/useChat.ts
index e9e190a5c..4a841846d 100644
--- a/web-app/src/hooks/useChat.ts
+++ b/web-app/src/hooks/useChat.ts
@@ -399,11 +399,13 @@ export const useChat = () => {
           if (!followUpWithToolUse) availableTools = []
         }
       } catch (error) {
-        const errorMessage =
-          error && typeof error === 'object' && 'message' in error
-            ? error.message
-            : error
-        setModelLoadError(`${errorMessage}`)
+        if (!abortController.signal.aborted) {
+          const errorMessage =
+            error && typeof error === 'object' && 'message' in error
+              ? error.message
+              : error
+          setModelLoadError(`${errorMessage}`)
+        }
       } finally {
         updateLoadingModel(false)
         updateStreamingContent(undefined)
diff --git a/web-app/src/lib/completion.ts b/web-app/src/lib/completion.ts
index ac3c07833..5f5bb5c1c 100644
--- a/web-app/src/lib/completion.ts
+++ b/web-app/src/lib/completion.ts
@@ -185,14 +185,17 @@ export const sendCompletion = async (
   const engine = ExtensionManager.getInstance().getEngine(provider.provider)
 
   const completion = engine
-    ? await engine.chat({
-        messages: messages as chatCompletionRequestMessage[],
-        model: thread.model?.id,
-        tools: normalizeTools(tools),
-        tool_choice: tools.length ? 'auto' : undefined,
-        stream: true,
-        ...params,
-      })
+    ? await engine.chat(
+        {
+          messages: messages as chatCompletionRequestMessage[],
+          model: thread.model?.id,
+          tools: normalizeTools(tools),
+          tool_choice: tools.length ? 'auto' : undefined,
+          stream: true,
+          ...params,
+        },
+        abortController
+      )
     : stream
       ? await tokenJS.chat.completions.create(
           {
diff --git a/web-app/src/locales/de-DE/common.json b/web-app/src/locales/de-DE/common.json
index ab61a756a..0664f5246 100644
--- a/web-app/src/locales/de-DE/common.json
+++ b/web-app/src/locales/de-DE/common.json
@@ -251,7 +251,7 @@
     },
     "downloadComplete": {
       "title": "Download abgeschlossen",
-      "description": "Das Modell {{modelId}} wurde heruntergeladen"
+      "description": "{{item}} wurde heruntergeladen"
     },
     "downloadCancelled": {
       "title": "Download abgebrochen",
diff --git a/web-app/src/locales/en/common.json b/web-app/src/locales/en/common.json
index 500e6094c..2dbab0425 100644
--- a/web-app/src/locales/en/common.json
+++ b/web-app/src/locales/en/common.json
@@ -251,7 +251,7 @@
     },
     "downloadComplete": {
       "title": "Download Complete",
-      "description": "The model {{modelId}} has been downloaded"
+      "description": "{{item}} has been downloaded"
     },
     "downloadCancelled": {
       "title": "Download Cancelled",
diff --git a/web-app/src/locales/id/common.json b/web-app/src/locales/id/common.json
index 5ba59011b..0398ea2a4 100644
--- a/web-app/src/locales/id/common.json
+++ b/web-app/src/locales/id/common.json
@@ -244,7 +244,7 @@
     },
     "downloadComplete": {
       "title": "Unduhan Selesai",
-      "description": "Model {{modelId}} telah diunduh"
+      "description": "{{item}} telah diunduh"
     },
     "downloadCancelled": {
       "title": "Unduhan Dibatalkan",
diff --git a/web-app/src/locales/vn/common.json b/web-app/src/locales/vn/common.json
index 6d5edbf94..13ac2089e 100644
--- a/web-app/src/locales/vn/common.json
+++ b/web-app/src/locales/vn/common.json
@@ -244,7 +244,7 @@
     },
     "downloadComplete": {
       "title": "Tải xuống hoàn tất",
-      "description": "Mô hình {{modelId}} đã được tải xuống"
+      "description": "{{item}} đã được tải xuống"
     },
     "downloadCancelled": {
       "title": "Đã hủy tải xuống",
diff --git a/web-app/src/locales/zh-CN/common.json b/web-app/src/locales/zh-CN/common.json
index 40e392c97..231bdecfc 100644
--- a/web-app/src/locales/zh-CN/common.json
+++ b/web-app/src/locales/zh-CN/common.json
@@ -244,7 +244,7 @@
     },
     "downloadComplete": {
       "title": "下载完成",
-      "description": "模型 {{modelId}} 已下载"
+      "description": "{{item}} 已下载"
     },
     "downloadCancelled": {
       "title": "下载已取消",
diff --git a/web-app/src/locales/zh-TW/common.json b/web-app/src/locales/zh-TW/common.json
index 92b4a3c0d..e633cf017 100644
--- a/web-app/src/locales/zh-TW/common.json
+++ b/web-app/src/locales/zh-TW/common.json
@@ -244,7 +244,7 @@
     },
     "downloadComplete": {
       "title": "下載完成",
-      "description": "模型 {{modelId}} 已下載"
+      "description": "{{item}} 已下載"
     },
     "downloadCancelled": {
       "title": "下載已取消",