fix: Prevent race condition with auto-unload during rapid model loading (#5947)

This commit addresses a race condition where, with "Auto-Unload Old Models" enabled, rapidly attempting to load multiple models could result in more than one model being loaded simultaneously.

Previously, the unloading logic did not account for models that were still in the process of loading when a new load operation was initiated. This allowed new models to start loading before the previous ones had fully completed their unload cycle.

To resolve this:
- A `loadingModels` map has been introduced to track promises for models currently in the loading state.
- The `load` method now checks if a model is already being loaded and, if so, returns the existing promise, preventing duplicate load operations for the same model.
- The `performLoad` method (which encapsulates the actual loading logic) now ensures that when `autoUnload` is active, it waits for any *other* models that are concurrently loading to finish before proceeding to unload all currently loaded models. This guarantees that the auto-unload mechanism properly unloads all models, including those initiated in quick succession, thereby preventing the race condition.

This fixes the issue where rapidly clicking the start button on multiple models in quick succession would bypass the auto-unload functionality.
This commit is contained in the following branch history:
Akarshan Biswas 2025-07-28 12:59:48 +05:30 committed by GitHub
parent a4e5973573
commit 432c942330
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -149,6 +149,7 @@ export default class llamacpp_extension extends AIEngine {
private apiSecret: string = 'JustAskNow'
private pendingDownloads: Map<string, Promise<void>> = new Map()
private isConfiguringBackends: boolean = false
private loadingModels = new Map<string, Promise<SessionInfo>>() // Track loading promises
override async onLoad(): Promise<void> {
super.onLoad() // Calls registerEngine() from AIEngine
@@ -1132,12 +1133,54 @@ export default class llamacpp_extension extends AIEngine {
if (sInfo) {
throw new Error('Model already loaded!!')
}
// If this model is already being loaded, return the existing promise
if (this.loadingModels.has(modelId)) {
return this.loadingModels.get(modelId)!
}
// Create the loading promise
const loadingPromise = this.performLoad(
modelId,
overrideSettings,
isEmbedding
)
this.loadingModels.set(modelId, loadingPromise)
try {
const result = await loadingPromise
return result
} finally {
this.loadingModels.delete(modelId)
}
}
private async performLoad(
modelId: string,
overrideSettings?: Partial<LlamacppConfig>,
isEmbedding: boolean = false
): Promise<SessionInfo> {
const loadedModels = await this.getLoadedModels()
if (loadedModels.length > 0 && this.autoUnload) {
// Unload all other models if auto-unload is enabled
await Promise.all(
loadedModels.map((loadedModel) => this.unload(loadedModel))
)
// Get OTHER models that are currently loading (exclude current model)
const otherLoadingPromises = Array.from(this.loadingModels.entries())
.filter(([id, _]) => id !== modelId)
.map(([_, promise]) => promise)
if (
this.autoUnload &&
(loadedModels.length > 0 || otherLoadingPromises.length > 0)
) {
// Wait for OTHER loading models to finish, then unload everything
if (otherLoadingPromises.length > 0) {
await Promise.all(otherLoadingPromises)
}
// Now unload all loaded models
const allLoadedModels = await this.getLoadedModels()
if (allLoadedModels.length > 0) {
await Promise.all(allLoadedModels.map((model) => this.unload(model)))
}
}
const args: string[] = []
const cfg = { ...this.config, ...(overrideSettings ?? {}) }