fix: Prevent race condition with auto-unload during rapid model loading (#5947)

This commit addresses a race condition where, with "Auto-Unload Old Models" enabled, rapidly attempting to load multiple models could result in more than one model being loaded simultaneously.

Previously, the unloading logic did not account for models that were still in the process of loading when a new load operation was initiated. This allowed new models to start loading before the previous ones had fully completed their unload cycle.

To resolve this:
- A `loadingModels` map has been introduced to track promises for models currently in the loading state.
- The `load` method now checks if a model is already being loaded and, if so, returns the existing promise, preventing duplicate load operations for the same model.
- The `performLoad` method (which encapsulates the actual loading logic) now ensures that when `autoUnload` is active, it waits for any *other* models that are concurrently loading to finish before proceeding to unload all currently loaded models. This guarantees that the auto-unload mechanism properly unloads all models, including those initiated in quick succession, thereby preventing the race condition.

This fixes the issue where rapidly clicking the start button on multiple models in quick succession would bypass the auto-unload functionality.
This commit is contained in the following branch history:
Akarshan Biswas 2025-07-28 12:59:48 +05:30 committed by GitHub
parent a4e5973573
commit 432c942330
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -149,6 +149,7 @@ export default class llamacpp_extension extends AIEngine {
private apiSecret: string = 'JustAskNow'
private pendingDownloads: Map<string, Promise<void>> = new Map()
private isConfiguringBackends: boolean = false
private loadingModels = new Map<string, Promise<SessionInfo>>() // Track loading promises
override async onLoad(): Promise<void> {
super.onLoad() // Calls registerEngine() from AIEngine
@@ -1132,12 +1133,54 @@ export default class llamacpp_extension extends AIEngine {
if (sInfo) {
throw new Error('Model already loaded!!')
}
// If this model is already being loaded, return the existing promise
if (this.loadingModels.has(modelId)) {
return this.loadingModels.get(modelId)!
}
// Create the loading promise
const loadingPromise = this.performLoad(
modelId,
overrideSettings,
isEmbedding
)
this.loadingModels.set(modelId, loadingPromise)
try {
const result = await loadingPromise
return result
} finally {
this.loadingModels.delete(modelId)
}
}
private async performLoad(
modelId: string,
overrideSettings?: Partial<LlamacppConfig>,
isEmbedding: boolean = false
): Promise<SessionInfo> {
const loadedModels = await this.getLoadedModels()
if (loadedModels.length > 0 && this.autoUnload) {
// Unload all other models if auto-unload is enabled
await Promise.all(
loadedModels.map((loadedModel) => this.unload(loadedModel))
)
// Get OTHER models that are currently loading (exclude current model)
const otherLoadingPromises = Array.from(this.loadingModels.entries())
.filter(([id, _]) => id !== modelId)
.map(([_, promise]) => promise)
if (
this.autoUnload &&
(loadedModels.length > 0 || otherLoadingPromises.length > 0)
) {
// Wait for OTHER loading models to finish, then unload everything
if (otherLoadingPromises.length > 0) {
await Promise.all(otherLoadingPromises)
}
// Now unload all loaded models
const allLoadedModels = await this.getLoadedModels()
if (allLoadedModels.length > 0) {
await Promise.all(allLoadedModels.map((model) => this.unload(model)))
}
}
const args: string[] = []
const cfg = { ...this.config, ...(overrideSettings ?? {}) }