diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts
index 5726117e4..6157b2640 100644
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@@ -369,6 +369,30 @@ export default class llamacpp_extension extends AIEngine {
     return port
   }
 
+  // Awaitable setTimeout: resolves after `ms` milliseconds.
+  private async sleep(ms: number): Promise<void> {
+    return new Promise((resolve) => setTimeout(resolve, ms))
+  }
+
+  // Poll the llama.cpp server's /health endpoint until it reports ready,
+  // or throw after `timeoutMs` milliseconds. Connection errors while the
+  // server is still starting up are expected and swallowed between retries.
+  private async waitForModelLoad(port: number, timeoutMs = 30_000): Promise<void> {
+    const start = Date.now()
+    while (Date.now() - start < timeoutMs) {
+      try {
+        const res = await fetch(`http://localhost:${port}/health`)
+        if (res.ok) {
+          return
+        }
+      } catch {
+        // Server not accepting connections yet — keep polling.
+      }
+      await this.sleep(500) // 500 ms between health checks
+    }
+    throw new Error(`Timed out loading model after ${timeoutMs} ms`)
+  }
+
   override async load(modelId: string): Promise<SessionInfo> {
     const sInfo = this.findSessionByModel(modelId)
     if (sInfo) {
@@ -464,6 +488,10 @@ export default class llamacpp_extension extends AIEngine {
       args
     })
 
+    // Block until the health check passes so callers never receive a
+    // session whose model is still loading.
+    await this.waitForModelLoad(sInfo.port)
+
     // Store the session info for later use
     this.activeSessions.set(sInfo.pid, sInfo)
 
@@ -586,7 +614,7 @@ export default class llamacpp_extension extends AIEngine {
     const url = `${baseUrl}/chat/completions`
     const headers = {
       'Content-Type': 'application/json',
-      'Authorization': `Bearer ${sessionInfo.api_key}`,
+      'Authorization': `Bearer ${sessionInfo.api_key}`,
     }
 
     const body = JSON.stringify(opts)