From 81d6ed37858040457c75b51c311bf99477ca9f26 Mon Sep 17 00:00:00 2001
From: Akarshan Biswas
Date: Mon, 21 Jul 2025 08:59:50 +0530
Subject: [PATCH] feat: support per-model overrides in llama.cpp load() (#5820)

* feat: support per-model overrides in llama.cpp load()

Extend the `load()` method in the llama.cpp extension to accept an optional
`overrideSettings` argument, allowing fine-grained per-model configuration.
This enables users to override provider-level settings such as `ctx_size`,
`chat_template`, `n_gpu_layers`, etc., when loading a specific model.

Fixes: #5818 (Feature Request - Jan v0.6.6)

Use cases enabled:
- Different context sizes per model (e.g., 4K vs 32K)
- Model-specific chat templates (ChatML, Alpaca, etc.)
- Performance tuning (threads, GPU layers)
- Better memory management per deployment

Maintains full backward compatibility with existing provider config.

* swap overrideSettings and isEmbedding argument
---
 extensions/llamacpp-extension/src/index.ts | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts
index 47f0e631a..5544c0670 100644
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@@ -764,6 +764,7 @@ export default class llamacpp_extension extends AIEngine {
   override async load(
     modelId: string,
+    overrideSettings?: Partial,
     isEmbedding: boolean = false
   ): Promise {
     const sInfo = this.findSessionByModel(modelId)
@@ -778,7 +779,7 @@ export default class llamacpp_extension extends AIEngine {
       )
     }
     const args: string[] = []
-    const cfg = this.config
+    const cfg = { ...this.config, ...(overrideSettings ?? {}) }
     const [version, backend] = cfg.version_backend.split('/')
     if (!version || !backend) {
       throw new Error(
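
Usage note (not part of the patch): a minimal caller-side sketch of the new signature. It assumes an already-constructed instance of the extension named `engine`, a provider config exposing fields such as `ctx_size` and `n_gpu_layers`, and placeholder model IDs; the concrete type elided behind `Partial` in the hunk above is the extension's provider config type, which this diff does not show.

// Hypothetical example: two loads on the same engine, each layering
// per-model overrides on top of the shared provider config.
// Fields omitted from overrideSettings keep their provider-level values.
await engine.load('my-chat-model', {
  ctx_size: 32768,   // larger context for long-document chat
  n_gpu_layers: 99,  // offload as many layers as the GPU allows
})

// An embedding model with a small context. Note the argument order after
// the second commit: overrideSettings comes before isEmbedding.
await engine.load('my-embedding-model', { ctx_size: 512 }, true)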