From 0cbf35dc77d5a1a915657103c66ac2a69bdea479 Mon Sep 17 00:00:00 2001
From: Akarshan
Date: Mon, 23 Jun 2025 20:43:54 +0530
Subject: [PATCH] Add auto-unload setting to llamacpp-extension

---
 extensions/llamacpp-extension/settings.json | 37 ++++++++++++---------
 extensions/llamacpp-extension/src/index.ts  |  4 ++-
 2 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/extensions/llamacpp-extension/settings.json b/extensions/llamacpp-extension/settings.json
index 206a73ab3..3d57fbdf9 100644
--- a/extensions/llamacpp-extension/settings.json
+++ b/extensions/llamacpp-extension/settings.json
@@ -9,6 +9,13 @@
       "options": []
     }
   },
+  {
+    "key": "auto_unload_models",
+    "title": "Auto-Unload Old Models",
+    "description": "Automatically unloads models that are not in use to free up memory, ensuring only one model is loaded at a time.",
+    "controllerType": "checkbox",
+    "controllerProps": { "value": true }
+  },
   {
     "key": "threads",
     "title": "Threads",
@@ -21,7 +28,7 @@
       "textAlign": "right"
     }
   },
-  { 
+  {
     "key": "threads_batch",
     "title": "Threads (Batch)",
     "description": "Number of threads for batch and prompt processing (default: same as Threads).",
@@ -69,7 +76,7 @@
       "textAlign": "right"
     }
   },
-  { 
+  {
     "key": "ubatch_size",
     "title": "uBatch Size",
     "description": "Physical maximum batch size for processing prompts.",
@@ -93,7 +100,7 @@
       "textAlign": "right"
     }
   },
-  { 
+  {
     "key": "device",
     "title": "Devices for Offload",
     "description": "Comma-separated list of devices to use for offloading (e.g., 'cuda:0', 'cuda:0,cuda:1'). Leave empty to use default/CPU only.",
@@ -118,7 +125,7 @@
       ]
     }
   },
-  { 
+  {
     "key": "main_gpu",
     "title": "Main GPU Index",
     "description": "The GPU to use for the model (split-mode=none) or intermediate results (split-mode=row).",
@@ -145,7 +152,7 @@
     "description": "Enable continuous batching (a.k.a dynamic batching) for concurrent requests (default: enabled).",
     "controllerType": "checkbox",
     "controllerProps": {
-      "value": true
+      "value": false
     }
   },
   {
@@ -256,7 +263,7 @@
       "step": 0.01
     }
   },
-  { 
+  {
     "key": "rope_freq_base",
     "title": "RoPE Frequency Base",
     "description": "RoPE base frequency (0 = loaded from model).",
@@ -268,7 +275,7 @@
       "textAlign": "right"
     }
   },
-  { 
+  {
     "key": "rope_freq_scale",
     "title": "RoPE Frequency Scale Factor",
     "description": "RoPE frequency scaling factor.",
@@ -408,7 +415,7 @@
       ]
     }
   },
-  { 
+  {
     "key": "mirostat_lr",
     "title": "Mirostat Learning Rate",
    "description": "Mirostat learning rate (eta).",
@@ -436,7 +443,7 @@
       "step": 0.01
     }
   },
-  { 
+  {
     "key": "grammar_file",
     "title": "Grammar File",
     "description": "Path to a BNF-like grammar file to constrain generations.",
@@ -447,7 +454,7 @@
       "type": "text"
     }
   },
-  { 
+  {
     "key": "json_schema_file",
     "title": "JSON Schema File",
     "description": "Path to a JSON schema file to constrain generations.",
@@ -464,11 +471,11 @@
     "description": "Mirostat target entropy (tau).",
     "controllerType": "input",
     "controllerProps": {
-    "value": 0,
-    "options": [
-      { "value": -1, "name": "unrestricted thinking budget" },
-      { "value": 0, "name": "disable thinking" }
-    ]
+      "value": 0,
+      "options": [
+        { "value": -1, "name": "unrestricted thinking budget" },
+        { "value": 0, "name": "disable thinking" }
+      ]
     }
   }
 ]
diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts
index 06f8d7f34..95bd5d19c 100644
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@@ -30,6 +30,7 @@ import { invoke } from '@tauri-apps/api/core'
 
 type LlamacppConfig = {
   version_backend: string
+  auto_unload: boolean
   n_gpu_layers: number
   ctx_size: number
   threads: number
@@ -106,6 +107,7 @@ interface EmbeddingData {
 
 export default class llamacpp_extension extends AIEngine {
   provider: string = 'llamacpp'
+  autoUnload: boolean = true
   readonly providerId: string = 'llamacpp'
 
   private config: LlamacppConfig
@@ -132,7 +134,7 @@ export default class llamacpp_extension extends AIEngine {
         })
       }
     }
-
+    this.autoUnload = await this.getSetting('auto_unload_models', true)
    this.registerSettings(settings)
 
    let config = {}
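
Note for reviewers (not part of the patch): the diff above only registers the
'auto_unload_models' setting and caches it in this.autoUnload; the
unload-before-load behavior itself lives elsewhere in the extension. The
sketch below is a minimal, self-contained TypeScript illustration of the
pattern the flag enables. SessionInfo, activeSessions, and unloadSession are
hypothetical stand-ins for illustration only, not the extension's actual API.

  // Hypothetical sketch of an auto-unload guard in a model-load path.
  // Only the autoUnload flag mirrors the patch; every other name is invented.

  interface SessionInfo {
    modelId: string
  }

  class ModelManager {
    // Mirrors the patched setting: defaults to true, read from 'auto_unload_models'.
    autoUnload: boolean = true
    private activeSessions: SessionInfo[] = []

    async load(modelId: string): Promise<SessionInfo> {
      if (this.autoUnload) {
        // Evict every previously loaded model so only one occupies memory.
        for (const session of this.activeSessions) {
          await this.unloadSession(session)
        }
        this.activeSessions = []
      }
      const session: SessionInfo = { modelId }
      this.activeSessions.push(session)
      return session
    }

    private async unloadSession(session: SessionInfo): Promise<void> {
      // Stand-in for whatever teardown the llama.cpp backend requires.
      console.log(`unloading ${session.modelId}`)
    }
  }

With the checkbox unticked, load() skips the eviction loop and multiple
models may stay resident at once, matching the setting's description.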