Add auto unload setting to llamacpp-extension

This commit is contained in:
Akarshan 2025-06-23 20:43:54 +05:30 committed by Louis
parent f70bb2705d
commit 0cbf35dc77
No known key found for this signature in database
GPG Key ID: 44FA9F4D33C37DE2
2 changed files with 25 additions and 16 deletions

View File

@ -9,6 +9,13 @@
"options": [] "options": []
} }
}, },
{
"key": "auto_unload_models",
"title": "Auto-Unload Old Models",
"description": "Automatically unloads models that are not in use to free up memory. Ensures only one model is loaded at a time.",
"controllerType": "checkbox",
"controllerProps": { "value": true }
},
{ {
"key": "threads", "key": "threads",
"title": "Threads", "title": "Threads",
@ -21,7 +28,7 @@
"textAlign": "right" "textAlign": "right"
} }
}, },
{ {
"key": "threads_batch", "key": "threads_batch",
"title": "Threads (Batch)", "title": "Threads (Batch)",
"description": "Number of threads for batch and prompt processing (default: same as Threads).", "description": "Number of threads for batch and prompt processing (default: same as Threads).",
@ -69,7 +76,7 @@
"textAlign": "right" "textAlign": "right"
} }
}, },
{ {
"key": "ubatch_size", "key": "ubatch_size",
"title": "uBatch Size", "title": "uBatch Size",
"description": "Physical maximum batch size for processing prompts.", "description": "Physical maximum batch size for processing prompts.",
@ -93,7 +100,7 @@
"textAlign": "right" "textAlign": "right"
} }
}, },
{ {
"key": "device", "key": "device",
"title": "Devices for Offload", "title": "Devices for Offload",
"description": "Comma-separated list of devices to use for offloading (e.g., 'cuda:0', 'cuda:0,cuda:1'). Leave empty to use default/CPU only.", "description": "Comma-separated list of devices to use for offloading (e.g., 'cuda:0', 'cuda:0,cuda:1'). Leave empty to use default/CPU only.",
@ -118,7 +125,7 @@
] ]
} }
}, },
{ {
"key": "main_gpu", "key": "main_gpu",
"title": "Main GPU Index", "title": "Main GPU Index",
"description": "The GPU to use for the model (split-mode=none) or intermediate results (split-mode=row).", "description": "The GPU to use for the model (split-mode=none) or intermediate results (split-mode=row).",
@ -145,7 +152,7 @@
"description": "Enable continuous batching (a.k.a dynamic batching) for concurrent requests (default: enabled).", "description": "Enable continuous batching (a.k.a dynamic batching) for concurrent requests (default: enabled).",
"controllerType": "checkbox", "controllerType": "checkbox",
"controllerProps": { "controllerProps": {
"value": true "value": false
} }
}, },
{ {
@ -256,7 +263,7 @@
"step": 0.01 "step": 0.01
} }
}, },
{ {
"key": "rope_freq_base", "key": "rope_freq_base",
"title": "RoPE Frequency Base", "title": "RoPE Frequency Base",
"description": "RoPE base frequency (0 = loaded from model).", "description": "RoPE base frequency (0 = loaded from model).",
@ -268,7 +275,7 @@
"textAlign": "right" "textAlign": "right"
} }
}, },
{ {
"key": "rope_freq_scale", "key": "rope_freq_scale",
"title": "RoPE Frequency Scale Factor", "title": "RoPE Frequency Scale Factor",
"description": "RoPE frequency scaling factor.", "description": "RoPE frequency scaling factor.",
@ -408,7 +415,7 @@
] ]
} }
}, },
{ {
"key": "mirostat_lr", "key": "mirostat_lr",
"title": "Mirostat Learning Rate", "title": "Mirostat Learning Rate",
"description": "Mirostat learning rate (eta).", "description": "Mirostat learning rate (eta).",
@ -436,7 +443,7 @@
"step": 0.01 "step": 0.01
} }
}, },
{ {
"key": "grammar_file", "key": "grammar_file",
"title": "Grammar File", "title": "Grammar File",
"description": "Path to a BNF-like grammar file to constrain generations.", "description": "Path to a BNF-like grammar file to constrain generations.",
@ -447,7 +454,7 @@
"type": "text" "type": "text"
} }
}, },
{ {
"key": "json_schema_file", "key": "json_schema_file",
"title": "JSON Schema File", "title": "JSON Schema File",
"description": "Path to a JSON schema file to constrain generations.", "description": "Path to a JSON schema file to constrain generations.",
@ -464,11 +471,11 @@
"description": "Mirostat target entropy (tau).", "description": "Mirostat target entropy (tau).",
"controllerType": "input", "controllerType": "input",
"controllerProps": { "controllerProps": {
"value": 0, "value": 0,
"options": [ "options": [
{ "value": -1, "name": "unrestricted thinking budget" }, { "value": -1, "name": "unrestricted thinking budget" },
{ "value": 0, "name": "disable thinking" } { "value": 0, "name": "disable thinking" }
] ]
} }
} }
] ]

View File

@ -30,6 +30,7 @@ import { invoke } from '@tauri-apps/api/core'
type LlamacppConfig = { type LlamacppConfig = {
version_backend: string version_backend: string
auto_unload: boolean
n_gpu_layers: number n_gpu_layers: number
ctx_size: number ctx_size: number
threads: number threads: number
@ -106,6 +107,7 @@ interface EmbeddingData {
export default class llamacpp_extension extends AIEngine { export default class llamacpp_extension extends AIEngine {
provider: string = 'llamacpp' provider: string = 'llamacpp'
autoUnload: boolean = true
readonly providerId: string = 'llamacpp' readonly providerId: string = 'llamacpp'
private config: LlamacppConfig private config: LlamacppConfig
@ -132,7 +134,7 @@ export default class llamacpp_extension extends AIEngine {
}) })
} }
} }
this.autoUnload = await this.getSetting<boolean>('auto_unload_models', true)
this.registerSettings(settings) this.registerSettings(settings)
let config = {} let config = {}