Add auto unload setting to llamacpp-extension
parent f70bb2705d
commit 0cbf35dc77
@@ -9,6 +9,13 @@
       "options": []
     }
   },
+  {
+    "key": "auto_unload_models",
+    "title": "Auto-Unload Old Models",
+    "description": "Automatically unloads models that are not in use to free up memory. Ensure only one model is loaded at a time.",
+    "controllerType": "checkbox",
+    "controllerProps": { "value": true }
+  },
   {
     "key": "threads",
     "title": "Threads",
@@ -21,7 +28,7 @@
       "textAlign": "right"
     }
   },
   {
     "key": "threads_batch",
     "title": "Threads (Batch)",
     "description": "Number of threads for batch and prompt processing (default: same as Threads).",
@@ -69,7 +76,7 @@
       "textAlign": "right"
     }
   },
   {
     "key": "ubatch_size",
     "title": "uBatch Size",
     "description": "Physical maximum batch size for processing prompts.",
@@ -93,7 +100,7 @@
       "textAlign": "right"
     }
   },
   {
     "key": "device",
     "title": "Devices for Offload",
     "description": "Comma-separated list of devices to use for offloading (e.g., 'cuda:0', 'cuda:0,cuda:1'). Leave empty to use default/CPU only.",
@@ -118,7 +125,7 @@
       ]
     }
   },
   {
     "key": "main_gpu",
     "title": "Main GPU Index",
     "description": "The GPU to use for the model (split-mode=none) or intermediate results (split-mode=row).",
@@ -145,7 +152,7 @@
     "description": "Enable continuous batching (a.k.a dynamic batching) for concurrent requests (default: enabled).",
     "controllerType": "checkbox",
     "controllerProps": {
-      "value": true
+      "value": false
     }
   },
   {
@@ -256,7 +263,7 @@
       "step": 0.01
     }
   },
   {
     "key": "rope_freq_base",
     "title": "RoPE Frequency Base",
     "description": "RoPE base frequency (0 = loaded from model).",
@@ -268,7 +275,7 @@
       "textAlign": "right"
     }
   },
   {
     "key": "rope_freq_scale",
     "title": "RoPE Frequency Scale Factor",
     "description": "RoPE frequency scaling factor.",
@@ -408,7 +415,7 @@
       ]
     }
   },
   {
     "key": "mirostat_lr",
     "title": "Mirostat Learning Rate",
     "description": "Mirostat learning rate (eta).",
@@ -436,7 +443,7 @@
       "step": 0.01
     }
   },
   {
     "key": "grammar_file",
     "title": "Grammar File",
     "description": "Path to a BNF-like grammar file to constrain generations.",
@@ -447,7 +454,7 @@
       "type": "text"
     }
   },
   {
     "key": "json_schema_file",
     "title": "JSON Schema File",
     "description": "Path to a JSON schema file to constrain generations.",
@@ -464,11 +471,11 @@
     "description": "Mirostat target entropy (tau).",
     "controllerType": "input",
     "controllerProps": {
       "value": 0,
       "options": [
         { "value": -1, "name": "unrestricted thinking budget" },
         { "value": 0, "name": "disable thinking" }
       ]
     }
   }
 ]
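The settings manifest above only declares the toggle; the TypeScript hunks below wire the flag into the extension itself, and the actual unloading has to happen in the extension's load path, which this diff does not show. A minimal sketch of the intended "keep only one model resident" behavior, assuming hypothetical `loadedSessions` and `unloadOne` helpers (illustrative names, not from this commit):

```typescript
// Illustrative sketch only: `loadedSessions` and `unloadOne` are stand-ins
// for the extension's real session bookkeeping, not names from this diff.
async function unloadOthers(
  autoUnload: boolean,
  loadedSessions: Map<string, { modelId: string }>,
  requestedModelId: string,
  unloadOne: (sessionId: string) => Promise<void>
): Promise<void> {
  if (!autoUnload) return
  // Drop every resident model except the one about to be served, so at most
  // one model occupies memory at a time, as the setting description promises.
  for (const [sessionId, session] of loadedSessions) {
    if (session.modelId !== requestedModelId) {
      await unloadOne(sessionId)
      loadedSessions.delete(sessionId)
    }
  }
}
```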
@@ -30,6 +30,7 @@ import { invoke } from '@tauri-apps/api/core'

 type LlamacppConfig = {
   version_backend: string
+  auto_unload: boolean
   n_gpu_layers: number
   ctx_size: number
   threads: number
@@ -106,6 +107,7 @@ interface EmbeddingData {

 export default class llamacpp_extension extends AIEngine {
   provider: string = 'llamacpp'
+  autoUnload: boolean = true
   readonly providerId: string = 'llamacpp'

   private config: LlamacppConfig
@@ -132,7 +134,7 @@ export default class llamacpp_extension extends AIEngine {
         })
       }
     }
+    this.autoUnload = await this.getSetting<boolean>('auto_unload_models', true)
     this.registerSettings(settings)

     let config = {}
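Note that the fallback passed to `getSetting` (`true`) matches the checkbox default declared in `controllerProps.value`, so the behavior is the same whether or not the user has ever touched the toggle. Because `onLoad` reads the stored value once, the in-memory `autoUnload` flag could drift if the user flips the checkbox while the extension is running; if the extension is notified of setting changes (Jan extensions typically are), the handler only needs to mirror the new value onto the field. A sketch under that assumption, with `applySettingUpdate` purely illustrative:

```typescript
// Sketch only: `applySettingUpdate` is an illustrative helper; the real
// extension would run equivalent logic wherever setting changes are observed.
function applySettingUpdate(
  extensionState: { autoUnload: boolean },
  key: string,
  value: unknown
): void {
  if (key === 'auto_unload_models' && typeof value === 'boolean') {
    // Keep the in-memory flag in sync with the persisted setting.
    extensionState.autoUnload = value
  }
}
```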