127 lines
3.5 KiB
JSON
127 lines
3.5 KiB
JSON
[
|
|
{
|
|
"key": "auto_unload_models",
|
|
"title": "Auto-Unload Old Models",
|
|
"description": "Automatically unloads models that are not in use to free up memory. Ensure only one model is loaded at a time.",
|
|
"controllerType": "checkbox",
|
|
"controllerProps": {
|
|
"value": true
|
|
}
|
|
},
|
|
{
|
|
"key": "context_shift",
|
|
"title": "Context Shift",
|
|
"description": "Automatically shifts the context window when the model is unable to process the entire prompt, ensuring that the most relevant information is always included.",
|
|
"controllerType": "checkbox",
|
|
"controllerProps": {
|
|
"value": false
|
|
}
|
|
},
|
|
{
|
|
"key": "cont_batching",
|
|
"title": "Continuous Batching",
|
|
"description": "Allows processing prompts in parallel with text generation, which usually improves performance.",
|
|
"controllerType": "checkbox",
|
|
"controllerProps": {
|
|
"value": ""
|
|
}
|
|
},
|
|
{
|
|
"key": "n_parallel",
|
|
"title": "Parallel Operations",
|
|
"description": "Number of prompts that can be processed simultaneously by the model.",
|
|
"controllerType": "input",
|
|
"controllerProps": {
|
|
"value": "",
|
|
"placeholder": "1",
|
|
"type": "number",
|
|
"textAlign": "right"
|
|
}
|
|
},
|
|
{
|
|
"key": "cpu_threads",
|
|
"title": "CPU Threads",
|
|
"description": "Number of CPU cores used for model processing when running without GPU.",
|
|
"controllerType": "input",
|
|
"controllerProps": {
|
|
"value": "",
|
|
"placeholder": "-1 (auto-detect)",
|
|
"type": "number",
|
|
"textAlign": "right"
|
|
}
|
|
},
|
|
{
|
|
"key": "threads_batch",
|
|
"title": "Threads (Batch)",
|
|
"description": "Number of threads for batch and prompt processing (default: same as Threads).",
|
|
"controllerType": "input",
|
|
"controllerProps": {
|
|
"value": "",
|
|
"placeholder": "-1 (same as Threads)",
|
|
"type": "number"
|
|
}
|
|
},
|
|
{
|
|
"key": "flash_attn",
|
|
"title": "Flash Attention",
|
|
"description": "Optimizes memory usage and speeds up model inference using an efficient attention implementation.",
|
|
"controllerType": "checkbox",
|
|
"controllerProps": {
|
|
"value": true
|
|
}
|
|
},
|
|
{
|
|
"key": "caching_enabled",
|
|
"title": "Caching",
|
|
"description": "Stores recent prompts and responses to improve speed when similar questions are asked.",
|
|
"controllerType": "checkbox",
|
|
"controllerProps": {
|
|
"value": true
|
|
}
|
|
},
|
|
{
|
|
"key": "cache_type",
|
|
"title": "KV Cache Type",
|
|
"description": "Controls memory usage and precision trade-off.",
|
|
"controllerType": "dropdown",
|
|
"controllerProps": {
|
|
"value": "q8_0",
|
|
"options": [
|
|
{
|
|
"value": "q4_0",
|
|
"name": "q4_0"
|
|
},
|
|
{
|
|
"value": "q8_0",
|
|
"name": "q8_0"
|
|
},
|
|
{
|
|
"value": "f16",
|
|
"name": "f16"
|
|
}
|
|
]
|
|
}
|
|
},
|
|
{
|
|
"key": "use_mmap",
|
|
"title": "mmap",
|
|
"description": "Loads model files more efficiently by mapping them to memory, reducing RAM usage.",
|
|
"controllerType": "checkbox",
|
|
"controllerProps": {
|
|
"value": true
|
|
}
|
|
},
|
|
{
|
|
"key": "hugging-face-access-token",
|
|
"title": "Hugging Face Access Token",
|
|
"description": "Access tokens programmatically authenticate your identity to the Hugging Face Hub, allowing applications to perform specific actions specified by the scope of permissions granted.",
|
|
"controllerType": "input",
|
|
"controllerProps": {
|
|
"value": "",
|
|
"placeholder": "hf_**********************************",
|
|
"type": "password",
|
|
"inputActions": ["unobscure", "copy"]
|
|
}
|
|
}
|
|
]
|