Louis 86079074d2
🐛fix: update default extension settings (#5315)
* fix: update default extension settings

* chore: hide language setting on Prod
2025-06-17 17:37:31 +07:00

127 lines
3.5 KiB
JSON

[
{
"key": "auto_unload_models",
"title": "Auto-Unload Old Models",
"description": "Automatically unloads models that are not in use to free up memory. Ensure only one model is loaded at a time.",
"controllerType": "checkbox",
"controllerProps": {
"value": true
}
},
{
"key": "context_shift",
"title": "Context Shift",
"description": "Automatically shifts the context window when the model is unable to process the entire prompt, ensuring that the most relevant information is always included.",
"controllerType": "checkbox",
"controllerProps": {
"value": false
}
},
{
"key": "cont_batching",
"title": "Continuous Batching",
"description": "Allows processing prompts in parallel with text generation, which usually improves performance.",
"controllerType": "checkbox",
"controllerProps": {
"value": ""
}
},
{
"key": "n_parallel",
"title": "Parallel Operations",
"description": "Number of prompts that can be processed simultaneously by the model.",
"controllerType": "input",
"controllerProps": {
"value": "",
"placeholder": "1",
"type": "number",
"textAlign": "right"
}
},
{
"key": "cpu_threads",
"title": "CPU Threads",
"description": "Number of CPU cores used for model processing when running without GPU.",
"controllerType": "input",
"controllerProps": {
"value": "",
"placeholder": "-1 (auto-detect)",
"type": "number",
"textAlign": "right"
}
},
{
"key": "threads_batch",
"title": "Threads (Batch)",
"description": "Number of threads for batch and prompt processing (default: same as Threads).",
"controllerType": "input",
"controllerProps": {
"value": "",
"placeholder": "-1 (same as Threads)",
"type": "number"
}
},
{
"key": "flash_attn",
"title": "Flash Attention",
"description": "Optimizes memory usage and speeds up model inference using an efficient attention implementation.",
"controllerType": "checkbox",
"controllerProps": {
"value": true
}
},
{
"key": "caching_enabled",
"title": "Caching",
"description": "Stores recent prompts and responses to improve speed when similar questions are asked.",
"controllerType": "checkbox",
"controllerProps": {
"value": true
}
},
{
"key": "cache_type",
"title": "KV Cache Type",
"description": "Controls memory usage and precision trade-off.",
"controllerType": "dropdown",
"controllerProps": {
"value": "q8_0",
"options": [
{
"value": "q4_0",
"name": "q4_0"
},
{
"value": "q8_0",
"name": "q8_0"
},
{
"value": "f16",
"name": "f16"
}
]
}
},
{
"key": "use_mmap",
"title": "mmap",
"description": "Loads model files more efficiently by mapping them to memory, reducing RAM usage.",
"controllerType": "checkbox",
"controllerProps": {
"value": true
}
},
{
"key": "hugging-face-access-token",
"title": "Hugging Face Access Token",
"description": "Access tokens programmatically authenticate your identity to the Hugging Face Hub, allowing applications to perform specific actions specified by the scope of permissions granted.",
"controllerType": "input",
"controllerProps": {
"value": "",
"placeholder": "hf_**********************************",
"type": "password",
"inputActions": ["unobscure", "copy"]
}
}
]