70 lines
1.6 KiB
JSON
70 lines
1.6 KiB
JSON
[
|
|
{
|
|
"key": "cont_batching",
|
|
"title": "Continuous batching",
|
|
"description": "The number of parallel operations",
|
|
"controllerType": "checkbox",
|
|
"controllerProps": {
|
|
"value": true
|
|
}
|
|
},
|
|
{
|
|
"key": "n_parallel",
|
|
"title": "Parallel operations",
|
|
"description": "The number of parallel operations",
|
|
"controllerType": "input",
|
|
"controllerProps": {
|
|
"value": "4",
|
|
"placeholder": "4"
|
|
}
|
|
},
|
|
{
|
|
"key": "cpu_threads",
|
|
"title": "CPU Threads",
|
|
"description": "The number of CPU threads to use (when in CPU mode)",
|
|
"controllerType": "input",
|
|
"controllerProps": {
|
|
"value": "",
|
|
"placeholder": "Number of CPU threads"
|
|
}
|
|
},
|
|
{
|
|
"key": "flash_attn",
|
|
"title": "Flash Attention enabled",
|
|
"description": "To enable Flash Attention, default is true",
|
|
"controllerType": "checkbox",
|
|
"controllerProps": {
|
|
"value": true
|
|
}
|
|
},
|
|
|
|
{
|
|
"key": "caching_enabled",
|
|
"title": "Caching enabled",
|
|
"description": "To enable prompt caching or not",
|
|
"controllerType": "checkbox",
|
|
"controllerProps": {
|
|
"value": true
|
|
}
|
|
},
|
|
{
|
|
"key": "cache_type",
|
|
"title": "KV Cache Type",
|
|
"description": "KV cache type: f16, q8_0, q4_0, default is f16 (change this could break the model).",
|
|
"controllerType": "input",
|
|
"controllerProps": {
|
|
"placeholder": "f16",
|
|
"value": "f16"
|
|
}
|
|
},
|
|
{
|
|
"key": "use_mmap",
|
|
"title": "To enable mmap",
|
|
"description": "To enable mmap, default is true",
|
|
"controllerType": "checkbox",
|
|
"controllerProps": {
|
|
"value": true
|
|
}
|
|
}
|
|
]
|