60 lines
1.3 KiB
JSON

[
{
"key": "cont_batching",
"title": "Continuous batching",
"description": "Enable continuous batching (dynamic batching) of concurrent requests. Default is true.",
"controllerType": "checkbox",
"controllerProps": {
"value": true
}
},
{
"key": "n_parallel",
"title": "Parallel operations",
"description": "The number of parallel operations",
"controllerType": "input",
"controllerProps": {
"value": "4",
"placeholder": "4"
}
},
{
"key": "flash_attn",
"title": "Flash Attention enabled",
"description": "Enable Flash Attention. Default is true.",
"controllerType": "checkbox",
"controllerProps": {
"value": true
}
},
{
"key": "caching_enabled",
"title": "Caching enabled",
"description": "Whether to enable prompt caching.",
"controllerType": "checkbox",
"controllerProps": {
"value": true
}
},
{
"key": "cache_type",
"title": "KV Cache Type",
"description": "KV cache type: f16, q8_0, or q4_0. Default is f16 (changing this could break the model).",
"controllerType": "input",
"controllerProps": {
"placeholder": "f16",
"value": "f16"
}
},
{
"key": "use_mmap",
"title": "mmap enabled",
"description": "Enable memory-mapped (mmap) model loading. Default is true.",
"controllerType": "checkbox",
"controllerProps": {
"value": true
}
}
]