* chore: enable shortcut zoom (#5261) * chore: enable shortcut zoom * chore: update shortcut setting * fix: thinking block (#5263) * Merge pull request #5262 from menloresearch/chore/sync-new-hub-data chore: sync new hub data * ✨enhancement: model run improvement (#5268) * fix: mcp tool error handling * fix: error message * fix: trigger download from recommend model * fix: can't scroll hub * fix: show progress * ✨enhancement: prompt users to increase context size * ✨enhancement: rearrange action buttons for a better UX * 🔧chore: clean up logics --------- Co-authored-by: Faisal Amir <urmauur@gmail.com> * fix: glitch download from onboarding (#5269) * ✨enhancement: Model sources should not be hard coded from frontend (#5270) * 🐛fix: default onboarding model should use recommended quantizations (#5273) * 🐛fix: default onboarding model should use recommended quantizations * ✨enhancement: show context shift option in provider settings * 🔧chore: wording * 🔧 config: add to gitignore * 🐛fix: Jan-nano repo name changed (#5274) * 🚧 wip: disable showSpeedToken in ChatInput * 🐛 fix: commented out the wrong import * fix: masking value MCP env field (#5276) * ✨ feat: add token speed to each message that persist * ♻️ refactor: to follow prettier convention * 🐛 fix: exclude deleted field * 🧹 clean: all the missed console.log * ✨enhancement: out of context troubleshooting (#5275) * ✨enhancement: out of context troubleshooting * 🔧refactor: clean up * ✨enhancement: add setting chat width container (#5289) * ✨enhancement: add setting conversation width * ✨enhancement: cleanup log and improve accessibility * ✨enhancement: move const beta version * 🐛fix: optional additional_information gpu (#5291) * 🐛fix: showing release notes for beta and prod (#5292) * 🐛fix: showing release notes for beta and prod * ♻️refactor: make an utils env * ♻️refactor: hide MCP for production * ♻️refactor: simplify the boolean expression fetch release note --------- Co-authored-by: Faisal Amir 
<urmauur@gmail.com> Co-authored-by: LazyYuuki <huy2840@gmail.com> Co-authored-by: Bui Quang Huy <34532913+LazyYuuki@users.noreply.github.com>
127 lines
3.5 KiB
JSON
[
  {
    "key": "auto_unload_models",
    "title": "Auto-Unload Old Models",
    "description": "Automatically unloads models that are not in use to free up memory. Ensure only one model is loaded at a time.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  },
  {
    "key": "context_shift",
    "title": "Context Shift",
    "description": "Automatically shifts the context window when the model is unable to process the entire prompt, ensuring that the most relevant information is always included.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  },
  {
    "key": "cont_batching",
    "title": "Continuous Batching",
    "description": "Allows processing prompts in parallel with text generation, which usually improves performance.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": false
    }
  },
  {
    "key": "n_parallel",
    "title": "Parallel Operations",
    "description": "Number of prompts that can be processed simultaneously by the model.",
    "controllerType": "input",
    "controllerProps": {
      "value": "",
      "placeholder": "1",
      "type": "number",
      "textAlign": "right"
    }
  },
  {
    "key": "cpu_threads",
    "title": "CPU Threads",
    "description": "Number of CPU cores used for model processing when running without GPU.",
    "controllerType": "input",
    "controllerProps": {
      "value": "",
      "placeholder": "-1 (auto-detect)",
      "type": "number",
      "textAlign": "right"
    }
  },
  {
    "key": "threads_batch",
    "title": "Threads (Batch)",
    "description": "Number of threads for batch and prompt processing (default: same as Threads).",
    "controllerType": "input",
    "controllerProps": {
      "value": "",
      "placeholder": "-1 (same as Threads)",
      "type": "number"
    }
  },
  {
    "key": "flash_attn",
    "title": "Flash Attention",
    "description": "Optimizes memory usage and speeds up model inference using an efficient attention implementation.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  },
  {
    "key": "caching_enabled",
    "title": "Caching",
    "description": "Stores recent prompts and responses to improve speed when similar questions are asked.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  },
  {
    "key": "cache_type",
    "title": "KV Cache Type",
    "description": "Controls memory usage and precision trade-off.",
    "controllerType": "dropdown",
    "controllerProps": {
      "value": "q8_0",
      "options": [
        {
          "value": "q4_0",
          "name": "q4_0"
        },
        {
          "value": "q8_0",
          "name": "q8_0"
        },
        {
          "value": "f16",
          "name": "f16"
        }
      ]
    }
  },
  {
    "key": "use_mmap",
    "title": "mmap",
    "description": "Loads model files more efficiently by mapping them to memory, reducing RAM usage.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  },
  {
    "key": "hugging-face-access-token",
    "title": "Hugging Face Access Token",
    "description": "Access tokens programmatically authenticate your identity to the Hugging Face Hub, allowing applications to perform specific actions specified by the scope of permissions granted.",
    "controllerType": "input",
    "controllerProps": {
      "value": "",
      "placeholder": "hf_**********************************",
      "type": "password",
      "inputActions": ["unobscure", "copy"]
    }
  }
]