refactor: move thinking toggle to runtime settings for dynamic control (#5800)
* refactor: move thinking toggle to runtime settings for per-message control. Replaces the static `reasoning_budget` config with a dynamic `enable_thinking` flag under `chat_template_kwargs`, allowing models like Jan-nano and Qwen3 to enable or disable thinking behavior at runtime, even mid-conversation. Requires a UI update. * remove engine argument
This commit is contained in:
parent
4699b07ca6
commit
92703bceb2
@ -98,6 +98,11 @@ export interface chatCompletionRequest {
|
||||
samplers?: string[] | null
|
||||
timings_per_token?: boolean | null
|
||||
post_sampling_probs?: boolean | null
|
||||
chat_template_kwargs?: chat_template_kdict | null
|
||||
}
|
||||
|
||||
/**
 * Keyword arguments sent with a chat completion request as
 * `chat_template_kwargs`.
 */
export interface chat_template_kdict {
  /**
   * Runtime thinking toggle. Typed as `boolean` rather than the literal
   * `false` so that callers can turn thinking on as well as off.
   */
  enable_thinking: boolean
}
|
||||
|
||||
export interface chatCompletionChunkChoiceDelta {
|
||||
|
||||
@ -492,18 +492,5 @@
|
||||
"placeholder": "path/to/schema.json",
|
||||
"type": "text"
|
||||
}
|
||||
},
|
||||
{
|
||||
"key": "reasoning_budget",
|
||||
"title": "controls the amount of thinking allowed; currently only one of: -1 for unrestricted thinking budget, or 0 to disable thinking (default: -1)",
|
||||
"description": "Mirostat target entropy (tau).",
|
||||
"controllerType": "input",
|
||||
"controllerProps": {
|
||||
"value": 0,
|
||||
"options": [
|
||||
{ "value": -1, "name": "unrestricted thinking budget" },
|
||||
{ "value": 0, "name": "disable thinking" }
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
@ -25,7 +25,6 @@ import {
|
||||
downloadBackend,
|
||||
isBackendInstalled,
|
||||
getBackendExePath,
|
||||
getBackendDir,
|
||||
} from './backend'
|
||||
import { invoke } from '@tauri-apps/api/core'
|
||||
|
||||
@ -56,7 +55,6 @@ type LlamacppConfig = {
|
||||
rope_scale: number
|
||||
rope_freq_base: number
|
||||
rope_freq_scale: number
|
||||
reasoning_budget: number
|
||||
ctx_shift: boolean
|
||||
}
|
||||
|
||||
@ -855,7 +853,6 @@ export default class llamacpp_extension extends AIEngine {
|
||||
args.push('--rope-scale', String(cfg.rope_scale))
|
||||
args.push('--rope-freq-base', String(cfg.rope_freq_base))
|
||||
args.push('--rope-freq-scale', String(cfg.rope_freq_scale))
|
||||
args.push('--reasoning-budget', String(cfg.reasoning_budget))
|
||||
}
|
||||
|
||||
console.log('Calling Tauri command llama_load with args:', args)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user