refactor: move thinking toggle to runtime settings for dynamic control (#5800)

* refactor: move thinking toggle to runtime settings for per-message control

Replaces the static `reasoning_budget` config with a dynamic `enable_thinking` flag under `chat_template_kwargs`, allowing models like Jan-nano and Qwen3 to enable/disable thinking behavior at runtime, even mid-conversation.
Requires a corresponding UI update.

* remove engine argument
This commit is contained in:
Akarshan Biswas 2025-07-17 20:18:24 +05:30 committed by GitHub
parent 4699b07ca6
commit 92703bceb2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 5 additions and 16 deletions

View File

@ -98,6 +98,11 @@ export interface chatCompletionRequest {
samplers?: string[] | null
timings_per_token?: boolean | null
post_sampling_probs?: boolean | null
chat_template_kwargs?: chat_template_kdict | null
}
/**
 * Keyword arguments carried in the `chat_template_kwargs` field of a
 * chat completion request.
 */
export interface chat_template_kdict {
  /**
   * Per-request thinking toggle: `true` enables thinking, `false` disables it.
   * Typed as `boolean` (not the literal `false`) so callers can switch it
   * in either direction at runtime.
   */
  enable_thinking: boolean
}
export interface chatCompletionChunkChoiceDelta {

View File

@ -492,18 +492,5 @@
"placeholder": "path/to/schema.json",
"type": "text"
}
},
{
"key": "reasoning_budget",
"title": "controls the amount of thinking allowed; currently only one of: -1 for unrestricted thinking budget, or 0 to disable thinking (default: -1)",
"description": "Mirostat target entropy (tau).",
"controllerType": "input",
"controllerProps": {
"value": 0,
"options": [
{ "value": -1, "name": "unrestricted thinking budget" },
{ "value": 0, "name": "disable thinking" }
]
}
}
]

View File

@ -25,7 +25,6 @@ import {
downloadBackend,
isBackendInstalled,
getBackendExePath,
getBackendDir,
} from './backend'
import { invoke } from '@tauri-apps/api/core'
@ -56,7 +55,6 @@ type LlamacppConfig = {
rope_scale: number
rope_freq_base: number
rope_freq_scale: number
reasoning_budget: number
ctx_shift: boolean
}
@ -855,7 +853,6 @@ export default class llamacpp_extension extends AIEngine {
args.push('--rope-scale', String(cfg.rope_scale))
args.push('--rope-freq-base', String(cfg.rope_freq_base))
args.push('--rope-freq-scale', String(cfg.rope_freq_scale))
args.push('--reasoning-budget', String(cfg.reasoning_budget))
}
console.log('Calling Tauri command llama_load with args:', args)