refactor: move thinking toggle to runtime settings for dynamic control (#5800)

* refactor: move thinking toggle to runtime settings for per-message control

Replaces the static `reasoning_budget` config with a dynamic `enable_thinking` flag under `chat_template_kwargs`, allowing models like Jan-nano and Qwen3 to enable/disable thinking behavior at runtime, even mid-conversation.
Requires a UI update.

* remove the `--reasoning-budget` engine argument
This commit is contained in:
Akarshan Biswas 2025-07-17 20:18:24 +05:30 committed by GitHub
parent 4699b07ca6
commit 92703bceb2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 5 additions and 16 deletions

View File

@ -98,6 +98,11 @@ export interface chatCompletionRequest {
samplers?: string[] | null samplers?: string[] | null
timings_per_token?: boolean | null timings_per_token?: boolean | null
post_sampling_probs?: boolean | null post_sampling_probs?: boolean | null
chat_template_kwargs?: chat_template_kdict | null
}
export interface chat_template_kdict {
enable_thinking: false
} }
export interface chatCompletionChunkChoiceDelta { export interface chatCompletionChunkChoiceDelta {

View File

@ -492,18 +492,5 @@
"placeholder": "path/to/schema.json", "placeholder": "path/to/schema.json",
"type": "text" "type": "text"
} }
},
{
"key": "reasoning_budget",
"title": "controls the amount of thinking allowed; currently only one of: -1 for unrestricted thinking budget, or 0 to disable thinking (default: -1)",
"description": "Mirostat target entropy (tau).",
"controllerType": "input",
"controllerProps": {
"value": 0,
"options": [
{ "value": -1, "name": "unrestricted thinking budget" },
{ "value": 0, "name": "disable thinking" }
]
}
} }
] ]

View File

@ -25,7 +25,6 @@ import {
downloadBackend, downloadBackend,
isBackendInstalled, isBackendInstalled,
getBackendExePath, getBackendExePath,
getBackendDir,
} from './backend' } from './backend'
import { invoke } from '@tauri-apps/api/core' import { invoke } from '@tauri-apps/api/core'
@ -56,7 +55,6 @@ type LlamacppConfig = {
rope_scale: number rope_scale: number
rope_freq_base: number rope_freq_base: number
rope_freq_scale: number rope_freq_scale: number
reasoning_budget: number
ctx_shift: boolean ctx_shift: boolean
} }
@ -855,7 +853,6 @@ export default class llamacpp_extension extends AIEngine {
args.push('--rope-scale', String(cfg.rope_scale)) args.push('--rope-scale', String(cfg.rope_scale))
args.push('--rope-freq-base', String(cfg.rope_freq_base)) args.push('--rope-freq-base', String(cfg.rope_freq_base))
args.push('--rope-freq-scale', String(cfg.rope_freq_scale)) args.push('--rope-freq-scale', String(cfg.rope_freq_scale))
args.push('--reasoning-budget', String(cfg.reasoning_budget))
} }
console.log('Calling Tauri command llama_load with args:', args) console.log('Calling Tauri command llama_load with args:', args)