refactor: move thinking toggle to runtime settings for dynamic control (#5800)

* refactor: move thinking toggle to runtime settings for per-message control. Replaces the static `reasoning_budget` config with a dynamic `enable_thinking` flag under `chat_template_kwargs`, allowing models like Jan-nano and Qwen3 to enable or disable thinking behavior at runtime, even mid-conversation. Requires a UI update. * remove the `--reasoning-budget` engine argument
2025-07-17 20:18:24 +05:30 · 2025-07-17 20:18:24 +05:30 · 92703bceb2
commit 92703bceb2
parent 4699b07ca6
3 changed files with 5 additions and 16 deletions
--- a/core/src/browser/extensions/engines/AIEngine.ts
+++ b/core/src/browser/extensions/engines/AIEngine.ts
@ -98,6 +98,11 @@ export interface chatCompletionRequest {
  samplers?: string[] | null
  timings_per_token?: boolean | null
  post_sampling_probs?: boolean | null
  chat_template_kwargs?: chat_template_kdict | null
 }
 /**
  * Keyword arguments sent with a chat completion request via
  * `chatCompletionRequest.chat_template_kwargs`.
  */
 export interface chat_template_kdict {
  /**
   * Per-request thinking toggle: `true` enables thinking, `false` disables it.
   * Typed as `boolean` (not the literal `false`) so callers can switch it on
   * as well as off.
   */
  enable_thinking: boolean
 }
 export interface chatCompletionChunkChoiceDelta {
--- a/extensions/llamacpp-extension/settings.json
+++ b/extensions/llamacpp-extension/settings.json
@ -492,18 +492,5 @@
      "placeholder": "path/to/schema.json",
      "type": "text"
    }
  },
  {
    "key": "reasoning_budget",
    "title": "controls the amount of thinking allowed; currently only one of: -1 for unrestricted thinking budget, or 0 to disable thinking (default: -1)",
    "description": "Controls the amount of thinking allowed: -1 for an unrestricted thinking budget, 0 to disable thinking.",
    "controllerType": "input",
    "controllerProps": {
      "value": 0,
      "options": [
        { "value": -1, "name": "unrestricted thinking budget" },
        { "value": 0, "name": "disable thinking" }
      ]
    }
  }
 ]
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@ -25,7 +25,6 @@ import {
  downloadBackend,
  isBackendInstalled,
  getBackendExePath,
  getBackendDir,
 } from './backend'
 import { invoke } from '@tauri-apps/api/core'
@ -56,7 +55,6 @@ type LlamacppConfig = {
  rope_scale: number
  rope_freq_base: number
  rope_freq_scale: number
  reasoning_budget: number
  ctx_shift: boolean
 }
@ -855,7 +853,6 @@ export default class llamacpp_extension extends AIEngine {
      args.push('--rope-scale', String(cfg.rope_scale))
      args.push('--rope-freq-base', String(cfg.rope_freq_base))
      args.push('--rope-freq-scale', String(cfg.rope_freq_scale))
      args.push('--reasoning-budget', String(cfg.reasoning_budget))
    }
    console.log('Calling Tauri command llama_load with args:', args)