refactor: move thinking toggle to runtime settings for dynamic control (#5800)

* refactor: move thinking toggle to runtime settings for per-message control. Replaces the static `reasoning_budget` config with a dynamic `enable_thinking` flag under `chat_template_kwargs`, allowing models like Jan-nano and Qwen3 to enable or disable thinking behavior at runtime, even mid-conversation. Requires a UI update.
* remove engine argument
2025-07-17 20:18:24 +05:30 · 2025-07-17 20:18:24 +05:30 · 92703bceb2
commit 92703bceb2
parent 4699b07ca6
3 changed files with 5 additions and 16 deletions
--- a/core/src/browser/extensions/engines/AIEngine.ts
+++ b/core/src/browser/extensions/engines/AIEngine.ts
@ -98,6 +98,11 @@ export interface chatCompletionRequest {
  samplers?: string[] | null
  timings_per_token?: boolean | null
  post_sampling_probs?: boolean | null
+  chat_template_kwargs?: chat_template_kdict | null
+}
+
+export interface chat_template_kdict {
+  enable_thinking: false
 }

 export interface chatCompletionChunkChoiceDelta {
--- a/extensions/llamacpp-extension/settings.json
+++ b/extensions/llamacpp-extension/settings.json
@ -492,18 +492,5 @@
      "placeholder": "path/to/schema.json",
      "type": "text"
    }
-  },
-  {
-    "key": "reasoning_budget",
-    "title": "controls the amount of thinking allowed; currently only one of: -1 for unrestricted thinking budget, or 0 to disable thinking (default: -1)",
-    "description": "Mirostat target entropy (tau).",
-    "controllerType": "input",
-    "controllerProps": {
-      "value": 0,
-      "options": [
-        { "value": -1, "name": "unrestricted thinking budget" },
-        { "value": 0, "name": "disable thinking" }
-      ]
-    }
  }
 ]
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@ -25,7 +25,6 @@ import {
  downloadBackend,
  isBackendInstalled,
  getBackendExePath,
-  getBackendDir,
 } from './backend'
 import { invoke } from '@tauri-apps/api/core'

@ -56,7 +55,6 @@ type LlamacppConfig = {
  rope_scale: number
  rope_freq_base: number
  rope_freq_scale: number
-  reasoning_budget: number
  ctx_shift: boolean
 }

@ -855,7 +853,6 @@ export default class llamacpp_extension extends AIEngine {
      args.push('--rope-scale', String(cfg.rope_scale))
      args.push('--rope-freq-base', String(cfg.rope_freq_base))
      args.push('--rope-freq-scale', String(cfg.rope_freq_scale))
-      args.push('--reasoning-budget', String(cfg.reasoning_budget))
    }

    console.log('Calling Tauri command llama_load with args:', args)