diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts
index 5c2298f8e..1a6b22d34 100644
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@@ -58,6 +58,7 @@ type LlamacppConfig = {
ubatch_size: number
device: string
split_mode: string
+ chat_template_kwargs: string
main_gpu: number
flash_attn: boolean
cont_batching: boolean
@@ -1393,6 +1394,9 @@ export default class llamacpp_extension extends AIEngine {
if (!cfg.reasoning_budget) {
args.push('--reasoning-budget', '0')
}
+ if (cfg.chat_template_kwargs) {
+ args.push('--chat-template-kwargs', cfg.chat_template_kwargs)
+ }
if (cfg.override_tensor_buffer_t)
args.push('--override-tensor', cfg.override_tensor_buffer_t)
// offload multimodal projector model to the GPU by default. if there is not enough memory
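The new config key is forwarded verbatim: llama-server's `--chat-template-kwargs` flag takes a JSON object as a string and merges its keys into the Jinja chat-template context. A minimal sketch (not part of the diff; the model path is assumed) of how the flag lands on the command line:

```ts
// Sketch: building the llama-server argument list when
// chat_template_kwargs is set in the extension config.
const cfg = { chat_template_kwargs: '{"reasoning_effort":"medium"}' }
const args: string[] = ['--model', '/models/example.gguf'] // path assumed

if (cfg.chat_template_kwargs) {
  args.push('--chat-template-kwargs', cfg.chat_template_kwargs)
}
// args: ['--model', '/models/example.gguf',
//        '--chat-template-kwargs', '{"reasoning_effort":"medium"}']
```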
diff --git a/web-app/src/containers/ChatInput.tsx b/web-app/src/containers/ChatInput.tsx
index ec89bb0ef..ebab4a787 100644
--- a/web-app/src/containers/ChatInput.tsx
+++ b/web-app/src/containers/ChatInput.tsx
@@ -721,7 +721,7 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
                    {selectedModel?.reasoning?.reasoning_effort ||
-                      'auto'}
+                      'medium'}
                  {reasoningEffortOpen ? (
                    <div>
-                      {['auto', 'low', 'medium', 'high'].map(
-                        (effort) => (
-                          <div
-                            onClick={() => {
-                              if (selectedModel?.reasoning) {
-                                selectedModel.reasoning.reasoning_effort =
-                                  effort
-                                setReasoningEffortOpen(false)
-                              }
-                            }}
-                          >
-                            {effort}
-                          </div>
-                        )
-                      )}
+                      {['low', 'medium', 'high'].map((effort) => (
+                        <div
+                          key={effort}
+                          onClick={async () => {
+                            if (selectedModel?.reasoning) {
+                              selectedModel.reasoning.reasoning_effort =
+                                effort
+                              setReasoningEffortOpen(false)
+                              // Restart model with new reasoning effort
+                              try {
+                                await stopModel(selectedModel.id)
+                              } catch (error) {
+                                console.error(
+                                  'Error restarting model with new reasoning effort:',
+                                  error
+                                )
+                              }
+                            }
+                          }}
+                        >
+                          {effort}
+                        </div>
+                      ))}
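Note that the handler only calls `stopModel`; the restart named in the comment is implicit, since the next completion request starts the model again and `startModel` (see the models.ts hunk below) then passes the updated `chat_template_kwargs`. A sketch of the handler in isolation, assuming `stopModel` unloads the running llama.cpp instance:

```ts
// Sketch of the selection handler extracted from the JSX above.
const handleSelectEffort = async (effort: string) => {
  if (!selectedModel?.reasoning) return
  selectedModel.reasoning.reasoning_effort = effort
  setReasoningEffortOpen(false)
  try {
    // Stop only: the next chat request reloads the model with the
    // new reasoning effort baked into its chat template.
    await stopModel(selectedModel.id)
  } catch (error) {
    console.error('Error restarting model with new reasoning effort:', error)
  }
}
```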
diff --git a/web-app/src/hooks/useChat.ts b/web-app/src/hooks/useChat.ts
index 71018348a..134dc1ae1 100644
--- a/web-app/src/hooks/useChat.ts
+++ b/web-app/src/hooks/useChat.ts
@@ -289,10 +289,6 @@ export const useChat = () => {
{
...modelSettings,
...currentAssistant.parameters,
- ...(selectedModel?.reasoning?.reasoning_budget &&
- selectedModel?.reasoning?.reasoning_effort !== 'auto' && {
- reasoning_effort: selectedModel?.reasoning?.reasoning_effort,
- }),
        } as unknown as Record<string, unknown>
)
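Dropping the per-request override matches the rest of the change: the effort is now applied once, at load time, through the server's chat template, so sending `reasoning_effort` with every completion would be redundant, and the old `!== 'auto'` guard has no meaning now that 'auto' is gone from the options.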
diff --git a/web-app/src/services/models.ts b/web-app/src/services/models.ts
index 4ff9b25f0..b8d043f50 100644
--- a/web-app/src/services/models.ts
+++ b/web-app/src/services/models.ts
@@ -427,6 +427,10 @@ export const startModel = async (
...(modelConfig?.reasoning?.reasoning_budget !== undefined && {
reasoning_budget: modelConfig.reasoning.reasoning_budget,
}),
+ ...(modelConfig?.reasoning?.reasoning_budget &&
+ modelConfig?.reasoning?.reasoning_effort !== undefined && {
+ chat_template_kwargs: `{"reasoning_effort":"${modelConfig.reasoning.reasoning_effort}"}`,
+ }),
}
return engine.load(model, settings).catch((error) => {
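The kwargs JSON is assembled by string interpolation here. An equivalent construction with `JSON.stringify` (a sketch, not what the diff uses) would handle quoting automatically; for the fixed 'low' | 'medium' | 'high' set the output is identical:

```ts
// Sketched alternative: let JSON.stringify produce the kwargs string.
const chatTemplateKwargs = JSON.stringify({
  reasoning_effort: modelConfig.reasoning.reasoning_effort,
})
// e.g. '{"reasoning_effort":"medium"}'
```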
diff --git a/web-app/src/services/providers.ts b/web-app/src/services/providers.ts
index 8e1fdbf37..12e2887b7 100644
--- a/web-app/src/services/providers.ts
+++ b/web-app/src/services/providers.ts
@@ -104,7 +104,7 @@ export const getProviders = async (): Promise<ModelProvider[]> => {
model.id,
'reasoning_effort'
))
- ? { reasoning_effort: 'auto' }
+ ? { reasoning_effort: 'medium' }
: {}),
},
}
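Taken together: a model that exposes a `reasoning_effort` setting now defaults to 'medium' instead of 'auto', and starting it produces load settings along these lines (a sketch; both values are assumed for illustration):

```ts
// Sketch of the settings object handed to engine.load() for a model
// whose chat template reads reasoning_effort from template kwargs.
const settings = {
  reasoning_budget: -1, // assumed: truthy, i.e. thinking enabled
  chat_template_kwargs: '{"reasoning_effort":"medium"}',
}
```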