diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts
index 5c2298f8e..1a6b22d34 100644
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@@ -58,6 +58,7 @@ type LlamacppConfig = {
   ubatch_size: number
   device: string
   split_mode: string
+  chat_template_kwargs: string
   main_gpu: number
   flash_attn: boolean
   cont_batching: boolean
@@ -1393,6 +1394,9 @@ export default class llamacpp_extension extends AIEngine {
     if (!cfg.reasoning_budget) {
       args.push('--reasoning-budget', '0')
     }
+    if (cfg.chat_template_kwargs) {
+      args.push('--chat-template-kwargs', cfg.chat_template_kwargs)
+    }
     if (cfg.override_tensor_buffer_t)
       args.push('--override-tensor', cfg.override_tensor_buffer_t)
     // offload multimodal projector model to the GPU by default. if there is not enough memory
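On the engine side, `--chat-template-kwargs` is forwarded verbatim to llama-server, which parses the string as JSON and exposes each key as a variable to the model's Jinja chat template. A minimal sketch of what the extension ends up passing (the `cfg` literal here is illustrative, not real config):

```ts
// Illustrative sketch: cfg.chat_template_kwargs is the raw JSON string
// produced by startModel() further down in this PR.
const cfg = { chat_template_kwargs: '{"reasoning_effort":"medium"}' }

const args: string[] = []
if (cfg.chat_template_kwargs) {
  // Passed through unmodified; llama-server does the JSON parsing.
  args.push('--chat-template-kwargs', cfg.chat_template_kwargs)
}
// args => ['--chat-template-kwargs', '{"reasoning_effort":"medium"}']
```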
diff --git a/web-app/src/containers/ChatInput.tsx b/web-app/src/containers/ChatInput.tsx
index ec89bb0ef..ebab4a787 100644
--- a/web-app/src/containers/ChatInput.tsx
+++ b/web-app/src/containers/ChatInput.tsx
@@ -721,7 +721,7 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
                 {selectedModel?.reasoning?.reasoning_effort ||
-                  'auto'}
+                  'medium'}
               {reasoningEffortOpen ? (
-                {['auto', 'low', 'medium', 'high'].map(
-                  (effort) => (
-                    <div
-                      key={effort}
-                      onClick={() => {
-                        if (selectedModel?.reasoning) {
-                          selectedModel.reasoning.reasoning_effort =
-                            effort
-                          setReasoningEffortOpen(false)
-                        }
-                      }}
-                    >
-                      {effort}
-                    </div>
-                  )
-                )}
+                {['low', 'medium', 'high'].map((effort) => (
+                  <div
+                    key={effort}
+                    onClick={async () => {
+                      if (selectedModel?.reasoning) {
+                        selectedModel.reasoning.reasoning_effort =
+                          effort
+                        setReasoningEffortOpen(false)
+                        // Restart model with new reasoning effort
+                        try {
+                          await stopModel(selectedModel.id)
+                        } catch (error) {
+                          console.error(
+                            'Error restarting model with new reasoning effort:',
+                            error
+                          )
+                        }
+                      }
+                    }}
+                  >
+                    {effort}
+                  </div>
+                ))}
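Worth noting: the new click handler only stops the model; the reload happens implicitly, since the next chat turn goes through startModel(), which now serializes the updated effort into chat_template_kwargs (see the services/models.ts hunk below). A self-contained sketch of that flow, with names and signatures simplified from the diff:

```ts
type Effort = 'low' | 'medium' | 'high'

// Sketch of what the onClick handler accomplishes. The stopModel parameter
// stands in for the service used in ChatInput.tsx; its signature is assumed.
async function applyReasoningEffort(
  model: { id: string; reasoning?: { reasoning_effort?: string } },
  effort: Effort,
  stopModel: (id: string) => Promise<void>
): Promise<void> {
  if (!model.reasoning) return
  model.reasoning.reasoning_effort = effort
  // Unloading is enough: the next startModel() call rebuilds the settings
  // object, now carrying the new reasoning_effort.
  await stopModel(model.id)
}
```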
diff --git a/web-app/src/hooks/useChat.ts b/web-app/src/hooks/useChat.ts
index 71018348a..134dc1ae1 100644
--- a/web-app/src/hooks/useChat.ts
+++ b/web-app/src/hooks/useChat.ts
@@ -289,10 +289,6 @@ export const useChat = () => {
         {
           ...modelSettings,
           ...currentAssistant.parameters,
-          ...(selectedModel?.reasoning?.reasoning_budget &&
-            selectedModel?.reasoning?.reasoning_effort !== 'auto' && {
-              reasoning_effort: selectedModel?.reasoning?.reasoning_effort,
-            }),
         } as unknown as Record<string, unknown>
       )
diff --git a/web-app/src/services/models.ts b/web-app/src/services/models.ts
index 4ff9b25f0..b8d043f50 100644
--- a/web-app/src/services/models.ts
+++ b/web-app/src/services/models.ts
@@ -427,6 +427,10 @@ export const startModel = async (
     ...(modelConfig?.reasoning?.reasoning_budget !== undefined && {
       reasoning_budget: modelConfig.reasoning.reasoning_budget,
     }),
+    ...(modelConfig?.reasoning?.reasoning_budget &&
+      modelConfig?.reasoning?.reasoning_effort !== undefined && {
+        chat_template_kwargs: `{"reasoning_effort":"${modelConfig.reasoning.reasoning_effort}"}`,
+      }),
   }

   return engine.load(model, settings).catch((error) => {
diff --git a/web-app/src/services/providers.ts b/web-app/src/services/providers.ts
index 8e1fdbf37..12e2887b7 100644
--- a/web-app/src/services/providers.ts
+++ b/web-app/src/services/providers.ts
@@ -104,7 +104,7 @@ export const getProviders = async (): Promise<ModelProvider[]> => {
               model.id,
               'reasoning_effort'
             ))
-              ? { reasoning_effort: 'auto' }
+              ? { reasoning_effort: 'medium' }
               : {}),
           },
         }
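One design note on the models.ts hunk: chat_template_kwargs is built with a template literal, which is safe here only because reasoning_effort is constrained to the fixed low/medium/high list in ChatInput. An equivalent, quoting-safe construction would go through JSON.stringify; a sketch:

```ts
// Sketch only; the modelConfig literal stands in for the real startModel argument.
const modelConfig = { reasoning: { reasoning_effort: 'medium' } }

const chatTemplateKwargs = JSON.stringify({
  reasoning_effort: modelConfig.reasoning.reasoning_effort,
})
// => '{"reasoning_effort":"medium"}' — identical to the interpolated string
// for these values, but robust if the effort ever carries quotes or backslashes.
```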