chore: try passing reasoning_effort via the --chat-template-kwargs argument

This commit is contained in:
Faisal Amir 2025-09-03 23:08:14 +07:00
parent f14fd37398
commit 0d7578a980
5 changed files with 33 additions and 22 deletions

View File

@ -58,6 +58,7 @@ type LlamacppConfig = {
ubatch_size: number ubatch_size: number
device: string device: string
split_mode: string split_mode: string
chat_template_kwargs: string
main_gpu: number main_gpu: number
flash_attn: boolean flash_attn: boolean
cont_batching: boolean cont_batching: boolean
@ -1393,6 +1394,9 @@ export default class llamacpp_extension extends AIEngine {
if (!cfg.reasoning_budget) { if (!cfg.reasoning_budget) {
args.push('--reasoning-budget', '0') args.push('--reasoning-budget', '0')
} }
if (cfg.chat_template_kwargs) {
args.push('--chat-template-kwargs', cfg.chat_template_kwargs)
}
if (cfg.override_tensor_buffer_t) if (cfg.override_tensor_buffer_t)
args.push('--override-tensor', cfg.override_tensor_buffer_t) args.push('--override-tensor', cfg.override_tensor_buffer_t)
// offload multimodal projector model to the GPU by default. if there is not enough memory // offload multimodal projector model to the GPU by default. if there is not enough memory

View File

@ -721,7 +721,7 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
<div className="h-7 mr-1 text-sm rounded-sm text-main-view-fg/70 cursor-pointer flex items-center gap-1"> <div className="h-7 mr-1 text-sm rounded-sm text-main-view-fg/70 cursor-pointer flex items-center gap-1">
<span className="capitalize"> <span className="capitalize">
{selectedModel?.reasoning?.reasoning_effort || {selectedModel?.reasoning?.reasoning_effort ||
'auto'} 'medium'}
</span> </span>
{reasoningEffortOpen ? ( {reasoningEffortOpen ? (
<IconChevronUp <IconChevronUp
@ -738,23 +738,30 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
</PopoverTrigger> </PopoverTrigger>
<PopoverContent className="w-24 p-0" align="start"> <PopoverContent className="w-24 p-0" align="start">
<div className="py-1"> <div className="py-1">
{['auto', 'low', 'medium', 'high'].map( {['low', 'medium', 'high'].map((effort) => (
(effort) => ( <div
<div key={effort}
key={effort} className="px-2 py-1.5 text-sm cursor-pointer hover:bg-main-view-fg/20 capitalize"
className="px-2 py-1.5 text-sm cursor-pointer hover:bg-main-view-fg/20 capitalize" onClick={async () => {
onClick={async () => { if (selectedModel?.reasoning) {
if (selectedModel?.reasoning) { selectedModel.reasoning.reasoning_effort =
selectedModel.reasoning.reasoning_effort = effort
effort setReasoningEffortOpen(false)
setReasoningEffortOpen(false) // Restart model with new reasoning effort
try {
await stopModel(selectedModel.id)
} catch (error) {
console.error(
'Error restarting model with new reasoning effort:',
error
)
} }
}} }
> }}
{effort} >
</div> {effort}
) </div>
)} ))}
</div> </div>
</PopoverContent> </PopoverContent>
</Popover> </Popover>

View File

@ -289,10 +289,6 @@ export const useChat = () => {
{ {
...modelSettings, ...modelSettings,
...currentAssistant.parameters, ...currentAssistant.parameters,
...(selectedModel?.reasoning?.reasoning_budget &&
selectedModel?.reasoning?.reasoning_effort !== 'auto' && {
reasoning_effort: selectedModel?.reasoning?.reasoning_effort,
}),
} as unknown as Record<string, object> } as unknown as Record<string, object>
) )

View File

@ -427,6 +427,10 @@ export const startModel = async (
...(modelConfig?.reasoning?.reasoning_budget !== undefined && { ...(modelConfig?.reasoning?.reasoning_budget !== undefined && {
reasoning_budget: modelConfig.reasoning.reasoning_budget, reasoning_budget: modelConfig.reasoning.reasoning_budget,
}), }),
...(modelConfig?.reasoning?.reasoning_budget &&
modelConfig?.reasoning?.reasoning_effort !== undefined && {
chat_template_kwargs: `{"reasoning_effort":"${modelConfig.reasoning.reasoning_effort}"}`,
}),
} }
return engine.load(model, settings).catch((error) => { return engine.load(model, settings).catch((error) => {

View File

@ -104,7 +104,7 @@ export const getProviders = async (): Promise<ModelProvider[]> => {
model.id, model.id,
'reasoning_effort' 'reasoning_effort'
)) ))
? { reasoning_effort: 'auto' } ? { reasoning_effort: 'medium' }
: {}), : {}),
}, },
} }