chore: try passing reasoning_effort to llama.cpp via the --chat-template-kwargs arg

This commit is contained in:
Faisal Amir 2025-09-03 23:08:14 +07:00
parent f14fd37398
commit 0d7578a980
5 changed files with 33 additions and 22 deletions

View File

@ -58,6 +58,7 @@ type LlamacppConfig = {
ubatch_size: number
device: string
split_mode: string
chat_template_kwargs: string
main_gpu: number
flash_attn: boolean
cont_batching: boolean
@ -1393,6 +1394,9 @@ export default class llamacpp_extension extends AIEngine {
if (!cfg.reasoning_budget) {
args.push('--reasoning-budget', '0')
}
if (cfg.chat_template_kwargs) {
args.push('--chat-template-kwargs', cfg.chat_template_kwargs)
}
if (cfg.override_tensor_buffer_t)
args.push('--override-tensor', cfg.override_tensor_buffer_t)
// offload multimodal projector model to the GPU by default. if there is not enough memory

View File

@ -721,7 +721,7 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
<div className="h-7 mr-1 text-sm rounded-sm text-main-view-fg/70 cursor-pointer flex items-center gap-1">
<span className="capitalize">
{selectedModel?.reasoning?.reasoning_effort ||
'auto'}
'medium'}
</span>
{reasoningEffortOpen ? (
<IconChevronUp
@ -738,23 +738,30 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
</PopoverTrigger>
<PopoverContent className="w-24 p-0" align="start">
<div className="py-1">
{['auto', 'low', 'medium', 'high'].map(
(effort) => (
<div
key={effort}
className="px-2 py-1.5 text-sm cursor-pointer hover:bg-main-view-fg/20 capitalize"
onClick={async () => {
if (selectedModel?.reasoning) {
selectedModel.reasoning.reasoning_effort =
effort
setReasoningEffortOpen(false)
{['low', 'medium', 'high'].map((effort) => (
<div
key={effort}
className="px-2 py-1.5 text-sm cursor-pointer hover:bg-main-view-fg/20 capitalize"
onClick={async () => {
if (selectedModel?.reasoning) {
selectedModel.reasoning.reasoning_effort =
effort
setReasoningEffortOpen(false)
// Restart model with new reasoning effort
try {
await stopModel(selectedModel.id)
} catch (error) {
console.error(
'Error restarting model with new reasoning effort:',
error
)
}
}}
>
{effort}
</div>
)
)}
}
}}
>
{effort}
</div>
))}
</div>
</PopoverContent>
</Popover>

View File

@ -289,10 +289,6 @@ export const useChat = () => {
{
...modelSettings,
...currentAssistant.parameters,
...(selectedModel?.reasoning?.reasoning_budget &&
selectedModel?.reasoning?.reasoning_effort !== 'auto' && {
reasoning_effort: selectedModel?.reasoning?.reasoning_effort,
}),
} as unknown as Record<string, object>
)

View File

@ -427,6 +427,10 @@ export const startModel = async (
...(modelConfig?.reasoning?.reasoning_budget !== undefined && {
reasoning_budget: modelConfig.reasoning.reasoning_budget,
}),
...(modelConfig?.reasoning?.reasoning_budget &&
modelConfig?.reasoning?.reasoning_effort !== undefined && {
chat_template_kwargs: `{"reasoning_effort":"${modelConfig.reasoning.reasoning_effort}"}`,
}),
}
return engine.load(model, settings).catch((error) => {

View File

@ -104,7 +104,7 @@ export const getProviders = async (): Promise<ModelProvider[]> => {
model.id,
'reasoning_effort'
))
? { reasoning_effort: 'auto' }
? { reasoning_effort: 'medium' }
: {}),
},
}