chore: try using arg --chat-template-kwargs
This commit is contained in:
parent
f14fd37398
commit
0d7578a980
@ -58,6 +58,7 @@ type LlamacppConfig = {
|
|||||||
ubatch_size: number
|
ubatch_size: number
|
||||||
device: string
|
device: string
|
||||||
split_mode: string
|
split_mode: string
|
||||||
|
chat_template_kwargs: string
|
||||||
main_gpu: number
|
main_gpu: number
|
||||||
flash_attn: boolean
|
flash_attn: boolean
|
||||||
cont_batching: boolean
|
cont_batching: boolean
|
||||||
@ -1393,6 +1394,9 @@ export default class llamacpp_extension extends AIEngine {
|
|||||||
if (!cfg.reasoning_budget) {
|
if (!cfg.reasoning_budget) {
|
||||||
args.push('--reasoning-budget', '0')
|
args.push('--reasoning-budget', '0')
|
||||||
}
|
}
|
||||||
|
if (cfg.chat_template_kwargs) {
|
||||||
|
args.push('--chat-template-kwargs', cfg.chat_template_kwargs)
|
||||||
|
}
|
||||||
if (cfg.override_tensor_buffer_t)
|
if (cfg.override_tensor_buffer_t)
|
||||||
args.push('--override-tensor', cfg.override_tensor_buffer_t)
|
args.push('--override-tensor', cfg.override_tensor_buffer_t)
|
||||||
// offload multimodal projector model to the GPU by default. if there is not enough memory
|
// offload multimodal projector model to the GPU by default. if there is not enough memory
|
||||||
|
|||||||
@ -721,7 +721,7 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
|
|||||||
<div className="h-7 mr-1 text-sm rounded-sm text-main-view-fg/70 cursor-pointer flex items-center gap-1">
|
<div className="h-7 mr-1 text-sm rounded-sm text-main-view-fg/70 cursor-pointer flex items-center gap-1">
|
||||||
<span className="capitalize">
|
<span className="capitalize">
|
||||||
{selectedModel?.reasoning?.reasoning_effort ||
|
{selectedModel?.reasoning?.reasoning_effort ||
|
||||||
'auto'}
|
'medium'}
|
||||||
</span>
|
</span>
|
||||||
{reasoningEffortOpen ? (
|
{reasoningEffortOpen ? (
|
||||||
<IconChevronUp
|
<IconChevronUp
|
||||||
@ -738,23 +738,30 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
|
|||||||
</PopoverTrigger>
|
</PopoverTrigger>
|
||||||
<PopoverContent className="w-24 p-0" align="start">
|
<PopoverContent className="w-24 p-0" align="start">
|
||||||
<div className="py-1">
|
<div className="py-1">
|
||||||
{['auto', 'low', 'medium', 'high'].map(
|
{['low', 'medium', 'high'].map((effort) => (
|
||||||
(effort) => (
|
<div
|
||||||
<div
|
key={effort}
|
||||||
key={effort}
|
className="px-2 py-1.5 text-sm cursor-pointer hover:bg-main-view-fg/20 capitalize"
|
||||||
className="px-2 py-1.5 text-sm cursor-pointer hover:bg-main-view-fg/20 capitalize"
|
onClick={async () => {
|
||||||
onClick={async () => {
|
if (selectedModel?.reasoning) {
|
||||||
if (selectedModel?.reasoning) {
|
selectedModel.reasoning.reasoning_effort =
|
||||||
selectedModel.reasoning.reasoning_effort =
|
effort
|
||||||
effort
|
setReasoningEffortOpen(false)
|
||||||
setReasoningEffortOpen(false)
|
// Restart model with new reasoning effort
|
||||||
|
try {
|
||||||
|
await stopModel(selectedModel.id)
|
||||||
|
} catch (error) {
|
||||||
|
console.error(
|
||||||
|
'Error restarting model with new reasoning effort:',
|
||||||
|
error
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}}
|
}
|
||||||
>
|
}}
|
||||||
{effort}
|
>
|
||||||
</div>
|
{effort}
|
||||||
)
|
</div>
|
||||||
)}
|
))}
|
||||||
</div>
|
</div>
|
||||||
</PopoverContent>
|
</PopoverContent>
|
||||||
</Popover>
|
</Popover>
|
||||||
|
|||||||
@ -289,10 +289,6 @@ export const useChat = () => {
|
|||||||
{
|
{
|
||||||
...modelSettings,
|
...modelSettings,
|
||||||
...currentAssistant.parameters,
|
...currentAssistant.parameters,
|
||||||
...(selectedModel?.reasoning?.reasoning_budget &&
|
|
||||||
selectedModel?.reasoning?.reasoning_effort !== 'auto' && {
|
|
||||||
reasoning_effort: selectedModel?.reasoning?.reasoning_effort,
|
|
||||||
}),
|
|
||||||
} as unknown as Record<string, object>
|
} as unknown as Record<string, object>
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@ -427,6 +427,10 @@ export const startModel = async (
|
|||||||
...(modelConfig?.reasoning?.reasoning_budget !== undefined && {
|
...(modelConfig?.reasoning?.reasoning_budget !== undefined && {
|
||||||
reasoning_budget: modelConfig.reasoning.reasoning_budget,
|
reasoning_budget: modelConfig.reasoning.reasoning_budget,
|
||||||
}),
|
}),
|
||||||
|
...(modelConfig?.reasoning?.reasoning_budget &&
|
||||||
|
modelConfig?.reasoning?.reasoning_effort !== undefined && {
|
||||||
|
chat_template_kwargs: `{"reasoning_effort":"${modelConfig.reasoning.reasoning_effort}"}`,
|
||||||
|
}),
|
||||||
}
|
}
|
||||||
|
|
||||||
return engine.load(model, settings).catch((error) => {
|
return engine.load(model, settings).catch((error) => {
|
||||||
|
|||||||
@ -104,7 +104,7 @@ export const getProviders = async (): Promise<ModelProvider[]> => {
|
|||||||
model.id,
|
model.id,
|
||||||
'reasoning_effort'
|
'reasoning_effort'
|
||||||
))
|
))
|
||||||
? { reasoning_effort: 'auto' }
|
? { reasoning_effort: 'medium' }
|
||||||
: {}),
|
: {}),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user