chore: try using arg --chat-template-kwargs
This commit is contained in:
parent
f14fd37398
commit
0d7578a980
@ -58,6 +58,7 @@ type LlamacppConfig = {
|
||||
ubatch_size: number
|
||||
device: string
|
||||
split_mode: string
|
||||
chat_template_kwargs: string
|
||||
main_gpu: number
|
||||
flash_attn: boolean
|
||||
cont_batching: boolean
|
||||
@ -1393,6 +1394,9 @@ export default class llamacpp_extension extends AIEngine {
|
||||
if (!cfg.reasoning_budget) {
|
||||
args.push('--reasoning-budget', '0')
|
||||
}
|
||||
if (cfg.chat_template_kwargs) {
|
||||
args.push('--chat-template-kwargs', cfg.chat_template_kwargs)
|
||||
}
|
||||
if (cfg.override_tensor_buffer_t)
|
||||
args.push('--override-tensor', cfg.override_tensor_buffer_t)
|
||||
// offload multimodal projector model to the GPU by default. if there is not enough memory
|
||||
|
||||
@ -721,7 +721,7 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
|
||||
<div className="h-7 mr-1 text-sm rounded-sm text-main-view-fg/70 cursor-pointer flex items-center gap-1">
|
||||
<span className="capitalize">
|
||||
{selectedModel?.reasoning?.reasoning_effort ||
|
||||
'auto'}
|
||||
'medium'}
|
||||
</span>
|
||||
{reasoningEffortOpen ? (
|
||||
<IconChevronUp
|
||||
@ -738,23 +738,30 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
|
||||
</PopoverTrigger>
|
||||
<PopoverContent className="w-24 p-0" align="start">
|
||||
<div className="py-1">
|
||||
{['auto', 'low', 'medium', 'high'].map(
|
||||
(effort) => (
|
||||
<div
|
||||
key={effort}
|
||||
className="px-2 py-1.5 text-sm cursor-pointer hover:bg-main-view-fg/20 capitalize"
|
||||
onClick={async () => {
|
||||
if (selectedModel?.reasoning) {
|
||||
selectedModel.reasoning.reasoning_effort =
|
||||
effort
|
||||
setReasoningEffortOpen(false)
|
||||
{['low', 'medium', 'high'].map((effort) => (
|
||||
<div
|
||||
key={effort}
|
||||
className="px-2 py-1.5 text-sm cursor-pointer hover:bg-main-view-fg/20 capitalize"
|
||||
onClick={async () => {
|
||||
if (selectedModel?.reasoning) {
|
||||
selectedModel.reasoning.reasoning_effort =
|
||||
effort
|
||||
setReasoningEffortOpen(false)
|
||||
// Restart model with new reasoning effort
|
||||
try {
|
||||
await stopModel(selectedModel.id)
|
||||
} catch (error) {
|
||||
console.error(
|
||||
'Error restarting model with new reasoning effort:',
|
||||
error
|
||||
)
|
||||
}
|
||||
}}
|
||||
>
|
||||
{effort}
|
||||
</div>
|
||||
)
|
||||
)}
|
||||
}
|
||||
}}
|
||||
>
|
||||
{effort}
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</PopoverContent>
|
||||
</Popover>
|
||||
|
||||
@ -289,10 +289,6 @@ export const useChat = () => {
|
||||
{
|
||||
...modelSettings,
|
||||
...currentAssistant.parameters,
|
||||
...(selectedModel?.reasoning?.reasoning_budget &&
|
||||
selectedModel?.reasoning?.reasoning_effort !== 'auto' && {
|
||||
reasoning_effort: selectedModel?.reasoning?.reasoning_effort,
|
||||
}),
|
||||
} as unknown as Record<string, object>
|
||||
)
|
||||
|
||||
|
||||
@ -427,6 +427,10 @@ export const startModel = async (
|
||||
...(modelConfig?.reasoning?.reasoning_budget !== undefined && {
|
||||
reasoning_budget: modelConfig.reasoning.reasoning_budget,
|
||||
}),
|
||||
...(modelConfig?.reasoning?.reasoning_budget &&
|
||||
modelConfig?.reasoning?.reasoning_effort !== undefined && {
|
||||
chat_template_kwargs: `{"reasoning_effort":"${modelConfig.reasoning.reasoning_effort}"}`,
|
||||
}),
|
||||
}
|
||||
|
||||
return engine.load(model, settings).catch((error) => {
|
||||
|
||||
@ -104,7 +104,7 @@ export const getProviders = async (): Promise<ModelProvider[]> => {
|
||||
model.id,
|
||||
'reasoning_effort'
|
||||
))
|
||||
? { reasoning_effort: 'auto' }
|
||||
? { reasoning_effort: 'medium' }
|
||||
: {}),
|
||||
},
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user