Compare commits

...

5 Commits

Author       SHA1        Message                                                           Date
Faisal Amir  0d7578a980  chore: try using arg --chat-template-kwargs                       2025-09-04 08:39:20 +07:00
Faisal Amir  f14fd37398  chore: no need restart model when change effort                   2025-09-04 08:39:20 +07:00
Faisal Amir  371a0dd6ec  chore: provider llamacpp will always check runtime capabilities   2025-09-04 08:39:20 +07:00
Faisal Amir  a8ca0d6418  chore: update logic detect capabilities                           2025-09-04 08:39:20 +07:00
Faisal Amir  13a8727552  feat: enable reasoning configuration                              2025-09-04 08:39:20 +07:00
11 changed files with 215 additions and 71 deletions

View File

@@ -8,6 +8,7 @@ export interface chatCompletionRequestMessage {
content: string | null | Content[] // Content can be a string OR an array of content parts
reasoning?: string | null // Some models return reasoning in completed responses
reasoning_content?: string | null // Some models return reasoning in completed responses
reasoning_effort?: string | null
name?: string
tool_calls?: any[] // Simplified
tool_call_id?: string
}
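
The message shape thus gains an inline effort hint next to the reasoning text some models echo back. An illustrative payload (fields outside this hunk, such as role, are assumed from the surrounding interface):

// Illustrative only; 'role' is assumed to exist above the lines shown in the hunk.
const msg = {
  role: 'assistant',
  content: 'The answer is 42.',
  reasoning_content: 'Considered the prompt, recalled the reference...',
  reasoning_effort: 'medium',
}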
@@ -281,5 +282,5 @@ export abstract class AIEngine extends BaseExtension {
* Check if a tool is supported by the model
* @param modelId
*/
abstract isToolSupported(modelId: string): Promise<boolean>
abstract isModelCapabilitySupported(modelId: string, capability: string): Promise<boolean>
}
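
With this, the single-purpose isToolSupported hook becomes a general capability query. A minimal sketch of a migrated caller, assuming an AIEngine instance is at hand (the helper below is illustrative, not part of the diff):

declare const engine: AIEngine // any concrete engine implementing the new method

// Before: const hasTools = await engine.isToolSupported(modelId)
// After: one method, with the capability named by the caller.
async function probeCapabilities(modelId: string): Promise<string[]> {
  const candidates = ['tools', 'reasoning']
  const flags = await Promise.all(
    candidates.map((c) => engine.isModelCapabilitySupported(modelId, c))
  )
  return candidates.filter((_, i) => flags[i])
}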

View File

@@ -58,6 +58,7 @@ type LlamacppConfig = {
ubatch_size: number
device: string
split_mode: string
chat_template_kwargs: string
main_gpu: number
flash_attn: boolean
cont_batching: boolean
@@ -71,6 +72,7 @@ type LlamacppConfig = {
rope_scale: number
rope_freq_base: number
rope_freq_scale: number
reasoning_budget: boolean
ctx_shift: boolean
}
@@ -1389,6 +1391,12 @@ export default class llamacpp_extension extends AIEngine {
// These are expert-level settings and should only be used by people
// who know what they are doing.
// Takes a regex matching tensor names as input
if (!cfg.reasoning_budget) {
args.push('--reasoning-budget', '0')
}
if (cfg.chat_template_kwargs) {
args.push('--chat-template-kwargs', cfg.chat_template_kwargs)
}
if (cfg.override_tensor_buffer_t)
args.push('--override-tensor', cfg.override_tensor_buffer_t)
// offload multimodal projector model to the GPU by default. if there is not enough memory
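
Both new fields are forwarded straight to llama-server: a false reasoning_budget becomes --reasoning-budget 0 (no thinking tokens), and chat_template_kwargs passes a raw JSON string through to the chat template. The mapping in isolation (config trimmed to the two fields; a sketch, not the extension code itself):

type ReasoningFlags = Pick<LlamacppConfig, 'reasoning_budget' | 'chat_template_kwargs'>

function reasoningArgs(cfg: ReasoningFlags): string[] {
  const args: string[] = []
  if (!cfg.reasoning_budget) args.push('--reasoning-budget', '0') // disable thinking
  if (cfg.chat_template_kwargs) args.push('--chat-template-kwargs', cfg.chat_template_kwargs)
  return args
}

// reasoningArgs({ reasoning_budget: true, chat_template_kwargs: '{"reasoning_effort":"high"}' })
//   -> ['--chat-template-kwargs', '{"reasoning_effort":"high"}']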
@@ -1827,7 +1835,10 @@
* @param modelId
* @returns
*/
async isToolSupported(modelId: string): Promise<boolean> {
async isModelCapabilitySupported(
modelId: string,
capability: string
): Promise<boolean> {
const janDataFolderPath = await getJanDataFolderPath()
const modelConfigPath = await joinPath([
this.providerPath,
@@ -1846,7 +1857,7 @@
])
return (await readGgufMetadata(modelPath)).metadata?.[
'tokenizer.chat_template'
]?.includes('tools')
]?.includes(capability)
}
/**
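
Capability detection stays a substring test over the GGUF's embedded Jinja chat template: the capability name appearing anywhere in the template counts as support. The heuristic as a standalone sketch (readGgufMetadata's return shape is inferred from the hunk above):

type GgufMeta = { metadata?: Record<string, string> }
declare function readGgufMetadata(path: string): Promise<GgufMeta>

// Coarse but metadata-free: 'tools', 'reasoning', etc. mentioned in the
// template text is read as evidence the model understands that feature.
async function templateMentions(modelPath: string, capability: string): Promise<boolean> {
  const meta = await readGgufMetadata(modelPath)
  return meta.metadata?.['tokenizer.chat_template']?.includes(capability) ?? false
}

A plain includes() can false-positive on templates that merely mention the word, but it avoids depending on any extra model metadata.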

View File

@@ -7,7 +7,7 @@ import {
import {
IconEye,
IconTool,
IconAtom,
IconBrain,
IconWorld,
IconCodeCircle2,
} from '@tabler/icons-react'
@@ -30,7 +30,7 @@ const Capabilities = ({ capabilities }: CapabilitiesProps) => {
} else if (capability === 'tools') {
icon = <IconTool className="size-3.5" />
} else if (capability === 'reasoning') {
icon = <IconAtom className="size-3.5" />
icon = <IconBrain className="size-3.5" />
} else if (capability === 'embeddings') {
icon = <IconCodeCircle2 className="size-3.5" />
} else if (capability === 'web_search') {

View File

@@ -12,15 +12,22 @@ import {
TooltipProvider,
TooltipTrigger,
} from '@/components/ui/tooltip'
import {
Popover,
PopoverContent,
PopoverTrigger,
} from '@/components/ui/popover'
import { ArrowRight } from 'lucide-react'
import {
IconPhoto,
IconWorld,
IconAtom,
IconBrain,
IconTool,
IconCodeCircle2,
IconPlayerStopFilled,
IconX,
IconChevronUp,
IconChevronDown,
} from '@tabler/icons-react'
import { useTranslation } from '@/i18n/react-i18next-compat'
import { useGeneralSetting } from '@/hooks/useGeneralSetting'
@@ -33,7 +40,7 @@ import DropdownModelProvider from '@/containers/DropdownModelProvider'
import { ModelLoader } from '@/containers/loaders/ModelLoader'
import DropdownToolsAvailable from '@/containers/DropdownToolsAvailable'
import { getConnectedServers } from '@/services/mcp'
import { checkMmprojExists } from '@/services/models'
import { checkMmprojExists, stopModel } from '@/services/models'
type ChatInputProps = {
className?: string
@@ -61,6 +68,7 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
const maxRows = 10
const { selectedModel, selectedProvider } = useModelProvider()
const { sendMessage } = useChat()
const [message, setMessage] = useState('')
const [dropdownToolsAvailable, setDropdownToolsAvailable] = useState(false)
@@ -77,6 +85,7 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
const [connectedServers, setConnectedServers] = useState<string[]>([])
const [isDragOver, setIsDragOver] = useState(false)
const [hasMmproj, setHasMmproj] = useState(false)
const [reasoningEffortOpen, setReasoningEffortOpen] = useState(false)
// Check for connected MCP servers
useEffect(() => {
@@ -654,6 +663,112 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
</TooltipProvider>
)}
{selectedModel?.capabilities?.includes('reasoning') && (
<div className="flex gap-0.5 items-center">
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>
<div
className="h-7 p-1 flex items-center justify-center rounded-sm hover:bg-main-view-fg/10 transition-all duration-200 ease-in-out gap-1 cursor-pointer"
onClick={async () => {
if (
selectedModel?.reasoning &&
selectedProvider
) {
// Toggle reasoning budget
selectedModel.reasoning.reasoning_budget =
!selectedModel.reasoning.reasoning_budget
// If model is loaded, restart it with new settings
try {
await stopModel(selectedModel.id)
} catch (error) {
console.error(
'Error restarting model with new reasoning budget:',
error
)
}
}
}}
>
<IconBrain
size={18}
className={
selectedModel?.reasoning?.reasoning_budget
? 'text-main-view-fg/50'
: 'text-main-view-fg/20'
}
/>
</div>
</TooltipTrigger>
<TooltipContent>
<p>
{t('reasoning')}:{' '}
{selectedModel?.reasoning?.reasoning_budget
? 'On'
: 'Off'}
</p>
</TooltipContent>
</Tooltip>
</TooltipProvider>
{selectedModel?.reasoning?.reasoning_budget &&
selectedModel?.reasoning?.reasoning_effort && (
<Popover
open={reasoningEffortOpen}
onOpenChange={setReasoningEffortOpen}
>
<PopoverTrigger asChild>
<div className="h-7 mr-1 text-sm rounded-sm text-main-view-fg/70 cursor-pointer flex items-center gap-1">
<span className="capitalize">
{selectedModel?.reasoning?.reasoning_effort ||
'medium'}
</span>
{reasoningEffortOpen ? (
<IconChevronUp
size={12}
className="text-main-view-fg/50"
/>
) : (
<IconChevronDown
size={12}
className="text-main-view-fg/50"
/>
)}
</div>
</PopoverTrigger>
<PopoverContent className="w-24 p-0" align="start">
<div className="py-1">
{['low', 'medium', 'high'].map((effort) => (
<div
key={effort}
className="px-2 py-1.5 text-sm cursor-pointer hover:bg-main-view-fg/20 capitalize"
onClick={async () => {
if (selectedModel?.reasoning) {
selectedModel.reasoning.reasoning_effort =
effort
setReasoningEffortOpen(false)
// Restart model with new reasoning effort
try {
await stopModel(selectedModel.id)
} catch (error) {
console.error(
'Error restarting model with new reasoning effort:',
error
)
}
}
}}
>
{effort}
</div>
))}
</div>
</PopoverContent>
</Popover>
)}
</div>
)}
{selectedModel?.capabilities?.includes('tools') &&
hasActiveMCPServers && (
<TooltipProvider>
@@ -728,23 +843,6 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
</Tooltip>
</TooltipProvider>
)}
{selectedModel?.capabilities?.includes('reasoning') && (
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>
<div className="h-7 p-1 flex items-center justify-center rounded-sm hover:bg-main-view-fg/10 transition-all duration-200 ease-in-out gap-1">
<IconAtom
size={18}
className="text-main-view-fg/50"
/>
</div>
</TooltipTrigger>
<TooltipContent>
<p>{t('reasoning')}</p>
</TooltipContent>
</Tooltip>
</TooltipProvider>
)}
</div>
</div>
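
Both handlers above follow the same stop-and-lazy-reload pattern: mutate selectedModel.reasoning in place, stop the running instance, and let the next completion restart llama-server with the new flags (folded in by startModel in the services/models hunk further down). Boiled down, with UI state and error handling omitted and types assumed:

type Reasoning = { reasoning_budget: boolean; reasoning_effort?: string }
declare function stopModel(modelId: string): Promise<void>

async function applyReasoningChange(
  model: { id: string; reasoning?: Reasoning },
  patch: Partial<Reasoning> // e.g. { reasoning_effort: 'high' }
) {
  if (!model.reasoning) return
  Object.assign(model.reasoning, patch)
  await stopModel(model.id) // next request reloads with the updated settings
}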

View File

@@ -393,7 +393,7 @@ const DropdownModelProvider = ({
return (
<Popover open={open} onOpenChange={onOpenChange}>
<div className="bg-main-view-fg/5 hover:bg-main-view-fg/8 px-2 py-1 flex items-center gap-1.5 rounded-sm max-h-[32px] mr-0.5">
<div className="bg-main-view-fg/5 hover:bg-main-view-fg/8 px-2 py-1 flex items-center gap-1.5 rounded-sm max-h-[32px] mr-1">
<PopoverTrigger asChild>
<button
title={displayModel}

View File

@@ -77,6 +77,7 @@ export const useModelProvider = create<ModelProviderState>()(
('id' in e || 'model' in e) &&
typeof (e.id ?? e.model) === 'string'
)
console.log(models, 'models')
const mergedModels = [
...(provider?.models ?? []).filter(
(e) =>
@@ -99,7 +100,10 @@
return {
...model,
settings: settings,
capabilities: existingModel?.capabilities || model.capabilities,
capabilities:
provider.provider === 'llamacpp'
? model.capabilities
: existingModel?.capabilities || model.capabilities,
}
})
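
This is commit 371a0dd6ec in one line: llamacpp capabilities always come from the fresh runtime probe, while other providers keep the persisted value when one exists. The rule in isolation (a sketch; parameter names are illustrative):

function mergeCapabilities(
  providerName: string,
  fresh?: string[],    // capabilities just reported by the provider
  persisted?: string[] // capabilities already in the store
): string[] | undefined {
  // llamacpp re-probes the GGUF template on every fetch, so its answer wins;
  // remote providers trust whatever was persisted earlier.
  return providerName === 'llamacpp' ? fresh : persisted || fresh
}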

View File

@@ -69,7 +69,7 @@ vi.mock('../models', () => ({
{ id: 'llama-2-7b', name: 'Llama 2 7B', description: 'Llama model' },
])
),
isToolSupported: vi.fn(() => Promise.resolve(false)),
isModelCapabilitySupported: vi.fn(() => Promise.resolve(false)),
}))
vi.mock('@/lib/extension', () => ({

View File

@@ -415,14 +415,23 @@ export const startModel = async (
return keyMappings[key] || key
}
const settings = modelConfig?.settings
? Object.fromEntries(
Object.entries(modelConfig.settings).map(([key, value]) => [
mapSettingKey(key),
value.controller_props?.value,
])
)
: undefined
const settings = {
...(modelConfig?.settings
? Object.fromEntries(
Object.entries(modelConfig.settings).map(([key, value]) => [
mapSettingKey(key),
value.controller_props?.value,
])
)
: {}),
...(modelConfig?.reasoning?.reasoning_budget !== undefined && {
reasoning_budget: modelConfig.reasoning.reasoning_budget,
}),
...(modelConfig?.reasoning?.reasoning_budget &&
modelConfig?.reasoning?.reasoning_effort !== undefined && {
chat_template_kwargs: `{"reasoning_effort":"${modelConfig.reasoning.reasoning_effort}"}`,
}),
}
return engine.load(model, settings).catch((error) => {
console.error(
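
For a reasoning model left at the default budget and set to high effort, the merged settings object handed to engine.load comes out roughly like this (values illustrative):

// Illustrative result of the merge above.
const settings = {
  ctx_len: 8192,                                       // from the mapped per-model settings
  reasoning_budget: true,                              // from modelConfig.reasoning
  chat_template_kwargs: '{"reasoning_effort":"high"}', // built only while the budget is on
}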
@@ -439,11 +448,14 @@
* @param modelId
* @returns
*/
export const isToolSupported = async (modelId: string): Promise<boolean> => {
export const isModelCapabilitySupported = async (
modelId: string,
capability: string
): Promise<boolean> => {
const engine = getEngine()
if (!engine) return false
return engine.isToolSupported(modelId)
return engine.isModelCapabilitySupported(modelId, capability)
}
/**

View File

@@ -3,7 +3,7 @@ import { predefinedProviders } from '@/consts/providers'
import { EngineManager, SettingComponentProps } from '@janhq/core'
import { ModelCapabilities } from '@/types/models'
import { modelSettings } from '@/lib/predefined'
import { fetchModels, isToolSupported } from './models'
import { fetchModels, isModelCapabilitySupported } from './models'
import { ExtensionManager } from '@/lib/extension'
import { fetch as fetchTauri } from '@tauri-apps/plugin-http'
@@ -63,39 +63,54 @@ export const getProviders = async (): Promise<ModelProvider[]> => {
}
}) as ProviderSetting[],
models: await Promise.all(
models.map(
async (model) =>
({
id: model.id,
model: model.id,
name: model.name,
description: model.description,
capabilities:
'capabilities' in model
? (model.capabilities as string[])
: (await isToolSupported(model.id))
? [ModelCapabilities.TOOLS]
: [],
provider: providerName,
settings: Object.values(modelSettings).reduce(
(acc, setting) => {
let value = setting.controller_props.value
if (setting.key === 'ctx_len') {
value = 8192 // Default context length for Llama.cpp models
}
acc[setting.key] = {
...setting,
controller_props: {
...setting.controller_props,
value: value,
},
}
return acc
},
{} as Record<string, ProviderSetting>
),
}) as Model
)
models.map(async (model) => {
return {
id: model.id,
model: model.id,
name: model.name,
description: model.description,
capabilities: [
...((await isModelCapabilitySupported(model.id, 'tools'))
? [ModelCapabilities.TOOLS]
: []),
...((await isModelCapabilitySupported(model.id, 'reasoning'))
? [ModelCapabilities.REASONING]
: []),
],
provider: providerName,
settings: Object.values(modelSettings).reduce(
(acc, setting) => {
let value = setting.controller_props.value
if (setting.key === 'ctx_len') {
value = 8192 // Default context length for Llama.cpp models
}
acc[setting.key] = {
...setting,
controller_props: {
...setting.controller_props,
value: value,
},
}
return acc
},
{} as Record<string, ProviderSetting>
),
...(model.capabilities?.includes(ModelCapabilities.REASONING) ||
(await isModelCapabilitySupported(model.id, 'reasoning'))
? {
reasoning: {
reasoning_budget: true,
...((await isModelCapabilitySupported(
model.id,
'reasoning_effort'
))
? { reasoning_effort: 'medium' }
: {}),
},
}
: {}),
} as Model
})
),
}
runtimeProviders.push(provider)
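
The upshot for a llamacpp model whose template mentions both features (a sketch; the id is hypothetical):

// Shape produced by the mapping above for a reasoning-capable model.
const example = {
  id: 'qwen3-4b', // hypothetical
  capabilities: ['tools', 'reasoning'],
  reasoning: {
    reasoning_budget: true,     // thinking on by default
    reasoning_effort: 'medium', // present only if the template reads reasoning_effort
  },
}

Each of these capability probes re-reads the GGUF metadata, so a single model may be probed several times during one getProviders call.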

View File

@@ -33,6 +33,8 @@ type Model = {
format?: string
capabilities?: string[]
settings?: Record<string, ProviderSetting>
// eslint-disable-next-line @typescript-eslint/no-explicit-any
reasoning?: Record<string, any>
}
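
The Record<string, any> keeps the field open-ended, though usage across the hunks pins it to two members; a stricter shape, were one wanted, would look like:

// Inferred from usage elsewhere in this diff; not part of the change itself.
type ModelReasoning = {
  reasoning_budget: boolean                    // false maps to --reasoning-budget 0
  reasoning_effort?: 'low' | 'medium' | 'high' // serialized into chat_template_kwargs
}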
/**

View File

@@ -7,6 +7,7 @@ export enum ModelCapabilities {
COMPLETION = 'completion',
TOOLS = 'tools',
EMBEDDINGS = 'embeddings',
REASONING = 'reasoning',
IMAGE_GENERATION = 'image_generation',
AUDIO_GENERATION = 'audio_generation',
TEXT_TO_IMAGE = 'text_to_image',