chore: fix model settings not being applied correctly on change (#5231)

* chore: fix model settings not being applied correctly on change

* chore: handle failed tool call

* chore: stop inference and model on reject
This commit is contained in:
Louis 2025-06-10 16:26:42 +07:00 committed by GitHub
parent dcb3f794d3
commit 51a321219d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 57 additions and 23 deletions

View File

@ -23,8 +23,8 @@
"description": "Number of prompts that can be processed simultaneously by the model.", "description": "Number of prompts that can be processed simultaneously by the model.",
"controllerType": "input", "controllerType": "input",
"controllerProps": { "controllerProps": {
"value": "4", "value": "1",
"placeholder": "4", "placeholder": "1",
"type": "number", "type": "number",
"textAlign": "right" "textAlign": "right"
} }

View File

@ -55,7 +55,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
shouldReconnect = true shouldReconnect = true
/** Default Engine model load settings */ /** Default Engine model load settings */
n_parallel: number = 4 n_parallel?: number
cont_batching: boolean = true cont_batching: boolean = true
caching_enabled: boolean = true caching_enabled: boolean = true
flash_attn: boolean = true flash_attn: boolean = true
@ -114,8 +114,10 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
// Register Settings // Register Settings
this.registerSettings(SETTINGS) this.registerSettings(SETTINGS)
this.n_parallel = const numParallel = await this.getSetting<string>(Settings.n_parallel, '')
Number(await this.getSetting<string>(Settings.n_parallel, '4')) ?? 4 if (numParallel.length > 0 && parseInt(numParallel) > 0) {
this.n_parallel = parseInt(numParallel)
}
this.cont_batching = await this.getSetting<boolean>( this.cont_batching = await this.getSetting<boolean>(
Settings.cont_batching, Settings.cont_batching,
true true
@ -184,7 +186,9 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
*/ */
onSettingUpdate<T>(key: string, value: T): void { onSettingUpdate<T>(key: string, value: T): void {
if (key === Settings.n_parallel && typeof value === 'string') { if (key === Settings.n_parallel && typeof value === 'string') {
this.n_parallel = Number(value) ?? 1 if (value.length > 0 && parseInt(value) > 0) {
this.n_parallel = parseInt(value)
}
} else if (key === Settings.cont_batching && typeof value === 'boolean') { } else if (key === Settings.cont_batching && typeof value === 'boolean') {
this.cont_batching = value as boolean this.cont_batching = value as boolean
} else if (key === Settings.caching_enabled && typeof value === 'boolean') { } else if (key === Settings.caching_enabled && typeof value === 'boolean') {

View File

@ -35,6 +35,7 @@ import DropdownModelProvider from '@/containers/DropdownModelProvider'
import { ModelLoader } from '@/containers/loaders/ModelLoader' import { ModelLoader } from '@/containers/loaders/ModelLoader'
import DropdownToolsAvailable from '@/containers/DropdownToolsAvailable' import DropdownToolsAvailable from '@/containers/DropdownToolsAvailable'
import { getConnectedServers } from '@/services/mcp' import { getConnectedServers } from '@/services/mcp'
import { stopAllModels } from '@/services/models'
type ChatInputProps = { type ChatInputProps = {
className?: string className?: string
@ -161,6 +162,7 @@ const ChatInput = ({
const stopStreaming = useCallback( const stopStreaming = useCallback(
(threadId: string) => { (threadId: string) => {
abortControllers[threadId]?.abort() abortControllers[threadId]?.abort()
stopAllModels()
}, },
[abortControllers] [abortControllers]
) )

View File

@ -61,6 +61,10 @@ export const useChat = () => {
return getProviderByName(selectedProvider) return getProviderByName(selectedProvider)
}, [selectedProvider, getProviderByName]) }, [selectedProvider, getProviderByName])
const currentProviderId = useMemo(() => {
return provider?.provider || selectedProvider
}, [provider, selectedProvider])
useEffect(() => { useEffect(() => {
function setTools() { function setTools() {
getTools().then((data: MCPTool[]) => { getTools().then((data: MCPTool[]) => {
@ -109,7 +113,10 @@ export const useChat = () => {
const activeThread = await getCurrentThread() const activeThread = await getCurrentThread()
resetTokenSpeed() resetTokenSpeed()
if (!activeThread || !provider) return const activeProvider = currentProviderId
? getProviderByName(currentProviderId)
: provider
if (!activeThread || !activeProvider) return
const messages = getMessages(activeThread.id) const messages = getMessages(activeThread.id)
const abortController = new AbortController() const abortController = new AbortController()
setAbortController(activeThread.id, abortController) setAbortController(activeThread.id, abortController)
@ -120,9 +127,11 @@ export const useChat = () => {
try { try {
if (selectedModel?.id) { if (selectedModel?.id) {
updateLoadingModel(true) updateLoadingModel(true)
await startModel(provider, selectedModel.id, abortController).catch( await startModel(
console.error activeProvider,
) selectedModel.id,
abortController
).catch(console.error)
updateLoadingModel(false) updateLoadingModel(false)
} }
@ -148,7 +157,7 @@ export const useChat = () => {
while (!isCompleted && !abortController.signal.aborted) { while (!isCompleted && !abortController.signal.aborted) {
const completion = await sendCompletion( const completion = await sendCompletion(
activeThread, activeThread,
provider, activeProvider,
builder.getMessages(), builder.getMessages(),
abortController, abortController,
availableTools, availableTools,
@ -194,7 +203,7 @@ export const useChat = () => {
accumulatedText.length === 0 && accumulatedText.length === 0 &&
toolCalls.length === 0 && toolCalls.length === 0 &&
activeThread.model?.id && activeThread.model?.id &&
provider.provider === 'llama.cpp' activeProvider.provider === 'llama.cpp'
) { ) {
await stopModel(activeThread.model.id, 'cortex') await stopModel(activeThread.model.id, 'cortex')
throw new Error('No response received from the model') throw new Error('No response received from the model')
@ -235,6 +244,8 @@ export const useChat = () => {
[ [
getCurrentThread, getCurrentThread,
resetTokenSpeed, resetTokenSpeed,
currentProviderId,
getProviderByName,
provider, provider,
getMessages, getMessages,
setAbortController, setAbortController,
@ -246,11 +257,11 @@ export const useChat = () => {
currentAssistant, currentAssistant,
tools, tools,
updateLoadingModel, updateLoadingModel,
updateTokenSpeed,
approvedTools,
showApprovalModal,
getDisabledToolsForThread, getDisabledToolsForThread,
approvedTools,
allowAllMCPPermissions, allowAllMCPPermissions,
showApprovalModal,
updateTokenSpeed,
] ]
) )

View File

@ -304,6 +304,17 @@ export const postMessageProcessing = async (
arguments: toolCall.function.arguments.length arguments: toolCall.function.arguments.length
? JSON.parse(toolCall.function.arguments) ? JSON.parse(toolCall.function.arguments)
: {}, : {},
}).catch((e) => {
console.error('Tool call failed:', e)
return {
content: [
{
type: 'text',
text: `Error calling tool ${toolCall.function.name}: ${e.message}`,
},
],
error: true,
}
}) })
: { : {
content: [ content: [

View File

@ -98,13 +98,15 @@ export const getProviders = async (): Promise<ModelProvider[]> => {
'inferenceUrl' in value 'inferenceUrl' in value
? (value.inferenceUrl as string).replace('/chat/completions', '') ? (value.inferenceUrl as string).replace('/chat/completions', '')
: '', : '',
settings: (await value.getSettings()).map((setting) => ({ settings: (await value.getSettings()).map((setting) => {
key: setting.key, return {
title: setting.title, key: setting.key,
description: setting.description, title: setting.title,
controller_type: setting.controllerType as unknown, description: setting.description,
controller_props: setting.controllerProps as unknown, controller_type: setting.controllerType as unknown,
})) as ProviderSetting[], controller_props: setting.controllerProps as unknown,
}
}) as ProviderSetting[],
models: models.map((model) => ({ models: models.map((model) => ({
id: model.id, id: model.id,
model: model.id, model: model.id,
@ -117,9 +119,13 @@ export const getProviders = async (): Promise<ModelProvider[]> => {
provider: providerName, provider: providerName,
settings: Object.values(modelSettings).reduce( settings: Object.values(modelSettings).reduce(
(acc, setting) => { (acc, setting) => {
const value = model[ let value = model[
setting.key as keyof typeof model setting.key as keyof typeof model
] as keyof typeof setting.controller_props.value ] as keyof typeof setting.controller_props.value
if (setting.key === 'ctx_len') {
// @ts-expect-error dynamic type
value = 4096 // Default context length for Llama.cpp models
}
acc[setting.key] = { acc[setting.key] = {
...setting, ...setting,
controller_props: { controller_props: {