chore: fix model settings not being applied on change (#5231)
* chore: fix model settings not being applied on change
* chore: handle failed tool call
* chore: stop inference and model on reject
This commit is contained in:
parent
dcb3f794d3
commit
51a321219d
@ -23,8 +23,8 @@
|
|||||||
"description": "Number of prompts that can be processed simultaneously by the model.",
|
"description": "Number of prompts that can be processed simultaneously by the model.",
|
||||||
"controllerType": "input",
|
"controllerType": "input",
|
||||||
"controllerProps": {
|
"controllerProps": {
|
||||||
"value": "4",
|
"value": "1",
|
||||||
"placeholder": "4",
|
"placeholder": "1",
|
||||||
"type": "number",
|
"type": "number",
|
||||||
"textAlign": "right"
|
"textAlign": "right"
|
||||||
}
|
}
|
||||||
|
|||||||
@ -55,7 +55,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
|
|||||||
shouldReconnect = true
|
shouldReconnect = true
|
||||||
|
|
||||||
/** Default Engine model load settings */
|
/** Default Engine model load settings */
|
||||||
n_parallel: number = 4
|
n_parallel?: number
|
||||||
cont_batching: boolean = true
|
cont_batching: boolean = true
|
||||||
caching_enabled: boolean = true
|
caching_enabled: boolean = true
|
||||||
flash_attn: boolean = true
|
flash_attn: boolean = true
|
||||||
@ -114,8 +114,10 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
|
|||||||
// Register Settings
|
// Register Settings
|
||||||
this.registerSettings(SETTINGS)
|
this.registerSettings(SETTINGS)
|
||||||
|
|
||||||
this.n_parallel =
|
const numParallel = await this.getSetting<string>(Settings.n_parallel, '')
|
||||||
Number(await this.getSetting<string>(Settings.n_parallel, '4')) ?? 4
|
if (numParallel.length > 0 && parseInt(numParallel) > 0) {
|
||||||
|
this.n_parallel = parseInt(numParallel)
|
||||||
|
}
|
||||||
this.cont_batching = await this.getSetting<boolean>(
|
this.cont_batching = await this.getSetting<boolean>(
|
||||||
Settings.cont_batching,
|
Settings.cont_batching,
|
||||||
true
|
true
|
||||||
@ -184,7 +186,9 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
|
|||||||
*/
|
*/
|
||||||
onSettingUpdate<T>(key: string, value: T): void {
|
onSettingUpdate<T>(key: string, value: T): void {
|
||||||
if (key === Settings.n_parallel && typeof value === 'string') {
|
if (key === Settings.n_parallel && typeof value === 'string') {
|
||||||
this.n_parallel = Number(value) ?? 1
|
if (value.length > 0 && parseInt(value) > 0) {
|
||||||
|
this.n_parallel = parseInt(value)
|
||||||
|
}
|
||||||
} else if (key === Settings.cont_batching && typeof value === 'boolean') {
|
} else if (key === Settings.cont_batching && typeof value === 'boolean') {
|
||||||
this.cont_batching = value as boolean
|
this.cont_batching = value as boolean
|
||||||
} else if (key === Settings.caching_enabled && typeof value === 'boolean') {
|
} else if (key === Settings.caching_enabled && typeof value === 'boolean') {
|
||||||
|
|||||||
@ -35,6 +35,7 @@ import DropdownModelProvider from '@/containers/DropdownModelProvider'
|
|||||||
import { ModelLoader } from '@/containers/loaders/ModelLoader'
|
import { ModelLoader } from '@/containers/loaders/ModelLoader'
|
||||||
import DropdownToolsAvailable from '@/containers/DropdownToolsAvailable'
|
import DropdownToolsAvailable from '@/containers/DropdownToolsAvailable'
|
||||||
import { getConnectedServers } from '@/services/mcp'
|
import { getConnectedServers } from '@/services/mcp'
|
||||||
|
import { stopAllModels } from '@/services/models'
|
||||||
|
|
||||||
type ChatInputProps = {
|
type ChatInputProps = {
|
||||||
className?: string
|
className?: string
|
||||||
@ -161,6 +162,7 @@ const ChatInput = ({
|
|||||||
const stopStreaming = useCallback(
|
const stopStreaming = useCallback(
|
||||||
(threadId: string) => {
|
(threadId: string) => {
|
||||||
abortControllers[threadId]?.abort()
|
abortControllers[threadId]?.abort()
|
||||||
|
stopAllModels()
|
||||||
},
|
},
|
||||||
[abortControllers]
|
[abortControllers]
|
||||||
)
|
)
|
||||||
|
|||||||
@ -61,6 +61,10 @@ export const useChat = () => {
|
|||||||
return getProviderByName(selectedProvider)
|
return getProviderByName(selectedProvider)
|
||||||
}, [selectedProvider, getProviderByName])
|
}, [selectedProvider, getProviderByName])
|
||||||
|
|
||||||
|
const currentProviderId = useMemo(() => {
|
||||||
|
return provider?.provider || selectedProvider
|
||||||
|
}, [provider, selectedProvider])
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
function setTools() {
|
function setTools() {
|
||||||
getTools().then((data: MCPTool[]) => {
|
getTools().then((data: MCPTool[]) => {
|
||||||
@ -109,7 +113,10 @@ export const useChat = () => {
|
|||||||
const activeThread = await getCurrentThread()
|
const activeThread = await getCurrentThread()
|
||||||
|
|
||||||
resetTokenSpeed()
|
resetTokenSpeed()
|
||||||
if (!activeThread || !provider) return
|
const activeProvider = currentProviderId
|
||||||
|
? getProviderByName(currentProviderId)
|
||||||
|
: provider
|
||||||
|
if (!activeThread || !activeProvider) return
|
||||||
const messages = getMessages(activeThread.id)
|
const messages = getMessages(activeThread.id)
|
||||||
const abortController = new AbortController()
|
const abortController = new AbortController()
|
||||||
setAbortController(activeThread.id, abortController)
|
setAbortController(activeThread.id, abortController)
|
||||||
@ -120,9 +127,11 @@ export const useChat = () => {
|
|||||||
try {
|
try {
|
||||||
if (selectedModel?.id) {
|
if (selectedModel?.id) {
|
||||||
updateLoadingModel(true)
|
updateLoadingModel(true)
|
||||||
await startModel(provider, selectedModel.id, abortController).catch(
|
await startModel(
|
||||||
console.error
|
activeProvider,
|
||||||
)
|
selectedModel.id,
|
||||||
|
abortController
|
||||||
|
).catch(console.error)
|
||||||
updateLoadingModel(false)
|
updateLoadingModel(false)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -148,7 +157,7 @@ export const useChat = () => {
|
|||||||
while (!isCompleted && !abortController.signal.aborted) {
|
while (!isCompleted && !abortController.signal.aborted) {
|
||||||
const completion = await sendCompletion(
|
const completion = await sendCompletion(
|
||||||
activeThread,
|
activeThread,
|
||||||
provider,
|
activeProvider,
|
||||||
builder.getMessages(),
|
builder.getMessages(),
|
||||||
abortController,
|
abortController,
|
||||||
availableTools,
|
availableTools,
|
||||||
@ -194,7 +203,7 @@ export const useChat = () => {
|
|||||||
accumulatedText.length === 0 &&
|
accumulatedText.length === 0 &&
|
||||||
toolCalls.length === 0 &&
|
toolCalls.length === 0 &&
|
||||||
activeThread.model?.id &&
|
activeThread.model?.id &&
|
||||||
provider.provider === 'llama.cpp'
|
activeProvider.provider === 'llama.cpp'
|
||||||
) {
|
) {
|
||||||
await stopModel(activeThread.model.id, 'cortex')
|
await stopModel(activeThread.model.id, 'cortex')
|
||||||
throw new Error('No response received from the model')
|
throw new Error('No response received from the model')
|
||||||
@ -235,6 +244,8 @@ export const useChat = () => {
|
|||||||
[
|
[
|
||||||
getCurrentThread,
|
getCurrentThread,
|
||||||
resetTokenSpeed,
|
resetTokenSpeed,
|
||||||
|
currentProviderId,
|
||||||
|
getProviderByName,
|
||||||
provider,
|
provider,
|
||||||
getMessages,
|
getMessages,
|
||||||
setAbortController,
|
setAbortController,
|
||||||
@ -246,11 +257,11 @@ export const useChat = () => {
|
|||||||
currentAssistant,
|
currentAssistant,
|
||||||
tools,
|
tools,
|
||||||
updateLoadingModel,
|
updateLoadingModel,
|
||||||
updateTokenSpeed,
|
|
||||||
approvedTools,
|
|
||||||
showApprovalModal,
|
|
||||||
getDisabledToolsForThread,
|
getDisabledToolsForThread,
|
||||||
|
approvedTools,
|
||||||
allowAllMCPPermissions,
|
allowAllMCPPermissions,
|
||||||
|
showApprovalModal,
|
||||||
|
updateTokenSpeed,
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@ -304,6 +304,17 @@ export const postMessageProcessing = async (
|
|||||||
arguments: toolCall.function.arguments.length
|
arguments: toolCall.function.arguments.length
|
||||||
? JSON.parse(toolCall.function.arguments)
|
? JSON.parse(toolCall.function.arguments)
|
||||||
: {},
|
: {},
|
||||||
|
}).catch((e) => {
|
||||||
|
console.error('Tool call failed:', e)
|
||||||
|
return {
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'text',
|
||||||
|
text: `Error calling tool ${toolCall.function.name}: ${e.message}`,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
error: true,
|
||||||
|
}
|
||||||
})
|
})
|
||||||
: {
|
: {
|
||||||
content: [
|
content: [
|
||||||
|
|||||||
@ -98,13 +98,15 @@ export const getProviders = async (): Promise<ModelProvider[]> => {
|
|||||||
'inferenceUrl' in value
|
'inferenceUrl' in value
|
||||||
? (value.inferenceUrl as string).replace('/chat/completions', '')
|
? (value.inferenceUrl as string).replace('/chat/completions', '')
|
||||||
: '',
|
: '',
|
||||||
settings: (await value.getSettings()).map((setting) => ({
|
settings: (await value.getSettings()).map((setting) => {
|
||||||
key: setting.key,
|
return {
|
||||||
title: setting.title,
|
key: setting.key,
|
||||||
description: setting.description,
|
title: setting.title,
|
||||||
controller_type: setting.controllerType as unknown,
|
description: setting.description,
|
||||||
controller_props: setting.controllerProps as unknown,
|
controller_type: setting.controllerType as unknown,
|
||||||
})) as ProviderSetting[],
|
controller_props: setting.controllerProps as unknown,
|
||||||
|
}
|
||||||
|
}) as ProviderSetting[],
|
||||||
models: models.map((model) => ({
|
models: models.map((model) => ({
|
||||||
id: model.id,
|
id: model.id,
|
||||||
model: model.id,
|
model: model.id,
|
||||||
@ -117,9 +119,13 @@ export const getProviders = async (): Promise<ModelProvider[]> => {
|
|||||||
provider: providerName,
|
provider: providerName,
|
||||||
settings: Object.values(modelSettings).reduce(
|
settings: Object.values(modelSettings).reduce(
|
||||||
(acc, setting) => {
|
(acc, setting) => {
|
||||||
const value = model[
|
let value = model[
|
||||||
setting.key as keyof typeof model
|
setting.key as keyof typeof model
|
||||||
] as keyof typeof setting.controller_props.value
|
] as keyof typeof setting.controller_props.value
|
||||||
|
if (setting.key === 'ctx_len') {
|
||||||
|
// @ts-expect-error dynamic type
|
||||||
|
value = 4096 // Default context length for Llama.cpp models
|
||||||
|
}
|
||||||
acc[setting.key] = {
|
acc[setting.key] = {
|
||||||
...setting,
|
...setting,
|
||||||
controller_props: {
|
controller_props: {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user