chore: allow users to set model offload (#5134)

* chore: allow users to set model offload

* chore: apply model.yaml configurations to default model settings

* chore: fall back to default value
This commit is contained in:
Louis 2025-05-29 13:29:32 +07:00 committed by GitHub
parent 1b3f16b3e1
commit a1111033d9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 48 additions and 10 deletions

View File

@ -1,4 +1,13 @@
[
{
"key": "auto_unload_models",
"title": "Auto-Unload Old Models",
"description": "Automatically unloads models that are not in use to free up memory, ensuring only one model is loaded at a time.",
"controllerType": "checkbox",
"controllerProps": {
"value": true
}
},
{
"key": "cont_batching",
"title": "Continuous Batching",

View File

@ -37,6 +37,7 @@ enum Settings {
use_mmap = 'use_mmap',
cpu_threads = 'cpu_threads',
huggingfaceToken = 'hugging-face-access-token',
auto_unload_models = 'auto_unload_models',
}
type LoadedModelResponse = { data: { engine: string; id: string }[] }
@ -61,7 +62,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
use_mmap: boolean = true
cache_type: string = 'f16'
cpu_threads?: number
auto_unload_models: boolean = true
/**
* The URL for making inference requests.
*/
@ -126,6 +127,10 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
this.flash_attn = await this.getSetting<boolean>(Settings.flash_attn, true)
this.use_mmap = await this.getSetting<boolean>(Settings.use_mmap, true)
this.cache_type = await this.getSetting<string>(Settings.cache_type, 'f16')
this.auto_unload_models = await this.getSetting<boolean>(
Settings.auto_unload_models,
true
)
const threads_number = Number(
await this.getSetting<string>(Settings.cpu_threads, '')
)
@ -176,6 +181,8 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number
} else if (key === Settings.huggingfaceToken) {
this.updateCortexConfig({ huggingface_token: value })
} else if (key === Settings.auto_unload_models) {
this.auto_unload_models = value as boolean
}
}
@ -205,7 +212,15 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
console.log(`Model ${model.id} already loaded`)
return
}
if (this.auto_unload_models) {
// Unload every currently loaded model other than the one being started
for (const lastUsedModel of loadedModels) {
if (lastUsedModel.id !== model.id) {
console.log(`Unloading last used model: ${lastUsedModel.id}`)
await this.unloadModel(lastUsedModel as Model)
}
}
}
return await this.apiInstance().then((api) =>
api
.post('v1/models/start', {

View File

@ -126,11 +126,9 @@ export const useChat = () => {
let availableTools = selectedModel?.capabilities?.includes('tools')
? tools
: []
while (
!isCompleted &&
!abortController.signal.aborted
// TODO: Max attempts can be set in the provider settings later
) {
// TODO: Later replaced by Agent setup?
const followUpWithToolUse = true
while (!isCompleted && !abortController.signal.aborted) {
const completion = await sendCompletion(
activeThread,
provider,
@ -200,7 +198,8 @@ export const useChat = () => {
addMessage(updatedMessage ?? finalContent)
isCompleted = !toolCalls.length
availableTools = []
// Do not create agent loop if there is no need for it
if (!followUpWithToolUse) availableTools = []
}
} catch (error) {
toast.error(

View File

@ -308,7 +308,7 @@ export const postMessageProcessing = async (
}
builder.addToolMessage(result.content[0]?.text ?? '', toolCall.id)
// update message metadata
return message
}
return message
}
}

View File

@ -77,7 +77,22 @@ export const getProviders = async (): Promise<ModelProvider[]> => {
? (model.capabilities as string[])
: [ModelCapabilities.COMPLETION],
provider: providerName,
settings: modelSettings ,
settings: Object.values(modelSettings).reduce(
(acc, setting) => {
const value = model[
setting.key as keyof typeof model
] as keyof typeof setting.controller_props.value
acc[setting.key] = {
...setting,
controller_props: {
...setting.controller_props,
value: value ?? setting.controller_props.value,
},
}
return acc
},
{} as Record<string, ProviderSetting>
),
})),
}
runtimeProviders.push(provider)