fix: llama.cpp integration model load and chat experience (#5823)

* fix: stop generating should not stop running models

* fix: ensure backend ready before loading model

* fix: backend setting should not block onLoad
Louis 2025-07-21 09:29:26 +07:00 committed by GitHub
parent 5696e951f2
commit bc4fe52f8d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 123 additions and 77 deletions

View File

@@ -23,37 +23,37 @@ export interface InputAudio {
 }
 export interface ToolFunction {
-  name: string; // Required: a-z, A-Z, 0-9, _, -, max length 64
-  description?: string;
-  parameters?: Record<string, unknown>; // JSON Schema object
-  strict?: boolean | null; // Defaults to false
+  name: string // Required: a-z, A-Z, 0-9, _, -, max length 64
+  description?: string
+  parameters?: Record<string, unknown> // JSON Schema object
+  strict?: boolean | null // Defaults to false
 }
 export interface Tool {
-  type: 'function'; // Currently, only 'function' is supported
-  function: ToolFunction;
+  type: 'function' // Currently, only 'function' is supported
+  function: ToolFunction
 }
 export interface ToolCallOptions {
-  tools?: Tool[];
+  tools?: Tool[]
 }
 // A specific tool choice to force the model to call
 export interface ToolCallSpec {
-  type: 'function';
+  type: 'function'
   function: {
-    name: string;
-  };
+    name: string
+  }
 }
 // tool_choice may be one of several modes or a specific call
-export type ToolChoice = 'none' | 'auto' | 'required' | ToolCallSpec;
+export type ToolChoice = 'none' | 'auto' | 'required' | ToolCallSpec
 export interface chatCompletionRequest {
-  model: string; // Model ID, though for local it might be implicit via sessionInfo
-  messages: chatCompletionRequestMessage[];
-  tools?: Tool[];
-  tool_choice?: ToolChoice;
+  model: string // Model ID, though for local it might be implicit via sessionInfo
+  messages: chatCompletionRequestMessage[]
+  tools?: Tool[]
+  tool_choice?: ToolChoice
   // Core sampling parameters
   temperature?: number | null
   dynatemp_range?: number | null
@@ -168,7 +168,7 @@ export type listResult = modelInfo[]
 export interface SessionInfo {
   pid: number // opaque handle for unload/chat
   port: number // llama-server output port (corrected from portid)
-  model_id: string, //name of the model
+  model_id: string //name of the model
   model_path: string // path of the loaded model
   api_key: string
 }
@@ -242,7 +242,8 @@ export abstract class AIEngine extends BaseExtension {
    * Sends a chat request to the model
    */
   abstract chat(
-    opts: chatCompletionRequest
+    opts: chatCompletionRequest,
+    abortController?: AbortController
   ): Promise<chatCompletion | AsyncIterable<chatCompletionChunk>>
/** /**
@@ -261,8 +262,8 @@ export abstract class AIEngine extends BaseExtension {
   abstract abortImport(modelId: string): Promise<void>

   /**
    * Get currently loaded models
    */
   abstract getLoadedModels(): Promise<string[]>

   /**
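The new `abortController` parameter threads user cancellation down to the HTTP layer, so stopping generation no longer requires touching the running model. A minimal sketch of how a caller might drive the new signature (the engine instance, model id, and OpenAI-style chunk shape are assumptions for illustration):

const controller = new AbortController()

// Hypothetical call: request a streamed completion, handing over the controller
// so the stream can be cancelled mid-generation.
const result = await engine.chat(
  {
    model: 'llamacpp/my-model', // placeholder id
    messages: [{ role: 'user', content: 'Hello' }],
    stream: true,
  },
  controller
)

// Consume chunks until done; controller.abort() stops the underlying fetch
// without unloading the model.
for await (const chunk of result as AsyncIterable<chatCompletionChunk>) {
  console.log(chunk.choices?.[0]?.delta?.content ?? '')
}

// e.g. wired to a Stop button: stopButton.onclick = () => controller.abort()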

View File

@@ -118,12 +118,45 @@ export default class llamacpp_extension extends AIEngine {
   private activeSessions: Map<number, SessionInfo> = new Map()
   private providerPath!: string
   private apiSecret: string = 'JustAskNow'
+  private pendingDownloads: Map<string, Promise<void>> = new Map()

   override async onLoad(): Promise<void> {
     super.onLoad() // Calls registerEngine() from AIEngine
     let settings = structuredClone(SETTINGS) // Clone to modify settings definition before registration
+    // This makes the settings (including the backend options and initial value) available to the Jan UI.
+    this.registerSettings(settings)
+
+    // 5. Load all settings into this.config from the registered settings.
+    // This populates `this.config` with the *persisted* user settings, falling back
+    // to the *default* values specified in the settings definitions (which might have been
+    // updated in step 3 to reflect the best available backend).
+    let loadedConfig: any = {}
+    // Iterate over the cloned 'settings' array because its 'controllerProps.value'
+    // might have been updated in step 3 to define the UI default.
+    // 'getSetting' will retrieve the actual persisted user value if it exists, falling back
+    // to the 'defaultValue' passed (which is the 'controllerProps.value' from the cloned settings array).
+    for (const item of settings) {
+      const defaultValue = item.controllerProps.value
+      // Use the potentially updated default value from the settings array as the fallback for getSetting
+      loadedConfig[item.key] = await this.getSetting<typeof defaultValue>(
+        item.key,
+        defaultValue
+      )
+    }
+    this.config = loadedConfig as LlamacppConfig
+
+    // This sets the base directory where model files for this provider are stored.
+    this.providerPath = await joinPath([
+      await getJanDataFolderPath(),
+      this.providerId,
+    ])
+
+    this.configureBackends()
+  }
+
+  async configureBackends(): Promise<void> {
     // 1. Fetch available backends early
     // This is necessary to populate the backend version dropdown in settings
     // and to determine the best available backend for auto-update/default selection.
@@ -226,6 +259,8 @@ export default class llamacpp_extension extends AIEngine {
       )
     }
+    let settings = structuredClone(SETTINGS) // Clone to modify settings definition before registration
+
     // 3. Update the 'version_backend' setting definition in the cloned settings array
     // This prepares the settings object that will be registered, influencing the UI default value.
     const backendSettingIndex = settings.findIndex(
@@ -274,28 +309,6 @@ export default class llamacpp_extension extends AIEngine {
       // Cannot proceed if this critical setting is missing
       throw new Error('Critical setting "version_backend" not found.')
     }
-    // This makes the settings (including the backend options and initial value) available to the Jan UI.
-    this.registerSettings(settings)
-
-    // 5. Load all settings into this.config from the registered settings.
-    // This populates `this.config` with the *persisted* user settings, falling back
-    // to the *default* values specified in the settings definitions (which might have been
-    // updated in step 3 to reflect the best available backend).
-    let loadedConfig: any = {}
-    // Iterate over the cloned 'settings' array because its 'controllerProps.value'
-    // might have been updated in step 3 to define the UI default.
-    // 'getSetting' will retrieve the actual persisted user value if it exists, falling back
-    // to the 'defaultValue' passed (which is the 'controllerProps.value' from the cloned settings array).
-    for (const item of settings) {
-      const defaultValue = item.controllerProps.value
-      // Use the potentially updated default value from the settings array as the fallback for getSetting
-      loadedConfig[item.key] = await this.getSetting<typeof defaultValue>(
-        item.key,
-        defaultValue
-      )
-    }
-    this.config = loadedConfig as LlamacppConfig

     // At this point, this.config.version_backend holds the value that will be used
     // UNLESS auto-update logic overrides it for the current session.
@@ -328,7 +341,7 @@ export default class llamacpp_extension extends AIEngine {
         `Auto-updating effective backend for this session from ${this.config.version_backend} to ${bestAvailableBackendString} (best available)`
       )
       try {
-        await downloadBackend(bestBackend, bestVersion)
+        await this.ensureBackendReady(bestBackend, bestVersion)
         effectiveBackendString = bestAvailableBackendString
         this.config.version_backend = effectiveBackendString
         this.getSettings().then((settings) => {
@@ -435,7 +448,7 @@ export default class llamacpp_extension extends AIEngine {
       // downloadBackend is called again here to ensure the *currently active* backend
       // is present, regardless of whether it was set by user config or auto-update.
       // This call will do nothing if it was already downloaded during auto-update.
-      await downloadBackend(selectedBackend, selectedVersion)
+      await this.ensureBackendReady(selectedBackend, selectedVersion)
       console.log(
         `Successfully installed effective backend: ${finalBackendToInstall}`
       )
@ -461,13 +474,8 @@ export default class llamacpp_extension extends AIEngine {
} else { } else {
console.warn('No backend selected or available in config to install.') console.warn('No backend selected or available in config to install.')
} }
// This sets the base directory where model files for this provider are stored.
this.providerPath = await joinPath([
await getJanDataFolderPath(),
this.providerId,
])
} }
async getProviderPath(): Promise<string> { async getProviderPath(): Promise<string> {
if (!this.providerPath) { if (!this.providerPath) {
this.providerPath = await joinPath([ this.providerPath = await joinPath([
@@ -500,10 +508,7 @@ export default class llamacpp_extension extends AIEngine {
       const [version, backend] = valueStr.split('/')
       const closure = async () => {
-        const isInstalled = await isBackendInstalled(backend, version)
-        if (!isInstalled) {
-          await downloadBackend(backend, version)
-        }
+        await this.ensureBackendReady(backend, version)
       }
       closure()
     }
@@ -781,6 +786,9 @@ export default class llamacpp_extension extends AIEngine {
       )
     }

+    // Ensure backend is downloaded and ready before proceeding
+    await this.ensureBackendReady(backend, version)
+
     const janDataFolderPath = await getJanDataFolderPath()
     const modelConfigPath = await joinPath([
       this.providerPath,
@@ -923,15 +931,49 @@ export default class llamacpp_extension extends AIEngine {
     return `${this.provider}/${cleanModelId}`
   }

+  private async ensureBackendReady(
+    backend: string,
+    version: string
+  ): Promise<void> {
+    const backendKey = `${version}/${backend}`
+
+    // Check if backend is already installed
+    const isInstalled = await isBackendInstalled(backend, version)
+    if (isInstalled) {
+      return
+    }
+
+    // Check if download is already in progress
+    if (this.pendingDownloads.has(backendKey)) {
+      console.log(
+        `Backend ${backendKey} download already in progress, waiting...`
+      )
+      await this.pendingDownloads.get(backendKey)
+      return
+    }
+
+    // Start new download
+    console.log(`Backend ${backendKey} not installed, downloading...`)
+    const downloadPromise = downloadBackend(backend, version).finally(() => {
+      this.pendingDownloads.delete(backendKey)
+    })
+    this.pendingDownloads.set(backendKey, downloadPromise)
+
+    await downloadPromise
+    console.log(`Backend ${backendKey} download completed`)
+  }
+
   private async *handleStreamingResponse(
     url: string,
     headers: HeadersInit,
-    body: string
+    body: string,
+    abortController?: AbortController
   ): AsyncIterable<chatCompletionChunk> {
     const response = await fetch(url, {
       method: 'POST',
       headers,
       body,
+      signal: abortController?.signal,
     })

     if (!response.ok) {
       const errorData = await response.json().catch(() => null)
@@ -1035,7 +1077,7 @@ export default class llamacpp_extension extends AIEngine {
     const body = JSON.stringify(opts)

     if (opts.stream) {
-      return this.handleStreamingResponse(url, headers, body)
+      return this.handleStreamingResponse(url, headers, body, abortController)
     }

     // Handle non-streaming response
     const response = await fetch(url, {
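`ensureBackendReady` collapses concurrent requests for the same backend into one download: the first caller stores its promise in `pendingDownloads`, later callers await that same promise, and `finally` clears the entry whether the download succeeds or fails. The same single-flight pattern in isolation, as a generic sketch (helper name is hypothetical):

// Single-flight memoization: concurrent calls with the same key share one task.
const pending = new Map<string, Promise<void>>()

async function singleFlight(key: string, task: () => Promise<void>): Promise<void> {
  const inFlight = pending.get(key)
  if (inFlight) return inFlight // join the task already in progress

  const p = task().finally(() => pending.delete(key)) // clear even on failure
  pending.set(key, p)
  return p
}

// Usage mirroring the extension: one download per version/backend pair.
// await singleFlight(`${version}/${backend}`, () => downloadBackend(backend, version))

Clearing the map in `finally` matters: if the entry outlived a failed download, every later call would await the same rejected promise and the backend could never be retried.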

View File

@@ -34,7 +34,6 @@ import DropdownModelProvider from '@/containers/DropdownModelProvider'
 import { ModelLoader } from '@/containers/loaders/ModelLoader'
 import DropdownToolsAvailable from '@/containers/DropdownToolsAvailable'
 import { getConnectedServers } from '@/services/mcp'
-import { stopAllModels } from '@/services/models'

 type ChatInputProps = {
   className?: string
@@ -162,7 +161,6 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
   const stopStreaming = useCallback(
     (threadId: string) => {
       abortControllers[threadId]?.abort()
-      stopAllModels()
     },
     [abortControllers]
   )
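The removed `stopAllModels()` call was the bug the first commit message targets: hitting Stop aborted the stream and also unloaded every running model, forcing a full reload on the next message. Aborting only the thread's controller leaves the llama-server session alive. A standalone sketch of the per-thread pattern (registry shape is an assumption):

// Hypothetical per-thread registry of in-flight completions.
const abortControllers: Record<string, AbortController> = {}

function beginStream(threadId: string): AbortController {
  const controller = new AbortController()
  abortControllers[threadId] = controller
  return controller
}

// Stop generation for one thread only; the model process keeps running,
// so the next prompt starts without a reload.
function stopStreaming(threadId: string): void {
  abortControllers[threadId]?.abort()
  delete abortControllers[threadId]
}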

View File

@@ -183,7 +183,7 @@ export function DownloadManagement() {
       toast.success(t('common:toast.downloadComplete.title'), {
         id: 'download-complete',
         description: t('common:toast.downloadComplete.description', {
-          modelId: state.modelId,
+          item: state.modelId,
         }),
       })
     },

View File

@@ -399,11 +399,13 @@ export const useChat = () => {
         if (!followUpWithToolUse) availableTools = []
       }
     } catch (error) {
-      const errorMessage =
-        error && typeof error === 'object' && 'message' in error
-          ? error.message
-          : error
-      setModelLoadError(`${errorMessage}`)
+      if (!abortController.signal.aborted) {
+        const errorMessage =
+          error && typeof error === 'object' && 'message' in error
+            ? error.message
+            : error
+        setModelLoadError(`${errorMessage}`)
+      }
     } finally {
       updateLoadingModel(false)
       updateStreamingContent(undefined)
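An aborted fetch rejects with an `AbortError`, so without this guard a user pressing Stop would see their own cancellation reported as a model-load error. Checking `signal.aborted` filters that case while real failures still surface. A self-contained illustration (function name is hypothetical):

// Only report errors the user did not cause by aborting.
async function completeWithAbort(url: string, controller: AbortController): Promise<void> {
  try {
    const res = await fetch(url, { method: 'POST', signal: controller.signal })
    console.log('status:', res.status)
  } catch (error) {
    if (!controller.signal.aborted) {
      // Genuine failure (network error, server down, ...): surface it.
      console.error('completion failed:', error)
    }
    // Aborted by the user: swallow silently.
  }
}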

View File

@@ -185,14 +185,17 @@ export const sendCompletion = async (
   const engine = ExtensionManager.getInstance().getEngine(provider.provider)

   const completion = engine
-    ? await engine.chat({
-        messages: messages as chatCompletionRequestMessage[],
-        model: thread.model?.id,
-        tools: normalizeTools(tools),
-        tool_choice: tools.length ? 'auto' : undefined,
-        stream: true,
-        ...params,
-      })
+    ? await engine.chat(
+        {
+          messages: messages as chatCompletionRequestMessage[],
+          model: thread.model?.id,
+          tools: normalizeTools(tools),
+          tool_choice: tools.length ? 'auto' : undefined,
+          stream: true,
+          ...params,
+        },
+        abortController
+      )
     : stream
       ? await tokenJS.chat.completions.create(
           {

View File

@@ -251,7 +251,7 @@
     },
     "downloadComplete": {
       "title": "Download abgeschlossen",
-      "description": "Das Modell {{modelId}} wurde heruntergeladen"
+      "description": "{{item}} wurde heruntergeladen"
     },
     "downloadCancelled": {
       "title": "Download abgebrochen",

View File

@@ -251,7 +251,7 @@
     },
     "downloadComplete": {
      "title": "Download Complete",
-      "description": "The model {{modelId}} has been downloaded"
+      "description": "{{item}} has been downloaded"
     },
     "downloadCancelled": {
       "title": "Download Cancelled",

View File

@@ -244,7 +244,7 @@
     },
     "downloadComplete": {
       "title": "Unduhan Selesai",
-      "description": "Model {{modelId}} telah diunduh"
+      "description": "{{item}} telah diunduh"
     },
     "downloadCancelled": {
       "title": "Unduhan Dibatalkan",

View File

@@ -244,7 +244,7 @@
     },
     "downloadComplete": {
       "title": "Tải xuống hoàn tất",
-      "description": "Mô hình {{modelId}} đã được tải xuống"
+      "description": "{{item}} đã được tải xuống"
     },
     "downloadCancelled": {
       "title": "Đã hủy tải xuống",

View File

@@ -244,7 +244,7 @@
     },
     "downloadComplete": {
       "title": "下载完成",
-      "description": "模型 {{modelId}} 已下载"
+      "description": "{{item}} 已下载"
     },
     "downloadCancelled": {
       "title": "下载已取消",

View File

@@ -244,7 +244,7 @@
     },
     "downloadComplete": {
       "title": "下載完成",
-      "description": "模型 {{modelId}} 已下載"
+      "description": "{{item}} 已下載"
     },
     "downloadCancelled": {
       "title": "下載已取消",