fix: llama.cpp integration model load and chat experience (#5823)

* fix: stop generating should not stop running models
* fix: ensure backend ready before loading model
* fix: backend setting should not block onLoad
parent 5696e951f2
commit bc4fe52f8d
```diff
@@ -23,37 +23,37 @@ export interface InputAudio {
 }
 
 export interface ToolFunction {
-  name: string; // Required: a-z, A-Z, 0-9, _, -, max length 64
-  description?: string;
-  parameters?: Record<string, unknown>; // JSON Schema object
-  strict?: boolean | null; // Defaults to false
+  name: string // Required: a-z, A-Z, 0-9, _, -, max length 64
+  description?: string
+  parameters?: Record<string, unknown> // JSON Schema object
+  strict?: boolean | null // Defaults to false
 }
 
 export interface Tool {
-  type: 'function'; // Currently, only 'function' is supported
-  function: ToolFunction;
+  type: 'function' // Currently, only 'function' is supported
+  function: ToolFunction
 }
 
 export interface ToolCallOptions {
-  tools?: Tool[];
+  tools?: Tool[]
 }
 
 // A specific tool choice to force the model to call
 export interface ToolCallSpec {
-  type: 'function';
+  type: 'function'
   function: {
-    name: string;
-  };
+    name: string
+  }
 }
 
 // tool_choice may be one of several modes or a specific call
-export type ToolChoice = 'none' | 'auto' | 'required' | ToolCallSpec;
+export type ToolChoice = 'none' | 'auto' | 'required' | ToolCallSpec
 
 export interface chatCompletionRequest {
-  model: string; // Model ID, though for local it might be implicit via sessionInfo
-  messages: chatCompletionRequestMessage[];
-  tools?: Tool[];
-  tool_choice?: ToolChoice;
+  model: string // Model ID, though for local it might be implicit via sessionInfo
+  messages: chatCompletionRequestMessage[]
+  tools?: Tool[]
+  tool_choice?: ToolChoice
   // Core sampling parameters
   temperature?: number | null
   dynatemp_range?: number | null
```
```diff
@@ -168,7 +168,7 @@ export type listResult = modelInfo[]
 export interface SessionInfo {
   pid: number // opaque handle for unload/chat
   port: number // llama-server output port (corrected from portid)
-  model_id: string, //name of the model
+  model_id: string //name of the model
   model_path: string // path of the loaded model
   api_key: string
 }
```
```diff
@@ -242,7 +242,8 @@ export abstract class AIEngine extends BaseExtension {
    * Sends a chat request to the model
    */
   abstract chat(
-    opts: chatCompletionRequest
+    opts: chatCompletionRequest,
+    abortController?: AbortController
   ): Promise<chatCompletion | AsyncIterable<chatCompletionChunk>>
 
   /**
```
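The hunk above extends the abstract `chat()` contract with an optional `AbortController`, which the llama.cpp extension threads down to `fetch` later in this commit. A minimal caller-side sketch of the new shape; the concrete `engine` instance, the model id, and the OpenAI-style chunk shape (`choices[0].delta.content`) are illustrative assumptions, not definitions from this commit:

```ts
// Assumes `engine: AIEngine` is in scope and the request streams.
const controller = new AbortController()

// "Stop generating": abort only the in-flight HTTP request after 5s;
// the model itself stays loaded in llama-server.
const timer = setTimeout(() => controller.abort(), 5000)

const result = await engine.chat(
  {
    model: 'llama-3.2-3b-instruct', // made-up id for illustration
    messages: [{ role: 'user', content: 'Hello' }],
    stream: true,
  },
  controller
)

try {
  for await (const chunk of result as AsyncIterable<chatCompletionChunk>) {
    process.stdout.write(chunk.choices[0]?.delta?.content ?? '')
  }
} catch (err) {
  // fetch() rejects with an AbortError once the signal fires.
  if ((err as Error).name !== 'AbortError') throw err
} finally {
  clearTimeout(timer)
}
```

Passing the whole controller rather than a bare `AbortSignal` mirrors how the web app keeps one controller per thread (see `stopStreaming` further down).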
```diff
@@ -261,8 +262,8 @@ export abstract class AIEngine extends BaseExtension {
   abstract abortImport(modelId: string): Promise<void>
 
   /**
    * Get currently loaded models
    */
   abstract getLoadedModels(): Promise<string[]>
 
   /**
```
```diff
@@ -118,12 +118,45 @@ export default class llamacpp_extension extends AIEngine {
   private activeSessions: Map<number, SessionInfo> = new Map()
   private providerPath!: string
   private apiSecret: string = 'JustAskNow'
+  private pendingDownloads: Map<string, Promise<void>> = new Map()
 
   override async onLoad(): Promise<void> {
     super.onLoad() // Calls registerEngine() from AIEngine
 
     let settings = structuredClone(SETTINGS) // Clone to modify settings definition before registration
 
+    // This makes the settings (including the backend options and initial value) available to the Jan UI.
+    this.registerSettings(settings)
+
+    // 5. Load all settings into this.config from the registered settings.
+    // This populates `this.config` with the *persisted* user settings, falling back
+    // to the *default* values specified in the settings definitions (which might have been
+    // updated in step 3 to reflect the best available backend).
+    let loadedConfig: any = {}
+    // Iterate over the cloned 'settings' array because its 'controllerProps.value'
+    // might have been updated in step 3 to define the UI default.
+    // 'getSetting' will retrieve the actual persisted user value if it exists, falling back
+    // to the 'defaultValue' passed (which is the 'controllerProps.value' from the cloned settings array).
+    for (const item of settings) {
+      const defaultValue = item.controllerProps.value
+      // Use the potentially updated default value from the settings array as the fallback for getSetting
+      loadedConfig[item.key] = await this.getSetting<typeof defaultValue>(
+        item.key,
+        defaultValue
+      )
+    }
+    this.config = loadedConfig as LlamacppConfig
+
+    // This sets the base directory where model files for this provider are stored.
+    this.providerPath = await joinPath([
+      await getJanDataFolderPath(),
+      this.providerId,
+    ])
+
+    this.configureBackends()
+  }
+
+  async configureBackends(): Promise<void> {
     // 1. Fetch available backends early
     // This is necessary to populate the backend version dropdown in settings
     // and to determine the best available backend for auto-update/default selection.
```
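This restructuring is the "backend setting should not block onLoad" bullet: settings registration, config loading, and the provider path are handled inline in `onLoad()`, while backend discovery and download move into `configureBackends()`, which is deliberately invoked without `await`. A stand-alone sketch of that fire-and-forget pattern under hypothetical names, showing why `onLoad()` now resolves promptly:

```ts
class FakeExtension {
  settingsRegistered = false

  async onLoad(): Promise<void> {
    // Fast, UI-facing work first so the settings panel can render.
    this.settingsRegistered = true

    // Deliberately not awaited: probing/downloading a backend can take
    // minutes. Catch here so the floating promise cannot surface as an
    // unhandled rejection.
    void this.configureBackends().catch((err) =>
      console.error('backend setup failed:', err)
    )
  }

  private async configureBackends(): Promise<void> {
    // Stands in for the backend probe/download work.
    await new Promise((resolve) => setTimeout(resolve, 2000))
  }
}

const ext = new FakeExtension()
await ext.onLoad() // returns immediately, not after the 2s "download"
console.log(ext.settingsRegistered) // true while backend work continues
```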
```diff
@@ -226,6 +259,8 @@ export default class llamacpp_extension extends AIEngine {
       )
     }
 
+    let settings = structuredClone(SETTINGS) // Clone to modify settings definition before registration
+
     // 3. Update the 'version_backend' setting definition in the cloned settings array
     // This prepares the settings object that will be registered, influencing the UI default value.
     const backendSettingIndex = settings.findIndex(
```
```diff
@@ -274,28 +309,6 @@ export default class llamacpp_extension extends AIEngine {
       // Cannot proceed if this critical setting is missing
       throw new Error('Critical setting "version_backend" not found.')
     }
 
-    // This makes the settings (including the backend options and initial value) available to the Jan UI.
-    this.registerSettings(settings)
-
-    // 5. Load all settings into this.config from the registered settings.
-    // This populates `this.config` with the *persisted* user settings, falling back
-    // to the *default* values specified in the settings definitions (which might have been
-    // updated in step 3 to reflect the best available backend).
-    let loadedConfig: any = {}
-    // Iterate over the cloned 'settings' array because its 'controllerProps.value'
-    // might have been updated in step 3 to define the UI default.
-    // 'getSetting' will retrieve the actual persisted user value if it exists, falling back
-    // to the 'defaultValue' passed (which is the 'controllerProps.value' from the cloned settings array).
-    for (const item of settings) {
-      const defaultValue = item.controllerProps.value
-      // Use the potentially updated default value from the settings array as the fallback for getSetting
-      loadedConfig[item.key] = await this.getSetting<typeof defaultValue>(
-        item.key,
-        defaultValue
-      )
-    }
-    this.config = loadedConfig as LlamacppConfig
     // At this point, this.config.version_backend holds the value that will be used
     // UNLESS auto-update logic overrides it for the current session.
 
```
```diff
@@ -328,7 +341,7 @@ export default class llamacpp_extension extends AIEngine {
         `Auto-updating effective backend for this session from ${this.config.version_backend} to ${bestAvailableBackendString} (best available)`
       )
       try {
-        await downloadBackend(bestBackend, bestVersion)
+        await this.ensureBackendReady(bestBackend, bestVersion)
         effectiveBackendString = bestAvailableBackendString
         this.config.version_backend = effectiveBackendString
         this.getSettings().then((settings) => {
```
```diff
@@ -435,7 +448,7 @@ export default class llamacpp_extension extends AIEngine {
       // downloadBackend is called again here to ensure the *currently active* backend
       // is present, regardless of whether it was set by user config or auto-update.
       // This call will do nothing if it was already downloaded during auto-update.
-      await downloadBackend(selectedBackend, selectedVersion)
+      await this.ensureBackendReady(selectedBackend, selectedVersion)
       console.log(
         `Successfully installed effective backend: ${finalBackendToInstall}`
       )
```
```diff
@@ -461,13 +474,8 @@ export default class llamacpp_extension extends AIEngine {
     } else {
       console.warn('No backend selected or available in config to install.')
     }
 
-    // This sets the base directory where model files for this provider are stored.
-    this.providerPath = await joinPath([
-      await getJanDataFolderPath(),
-      this.providerId,
-    ])
   }
 
   async getProviderPath(): Promise<string> {
     if (!this.providerPath) {
       this.providerPath = await joinPath([
```
```diff
@@ -500,10 +508,7 @@ export default class llamacpp_extension extends AIEngine {
     const [version, backend] = valueStr.split('/')
 
     const closure = async () => {
-      const isInstalled = await isBackendInstalled(backend, version)
-      if (!isInstalled) {
-        await downloadBackend(backend, version)
-      }
+      await this.ensureBackendReady(backend, version)
     }
     closure()
   }
```
```diff
@@ -781,6 +786,9 @@ export default class llamacpp_extension extends AIEngine {
     )
   }
 
+    // Ensure backend is downloaded and ready before proceeding
+    await this.ensureBackendReady(backend, version)
+
     const janDataFolderPath = await getJanDataFolderPath()
     const modelConfigPath = await joinPath([
       this.providerPath,
```
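This hunk is "ensure backend ready before loading model" at its call site: the model-load path now awaits the backend check/download before reading the model config and spawning llama-server, so a load issued while the backend archive is still downloading waits for it instead of failing on missing binaries. The `ensureBackendReady` helper itself lands in the next hunk, followed by a sketch of its deduplication idiom.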
```diff
@@ -923,15 +931,49 @@ export default class llamacpp_extension extends AIEngine {
     return `${this.provider}/${cleanModelId}`
   }
 
+  private async ensureBackendReady(
+    backend: string,
+    version: string
+  ): Promise<void> {
+    const backendKey = `${version}/${backend}`
+
+    // Check if backend is already installed
+    const isInstalled = await isBackendInstalled(backend, version)
+    if (isInstalled) {
+      return
+    }
+
+    // Check if download is already in progress
+    if (this.pendingDownloads.has(backendKey)) {
+      console.log(
+        `Backend ${backendKey} download already in progress, waiting...`
+      )
+      await this.pendingDownloads.get(backendKey)
+      return
+    }
+
+    // Start new download
+    console.log(`Backend ${backendKey} not installed, downloading...`)
+    const downloadPromise = downloadBackend(backend, version).finally(() => {
+      this.pendingDownloads.delete(backendKey)
+    })
+
+    this.pendingDownloads.set(backendKey, downloadPromise)
+    await downloadPromise
+    console.log(`Backend ${backendKey} download completed`)
+  }
+
   private async *handleStreamingResponse(
     url: string,
     headers: HeadersInit,
-    body: string
+    body: string,
+    abortController?: AbortController
   ): AsyncIterable<chatCompletionChunk> {
     const response = await fetch(url, {
       method: 'POST',
       headers,
       body,
+      signal: abortController?.signal,
     })
     if (!response.ok) {
       const errorData = await response.json().catch(() => null)
```
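The new `ensureBackendReady()` pairs an installed check with promise memoization: every caller for the same `version/backend` key awaits one shared in-flight download instead of racing duplicates. The idiom in isolation, as a stand-alone sketch with a fake download and a made-up key:

```ts
const pending = new Map<string, Promise<void>>()

async function ensureReady(
  key: string,
  download: () => Promise<void>
): Promise<void> {
  const inFlight = pending.get(key)
  if (inFlight) return inFlight // join the download already running

  const p = download().finally(() => pending.delete(key))
  pending.set(key, p)
  return p
}

let runs = 0
const fakeDownload = () =>
  new Promise<void>((resolve) => {
    runs++
    setTimeout(resolve, 100)
  })

// Two concurrent callers, a single actual download:
await Promise.all([
  ensureReady('b6185/win-avx2-x64', fakeDownload), // illustrative key
  ensureReady('b6185/win-avx2-x64', fakeDownload),
])
console.log(runs) // 1
```

Because no `await` separates starting the download from `set()`, no caller can observe the gap, and the `.finally()` cleanup clears the entry so a failed download can be retried on a later call.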
```diff
@@ -1035,7 +1077,7 @@ export default class llamacpp_extension extends AIEngine {
 
     const body = JSON.stringify(opts)
     if (opts.stream) {
-      return this.handleStreamingResponse(url, headers, body)
+      return this.handleStreamingResponse(url, headers, body, abortController)
     }
     // Handle non-streaming response
     const response = await fetch(url, {
```
```diff
@@ -34,7 +34,6 @@ import DropdownModelProvider from '@/containers/DropdownModelProvider'
 import { ModelLoader } from '@/containers/loaders/ModelLoader'
 import DropdownToolsAvailable from '@/containers/DropdownToolsAvailable'
 import { getConnectedServers } from '@/services/mcp'
-import { stopAllModels } from '@/services/models'
 
 type ChatInputProps = {
   className?: string
```
```diff
@@ -162,7 +161,6 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
   const stopStreaming = useCallback(
     (threadId: string) => {
       abortControllers[threadId]?.abort()
-      stopAllModels()
     },
     [abortControllers]
  )
```
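Taken together, the two ChatInput hunks implement "stop generating should not stop running models": stopping a stream now only aborts that thread's in-flight request through its `AbortController`; the `stopAllModels()` call and its import are gone, so loaded models stay resident for the next turn.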
```diff
@@ -183,7 +183,7 @@ export function DownloadManagement() {
       toast.success(t('common:toast.downloadComplete.title'), {
         id: 'download-complete',
         description: t('common:toast.downloadComplete.description', {
-          modelId: state.modelId,
+          item: state.modelId,
         }),
       })
     },
```
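The toast now passes its interpolation value under the generic key `item`, matching the locale-string rename in the six translation files at the end of this commit, so the string no longer assumes the downloaded item is a model. A minimal i18next sketch of the renamed key; the init options and model id here are illustrative, not Jan's actual i18n setup:

```ts
import i18next from 'i18next'

await i18next.init({
  lng: 'en',
  resources: {
    en: {
      common: {
        toast: {
          downloadComplete: {
            title: 'Download Complete',
            description: '{{item}} has been downloaded',
          },
        },
      },
    },
  },
})

// Same key/options shape as the component above:
console.log(
  i18next.t('common:toast.downloadComplete.description', { item: 'qwen3-4b' })
)
// -> "qwen3-4b has been downloaded"
```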
```diff
@@ -399,11 +399,13 @@ export const useChat = () => {
         if (!followUpWithToolUse) availableTools = []
       }
     } catch (error) {
-      const errorMessage =
-        error && typeof error === 'object' && 'message' in error
-          ? error.message
-          : error
-      setModelLoadError(`${errorMessage}`)
+      if (!abortController.signal.aborted) {
+        const errorMessage =
+          error && typeof error === 'object' && 'message' in error
+            ? error.message
+            : error
+        setModelLoadError(`${errorMessage}`)
+      }
     } finally {
       updateLoadingModel(false)
       updateStreamingContent(undefined)
```
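The added guard keeps a user-initiated stop from surfacing as a model-load error: an aborted request rejects like any failure, but checking `abortController.signal.aborted` separates deliberate cancellation from genuine errors. A reduced sketch of the same pattern around `fetch`; the endpoint is an illustrative local llama-server URL, not taken from this commit:

```ts
async function completeOnce(signal: AbortSignal): Promise<void> {
  try {
    const res = await fetch('http://127.0.0.1:8080/v1/chat/completions', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ messages: [{ role: 'user', content: 'hi' }] }),
      signal,
    })
    console.log('status:', res.status)
  } catch (err) {
    if (!signal.aborted) {
      throw err // genuine failure: let the UI report it
    }
    // Aborted on purpose: fall through; cleanup runs in finally blocks upstream.
  }
}
```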
```diff
@@ -185,14 +185,17 @@ export const sendCompletion = async (
   const engine = ExtensionManager.getInstance().getEngine(provider.provider)
 
   const completion = engine
-    ? await engine.chat({
-        messages: messages as chatCompletionRequestMessage[],
-        model: thread.model?.id,
-        tools: normalizeTools(tools),
-        tool_choice: tools.length ? 'auto' : undefined,
-        stream: true,
-        ...params,
-      })
+    ? await engine.chat(
+        {
+          messages: messages as chatCompletionRequestMessage[],
+          model: thread.model?.id,
+          tools: normalizeTools(tools),
+          tool_choice: tools.length ? 'auto' : undefined,
+          stream: true,
+          ...params,
+        },
+        abortController
+      )
     : stream
       ? await tokenJS.chat.completions.create(
           {
```
German locale:

```diff
@@ -251,7 +251,7 @@
     },
     "downloadComplete": {
       "title": "Download abgeschlossen",
-      "description": "Das Modell {{modelId}} wurde heruntergeladen"
+      "description": "{{item}} wurde heruntergeladen"
     },
     "downloadCancelled": {
       "title": "Download abgebrochen",
```
English locale:

```diff
@@ -251,7 +251,7 @@
     },
     "downloadComplete": {
       "title": "Download Complete",
-      "description": "The model {{modelId}} has been downloaded"
+      "description": "{{item}} has been downloaded"
     },
     "downloadCancelled": {
       "title": "Download Cancelled",
```
Indonesian locale:

```diff
@@ -244,7 +244,7 @@
     },
     "downloadComplete": {
       "title": "Unduhan Selesai",
-      "description": "Model {{modelId}} telah diunduh"
+      "description": "{{item}} telah diunduh"
     },
     "downloadCancelled": {
       "title": "Unduhan Dibatalkan",
```
Vietnamese locale:

```diff
@@ -244,7 +244,7 @@
     },
     "downloadComplete": {
       "title": "Tải xuống hoàn tất",
-      "description": "Mô hình {{modelId}} đã được tải xuống"
+      "description": "{{item}} đã được tải xuống"
     },
     "downloadCancelled": {
       "title": "Đã hủy tải xuống",
```
Simplified Chinese locale:

```diff
@@ -244,7 +244,7 @@
     },
     "downloadComplete": {
       "title": "下载完成",
-      "description": "模型 {{modelId}} 已下载"
+      "description": "{{item}} 已下载"
     },
     "downloadCancelled": {
       "title": "下载已取消",
```
Traditional Chinese locale:

```diff
@@ -244,7 +244,7 @@
     },
     "downloadComplete": {
       "title": "下載完成",
-      "description": "模型 {{modelId}} 已下載"
+      "description": "{{item}} 已下載"
     },
     "downloadCancelled": {
       "title": "下載已取消",
```