This commit introduces a new field, `is_embedding`, on the `SessionInfo` structure to mark sessions that run dedicated embedding models.

Key changes:

- Adds `is_embedding` to the `SessionInfo` interface in `AIEngine.ts` and to the Rust backend.
- Updates the `loadLlamaModel` command signatures to pass the new flag.
- Modifies the llama.cpp extension's **auto-unload logic** to **filter out**, and therefore **not unload**, any currently loaded embedding models when a new text generation model is loaded. This is a critical performance fix: it prevents the embedding model (e.g. the one used for RAG) from being repeatedly reloaded. A sketch of the filtering step follows below.

Also includes minor code style cleanup/reformatting in `jan-provider-web/provider.ts` for improved readability.
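To illustrate the filtering behavior, here is a minimal sketch. The session shape and the per-`pid` unload callback mirror the plugin API in the file below; the helper name `unloadNonEmbeddingSessions` and the `LoadedSession` type are assumptions for illustration, not the extension's actual code.

```typescript
// Minimal sketch of the auto-unload filter: before a new text generation
// model is started, unload every currently loaded session EXCEPT the ones
// flagged as embedding sessions, so the RAG embedder stays resident.
interface LoadedSession {
  pid: number
  model_id: string
  is_embedding: boolean
}

async function unloadNonEmbeddingSessions(
  sessions: LoadedSession[],
  unload: (pid: number) => Promise<void>
): Promise<void> {
  // Keep embedding sessions loaded; only text generation sessions are unloaded.
  const toUnload = sessions.filter((s) => !s.is_embedding)
  await Promise.all(toUnload.map((s) => unload(s.pid)))
}
```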
145 lines · 3.4 KiB · TypeScript
import { invoke } from '@tauri-apps/api/core'

// Types
export interface SessionInfo {
  pid: number
  port: number
  model_id: string
  model_path: string
  api_key: string
}

export interface DeviceInfo {
  id: string
  name: string
  memory: number
}

export interface GgufMetadata {
  version: number
  tensor_count: number
  metadata: Record<string, string>
}

// Cleanup commands
export async function cleanupLlamaProcesses(): Promise<void> {
  return await invoke('plugin:llamacpp|cleanup_llama_processes')
}

// LlamaCpp server commands
export async function loadLlamaModel(
  backendPath: string,
  libraryPath?: string,
  args: string[] = [],
  isEmbedding: boolean = false
): Promise<SessionInfo> {
  return await invoke('plugin:llamacpp|load_llama_model', {
    backendPath,
    libraryPath,
    args,
    isEmbedding,
  })
}

export async function unloadLlamaModel(pid: number): Promise<void> {
  return await invoke('plugin:llamacpp|unload_llama_model', { pid })
}

export async function getDevices(
  backendPath: string,
  libraryPath?: string
): Promise<DeviceInfo[]> {
  return await invoke('plugin:llamacpp|get_devices', {
    backendPath,
    libraryPath,
  })
}

export async function generateApiKey(
  modelId: string,
  apiSecret: string
): Promise<string> {
  return await invoke('plugin:llamacpp|generate_api_key', {
    modelId,
    apiSecret,
  })
}

export async function isProcessRunning(pid: number): Promise<boolean> {
  return await invoke('plugin:llamacpp|is_process_running', { pid })
}

export async function getRandomPort(): Promise<number> {
  return await invoke('plugin:llamacpp|get_random_port')
}

export async function findSessionByModel(
  modelId: string
): Promise<SessionInfo | null> {
  return await invoke('plugin:llamacpp|find_session_by_model', { modelId })
}

export async function getLoadedModels(): Promise<string[]> {
  return await invoke('plugin:llamacpp|get_loaded_models')
}

export async function getAllSessions(): Promise<SessionInfo[]> {
  return await invoke('plugin:llamacpp|get_all_sessions')
}

export async function getSessionByModel(
  modelId: string
): Promise<SessionInfo | null> {
  return await invoke('plugin:llamacpp|get_session_by_model', { modelId })
}

// GGUF commands
export async function readGgufMetadata(path: string): Promise<GgufMetadata> {
  return await invoke('plugin:llamacpp|read_gguf_metadata', { path })
}

export async function estimateKVCacheSize(
  meta: Record<string, string>,
  ctxSize?: number
): Promise<{ size: number; per_token_size: number }> {
  return await invoke('plugin:llamacpp|estimate_kv_cache_size', {
    meta,
    ctxSize,
  })
}

export async function getModelSize(path: string): Promise<number> {
  return await invoke('plugin:llamacpp|get_model_size', { path })
}

export async function isModelSupported(
  path: string,
  ctxSize?: number
): Promise<'RED' | 'YELLOW' | 'GREEN'> {
  return await invoke('plugin:llamacpp|is_model_supported', {
    path,
    ctxSize,
  })
}

export async function planModelLoadInternal(
  path: string,
  memoryMode: string,
  mmprojPath?: string,
  requestedContext?: number
): Promise<{
  gpuLayers: number
  maxContextLength: number
  noOffloadKVCache: boolean
  offloadMmproj?: boolean
  batchSize: number
  mode: 'GPU' | 'Hybrid' | 'CPU' | 'Unsupported'
}> {
  return await invoke('plugin:llamacpp|plan_model_load', {
    path,
    memoryMode,
    mmprojPath,
    requestedContext,
  })
}
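For context, a caller loading a dedicated embedding model (e.g. for RAG) would pass the new flag as the fourth argument. In the hedged sketch below, only `loadLlamaModel` and `SessionInfo` come from the file above; the wrapper function, binary path, and server args are illustrative placeholders.

```typescript
// Hypothetical call site: paths and args are placeholders, not real defaults.
async function loadRagEmbedder(): Promise<SessionInfo> {
  return await loadLlamaModel(
    '/path/to/llama-server', // backendPath: illustrative location of the server binary
    undefined,               // libraryPath: none needed in this sketch
    ['--embedding'],         // server args: illustrative
    true                     // isEmbedding: mark the session as an embedding session
  )
}
```

A session loaded this way is skipped by the auto-unload filter when a text generation model is loaded later, which is exactly the repeated-reload problem this commit addresses.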