Akarshan 7762cea10a
feat: Distinguish and preserve embedding model sessions
This commit introduces a new field, `is_embedding`, to the `SessionInfo` structure to clearly mark sessions running dedicated embedding models.

Key changes:
- Adds `is_embedding` to the `SessionInfo` interface in `AIEngine.ts` and the Rust backend.
- Updates the `loadLlamaModel` command signatures to pass this new flag.
- Modifies the llama.cpp extension's **auto-unload logic** to **skip embedding models**: when a new text generation model is loaded, currently loaded embedding sessions are filtered out and left running (see the sketch below). This is a critical performance fix that prevents the embedding model (e.g., the one used for RAG) from being repeatedly reloaded.
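
A minimal sketch of the filter, written against the commands this module already exports (`getAllSessions`, `unloadLlamaModel`); the actual extension logic may differ, and `unloadNonEmbeddingSessions` is a hypothetical helper name:

```typescript
async function unloadNonEmbeddingSessions(): Promise<void> {
  const sessions = await getAllSessions()
  // Preserve embedding sessions; only text generation models are unloaded
  const stale = sessions.filter((s) => !s.is_embedding)
  await Promise.all(stale.map((s) => unloadLlamaModel(s.pid)))
}
```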

Also includes minor code style cleanup/reformatting in `jan-provider-web/provider.ts` for improved readability.
2025-10-08 20:03:35 +05:30


import { invoke } from '@tauri-apps/api/core'

// Types
export interface SessionInfo {
  pid: number
  port: number
  model_id: string
  model_path: string
  api_key: string
  // True for sessions running a dedicated embedding model; these are
  // preserved by the auto-unload logic when a text generation model loads.
  is_embedding: boolean
}

export interface DeviceInfo {
  id: string
  name: string
  memory: number
}

export interface GgufMetadata {
  version: number
  tensor_count: number
  metadata: Record<string, string>
}
// Cleanup commands
export async function cleanupLlamaProcesses(): Promise<void> {
  return await invoke('plugin:llamacpp|cleanup_llama_processes')
}
// LlamaCpp server commands
export async function loadLlamaModel(
  backendPath: string,
  libraryPath?: string,
  args: string[] = [],
  // Marks the session as a dedicated embedding model so the auto-unload
  // logic leaves it running when a text generation model is loaded.
  isEmbedding: boolean = false
): Promise<SessionInfo> {
  return await invoke('plugin:llamacpp|load_llama_model', {
    backendPath,
    libraryPath,
    args,
    isEmbedding,
  })
}

export async function unloadLlamaModel(pid: number): Promise<void> {
  return await invoke('plugin:llamacpp|unload_llama_model', { pid })
}
export async function getDevices(
  backendPath: string,
  libraryPath?: string
): Promise<DeviceInfo[]> {
  return await invoke('plugin:llamacpp|get_devices', {
    backendPath,
    libraryPath,
  })
}

export async function generateApiKey(
  modelId: string,
  apiSecret: string
): Promise<string> {
  return await invoke('plugin:llamacpp|generate_api_key', {
    modelId,
    apiSecret,
  })
}

export async function isProcessRunning(pid: number): Promise<boolean> {
  return await invoke('plugin:llamacpp|is_process_running', { pid })
}

export async function getRandomPort(): Promise<number> {
  return await invoke('plugin:llamacpp|get_random_port')
}

export async function findSessionByModel(
  modelId: string
): Promise<SessionInfo | null> {
  return await invoke('plugin:llamacpp|find_session_by_model', { modelId })
}

export async function getLoadedModels(): Promise<string[]> {
  return await invoke('plugin:llamacpp|get_loaded_models')
}

export async function getAllSessions(): Promise<SessionInfo[]> {
  return await invoke('plugin:llamacpp|get_all_sessions')
}

export async function getSessionByModel(
  modelId: string
): Promise<SessionInfo | null> {
  return await invoke('plugin:llamacpp|get_session_by_model', { modelId })
}
// GGUF commands
export async function readGgufMetadata(path: string): Promise<GgufMetadata> {
  return await invoke('plugin:llamacpp|read_gguf_metadata', { path })
}

export async function estimateKVCacheSize(
  meta: Record<string, string>,
  ctxSize?: number
): Promise<{ size: number; per_token_size: number }> {
  return await invoke('plugin:llamacpp|estimate_kv_cache_size', {
    meta,
    ctxSize,
  })
}

export async function getModelSize(path: string): Promise<number> {
  return await invoke('plugin:llamacpp|get_model_size', { path })
}

export async function isModelSupported(
  path: string,
  ctxSize?: number
): Promise<'RED' | 'YELLOW' | 'GREEN'> {
  return await invoke('plugin:llamacpp|is_model_supported', {
    path,
    ctxSize,
  })
}
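
// Illustrative only: a hedged sketch composing the GGUF commands above to
// pre-check a model before loading. The context size default is a
// hypothetical placeholder, and sketchMemoryCheck is not part of this API.
async function sketchMemoryCheck(path: string, ctxSize = 4096) {
  const gguf = await readGgufMetadata(path)
  const kv = await estimateKVCacheSize(gguf.metadata, ctxSize)
  const verdict = await isModelSupported(path, ctxSize) // 'RED' | 'YELLOW' | 'GREEN'
  return { kvCacheSize: kv.size, perTokenSize: kv.per_token_size, verdict }
}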
export async function planModelLoadInternal(
  path: string,
  memoryMode: string,
  mmprojPath?: string,
  requestedContext?: number
): Promise<{
  gpuLayers: number
  maxContextLength: number
  noOffloadKVCache: boolean
  offloadMmproj?: boolean
  batchSize: number
  mode: 'GPU' | 'Hybrid' | 'CPU' | 'Unsupported'
}> {
  return await invoke('plugin:llamacpp|plan_model_load', {
    path,
    memoryMode,
    mmprojPath,
    requestedContext,
  })
}
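
// Illustrative only: a hedged usage sketch for the new isEmbedding flag. The
// backend path and server args are hypothetical placeholders, not values from
// this codebase.
async function sketchLoadEmbeddingModel(): Promise<SessionInfo> {
  const args = ['-m', '/models/embedding.gguf', '--embedding']
  // isEmbedding = true marks the session so the auto-unload pass keeps it
  // alive when a text generation model is loaded later.
  return await loadLlamaModel('/backends/llama-server', undefined, args, true)
}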