Akarshan 8b15fe4ef2
feat: Simplify backend architecture
This commit introduces a flag for marking embedding models and refactors the backend detection logic for a cleaner implementation.

Key changes:

 - Embedding Support: The loadLlamaModel API now accepts an isEmbedding: boolean parameter, and SessionInfo exposes a matching is_embedding field. This allows the core process to differentiate and correctly initialize models intended for embedding tasks; see the sketch after this list.

 - Backend Naming Simplification (Refactor): Consolidated the CPU-specific backend tags (e.g., win-noavx-x64, win-avx2-x64) into generic *-common_cpus-x64 variants (e.g., win-common_cpus-x64). This streamlines supported backend detection.

 - File Structure Update: Changed the download path for CUDA runtime libraries (cudart) to place them inside the specific backend's directory (/build/bin/) rather than a shared lib folder, improving asset isolation.
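
For example, a caller might start an embedding server like this (the backend directory, model path, and server arguments below are illustrative, not taken from this change):

    const session = await loadLlamaModel(
      '/backends/win-common_cpus-x64',                  // hypothetical backend dir
      ['-m', '/models/embedder.gguf', '--embedding'],   // hypothetical args
      {},
      true                                              // isEmbedding
    )
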
2025-10-29 08:02:09 +05:30

import { invoke } from '@tauri-apps/api/core'

// Types
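/**
 * A running llama.cpp server process managed by the plugin. `is_embedding`
 * mirrors the `isEmbedding` flag passed to `loadLlamaModel` and marks
 * sessions that were started for embedding tasks.
 */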
export interface SessionInfo {
  pid: number
  port: number
  model_id: string
  model_path: string
  is_embedding: boolean
  api_key: string
  mmproj_path?: string
}

export interface UnloadResult {
  success: boolean
  error?: string
}

export interface DeviceInfo {
  id: string
  name: string
  memory: number
}

export interface GgufMetadata {
  version: number
  tensor_count: number
  metadata: Record<string, string>
}

// Cleanup commands
export async function cleanupLlamaProcesses(): Promise<void> {
  return await invoke('plugin:llamacpp|cleanup_llama_processes')
}

// LlamaCpp server commands
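/**
 * Spawns a llama.cpp server from the binaries under `backendPath`, passing
 * `args` and `envs` through to the process. Set `isEmbedding` so the core
 * process can differentiate and correctly initialize models intended for
 * embedding tasks.
 */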
export async function loadLlamaModel(
  backendPath: string,
  args: string[],
  envs: Record<string, string>,
  isEmbedding: boolean
): Promise<SessionInfo> {
  return await invoke('plugin:llamacpp|load_llama_model', {
    backendPath,
    args,
    envs,
    isEmbedding,
  })
}

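/**
 * Stops a server started by `loadLlamaModel`; pass the `pid` from its
 * `SessionInfo`. Use `isProcessRunning` (below) to confirm the process
 * has exited.
 */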
export async function unloadLlamaModel(pid: number): Promise<UnloadResult> {
  return await invoke('plugin:llamacpp|unload_llama_model', { pid })
}

export async function getDevices(
  backendPath: string,
  libraryPath?: string
): Promise<DeviceInfo[]> {
  return await invoke('plugin:llamacpp|get_devices', {
    backendPath,
    libraryPath,
  })
}

export async function generateApiKey(
  modelId: string,
  apiSecret: string
): Promise<string> {
  return await invoke('plugin:llamacpp|generate_api_key', {
    modelId,
    apiSecret,
  })
}

export async function isProcessRunning(pid: number): Promise<boolean> {
  return await invoke('plugin:llamacpp|is_process_running', { pid })
}

export async function getRandomPort(): Promise<number> {
  return await invoke('plugin:llamacpp|get_random_port')
}

export async function findSessionByModel(
  modelId: string
): Promise<SessionInfo | null> {
  return await invoke('plugin:llamacpp|find_session_by_model', { modelId })
}

export async function getLoadedModels(): Promise<string[]> {
  return await invoke('plugin:llamacpp|get_loaded_models')
}

export async function getAllSessions(): Promise<SessionInfo[]> {
  return await invoke('plugin:llamacpp|get_all_sessions')
}

export async function getSessionByModel(
  modelId: string
): Promise<SessionInfo | null> {
  return await invoke('plugin:llamacpp|get_session_by_model', { modelId })
}

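// Example (illustrative, not part of this file): reuse an existing session
// for a model id instead of spawning a second server. `backendPath` and
// `args` are hypothetical.
//
//   const existing = await getSessionByModel(modelId)
//   const session = existing ?? (await loadLlamaModel(backendPath, args, {}, false))
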
// GGUF commands
export async function readGgufMetadata(path: string): Promise<GgufMetadata> {
  return await invoke('plugin:llamacpp|read_gguf_metadata', { path })
}

export async function estimateKVCacheSize(
  meta: Record<string, string>,
  ctxSize?: number
): Promise<{ size: number; per_token_size: number }> {
  return await invoke('plugin:llamacpp|estimate_kv_cache_size', {
    meta,
    ctxSize,
  })
}

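// Example (illustrative; the model path is hypothetical): feed GGUF metadata
// into the KV-cache estimator for an 8192-token context.
//
//   const { metadata } = await readGgufMetadata('/models/some-model.gguf')
//   const { size, per_token_size } = await estimateKVCacheSize(metadata, 8192)
//   console.log(`KV cache: ${size} bytes (${per_token_size} per token)`)
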
export async function getModelSize(path: string): Promise<number> {
  return await invoke('plugin:llamacpp|get_model_size', { path })
}

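/**
 * Coarse traffic-light verdict on whether the model at `path` can be run
 * with the given context size; the exact thresholds behind RED/YELLOW/GREEN
 * are determined on the Rust side of the plugin.
 */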
export async function isModelSupported(
  path: string,
  ctxSize?: number
): Promise<'RED' | 'YELLOW' | 'GREEN'> {
  return await invoke('plugin:llamacpp|is_model_supported', {
    path,
    ctxSize,
  })
}

export async function planModelLoadInternal(
  path: string,
  memoryMode: string,
  mmprojPath?: string,
  requestedContext?: number
): Promise<{
  gpuLayers: number
  maxContextLength: number
  noOffloadKVCache: boolean
  offloadMmproj?: boolean
  batchSize: number
  mode: 'GPU' | 'Hybrid' | 'CPU' | 'Unsupported'
}> {
  return await invoke('plugin:llamacpp|plan_model_load', {
    path,
    memoryMode,
    mmprojPath,
    requestedContext,
  })
}

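// Example (illustrative): turn a load plan into llama.cpp server arguments.
// The model path and the 'auto' memory mode are hypothetical values, and the
// flags follow common llama.cpp conventions (-ngl: GPU layers, -c: context
// size, -b: batch size).
//
//   const plan = await planModelLoadInternal('/models/some-model.gguf', 'auto')
//   if (plan.mode !== 'Unsupported') {
//     const args = [
//       '-m', '/models/some-model.gguf',
//       '-ngl', String(plan.gpuLayers),
//       '-c', String(plan.maxContextLength),
//       '-b', String(plan.batchSize),
//     ]
//     // ...hand args to loadLlamaModel with the chosen backendPath
//   }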