This commit introduces a functional flag for embedding models and refactors the backend detection logic for a cleaner implementation.

Key changes:

- Embedding support: loadLlamaModel now takes an isEmbedding: boolean argument and SessionInfo carries a matching is_embedding field, so the core process can differentiate and correctly initialize models intended for embedding tasks.
- Backend naming simplification (refactor): the CPU-specific backend tags (e.g., win-noavx-x64, win-avx2-x64) are consolidated into generic *-common_cpus-x64 variants (e.g., win-common_cpus-x64), which streamlines supported-backend detection.
- File structure update: the CUDA runtime libraries (cudart) are now downloaded into the specific backend's directory (/build/bin/) rather than a shared lib folder, improving asset isolation.
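As a quick illustration of the new flag, here is a hedged sketch of how a caller might load and unload an embedding model through these bindings; the import path, backend directory, and server arguments are placeholders rather than values shipped with the plugin:

```ts
import {
  getRandomPort,
  loadLlamaModel,
  unloadLlamaModel,
} from './commands' // hypothetical path to this bindings module

// Pick a free port, then start the model in embedding mode.
const port = await getRandomPort()
const session = await loadLlamaModel(
  '/path/to/backends/win-common_cpus-x64/build/bin', // placeholder backend dir
  ['--model', '/path/to/embedding-model.gguf', '--port', String(port)],
  {},   // extra environment variables for the server process
  true  // isEmbedding: initialize the model for embedding tasks
)

console.log(`embedding session ${session.model_id} on port ${session.port}`)

// Tear the session down by pid when done.
await unloadLlamaModel(session.pid)
```

The updated bindings file follows.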
import { invoke } from '@tauri-apps/api/core'

// Types
export interface SessionInfo {
  pid: number;
  port: number;
  model_id: string;
  model_path: string;
  is_embedding: boolean;
  api_key: string;
  mmproj_path?: string;
}

export interface UnloadResult {
  success: boolean;
  error?: string;
}

export interface DeviceInfo {
  id: string
  name: string
  memory: number
}

export interface GgufMetadata {
  version: number
  tensor_count: number
  metadata: Record<string, string>
}

// Cleanup commands
export async function cleanupLlamaProcesses(): Promise<void> {
  return await invoke('plugin:llamacpp|cleanup_llama_processes')
}

// LlamaCpp server commands
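/**
 * Spawns a llama.cpp server session from the given backend and returns its
 * session info. Pass `isEmbedding: true` for models intended for embedding
 * tasks so the core process initializes them accordingly.
 */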
export async function loadLlamaModel(
  backendPath: string,
  args: string[],
  envs: Record<string, string>,
  isEmbedding: boolean
): Promise<SessionInfo> {
  return await invoke('plugin:llamacpp|load_llama_model', {
    backendPath,
    args,
    envs,
    isEmbedding
  })
}

export async function unloadLlamaModel(pid: number): Promise<UnloadResult> {
  return await invoke('plugin:llamacpp|unload_llama_model', { pid })
}

export async function getDevices(
  backendPath: string,
  libraryPath?: string
): Promise<DeviceInfo[]> {
  return await invoke('plugin:llamacpp|get_devices', {
    backendPath,
    libraryPath,
  })
}

export async function generateApiKey(
  modelId: string,
  apiSecret: string
): Promise<string> {
  return await invoke('plugin:llamacpp|generate_api_key', {
    modelId,
    apiSecret,
  })
}

export async function isProcessRunning(pid: number): Promise<boolean> {
  return await invoke('plugin:llamacpp|is_process_running', { pid })
}

export async function getRandomPort(): Promise<number> {
  return await invoke('plugin:llamacpp|get_random_port')
}

export async function findSessionByModel(
  modelId: string
): Promise<SessionInfo | null> {
  return await invoke('plugin:llamacpp|find_session_by_model', { modelId })
}

export async function getLoadedModels(): Promise<string[]> {
  return await invoke('plugin:llamacpp|get_loaded_models')
}

export async function getAllSessions(): Promise<SessionInfo[]> {
  return await invoke('plugin:llamacpp|get_all_sessions')
}

export async function getSessionByModel(
  modelId: string
): Promise<SessionInfo | null> {
  return await invoke('plugin:llamacpp|get_session_by_model', { modelId })
}

// GGUF commands
export async function readGgufMetadata(path: string): Promise<GgufMetadata> {
  return await invoke('plugin:llamacpp|read_gguf_metadata', { path })
}

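/**
 * Estimates the KV cache footprint (total and per token) for a model from
 * its GGUF metadata and an optional context size.
 */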
export async function estimateKVCacheSize(
  meta: Record<string, string>,
  ctxSize?: number
): Promise<{ size: number; per_token_size: number }> {
  return await invoke('plugin:llamacpp|estimate_kv_cache_size', {
    meta,
    ctxSize,
  })
}

export async function getModelSize(path: string): Promise<number> {
  return await invoke('plugin:llamacpp|get_model_size', { path })
}

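/**
 * Traffic-light check of whether the model at `path` can run on the current
 * machine with the (optional) requested context size.
 */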
export async function isModelSupported(
  path: string,
  ctxSize?: number
): Promise<'RED' | 'YELLOW' | 'GREEN'> {
  return await invoke('plugin:llamacpp|is_model_supported', {
    path,
    ctxSize,
  })
}

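/**
 * Plans how a model should be loaded for the given memory mode: GPU layer
 * offload count, maximum context length, whether to keep the KV cache off
 * the GPU, optional mmproj offload, batch size, and the resulting load mode
 * (GPU, Hybrid, CPU, or Unsupported).
 */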
export async function planModelLoadInternal(
  path: string,
  memoryMode: string,
  mmprojPath?: string,
  requestedContext?: number
): Promise<{
  gpuLayers: number
  maxContextLength: number
  noOffloadKVCache: boolean
  offloadMmproj?: boolean
  batchSize: number
  mode: 'GPU' | 'Hybrid' | 'CPU' | 'Unsupported'
}> {
  return await invoke('plugin:llamacpp|plan_model_load', {
    path,
    memoryMode,
    mmprojPath,
    requestedContext,
  })
}