Akarshan 0c5fbc102c
refactor: Simplify Tauri plugin calls and enhance 'Flash Attention' setting
This commit improves the llama.cpp extension, reworking the 'Flash Attention' setting and refactoring the Tauri plugin interactions for clearer, more maintainable code.

The backend interaction is streamlined by removing the unnecessary `libraryPath` argument from the Tauri plugin commands for loading models and listing devices.

* **Simplified API Calls:** The `loadLlamaModel`, `unloadLlamaModel`, and `getDevices` functions in both the extension and the Tauri plugin now manage the library path internally, based on the backend executable's location.
* **Decoupled Logic:** The extension (`src/index.ts`) now uses the new, simplified Tauri plugin functions, which improves modularity and reduces boilerplate in the extension (see the call sketch after this list).
* **Type Consistency:** Added an `UnloadResult` interface to `guest-js/index.ts` so that `unloadLlamaModel` returns a typed result.
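
For illustration, a minimal sketch of how the extension might call the simplified plugin API after this change. The import path, model path, port, and env values are placeholders, not the extension's actual code:

```ts
// Hypothetical call site in the extension's src/index.ts. Only the backend
// executable path, CLI args, and env vars are passed now -- no separate
// libraryPath argument.
import { loadLlamaModel, unloadLlamaModel } from './tauri-plugin-llamacpp' // placeholder import path

async function runModel(backendPath: string) {
  const session = await loadLlamaModel(
    backendPath,
    ['--model', '/path/to/model.gguf', '--port', '8080'], // example llama-server args
    {} // extra environment variables, if any
  )
  console.log(`llama-server pid ${session.pid} listening on port ${session.port}`)

  // Later, stop the server by pid and surface any error reported by the plugin.
  const result = await unloadLlamaModel(session.pid)
  if (!result.success) console.error(result.error)
}
```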

* **Updated UI Control:** The 'Flash Attention' setting in `settings.json` is changed from a boolean checkbox to a string-based dropdown, offering **'auto'**, **'on'**, and **'off'** options.
* **Improved Logic:** The extension logic in `src/index.ts` is updated to handle the new string-based `flash_attn` configuration: it passes the value (`'auto'`, `'on'`, or `'off'`) directly as a command-line argument to the llama.cpp backend, and the complex version-checking logic previously required for older llama.cpp builds is removed (see the sketch below).
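
As a rough illustration of the new mapping (the helper name is hypothetical; the real wiring lives in the extension's argument builder):

```ts
// Hypothetical helper: map the string-based setting straight to the CLI flag.
type FlashAttnSetting = 'auto' | 'on' | 'off'

function flashAttnArgs(flashAttn: FlashAttnSetting): string[] {
  // The value is forwarded as-is (--flash-attn auto|on|off), so no
  // per-version branching is needed anymore.
  return ['--flash-attn', flashAttn]
}
```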

This refactoring cleans up the extension's codebase and moves environment and path setup concerns into the Tauri plugin where they are most relevant.
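
A loose sketch of that idea in TypeScript, assuming shared libraries sit next to the backend binary (the plugin actually resolves this on its Rust side; `resolveLibraryPath` is a made-up name for illustration):

```ts
// Illustration only: derive the library search path from the backend
// executable's location instead of passing it in from the extension.
import { dirname } from '@tauri-apps/api/path'

async function resolveLibraryPath(backendPath: string): Promise<string> {
  // Assumption: shared libraries ship alongside the llama-server binary,
  // so the executable's directory can serve as the library path.
  return await dirname(backendPath)
}
```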
2025-10-29 08:00:57 +05:30

149 lines
3.4 KiB
TypeScript

import { invoke } from '@tauri-apps/api/core'
// Types
export interface SessionInfo {
  pid: number
  port: number
  model_id: string
  model_path: string
  api_key: string
  mmproj_path?: string
}
export interface UnloadResult {
  success: boolean
  error?: string
}
export interface DeviceInfo {
  id: string
  name: string
  memory: number
}
export interface GgufMetadata {
  version: number
  tensor_count: number
  metadata: Record<string, string>
}
// Cleanup commands
export async function cleanupLlamaProcesses(): Promise<void> {
  return await invoke('plugin:llamacpp|cleanup_llama_processes')
}
// LlamaCpp server commands
export async function loadLlamaModel(
  backendPath: string,
  args: string[],
  envs: Record<string, string>
): Promise<SessionInfo> {
  return await invoke('plugin:llamacpp|load_llama_model', {
    backendPath,
    args,
    envs
  })
}
export async function unloadLlamaModel(pid: number): Promise<UnloadResult> {
  return await invoke('plugin:llamacpp|unload_llama_model', { pid })
}
export async function getDevices(
  backendPath: string,
  libraryPath?: string
): Promise<DeviceInfo[]> {
  return await invoke('plugin:llamacpp|get_devices', {
    backendPath,
    libraryPath,
  })
}
export async function generateApiKey(
  modelId: string,
  apiSecret: string
): Promise<string> {
  return await invoke('plugin:llamacpp|generate_api_key', {
    modelId,
    apiSecret,
  })
}
export async function isProcessRunning(pid: number): Promise<boolean> {
  return await invoke('plugin:llamacpp|is_process_running', { pid })
}
export async function getRandomPort(): Promise<number> {
  return await invoke('plugin:llamacpp|get_random_port')
}
export async function findSessionByModel(
  modelId: string
): Promise<SessionInfo | null> {
  return await invoke('plugin:llamacpp|find_session_by_model', { modelId })
}
export async function getLoadedModels(): Promise<string[]> {
  return await invoke('plugin:llamacpp|get_loaded_models')
}
export async function getAllSessions(): Promise<SessionInfo[]> {
  return await invoke('plugin:llamacpp|get_all_sessions')
}
export async function getSessionByModel(
  modelId: string
): Promise<SessionInfo | null> {
  return await invoke('plugin:llamacpp|get_session_by_model', { modelId })
}
// GGUF commands
export async function readGgufMetadata(path: string): Promise<GgufMetadata> {
  return await invoke('plugin:llamacpp|read_gguf_metadata', { path })
}
export async function estimateKVCacheSize(
  meta: Record<string, string>,
  ctxSize?: number
): Promise<{ size: number; per_token_size: number }> {
  return await invoke('plugin:llamacpp|estimate_kv_cache_size', {
    meta,
    ctxSize,
  })
}
export async function getModelSize(path: string): Promise<number> {
  return await invoke('plugin:llamacpp|get_model_size', { path })
}
export async function isModelSupported(
  path: string,
  ctxSize?: number
): Promise<'RED' | 'YELLOW' | 'GREEN'> {
  return await invoke('plugin:llamacpp|is_model_supported', {
    path,
    ctxSize,
  })
}
export async function planModelLoadInternal(
  path: string,
  memoryMode: string,
  mmprojPath?: string,
  requestedContext?: number
): Promise<{
  gpuLayers: number
  maxContextLength: number
  noOffloadKVCache: boolean
  offloadMmproj?: boolean
  batchSize: number
  mode: 'GPU' | 'Hybrid' | 'CPU' | 'Unsupported'
}> {
  return await invoke('plugin:llamacpp|plan_model_load', {
    path,
    memoryMode,
    mmprojPath,
    requestedContext,
  })
}