From 1ae7c0b59a9f31be3b5ee1cd6bf33deac537e137 Mon Sep 17 00:00:00 2001
From: Thien Tran
Date: Fri, 30 May 2025 13:55:31 +0800
Subject: [PATCH] update version/backend format. fix bugs around load()

---
 extensions/llamacpp-extension/settings.json |  6 +--
 extensions/llamacpp-extension/src/index.ts  | 53 ++++++++++---------
 .../inference_llamacpp_extension/server.rs  | 50 +++++++++--------
 3 files changed, 58 insertions(+), 51 deletions(-)

diff --git a/extensions/llamacpp-extension/settings.json b/extensions/llamacpp-extension/settings.json
index 1d8bca20b..206a73ab3 100644
--- a/extensions/llamacpp-extension/settings.json
+++ b/extensions/llamacpp-extension/settings.json
@@ -1,8 +1,8 @@
 [
   {
-    "key": "backend",
-    "title": "Backend",
-    "description": "Backend for llama.cpp",
+    "key": "version_backend",
+    "title": "Version & Backend",
+    "description": "Version and Backend for llama.cpp",
     "controllerType": "dropdown",
     "controllerProps": {
       "value": "none",
diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts
index 455406b7a..f97b3e11b 100644
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@@ -29,7 +29,7 @@ import {
 import { invoke } from '@tauri-apps/api/core'
 
 type LlamacppConfig = {
-  backend: string
+  version_backend: string
   n_gpu_layers: number
   ctx_size: number
   threads: number
@@ -100,14 +100,14 @@ export default class llamacpp_extension extends AIEngine {
 
     // update backend settings
     for (let item of settings) {
-      if (item.key === 'backend') {
+      if (item.key === 'version_backend') {
         // NOTE: is there a race condition between when tauri IPC is available
         // and when the extension is loaded?
-        const backends = await listSupportedBackends()
-        console.log('Available backends:', backends)
-        item.controllerProps.options = backends.map((b) => {
+        const version_backends = await listSupportedBackends()
+        console.log('Available version/backends:', version_backends)
+        item.controllerProps.options = version_backends.map((b) => {
           const { version, backend } = b
-          const key = `${version}-${backend}`
+          const key = `${version}/${backend}`
           return { value: key, name: key }
         })
       }
@@ -156,9 +156,7 @@ export default class llamacpp_extension extends AIEngine {
 
     if (key === 'backend') {
       const valueStr = value as string
-      const idx = valueStr.indexOf('-')
-      const version = valueStr.slice(0, idx)
-      const backend = valueStr.slice(idx + 1)
+      const [version, backend] = valueStr.split('/')
 
       const closure = async () => {
         const isInstalled = await isBackendInstalled(backend, version)
@@ -391,11 +389,19 @@ export default class llamacpp_extension extends AIEngine {
     const args: string[] = []
     const cfg = this.config
     const sysInfo = await window.core.api.getSystemInfo()
-    const [backend, version] = cfg.backend.split('-')
+    const [version, backend] = cfg.version_backend.split('/')
+    if (!version || !backend) {
+      // TODO: sometimes version_backend is not set correctly. to investigate
+      throw new Error(
+        `Invalid version/backend format: ${cfg.version_backend}. Expected format: <version>/<backend>`
+      )
+    }
+
     const exe_name =
       sysInfo.os_type === 'windows' ? 'llama-server.exe' : 'llama-server'
+    const janDataFolderPath = await getJanDataFolderPath()
     const backendPath = await joinPath([
-      await getJanDataFolderPath(),
+      janDataFolderPath,
       'llamacpp',
       'backends',
       backend,
@@ -404,30 +410,30 @@ export default class llamacpp_extension extends AIEngine {
       'bin',
       exe_name,
     ])
-    const modelPath = await joinPath([
+    const modelConfigPath = await joinPath([
       this.modelsBasePath,
       this.provider,
       modelId,
+      'model.yml',
     ])
-    const modelConfigPath = await joinPath([modelPath, 'model.yml'])
-    const modelConfig = await invoke('read_yaml', {
-      modelConfigPath,
-    })
+    const modelConfig = await invoke('read_yaml', { path: modelConfigPath })
     const port = await this.getRandomPort()
 
     // disable llama-server webui
     args.push('--no-webui')
     // update key for security; TODO: (qnixsynapse) Make it more secure
     const api_key = await this.generateApiKey(modelId)
-    args.push(`--api-key ${api_key}`)
+    args.push('--api-key', api_key)
 
     // model option is required
-    // TODO: llama.cpp extension lookup model path based on modelId
-    args.push('-m', modelConfig.model_path)
+    // NOTE: model_path and mmproj_path can be either relative to Jan's data folder or absolute path
+    const modelPath = await joinPath([janDataFolderPath, modelConfig.model_path])
+    args.push('-m', modelPath)
     args.push('-a', modelId)
-    args.push('--port', String(port)) // Default port if not specified
+    args.push('--port', String(port))
 
     if (modelConfig.mmproj_path) {
-      args.push('--mmproj', modelConfig.mmproj_path)
+      const mmprojPath = await joinPath([janDataFolderPath, modelConfig.mmproj_path])
+      args.push('--mmproj', mmprojPath)
     }
     if (cfg.ctx_size !== undefined) {
@@ -468,10 +474,7 @@ export default class llamacpp_extension extends AIEngine {
 
     console.log('Calling Tauri command llama_load with args:', args)
     try {
-      const sInfo = await invoke('load_llama_model', {
-        backendPath: backendPath,
-        args: args,
-      })
+      const sInfo = await invoke('load_llama_model', { backendPath, args })
 
       // Store the session info for later use
       this.activeSessions.set(sInfo.sessionId, sInfo)
diff --git a/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs b/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs
index 849a14a17..59f0790b0 100644
--- a/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs
+++ b/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs
@@ -1,12 +1,12 @@
+use base64::{engine::general_purpose, Engine as _};
+use hmac::{Hmac, Mac};
+use serde::{Deserialize, Serialize};
+use sha2::Sha256;
 use std::path::PathBuf;
-use serde::{Serialize, Deserialize};
 use tauri::{AppHandle, State}; // Import Manager trait
+use thiserror;
 use tokio::process::Command;
 use uuid::Uuid;
-use thiserror;
-use hmac::{Hmac, Mac};
-use sha2::Sha256;
-use base64::{Engine as _, engine::general_purpose};
 
 use crate::core::state::AppState;
 
@@ -16,8 +16,8 @@ type HmacSha256 = Hmac<Sha256>;
 pub enum serverError {
     #[error("Server is already running")]
     AlreadyRunning,
-    // #[error("Server is not running")]
-    // NotRunning,
     #[error("Failed to locate server binary: {0}")]
     BinaryNotFound(String),
     #[error("IO error: {0}")]
@@ -40,10 +40,10 @@ type ServerResult<T> = Result<T, serverError>;
 
 #[derive(Debug, Serialize, Deserialize)]
 pub struct sessionInfo {
-    pub pid: String, // opaque handle for unload/chat
-    pub port: u16, // llama-server output port
+    pub pid: String,       // opaque handle for unload/chat
+    pub port: u16,         // llama-server output port
     pub modelId: String,
-    pub modelPath: String,  // path of the loaded model
+    pub modelPath: String, // path of the loaded model
     pub apiKey: String,
 }
 
@@ -56,10 +56,10 @@ pub struct unloadResult {
 
 // --- Load Command ---
 #[tauri::command]
 pub async fn load_llama_model(
-    _app_handle: AppHandle, // Get the AppHandle
+    _app_handle: AppHandle,     // Get the AppHandle
     state: State<'_, AppState>, // Access the shared state
-    backendPath: String,
-    args: Vec<String>, // Arguments from the frontend
+    backend_path: &str,
+    args: Vec<String>,          // Arguments from the frontend
 ) -> ServerResult<sessionInfo> {
     let mut process_lock = state.llama_server_process.lock().await;
@@ -68,25 +68,25 @@ pub async fn load_llama_model(
         return Err(serverError::AlreadyRunning);
     }
 
-    log::info!("Attempting to launch server at path: {:?}", engineBasePath);
+    log::info!("Attempting to launch server at path: {:?}", backend_path);
     log::info!("Using arguments: {:?}", args);
 
-    let server_path_buf = PathBuf::from(&engineBasePath);
+    let server_path_buf = PathBuf::from(backend_path);
     if !server_path_buf.exists() {
         log::error!(
             "Server binary not found at expected path: {:?}",
-            engineBasePath
+            backend_path
         );
         return Err(serverError::BinaryNotFound(format!(
             "Binary not found at {:?}",
-            engineBasePath
+            backend_path
         )));
     }
 
     let port = 8080; // Default port
 
     // Configure the command to run the server
-    let mut command = Command::new(engineBasePath);
+    let mut command = Command::new(backend_path);
 
     let modelPath = args[2].replace("-m", "");
     let apiKey = args[1].replace("--api-key", "");
@@ -124,7 +124,10 @@
 
 // --- Unload Command ---
 #[tauri::command]
-pub async fn unload_llama_model(session_id: String, state: State<'_, AppState>) -> ServerResult<unloadResult> {
+pub async fn unload_llama_model(
+    session_id: String,
+    state: State<'_, AppState>,
+) -> ServerResult<unloadResult> {
     let mut process_lock = state.llama_server_process.lock().await;
     // Take the child process out of the Option, leaving None in its place
     if let Some(mut child) = process_lock.take() {
@@ -144,8 +147,10 @@
 
             return Ok(unloadResult {
                 success: false,
-                error: Some(format!("Session ID mismatch: provided {} doesn't match process {}",
-                    session_id, process_pid)),
+                error: Some(format!(
+                    "Session ID mismatch: provided {} doesn't match process {}",
+                    session_id, process_pid
+                )),
             });
         }
 
@@ -177,7 +182,7 @@ pub async fn unload_llama_model(session_id: String, state: State<'_, AppState>) -> ServerResult<unloadResult>
     } else {
         log::warn!("Attempted to unload server, but no process was running");
 
-        // If no process is running but client thinks there is, 
+        // If no process is running but client thinks there is,
         // still report success since the end state is what they wanted
         Ok(unloadResult {
             success: true,
@@ -198,4 +203,3 @@ pub fn generate_api_key(modelId: String, apiSecret: String) -> Result