diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts
index 7681c967a..3d124b37b 100644
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@@ -41,6 +41,7 @@ type LlamacppConfig = {
   auto_unload: boolean
   chat_template: string
   n_gpu_layers: number
+  offload_mmproj: boolean
   override_tensor_buffer_t: string
   ctx_size: number
   threads: number
@@ -1221,6 +1222,10 @@ export default class llamacpp_extension extends AIEngine {
     // Takes a regex with matching tensor name as input
     if (cfg.override_tensor_buffer_t)
       args.push('--override-tensor', cfg.override_tensor_buffer_t)
+    // Offload the multimodal projector model to the GPU by default. If there is
+    // not enough memory, turning this setting off keeps the projector model on
+    // the CPU, but image processing can take longer.
+    if (cfg.offload_mmproj === false) args.push('--no-mmproj-offload')
     args.push('-a', modelId)
     args.push('--port', String(port))
     if (modelConfig.mmproj_path) {
@@ -1645,4 +1650,18 @@ export default class llamacpp_extension extends AIEngine {
       'tokenizer.chat_template'
     ]?.includes('tools')
   }
+
+  private async loadMetadata(path: string): Promise<any> {
+    try {
+      const data = await invoke(
+        'plugin:llamacpp|read_gguf_metadata',
+        {
+          path: path,
+        }
+      )
+      return data
+    } catch (err) {
+      throw err
+    }
+  }
 }
diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock
index fc6bfd301..32638bc56 100644
--- a/src-tauri/Cargo.lock
+++ b/src-tauri/Cargo.lock
@@ -854,8 +854,18 @@ version = "0.20.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee"
 dependencies = [
- "darling_core",
- "darling_macro",
+ "darling_core 0.20.11",
+ "darling_macro 0.20.11",
+]
+
+[[package]]
+name = "darling"
+version = "0.21.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08440b3dd222c3d0433e63e097463969485f112baff337dfdaca043a0d760570"
+dependencies = [
+ "darling_core 0.21.2",
+ "darling_macro 0.21.2",
 ]
 
 [[package]]
@@ -872,13 +882,38 @@ dependencies = [
  "fnv",
  "ident_case",
  "proc-macro2",
  "quote",
  "strsim",
  "syn 2.0.104",
 ]
+
+[[package]]
+name = "darling_core"
+version = "0.21.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d25b7912bc28a04ab1b7715a68ea03aaa15662b43a1a4b2c480531fd19f8bf7e"
+dependencies = [
+ "fnv",
+ "ident_case",
+ "proc-macro2",
+ "quote",
+ "strsim",
+ "syn 2.0.104",
+]
 
 [[package]]
 name = "darling_macro"
 version = "0.20.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
 dependencies = [
- "darling_core",
+ "darling_core 0.20.11",
+ "quote",
+ "syn 2.0.104",
+]
+
+[[package]]
+name = "darling_macro"
+version = "0.21.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ce154b9bea7fb0c8e8326e62d00354000c36e79770ff21b8c84e3aa267d9d531"
+dependencies = [
+ "darling_core 0.21.2",
  "quote",
  "syn 2.0.104",
 ]
@@ -3984,8 +4019,8 @@ dependencies = [
 
 [[package]]
 name = "rmcp"
-version = "0.2.1"
-source = "git+https://github.com/modelcontextprotocol/rust-sdk?rev=3196c95f1dfafbffbdcdd6d365c94969ac975e6a#3196c95f1dfafbffbdcdd6d365c94969ac975e6a"
+version = "0.5.0"
+source = "git+https://github.com/modelcontextprotocol/rust-sdk?rev=209dbac50f51737ad953c3a2c8e28f3619b6c277#209dbac50f51737ad953c3a2c8e28f3619b6c277"
 dependencies = [
  "base64 0.22.1",
  "chrono",
@@ -4010,10 +4045,10 @@
 
 [[package]]
 name = "rmcp-macros"
-version = "0.2.1"
-source = "git+https://github.com/modelcontextprotocol/rust-sdk?rev=3196c95f1dfafbffbdcdd6d365c94969ac975e6a#3196c95f1dfafbffbdcdd6d365c94969ac975e6a"
+version = "0.5.0"
+source = "git+https://github.com/modelcontextprotocol/rust-sdk?rev=209dbac50f51737ad953c3a2c8e28f3619b6c277#209dbac50f51737ad953c3a2c8e28f3619b6c277"
 dependencies = [
- "darling",
+ "darling 0.21.2",
  "proc-macro2",
  "quote",
  "serde_json",
@@ -4408,7 +4443,7 @@ version = "3.14.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "de90945e6565ce0d9a25098082ed4ee4002e047cb59892c318d66821e14bb30f"
 dependencies = [
- "darling",
+ "darling 0.20.11",
 "proc-macro2",
 "quote",
 "syn 2.0.104",
@@ -6868,7 +6903,7 @@ version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a76ff259533532054cfbaefb115c613203c73707017459206380f03b3b3f266e"
 dependencies = [
- "darling",
+ "darling 0.20.11",
 "proc-macro2",
 "quote",
 "syn 2.0.104",
diff --git a/src-tauri/plugins/tauri-plugin-llamacpp/src/commands.rs b/src-tauri/plugins/tauri-plugin-llamacpp/src/commands.rs
index a2592f345..bc9a7213c 100644
--- a/src-tauri/plugins/tauri-plugin-llamacpp/src/commands.rs
+++ b/src-tauri/plugins/tauri-plugin-llamacpp/src/commands.rs
@@ -11,7 +11,7 @@ use tokio::time::Instant;
 
 use crate::device::{get_devices_from_backend, DeviceInfo};
 use crate::error::{ErrorCode, LlamacppError, ServerError, ServerResult};
-use crate::path::{validate_binary_path, validate_model_path};
+use crate::path::{validate_binary_path, validate_model_path, validate_mmproj_path};
 use crate::process::{
     find_session_by_model_id, get_all_active_sessions, get_all_loaded_model_ids,
     get_random_available_port, is_process_running_by_pid,
@@ -53,6 +53,7 @@ pub async fn load_llama_model(
 
     let port = parse_port_from_args(&args);
     let model_path_pb = validate_model_path(&mut args)?;
+    let _mmproj_path_pb = validate_mmproj_path(&mut args)?;
 
     let api_key = extract_arg_value(&args, "--api-key");
     let model_id = extract_arg_value(&args, "-a");
diff --git a/src-tauri/plugins/tauri-plugin-llamacpp/src/path.rs b/src-tauri/plugins/tauri-plugin-llamacpp/src/path.rs
index 44ed00109..a62fb069a 100644
--- a/src-tauri/plugins/tauri-plugin-llamacpp/src/path.rs
+++ b/src-tauri/plugins/tauri-plugin-llamacpp/src/path.rs
@@ -98,3 +98,50 @@ pub fn validate_model_path(args: &mut Vec<String>) -> ServerResult<PathBuf> {
 
     Ok(model_path_pb)
 }
+
+/// Validate that the mmproj path exists and update args with a platform-appropriate path format
+pub fn validate_mmproj_path(args: &mut Vec<String>) -> ServerResult<Option<PathBuf>> {
+    let mmproj_path_index = match args.iter().position(|arg| arg == "--mmproj") {
+        Some(index) => index,
+        None => return Ok(None), // mmproj is optional
+    };
+
+    let mmproj_path = args.get(mmproj_path_index + 1).cloned().ok_or_else(|| {
+        LlamacppError::new(
+            ErrorCode::ModelLoadFailed,
+            "Mmproj path was not provided after the '--mmproj' flag.".into(),
+            None,
+        )
+    })?;
+
+    let mmproj_path_pb = PathBuf::from(&mmproj_path);
+    if !mmproj_path_pb.exists() {
+        let err_msg = format!(
+            "Invalid or inaccessible mmproj path: {}",
+            mmproj_path_pb.display()
+        );
+        log::error!("{}", &err_msg);
+        return Err(LlamacppError::new(
+            ErrorCode::ModelFileNotFound,
+            "The specified mmproj file does not exist or is not accessible.".into(),
+            Some(err_msg),
+        )
+        .into());
+    }
+
+    #[cfg(windows)]
+    {
+        // Use the short path on Windows
+        if let Some(short) = get_short_path(&mmproj_path_pb) {
+            args[mmproj_path_index + 1] = short;
+        } else {
+            args[mmproj_path_index + 1] = mmproj_path_pb.display().to_string();
+        }
+    }
+    #[cfg(not(windows))]
+    {
+        args[mmproj_path_index + 1] = mmproj_path_pb.display().to_string();
+    }
+
+    Ok(Some(mmproj_path_pb))
+}
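
Reviewer note: a hedged sketch of the argument vector this validation operates on may help. The paths, model id, port, and the `cfg` object below are made up for illustration and are not part of the patch; the point is that `validate_mmproj_path` scans for `--mmproj`, verifies the file exists, and rewrites the value in place, while returning `Ok(None)` when the flag is absent so text-only models are unaffected.

```ts
// Hedged sketch: paths, model id, and port are made up.
const cfg = { offload_mmproj: false } // user has opted out of GPU offload

const args: string[] = ['-m', '/models/minicpm-v/model.gguf']
args.push('--mmproj', '/models/minicpm-v/mmproj.gguf') // checked by validate_mmproj_path
if (cfg.offload_mmproj === false) args.push('--no-mmproj-offload')
args.push('-a', 'minicpm-v')
args.push('--port', '8080')

// On Windows, validate_mmproj_path may rewrite args[3] to a short (8.3) path;
// on other platforms the value is normalized via display(). Note that only an
// explicit `offload_mmproj: false` emits --no-mmproj-offload, so configs that
// predate the setting keep the GPU-offload default.
console.log(args)
```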
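A second sketch shows how the new `read_gguf_metadata` command can be consumed from the extension side, mirroring the private `loadMetadata` helper added in `index.ts`. The flat `Record<string, any>` return shape, the helper name, and the Tauri v2 import path are assumptions; `tokenizer.chat_template` is the key the extension already inspects for tool support.

```ts
import { invoke } from '@tauri-apps/api/core'

// Assumed shape: a flat map of GGUF metadata keys to values.
async function readGgufMetadata(path: string): Promise<Record<string, any>> {
  return invoke<Record<string, any>>('plugin:llamacpp|read_gguf_metadata', {
    path,
  })
}

readGgufMetadata('/models/minicpm-v/model.gguf')
  .then((meta) => {
    // Same key the extension checks to decide whether tool calls are supported.
    const template: string | undefined = meta['tokenizer.chat_template']
    console.log('tool support:', template?.includes('tools') ?? false)
  })
  .catch((err) => console.error('read_gguf_metadata failed:', err))
```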