feat: Add offload_mmproj option and validation

This commit introduces a new configuration option offload_mmproj to the llamacpp extension.

The offload_mmproj setting controls whether the multimodal projector model is offloaded to the GPU. By default the projector is offloaded for better performance; setting the option to false keeps it on the CPU, which can help in low GPU memory scenarios at the cost of slower image processing.

Additionally, this commit adds validate_mmproj_path to ensure the provided --mmproj path exists and is accessible, so an invalid projector file is caught before the server is launched rather than surfacing as a failure during model loading.

This change also refactors some invoke calls for improved readability.
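
For illustration, here is a minimal sketch of how the option is expected to map onto llama-server flags. The helper name and signature are hypothetical and not part of this commit; only the --mmproj and --no-mmproj-offload flags come from the actual change. Since GPU offload is llama-server's default behaviour, the extension only ever passes the opt-out flag.

// Hypothetical helper, for illustration only; names are not from this commit.
fn mmproj_args(mmproj_path: Option<&str>, offload_mmproj: bool) -> Vec<String> {
    let mut args: Vec<String> = Vec::new();
    if let Some(path) = mmproj_path {
        args.push("--mmproj".into());
        args.push(path.into());
        if !offload_mmproj {
            // Keep the projector on the CPU; image processing may take longer.
            args.push("--no-mmproj-offload".into());
        }
    }
    args
}

In the extension itself this corresponds to the cfg.offload_mmproj === false check visible in the TypeScript diff below.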
Authored by Akarshan on 2025-08-15 19:30:56 +05:30, committed by Faisal Amir
parent e04bb86171
commit 9afeb5e514
4 changed files with 113 additions and 11 deletions


@@ -41,6 +41,7 @@ type LlamacppConfig = {
auto_unload: boolean
chat_template: string
n_gpu_layers: number
offload_mmproj: boolean
override_tensor_buffer_t: string
ctx_size: number
threads: number
@@ -1221,6 +1222,10 @@ export default class llamacpp_extension extends AIEngine {
// Takes a regex with matching tensor name as input
if (cfg.override_tensor_buffer_t)
args.push('--override-tensor', cfg.override_tensor_buffer_t)
// The multimodal projector model is offloaded to the GPU by default. If GPU memory is
// limited, turning this setting off keeps the projector on the CPU, though image
// processing may take longer.
if (cfg.offload_mmproj === false) args.push('--no-mmproj-offload')
args.push('-a', modelId)
args.push('--port', String(port))
if (modelConfig.mmproj_path) {
@@ -1645,4 +1650,18 @@ export default class llamacpp_extension extends AIEngine {
'tokenizer.chat_template'
]?.includes('tools')
}
  private async loadMetadata(path: string): Promise<GgufMetadata> {
    // Read GGUF metadata through the llamacpp Tauri plugin command.
    return await invoke<GgufMetadata>('plugin:llamacpp|read_gguf_metadata', {
      path,
    })
  }
}

src-tauri/Cargo.lock (generated, 55 changed lines)

@@ -854,8 +854,18 @@ version = "0.20.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee"
dependencies = [
"darling_core",
"darling_macro",
"darling_core 0.20.11",
"darling_macro 0.20.11",
]
[[package]]
name = "darling"
version = "0.21.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08440b3dd222c3d0433e63e097463969485f112baff337dfdaca043a0d760570"
dependencies = [
"darling_core 0.21.2",
"darling_macro 0.21.2",
]
[[package]]
@@ -872,13 +882,38 @@ dependencies = [
"syn 2.0.104",
]
[[package]]
name = "darling_core"
version = "0.21.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d25b7912bc28a04ab1b7715a68ea03aaa15662b43a1a4b2c480531fd19f8bf7e"
dependencies = [
"fnv",
"ident_case",
"proc-macro2",
"quote",
"strsim",
"syn 2.0.104",
]
[[package]]
name = "darling_macro"
version = "0.20.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
dependencies = [
"darling_core",
"darling_core 0.20.11",
"quote",
"syn 2.0.104",
]
[[package]]
name = "darling_macro"
version = "0.21.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce154b9bea7fb0c8e8326e62d00354000c36e79770ff21b8c84e3aa267d9d531"
dependencies = [
"darling_core 0.21.2",
"quote",
"syn 2.0.104",
]
@@ -3984,8 +4019,8 @@ dependencies = [
[[package]]
name = "rmcp"
version = "0.2.1"
source = "git+https://github.com/modelcontextprotocol/rust-sdk?rev=3196c95f1dfafbffbdcdd6d365c94969ac975e6a#3196c95f1dfafbffbdcdd6d365c94969ac975e6a"
version = "0.5.0"
source = "git+https://github.com/modelcontextprotocol/rust-sdk?rev=209dbac50f51737ad953c3a2c8e28f3619b6c277#209dbac50f51737ad953c3a2c8e28f3619b6c277"
dependencies = [
"base64 0.22.1",
"chrono",
@@ -4010,10 +4045,10 @@ dependencies = [
[[package]]
name = "rmcp-macros"
version = "0.2.1"
source = "git+https://github.com/modelcontextprotocol/rust-sdk?rev=3196c95f1dfafbffbdcdd6d365c94969ac975e6a#3196c95f1dfafbffbdcdd6d365c94969ac975e6a"
version = "0.5.0"
source = "git+https://github.com/modelcontextprotocol/rust-sdk?rev=209dbac50f51737ad953c3a2c8e28f3619b6c277#209dbac50f51737ad953c3a2c8e28f3619b6c277"
dependencies = [
"darling",
"darling 0.21.2",
"proc-macro2",
"quote",
"serde_json",
@@ -4408,7 +4443,7 @@ version = "3.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de90945e6565ce0d9a25098082ed4ee4002e047cb59892c318d66821e14bb30f"
dependencies = [
"darling",
"darling 0.20.11",
"proc-macro2",
"quote",
"syn 2.0.104",
@@ -6868,7 +6903,7 @@ version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a76ff259533532054cfbaefb115c613203c73707017459206380f03b3b3f266e"
dependencies = [
"darling",
"darling 0.20.11",
"proc-macro2",
"quote",
"syn 2.0.104",


@@ -11,7 +11,7 @@ use tokio::time::Instant;
use crate::device::{get_devices_from_backend, DeviceInfo};
use crate::error::{ErrorCode, LlamacppError, ServerError, ServerResult};
use crate::path::{validate_binary_path, validate_model_path};
use crate::path::{validate_binary_path, validate_model_path, validate_mmproj_path};
use crate::process::{
find_session_by_model_id, get_all_active_sessions, get_all_loaded_model_ids,
get_random_available_port, is_process_running_by_pid,
@@ -53,6 +53,7 @@ pub async fn load_llama_model<R: Runtime>(
let port = parse_port_from_args(&args);
let model_path_pb = validate_model_path(&mut args)?;
let _mmproj_path_pb = validate_mmproj_path(&mut args)?;
let api_key = extract_arg_value(&args, "--api-key");
let model_id = extract_arg_value(&args, "-a");


@@ -98,3 +98,50 @@ pub fn validate_model_path(args: &mut Vec<String>) -> ServerResult<PathBuf> {
    Ok(model_path_pb)
}

/// Validate mmproj path exists and update args with platform-appropriate path format
pub fn validate_mmproj_path(args: &mut Vec<String>) -> ServerResult<Option<PathBuf>> {
    let mmproj_path_index = match args.iter().position(|arg| arg == "--mmproj") {
        Some(index) => index,
        None => return Ok(None), // mmproj is optional
    };
    let mmproj_path = args.get(mmproj_path_index + 1).cloned().ok_or_else(|| {
        LlamacppError::new(
            ErrorCode::ModelLoadFailed,
            "Mmproj path was not provided after '--mmproj' flag.".into(),
            None,
        )
    })?;
    let mmproj_path_pb = PathBuf::from(&mmproj_path);
    if !mmproj_path_pb.exists() {
        let err_msg = format!(
            "Invalid or inaccessible mmproj path: {}",
            mmproj_path_pb.display()
        );
        log::error!("{}", &err_msg);
        return Err(LlamacppError::new(
            ErrorCode::ModelFileNotFound,
            "The specified mmproj file does not exist or is not accessible.".into(),
            Some(err_msg),
        )
        .into());
    }
    #[cfg(windows)]
    {
        // use short path on Windows
        if let Some(short) = get_short_path(&mmproj_path_pb) {
            args[mmproj_path_index + 1] = short;
        } else {
            args[mmproj_path_index + 1] = mmproj_path_pb.display().to_string();
        }
    }
    #[cfg(not(windows))]
    {
        args[mmproj_path_index + 1] = mmproj_path_pb.display().to_string();
    }
    Ok(Some(mmproj_path_pb))
}