feat: Add offload_mmproj option and validation

This commit introduces a new configuration option offload_mmproj to the llamacpp extension.

The offload_mmproj setting controls whether the multimodal projector model is offloaded to the GPU. By default the projector is offloaded for better performance; setting the option to false keeps it on the CPU, which can help in low GPU memory scenarios at the cost of slower image processing.

Additionally, this commit adds validate_mmproj_path to ensure the provided --mmproj path exists and is accessible, so an invalid projector file is caught before the server is launched rather than surfacing as a failure during model loading.

This change also refactors some invoke calls for improved readability.
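
For illustration, here is a minimal sketch of how the option is expected to map onto llama-server flags. The helper name and signature are hypothetical and not part of this commit; only the --mmproj and --no-mmproj-offload flags come from the actual change. Since GPU offload is llama-server's default behaviour, the extension only ever passes the opt-out flag.

// Hypothetical helper, for illustration only; names are not from this commit.
fn mmproj_args(mmproj_path: Option<&str>, offload_mmproj: bool) -> Vec<String> {
    let mut args: Vec<String> = Vec::new();
    if let Some(path) = mmproj_path {
        args.push("--mmproj".into());
        args.push(path.into());
        if !offload_mmproj {
            // Keep the projector on the CPU; image processing may take longer.
            args.push("--no-mmproj-offload".into());
        }
    }
    args
}

In the extension itself this corresponds to the cfg.offload_mmproj === false check visible in the TypeScript diff below.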
Authored by Akarshan on 2025-08-15 19:30:56 +05:30, committed by Faisal Amir
parent e04bb86171
commit 9afeb5e514
4 changed files with 113 additions and 11 deletions


@@ -41,6 +41,7 @@ type LlamacppConfig = {
auto_unload: boolean
chat_template: string
n_gpu_layers: number
offload_mmproj: boolean
override_tensor_buffer_t: string
ctx_size: number
threads: number
@@ -1221,6 +1222,10 @@ export default class llamacpp_extension extends AIEngine {
// Takes a regex with matching tensor name as input
if (cfg.override_tensor_buffer_t)
args.push('--override-tensor', cfg.override_tensor_buffer_t)
// The multimodal projector model is offloaded to the GPU by default. If GPU memory is
// limited, turning this setting off keeps the projector on the CPU, though image
// processing may take longer.
if (cfg.offload_mmproj === false) args.push('--no-mmproj-offload')
args.push('-a', modelId)
args.push('--port', String(port))
if (modelConfig.mmproj_path) {
@@ -1645,4 +1650,18 @@ export default class llamacpp_extension extends AIEngine {
'tokenizer.chat_template'
]?.includes('tools')
}
  private async loadMetadata(path: string): Promise<GgufMetadata> {
    // Read GGUF metadata through the llamacpp Tauri plugin command.
    return await invoke<GgufMetadata>('plugin:llamacpp|read_gguf_metadata', {
      path,
    })
  }
}

src-tauri/Cargo.lock (generated, 55 changed lines)

@@ -854,8 +854,18 @@ version = "0.20.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee"
dependencies = [
"darling_core",
"darling_macro",
"darling_core 0.20.11",
"darling_macro 0.20.11",
]
[[package]]
name = "darling"
version = "0.21.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08440b3dd222c3d0433e63e097463969485f112baff337dfdaca043a0d760570"
dependencies = [
"darling_core 0.21.2",
"darling_macro 0.21.2",
]
[[package]]
@@ -872,13 +882,38 @@ dependencies = [
"syn 2.0.104",
]
[[package]]
name = "darling_core"
version = "0.21.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d25b7912bc28a04ab1b7715a68ea03aaa15662b43a1a4b2c480531fd19f8bf7e"
dependencies = [
"fnv",
"ident_case",
"proc-macro2",
"quote",
"strsim",
"syn 2.0.104",
]
[[package]]
name = "darling_macro"
version = "0.20.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
dependencies = [
"darling_core",
"darling_core 0.20.11",
"quote",
"syn 2.0.104",
]
[[package]]
name = "darling_macro"
version = "0.21.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce154b9bea7fb0c8e8326e62d00354000c36e79770ff21b8c84e3aa267d9d531"
dependencies = [
"darling_core 0.21.2",
"quote",
"syn 2.0.104",
]
@@ -3984,8 +4019,8 @@ dependencies = [
[[package]]
name = "rmcp"
version = "0.2.1"
source = "git+https://github.com/modelcontextprotocol/rust-sdk?rev=3196c95f1dfafbffbdcdd6d365c94969ac975e6a#3196c95f1dfafbffbdcdd6d365c94969ac975e6a"
version = "0.5.0"
source = "git+https://github.com/modelcontextprotocol/rust-sdk?rev=209dbac50f51737ad953c3a2c8e28f3619b6c277#209dbac50f51737ad953c3a2c8e28f3619b6c277"
dependencies = [
"base64 0.22.1",
"chrono",
@@ -4010,10 +4045,10 @@ dependencies = [
[[package]]
name = "rmcp-macros"
version = "0.2.1"
source = "git+https://github.com/modelcontextprotocol/rust-sdk?rev=3196c95f1dfafbffbdcdd6d365c94969ac975e6a#3196c95f1dfafbffbdcdd6d365c94969ac975e6a"
version = "0.5.0"
source = "git+https://github.com/modelcontextprotocol/rust-sdk?rev=209dbac50f51737ad953c3a2c8e28f3619b6c277#209dbac50f51737ad953c3a2c8e28f3619b6c277"
dependencies = [
"darling",
"darling 0.21.2",
"proc-macro2",
"quote",
"serde_json",
@@ -4408,7 +4443,7 @@ version = "3.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de90945e6565ce0d9a25098082ed4ee4002e047cb59892c318d66821e14bb30f"
dependencies = [
"darling",
"darling 0.20.11",
"proc-macro2",
"quote",
"syn 2.0.104",
@@ -6868,7 +6903,7 @@ version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a76ff259533532054cfbaefb115c613203c73707017459206380f03b3b3f266e"
dependencies = [
"darling",
"darling 0.20.11",
"proc-macro2",
"quote",
"syn 2.0.104",


@@ -11,7 +11,7 @@ use tokio::time::Instant;
use crate::device::{get_devices_from_backend, DeviceInfo};
use crate::error::{ErrorCode, LlamacppError, ServerError, ServerResult};
use crate::path::{validate_binary_path, validate_model_path};
use crate::path::{validate_binary_path, validate_model_path, validate_mmproj_path};
use crate::process::{
find_session_by_model_id, get_all_active_sessions, get_all_loaded_model_ids,
get_random_available_port, is_process_running_by_pid,
@@ -53,6 +53,7 @@ pub async fn load_llama_model<R: Runtime>(
let port = parse_port_from_args(&args);
let model_path_pb = validate_model_path(&mut args)?;
let _mmproj_path_pb = validate_mmproj_path(&mut args)?;
let api_key = extract_arg_value(&args, "--api-key");
let model_id = extract_arg_value(&args, "-a");


@@ -98,3 +98,50 @@ pub fn validate_model_path(args: &mut Vec<String>) -> ServerResult<PathBuf> {
    Ok(model_path_pb)
}

/// Validate mmproj path exists and update args with platform-appropriate path format
pub fn validate_mmproj_path(args: &mut Vec<String>) -> ServerResult<Option<PathBuf>> {
    let mmproj_path_index = match args.iter().position(|arg| arg == "--mmproj") {
        Some(index) => index,
        None => return Ok(None), // mmproj is optional
    };
    let mmproj_path = args.get(mmproj_path_index + 1).cloned().ok_or_else(|| {
        LlamacppError::new(
            ErrorCode::ModelLoadFailed,
            "Mmproj path was not provided after '--mmproj' flag.".into(),
            None,
        )
    })?;
    let mmproj_path_pb = PathBuf::from(&mmproj_path);
    if !mmproj_path_pb.exists() {
        let err_msg = format!(
            "Invalid or inaccessible mmproj path: {}",
            mmproj_path_pb.display()
        );
        log::error!("{}", &err_msg);
        return Err(LlamacppError::new(
            ErrorCode::ModelFileNotFound,
            "The specified mmproj file does not exist or is not accessible.".into(),
            Some(err_msg),
        )
        .into());
    }
    #[cfg(windows)]
    {
        // use short path on Windows
        if let Some(short) = get_short_path(&mmproj_path_pb) {
            args[mmproj_path_index + 1] = short;
        } else {
            args[mmproj_path_index + 1] = mmproj_path_pb.display().to_string();
        }
    }
    #[cfg(not(windows))]
    {
        args[mmproj_path_index + 1] = mmproj_path_pb.display().to_string();
    }
    Ok(Some(mmproj_path_pb))
}