feat: Add offload_mmproj option and validation
This commit introduces a new configuration option, offload_mmproj, to the llamacpp extension. The setting controls whether the multimodal projector model is offloaded to the GPU; it is offloaded by default for better performance. When set to false, the projector model stays on the CPU, which can help in low-GPU-memory scenarios at the cost of slower image processing.

The commit also adds validate_mmproj_path, which checks that the path passed via --mmproj exists and is accessible before the model is loaded, and refactors some invoke calls for readability.
parent e04bb86171
commit 9afeb5e514
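As a quick orientation before the diff: the sketch below is not part of this commit. mmprojArgs is a hypothetical helper, and only the offload_mmproj option and the --no-mmproj-offload flag are taken from the changes that follow; it shows how the new option is meant to map onto the llama-server command line.

// Minimal sketch, not code from this commit: mmprojArgs is a hypothetical helper.
// Only the offload_mmproj option and the --no-mmproj-offload flag appear in the diff below.
function mmprojArgs(cfg: { offload_mmproj?: boolean }): string[] {
  const args: string[] = []
  // Offloading is the default; the flag is only added when the user opts out.
  if (cfg.offload_mmproj === false) args.push('--no-mmproj-offload')
  return args
}

console.log(mmprojArgs({ offload_mmproj: false })) // ['--no-mmproj-offload']
console.log(mmprojArgs({}))                        // [] (projector offloaded to the GPU)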
@@ -41,6 +41,7 @@ type LlamacppConfig = {
   auto_unload: boolean
   chat_template: string
   n_gpu_layers: number
+  offload_mmproj: boolean
   override_tensor_buffer_t: string
   ctx_size: number
   threads: number
@@ -1221,6 +1222,10 @@ export default class llamacpp_extension extends AIEngine {
     // Takes a regex with matching tensor name as input
     if (cfg.override_tensor_buffer_t)
       args.push('--override-tensor', cfg.override_tensor_buffer_t)
+    // Offload the multimodal projector model to the GPU by default. If there is not enough memory,
+    // turning this setting off keeps the projector model on the CPU, but image processing can
+    // take longer.
+    if (cfg.offload_mmproj === false) args.push('--no-mmproj-offload')
     args.push('-a', modelId)
     args.push('--port', String(port))
     if (modelConfig.mmproj_path) {
@@ -1645,4 +1650,18 @@ export default class llamacpp_extension extends AIEngine {
       'tokenizer.chat_template'
     ]?.includes('tools')
   }
+
+  private async loadMetadata(path: string): Promise<GgufMetadata> {
+    try {
+      const data = await invoke<GgufMetadata>(
+        'plugin:llamacpp|read_gguf_metadata',
+        {
+          path: path,
+        }
+      )
+      return data
+    } catch (err) {
+      throw err
+    }
+  }
 }
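For orientation, here is a minimal usage sketch of the new loadMetadata helper. It assumes GgufMetadata can be read as a flat string map, which is not confirmed by this diff; only the 'tokenizer.chat_template' / 'tools' check mirrors the hunk above.

// Sketch only: GgufMetadata is modeled as a flat string map for illustration;
// the real type returned by read_gguf_metadata may be richer.
type GgufMetadata = Record<string, string | undefined>

async function chatTemplateSupportsTools(
  loadMetadata: (path: string) => Promise<GgufMetadata>,
  modelPath: string
): Promise<boolean> {
  const meta = await loadMetadata(modelPath)
  // Same check as in the diff: a template that mentions 'tools' is treated as tool-capable.
  return meta['tokenizer.chat_template']?.includes('tools') ?? false
}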
src-tauri/Cargo.lock (generated, 55 changed lines)
@@ -854,8 +854,18 @@ version = "0.20.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee"
 dependencies = [
- "darling_core",
- "darling_macro",
+ "darling_core 0.20.11",
+ "darling_macro 0.20.11",
 ]
 
+[[package]]
+name = "darling"
+version = "0.21.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08440b3dd222c3d0433e63e097463969485f112baff337dfdaca043a0d760570"
+dependencies = [
+ "darling_core 0.21.2",
+ "darling_macro 0.21.2",
+]
+
 [[package]]
@@ -872,13 +882,38 @@ dependencies = [
  "syn 2.0.104",
 ]
 
+[[package]]
+name = "darling_core"
+version = "0.21.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d25b7912bc28a04ab1b7715a68ea03aaa15662b43a1a4b2c480531fd19f8bf7e"
+dependencies = [
+ "fnv",
+ "ident_case",
+ "proc-macro2",
+ "quote",
+ "strsim",
+ "syn 2.0.104",
+]
+
 [[package]]
 name = "darling_macro"
 version = "0.20.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
 dependencies = [
- "darling_core",
+ "darling_core 0.20.11",
  "quote",
  "syn 2.0.104",
 ]
+
+[[package]]
+name = "darling_macro"
+version = "0.21.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ce154b9bea7fb0c8e8326e62d00354000c36e79770ff21b8c84e3aa267d9d531"
+dependencies = [
+ "darling_core 0.21.2",
+ "quote",
+ "syn 2.0.104",
+]
@@ -3984,8 +4019,8 @@ dependencies = [
 
 [[package]]
 name = "rmcp"
-version = "0.2.1"
-source = "git+https://github.com/modelcontextprotocol/rust-sdk?rev=3196c95f1dfafbffbdcdd6d365c94969ac975e6a#3196c95f1dfafbffbdcdd6d365c94969ac975e6a"
+version = "0.5.0"
+source = "git+https://github.com/modelcontextprotocol/rust-sdk?rev=209dbac50f51737ad953c3a2c8e28f3619b6c277#209dbac50f51737ad953c3a2c8e28f3619b6c277"
 dependencies = [
  "base64 0.22.1",
  "chrono",
@@ -4010,10 +4045,10 @@ dependencies = [
 
 [[package]]
 name = "rmcp-macros"
-version = "0.2.1"
-source = "git+https://github.com/modelcontextprotocol/rust-sdk?rev=3196c95f1dfafbffbdcdd6d365c94969ac975e6a#3196c95f1dfafbffbdcdd6d365c94969ac975e6a"
+version = "0.5.0"
+source = "git+https://github.com/modelcontextprotocol/rust-sdk?rev=209dbac50f51737ad953c3a2c8e28f3619b6c277#209dbac50f51737ad953c3a2c8e28f3619b6c277"
 dependencies = [
- "darling",
+ "darling 0.21.2",
  "proc-macro2",
  "quote",
  "serde_json",
@@ -4408,7 +4443,7 @@ version = "3.14.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "de90945e6565ce0d9a25098082ed4ee4002e047cb59892c318d66821e14bb30f"
 dependencies = [
- "darling",
+ "darling 0.20.11",
  "proc-macro2",
  "quote",
  "syn 2.0.104",
@@ -6868,7 +6903,7 @@ version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a76ff259533532054cfbaefb115c613203c73707017459206380f03b3b3f266e"
 dependencies = [
- "darling",
+ "darling 0.20.11",
  "proc-macro2",
  "quote",
  "syn 2.0.104",

@@ -11,7 +11,7 @@ use tokio::time::Instant;
 
 use crate::device::{get_devices_from_backend, DeviceInfo};
 use crate::error::{ErrorCode, LlamacppError, ServerError, ServerResult};
-use crate::path::{validate_binary_path, validate_model_path};
+use crate::path::{validate_binary_path, validate_model_path, validate_mmproj_path};
 use crate::process::{
     find_session_by_model_id, get_all_active_sessions, get_all_loaded_model_ids,
     get_random_available_port, is_process_running_by_pid,
@@ -53,6 +53,7 @@ pub async fn load_llama_model<R: Runtime>(
 
     let port = parse_port_from_args(&args);
     let model_path_pb = validate_model_path(&mut args)?;
+    let _mmproj_path_pb = validate_mmproj_path(&mut args)?;
 
     let api_key = extract_arg_value(&args, "--api-key");
     let model_id = extract_arg_value(&args, "-a");
@@ -98,3 +98,50 @@ pub fn validate_model_path(args: &mut Vec<String>) -> ServerResult<PathBuf> {
 
     Ok(model_path_pb)
 }
+
+/// Validate mmproj path exists and update args with platform-appropriate path format
+pub fn validate_mmproj_path(args: &mut Vec<String>) -> ServerResult<Option<PathBuf>> {
+    let mmproj_path_index = match args.iter().position(|arg| arg == "--mmproj") {
+        Some(index) => index,
+        None => return Ok(None), // mmproj is optional
+    };
+
+    let mmproj_path = args.get(mmproj_path_index + 1).cloned().ok_or_else(|| {
+        LlamacppError::new(
+            ErrorCode::ModelLoadFailed,
+            "Mmproj path was not provided after '--mmproj' flag.".into(),
+            None,
+        )
+    })?;
+
+    let mmproj_path_pb = PathBuf::from(&mmproj_path);
+    if !mmproj_path_pb.exists() {
+        let err_msg = format!(
+            "Invalid or inaccessible mmproj path: {}",
+            mmproj_path_pb.display()
+        );
+        log::error!("{}", &err_msg);
+        return Err(LlamacppError::new(
+            ErrorCode::ModelFileNotFound,
+            "The specified mmproj file does not exist or is not accessible.".into(),
+            Some(err_msg),
+        )
+        .into());
+    }
+
+    #[cfg(windows)]
+    {
+        // use short path on Windows
+        if let Some(short) = get_short_path(&mmproj_path_pb) {
+            args[mmproj_path_index + 1] = short;
+        } else {
+            args[mmproj_path_index + 1] = mmproj_path_pb.display().to_string();
+        }
+    }
+    #[cfg(not(windows))]
+    {
+        args[mmproj_path_index + 1] = mmproj_path_pb.display().to_string();
+    }
+
+    Ok(Some(mmproj_path_pb))
+}
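To make the contract of the new validator concrete, a small TypeScript-side sketch follows. The argument list is hypothetical; only the '--mmproj', '-a' and '--port' flags and the validator's behavior come from this commit.

// Sketch only: a hypothetical argument list as the extension might assemble it.
const args: string[] = ['-a', 'my-model', '--port', '8080']

const mmprojPath: string | undefined = '/tmp/mmproj-f16.gguf' // placeholder path
if (mmprojPath) {
  // Mirrors the `if (modelConfig.mmproj_path)` branch in the extension diff.
  args.push('--mmproj', mmprojPath)
}

// validate_mmproj_path(&mut args) on the Rust side then:
//   - returns Ok(None) when '--mmproj' is absent (the flag is optional),
//   - fails with ModelFileNotFound when the file does not exist or is unreadable,
//   - otherwise rewrites the path argument in place (a Windows short path when
//     available) and returns Ok(Some(path)).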