feat: Adjust RAM/VRAM calculation for unified memory systems (#6687)

* feat: Adjust RAM/VRAM calculation for unified memory systems. This commit refactors the logic for calculating **total RAM** and **total VRAM** in the `is_model_supported` and `plan_model_load` commands, specifically targeting systems with **unified memory** (like modern macOS devices where the GPU list may be empty). The changes are as follows: * **Total RAM Calculation:** If no GPUs are detected (`sys_info.gpus.is_empty()` is true), **total RAM** is now set to 0. This avoids confusing total system memory with dedicated GPU memory when planning model placement, and keeps the shared memory pool from being counted twice (once as RAM and once as VRAM). * **Total VRAM Calculation:** If no GPUs are detected, **total VRAM** is still calculated as the system's **total memory (RAM)**, as this shared memory acts as VRAM on unified memory architectures. This adjustment improves the accuracy of memory availability checks and model planning on unified memory systems. * fix: total usable memory in case there is no system vram reported * chore: temporarily change to self-hosted runner mac * ci: revert back to GitHub-hosted runner macos --------- Co-authored-by: Louis <louis@jan.ai> Co-authored-by: Minh141120 <minh.itptit@gmail.com>
2025-10-01 17:28:14 +05:30 · 2025-10-01 17:28:14 +05:30 · 0f0ba43b7f
commit 0f0ba43b7f
parent 6a4aaaec87
2 changed files with 28 additions and 22 deletions
--- a/src-tauri/plugins/tauri-plugin-llamacpp/src/gguf/commands.rs
+++ b/src-tauri/plugins/tauri-plugin-llamacpp/src/gguf/commands.rs
@ -89,19 +89,25 @@ pub async fn is_model_supported<R: Runtime>(
    );
    const RESERVE_BYTES: u64 = 2288490189;
-    let total_system_memory = system_info.total_memory * 1024 * 1024;
+    let total_system_memory: u64 = match system_info.gpus.is_empty() {
        // on MacOS with unified memory, treat RAM = 0 for now
        true => 0,
        false => system_info.total_memory * 1024 * 1024,
    };
    // Calculate total VRAM from all GPUs
-    let total_vram: u64 = if system_info.gpus.is_empty() {
+    let total_vram: u64 = match system_info.gpus.is_empty() {
        // On macOS with unified memory, GPU info may be empty
        // Use total RAM as VRAM since memory is shared
-        log::info!("No GPUs detected (likely unified memory system), using total RAM as VRAM");
+        true => {
-        total_system_memory
+            log::info!("No GPUs detected (likely unified memory system), using total RAM as VRAM");
-    } else {
+            system_info.total_memory * 1024 * 1024
-        system_info
+        }
        false => system_info
            .gpus
            .iter()
            .map(|g| g.total_memory * 1024 * 1024)
-            .sum::<u64>()
+            .sum::<u64>(),
    };
    log::info!("Total VRAM reported/calculated (in bytes): {}", &total_vram);
@ -115,7 +121,7 @@ pub async fn is_model_supported<R: Runtime>(
    let usable_total_memory = if total_system_memory > RESERVE_BYTES {
        (total_system_memory - RESERVE_BYTES) + usable_vram
    } else {
-        0
+        usable_vram
    };
    log::info!("System RAM: {} bytes", &total_system_memory);
    log::info!("Total VRAM: {} bytes", &total_vram);
--- a/src-tauri/plugins/tauri-plugin-llamacpp/src/gguf/model_planner.rs
+++ b/src-tauri/plugins/tauri-plugin-llamacpp/src/gguf/model_planner.rs
@ -82,25 +82,25 @@ pub async fn plan_model_load<R: Runtime>(
    log::info!("Got GPUs:\n{:?}", &sys_info.gpus);
-    let total_ram: u64 = sys_info.total_memory * 1024 * 1024;
+    let total_ram: u64 = match sys_info.gpus.is_empty() {
-    log::info!(
+        // Consider RAM as 0 for unified memory
-        "Total system memory reported from tauri_plugin_hardware(in bytes): {}",
+        true => 0,
-        &total_ram
+        false => sys_info.total_memory * 1024 * 1024,
-    );
+    };
-    let total_vram: u64 = if sys_info.gpus.is_empty() {
+    // Calculate total VRAM from all GPUs
-        // On macOS with unified memory, GPU info may be empty
+    let total_vram: u64 = match sys_info.gpus.is_empty() {
-        // Use total RAM as VRAM since memory is shared
+        true => {
-        log::info!("No GPUs detected (likely unified memory system), using total RAM as VRAM");
+            log::info!("No GPUs detected (likely unified memory system), using total RAM as VRAM");
-        total_ram
+            sys_info.total_memory * 1024 * 1024
-    } else {
+        }
-        sys_info
+        false => sys_info
            .gpus
            .iter()
            .map(|g| g.total_memory * 1024 * 1024)
-            .sum::<u64>()
+            .sum::<u64>(),
    };
-
+    log::info!("Total RAM reported/calculated (in bytes): {}", &total_ram);
    log::info!("Total VRAM reported/calculated (in bytes): {}", &total_vram);
    let usable_vram: u64 = if total_vram > RESERVE_BYTES {
        (((total_vram - RESERVE_BYTES) as f64) * multiplier) as u64