feat: Adjust RAM/VRAM calculation for unified memory systems (#6687)

* feat: Adjust RAM/VRAM calculation for unified memory systems

This commit refactors the logic for calculating **total RAM** and **total VRAM** in `is_model_supported` and `plan_model_load` commands, specifically targeting systems with **unified memory** (like modern macOS devices where the GPU list may be empty).

The changes are as follows:

* **Total RAM Calculation:** If no GPUs are detected (`sys_info.gpus.is_empty()` is true), **total RAM** is now set to $0$. This avoids confusing total system memory with dedicated GPU memory when planning model placement.
* **Total VRAM Calculation:** If no GPUs are detected, **total VRAM** is still calculated as the system's **total memory (RAM)**, as this shared memory acts as VRAM on unified memory architectures.

This adjustment improves the accuracy of memory availability checks and model planning on unified memory systems.

* fix: total usable memory in case there is no system vram reported

* chore: temporarily change to self-hosted runner mac

* ci: revert back to github hosted runner macos

---------

Co-authored-by: Louis <louis@jan.ai>
Co-authored-by: Minh141120 <minh.itptit@gmail.com>
This commit is contained in:
Akarshan Biswas 2025-10-01 17:28:14 +05:30 committed by GitHub
parent 6a4aaaec87
commit 0f0ba43b7f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 28 additions and 22 deletions

View File

@ -89,19 +89,25 @@ pub async fn is_model_supported<R: Runtime>(
);
const RESERVE_BYTES: u64 = 2288490189;
let total_system_memory = system_info.total_memory * 1024 * 1024;
let total_system_memory: u64 = match system_info.gpus.is_empty() {
// on MacOS with unified memory, treat RAM = 0 for now
true => 0,
false => system_info.total_memory * 1024 * 1024,
};
// Calculate total VRAM from all GPUs
let total_vram: u64 = if system_info.gpus.is_empty() {
let total_vram: u64 = match system_info.gpus.is_empty() {
// On macOS with unified memory, GPU info may be empty
// Use total RAM as VRAM since memory is shared
log::info!("No GPUs detected (likely unified memory system), using total RAM as VRAM");
total_system_memory
} else {
system_info
true => {
log::info!("No GPUs detected (likely unified memory system), using total RAM as VRAM");
system_info.total_memory * 1024 * 1024
}
false => system_info
.gpus
.iter()
.map(|g| g.total_memory * 1024 * 1024)
.sum::<u64>()
.sum::<u64>(),
};
log::info!("Total VRAM reported/calculated (in bytes): {}", &total_vram);
@ -115,7 +121,7 @@ pub async fn is_model_supported<R: Runtime>(
let usable_total_memory = if total_system_memory > RESERVE_BYTES {
(total_system_memory - RESERVE_BYTES) + usable_vram
} else {
0
usable_vram
};
log::info!("System RAM: {} bytes", &total_system_memory);
log::info!("Total VRAM: {} bytes", &total_vram);

View File

@ -82,25 +82,25 @@ pub async fn plan_model_load<R: Runtime>(
log::info!("Got GPUs:\n{:?}", &sys_info.gpus);
let total_ram: u64 = sys_info.total_memory * 1024 * 1024;
log::info!(
"Total system memory reported from tauri_plugin_hardware(in bytes): {}",
&total_ram
);
let total_ram: u64 = match sys_info.gpus.is_empty() {
// Consider RAM as 0 for unified memory
true => 0,
false => sys_info.total_memory * 1024 * 1024,
};
let total_vram: u64 = if sys_info.gpus.is_empty() {
// On macOS with unified memory, GPU info may be empty
// Use total RAM as VRAM since memory is shared
log::info!("No GPUs detected (likely unified memory system), using total RAM as VRAM");
total_ram
} else {
sys_info
// Calculate total VRAM from all GPUs
let total_vram: u64 = match sys_info.gpus.is_empty() {
true => {
log::info!("No GPUs detected (likely unified memory system), using total RAM as VRAM");
sys_info.total_memory * 1024 * 1024
}
false => sys_info
.gpus
.iter()
.map(|g| g.total_memory * 1024 * 1024)
.sum::<u64>()
.sum::<u64>(),
};
log::info!("Total RAM reported/calculated (in bytes): {}", &total_ram);
log::info!("Total VRAM reported/calculated (in bytes): {}", &total_vram);
let usable_vram: u64 = if total_vram > RESERVE_BYTES {
(((total_vram - RESERVE_BYTES) as f64) * multiplier) as u64