* Add libnvidia-ml.so.1 fallback for Linux
* Fix AMD memory usage on Linux
* Add os_type; use std::env::consts::ARCH directly
121 lines
3.7 KiB
Rust
121 lines
3.7 KiB
Rust
use super::{GpuInfo, GpuUsage, Vendor};
|
|
use nvml_wrapper::{error::NvmlError, Nvml};
|
|
use std::sync::OnceLock;
|
|
|
|
/// Process-wide NVML handle, filled in at most once by `get_nvml()`.
/// Holds `None` when NVML could not be initialized.
static NVML: OnceLock<Option<Nvml>> = OnceLock::new();
|
|
|
|
#[derive(Debug, Clone, serde::Serialize)]
|
|
pub struct NvidiaInfo {
|
|
pub index: u32,
|
|
pub compute_capability: String,
|
|
}
|
|
|
|
fn get_nvml() -> Option<&'static Nvml> {
|
|
NVML.get_or_init(|| {
|
|
let result = Nvml::init().or_else(|e| {
|
|
// fallback
|
|
if cfg!(target_os = "linux") {
|
|
let lib_path = std::ffi::OsStr::new("libnvidia-ml.so.1");
|
|
Nvml::builder().lib_path(lib_path).init()
|
|
} else {
|
|
Err(e)
|
|
}
|
|
});
|
|
|
|
// NvmlError doesn't implement Copy, so we have to store an Option in OnceLock
|
|
match result {
|
|
Ok(nvml) => Some(nvml),
|
|
Err(e) => {
|
|
log::error!("Unable to initialize NVML: {}", e);
|
|
None
|
|
}
|
|
}
|
|
})
|
|
.as_ref()
|
|
}
|
|
|
|
impl GpuInfo {
|
|
pub fn get_usage_nvidia(&self) -> GpuUsage {
|
|
let index = match self.nvidia_info {
|
|
Some(ref nvidia_info) => nvidia_info.index,
|
|
None => {
|
|
log::error!("get_usage_nvidia() called on non-NVIDIA GPU");
|
|
return self.get_usage_unsupported();
|
|
}
|
|
};
|
|
let closure = || -> Result<GpuUsage, NvmlError> {
|
|
let nvml = get_nvml().ok_or(NvmlError::Unknown)?;
|
|
let device = nvml.device_by_index(index)?;
|
|
let mem_info = device.memory_info()?;
|
|
Ok(GpuUsage {
|
|
uuid: self.uuid.clone(),
|
|
used_memory: mem_info.used / 1024 / 1024, // bytes to MiB
|
|
total_memory: mem_info.total / 1024 / 1024, // bytes to MiB
|
|
})
|
|
};
|
|
closure().unwrap_or_else(|e| {
|
|
log::error!("Failed to get memory usage for NVIDIA GPU {}: {}", index, e);
|
|
self.get_usage_unsupported()
|
|
})
|
|
}
|
|
}
|
|
|
|
pub fn get_nvidia_gpus() -> Vec<GpuInfo> {
|
|
let closure = || -> Result<Vec<GpuInfo>, NvmlError> {
|
|
let nvml = get_nvml().ok_or(NvmlError::Unknown)?;
|
|
let num_gpus = nvml.device_count()?;
|
|
let driver_version = nvml.sys_driver_version()?;
|
|
|
|
let mut gpus = Vec::with_capacity(num_gpus as usize);
|
|
for i in 0..num_gpus {
|
|
let device = nvml.device_by_index(i)?;
|
|
gpus.push(GpuInfo {
|
|
name: device.name()?,
|
|
total_memory: device.memory_info()?.total / 1024 / 1024, // bytes to MiB
|
|
vendor: Vendor::NVIDIA,
|
|
uuid: {
|
|
let mut uuid = device.uuid()?;
|
|
if uuid.starts_with("GPU-") {
|
|
uuid = uuid[4..].to_string();
|
|
}
|
|
uuid
|
|
},
|
|
driver_version: driver_version.clone(),
|
|
nvidia_info: Some(NvidiaInfo {
|
|
index: i,
|
|
compute_capability: {
|
|
let cc = device.cuda_compute_capability()?;
|
|
format!("{}.{}", cc.major, cc.minor)
|
|
},
|
|
}),
|
|
vulkan_info: None,
|
|
});
|
|
}
|
|
|
|
Ok(gpus)
|
|
};
|
|
|
|
match closure() {
|
|
Ok(gpus) => gpus,
|
|
Err(e) => {
|
|
log::error!("Failed to get NVIDIA GPUs: {}", e);
|
|
vec![]
|
|
}
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: enumerates the GPUs and prints each one with its current
    /// usage. It makes no assertions, so it also passes when no GPU is found.
    #[test]
    fn test_get_nvidia_gpus() {
        let detected = get_nvidia_gpus();
        let mut position = 0;
        for gpu in &detected {
            println!("GPU {}:", position);
            println!(" {:?}", gpu);
            println!(" {:?}", gpu.get_usage());
            position += 1;
        }
    }
}
|