From 1d0bb53f2aef119465a76a30352826ff36a3467f Mon Sep 17 00:00:00 2001
From: Akarshan Biswas
Date: Wed, 23 Jul 2025 19:20:12 +0530
Subject: [PATCH] feat: add support for querying available backend devices
 (#5877)

* feat: add support for querying available backend devices

This change introduces a new `getDevices` method on the `llamacpp_extension`
engine that allows the frontend to query and display the list of available
devices (e.g., Vulkan, CUDA, SYCL) reported by the compiled `llama-server`
binary.

* Added a `DeviceList` interface to represent GPU/device metadata.
* Implemented the `getDevices(): Promise<DeviceList[]>` method.
* Splits `version/backend` and ensures the backend is ready.
* Invokes the new Tauri command `get_devices`.
* Introduced a new `get_devices` Tauri command.
* Parses `llama-server --list-devices` output to extract available devices with memory info.
* Introduced a `DeviceInfo` struct (`id`, `name`, `mem`, `free`) and exposed it via serialization.
* Robust parsing logic using string processing (non-regex) to locate memory stats.
* Registered the new command in the `tauri::Builder` in `lib.rs`.
* Fixed logic to correctly parse multiple devices from the llama-server output.
* Handles common failure modes: binary not found, malformed memory info, etc.

This sets the foundation for device selection, memory-aware model loading,
and improved diagnostics in Jan AI engine setup flows.

* Update extensions/llamacpp-extension/src/index.ts

Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>

---------

Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>
---
 extensions/llamacpp-extension/src/index.ts    |  33 +++
 .../inference_llamacpp_extension/server.rs    | 254 ++++++++++++++++++
 src-tauri/src/lib.rs                          |   1 +
 3 files changed, 288 insertions(+)

diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts
index 3a2b0951a..673a80687 100644
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@@ -92,6 +92,13 @@ interface EmbeddingData {
   index: number
   object: string
 }
+
+interface DeviceList {
+  id: string
+  name: string
+  mem: number
+  free: number
+}
 /**
  * Override the default app.log function to use Jan's logging system.
  * @param args
@@ -1223,6 +1230,32 @@ export default class llamacpp_extension extends AIEngine {
     return lmodels
   }
 
+  async getDevices(): Promise<DeviceList[]> {
+    const cfg = this.config
+    const [version, backend] = cfg.version_backend.split('/')
+    if (!version || !backend) {
+      throw new Error(
+        `Invalid version/backend format: ${cfg.version_backend}. Expected format: <version>/<backend>`
+      )
+    }
+
+    // Ensure backend is downloaded and ready before proceeding
+    await this.ensureBackendReady(backend, version)
+    logger.info('Calling Tauri command getDevices with arg --list-devices')
+    const backendPath = await getBackendExePath(backend, version)
+    const libraryPath = await joinPath([await this.getProviderPath(), 'lib'])
+    try {
+      const dList = await invoke<DeviceList[]>('get_devices', {
+        backendPath,
+        libraryPath,
+      })
+      return dList
+    } catch (error) {
+      logger.error('Failed to query devices:\n', error)
+      throw new Error(`Failed to load llama-server: ${error}`)
+    }
+  }
+
   async embed(text: string[]): Promise<EmbeddingResponse> {
     let sInfo = this.findSessionByModel('sentence-transformer-mini')
     if (!sInfo) {
diff --git a/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs b/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs
index 19bc38e24..d00ba3732 100644
--- a/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs
+++ b/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs
@@ -28,6 +28,8 @@ pub enum ServerError {
     Io(#[from] std::io::Error),
     #[error("Jan API error: {0}")]
     Tauri(#[from] tauri::Error),
+    #[error("Parse error: {0}")]
+    ParseError(String),
 }
 
 // impl serialization for tauri
@@ -57,6 +59,14 @@ pub struct UnloadResult {
     error: Option<String>,
 }
 
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct DeviceInfo {
+    pub id: String,
+    pub name: String,
+    pub mem: i32,
+    pub free: i32,
+}
+
 // --- Load Command ---
 #[tauri::command]
 pub async fn load_llama_model(
@@ -362,6 +372,250 @@ pub async fn unload_llama_model(
     }
 }
 
+#[tauri::command]
+pub async fn get_devices(
+    backend_path: &str,
+    library_path: Option<&str>,
+) -> ServerResult<Vec<DeviceInfo>> {
+    log::info!("Getting devices from server at path: {:?}", backend_path);
+
+    let server_path_buf = PathBuf::from(backend_path);
+    if !server_path_buf.exists() {
+        log::error!(
+            "Server binary not found at expected path: {:?}",
+            backend_path
+        );
+        return Err(ServerError::BinaryNotFound(format!(
+            "Binary not found at {:?}",
+            backend_path
+        )));
+    }
+
+    // Configure the command to run the server with --list-devices
+    let mut command = Command::new(backend_path);
+    command.arg("--list-devices");
+
+    // Set up library path similar to load function
+    if let Some(lib_path) = library_path {
+        if cfg!(target_os = "linux") {
+            let new_lib_path = match std::env::var("LD_LIBRARY_PATH") {
+                Ok(path) => format!("{}:{}", path, lib_path),
+                Err(_) => lib_path.to_string(),
+            };
+            command.env("LD_LIBRARY_PATH", new_lib_path);
+        } else if cfg!(target_os = "windows") {
+            let new_path = match std::env::var("PATH") {
+                Ok(path) => format!("{};{}", path, lib_path),
+                Err(_) => lib_path.to_string(),
+            };
+            command.env("PATH", new_path);
+
+            // Normalize the path by removing UNC prefix if present
+            let normalized_path = lib_path.trim_start_matches(r"\\?\").to_string();
+            log::info!("Library path:\n{}", &normalized_path);
+
+            // Only set current_dir if the normalized path exists and is a directory
+            let path = std::path::Path::new(&normalized_path);
+            if path.exists() && path.is_dir() {
+                command.current_dir(&normalized_path);
+            } else {
+                log::warn!(
+                    "Library path '{}' does not exist or is not a directory",
+                    normalized_path
+                );
+            }
+        } else {
+            log::warn!("Library path setting is not supported on this OS");
+        }
+    }
+
+    command.stdout(Stdio::piped());
+    command.stderr(Stdio::piped());
+
+    #[cfg(all(windows, target_arch = "x86_64"))]
+    {
+        use std::os::windows::process::CommandExt;
+        const CREATE_NO_WINDOW: u32 = 0x0800_0000;
+        const CREATE_NEW_PROCESS_GROUP: u32 = 0x0000_0200;
+        command.creation_flags(CREATE_NO_WINDOW | CREATE_NEW_PROCESS_GROUP);
+    }
+
+    // Execute the command and wait for completion
+    let output = timeout(Duration::from_secs(30), command.output())
+        .await
+        .map_err(|_| ServerError::LlamacppError("Timeout waiting for device list".to_string()))?
+        .map_err(ServerError::Io)?;
+
+    // Check if command executed successfully
+    if !output.status.success() {
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        log::error!("llama-server --list-devices failed: {}", stderr);
+        return Err(ServerError::LlamacppError(format!(
+            "Command failed with exit code {:?}: {}",
+            output.status.code(),
+            stderr
+        )));
+    }
+
+    // Parse the output
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    log::info!("Device list output:\n{}", stdout);
+
+    parse_device_output(&stdout)
+}
+
+fn parse_device_output(output: &str) -> ServerResult<Vec<DeviceInfo>> {
+    let mut devices = Vec::new();
+    let mut found_devices_section = false;
+
+    for raw in output.lines() {
+        // detect header (ignoring whitespace)
+        if raw.trim() == "Available devices:" {
+            found_devices_section = true;
+            continue;
+        }
+
+        if !found_devices_section {
+            continue;
+        }
+
+        // skip blank lines
+        if raw.trim().is_empty() {
+            continue;
+        }
+
+        // now parse any non-blank line after the header
+        let line = raw.trim();
+        if let Some(device) = parse_device_line(line)? {
+            devices.push(device);
+        }
+    }
+
+    if devices.is_empty() && found_devices_section {
+        log::warn!("No devices found in output");
+    } else if !found_devices_section {
+        return Err(ServerError::ParseError(
+            "Could not find 'Available devices:' section in output".to_string(),
+        ));
+    }
+
+    Ok(devices)
+}
+
+fn parse_device_line(line: &str) -> ServerResult<Option<DeviceInfo>> {
+    let line = line.trim();
+
+    log::info!("Parsing device line: '{}'", line);
+
+    // Expected formats:
+    // "Vulkan0: Intel(R) Arc(tm) A750 Graphics (DG2) (8128 MiB, 8128 MiB free)"
+    // "CUDA0: NVIDIA GeForce RTX 4090 (24576 MiB, 24000 MiB free)"
+    // "SYCL0: Intel(R) Arc(TM) A750 Graphics (8000 MiB, 7721 MiB free)"
+
+    // Split by colon to get ID and rest
+    let parts: Vec<&str> = line.splitn(2, ':').collect();
+    if parts.len() != 2 {
+        log::warn!("Skipping malformed device line: {}", line);
+        return Ok(None);
+    }
+
+    let id = parts[0].trim().to_string();
+    let rest = parts[1].trim();
+
+    // Use regex-like approach to find the memory pattern at the end
+    // Look for pattern: (number MiB, number MiB free) at the end
+    if let Some(memory_match) = find_memory_pattern(rest) {
+        let (memory_start, memory_content) = memory_match;
+        let name = rest[..memory_start].trim().to_string();
+
+        // Parse memory info: "8128 MiB, 8128 MiB free"
+        let memory_parts: Vec<&str> = memory_content.split(',').collect();
+        if memory_parts.len() >= 2 {
+            if let (Ok(total_mem), Ok(free_mem)) = (
+                parse_memory_value(memory_parts[0].trim()),
+                parse_memory_value(memory_parts[1].trim()),
+            ) {
+                log::info!("Parsed device - ID: '{}', Name: '{}', Mem: {}, Free: {}", id, name, total_mem, free_mem);
+
+                return Ok(Some(DeviceInfo {
+                    id,
+                    name,
+                    mem: total_mem,
+                    free: free_mem,
+                }));
+            }
+        }
+    }
+
+    log::warn!("Could not parse device line: {}", line);
+    Ok(None)
+}
+
+fn find_memory_pattern(text: &str) -> Option<(usize, &str)> {
+    // Find the last parenthesis that contains the memory pattern
+    let mut last_match = None;
+    let mut chars = text.char_indices().peekable();
+
+    while let Some((start_idx, ch)) = chars.next() {
+        if ch == '(' {
+            // Find the closing parenthesis
+            let remaining = &text[start_idx + 1..];
+            if let Some(close_pos) = remaining.find(')') {
+                let content = &remaining[..close_pos];
+
+                // Check if this looks like memory info
+                if is_memory_pattern(content) {
+                    last_match = Some((start_idx, content));
+                }
+            }
+        }
+    }
+
+    last_match
+}
+
+fn is_memory_pattern(content: &str) -> bool {
+    // Check if content matches pattern like "8128 MiB, 8128 MiB free"
+    // Must contain: numbers, "MiB", comma, "free"
+    if !(content.contains("MiB") && content.contains("free") && content.contains(',')) {
+        return false;
+    }
+
+    let parts: Vec<&str> = content.split(',').collect();
+    if parts.len() != 2 {
+        return false;
+    }
+
+    parts.iter().all(|part| {
+        let part = part.trim();
+        // Each part should start with a number and contain "MiB"
+        part.split_whitespace().next()
+            .map_or(false, |first_word| first_word.parse::<i32>().is_ok()) &&
+        part.contains("MiB")
+    })
+}
+
+fn parse_memory_value(mem_str: &str) -> ServerResult<i32> {
+    // Handle formats like "8000 MiB" or "7721 MiB free"
+    let parts: Vec<&str> = mem_str.split_whitespace().collect();
+    if parts.is_empty() {
+        return Err(ServerError::ParseError(format!(
+            "Empty memory value: '{}'",
+            mem_str
+        )));
+    }
+
+    // Take the first part which should be the number
+    let number_str = parts[0];
+    number_str.parse::<i32>().map_err(|_| {
+        ServerError::ParseError(format!(
+            "Could not parse memory value: '{}'",
+            number_str
+        ))
+    })
+}
+
 // crypto
 #[tauri::command]
 pub fn generate_api_key(model_id: String, api_secret: String) -> Result {
diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs
index b713b5cd0..69a512a0e 100644
--- a/src-tauri/src/lib.rs
+++ b/src-tauri/src/lib.rs
@@ -94,6 +94,7 @@ pub fn run() {
             // llama-cpp extension
             core::utils::extensions::inference_llamacpp_extension::server::load_llama_model,
             core::utils::extensions::inference_llamacpp_extension::server::unload_llama_model,
+            core::utils::extensions::inference_llamacpp_extension::server::get_devices,
             core::utils::extensions::inference_llamacpp_extension::server::generate_api_key,
             core::utils::extensions::inference_llamacpp_extension::server::is_process_running,
         ])
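
Editor's note (not part of the patch): a quick way to sanity-check the new parser is a unit test against the device-line formats documented in `parse_device_line`. The sketch below assumes a `#[cfg(test)]` module at the bottom of `server.rs`; the sample `llama-server` output is hand-written to match those documented formats, not captured from a real run.

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parses_multiple_devices() {
        // Hand-written sample mirroring the formats documented in parse_device_line
        let output = r"
Available devices:
  Vulkan0: Intel(R) Arc(tm) A750 Graphics (DG2) (8128 MiB, 8128 MiB free)
  CUDA0: NVIDIA GeForce RTX 4090 (24576 MiB, 24000 MiB free)
";
        let devices = parse_device_output(output).expect("sample output should parse");
        assert_eq!(devices.len(), 2);
        // The trailing "(<total> MiB, <free> MiB free)" group is stripped from
        // the name, while earlier parenthesized text such as "(DG2)" is kept
        assert_eq!(devices[0].id, "Vulkan0");
        assert_eq!(devices[0].name, "Intel(R) Arc(tm) A750 Graphics (DG2)");
        assert_eq!(devices[0].mem, 8128);
        assert_eq!(devices[1].free, 24000);
    }

    #[test]
    fn missing_header_is_an_error() {
        // Without the "Available devices:" section the parser returns ParseError
        assert!(parse_device_output("no such section").is_err());
    }
}

Run with `cargo test` from `src-tauri/`; this exercises the parsing path without needing a GPU or a `llama-server` binary on the machine.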