From 1d0bb53f2aef119465a76a30352826ff36a3467f Mon Sep 17 00:00:00 2001
From: Akarshan Biswas
Date: Wed, 23 Jul 2025 19:20:12 +0530
Subject: [PATCH] feat: add support for querying available backend devices
 (#5877)

* feat: add support for querying available backend devices

This change introduces a new `getDevices` method on the `llamacpp_extension`
engine that allows the frontend to query and display the list of available
devices (e.g., Vulkan, CUDA, SYCL) reported by the compiled `llama-server`
binary.

* Added a `DeviceList` interface to represent GPU/device metadata.
* Implemented the `getDevices(): Promise<DeviceList[]>` method.
* Splits `version/backend` and ensures the backend is ready.
* Invokes the new Tauri command `get_devices`.
* Introduced a new `get_devices` Tauri command.
* Parses `llama-server --list-devices` output to extract available devices with memory info.
* Introduced a `DeviceInfo` struct (`id`, `name`, `mem`, `free`) and exposed it via serialization.
* Robust parsing logic using string processing (non-regex) to locate memory stats.
* Registered the new command in the `tauri::Builder` in `lib.rs`.
* Fixed logic to correctly parse multiple devices from the llama-server output.
* Handles common failure modes: binary not found, malformed memory info, etc.

This sets the foundation for device selection, memory-aware model loading,
and improved diagnostics in Jan AI engine setup flows.

* Update extensions/llamacpp-extension/src/index.ts

Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>

---------

Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>
---
 extensions/llamacpp-extension/src/index.ts    |  33 +++
 .../inference_llamacpp_extension/server.rs    | 254 ++++++++++++++++++
 src-tauri/src/lib.rs                          |   1 +
 3 files changed, 288 insertions(+)

diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts
index 3a2b0951a..673a80687 100644
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@@ -92,6 +92,13 @@ interface EmbeddingData {
   index: number
   object: string
 }
+
+interface DeviceList {
+  id: string
+  name: string
+  mem: number
+  free: number
+}
 /**
  * Override the default app.log function to use Jan's logging system.
  * @param args
@@ -1223,6 +1230,32 @@ export default class llamacpp_extension extends AIEngine {
     return lmodels
   }
 
+  async getDevices(): Promise<DeviceList[]> {
+    const cfg = this.config
+    const [version, backend] = cfg.version_backend.split('/')
+    if (!version || !backend) {
+      throw new Error(
+        `Invalid version/backend format: ${cfg.version_backend}. Expected format: <version>/<backend>`
+      )
+    }
+
+    // Ensure backend is downloaded and ready before proceeding
+    await this.ensureBackendReady(backend, version)
+    logger.info('Calling Tauri command getDevices with arg --list-devices')
+    const backendPath = await getBackendExePath(backend, version)
+    const libraryPath = await joinPath([await this.getProviderPath(), 'lib'])
+    try {
+      const dList = await invoke<DeviceList[]>('get_devices', {
+        backendPath,
+        libraryPath,
+      })
+      return dList
+    } catch (error) {
+      logger.error('Failed to query devices:\n', error)
+      throw new Error(`Failed to load llama-server: ${error}`)
+    }
+  }
+
   async embed(text: string[]): Promise<EmbeddingResponse> {
     let sInfo = this.findSessionByModel('sentence-transformer-mini')
     if (!sInfo) {
diff --git a/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs b/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs
index 19bc38e24..d00ba3732 100644
--- a/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs
+++ b/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs
@@ -28,6 +28,8 @@ pub enum ServerError {
     Io(#[from] std::io::Error),
     #[error("Jan API error: {0}")]
     Tauri(#[from] tauri::Error),
+    #[error("Parse error: {0}")]
+    ParseError(String),
 }
 
 // impl serialization for tauri
@@ -57,6 +59,14 @@ pub struct UnloadResult {
     error: Option<String>,
 }
 
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct DeviceInfo {
+    pub id: String,
+    pub name: String,
+    pub mem: i32,
+    pub free: i32,
+}
+
 // --- Load Command ---
 #[tauri::command]
 pub async fn load_llama_model(
@@ -362,6 +372,250 @@ pub async fn unload_llama_model(
     }
 }
 
+#[tauri::command]
+pub async fn get_devices(
+    backend_path: &str,
+    library_path: Option<&str>,
+) -> ServerResult<Vec<DeviceInfo>> {
+    log::info!("Getting devices from server at path: {:?}", backend_path);
+
+    let server_path_buf = PathBuf::from(backend_path);
+    if !server_path_buf.exists() {
+        log::error!(
+            "Server binary not found at expected path: {:?}",
+            backend_path
+        );
+        return Err(ServerError::BinaryNotFound(format!(
+            "Binary not found at {:?}",
+            backend_path
+        )));
+    }
+
+    // Configure the command to run the server with --list-devices
+    let mut command = Command::new(backend_path);
+    command.arg("--list-devices");
+
+    // Set up library path similar to load function
+    if let Some(lib_path) = library_path {
+        if cfg!(target_os = "linux") {
+            let new_lib_path = match std::env::var("LD_LIBRARY_PATH") {
+                Ok(path) => format!("{}:{}", path, lib_path),
+                Err(_) => lib_path.to_string(),
+            };
+            command.env("LD_LIBRARY_PATH", new_lib_path);
+        } else if cfg!(target_os = "windows") {
+            let new_path = match std::env::var("PATH") {
+                Ok(path) => format!("{};{}", path, lib_path),
+                Err(_) => lib_path.to_string(),
+            };
+            command.env("PATH", new_path);
+
+            // Normalize the path by removing UNC prefix if present
+            let normalized_path = lib_path.trim_start_matches(r"\\?\").to_string();
+            log::info!("Library path:\n{}", &normalized_path);
+
+            // Only set current_dir if the normalized path exists and is a directory
+            let path = std::path::Path::new(&normalized_path);
+            if path.exists() && path.is_dir() {
+                command.current_dir(&normalized_path);
+            } else {
+                log::warn!(
+                    "Library path '{}' does not exist or is not a directory",
+                    normalized_path
+                );
+            }
+        } else {
+            log::warn!("Library path setting is not supported on this OS");
+        }
+    }
+
+    command.stdout(Stdio::piped());
+    command.stderr(Stdio::piped());
+
+    #[cfg(all(windows, target_arch = "x86_64"))]
+    {
+        use std::os::windows::process::CommandExt;
+        const CREATE_NO_WINDOW: u32 = 0x0800_0000;
+        const CREATE_NEW_PROCESS_GROUP: u32 = 0x0000_0200;
+        command.creation_flags(CREATE_NO_WINDOW | CREATE_NEW_PROCESS_GROUP);
+    }
+
+    // Execute the command and wait for completion
+    let output = timeout(Duration::from_secs(30), command.output())
+        .await
+        .map_err(|_| ServerError::LlamacppError("Timeout waiting for device list".to_string()))?
+        .map_err(ServerError::Io)?;
+
+    // Check if command executed successfully
+    if !output.status.success() {
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        log::error!("llama-server --list-devices failed: {}", stderr);
+        return Err(ServerError::LlamacppError(format!(
+            "Command failed with exit code {:?}: {}",
+            output.status.code(),
+            stderr
+        )));
+    }
+
+    // Parse the output
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    log::info!("Device list output:\n{}", stdout);
+
+    parse_device_output(&stdout)
+}
+
+fn parse_device_output(output: &str) -> ServerResult<Vec<DeviceInfo>> {
+    let mut devices = Vec::new();
+    let mut found_devices_section = false;
+
+    for raw in output.lines() {
+        // detect header (ignoring whitespace)
+        if raw.trim() == "Available devices:" {
+            found_devices_section = true;
+            continue;
+        }
+
+        if !found_devices_section {
+            continue;
+        }
+
+        // skip blank lines
+        if raw.trim().is_empty() {
+            continue;
+        }
+
+        // now parse any non-blank line after the header
+        let line = raw.trim();
+        if let Some(device) = parse_device_line(line)? {
+            devices.push(device);
+        }
+    }
+
+    if devices.is_empty() && found_devices_section {
+        log::warn!("No devices found in output");
+    } else if !found_devices_section {
+        return Err(ServerError::ParseError(
+            "Could not find 'Available devices:' section in output".to_string(),
+        ));
+    }
+
+    Ok(devices)
+}
+
+fn parse_device_line(line: &str) -> ServerResult<Option<DeviceInfo>> {
+    let line = line.trim();
+
+    log::info!("Parsing device line: '{}'", line);
+
+    // Expected formats:
+    // "Vulkan0: Intel(R) Arc(tm) A750 Graphics (DG2) (8128 MiB, 8128 MiB free)"
+    // "CUDA0: NVIDIA GeForce RTX 4090 (24576 MiB, 24000 MiB free)"
+    // "SYCL0: Intel(R) Arc(TM) A750 Graphics (8000 MiB, 7721 MiB free)"
+
+    // Split by colon to get ID and rest
+    let parts: Vec<&str> = line.splitn(2, ':').collect();
+    if parts.len() != 2 {
+        log::warn!("Skipping malformed device line: {}", line);
+        return Ok(None);
+    }
+
+    let id = parts[0].trim().to_string();
+    let rest = parts[1].trim();
+
+    // Use regex-like approach to find the memory pattern at the end
+    // Look for pattern: (number MiB, number MiB free) at the end
+    if let Some(memory_match) = find_memory_pattern(rest) {
+        let (memory_start, memory_content) = memory_match;
+        let name = rest[..memory_start].trim().to_string();
+
+        // Parse memory info: "8128 MiB, 8128 MiB free"
+        let memory_parts: Vec<&str> = memory_content.split(',').collect();
+        if memory_parts.len() >= 2 {
+            if let (Ok(total_mem), Ok(free_mem)) = (
+                parse_memory_value(memory_parts[0].trim()),
+                parse_memory_value(memory_parts[1].trim()),
+            ) {
+                log::info!("Parsed device - ID: '{}', Name: '{}', Mem: {}, Free: {}", id, name, total_mem, free_mem);
+
+                return Ok(Some(DeviceInfo {
+                    id,
+                    name,
+                    mem: total_mem,
+                    free: free_mem,
+                }));
+            }
+        }
+    }
+
+    log::warn!("Could not parse device line: {}", line);
+    Ok(None)
+}
+
+fn find_memory_pattern(text: &str) -> Option<(usize, &str)> {
+    // Find the last parenthesis that contains the memory pattern
+    let mut last_match = None;
+    let mut chars = text.char_indices().peekable();
+
+    while let Some((start_idx, ch)) = chars.next() {
+        if ch == '(' {
+            // Find the closing parenthesis
+            let remaining = &text[start_idx + 1..];
+            if let Some(close_pos) = remaining.find(')') {
+                let content = &remaining[..close_pos];
+
+                // Check if this looks like memory info
+                if is_memory_pattern(content) {
+                    last_match = Some((start_idx, content));
+                }
+            }
+        }
+    }
+
+    last_match
+}
+
+fn is_memory_pattern(content: &str) -> bool {
+    // Check if content matches pattern like "8128 MiB, 8128 MiB free"
+    // Must contain: numbers, "MiB", comma, "free"
+    if !(content.contains("MiB") && content.contains("free") && content.contains(',')) {
+        return false;
+    }
+
+    let parts: Vec<&str> = content.split(',').collect();
+    if parts.len() != 2 {
+        return false;
+    }
+
+    parts.iter().all(|part| {
+        let part = part.trim();
+        // Each part should start with a number and contain "MiB"
+        part.split_whitespace().next()
+            .map_or(false, |first_word| first_word.parse::<i32>().is_ok()) &&
+        part.contains("MiB")
+    })
+}
+
+fn parse_memory_value(mem_str: &str) -> ServerResult<i32> {
+    // Handle formats like "8000 MiB" or "7721 MiB free"
+    let parts: Vec<&str> = mem_str.split_whitespace().collect();
+    if parts.is_empty() {
+        return Err(ServerError::ParseError(format!(
+            "Empty memory value: '{}'",
+            mem_str
+        )));
+    }
+
+    // Take the first part which should be the number
+    let number_str = parts[0];
+    number_str.parse::<i32>().map_err(|_| {
+        ServerError::ParseError(format!(
+            "Could not parse memory value: '{}'",
+            number_str
+        ))
+    })
+}
+
 // crypto
 #[tauri::command]
 pub fn generate_api_key(model_id: String, api_secret: String) -> Result {
diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs
index b713b5cd0..69a512a0e 100644
--- a/src-tauri/src/lib.rs
+++ b/src-tauri/src/lib.rs
@@ -94,6 +94,7 @@ pub fn run() {
             // llama-cpp extension
             core::utils::extensions::inference_llamacpp_extension::server::load_llama_model,
             core::utils::extensions::inference_llamacpp_extension::server::unload_llama_model,
+            core::utils::extensions::inference_llamacpp_extension::server::get_devices,
             core::utils::extensions::inference_llamacpp_extension::server::generate_api_key,
             core::utils::extensions::inference_llamacpp_extension::server::is_process_running,
         ])
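
Editor's note (not part of the patch): a quick way to sanity-check the new parser is a unit test against the device-line formats documented in `parse_device_line`. The sketch below assumes a `#[cfg(test)]` module at the bottom of `server.rs`; the sample `llama-server` output is hand-written to match those documented formats, not captured from a real run.

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parses_multiple_devices() {
        // Hand-written sample mirroring the formats documented in parse_device_line
        let output = r"
Available devices:
  Vulkan0: Intel(R) Arc(tm) A750 Graphics (DG2) (8128 MiB, 8128 MiB free)
  CUDA0: NVIDIA GeForce RTX 4090 (24576 MiB, 24000 MiB free)
";
        let devices = parse_device_output(output).expect("sample output should parse");
        assert_eq!(devices.len(), 2);
        // The trailing "(<total> MiB, <free> MiB free)" group is stripped from
        // the name, while earlier parenthesized text such as "(DG2)" is kept
        assert_eq!(devices[0].id, "Vulkan0");
        assert_eq!(devices[0].name, "Intel(R) Arc(tm) A750 Graphics (DG2)");
        assert_eq!(devices[0].mem, 8128);
        assert_eq!(devices[1].free, 24000);
    }

    #[test]
    fn missing_header_is_an_error() {
        // Without the "Available devices:" section the parser returns ParseError
        assert!(parse_device_output("no such section").is_err());
    }
}

Run with `cargo test` from `src-tauri/`; this exercises the parsing path without needing a GPU or a `llama-server` binary on the machine.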