feat: Enhance port selection with availability check (#5966)

This change improves the robustness of the llama.cpp extension's server port selection. Previously, the `getRandomPort()` method only checked for ports already in use by active sessions, which could lead to model load failures if the chosen port was occupied by another external process. This change introduces a new Tauri command, `is_port_available`, which performs a system-level check (attempting to bind a TCP listener to the port on 127.0.0.1) to ensure the randomly selected port is truly free before attempting to start the llama-server. It also adds a retry mechanism with a maximum of 20,000 attempts to find an available port in the 3000–3999 range, throwing an error if no suitable port is found after all attempts. This enhancement prevents port conflicts and improves the reliability and user experience of the llama.cpp extension within Jan. Closes #5965
2025-07-29 18:01:52 +05:30 · 2025-07-29 18:01:52 +05:30 · f61ce886a0
commit f61ce886a0
parent eb714776ba
3 changed files with 49 additions and 27 deletions
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@ -782,7 +782,7 @@ export default class llamacpp_extension extends AIEngine {
      }
      closure()
    } else if (key === 'auto_unload') {
-        this.autoUnload = value as boolean
+      this.autoUnload = value as boolean
    }
  }

@ -1075,15 +1075,27 @@ export default class llamacpp_extension extends AIEngine {
   * Function to find a random port
   */
  private async getRandomPort(): Promise<number> {
-    let port: number
-    do {
-      port = Math.floor(Math.random() * 1000) + 3000
-    } while (
-      Array.from(this.activeSessions.values()).some(
+    const MAX_ATTEMPTS = 20000
+    let attempts = 0
+
+    while (attempts < MAX_ATTEMPTS) {
+      const port = Math.floor(Math.random() * 1000) + 3000
+
+      const isAlreadyUsed = Array.from(this.activeSessions.values()).some(
        (info) => info.port === port
      )
+
+      if (!isAlreadyUsed) {
+        const isAvailable = await invoke<boolean>('is_port_available', { port })
+        if (isAvailable) return port
+      }
+
+      attempts++
+    }
+
+    throw new Error(
+      'Failed to find an available port for the model to load'
    )
-    return port
  }

  private async sleep(ms: number): Promise<void> {
--- a/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs
+++ b/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs
@ -502,17 +502,16 @@ fn parse_device_output(output: &str) -> ServerResult<Vec<DeviceInfo>> {
    Ok(devices)
 }

-
 fn parse_device_line(line: &str) -> ServerResult<Option<DeviceInfo>> {
    let line = line.trim();
-    
+
    log::info!("Parsing device line: '{}'", line);
-    
+
    // Expected formats:
    // "Vulkan0: Intel(R) Arc(tm) A750 Graphics (DG2) (8128 MiB, 8128 MiB free)"
    // "CUDA0: NVIDIA GeForce RTX 4090 (24576 MiB, 24000 MiB free)"
    // "SYCL0: Intel(R) Arc(TM) A750 Graphics (8000 MiB, 7721 MiB free)"
-    
+
    // Split by colon to get ID and rest
    let parts: Vec<&str> = line.splitn(2, ':').collect();
    if parts.len() != 2 {
@ -528,16 +527,22 @@ fn parse_device_line(line: &str) -> ServerResult<Option<DeviceInfo>> {
    if let Some(memory_match) = find_memory_pattern(rest) {
        let (memory_start, memory_content) = memory_match;
        let name = rest[..memory_start].trim().to_string();
-        
+
        // Parse memory info: "8128 MiB, 8128 MiB free"
        let memory_parts: Vec<&str> = memory_content.split(',').collect();
        if memory_parts.len() >= 2 {
            if let (Ok(total_mem), Ok(free_mem)) = (
                parse_memory_value(memory_parts[0].trim()),
-                parse_memory_value(memory_parts[1].trim())
+                parse_memory_value(memory_parts[1].trim()),
            ) {
-                log::info!("Parsed device - ID: '{}', Name: '{}', Mem: {}, Free: {}", id, name, total_mem, free_mem);
-                
+                log::info!(
+                    "Parsed device - ID: '{}', Name: '{}', Mem: {}, Free: {}",
+                    id,
+                    name,
+                    total_mem,
+                    free_mem
+                );
+
                return Ok(Some(DeviceInfo {
                    id,
                    name,
@ -556,14 +561,14 @@ fn find_memory_pattern(text: &str) -> Option<(usize, &str)> {
    // Find the last parenthesis that contains the memory pattern
    let mut last_match = None;
    let mut chars = text.char_indices().peekable();
-    
+
    while let Some((start_idx, ch)) = chars.next() {
        if ch == '(' {
            // Find the closing parenthesis
            let remaining = &text[start_idx + 1..];
            if let Some(close_pos) = remaining.find(')') {
                let content = &remaining[..close_pos];
-                
+
                // Check if this looks like memory info
                if is_memory_pattern(content) {
                    last_match = Some((start_idx, content));
@ -571,7 +576,7 @@ fn find_memory_pattern(text: &str) -> Option<(usize, &str)> {
            }
        }
    }
-    
+
    last_match
 }

@ -581,18 +586,19 @@ fn is_memory_pattern(content: &str) -> bool {
    if !(content.contains("MiB") && content.contains("free") && content.contains(',')) {
        return false;
    }
-    
+
    let parts: Vec<&str> = content.split(',').collect();
    if parts.len() != 2 {
        return false;
    }
-    
+
    parts.iter().all(|part| {
        let part = part.trim();
        // Each part should start with a number and contain "MiB"
-        part.split_whitespace().next()
-            .map_or(false, |first_word| first_word.parse::<i32>().is_ok()) &&
-        part.contains("MiB")
+        part.split_whitespace()
+            .next()
+            .map_or(false, |first_word| first_word.parse::<i32>().is_ok())
+            && part.contains("MiB")
    })
 }

@ -609,10 +615,7 @@ fn parse_memory_value(mem_str: &str) -> ServerResult<i32> {
    // Take the first part which should be the number
    let number_str = parts[0];
    number_str.parse::<i32>().map_err(|_| {
-        ServerError::ParseError(format!(
-            "Could not parse memory value: '{}'",
-            number_str
-        ))
+        ServerError::ParseError(format!("Could not parse memory value: '{}'", number_str))
    })
 }

@ -643,3 +646,9 @@ pub async fn is_process_running(pid: i32, state: State<'_, AppState>) -> Result<

    Ok(alive)
 }
+
+// check port availability
+#[tauri::command]
+pub fn is_port_available(port: u16) -> bool {
+    std::net::TcpListener::bind(("127.0.0.1", port)).is_ok()
+}
--- a/src-tauri/src/lib.rs
+++ b/src-tauri/src/lib.rs
@ -96,6 +96,7 @@ pub fn run() {
            core::utils::extensions::inference_llamacpp_extension::server::load_llama_model,
            core::utils::extensions::inference_llamacpp_extension::server::unload_llama_model,
            core::utils::extensions::inference_llamacpp_extension::server::get_devices,
+            core::utils::extensions::inference_llamacpp_extension::server::is_port_available,
            core::utils::extensions::inference_llamacpp_extension::server::generate_api_key,
            core::utils::extensions::inference_llamacpp_extension::server::is_process_running,
        ])