From f61ce886a0051220e2ebedf11183587febdf6e14 Mon Sep 17 00:00:00 2001
From: Akarshan Biswas
Date: Tue, 29 Jul 2025 18:01:52 +0530
Subject: [PATCH] feat: Enhance port selection with availability check (#5966)

This change improves the robustness of the llama.cpp extension's server
port selection. Previously, the `getRandomPort()` method only checked for
ports already in use by active sessions, which could lead to model load
failures if the chosen port was occupied by another external process.

It introduces a new Tauri command, `is_port_available`, which performs a
system-level check to ensure the randomly selected port is truly free
before attempting to start the llama-server. It also adds a retry
mechanism with a maximum of 20,000 attempts to find an available port,
throwing an error if no suitable port is found within the specified range
after all attempts.

This enhancement prevents port conflicts and improves the reliability and
user experience of the llama.cpp extension within Jan.

Closes #5965
---
 extensions/llamacpp-extension/src/index.ts | 26 +++++++---
 .../inference_llamacpp_extension/server.rs | 49 +++++++++++--------
 src-tauri/src/lib.rs                       |  1 +
 3 files changed, 49 insertions(+), 27 deletions(-)

diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts
index 3f6c70f04..e2dbb453d 100644
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@@ -782,7 +782,7 @@ export default class llamacpp_extension extends AIEngine {
       }
       closure()
     } else if (key === 'auto_unload') {
-      this.autoUnload = value as boolean
+      this.autoUnload = value as boolean
     }
   }
 
@@ -1075,15 +1075,27 @@ export default class llamacpp_extension extends AIEngine {
    * Function to find a random port
    */
   private async getRandomPort(): Promise<number> {
-    let port: number
-    do {
-      port = Math.floor(Math.random() * 1000) + 3000
-    } while (
-      Array.from(this.activeSessions.values()).some(
+    const MAX_ATTEMPTS = 20000
+    let attempts = 0
+
+    while (attempts < MAX_ATTEMPTS) {
+      const port = Math.floor(Math.random() * 1000) + 3000
+
+      const isAlreadyUsed = Array.from(this.activeSessions.values()).some(
         (info) => info.port === port
       )
+
+      if (!isAlreadyUsed) {
+        const isAvailable = await invoke('is_port_available', { port })
+        if (isAvailable) return port
+      }
+
+      attempts++
+    }
+
+    throw new Error(
+      'Failed to find an available port for the model to load'
     )
-    return port
   }
 
   private async sleep(ms: number): Promise<void> {
diff --git a/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs b/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs
index d00ba3732..83e4c8fc2 100644
--- a/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs
+++ b/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs
@@ -502,17 +502,16 @@ fn parse_device_output(output: &str) -> ServerResult<Vec<DeviceInfo>> {
     Ok(devices)
 }
 
-
 fn parse_device_line(line: &str) -> ServerResult<Option<DeviceInfo>> {
     let line = line.trim();
-    
+
     log::info!("Parsing device line: '{}'", line);
-    
+
     // Expected formats:
     // "Vulkan0: Intel(R) Arc(tm) A750 Graphics (DG2) (8128 MiB, 8128 MiB free)"
    // "CUDA0: NVIDIA GeForce RTX 4090 (24576 MiB, 24000 MiB free)"
     // "SYCL0: Intel(R) Arc(TM) A750 Graphics (8000 MiB, 7721 MiB free)"
-    
+
     // Split by colon to get ID and rest
     let parts: Vec<&str> = line.splitn(2, ':').collect();
     if parts.len() != 2 {
@@ -528,16 +527,22 @@ fn parse_device_line(line: &str) -> ServerResult<Option<DeviceInfo>> {
     if let Some(memory_match) = find_memory_pattern(rest) {
         let (memory_start, memory_content) = memory_match;
         let name = rest[..memory_start].trim().to_string();
-        
+
         // Parse memory info: "8128 MiB, 8128 MiB free"
         let memory_parts: Vec<&str> = memory_content.split(',').collect();
         if memory_parts.len() >= 2 {
             if let (Ok(total_mem), Ok(free_mem)) = (
                 parse_memory_value(memory_parts[0].trim()),
-                parse_memory_value(memory_parts[1].trim())
+                parse_memory_value(memory_parts[1].trim()),
             ) {
-                log::info!("Parsed device - ID: '{}', Name: '{}', Mem: {}, Free: {}", id, name, total_mem, free_mem);
-                
+                log::info!(
+                    "Parsed device - ID: '{}', Name: '{}', Mem: {}, Free: {}",
+                    id,
+                    name,
+                    total_mem,
+                    free_mem
+                );
+
                 return Ok(Some(DeviceInfo {
                     id,
                     name,
@@ -556,14 +561,14 @@ fn find_memory_pattern(text: &str) -> Option<(usize, &str)> {
     // Find the last parenthesis that contains the memory pattern
     let mut last_match = None;
     let mut chars = text.char_indices().peekable();
-    
+
     while let Some((start_idx, ch)) = chars.next() {
         if ch == '(' {
             // Find the closing parenthesis
             let remaining = &text[start_idx + 1..];
             if let Some(close_pos) = remaining.find(')') {
                 let content = &remaining[..close_pos];
-                
+
                 // Check if this looks like memory info
                 if is_memory_pattern(content) {
                     last_match = Some((start_idx, content));
@@ -571,7 +576,7 @@ fn find_memory_pattern(text: &str) -> Option<(usize, &str)> {
             }
         }
     }
-    
+
     last_match
 }
 
@@ -581,18 +586,19 @@ fn is_memory_pattern(content: &str) -> bool {
     if !(content.contains("MiB") && content.contains("free") && content.contains(',')) {
         return false;
     }
-    
+
     let parts: Vec<&str> = content.split(',').collect();
     if parts.len() != 2 {
         return false;
     }
-    
+
     parts.iter().all(|part| {
         let part = part.trim();
         // Each part should start with a number and contain "MiB"
-        part.split_whitespace().next()
-            .map_or(false, |first_word| first_word.parse::<u64>().is_ok()) &&
-        part.contains("MiB")
+        part.split_whitespace()
+            .next()
+            .map_or(false, |first_word| first_word.parse::<u64>().is_ok())
+            && part.contains("MiB")
     })
 }
 
@@ -609,10 +615,7 @@ fn parse_memory_value(mem_str: &str) -> ServerResult<u64> {
     // Take the first part which should be the number
     let number_str = parts[0];
     number_str.parse::<u64>().map_err(|_| {
-        ServerError::ParseError(format!(
-            "Could not parse memory value: '{}'",
-            number_str
-        ))
+        ServerError::ParseError(format!("Could not parse memory value: '{}'", number_str))
     })
 }
 
@@ -643,3 +646,9 @@ pub async fn is_process_running(pid: i32, state: State<'_, AppState>) -> Result<
 
     Ok(alive)
 }
+
+// check port availability
+#[tauri::command]
+pub fn is_port_available(port: u16) -> bool {
+    std::net::TcpListener::bind(("127.0.0.1", port)).is_ok()
+}
diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs
index 5174cd578..1baef020b 100644
--- a/src-tauri/src/lib.rs
+++ b/src-tauri/src/lib.rs
@@ -96,6 +96,7 @@ pub fn run() {
             core::utils::extensions::inference_llamacpp_extension::server::load_llama_model,
             core::utils::extensions::inference_llamacpp_extension::server::unload_llama_model,
             core::utils::extensions::inference_llamacpp_extension::server::get_devices,
+            core::utils::extensions::inference_llamacpp_extension::server::is_port_available,
             core::utils::extensions::inference_llamacpp_extension::server::generate_api_key,
             core::utils::extensions::inference_llamacpp_extension::server::is_process_running,
         ])