Feat: Improved llamacpp Server Stability and Diagnostics (#5761)

* feat: Improve llamacpp server error reporting and model load stability

This commit introduces significant improvements to how the llamacpp server
process is managed and how its errors are reported.

Key changes:
- **Enhanced Error Reporting:** The llamacpp server's stdout and stderr
  are now piped and captured. If the llamacpp process exits prematurely
  or fails to start, the captured stderr is returned as a
  `LlamacppError`, giving users and developers much more specific,
  actionable diagnostics (see the caller-side sketch after this list).
- **Increased Model Load Timeout:** The `waitForModelLoad` timeout has
  been increased from 30 seconds to 240 seconds (4 minutes). This
  addresses issues where larger models or slower systems would
  prematurely time out during the model loading phase.
- **API Secret Update:** The internal API secret for the llamacpp
  extension has been updated from 'Jan' to 'JustAskNow'.
- **Version Bump:** The application version in `tauri.conf.json` has
  been incremented to `0.6.901`.
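
From the application's side, the practical effect is that a failed load now rejects with the captured llama.cpp stderr text instead of an opaque failure or timeout. The sketch below is illustrative only: the import paths, type sources, and wrapper function are assumptions, while `startModel` itself is the web-app helper changed later in this diff.

```typescript
// Illustrative caller-side sketch; import paths and the wrapper are assumptions.
import { startModel } from '@/services/models' // hypothetical import path
import type { ProviderObject, SessionInfo } from '@/types' // hypothetical import path

async function startWithDiagnostics(
  provider: ProviderObject,
  model: string
): Promise<SessionInfo | undefined> {
  try {
    // Resolves with a SessionInfo, or undefined when the model is already
    // loaded (the "should not spam load requests" fix below).
    return await startModel(provider, model)
  } catch (error) {
    // The rejection should now surface the llama.cpp stderr captured on the
    // Rust side and wrapped in a LlamacppError.
    console.error('llamacpp failed to start:', (error as Error).message)
    throw error
  }
}
```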

* fix: should not spam load requests (a condensed sketch of the guarded flow follows these notes)

* test: add test to cover the fix

* refactor: clean up

* test: add more test case
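
For reference, the guarded flow reads roughly as follows after this fix; it condenses the `startModel` change in the last file of this diff. Import paths are assumptions, and the `.catch` error handling (truncated in the diff) is omitted rather than guessed at.

```typescript
// Condensed sketch of the dedup guard; import paths are hypothetical.
import { getEngine } from './engines' // hypothetical import path
import type { ProviderObject, SessionInfo } from '@/types' // hypothetical import path

export const startModel = async (
  provider: ProviderObject,
  model: string
): Promise<SessionInfo | undefined> => {
  // If the engine already reports the model as loaded, return early instead
  // of issuing another load request.
  if ((await getEngine(provider.provider).getLoadedModels()).includes(model))
    return undefined

  // Otherwise delegate to the engine as before.
  return getEngine(provider.provider).load(model)
}
```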

---------

Co-authored-by: Louis <louis@jan.ai>
Author: Akarshan Biswas, 2025-07-14 11:55:44 +05:30 (committed by GitHub)
Commit: dee98f41d1
Parent: 96ba42e411
5 changed files with 72 additions and 17 deletions

File 1 of 5: llamacpp extension (TypeScript)

@@ -117,7 +117,7 @@ export default class llamacpp_extension extends AIEngine {
   private config: LlamacppConfig
   private activeSessions: Map<number, SessionInfo> = new Map()
   private providerPath!: string
-  private apiSecret: string = 'Jan'
+  private apiSecret: string = 'JustAskNow'
   override async onLoad(): Promise<void> {
     super.onLoad() // Calls registerEngine() from AIEngine
@@ -713,7 +713,7 @@ export default class llamacpp_extension extends AIEngine {
   private async waitForModelLoad(
     sInfo: SessionInfo,
-    timeoutMs = 30_000
+    timeoutMs = 240_000
   ): Promise<void> {
     const start = Date.now()
     while (Date.now() - start < timeoutMs) {

File 2 of 5: llamacpp server commands (Rust, Tauri backend)

@@ -3,10 +3,12 @@
 use hmac::{Hmac, Mac};
 use serde::{Deserialize, Serialize};
 use sha2::Sha256;
 use std::path::PathBuf;
+use std::process::Stdio;
 use std::time::Duration;
 use sysinfo::{Pid, ProcessesToUpdate, System};
 use tauri::State; // Import Manager trait
 use thiserror;
+use tokio::io::{AsyncBufReadExt, BufReader};
 use tokio::process::Command;
 use tokio::time::timeout;
@@ -17,10 +19,8 @@
 type HmacSha256 = Hmac<Sha256>;
 // Error type for server commands
 #[derive(Debug, thiserror::Error)]
 pub enum ServerError {
-    // #[error("Server is already running")]
-    // AlreadyRunning,
-    // #[error("Server is not running")]
-    // NotRunning,
+    #[error("llamacpp error: {0}")]
+    LlamacppError(String),
     #[error("Failed to locate server binary: {0}")]
     BinaryNotFound(String),
     #[error("IO error: {0}")]
@@ -56,6 +56,17 @@ pub struct UnloadResult {
     error: Option<String>,
 }
+async fn capture_stderr(stderr: impl tokio::io::AsyncRead + Unpin) -> String {
+    let mut reader = BufReader::new(stderr).lines();
+    let mut buf = String::new();
+    while let Ok(Some(line)) = reader.next_line().await {
+        log::info!("[llamacpp] {}", line); // Don't use log::error!
+        buf.push_str(&line);
+        buf.push('\n');
+    }
+    buf
+}
 // --- Load Command ---
 #[tauri::command]
 pub async fn load_llama_model(
@@ -138,9 +149,8 @@ pub async fn load_llama_model(
         }
     }
-    // Optional: Redirect stdio if needed (e.g., for logging within Jan)
-    // command.stdout(Stdio::piped());
-    // command.stderr(Stdio::piped());
+    command.stdout(Stdio::piped());
+    command.stderr(Stdio::piped());
     #[cfg(all(windows, target_arch = "x86_64"))]
     {
         use std::os::windows::process::CommandExt;
@@ -149,7 +159,28 @@
     }
     // Spawn the child process
-    let child = command.spawn().map_err(ServerError::Io)?;
+    let mut child = command.spawn().map_err(ServerError::Io)?;
+    let stderr = child.stderr.take().expect("stderr was piped");
+    let stderr_task = tokio::spawn(capture_stderr(stderr));
+    let stdout = child.stdout.take().expect("stdout was piped");
+    tokio::spawn(async move {
+        let mut reader = BufReader::new(stdout).lines();
+        while let Ok(Some(line)) = reader.next_line().await {
+            log::info!("[llamacpp stdout] {}", line);
+        }
+    });
+    tokio::time::sleep(Duration::from_millis(300)).await;
+    if let Some(status) = child.try_wait()? {
+        if !status.success() {
+            let stderr_output = stderr_task.await.unwrap_or_default();
+            log::error!("llama.cpp exited early with code {status:?}");
+            log::error!("--- stderr ---\n{}", stderr_output);
+            return Err(ServerError::LlamacppError(stderr_output.trim().to_string()));
+        }
+    }
     // Get the PID to use as session ID
     let pid = child.id().map(|id| id as i32).unwrap_or(-1);
@@ -280,4 +311,3 @@ pub async fn is_process_running(pid: i32, state: State<'_, AppState>) -> Result<
     Ok(alive)
 }

File 3 of 5: tauri.conf.json

@@ -1,7 +1,7 @@
 {
   "$schema": "https://schema.tauri.app/config/2",
   "productName": "Jan",
-  "version": "0.6.900",
+  "version": "0.6.901",
   "identifier": "jan.ai.app",
   "build": {
     "frontendDist": "../web-app/dist",

File 4 of 5: models service tests (TypeScript)

@@ -97,13 +97,17 @@ describe('models service', () => {
         statusText: 'Not Found',
       })
-      await expect(fetchModelCatalog()).rejects.toThrow('Failed to fetch model catalog: 404 Not Found')
+      await expect(fetchModelCatalog()).rejects.toThrow(
+        'Failed to fetch model catalog: 404 Not Found'
+      )
     })
     it('should handle network error', async () => {
       ;(fetch as any).mockRejectedValue(new Error('Network error'))
-      await expect(fetchModelCatalog()).rejects.toThrow('Failed to fetch model catalog: Network error')
+      await expect(fetchModelCatalog()).rejects.toThrow(
+        'Failed to fetch model catalog: Network error'
+      )
     })
   })
@@ -209,6 +213,9 @@
       const model = 'model1'
       const mockSession = { id: 'session1' }
+      mockEngine.getLoadedModels.mockResolvedValue({
+        includes: () => false,
+      })
       mockEngine.load.mockResolvedValue(mockSession)
       const result = await startModel(provider, model)
@@ -222,10 +229,23 @@
       const model = 'model1'
       const error = new Error('Failed to start model')
+      mockEngine.getLoadedModels.mockResolvedValue({
+        includes: () => false,
+      })
       mockEngine.load.mockRejectedValue(error)
       await expect(startModel(provider, model)).rejects.toThrow(error)
     })
+    it('should not load model again', async () => {
+      const provider = { provider: 'openai', models: [] } as ProviderObject
+      const model = 'model1'
+      mockEngine.getLoadedModels.mockResolvedValue({
+        includes: () => true,
+      })
+      expect(mockEngine.load).toBeCalledTimes(0)
+      await expect(startModel(provider, model)).resolves.toBe(undefined)
+    })
   })
   describe('configurePullOptions', () => {
@@ -248,8 +268,11 @@
       await configurePullOptions(proxyOptions)
-      expect(consoleSpy).toHaveBeenCalledWith('Configuring proxy options:', proxyOptions)
+      expect(consoleSpy).toHaveBeenCalledWith(
+        'Configuring proxy options:',
+        proxyOptions
+      )
       consoleSpy.mockRestore()
     })
   })
 })
 })

File 5 of 5: models service (TypeScript)

@@ -145,7 +145,9 @@ export const stopAllModels = async () => {
 export const startModel = async (
   provider: ProviderObject,
   model: string
-): Promise<SessionInfo> => {
+): Promise<SessionInfo | undefined> => {
+  if ((await getEngine(provider.provider).getLoadedModels()).includes(model))
+    return undefined
   return getEngine(provider.provider)
     .load(model)
     .catch((error) => {