Feat: Improved llamacpp Server Stability and Diagnostics (#5761)

* feat: Improve llamacpp server error reporting and model load stability

This commit introduces significant improvements to how the llamacpp server
process is managed and how its errors are reported.

Key changes:
- **Enhanced Error Reporting:** The llamacpp server's stdout and stderr
  are now piped and captured. If the llamacpp process exits prematurely
  or fails to start, the captured stderr is returned as a
  `LlamacppError`, giving users and developers much more specific,
  actionable diagnostics (see the caller-side sketch after this list).
- **Increased Model Load Timeout:** The `waitForModelLoad` timeout has
  been increased from 30 seconds to 240 seconds (4 minutes). This
  addresses issues where larger models or slower systems would
  prematurely time out during the model loading phase.
- **API Secret Update:** The internal API secret for the llamacpp
  extension has been updated from 'Jan' to 'JustAskNow'.
- **Version Bump:** The application version in `tauri.conf.json` has
  been incremented to `0.6.901`.
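
From the application's side, the practical effect is that a failed load now rejects with the captured llama.cpp stderr text instead of an opaque failure or timeout. The sketch below is illustrative only: the import paths, type sources, and wrapper function are assumptions, while `startModel` itself is the web-app helper changed later in this diff.

```typescript
// Illustrative caller-side sketch; import paths and the wrapper are assumptions.
import { startModel } from '@/services/models' // hypothetical import path
import type { ProviderObject, SessionInfo } from '@/types' // hypothetical import path

async function startWithDiagnostics(
  provider: ProviderObject,
  model: string
): Promise<SessionInfo | undefined> {
  try {
    // Resolves with a SessionInfo, or undefined when the model is already
    // loaded (the "should not spam load requests" fix below).
    return await startModel(provider, model)
  } catch (error) {
    // The rejection should now surface the llama.cpp stderr captured on the
    // Rust side and wrapped in a LlamacppError.
    console.error('llamacpp failed to start:', (error as Error).message)
    throw error
  }
}
```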

* fix: should not spam load requests (a condensed sketch of the guarded flow follows these notes)

* test: add test to cover the fix

* refactor: clean up

* test: add more test case
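
For reference, the guarded flow reads roughly as follows after this fix; it condenses the `startModel` change in the last file of this diff. Import paths are assumptions, and the `.catch` error handling (truncated in the diff) is omitted rather than guessed at.

```typescript
// Condensed sketch of the dedup guard; import paths are hypothetical.
import { getEngine } from './engines' // hypothetical import path
import type { ProviderObject, SessionInfo } from '@/types' // hypothetical import path

export const startModel = async (
  provider: ProviderObject,
  model: string
): Promise<SessionInfo | undefined> => {
  // If the engine already reports the model as loaded, return early instead
  // of issuing another load request.
  if ((await getEngine(provider.provider).getLoadedModels()).includes(model))
    return undefined

  // Otherwise delegate to the engine as before.
  return getEngine(provider.provider).load(model)
}
```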

---------

Co-authored-by: Louis <louis@jan.ai>
Author: Akarshan Biswas, 2025-07-14 11:55:44 +05:30 (committed by GitHub)
Commit: dee98f41d1
Parent: 96ba42e411
5 changed files with 72 additions and 17 deletions

File 1 of 5: llamacpp extension (TypeScript)

@@ -117,7 +117,7 @@ export default class llamacpp_extension extends AIEngine {
   private config: LlamacppConfig
   private activeSessions: Map<number, SessionInfo> = new Map()
   private providerPath!: string
-  private apiSecret: string = 'Jan'
+  private apiSecret: string = 'JustAskNow'
   override async onLoad(): Promise<void> {
     super.onLoad() // Calls registerEngine() from AIEngine
@@ -713,7 +713,7 @@ export default class llamacpp_extension extends AIEngine {
   private async waitForModelLoad(
     sInfo: SessionInfo,
-    timeoutMs = 30_000
+    timeoutMs = 240_000
   ): Promise<void> {
     const start = Date.now()
     while (Date.now() - start < timeoutMs) {

File 2 of 5: llamacpp server commands (Rust, Tauri backend)

@@ -3,10 +3,12 @@
 use hmac::{Hmac, Mac};
 use serde::{Deserialize, Serialize};
 use sha2::Sha256;
 use std::path::PathBuf;
+use std::process::Stdio;
 use std::time::Duration;
 use sysinfo::{Pid, ProcessesToUpdate, System};
 use tauri::State; // Import Manager trait
 use thiserror;
+use tokio::io::{AsyncBufReadExt, BufReader};
 use tokio::process::Command;
 use tokio::time::timeout;
@@ -17,10 +19,8 @@
 type HmacSha256 = Hmac<Sha256>;
 // Error type for server commands
 #[derive(Debug, thiserror::Error)]
 pub enum ServerError {
-    // #[error("Server is already running")]
-    // AlreadyRunning,
-    // #[error("Server is not running")]
-    // NotRunning,
+    #[error("llamacpp error: {0}")]
+    LlamacppError(String),
     #[error("Failed to locate server binary: {0}")]
     BinaryNotFound(String),
     #[error("IO error: {0}")]
@@ -56,6 +56,17 @@ pub struct UnloadResult {
     error: Option<String>,
 }
+async fn capture_stderr(stderr: impl tokio::io::AsyncRead + Unpin) -> String {
+    let mut reader = BufReader::new(stderr).lines();
+    let mut buf = String::new();
+    while let Ok(Some(line)) = reader.next_line().await {
+        log::info!("[llamacpp] {}", line); // Don't use log::error!
+        buf.push_str(&line);
+        buf.push('\n');
+    }
+    buf
+}
 // --- Load Command ---
 #[tauri::command]
 pub async fn load_llama_model(
@@ -138,9 +149,8 @@ pub async fn load_llama_model(
         }
     }
-    // Optional: Redirect stdio if needed (e.g., for logging within Jan)
-    // command.stdout(Stdio::piped());
-    // command.stderr(Stdio::piped());
+    command.stdout(Stdio::piped());
+    command.stderr(Stdio::piped());
     #[cfg(all(windows, target_arch = "x86_64"))]
     {
         use std::os::windows::process::CommandExt;
@@ -149,7 +159,28 @@
     }
     // Spawn the child process
-    let child = command.spawn().map_err(ServerError::Io)?;
+    let mut child = command.spawn().map_err(ServerError::Io)?;
+    let stderr = child.stderr.take().expect("stderr was piped");
+    let stderr_task = tokio::spawn(capture_stderr(stderr));
+    let stdout = child.stdout.take().expect("stdout was piped");
+    tokio::spawn(async move {
+        let mut reader = BufReader::new(stdout).lines();
+        while let Ok(Some(line)) = reader.next_line().await {
+            log::info!("[llamacpp stdout] {}", line);
+        }
+    });
+    tokio::time::sleep(Duration::from_millis(300)).await;
+    if let Some(status) = child.try_wait()? {
+        if !status.success() {
+            let stderr_output = stderr_task.await.unwrap_or_default();
+            log::error!("llama.cpp exited early with code {status:?}");
+            log::error!("--- stderr ---\n{}", stderr_output);
+            return Err(ServerError::LlamacppError(stderr_output.trim().to_string()));
+        }
+    }
     // Get the PID to use as session ID
     let pid = child.id().map(|id| id as i32).unwrap_or(-1);
@@ -280,4 +311,3 @@ pub async fn is_process_running(pid: i32, state: State<'_, AppState>) -> Result<
     Ok(alive)
 }

File 3 of 5: tauri.conf.json

@@ -1,7 +1,7 @@
 {
   "$schema": "https://schema.tauri.app/config/2",
   "productName": "Jan",
-  "version": "0.6.900",
+  "version": "0.6.901",
   "identifier": "jan.ai.app",
   "build": {
     "frontendDist": "../web-app/dist",

File 4 of 5: models service tests (TypeScript)

@@ -97,13 +97,17 @@ describe('models service', () => {
         statusText: 'Not Found',
       })
-      await expect(fetchModelCatalog()).rejects.toThrow('Failed to fetch model catalog: 404 Not Found')
+      await expect(fetchModelCatalog()).rejects.toThrow(
+        'Failed to fetch model catalog: 404 Not Found'
+      )
     })
     it('should handle network error', async () => {
       ;(fetch as any).mockRejectedValue(new Error('Network error'))
-      await expect(fetchModelCatalog()).rejects.toThrow('Failed to fetch model catalog: Network error')
+      await expect(fetchModelCatalog()).rejects.toThrow(
+        'Failed to fetch model catalog: Network error'
+      )
     })
   })
@@ -209,6 +213,9 @@
       const model = 'model1'
       const mockSession = { id: 'session1' }
+      mockEngine.getLoadedModels.mockResolvedValue({
+        includes: () => false,
+      })
       mockEngine.load.mockResolvedValue(mockSession)
       const result = await startModel(provider, model)
@@ -222,10 +229,23 @@
       const model = 'model1'
       const error = new Error('Failed to start model')
+      mockEngine.getLoadedModels.mockResolvedValue({
+        includes: () => false,
+      })
       mockEngine.load.mockRejectedValue(error)
       await expect(startModel(provider, model)).rejects.toThrow(error)
     })
+    it('should not load model again', async () => {
+      const provider = { provider: 'openai', models: [] } as ProviderObject
+      const model = 'model1'
+      mockEngine.getLoadedModels.mockResolvedValue({
+        includes: () => true,
+      })
+      expect(mockEngine.load).toBeCalledTimes(0)
+      await expect(startModel(provider, model)).resolves.toBe(undefined)
+    })
   })
   describe('configurePullOptions', () => {
@@ -248,8 +268,11 @@
       await configurePullOptions(proxyOptions)
-      expect(consoleSpy).toHaveBeenCalledWith('Configuring proxy options:', proxyOptions)
+      expect(consoleSpy).toHaveBeenCalledWith(
+        'Configuring proxy options:',
+        proxyOptions
+      )
       consoleSpy.mockRestore()
     })
   })
 })
 })

File 5 of 5: models service (TypeScript)

@@ -145,7 +145,9 @@ export const stopAllModels = async () => {
 export const startModel = async (
   provider: ProviderObject,
   model: string
-): Promise<SessionInfo> => {
+): Promise<SessionInfo | undefined> => {
+  if ((await getEngine(provider.provider).getLoadedModels()).includes(model))
+    return undefined
   return getEngine(provider.provider)
     .load(model)
     .catch((error) => {