✨ Feat: Improved llamacpp Server Stability and Diagnostics (#5761)
* feat: Improve llamacpp server error reporting and model load stability

  This commit introduces significant improvements to how the llamacpp server process is managed and how its errors are reported.

  Key changes:
  - **Enhanced Error Reporting:** The llamacpp server's stdout and stderr are now piped and captured. If the llamacpp process exits prematurely or fails to start, its stderr output is captured and returned as a `LlamacppError`. This provides much more specific and actionable diagnostic information for users and developers.
  - **Increased Model Load Timeout:** The `waitForModelLoad` timeout has been increased from 30 seconds to 240 seconds (4 minutes). This addresses issues where larger models or slower systems would prematurely time out during the model loading phase.
  - **API Secret Update:** The internal API secret for the llamacpp extension has been updated from 'Jan' to 'JustAskNow'.
  - **Version Bump:** The application version in `tauri.conf.json` has been incremented to `0.6.901`.

* fix: should not spam load requests
* test: add test to cover the fix
* refactor: clean up
* test: add more test case

---------

Co-authored-by: Louis <louis@jan.ai>
parent 96ba42e411
commit dee98f41d1
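For orientation before reading the diff: the core of the change is the stderr-capture pattern introduced on the Rust side. Below is a minimal standalone sketch of that pattern, not the Jan codebase itself; the `llama-server` binary name, its flags, and the plain-`String` error type are placeholders for illustration. The real wiring lives in `capture_stderr` and `load_llama_model` in the hunks further down.

```rust
// Sketch: pipe the child's stderr, collect it on a background task, and
// surface it as the error message if the process exits prematurely.
// Requires tokio with the "process", "io-util", "time", and "macros" features.
use std::process::Stdio;
use std::time::Duration;
use tokio::io::{AsyncBufReadExt, BufReader};
use tokio::process::Command;

async fn capture_stderr(stderr: impl tokio::io::AsyncRead + Unpin) -> String {
    let mut reader = BufReader::new(stderr).lines();
    let mut buf = String::new();
    while let Ok(Some(line)) = reader.next_line().await {
        buf.push_str(&line);
        buf.push('\n');
    }
    buf
}

#[tokio::main]
async fn main() -> Result<(), String> {
    // Placeholder binary and arguments, for illustration only.
    let mut child = Command::new("llama-server")
        .arg("--model")
        .arg("model.gguf")
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .map_err(|e| e.to_string())?;

    let stderr = child.stderr.take().expect("stderr was piped");
    let stderr_task = tokio::spawn(capture_stderr(stderr));

    // Give the process a moment to fail fast (bad flags, missing model, etc.).
    tokio::time::sleep(Duration::from_millis(300)).await;
    if let Some(status) = child.try_wait().map_err(|e| e.to_string())? {
        if !status.success() {
            let stderr_output = stderr_task.await.unwrap_or_default();
            return Err(format!("llama.cpp exited early: {}", stderr_output.trim()));
        }
    }

    // In the real command the server keeps running here and the caller goes on
    // to wait for the model to finish loading.
    Ok(())
}
```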
@@ -117,7 +117,7 @@ export default class llamacpp_extension extends AIEngine {
   private config: LlamacppConfig
   private activeSessions: Map<number, SessionInfo> = new Map()
   private providerPath!: string
-  private apiSecret: string = 'Jan'
+  private apiSecret: string = 'JustAskNow'

   override async onLoad(): Promise<void> {
     super.onLoad() // Calls registerEngine() from AIEngine
@@ -713,7 +713,7 @@ export default class llamacpp_extension extends AIEngine {

   private async waitForModelLoad(
     sInfo: SessionInfo,
-    timeoutMs = 30_000
+    timeoutMs = 240_000
   ): Promise<void> {
     const start = Date.now()
     while (Date.now() - start < timeoutMs) {
@@ -3,10 +3,12 @@ use hmac::{Hmac, Mac};
 use serde::{Deserialize, Serialize};
 use sha2::Sha256;
 use std::path::PathBuf;
+use std::process::Stdio;
 use std::time::Duration;
 use sysinfo::{Pid, ProcessesToUpdate, System};
 use tauri::State; // Import Manager trait
 use thiserror;
+use tokio::io::{AsyncBufReadExt, BufReader};
 use tokio::process::Command;
 use tokio::time::timeout;

@@ -17,10 +19,8 @@ type HmacSha256 = Hmac<Sha256>;
 // Error type for server commands
 #[derive(Debug, thiserror::Error)]
 pub enum ServerError {
-    // #[error("Server is already running")]
-    // AlreadyRunning,
-    // #[error("Server is not running")]
-    // NotRunning,
+    #[error("llamacpp error: {0}")]
+    LlamacppError(String),
     #[error("Failed to locate server binary: {0}")]
     BinaryNotFound(String),
     #[error("IO error: {0}")]
@@ -56,6 +56,17 @@ pub struct UnloadResult {
     error: Option<String>,
 }

+async fn capture_stderr(stderr: impl tokio::io::AsyncRead + Unpin) -> String {
+    let mut reader = BufReader::new(stderr).lines();
+    let mut buf = String::new();
+    while let Ok(Some(line)) = reader.next_line().await {
+        log::info!("[llamacpp] {}", line); // Don't use log::error!
+        buf.push_str(&line);
+        buf.push('\n');
+    }
+    buf
+}
+
 // --- Load Command ---
 #[tauri::command]
 pub async fn load_llama_model(
@@ -138,9 +149,8 @@ pub async fn load_llama_model(
         }
     }

-    // Optional: Redirect stdio if needed (e.g., for logging within Jan)
-    // command.stdout(Stdio::piped());
-    // command.stderr(Stdio::piped());
+    command.stdout(Stdio::piped());
+    command.stderr(Stdio::piped());
     #[cfg(all(windows, target_arch = "x86_64"))]
     {
         use std::os::windows::process::CommandExt;
@@ -149,7 +159,28 @@ pub async fn load_llama_model(
     }

     // Spawn the child process
-    let child = command.spawn().map_err(ServerError::Io)?;
+    let mut child = command.spawn().map_err(ServerError::Io)?;
+
+    let stderr = child.stderr.take().expect("stderr was piped");
+    let stderr_task = tokio::spawn(capture_stderr(stderr));
+
+    let stdout = child.stdout.take().expect("stdout was piped");
+    tokio::spawn(async move {
+        let mut reader = BufReader::new(stdout).lines();
+        while let Ok(Some(line)) = reader.next_line().await {
+            log::info!("[llamacpp stdout] {}", line);
+        }
+    });
+
+    tokio::time::sleep(Duration::from_millis(300)).await;
+    if let Some(status) = child.try_wait()? {
+        if !status.success() {
+            let stderr_output = stderr_task.await.unwrap_or_default();
+            log::error!("llama.cpp exited early with code {status:?}");
+            log::error!("--- stderr ---\n{}", stderr_output);
+            return Err(ServerError::LlamacppError(stderr_output.trim().to_string()));
+        }
+    }

     // Get the PID to use as session ID
     let pid = child.id().map(|id| id as i32).unwrap_or(-1);
@@ -280,4 +311,3 @@ pub async fn is_process_running(pid: i32, state: State<'_, AppState>) -> Result<

     Ok(alive)
 }
-
@@ -1,7 +1,7 @@
 {
   "$schema": "https://schema.tauri.app/config/2",
   "productName": "Jan",
-  "version": "0.6.900",
+  "version": "0.6.901",
   "identifier": "jan.ai.app",
   "build": {
     "frontendDist": "../web-app/dist",
@@ -97,13 +97,17 @@ describe('models service', () => {
         statusText: 'Not Found',
       })

-      await expect(fetchModelCatalog()).rejects.toThrow('Failed to fetch model catalog: 404 Not Found')
+      await expect(fetchModelCatalog()).rejects.toThrow(
+        'Failed to fetch model catalog: 404 Not Found'
+      )
     })

     it('should handle network error', async () => {
       ;(fetch as any).mockRejectedValue(new Error('Network error'))

-      await expect(fetchModelCatalog()).rejects.toThrow('Failed to fetch model catalog: Network error')
+      await expect(fetchModelCatalog()).rejects.toThrow(
+        'Failed to fetch model catalog: Network error'
+      )
     })
   })

@@ -209,6 +213,9 @@ describe('models service', () => {
       const model = 'model1'
       const mockSession = { id: 'session1' }

+      mockEngine.getLoadedModels.mockResolvedValue({
+        includes: () => false,
+      })
       mockEngine.load.mockResolvedValue(mockSession)

       const result = await startModel(provider, model)
@@ -222,10 +229,23 @@ describe('models service', () => {
       const model = 'model1'
       const error = new Error('Failed to start model')

+      mockEngine.getLoadedModels.mockResolvedValue({
+        includes: () => false,
+      })
       mockEngine.load.mockRejectedValue(error)

       await expect(startModel(provider, model)).rejects.toThrow(error)
     })
+    it('should not load model again', async () => {
+      const provider = { provider: 'openai', models: [] } as ProviderObject
+      const model = 'model1'
+
+      mockEngine.getLoadedModels.mockResolvedValue({
+        includes: () => true,
+      })
+      expect(mockEngine.load).toBeCalledTimes(0)
+      await expect(startModel(provider, model)).resolves.toBe(undefined)
+    })
   })

   describe('configurePullOptions', () => {
@@ -248,8 +268,11 @@ describe('models service', () => {
       await configurePullOptions(proxyOptions)

-      expect(consoleSpy).toHaveBeenCalledWith('Configuring proxy options:', proxyOptions)
+      expect(consoleSpy).toHaveBeenCalledWith(
+        'Configuring proxy options:',
+        proxyOptions
+      )
       consoleSpy.mockRestore()
     })
   })
 })
})
@@ -145,7 +145,9 @@ export const stopAllModels = async () => {
 export const startModel = async (
   provider: ProviderObject,
   model: string
-): Promise<SessionInfo> => {
+): Promise<SessionInfo | undefined> => {
+  if ((await getEngine(provider.provider).getLoadedModels()).includes(model))
+    return undefined
   return getEngine(provider.provider)
     .load(model)
     .catch((error) => {