✨ Feat: Improved llamacpp Server Stability and Diagnostics (#5761)
* feat: Improve llamacpp server error reporting and model load stability

  This commit introduces significant improvements to how the llamacpp server process is managed and how its errors are reported.

  Key changes:

  - **Enhanced Error Reporting:** The llamacpp server's stdout and stderr are now piped and captured. If the llamacpp process exits prematurely or fails to start, its stderr output is captured and returned as a `LlamacppError`. This provides much more specific and actionable diagnostic information for users and developers.
  - **Increased Model Load Timeout:** The `waitForModelLoad` timeout has been increased from 30 seconds to 240 seconds (4 minutes). This addresses issues where larger models or slower systems would prematurely time out during the model loading phase.
  - **API Secret Update:** The internal API secret for the llamacpp extension has been updated from 'Jan' to 'JustAskNow'.
  - **Version Bump:** The application version in `tauri.conf.json` has been incremented to `0.6.901`.

* fix: should not spam load requests (see the sketch below)
* test: add test to cover the fix
* refactor: clean up
* test: add more test cases

---------

Co-authored-by: Louis <louis@jan.ai>
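The "should not spam load requests" fix comes down to a guard in the web-app's `startModel` helper: before asking the engine to load, it consults the engine's loaded-model list and returns early if the model is already up. A minimal TypeScript sketch of that guard, assuming the `getEngine`, `ProviderObject`, and `SessionInfo` symbols visible in the diff further down (not a verbatim copy of the changed file):

```typescript
// Sketch only - mirrors the guard added to startModel in the models service hunk below.
// getEngine, ProviderObject and SessionInfo are assumed from the surrounding codebase.
export const startModel = async (
  provider: ProviderObject,
  model: string
): Promise<SessionInfo | undefined> => {
  const engine = getEngine(provider.provider)

  // Skip the load call entirely if the engine already reports the model as loaded,
  // so repeated start requests no longer pile up against llama.cpp.
  const loadedModels = await engine.getLoadedModels()
  if (loadedModels.includes(model)) return undefined

  // Otherwise fall through to the normal load path; errors still propagate to the caller.
  return engine.load(model)
}
```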
This commit is contained in: parent 96ba42e411, commit dee98f41d1
@@ -117,7 +117,7 @@ export default class llamacpp_extension extends AIEngine {
   private config: LlamacppConfig
   private activeSessions: Map<number, SessionInfo> = new Map()
   private providerPath!: string
-  private apiSecret: string = 'Jan'
+  private apiSecret: string = 'JustAskNow'
 
   override async onLoad(): Promise<void> {
     super.onLoad() // Calls registerEngine() from AIEngine
@@ -713,7 +713,7 @@ export default class llamacpp_extension extends AIEngine {
 
   private async waitForModelLoad(
     sInfo: SessionInfo,
-    timeoutMs = 30_000
+    timeoutMs = 240_000
   ): Promise<void> {
     const start = Date.now()
     while (Date.now() - start < timeoutMs) {
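The hunk above only shows the signature of `waitForModelLoad` and the top of its polling loop; the body is unchanged and not part of this diff. For orientation, here is a hedged sketch of what a polling helper of this shape typically does. The `/health`-style endpoint, the 500 ms poll interval, and the standalone-function form are assumptions for illustration, not code from this PR:

```typescript
// Illustrative only: poll a local llama.cpp server until it answers, or give up
// after timeoutMs (raised from 30_000 to 240_000 ms in this PR).
async function waitForServerReady(
  healthUrl: string, // assumption, e.g. `http://127.0.0.1:${port}/health`
  timeoutMs = 240_000
): Promise<void> {
  const start = Date.now()
  while (Date.now() - start < timeoutMs) {
    try {
      const res = await fetch(healthUrl)
      if (res.ok) return // server is up and the model finished loading
    } catch {
      // connection refused while the server is still starting; keep polling
    }
    await new Promise((resolve) => setTimeout(resolve, 500)) // hypothetical interval
  }
  throw new Error(`llama.cpp server not ready within ${timeoutMs} ms`)
}
```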
@@ -3,10 +3,12 @@ use hmac::{Hmac, Mac};
 use serde::{Deserialize, Serialize};
 use sha2::Sha256;
 use std::path::PathBuf;
+use std::process::Stdio;
 use std::time::Duration;
 use sysinfo::{Pid, ProcessesToUpdate, System};
 use tauri::State; // Import Manager trait
 use thiserror;
+use tokio::io::{AsyncBufReadExt, BufReader};
 use tokio::process::Command;
 use tokio::time::timeout;
 
@@ -17,10 +19,8 @@ type HmacSha256 = Hmac<Sha256>;
 // Error type for server commands
 #[derive(Debug, thiserror::Error)]
 pub enum ServerError {
-    // #[error("Server is already running")]
-    // AlreadyRunning,
-    // #[error("Server is not running")]
-    // NotRunning,
+    #[error("llamacpp error: {0}")]
+    LlamacppError(String),
     #[error("Failed to locate server binary: {0}")]
     BinaryNotFound(String),
     #[error("IO error: {0}")]
@@ -56,6 +56,17 @@ pub struct UnloadResult {
     error: Option<String>,
 }
 
+async fn capture_stderr(stderr: impl tokio::io::AsyncRead + Unpin) -> String {
+    let mut reader = BufReader::new(stderr).lines();
+    let mut buf = String::new();
+    while let Ok(Some(line)) = reader.next_line().await {
+        log::info!("[llamacpp] {}", line); // Don't use log::error!
+        buf.push_str(&line);
+        buf.push('\n');
+    }
+    buf
+}
+
 // --- Load Command ---
 #[tauri::command]
 pub async fn load_llama_model(
@@ -138,9 +149,8 @@ pub async fn load_llama_model(
         }
     }
 
-    // Optional: Redirect stdio if needed (e.g., for logging within Jan)
-    // command.stdout(Stdio::piped());
-    // command.stderr(Stdio::piped());
+    command.stdout(Stdio::piped());
+    command.stderr(Stdio::piped());
     #[cfg(all(windows, target_arch = "x86_64"))]
     {
         use std::os::windows::process::CommandExt;
@@ -149,7 +159,28 @@ pub async fn load_llama_model(
     }
 
     // Spawn the child process
-    let child = command.spawn().map_err(ServerError::Io)?;
+    let mut child = command.spawn().map_err(ServerError::Io)?;
+
+    let stderr = child.stderr.take().expect("stderr was piped");
+    let stderr_task = tokio::spawn(capture_stderr(stderr));
+
+    let stdout = child.stdout.take().expect("stdout was piped");
+    tokio::spawn(async move {
+        let mut reader = BufReader::new(stdout).lines();
+        while let Ok(Some(line)) = reader.next_line().await {
+            log::info!("[llamacpp stdout] {}", line);
+        }
+    });
+
+    tokio::time::sleep(Duration::from_millis(300)).await;
+    if let Some(status) = child.try_wait()? {
+        if !status.success() {
+            let stderr_output = stderr_task.await.unwrap_or_default();
+            log::error!("llama.cpp exited early with code {status:?}");
+            log::error!("--- stderr ---\n{}", stderr_output);
+            return Err(ServerError::LlamacppError(stderr_output.trim().to_string()));
+        }
+    }
 
     // Get the PID to use as session ID
     let pid = child.id().map(|id| id as i32).unwrap_or(-1);
@@ -280,4 +311,3 @@ pub async fn is_process_running(pid: i32, state: State<'_, AppState>) -> Result<
 
     Ok(alive)
 }
-
@@ -1,7 +1,7 @@
 {
   "$schema": "https://schema.tauri.app/config/2",
   "productName": "Jan",
-  "version": "0.6.900",
+  "version": "0.6.901",
   "identifier": "jan.ai.app",
   "build": {
     "frontendDist": "../web-app/dist",
@@ -97,13 +97,17 @@ describe('models service', () => {
         statusText: 'Not Found',
       })
 
-      await expect(fetchModelCatalog()).rejects.toThrow('Failed to fetch model catalog: 404 Not Found')
+      await expect(fetchModelCatalog()).rejects.toThrow(
+        'Failed to fetch model catalog: 404 Not Found'
+      )
     })
 
     it('should handle network error', async () => {
       ;(fetch as any).mockRejectedValue(new Error('Network error'))
 
-      await expect(fetchModelCatalog()).rejects.toThrow('Failed to fetch model catalog: Network error')
+      await expect(fetchModelCatalog()).rejects.toThrow(
+        'Failed to fetch model catalog: Network error'
+      )
     })
   })
 
@@ -209,6 +213,9 @@ describe('models service', () => {
       const model = 'model1'
       const mockSession = { id: 'session1' }
 
+      mockEngine.getLoadedModels.mockResolvedValue({
+        includes: () => false,
+      })
       mockEngine.load.mockResolvedValue(mockSession)
 
       const result = await startModel(provider, model)
@@ -222,10 +229,23 @@ describe('models service', () => {
       const model = 'model1'
       const error = new Error('Failed to start model')
 
+      mockEngine.getLoadedModels.mockResolvedValue({
+        includes: () => false,
+      })
       mockEngine.load.mockRejectedValue(error)
 
       await expect(startModel(provider, model)).rejects.toThrow(error)
     })
+    it('should not load model again', async () => {
+      const provider = { provider: 'openai', models: [] } as ProviderObject
+      const model = 'model1'
+
+      mockEngine.getLoadedModels.mockResolvedValue({
+        includes: () => true,
+      })
+      expect(mockEngine.load).toBeCalledTimes(0)
+      await expect(startModel(provider, model)).resolves.toBe(undefined)
+    })
   })
 
   describe('configurePullOptions', () => {
@@ -248,8 +268,11 @@ describe('models service', () => {
 
       await configurePullOptions(proxyOptions)
 
-      expect(consoleSpy).toHaveBeenCalledWith('Configuring proxy options:', proxyOptions)
+      expect(consoleSpy).toHaveBeenCalledWith(
+        'Configuring proxy options:',
+        proxyOptions
+      )
       consoleSpy.mockRestore()
     })
   })
 })
@@ -145,7 +145,9 @@ export const stopAllModels = async () => {
 export const startModel = async (
   provider: ProviderObject,
   model: string
-): Promise<SessionInfo> => {
+): Promise<SessionInfo | undefined> => {
+  if ((await getEngine(provider.provider).getLoadedModels()).includes(model))
+    return undefined
   return getEngine(provider.provider)
     .load(model)
     .catch((error) => {
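Since `startModel` can now resolve to `undefined` when the model is already loaded (the behaviour the new "should not load model again" test exercises), callers that assumed a `SessionInfo` was always returned need to tolerate the early return. A short hedged usage sketch, with `provider` and `model` standing in for whatever the calling code already has in scope:

```typescript
// Usage sketch only; startModel is the function shown in the hunk above.
const session = await startModel(provider, model)
if (session === undefined) {
  // Model was already loaded; nothing further to start.
} else {
  // A fresh session came back from the engine's load() call.
  console.log('Started llama.cpp session', session)
}
```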