Add process aliveness check

Authored by Akarshan on 2025-07-02 10:06:25 +05:30; committed by Louis
parent 0dbfde4c80
commit 449bf17692
3 changed files with 23 additions and 7 deletions


@@ -402,20 +402,21 @@ export default class llamacpp_extension extends AIEngine {
   }
   private async waitForModelLoad(
-    port: number,
+    sInfo: SessionInfo,
     timeoutMs = 30_000
   ): Promise<void> {
     const start = Date.now()
     while (Date.now() - start < timeoutMs) {
       try {
-        const res = await fetch(`http://localhost:${port}/health`)
+        const res = await fetch(`http://localhost:${sInfo.port}/health`)
         if (res.ok) {
           return
         }
       } catch (e) {}
       await this.sleep(500) // 500 ms between health rechecks
     }
-    throw new Error(`Timed out loading model after ${timeoutMs}`)
+    await this.unload(sInfo.pid)
+    throw new Error(`Timed out loading model after ${timeoutMs} ms; killing llama.cpp`)
   }
   override async load(
@@ -482,7 +483,7 @@ export default class llamacpp_extension extends AIEngine {
     }
     // Add remaining options from the interface
-    if (cfg.n_gpu_layers > 0) args.push('-ngl', String(cfg.n_gpu_layers))
+    args.push('-ngl', String(cfg.n_gpu_layers > 0 ? cfg.n_gpu_layers : 100))
     if (cfg.threads > 0) args.push('--threads', String(cfg.threads))
     if (cfg.threads_batch > 0)
       args.push('--threads-batch', String(cfg.threads_batch))
@@ -496,7 +497,7 @@ export default class llamacpp_extension extends AIEngine {
     // Boolean flags
     if (cfg.flash_attn) args.push('--flash-attn')
     if (cfg.cont_batching) args.push('--cont-batching')
-    if (cfg.no_mmap) args.push('--no-mmap')
+    args.push('--no-mmap')
     if (cfg.mlock) args.push('--mlock')
     if (cfg.no_kv_offload) args.push('--no-kv-offload')
     if (isEmbedding) {
@@ -528,10 +529,10 @@ export default class llamacpp_extension extends AIEngine {
       args,
     })
-    await this.waitForModelLoad(sInfo.port)
     // Store the session info for later use
     this.activeSessions.set(sInfo.pid, sInfo)
+    await this.waitForModelLoad(sInfo)
     return sInfo
   } catch (error) {
@@ -654,6 +655,10 @@ export default class llamacpp_extension extends AIEngine {
     if (!sessionInfo) {
       throw new Error(`No active session found for model: ${opts.model}`)
     }
+    const result = await invoke<boolean>('is_process_running', { pid: sessionInfo.pid })
+    if (!result) {
+      throw new Error('Model has crashed! Please reload!')
+    }
     const baseUrl = `http://localhost:${sessionInfo.port}/v1`
     const url = `${baseUrl}/chat/completions`
     console.log('Session Info:', sessionInfo, sessionInfo.api_key)
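
The guard above only works because `invoke` is awaited: `invoke` returns a Promise, and an un-awaited Promise is always truthy, so the crash check would otherwise never fire. A minimal sketch of the same guard factored into a helper, assuming Tauri v2's `invoke` from '@tauri-apps/api/core' (the `assertProcessAlive` name is hypothetical, not part of the extension's actual API):

import { invoke } from '@tauri-apps/api/core'

// Hypothetical helper wrapping the new Tauri command; a sketch, not the
// extension's real API.
async function assertProcessAlive(pid: number): Promise<void> {
  // invoke is async: without await, `alive` would be a Promise object,
  // which is always truthy, and the check below would never throw
  const alive = await invoke<boolean>('is_process_running', { pid })
  if (!alive) {
    throw new Error('Model has crashed! Please reload!')
  }
}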


@@ -9,6 +9,7 @@ use tokio::process::Command;
 use uuid::Uuid;
 use std::time::Duration;
 use tokio::time::timeout;
+use sysinfo::{Pid, ProcessesToUpdate, System};
 use crate::core::state::AppState;
@@ -244,3 +245,12 @@ pub fn generate_api_key(model_id: String, api_secret: String) -> Result<String,
     let hash = general_purpose::STANDARD.encode(code_bytes);
     Ok(hash)
 }
+
+// Process aliveness check: report whether a process with the given PID still exists.
+#[tauri::command]
+pub fn is_process_running(pid: u32) -> Result<bool, String> {
+    let mut system = System::new();
+    system.refresh_processes(ProcessesToUpdate::All, true);
+    let process_pid = Pid::from(pid as usize);
+    Ok(system.process(process_pid).is_some())
+}
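
On the TypeScript side, the same command can also serve as an early-exit condition while polling the health endpoint, so a crash during startup fails fast instead of burning the full timeout. A hedged sketch under the same assumptions as above (Tauri v2 `invoke`, a session object carrying `pid` and `port`); `waitForModelLoadOrCrash` is a hypothetical variant, not the extension's actual method:

import { invoke } from '@tauri-apps/api/core'

// Sketch: poll /health, but bail out as soon as the llama.cpp process dies
async function waitForModelLoadOrCrash(
  sInfo: { pid: number; port: number },
  timeoutMs = 30_000
): Promise<void> {
  const start = Date.now()
  while (Date.now() - start < timeoutMs) {
    const alive = await invoke<boolean>('is_process_running', { pid: sInfo.pid })
    if (!alive) throw new Error('llama.cpp exited during startup')
    try {
      const res = await fetch(`http://localhost:${sInfo.port}/health`)
      if (res.ok) return
    } catch (e) {}
    await new Promise((r) => setTimeout(r, 500)) // 500 ms between rechecks
  }
  throw new Error(`Timed out loading model after ${timeoutMs} ms`)
}

One design note: `refresh_processes(ProcessesToUpdate::All, true)` rescans the entire process table on every call, so if a loop like this polls twice a second, refreshing only the PID of interest (sysinfo's `ProcessesToUpdate::Some`) would be cheaper.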


@@ -95,6 +95,7 @@ pub fn run() {
             core::utils::extensions::inference_llamacpp_extension::server::load_llama_model,
             core::utils::extensions::inference_llamacpp_extension::server::unload_llama_model,
             core::utils::extensions::inference_llamacpp_extension::server::generate_api_key,
+            core::utils::extensions::inference_llamacpp_extension::server::is_process_running,
         ])
         .manage(AppState {
             app_token: Some(generate_app_token()),
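
Once listed in the invoke handler, the command is reachable from any webview. A quick hedged smoke check, assuming a Unix-like host where PID 1 (init/systemd) always exists; this is only a sanity check, not a real test:

import { invoke } from '@tauri-apps/api/core'

// PID 1 should exist on most Unix systems, so this is expected to log `true`
invoke<boolean>('is_process_running', { pid: 1 }).then((alive) => {
  console.log('pid 1 running?', alive)
})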