Add process aliveness check
This commit is contained in:
parent
0dbfde4c80
commit
449bf17692
@ -402,20 +402,21 @@ export default class llamacpp_extension extends AIEngine {
|
||||
}
|
||||
|
||||
private async waitForModelLoad(
|
||||
port: number,
|
||||
sInfo: SessionInfo,
|
||||
timeoutMs = 30_000
|
||||
): Promise<void> {
|
||||
const start = Date.now()
|
||||
while (Date.now() - start < timeoutMs) {
|
||||
try {
|
||||
const res = await fetch(`http://localhost:${port}/health`)
|
||||
const res = await fetch(`http://localhost:${sInfo.port}/health`)
|
||||
if (res.ok) {
|
||||
return
|
||||
}
|
||||
} catch (e) {}
|
||||
await this.sleep(500) // 500 sec interval during rechecks
|
||||
}
|
||||
throw new Error(`Timed out loading model after ${timeoutMs}`)
|
||||
await this.unload(sInfo.pid)
|
||||
throw new Error(`Timed out loading model after ${timeoutMs}... killing llamacpp`)
|
||||
}
|
||||
|
||||
override async load(
|
||||
@ -482,7 +483,7 @@ export default class llamacpp_extension extends AIEngine {
|
||||
}
|
||||
|
||||
// Add remaining options from the interface
|
||||
if (cfg.n_gpu_layers > 0) args.push('-ngl', String(cfg.n_gpu_layers))
|
||||
args.push('-ngl', String(cfg.n_gpu_layers > 0 ? cfg.n_gpu_layers : 100))
|
||||
if (cfg.threads > 0) args.push('--threads', String(cfg.threads))
|
||||
if (cfg.threads_batch > 0)
|
||||
args.push('--threads-batch', String(cfg.threads_batch))
|
||||
@ -496,7 +497,7 @@ export default class llamacpp_extension extends AIEngine {
|
||||
// Boolean flags
|
||||
if (cfg.flash_attn) args.push('--flash-attn')
|
||||
if (cfg.cont_batching) args.push('--cont-batching')
|
||||
if (cfg.no_mmap) args.push('--no-mmap')
|
||||
args.push('--no-mmap')
|
||||
if (cfg.mlock) args.push('--mlock')
|
||||
if (cfg.no_kv_offload) args.push('--no-kv-offload')
|
||||
if (isEmbedding) {
|
||||
@ -528,10 +529,10 @@ export default class llamacpp_extension extends AIEngine {
|
||||
args,
|
||||
})
|
||||
|
||||
await this.waitForModelLoad(sInfo.port)
|
||||
|
||||
// Store the session info for later use
|
||||
this.activeSessions.set(sInfo.pid, sInfo)
|
||||
await this.waitForModelLoad(sInfo)
|
||||
|
||||
|
||||
return sInfo
|
||||
} catch (error) {
|
||||
@ -654,6 +655,10 @@ export default class llamacpp_extension extends AIEngine {
|
||||
if (!sessionInfo) {
|
||||
throw new Error(`No active session found for model: ${opts.model}`)
|
||||
}
|
||||
const result = invoke<boolean>('is_process_running', { pid: sessionInfo.pid })
|
||||
if (!result) {
|
||||
throw new Error("Model have crashed! Please reload!")
|
||||
}
|
||||
const baseUrl = `http://localhost:${sessionInfo.port}/v1`
|
||||
const url = `${baseUrl}/chat/completions`
|
||||
console.log('Session Info:', sessionInfo, sessionInfo.api_key)
|
||||
|
||||
@ -9,6 +9,7 @@ use tokio::process::Command;
|
||||
use uuid::Uuid;
|
||||
use std::time::Duration;
|
||||
use tokio::time::timeout;
|
||||
use sysinfo::{Pid, ProcessesToUpdate, System};
|
||||
|
||||
use crate::core::state::AppState;
|
||||
|
||||
@ -244,3 +245,12 @@ pub fn generate_api_key(model_id: String, api_secret: String) -> Result<String,
|
||||
let hash = general_purpose::STANDARD.encode(code_bytes);
|
||||
Ok(hash)
|
||||
}
|
||||
|
||||
// process aliveness check
|
||||
#[tauri::command]
|
||||
pub fn is_process_running(pid: u32) -> Result<bool, String> {
|
||||
let mut system = System::new();
|
||||
system.refresh_processes(ProcessesToUpdate::All, true);
|
||||
let process_pid = Pid::from(pid as usize);
|
||||
Ok(system.process(process_pid).is_some())
|
||||
}
|
||||
|
||||
@ -95,6 +95,7 @@ pub fn run() {
|
||||
core::utils::extensions::inference_llamacpp_extension::server::load_llama_model,
|
||||
core::utils::extensions::inference_llamacpp_extension::server::unload_llama_model,
|
||||
core::utils::extensions::inference_llamacpp_extension::server::generate_api_key,
|
||||
core::utils::extensions::inference_llamacpp_extension::server::is_process_running,
|
||||
])
|
||||
.manage(AppState {
|
||||
app_token: Some(generate_app_token()),
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user