Add process aliveness check
parent 0dbfde4c80
commit 449bf17692
@@ -402,20 +402,21 @@ export default class llamacpp_extension extends AIEngine {
   }
 
   private async waitForModelLoad(
-    port: number,
+    sInfo: SessionInfo,
     timeoutMs = 30_000
   ): Promise<void> {
     const start = Date.now()
     while (Date.now() - start < timeoutMs) {
       try {
-        const res = await fetch(`http://localhost:${port}/health`)
+        const res = await fetch(`http://localhost:${sInfo.port}/health`)
        if (res.ok) {
          return
        }
      } catch (e) {}
      await this.sleep(500) // 500 ms interval between rechecks
    }
-    throw new Error(`Timed out loading model after ${timeoutMs}`)
+    await this.unload(sInfo.pid)
+    throw new Error(`Timed out loading model after ${timeoutMs}... killing llamacpp`)
  }

  override async load(
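waitForModelLoad now takes the whole SessionInfo rather than a bare port, so the timeout path can unload by PID instead of leaving a zombie llama.cpp server behind. The polling loop also relies on a this.sleep helper that is outside this diff; a minimal sketch of what such a helper typically looks like (the name and placement are assumptions, not part of the commit):

// Minimal sketch of the sleep helper assumed by the polling loop above;
// the real implementation is not shown in this diff.
function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms))
}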
@@ -482,7 +483,7 @@ export default class llamacpp_extension extends AIEngine {
    }

    // Add remaining options from the interface
-    if (cfg.n_gpu_layers > 0) args.push('-ngl', String(cfg.n_gpu_layers))
+    args.push('-ngl', String(cfg.n_gpu_layers > 0 ? cfg.n_gpu_layers : 100))
    if (cfg.threads > 0) args.push('--threads', String(cfg.threads))
    if (cfg.threads_batch > 0)
      args.push('--threads-batch', String(cfg.threads_batch))
@@ -496,7 +497,7 @@ export default class llamacpp_extension extends AIEngine {
    // Boolean flags
    if (cfg.flash_attn) args.push('--flash-attn')
    if (cfg.cont_batching) args.push('--cont-batching')
-    if (cfg.no_mmap) args.push('--no-mmap')
+    args.push('--no-mmap')
    if (cfg.mlock) args.push('--mlock')
    if (cfg.no_kv_offload) args.push('--no-kv-offload')
    if (isEmbedding) {
@@ -528,10 +529,10 @@ export default class llamacpp_extension extends AIEngine {
      args,
    })

-    await this.waitForModelLoad(sInfo.port)
-
    // Store the session info for later use
    this.activeSessions.set(sInfo.pid, sInfo)

+    await this.waitForModelLoad(sInfo)
+
    return sInfo
  } catch (error) {
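The reordering here appears deliberate: the session is registered in activeSessions before the health poll starts, presumably so that the this.unload(sInfo.pid) call on the timeout path can resolve the session it is about to clean up.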
@@ -654,6 +655,10 @@ export default class llamacpp_extension extends AIEngine {
    if (!sessionInfo) {
      throw new Error(`No active session found for model: ${opts.model}`)
    }
+    const result = await invoke<boolean>('is_process_running', { pid: sessionInfo.pid })
+    if (!result) {
+      throw new Error('Model has crashed! Please reload!')
+    }
    const baseUrl = `http://localhost:${sessionInfo.port}/v1`
    const url = `${baseUrl}/chat/completions`
    console.log('Session Info:', sessionInfo, sessionInfo.api_key)
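This pre-flight check asks the Tauri backend whether the llama.cpp server process is still alive before issuing the request. Note that invoke returns a Promise, so the result must be awaited before it is tested; an un-awaited Promise is always truthy and the check would never fire. A sketch of the same guard factored into a helper, assuming the Tauri v2 frontend API (invoke from '@tauri-apps/api/core'); ensureProcessAlive is a hypothetical name, not part of this commit:

import { invoke } from '@tauri-apps/api/core'

// Rejects when the backend reports the server process as dead.
async function ensureProcessAlive(pid: number): Promise<void> {
  const alive = await invoke<boolean>('is_process_running', { pid })
  if (!alive) {
    throw new Error('Model has crashed! Please reload!')
  }
}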
@@ -9,6 +9,7 @@ use tokio::process::Command;
use uuid::Uuid;
use std::time::Duration;
use tokio::time::timeout;
+use sysinfo::{Pid, ProcessesToUpdate, System};

use crate::core::state::AppState;

@@ -244,3 +245,12 @@ pub fn generate_api_key(model_id: String, api_secret: String) -> Result<String,
    let hash = general_purpose::STANDARD.encode(code_bytes);
    Ok(hash)
}
+
+// process aliveness check
+#[tauri::command]
+pub fn is_process_running(pid: u32) -> Result<bool, String> {
+    let mut system = System::new();
+    system.refresh_processes(ProcessesToUpdate::All, true);
+    let process_pid = Pid::from(pid as usize);
+    Ok(system.process(process_pid).is_some())
+}
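Design note: refresh_processes(ProcessesToUpdate::All, true) rescans every process on the system even though only one PID is queried; recent sysinfo versions also accept ProcessesToUpdate::Some(&[pid]) to limit the refresh to the process being checked, which would be cheaper given that this command runs before every chat completion.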
@@ -95,6 +95,7 @@ pub fn run() {
            core::utils::extensions::inference_llamacpp_extension::server::load_llama_model,
            core::utils::extensions::inference_llamacpp_extension::server::unload_llama_model,
            core::utils::extensions::inference_llamacpp_extension::server::generate_api_key,
+            core::utils::extensions::inference_llamacpp_extension::server::is_process_running,
        ])
        .manage(AppState {
            app_token: Some(generate_app_token()),