From 449bf17692eba15074dd2384822ea4cc9bc2ed3f Mon Sep 17 00:00:00 2001
From: Akarshan
Date: Wed, 2 Jul 2025 10:06:25 +0530
Subject: [PATCH] Add process aliveness check

---
 extensions/llamacpp-extension/src/index.ts   | 19 ++++++++++++-------
 .../inference_llamacpp_extension/server.rs   | 10 ++++++++++
 src-tauri/src/lib.rs                         |  1 +
 3 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts
index 3a8902ab5..59a6d8b94 100644
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@@ -402,20 +402,21 @@ export default class llamacpp_extension extends AIEngine {
   }
 
   private async waitForModelLoad(
-    port: number,
+    sInfo: SessionInfo,
     timeoutMs = 30_000
   ): Promise<void> {
     const start = Date.now()
     while (Date.now() - start < timeoutMs) {
       try {
-        const res = await fetch(`http://localhost:${port}/health`)
+        const res = await fetch(`http://localhost:${sInfo.port}/health`)
         if (res.ok) {
           return
         }
       } catch (e) {}
       await this.sleep(500) // recheck every 500 ms
     }
-    throw new Error(`Timed out loading model after ${timeoutMs}`)
+    await this.unload(sInfo.pid)
+    throw new Error(`Timed out loading model after ${timeoutMs} ms; killing llama.cpp`)
   }
 
   override async load(
@@ -482,7 +483,7 @@
     }
 
     // Add remaining options from the interface
-    if (cfg.n_gpu_layers > 0) args.push('-ngl', String(cfg.n_gpu_layers))
+    args.push('-ngl', String(cfg.n_gpu_layers > 0 ? cfg.n_gpu_layers : 100))
     if (cfg.threads > 0) args.push('--threads', String(cfg.threads))
     if (cfg.threads_batch > 0)
       args.push('--threads-batch', String(cfg.threads_batch))
@@ -496,7 +497,7 @@
     // Boolean flags
     if (cfg.flash_attn) args.push('--flash-attn')
     if (cfg.cont_batching) args.push('--cont-batching')
-    if (cfg.no_mmap) args.push('--no-mmap')
+    args.push('--no-mmap')
     if (cfg.mlock) args.push('--mlock')
     if (cfg.no_kv_offload) args.push('--no-kv-offload')
     if (isEmbedding) {
@@ -528,10 +529,10 @@
       args,
     })
 
-    await this.waitForModelLoad(sInfo.port)
-
     // Store the session info for later use
     this.activeSessions.set(sInfo.pid, sInfo)
+    await this.waitForModelLoad(sInfo)
+
     return sInfo
 
     } catch (error) {
@@ -654,6 +655,10 @@
     if (!sessionInfo) {
       throw new Error(`No active session found for model: ${opts.model}`)
     }
+    const result = await invoke('is_process_running', { pid: sessionInfo.pid })
+    if (!result) {
+      throw new Error("Model has crashed! Please reload!")
+    }
Please reload!") + } const baseUrl = `http://localhost:${sessionInfo.port}/v1` const url = `${baseUrl}/chat/completions` console.log('Session Info:', sessionInfo, sessionInfo.api_key) diff --git a/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs b/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs index 62f422f43..61c5b22a4 100644 --- a/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs +++ b/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs @@ -9,6 +9,7 @@ use tokio::process::Command; use uuid::Uuid; use std::time::Duration; use tokio::time::timeout; +use sysinfo::{Pid, ProcessesToUpdate, System}; use crate::core::state::AppState; @@ -244,3 +245,12 @@ pub fn generate_api_key(model_id: String, api_secret: String) -> Result Result { + let mut system = System::new(); + system.refresh_processes(ProcessesToUpdate::All, true); + let process_pid = Pid::from(pid as usize); + Ok(system.process(process_pid).is_some()) +} diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index fa8968a0d..b713b5cd0 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -95,6 +95,7 @@ pub fn run() { core::utils::extensions::inference_llamacpp_extension::server::load_llama_model, core::utils::extensions::inference_llamacpp_extension::server::unload_llama_model, core::utils::extensions::inference_llamacpp_extension::server::generate_api_key, + core::utils::extensions::inference_llamacpp_extension::server::is_process_running, ]) .manage(AppState { app_token: Some(generate_app_token()),