Add process aliveness check
parent 0dbfde4c80
commit 449bf17692
@@ -402,20 +402,21 @@ export default class llamacpp_extension extends AIEngine {
   }
 
   private async waitForModelLoad(
-    port: number,
+    sInfo: SessionInfo,
     timeoutMs = 30_000
   ): Promise<void> {
     const start = Date.now()
     while (Date.now() - start < timeoutMs) {
       try {
-        const res = await fetch(`http://localhost:${port}/health`)
+        const res = await fetch(`http://localhost:${sInfo.port}/health`)
        if (res.ok) {
          return
        }
      } catch (e) {}
      await this.sleep(500) // 500 ms interval between rechecks
    }
-    throw new Error(`Timed out loading model after ${timeoutMs}`)
+    await this.unload(sInfo.pid)
+    throw new Error(`Timed out loading model after ${timeoutMs}... killing llamacpp`)
  }

  override async load(
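waitForModelLoad now takes the whole SessionInfo rather than a bare port, so the timeout path can unload by PID instead of leaving a zombie llama.cpp server behind. The polling loop also relies on a this.sleep helper that is outside this diff; a minimal sketch of what such a helper typically looks like (the name and placement are assumptions, not part of the commit):

// Minimal sketch of the sleep helper assumed by the polling loop above;
// the real implementation is not shown in this diff.
function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms))
}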
@@ -482,7 +483,7 @@ export default class llamacpp_extension extends AIEngine {
    }

    // Add remaining options from the interface
-    if (cfg.n_gpu_layers > 0) args.push('-ngl', String(cfg.n_gpu_layers))
+    args.push('-ngl', String(cfg.n_gpu_layers > 0 ? cfg.n_gpu_layers : 100))
    if (cfg.threads > 0) args.push('--threads', String(cfg.threads))
    if (cfg.threads_batch > 0)
      args.push('--threads-batch', String(cfg.threads_batch))
@@ -496,7 +497,7 @@ export default class llamacpp_extension extends AIEngine {
    // Boolean flags
    if (cfg.flash_attn) args.push('--flash-attn')
    if (cfg.cont_batching) args.push('--cont-batching')
-    if (cfg.no_mmap) args.push('--no-mmap')
+    args.push('--no-mmap')
    if (cfg.mlock) args.push('--mlock')
    if (cfg.no_kv_offload) args.push('--no-kv-offload')
    if (isEmbedding) {
@@ -528,10 +529,10 @@ export default class llamacpp_extension extends AIEngine {
      args,
    })

-    await this.waitForModelLoad(sInfo.port)
-
    // Store the session info for later use
    this.activeSessions.set(sInfo.pid, sInfo)

+    await this.waitForModelLoad(sInfo)
+
    return sInfo
  } catch (error) {
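The reordering here appears deliberate: the session is registered in activeSessions before the health poll starts, presumably so that the this.unload(sInfo.pid) call on the timeout path can resolve the session it is about to clean up.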
@@ -654,6 +655,10 @@ export default class llamacpp_extension extends AIEngine {
    if (!sessionInfo) {
      throw new Error(`No active session found for model: ${opts.model}`)
    }
+    const result = await invoke<boolean>('is_process_running', { pid: sessionInfo.pid })
+    if (!result) {
+      throw new Error('Model has crashed! Please reload!')
+    }
    const baseUrl = `http://localhost:${sessionInfo.port}/v1`
    const url = `${baseUrl}/chat/completions`
    console.log('Session Info:', sessionInfo, sessionInfo.api_key)
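This pre-flight check asks the Tauri backend whether the llama.cpp server process is still alive before issuing the request. Note that invoke returns a Promise, so the result must be awaited before it is tested; an un-awaited Promise is always truthy and the check would never fire. A sketch of the same guard factored into a helper, assuming the Tauri v2 frontend API (invoke from '@tauri-apps/api/core'); ensureProcessAlive is a hypothetical name, not part of this commit:

import { invoke } from '@tauri-apps/api/core'

// Rejects when the backend reports the server process as dead.
async function ensureProcessAlive(pid: number): Promise<void> {
  const alive = await invoke<boolean>('is_process_running', { pid })
  if (!alive) {
    throw new Error('Model has crashed! Please reload!')
  }
}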
@@ -9,6 +9,7 @@ use tokio::process::Command;
use uuid::Uuid;
use std::time::Duration;
use tokio::time::timeout;
+use sysinfo::{Pid, ProcessesToUpdate, System};

use crate::core::state::AppState;

@@ -244,3 +245,12 @@ pub fn generate_api_key(model_id: String, api_secret: String) -> Result<String,
    let hash = general_purpose::STANDARD.encode(code_bytes);
    Ok(hash)
}
+
+// process aliveness check
+#[tauri::command]
+pub fn is_process_running(pid: u32) -> Result<bool, String> {
+    let mut system = System::new();
+    system.refresh_processes(ProcessesToUpdate::All, true);
+    let process_pid = Pid::from(pid as usize);
+    Ok(system.process(process_pid).is_some())
+}
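Design note: refresh_processes(ProcessesToUpdate::All, true) rescans every process on the system even though only one PID is queried; recent sysinfo versions also accept ProcessesToUpdate::Some(&[pid]) to limit the refresh to the process being checked, which would be cheaper given that this command runs before every chat completion.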
@@ -95,6 +95,7 @@ pub fn run() {
            core::utils::extensions::inference_llamacpp_extension::server::load_llama_model,
            core::utils::extensions::inference_llamacpp_extension::server::unload_llama_model,
            core::utils::extensions::inference_llamacpp_extension::server::generate_api_key,
+            core::utils::extensions::inference_llamacpp_extension::server::is_process_running,
        ])
        .manage(AppState {
            app_token: Some(generate_app_token()),