update version/backend format. fix bugs around load()
parent fd9e034461
commit 1ae7c0b59a
@@ -1,8 +1,8 @@
 [
   {
-    "key": "backend",
-    "title": "Backend",
-    "description": "Backend for llama.cpp",
+    "key": "version_backend",
+    "title": "Version & Backend",
+    "description": "Version and Backend for llama.cpp",
     "controllerType": "dropdown",
     "controllerProps": {
       "value": "none",
@@ -29,7 +29,7 @@ import {
 import { invoke } from '@tauri-apps/api/core'
 
 type LlamacppConfig = {
-  backend: string
+  version_backend: string
   n_gpu_layers: number
   ctx_size: number
   threads: number
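For reference, a config value under the new key format looks roughly like this. A minimal sketch: the field names come from the `LlamacppConfig` type above, the concrete values are illustrative only.

```ts
// Illustrative values only; field names follow the LlamacppConfig type above.
const cfg: Partial<LlamacppConfig> = {
  version_backend: 'b5219/win-avx2-x64', // '<version>/<backend>'
  n_gpu_layers: 99,
  ctx_size: 4096,
  threads: 8,
}
```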
@@ -100,14 +100,14 @@ export default class llamacpp_extension extends AIEngine {
 
     // update backend settings
     for (let item of settings) {
-      if (item.key === 'backend') {
+      if (item.key === 'version_backend') {
         // NOTE: is there a race condition between when tauri IPC is available
         // and when the extension is loaded?
-        const backends = await listSupportedBackends()
-        console.log('Available backends:', backends)
-        item.controllerProps.options = backends.map((b) => {
+        const version_backends = await listSupportedBackends()
+        console.log('Available version/backends:', version_backends)
+        item.controllerProps.options = version_backends.map((b) => {
           const { version, backend } = b
-          const key = `${version}-${backend}`
+          const key = `${version}/${backend}`
           return { value: key, name: key }
         })
       }
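The dropdown options built in this hunk end up as `{ value, name }` pairs keyed by `<version>/<backend>`. A minimal sketch, assuming `listSupportedBackends()` resolves to an array of `{ version, backend }` objects as the destructuring above implies; the concrete values are made up.

```ts
// Assumed return shape of listSupportedBackends(); values are illustrative.
const version_backends = [
  { version: 'b5219', backend: 'win-avx2-x64' },
  { version: 'b5219', backend: 'linux-cuda-x64' },
]
const options = version_backends.map(({ version, backend }) => {
  const key = `${version}/${backend}`
  return { value: key, name: key }
})
// options[0] -> { value: 'b5219/win-avx2-x64', name: 'b5219/win-avx2-x64' }
```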
@@ -156,9 +156,7 @@ export default class llamacpp_extension extends AIEngine {
 
     if (key === 'backend') {
       const valueStr = value as string
-      const idx = valueStr.indexOf('-')
-      const version = valueStr.slice(0, idx)
-      const backend = valueStr.slice(idx + 1)
+      const [version, backend] = valueStr.split('/')
 
       const closure = async () => {
         const isInstalled = await isBackendInstalled(backend, version)
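Splitting on `/` avoids the ambiguity of the old `-` separator, since backend identifiers can themselves contain dashes. A minimal sketch of the same parsing as a standalone helper; the helper name and example values are hypothetical, not part of this change.

```ts
// Hypothetical helper; mirrors the '<version>/<backend>' format used in this commit.
function parseVersionBackend(key: string): { version: string; backend: string } {
  const [version, backend] = key.split('/')
  if (!version || !backend) {
    throw new Error(
      `Invalid version/backend format: ${key}. Expected format: <version>/<backend>`
    )
  }
  return { version, backend }
}

// parseVersionBackend('b5219/win-avx2-x64') -> { version: 'b5219', backend: 'win-avx2-x64' }
```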
@@ -391,11 +389,19 @@ export default class llamacpp_extension extends AIEngine {
     const args: string[] = []
     const cfg = this.config
     const sysInfo = await window.core.api.getSystemInfo()
-    const [backend, version] = cfg.backend.split('-')
+    const [version, backend] = cfg.version_backend.split('/')
+    if (!version || !backend) {
+      // TODO: sometimes version_backend is not set correctly. to investigate
+      throw new Error(
+        `Invalid version/backend format: ${cfg.version_backend}. Expected format: <version>/<backend>`
+      )
+    }
 
     const exe_name =
       sysInfo.os_type === 'windows' ? 'llama-server.exe' : 'llama-server'
+    const janDataFolderPath = await getJanDataFolderPath()
     const backendPath = await joinPath([
-      await getJanDataFolderPath(),
+      janDataFolderPath,
       'llamacpp',
       'backends',
       backend,
@@ -404,30 +410,30 @@ export default class llamacpp_extension extends AIEngine {
       'bin',
       exe_name,
     ])
-    const modelPath = await joinPath([
+    const modelConfigPath = await joinPath([
       this.modelsBasePath,
       this.provider,
       modelId,
+      'model.yml',
     ])
-    const modelConfigPath = await joinPath([modelPath, 'model.yml'])
-    const modelConfig = await invoke<ModelConfig>('read_yaml', {
-      modelConfigPath,
-    })
+    const modelConfig = await invoke<ModelConfig>('read_yaml', { path: modelConfigPath })
     const port = await this.getRandomPort()
 
     // disable llama-server webui
     args.push('--no-webui')
     // update key for security; TODO: (qnixsynapse) Make it more secure
     const api_key = await this.generateApiKey(modelId)
-    args.push(`--api-key ${api_key}`)
+    args.push('--api-key', api_key)
 
     // model option is required
-    // TODO: llama.cpp extension lookup model path based on modelId
-    args.push('-m', modelConfig.model_path)
+    // NOTE: model_path and mmproj_path can be either relative to Jan's data folder or absolute path
+    const modelPath = await joinPath([janDataFolderPath, modelConfig.model_path])
+    args.push('-m', modelPath)
     args.push('-a', modelId)
-    args.push('--port', String(port)) // Default port if not specified
+    args.push('--port', String(port))
     if (modelConfig.mmproj_path) {
-      args.push('--mmproj', modelConfig.mmproj_path)
+      const mmprojPath = await joinPath([janDataFolderPath, modelConfig.mmproj_path])
+      args.push('--mmproj', mmprojPath)
     }
 
     if (cfg.ctx_size !== undefined) {
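The NOTE above says model_path and mmproj_path may be either relative to Jan's data folder or absolute, while the new code always joins them with janDataFolderPath. A small sketch of a resolver that honors both cases; the helper and its absolute-path heuristic are assumptions for illustration, not part of this diff, and `joinPath` is passed in as the same path-join utility this file already uses.

```ts
// Sketch only: handle both relative-to-data-folder and absolute model paths.
// The absolute-path check below is an assumption, not behavior from this commit.
async function resolveModelFile(
  janDataFolderPath: string,
  configuredPath: string,
  joinPath: (parts: string[]) => Promise<string>
): Promise<string> {
  const isAbsolute =
    configuredPath.startsWith('/') || /^[A-Za-z]:[\\/]/.test(configuredPath)
  return isAbsolute ? configuredPath : await joinPath([janDataFolderPath, configuredPath])
}
```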
@@ -468,10 +474,7 @@ export default class llamacpp_extension extends AIEngine {
     console.log('Calling Tauri command llama_load with args:', args)
 
     try {
-      const sInfo = await invoke<sessionInfo>('load_llama_model', {
-        backendPath: backendPath,
-        args: args,
-      })
+      const sInfo = await invoke<sessionInfo>('load_llama_model', { backendPath, args })
 
       // Store the session info for later use
       this.activeSessions.set(sInfo.sessionId, sInfo)
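Further down in this commit the Rust command parameter is renamed to `backend_path: &str`, while the invoke call keeps the camelCase key. That works because Tauri converts camelCase argument keys from JavaScript to snake_case command parameters by default. A minimal sketch of how the call site lines up with that signature:

```ts
// Rust side (from the hunks below):
//   pub async fn load_llama_model(..., backend_path: &str, args: Vec<String>) -> ServerResult<sessionInfo>
// Tauri maps camelCase invoke keys to snake_case parameters by default,
// so the shorthand object below reaches backend_path / args on the Rust side.
const sInfo = await invoke<sessionInfo>('load_llama_model', { backendPath, args })
```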
@@ -1,12 +1,12 @@
+use base64::{engine::general_purpose, Engine as _};
+use hmac::{Hmac, Mac};
+use serde::{Deserialize, Serialize};
+use sha2::Sha256;
 use std::path::PathBuf;
-use serde::{Serialize, Deserialize};
 use tauri::{AppHandle, State}; // Import Manager trait
+use thiserror;
 use tokio::process::Command;
 use uuid::Uuid;
-use thiserror;
-use hmac::{Hmac, Mac};
-use sha2::Sha256;
-use base64::{Engine as _, engine::general_purpose};
 
 use crate::core::state::AppState;
@@ -16,8 +16,8 @@ type HmacSha256 = Hmac<Sha256>;
 pub enum serverError {
     #[error("Server is already running")]
     AlreadyRunning,
-    // #[error("Server is not running")]
-    // NotRunning,
+    // #[error("Server is not running")]
+    // NotRunning,
     #[error("Failed to locate server binary: {0}")]
     BinaryNotFound(String),
     #[error("IO error: {0}")]
@@ -40,10 +40,10 @@ type ServerResult<T> = Result<T, serverError>;
 
 #[derive(Debug, Serialize, Deserialize)]
 pub struct sessionInfo {
-    pub pid: String, // opaque handle for unload/chat
-    pub port: u16, // llama-server output port
+    pub pid: String, // opaque handle for unload/chat
+    pub port: u16, // llama-server output port
     pub modelId: String,
-    pub modelPath: String, // path of the loaded model
+    pub modelPath: String, // path of the loaded model
     pub apiKey: String,
 }
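On the frontend, `invoke<sessionInfo>` in the extension code above expects an object with these fields once the struct is serialized. A sketch of the matching TypeScript shape, assuming the field names serialize exactly as declared (no serde renaming is visible in this hunk):

```ts
// Assumed frontend mirror of the Rust sessionInfo struct in this diff.
interface sessionInfo {
  pid: string       // opaque handle for unload/chat
  port: number      // llama-server output port
  modelId: string
  modelPath: string // path of the loaded model
  apiKey: string
}
```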
@@ -56,10 +56,10 @@ pub struct unloadResult {
 // --- Load Command ---
 #[tauri::command]
 pub async fn load_llama_model(
-    _app_handle: AppHandle, // Get the AppHandle
+    _app_handle: AppHandle, // Get the AppHandle
     state: State<'_, AppState>, // Access the shared state
-    backendPath: String,
-    args: Vec<String>, // Arguments from the frontend
+    backend_path: &str,
+    args: Vec<String>, // Arguments from the frontend
 ) -> ServerResult<sessionInfo> {
     let mut process_lock = state.llama_server_process.lock().await;
@@ -68,25 +68,25 @@ pub async fn load_llama_model(
         return Err(serverError::AlreadyRunning);
     }
 
-    log::info!("Attempting to launch server at path: {:?}", engineBasePath);
+    log::info!("Attempting to launch server at path: {:?}", backend_path);
     log::info!("Using arguments: {:?}", args);
 
-    let server_path_buf = PathBuf::from(&engineBasePath);
+    let server_path_buf = PathBuf::from(backend_path);
     if !server_path_buf.exists() {
         log::error!(
             "Server binary not found at expected path: {:?}",
-            engineBasePath
+            backend_path
         );
         return Err(serverError::BinaryNotFound(format!(
             "Binary not found at {:?}",
-            engineBasePath
+            backend_path
         )));
     }
 
     let port = 8080; // Default port
 
     // Configure the command to run the server
-    let mut command = Command::new(engineBasePath);
+    let mut command = Command::new(backend_path);
 
     let modelPath = args[2].replace("-m", "");
     let apiKey = args[1].replace("--api-key", "");
@@ -124,7 +124,10 @@ pub async fn load_llama_model(
 
 // --- Unload Command ---
 #[tauri::command]
-pub async fn unload_llama_model(session_id: String, state: State<'_, AppState>) -> ServerResult<unloadResult> {
+pub async fn unload_llama_model(
+    session_id: String,
+    state: State<'_, AppState>,
+) -> ServerResult<unloadResult> {
     let mut process_lock = state.llama_server_process.lock().await;
     // Take the child process out of the Option, leaving None in its place
     if let Some(mut child) = process_lock.take() {
@@ -144,8 +147,10 @@ pub async fn unload_llama_model(session_id: String, state: State<'_, AppState>)
 
         return Ok(unloadResult {
             success: false,
-            error: Some(format!("Session ID mismatch: provided {} doesn't match process {}",
-                session_id, process_pid)),
+            error: Some(format!(
+                "Session ID mismatch: provided {} doesn't match process {}",
+                session_id, process_pid
+            )),
         });
     }
@@ -198,4 +203,3 @@ pub fn generate_api_key(modelId: String, apiSecret: String) -> Result<String, St
     let hash = general_purpose::STANDARD.encode(code_bytes);
     Ok(hash)
 }
-