update version/backend format. fix bugs around load()

Thien Tran 2025-05-30 13:55:31 +08:00 committed by Louis
parent fd9e034461
commit 1ae7c0b59a
No known key found for this signature in database
GPG Key ID: 44FA9F4D33C37DE2
3 changed files with 58 additions and 51 deletions

View File

@@ -1,8 +1,8 @@
 [
   {
-    "key": "backend",
-    "title": "Backend",
-    "description": "Backend for llama.cpp",
+    "key": "version_backend",
+    "title": "Version & Backend",
+    "description": "Version and Backend for llama.cpp",
     "controllerType": "dropdown",
     "controllerProps": {
       "value": "none",

View File

@@ -29,7 +29,7 @@ import {
 import { invoke } from '@tauri-apps/api/core'
 
 type LlamacppConfig = {
-  backend: string
+  version_backend: string
   n_gpu_layers: number
   ctx_size: number
   threads: number
@@ -100,14 +100,14 @@ export default class llamacpp_extension extends AIEngine {
     // update backend settings
     for (let item of settings) {
-      if (item.key === 'backend') {
+      if (item.key === 'version_backend') {
         // NOTE: is there a race condition between when tauri IPC is available
         // and when the extension is loaded?
-        const backends = await listSupportedBackends()
-        console.log('Available backends:', backends)
-        item.controllerProps.options = backends.map((b) => {
+        const version_backends = await listSupportedBackends()
+        console.log('Available version/backends:', version_backends)
+        item.controllerProps.options = version_backends.map((b) => {
           const { version, backend } = b
-          const key = `${version}-${backend}`
+          const key = `${version}/${backend}`
           return { value: key, name: key }
         })
       }
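For reference, each entry returned by listSupportedBackends() carries a version and a backend name, so after this mapping the dropdown options take the combined `<version>/<backend>` form. A minimal sketch with made-up values:

// Hypothetical result of the mapping above, assuming listSupportedBackends()
// returned a single entry { version: 'b5215', backend: 'win-avx2-x64' }:
const options = [{ value: 'b5215/win-avx2-x64', name: 'b5215/win-avx2-x64' }]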
@@ -156,9 +156,7 @@ export default class llamacpp_extension extends AIEngine {
     if (key === 'backend') {
       const valueStr = value as string
-      const idx = valueStr.indexOf('-')
-      const version = valueStr.slice(0, idx)
-      const backend = valueStr.slice(idx + 1)
+      const [version, backend] = valueStr.split('/')
 
       const closure = async () => {
         const isInstalled = await isBackendInstalled(backend, version)
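The `<version>/<backend>` key is split back into its parts wherever it is consumed. A small sketch of that round trip, using a hypothetical helper name:

// Sketch only: parse a '<version>/<backend>' key back into its parts.
// A key without a '/' leaves `backend` undefined, which is what the
// validation added to load() below guards against.
function parseVersionBackend(key: string): { version?: string; backend?: string } {
  const [version, backend] = key.split('/')
  return { version, backend }
}

// parseVersionBackend('b5215/win-avx2-x64')
//   => { version: 'b5215', backend: 'win-avx2-x64' }
// parseVersionBackend('b5215') => { version: 'b5215', backend: undefined }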
@@ -391,11 +389,19 @@ export default class llamacpp_extension extends AIEngine {
     const args: string[] = []
     const cfg = this.config
     const sysInfo = await window.core.api.getSystemInfo()
-    const [backend, version] = cfg.backend.split('-')
+    const [version, backend] = cfg.version_backend.split('/')
+    if (!version || !backend) {
+      // TODO: sometimes version_backend is not set correctly. to investigate
+      throw new Error(
+        `Invalid version/backend format: ${cfg.version_backend}. Expected format: <version>/<backend>`
+      )
+    }
     const exe_name =
       sysInfo.os_type === 'windows' ? 'llama-server.exe' : 'llama-server'
+    const janDataFolderPath = await getJanDataFolderPath()
     const backendPath = await joinPath([
-      await getJanDataFolderPath(),
+      janDataFolderPath,
       'llamacpp',
       'backends',
       backend,
@@ -404,30 +410,30 @@
       'bin',
       exe_name,
     ])
-    const modelPath = await joinPath([
+    const modelConfigPath = await joinPath([
       this.modelsBasePath,
       this.provider,
       modelId,
+      'model.yml',
     ])
-    const modelConfigPath = await joinPath([modelPath, 'model.yml'])
-    const modelConfig = await invoke<ModelConfig>('read_yaml', {
-      modelConfigPath,
-    })
+    const modelConfig = await invoke<ModelConfig>('read_yaml', { path: modelConfigPath })
     const port = await this.getRandomPort()
 
     // disable llama-server webui
     args.push('--no-webui')
     // update key for security; TODO: (qnixsynapse) Make it more secure
     const api_key = await this.generateApiKey(modelId)
-    args.push(`--api-key ${api_key}`)
+    args.push('--api-key', api_key)
     // model option is required
-    // TODO: llama.cpp extension lookup model path based on modelId
-    args.push('-m', modelConfig.model_path)
+    // NOTE: model_path and mmproj_path can be either relative to Jan's data folder or absolute path
+    const modelPath = await joinPath([janDataFolderPath, modelConfig.model_path])
+    args.push('-m', modelPath)
     args.push('-a', modelId)
-    args.push('--port', String(port)) // Default port if not specified
+    args.push('--port', String(port))
     if (modelConfig.mmproj_path) {
-      args.push('--mmproj', modelConfig.mmproj_path)
+      const mmprojPath = await joinPath([janDataFolderPath, modelConfig.mmproj_path])
+      args.push('--mmproj', mmprojPath)
     }
 
     if (cfg.ctx_size !== undefined) {
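With these changes every flag and its value are pushed as separate argv entries (notably --api-key is no longer a single joined string), and model paths are resolved against the Jan data folder. Assuming made-up values, the assembled array looks roughly like this before the optional config flags are appended:

// Illustrative only — the key, path, model id and port are invented values.
const exampleArgs = [
  '--no-webui',
  '--api-key', 'dGhpcy1pcy1ub3QtYS1yZWFsLWtleQ==',
  '-m', '/home/user/jan/models/qwen3-4b/model.gguf',
  '-a', 'qwen3-4b',
  '--port', '32768',
]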
@@ -468,10 +474,7 @@
     console.log('Calling Tauri command llama_load with args:', args)
     try {
-      const sInfo = await invoke<sessionInfo>('load_llama_model', {
-        backendPath: backendPath,
-        args: args,
-      })
+      const sInfo = await invoke<sessionInfo>('load_llama_model', { backendPath, args })
       // Store the session info for later use
       this.activeSessions.set(sInfo.sessionId, sInfo)
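A note on the shortened invoke call: Tauri maps camelCase argument keys from the JavaScript side onto the snake_case parameter names of the Rust command by default, so the shorthand object still reaches the renamed backend_path parameter. Sketch of the pairing:

// TypeScript side: camelCase keys in the argument object.
const sInfo = await invoke<sessionInfo>('load_llama_model', { backendPath, args })
// Tauri resolves `backendPath` to the `backend_path` parameter of the
// load_llama_model command defined in the next file.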

View File

@@ -1,12 +1,12 @@
+use base64::{engine::general_purpose, Engine as _};
+use hmac::{Hmac, Mac};
+use serde::{Deserialize, Serialize};
+use sha2::Sha256;
 use std::path::PathBuf;
-use serde::{Serialize, Deserialize};
 use tauri::{AppHandle, State}; // Import Manager trait
+use thiserror;
 use tokio::process::Command;
 use uuid::Uuid;
-use thiserror;
-use hmac::{Hmac, Mac};
-use sha2::Sha256;
-use base64::{Engine as _, engine::general_purpose};
 
 use crate::core::state::AppState;
@@ -58,7 +58,7 @@ pub struct unloadResult {
 pub async fn load_llama_model(
     _app_handle: AppHandle, // Get the AppHandle
     state: State<'_, AppState>, // Access the shared state
-    backendPath: String,
+    backend_path: &str,
     args: Vec<String>, // Arguments from the frontend
 ) -> ServerResult<sessionInfo> {
     let mut process_lock = state.llama_server_process.lock().await;
@@ -68,25 +68,25 @@ pub async fn load_llama_model(
         return Err(serverError::AlreadyRunning);
     }
 
-    log::info!("Attempting to launch server at path: {:?}", engineBasePath);
+    log::info!("Attempting to launch server at path: {:?}", backend_path);
     log::info!("Using arguments: {:?}", args);
 
-    let server_path_buf = PathBuf::from(&engineBasePath);
+    let server_path_buf = PathBuf::from(backend_path);
     if !server_path_buf.exists() {
         log::error!(
             "Server binary not found at expected path: {:?}",
-            engineBasePath
+            backend_path
         );
         return Err(serverError::BinaryNotFound(format!(
             "Binary not found at {:?}",
-            engineBasePath
+            backend_path
         )));
     }
 
     let port = 8080; // Default port
 
     // Configure the command to run the server
-    let mut command = Command::new(engineBasePath);
+    let mut command = Command::new(backend_path);
     let modelPath = args[2].replace("-m", "");
     let apiKey = args[1].replace("--api-key", "");
@@ -124,7 +124,10 @@ pub async fn load_llama_model(
 // --- Unload Command ---
 #[tauri::command]
-pub async fn unload_llama_model(session_id: String, state: State<'_, AppState>) -> ServerResult<unloadResult> {
+pub async fn unload_llama_model(
+    session_id: String,
+    state: State<'_, AppState>,
+) -> ServerResult<unloadResult> {
     let mut process_lock = state.llama_server_process.lock().await;
     // Take the child process out of the Option, leaving None in its place
     if let Some(mut child) = process_lock.take() {
@@ -144,8 +147,10 @@ pub async fn unload_llama_model(session_id: String, state: State<'_, AppState>)
             return Ok(unloadResult {
                 success: false,
-                error: Some(format!("Session ID mismatch: provided {} doesn't match process {}",
-                    session_id, process_pid)),
+                error: Some(format!(
+                    "Session ID mismatch: provided {} doesn't match process {}",
+                    session_id, process_pid
+                )),
             });
         }
@@ -198,4 +203,3 @@ pub fn generate_api_key(modelId: String, apiSecret: String) -> Result<String, St
     let hash = general_purpose::STANDARD.encode(code_bytes);
     Ok(hash)
 }
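The generate_api_key command appears to derive the key as an HMAC-SHA256 of the model id keyed by apiSecret, base64-encoded (matching the hmac, sha2 and base64 imports above). A minimal TypeScript equivalent, as a sketch using Node's crypto module rather than the extension's actual code path:

import { createHmac } from 'node:crypto'

// Sketch: HMAC-SHA256 over modelId, keyed with apiSecret, base64-encoded,
// mirroring what the Rust command above computes with hmac/sha2/base64.
function generateApiKey(modelId: string, apiSecret: string): string {
  return createHmac('sha256', apiSecret).update(modelId).digest('base64')
}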