From 1ae7c0b59a9f31be3b5ee1cd6bf33deac537e137 Mon Sep 17 00:00:00 2001
From: Thien Tran
Date: Fri, 30 May 2025 13:55:31 +0800
Subject: [PATCH] update version/backend format. fix bugs around load()

---
 extensions/llamacpp-extension/settings.json |  6 +--
 extensions/llamacpp-extension/src/index.ts  | 53 ++++++++++---------
 .../inference_llamacpp_extension/server.rs  | 50 +++++++++--------
 3 files changed, 58 insertions(+), 51 deletions(-)

diff --git a/extensions/llamacpp-extension/settings.json b/extensions/llamacpp-extension/settings.json
index 1d8bca20b..206a73ab3 100644
--- a/extensions/llamacpp-extension/settings.json
+++ b/extensions/llamacpp-extension/settings.json
@@ -1,8 +1,8 @@
 [
   {
-    "key": "backend",
-    "title": "Backend",
-    "description": "Backend for llama.cpp",
+    "key": "version_backend",
+    "title": "Version & Backend",
+    "description": "Version and Backend for llama.cpp",
     "controllerType": "dropdown",
     "controllerProps": {
       "value": "none",
diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts
index 455406b7a..f97b3e11b 100644
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@@ -29,7 +29,7 @@ import {
 import { invoke } from '@tauri-apps/api/core'
 
 type LlamacppConfig = {
-  backend: string
+  version_backend: string
   n_gpu_layers: number
   ctx_size: number
   threads: number
@@ -100,14 +100,14 @@ export default class llamacpp_extension extends AIEngine {
 
     // update backend settings
     for (let item of settings) {
-      if (item.key === 'backend') {
+      if (item.key === 'version_backend') {
         // NOTE: is there a race condition between when tauri IPC is available
         // and when the extension is loaded?
-        const backends = await listSupportedBackends()
-        console.log('Available backends:', backends)
-        item.controllerProps.options = backends.map((b) => {
+        const version_backends = await listSupportedBackends()
+        console.log('Available version/backends:', version_backends)
+        item.controllerProps.options = version_backends.map((b) => {
           const { version, backend } = b
-          const key = `${version}-${backend}`
+          const key = `${version}/${backend}`
           return { value: key, name: key }
         })
       }
@@ -156,9 +156,7 @@ export default class llamacpp_extension extends AIEngine {
 
     if (key === 'backend') {
       const valueStr = value as string
-      const idx = valueStr.indexOf('-')
-      const version = valueStr.slice(0, idx)
-      const backend = valueStr.slice(idx + 1)
+      const [version, backend] = valueStr.split('/')
 
       const closure = async () => {
         const isInstalled = await isBackendInstalled(backend, version)
@@ -391,11 +389,19 @@ export default class llamacpp_extension extends AIEngine {
     const args: string[] = []
     const cfg = this.config
     const sysInfo = await window.core.api.getSystemInfo()
-    const [backend, version] = cfg.backend.split('-')
+    const [version, backend] = cfg.version_backend.split('/')
+    if (!version || !backend) {
+      // TODO: sometimes version_backend is not set correctly. to investigate
+      throw new Error(
+        `Invalid version/backend format: ${cfg.version_backend}. Expected format: <version>/<backend>`
+      )
+    }
+
     const exe_name =
       sysInfo.os_type === 'windows' ? 'llama-server.exe' : 'llama-server'
+    const janDataFolderPath = await getJanDataFolderPath()
     const backendPath = await joinPath([
-      await getJanDataFolderPath(),
+      janDataFolderPath,
       'llamacpp',
       'backends',
       backend,
@@ -404,30 +410,30 @@ export default class llamacpp_extension extends AIEngine {
       'bin',
       exe_name,
     ])
-    const modelPath = await joinPath([
+    const modelConfigPath = await joinPath([
       this.modelsBasePath,
       this.provider,
       modelId,
+      'model.yml',
     ])
-    const modelConfigPath = await joinPath([modelPath, 'model.yml'])
-    const modelConfig = await invoke('read_yaml', {
-      modelConfigPath,
-    })
+    const modelConfig = await invoke('read_yaml', { path: modelConfigPath })
     const port = await this.getRandomPort()
 
     // disable llama-server webui
     args.push('--no-webui')
     // update key for security; TODO: (qnixsynapse) Make it more secure
     const api_key = await this.generateApiKey(modelId)
-    args.push(`--api-key ${api_key}`)
+    args.push('--api-key', api_key)
 
     // model option is required
-    // TODO: llama.cpp extension lookup model path based on modelId
-    args.push('-m', modelConfig.model_path)
+    // NOTE: model_path and mmproj_path can be either relative to Jan's data folder or absolute path
+    const modelPath = await joinPath([janDataFolderPath, modelConfig.model_path])
+    args.push('-m', modelPath)
     args.push('-a', modelId)
-    args.push('--port', String(port)) // Default port if not specified
+    args.push('--port', String(port))
 
     if (modelConfig.mmproj_path) {
-      args.push('--mmproj', modelConfig.mmproj_path)
+      const mmprojPath = await joinPath([janDataFolderPath, modelConfig.mmproj_path])
+      args.push('--mmproj', mmprojPath)
     }
     if (cfg.ctx_size !== undefined) {
@@ -468,10 +474,7 @@ export default class llamacpp_extension extends AIEngine {
 
     console.log('Calling Tauri command llama_load with args:', args)
     try {
-      const sInfo = await invoke('load_llama_model', {
-        backendPath: backendPath,
-        args: args,
-      })
+      const sInfo = await invoke('load_llama_model', { backendPath, args })
 
       // Store the session info for later use
       this.activeSessions.set(sInfo.sessionId, sInfo)
diff --git a/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs b/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs
index 849a14a17..59f0790b0 100644
--- a/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs
+++ b/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs
@@ -1,12 +1,12 @@
+use base64::{engine::general_purpose, Engine as _};
+use hmac::{Hmac, Mac};
+use serde::{Deserialize, Serialize};
+use sha2::Sha256;
 use std::path::PathBuf;
-use serde::{Serialize, Deserialize};
 use tauri::{AppHandle, State}; // Import Manager trait
+use thiserror;
 use tokio::process::Command;
 use uuid::Uuid;
-use thiserror;
-use hmac::{Hmac, Mac};
-use sha2::Sha256;
-use base64::{Engine as _, engine::general_purpose};
 
 use crate::core::state::AppState;
 
@@ -16,8 +16,8 @@ type HmacSha256 = Hmac<Sha256>;
 pub enum serverError {
     #[error("Server is already running")]
     AlreadyRunning,
-    // #[error("Server is not running")]
-    // NotRunning,
     #[error("Failed to locate server binary: {0}")]
     BinaryNotFound(String),
     #[error("IO error: {0}")]
@@ -40,10 +40,10 @@ type ServerResult<T> = Result<T, serverError>;
 
 #[derive(Debug, Serialize, Deserialize)]
 pub struct sessionInfo {
-    pub pid: String, // opaque handle for unload/chat
-    pub port: u16, // llama-server output port
+    pub pid: String,       // opaque handle for unload/chat
+    pub port: u16,         // llama-server output port
     pub modelId: String,
-    pub modelPath: String,  // path of the loaded model
+    pub modelPath: String, // path of the loaded model
     pub apiKey: String,
 }
 
@@ -56,10 +56,10 @@ pub struct unloadResult {
 
 // --- Load Command ---
 #[tauri::command]
 pub async fn load_llama_model(
-    _app_handle: AppHandle, // Get the AppHandle
+    _app_handle: AppHandle,     // Get the AppHandle
     state: State<'_, AppState>, // Access the shared state
-    backendPath: String,
-    args: Vec<String>, // Arguments from the frontend
+    backend_path: &str,
+    args: Vec<String>,          // Arguments from the frontend
 ) -> ServerResult<sessionInfo> {
     let mut process_lock = state.llama_server_process.lock().await;
@@ -68,25 +68,25 @@ pub async fn load_llama_model(
         return Err(serverError::AlreadyRunning);
     }
 
-    log::info!("Attempting to launch server at path: {:?}", engineBasePath);
+    log::info!("Attempting to launch server at path: {:?}", backend_path);
     log::info!("Using arguments: {:?}", args);
 
-    let server_path_buf = PathBuf::from(&engineBasePath);
+    let server_path_buf = PathBuf::from(backend_path);
     if !server_path_buf.exists() {
         log::error!(
             "Server binary not found at expected path: {:?}",
-            engineBasePath
+            backend_path
         );
         return Err(serverError::BinaryNotFound(format!(
             "Binary not found at {:?}",
-            engineBasePath
+            backend_path
         )));
     }
 
     let port = 8080; // Default port
 
     // Configure the command to run the server
-    let mut command = Command::new(engineBasePath);
+    let mut command = Command::new(backend_path);
 
     let modelPath = args[2].replace("-m", "");
     let apiKey = args[1].replace("--api-key", "");
@@ -124,7 +124,10 @@
 
 // --- Unload Command ---
 #[tauri::command]
-pub async fn unload_llama_model(session_id: String, state: State<'_, AppState>) -> ServerResult<unloadResult> {
+pub async fn unload_llama_model(
+    session_id: String,
+    state: State<'_, AppState>,
+) -> ServerResult<unloadResult> {
     let mut process_lock = state.llama_server_process.lock().await;
     // Take the child process out of the Option, leaving None in its place
     if let Some(mut child) = process_lock.take() {
@@ -144,8 +147,10 @@
 
             return Ok(unloadResult {
                 success: false,
-                error: Some(format!("Session ID mismatch: provided {} doesn't match process {}",
-                    session_id, process_pid)),
+                error: Some(format!(
+                    "Session ID mismatch: provided {} doesn't match process {}",
+                    session_id, process_pid
+                )),
             });
         }
 
@@ -177,7 +182,7 @@ pub async fn unload_llama_model(session_id: String, state: State<'_, AppState>) -> ServerResult<unloadResult>
     } else {
         log::warn!("Attempted to unload server, but no process was running");
 
-        // If no process is running but client thinks there is, 
+        // If no process is running but client thinks there is,
         // still report success since the end state is what they wanted
         Ok(unloadResult {
             success: true,
@@ -198,4 +203,3 @@ pub fn generate_api_key(modelId: String, apiSecret: String) -> Result