Fixup: llama-server load — commit 021f8ae80f (parent ed6f86d4b1)
@ -77,6 +77,7 @@ export interface ModelInfo {
|
||||
name: string; // human‑readable, e.g., "Qwen3 4B Q4_0"
|
||||
quant_type?: string; // q4_0 (optional as it might be part of ID or name)
|
||||
providerId: string; // e.g. "llama.cpp"
|
||||
port: number;
|
||||
sizeBytes: number;
|
||||
tags?: string[];
|
||||
path?: string; // Absolute path to the model file, if applicable
|
||||
@ -106,24 +107,38 @@ export interface PullResult {
|
||||
}
|
||||
|
||||
// 3. /load
|
||||
export interface LoadOptions {
|
||||
providerId: string;
|
||||
modelPath: string;
|
||||
/** any provider‑specific tuning options for llama.cpp server */
|
||||
options?: {
|
||||
port?: number; // 0 means dynamic port
|
||||
n_gpu_layers?: number;
|
||||
n_ctx?: number; // context size
|
||||
// ... other llama-cpp-python or llama.cpp server flags
|
||||
[key: string]: any;
|
||||
};
|
||||
export interface loadOptions {
|
||||
modelPath: string
|
||||
port?: number
|
||||
n_gpu_layers?: number
|
||||
n_ctx?: number
|
||||
threads?: number
|
||||
threads_batch?: number
|
||||
ctx_size?: number
|
||||
n_predict?: number
|
||||
batch_size?: number
|
||||
ubatch_size?: number
|
||||
device?: string
|
||||
split_mode?: string
|
||||
main_gpu?: number
|
||||
flash_attn?: boolean
|
||||
cont_batching?: boolean
|
||||
no_mmap?: boolean
|
||||
mlock?: boolean
|
||||
no_kv_offload?: boolean
|
||||
cache_type_k?: string
|
||||
cache_type_v?: string
|
||||
defrag_thold?: number
|
||||
rope_scaling?: string
|
||||
rope_scale?: number
|
||||
rope_freq_base?: number
|
||||
rope_freq_scale?: number
|
||||
}
|
||||
|
||||
export interface SessionInfo {
|
||||
sessionId: string; // opaque handle for unload/chat
|
||||
port: number; // llama-server output port (corrected from portid)
|
||||
modelPath: string; // path of the loaded model
|
||||
providerId: string;
|
||||
settings: Record<string, unknown>; // The actual settings used to load
|
||||
}
|
||||
|
||||
|
||||
@ -1,7 +1,11 @@
|
||||
use std::path::PathBuf;
|
||||
use serde::{Serialize, Deserialize};
|
||||
use tauri::path::BaseDirectory;
|
||||
use tauri::{AppHandle, Manager, State}; // Import Manager trait
|
||||
use tokio::process::Command;
|
||||
use std::collections::HashMap;
|
||||
use uuid::Uuid;
|
||||
use thiserror;
|
||||
|
||||
use crate::core::state::AppState;
|
||||
|
||||
@ -61,13 +65,21 @@ fn get_server_path(app_handle: &AppHandle) -> ServerResult<PathBuf> {
|
||||
// })
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct SessionInfo {
|
||||
pub session_id: String, // opaque handle for unload/chat
|
||||
pub port: u16, // llama-server output port
|
||||
pub model_path: String, // path of the loaded model
|
||||
pub settings: HashMap<String, serde_json::Value>, // The actual settings used to load
|
||||
}
|
||||
|
||||
// --- Load Command ---
|
||||
#[tauri::command]
|
||||
pub async fn load(
|
||||
app_handle: AppHandle, // Get the AppHandle
|
||||
state: State<'_, AppState>, // Access the shared state
|
||||
args: Vec<String>, // Arguments from the frontend
|
||||
) -> ServerResult<()> {
|
||||
) -> ServerResult<SessionInfo> {
|
||||
let mut process_lock = state.llama_server_process.lock().await;
|
||||
|
||||
if process_lock.is_some() {
|
||||
@ -90,6 +102,38 @@ pub async fn load(
|
||||
)));
|
||||
}
|
||||
|
||||
let mut port = 8080; // Default port
|
||||
let mut model_path = String::new();
|
||||
let mut settings: HashMap<String, serde_json::Value> = HashMap::new();
|
||||
|
||||
// Extract arguments into settings map and specific fields
|
||||
let mut i = 0;
|
||||
while i < args.len() {
|
||||
if args[i] == "--port" && i + 1 < args.len() {
|
||||
if let Ok(p) = args[i + 1].parse::<u16>() {
|
||||
port = p;
|
||||
}
|
||||
settings.insert("port".to_string(), serde_json::Value::String(args[i + 1].clone()));
|
||||
i += 2;
|
||||
} else if args[i] == "-m" && i + 1 < args.len() {
|
||||
model_path = args[i + 1].clone();
|
||||
settings.insert("modelPath".to_string(), serde_json::Value::String(model_path.clone()));
|
||||
i += 2;
|
||||
} else if i + 1 < args.len() && args[i].starts_with("-") {
|
||||
// Store other arguments as settings
|
||||
let key = args[i].trim_start_matches("-").trim_start_matches("-");
|
||||
settings.insert(key.to_string(), serde_json::Value::String(args[i + 1].clone()));
|
||||
i += 2;
|
||||
} else {
|
||||
// Handle boolean flags
|
||||
if args[i].starts_with("-") {
|
||||
let key = args[i].trim_start_matches("-").trim_start_matches("-");
|
||||
settings.insert(key.to_string(), serde_json::Value::Bool(true));
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Configure the command to run the server
|
||||
let mut command = Command::new(server_path);
|
||||
command.args(args);
|
||||
@ -106,7 +150,15 @@ pub async fn load(
|
||||
// Store the child process handle in the state
|
||||
*process_lock = Some(child);
|
||||
|
||||
Ok(())
|
||||
let session_id = format!("session_{}", Uuid::new_v4());
|
||||
let session_info = SessionInfo {
|
||||
session_id,
|
||||
port,
|
||||
model_path,
|
||||
settings,
|
||||
};
|
||||
|
||||
Ok(session_info)
|
||||
}
|
||||
|
||||
// --- Unload Command ---
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user