Fixup: llama-server load

This commit is contained in:
Akarshan Biswas 2025-05-19 19:33:22 +05:30 committed by Louis
parent ed6f86d4b1
commit 021f8ae80f
No known key found for this signature in database
GPG Key ID: 44FA9F4D33C37DE2
2 changed files with 81 additions and 14 deletions

View File

@ -77,6 +77,7 @@ export interface ModelInfo {
name: string; // human-readable, e.g., "Qwen3 4B Q4_0"
quant_type?: string; // q4_0 (optional as it might be part of ID or name)
providerId: string; // e.g. "llama.cpp"
port: number;
sizeBytes: number;
tags?: string[];
path?: string; // Absolute path to the model file, if applicable
@ -106,24 +107,38 @@ export interface PullResult {
}
// 3. /load
export interface LoadOptions {
providerId: string;
modelPath: string;
/** any provider-specific tuning options for llama.cpp server */
options?: {
port?: number; // 0 means dynamic port
n_gpu_layers?: number;
n_ctx?: number; // context size
// ... other llama-cpp-python or llama.cpp server flags
[key: string]: any;
};
/**
 * Options for the `/load` operation.
 *
 * Only `modelPath` is required; every other field is optional.
 *
 * NOTE(review): the field names look like llama.cpp server flags (e.g. `n_gpu_layers`,
 * `flash_attn`); presumably each one maps to the matching command-line argument.
 * Confirm against the code that builds the argument list.
 * NOTE(review): this interface is named in lowerCamelCase while its siblings
 * (`ModelInfo`, `SessionInfo`) use PascalCase.
 */
export interface loadOptions {
// Path of the model file to load (the only mandatory option)
modelPath: string
// Port for the server to use
port?: number
n_gpu_layers?: number
// NOTE(review): `n_ctx` and `ctx_size` both appear to describe the context size;
// confirm which one is actually consumed.
n_ctx?: number
// Threading options
threads?: number
threads_batch?: number
ctx_size?: number
n_predict?: number
// Batch sizing options
batch_size?: number
ubatch_size?: number
// Device / GPU selection options
device?: string
split_mode?: string
main_gpu?: number
// Boolean switches; all are optional, so an omitted switch is `undefined`
flash_attn?: boolean
cont_batching?: boolean
no_mmap?: boolean
mlock?: boolean
no_kv_offload?: boolean
// Cache options
cache_type_k?: string
cache_type_v?: string
defrag_thold?: number
// RoPE options
rope_scaling?: string
rope_scale?: number
rope_freq_base?: number
rope_freq_scale?: number
}
/**
 * Describes a loaded model session: the handle used to refer to it, where the
 * server is listening, and what it was loaded with.
 */
export interface SessionInfo {
sessionId: string; // opaque handle for unload/chat
port: number; // llama-server output port
modelPath: string; // path of the loaded model
// NOTE(review): the Rust `SessionInfo` struct built by the `load` command has no
// provider field; confirm whether this property is still populated.
providerId: string;
settings: Record<string, unknown>; // The actual settings used to load
}

View File

@ -1,7 +1,11 @@
use std::path::PathBuf;
use serde::{Serialize, Deserialize};
use tauri::path::BaseDirectory;
use tauri::{AppHandle, Manager, State}; // Import Manager trait
use tokio::process::Command;
use std::collections::HashMap;
use uuid::Uuid;
use thiserror;
use crate::core::state::AppState;
@ -61,13 +65,21 @@ fn get_server_path(app_handle: &AppHandle) -> ServerResult<PathBuf> {
// })
}
#[derive(Debug, Serialize, Deserialize)]
pub struct SessionInfo {
pub session_id: String, // opaque handle for unload/chat
pub port: u16, // llama-server output port
pub model_path: String, // path of the loaded model
pub settings: HashMap<String, serde_json::Value>, // The actual settings used to load
}
// --- Load Command ---
#[tauri::command]
pub async fn load(
app_handle: AppHandle, // Get the AppHandle
state: State<'_, AppState>, // Access the shared state
args: Vec<String>, // Arguments from the frontend
) -> ServerResult<()> {
) -> ServerResult<SessionInfo> {
let mut process_lock = state.llama_server_process.lock().await;
if process_lock.is_some() {
@ -90,6 +102,38 @@ pub async fn load(
)));
}
// Session metadata reported back to the caller. It is derived from `args`, which is only
// borrowed here because the same vector is handed to the server command afterwards.
let mut port: u16 = 8080; // Default port, kept when no valid `--port` value is given
let mut model_path = String::new();
let mut settings: HashMap<String, serde_json::Value> = HashMap::new();

// A token that starts with '-' is another flag unless it reads like a negative number
// (e.g. the `-1` in `--seed -1`), in which case it is still the previous flag's value.
let looks_like_flag = |token: &str| {
    token.strip_prefix('-').map_or(false, |rest| {
        !rest.starts_with(|c: char| c.is_ascii_digit() || c == '.')
    })
};

// Extract arguments into the settings map and the dedicated fields
let mut i = 0;
while i < args.len() {
    let flag = args[i].as_str();
    match args.get(i + 1) {
        Some(value) if flag == "--port" => {
            // NOTE(review): an unparsable port is still forwarded to the server while the
            // reported port silently stays at the default; consider returning an error.
            if let Ok(parsed) = value.parse::<u16>() {
                port = parsed;
            }
            settings.insert("port".to_string(), serde_json::Value::String(value.clone()));
            i += 2;
        }
        Some(value) if flag == "-m" => {
            model_path = value.clone();
            settings.insert("modelPath".to_string(), serde_json::Value::String(value.clone()));
            i += 2;
        }
        // Generic `--key value` pair. The next token is only consumed when it is not
        // itself a flag, so consecutive switches such as `--flash-attn --no-mmap` are
        // each recorded instead of the second being swallowed as the first one's value.
        Some(value) if flag.starts_with('-') && !looks_like_flag(value.as_str()) => {
            // `trim_start_matches` strips every leading '-', so one call covers `-k` and `--key`
            let key = flag.trim_start_matches('-');
            settings.insert(key.to_string(), serde_json::Value::String(value.clone()));
            i += 2;
        }
        _ => {
            // Handle boolean flags; positional tokens are skipped
            if flag.starts_with('-') {
                let key = flag.trim_start_matches('-');
                settings.insert(key.to_string(), serde_json::Value::Bool(true));
            }
            i += 1;
        }
    }
}
// Configure the command to run the server
let mut command = Command::new(server_path);
command.args(args);
@ -106,7 +150,15 @@ pub async fn load(
// Store the child process handle in the state
*process_lock = Some(child);
Ok(())
let session_id = format!("session_{}", Uuid::new_v4());
let session_info = SessionInfo {
session_id,
port,
model_path,
settings,
};
Ok(session_info)
}
// --- Unload Command ---