Fixup: llama-server load — commit 021f8ae80f (parent ed6f86d4b1)
@ -77,6 +77,7 @@ export interface ModelInfo {
|
||||
name: string; // human‑readable, e.g., "Qwen3 4B Q4_0"
|
||||
quant_type?: string; // q4_0 (optional as it might be part of ID or name)
|
||||
providerId: string; // e.g. "llama.cpp"
|
||||
port: number;
|
||||
sizeBytes: number;
|
||||
tags?: string[];
|
||||
path?: string; // Absolute path to the model file, if applicable
|
||||
@ -106,24 +107,38 @@ export interface PullResult {
|
||||
}
|
||||
|
||||
// 3. /load
|
||||
export interface LoadOptions {
|
||||
providerId: string;
|
||||
modelPath: string;
|
||||
/** any provider‑specific tuning options for llama.cpp server */
|
||||
options?: {
|
||||
port?: number; // 0 means dynamic port
|
||||
n_gpu_layers?: number;
|
||||
n_ctx?: number; // context size
|
||||
// ... other llama-cpp-python or llama.cpp server flags
|
||||
[key: string]: any;
|
||||
};
|
||||
export interface loadOptions {
|
||||
modelPath: string
|
||||
port?: number
|
||||
n_gpu_layers?: number
|
||||
n_ctx?: number
|
||||
threads?: number
|
||||
threads_batch?: number
|
||||
ctx_size?: number
|
||||
n_predict?: number
|
||||
batch_size?: number
|
||||
ubatch_size?: number
|
||||
device?: string
|
||||
split_mode?: string
|
||||
main_gpu?: number
|
||||
flash_attn?: boolean
|
||||
cont_batching?: boolean
|
||||
no_mmap?: boolean
|
||||
mlock?: boolean
|
||||
no_kv_offload?: boolean
|
||||
cache_type_k?: string
|
||||
cache_type_v?: string
|
||||
defrag_thold?: number
|
||||
rope_scaling?: string
|
||||
rope_scale?: number
|
||||
rope_freq_base?: number
|
||||
rope_freq_scale?: number
|
||||
}
|
||||
|
||||
export interface SessionInfo {
|
||||
sessionId: string; // opaque handle for unload/chat
|
||||
port: number; // llama-server output port (corrected from portid)
|
||||
modelPath: string; // path of the loaded model
|
||||
providerId: string;
|
||||
settings: Record<string, unknown>; // The actual settings used to load
|
||||
}
|
||||
|
||||
|
||||
@ -1,7 +1,11 @@
|
||||
use std::path::PathBuf;
|
||||
use serde::{Serialize, Deserialize};
|
||||
use tauri::path::BaseDirectory;
|
||||
use tauri::{AppHandle, Manager, State}; // Import Manager trait
|
||||
use tokio::process::Command;
|
||||
use std::collections::HashMap;
|
||||
use uuid::Uuid;
|
||||
use thiserror;
|
||||
|
||||
use crate::core::state::AppState;
|
||||
|
||||
@ -61,13 +65,21 @@ fn get_server_path(app_handle: &AppHandle) -> ServerResult<PathBuf> {
|
||||
// })
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct SessionInfo {
|
||||
pub session_id: String, // opaque handle for unload/chat
|
||||
pub port: u16, // llama-server output port
|
||||
pub model_path: String, // path of the loaded model
|
||||
pub settings: HashMap<String, serde_json::Value>, // The actual settings used to load
|
||||
}
|
||||
|
||||
// --- Load Command ---
|
||||
#[tauri::command]
|
||||
pub async fn load(
|
||||
app_handle: AppHandle, // Get the AppHandle
|
||||
state: State<'_, AppState>, // Access the shared state
|
||||
args: Vec<String>, // Arguments from the frontend
|
||||
) -> ServerResult<()> {
|
||||
) -> ServerResult<SessionInfo> {
|
||||
let mut process_lock = state.llama_server_process.lock().await;
|
||||
|
||||
if process_lock.is_some() {
|
||||
@ -90,6 +102,38 @@ pub async fn load(
|
||||
)));
|
||||
}
|
||||
|
||||
let mut port = 8080; // Default port
|
||||
let mut model_path = String::new();
|
||||
let mut settings: HashMap<String, serde_json::Value> = HashMap::new();
|
||||
|
||||
// Extract arguments into settings map and specific fields
|
||||
let mut i = 0;
|
||||
while i < args.len() {
|
||||
if args[i] == "--port" && i + 1 < args.len() {
|
||||
if let Ok(p) = args[i + 1].parse::<u16>() {
|
||||
port = p;
|
||||
}
|
||||
settings.insert("port".to_string(), serde_json::Value::String(args[i + 1].clone()));
|
||||
i += 2;
|
||||
} else if args[i] == "-m" && i + 1 < args.len() {
|
||||
model_path = args[i + 1].clone();
|
||||
settings.insert("modelPath".to_string(), serde_json::Value::String(model_path.clone()));
|
||||
i += 2;
|
||||
} else if i + 1 < args.len() && args[i].starts_with("-") {
|
||||
// Store other arguments as settings
|
||||
let key = args[i].trim_start_matches("-").trim_start_matches("-");
|
||||
settings.insert(key.to_string(), serde_json::Value::String(args[i + 1].clone()));
|
||||
i += 2;
|
||||
} else {
|
||||
// Handle boolean flags
|
||||
if args[i].starts_with("-") {
|
||||
let key = args[i].trim_start_matches("-").trim_start_matches("-");
|
||||
settings.insert(key.to_string(), serde_json::Value::Bool(true));
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Configure the command to run the server
|
||||
let mut command = Command::new(server_path);
|
||||
command.args(args);
|
||||
@ -106,7 +150,15 @@ pub async fn load(
|
||||
// Store the child process handle in the state
|
||||
*process_lock = Some(child);
|
||||
|
||||
Ok(())
|
||||
let session_id = format!("session_{}", Uuid::new_v4());
|
||||
let session_info = SessionInfo {
|
||||
session_id,
|
||||
port,
|
||||
model_path,
|
||||
settings,
|
||||
};
|
||||
|
||||
Ok(session_info)
|
||||
}
|
||||
|
||||
// --- Unload Command ---
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user