update version/backend format. fix bugs around load()

Thien Tran 2025-05-30 13:55:31 +08:00 committed by Louis
parent fd9e034461
commit 1ae7c0b59a
3 changed files with 58 additions and 51 deletions
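The headline change in the diffs below is a settings rename: the llama.cpp `backend` setting becomes `version_backend`, the dropdown keys change from `${version}-${backend}` to `${version}/${backend}`, and the extension now parses the value with a single split plus a validity check. A minimal TypeScript sketch of that parsing, using hypothetical version and backend strings for illustration:

// Parse a "<version>/<backend>" key as introduced by this commit.
function parseVersionBackend(key: string): { version: string; backend: string } {
  const [version, backend] = key.split('/')
  if (!version || !backend) {
    throw new Error(
      `Invalid version/backend format: ${key}. Expected format: <version>/<backend>`
    )
  }
  return { version, backend }
}

// The old code split on '-' and destructured as [backend, version], which swaps
// the two parts and breaks when a backend identifier itself contains '-'.
// Hypothetical example values:
console.log(parseVersionBackend('b5000/win-avx2-x64')) // { version: 'b5000', backend: 'win-avx2-x64' }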

View File

@@ -1,8 +1,8 @@
[
{
"key": "backend",
"title": "Backend",
"description": "Backend for llama.cpp",
"key": "version_backend",
"title": "Version & Backend",
"description": "Version and Backend for llama.cpp",
"controllerType": "dropdown",
"controllerProps": {
"value": "none",

View File

@@ -29,7 +29,7 @@ import {
import { invoke } from '@tauri-apps/api/core'
type LlamacppConfig = {
-backend: string
+version_backend: string
n_gpu_layers: number
ctx_size: number
threads: number
@@ -100,14 +100,14 @@ export default class llamacpp_extension extends AIEngine {
// update backend settings
for (let item of settings) {
-if (item.key === 'backend') {
+if (item.key === 'version_backend') {
// NOTE: is there a race condition between when tauri IPC is available
// and when the extension is loaded?
-const backends = await listSupportedBackends()
-console.log('Available backends:', backends)
-item.controllerProps.options = backends.map((b) => {
+const version_backends = await listSupportedBackends()
+console.log('Available version/backends:', version_backends)
+item.controllerProps.options = version_backends.map((b) => {
const { version, backend } = b
-const key = `${version}-${backend}`
+const key = `${version}/${backend}`
return { value: key, name: key }
})
}
@@ -156,9 +156,7 @@ export default class llamacpp_extension extends AIEngine {
if (key === 'backend') {
const valueStr = value as string
-const idx = valueStr.indexOf('-')
-const version = valueStr.slice(0, idx)
-const backend = valueStr.slice(idx + 1)
+const [version, backend] = valueStr.split('/')
const closure = async () => {
const isInstalled = await isBackendInstalled(backend, version)
@@ -391,11 +389,19 @@ export default class llamacpp_extension extends AIEngine {
const args: string[] = []
const cfg = this.config
const sysInfo = await window.core.api.getSystemInfo()
-const [backend, version] = cfg.backend.split('-')
+const [version, backend] = cfg.version_backend.split('/')
+if (!version || !backend) {
+// TODO: sometimes version_backend is not set correctly. to investigate
+throw new Error(
+`Invalid version/backend format: ${cfg.version_backend}. Expected format: <version>/<backend>`
+)
+}
const exe_name =
sysInfo.os_type === 'windows' ? 'llama-server.exe' : 'llama-server'
+const janDataFolderPath = await getJanDataFolderPath()
const backendPath = await joinPath([
-await getJanDataFolderPath(),
+janDataFolderPath,
'llamacpp',
'backends',
backend,
@@ -404,30 +410,30 @@ export default class llamacpp_extension extends AIEngine {
'bin',
exe_name,
])
-const modelPath = await joinPath([
+const modelConfigPath = await joinPath([
this.modelsBasePath,
this.provider,
modelId,
+'model.yml',
])
-const modelConfigPath = await joinPath([modelPath, 'model.yml'])
-const modelConfig = await invoke<ModelConfig>('read_yaml', {
-modelConfigPath,
-})
+const modelConfig = await invoke<ModelConfig>('read_yaml', { path: modelConfigPath })
const port = await this.getRandomPort()
// disable llama-server webui
args.push('--no-webui')
// update key for security; TODO: (qnixsynapse) Make it more secure
const api_key = await this.generateApiKey(modelId)
-args.push(`--api-key ${api_key}`)
+args.push('--api-key', api_key)
// model option is required
// TODO: llama.cpp extension lookup model path based on modelId
-args.push('-m', modelConfig.model_path)
+// NOTE: model_path and mmproj_path can be either relative to Jan's data folder or absolute path
+const modelPath = await joinPath([janDataFolderPath, modelConfig.model_path])
+args.push('-m', modelPath)
args.push('-a', modelId)
-args.push('--port', String(port)) // Default port if not specified
+args.push('--port', String(port))
if (modelConfig.mmproj_path) {
-args.push('--mmproj', modelConfig.mmproj_path)
+const mmprojPath = await joinPath([janDataFolderPath, modelConfig.mmproj_path])
+args.push('--mmproj', mmprojPath)
}
if (cfg.ctx_size !== undefined) {
@@ -468,10 +474,7 @@ export default class llamacpp_extension extends AIEngine {
console.log('Calling Tauri command llama_load with args:', args)
try {
-const sInfo = await invoke<sessionInfo>('load_llama_model', {
-backendPath: backendPath,
-args: args,
-})
+const sInfo = await invoke<sessionInfo>('load_llama_model', { backendPath, args })
// Store the session info for later use
this.activeSessions.set(sInfo.sessionId, sInfo)
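The load path above also gets two argument-handling fixes: each CLI flag and its value are now pushed as separate argv entries (a single templated string such as `--api-key ${api_key}` would reach llama-server as one argv element rather than an option plus its value), and relative `model_path`/`mmproj_path` entries are resolved against Jan's data folder before being handed to the binary. A condensed sketch of the corrected argument assembly, assuming the same `joinPath`/`getJanDataFolderPath` helpers the extension already uses:

// Assumed helper signatures, matching how the extension uses them above.
declare function getJanDataFolderPath(): Promise<string>
declare function joinPath(parts: string[]): Promise<string>

async function buildLoadArgs(
  modelId: string,
  apiKey: string,
  port: number,
  modelConfig: { model_path: string; mmproj_path?: string }
): Promise<string[]> {
  const janDataFolderPath = await getJanDataFolderPath()
  const args: string[] = ['--no-webui']
  // Flag and value as separate entries, so the spawned process sees two argv elements.
  args.push('--api-key', apiKey)
  args.push('-m', await joinPath([janDataFolderPath, modelConfig.model_path]))
  args.push('-a', modelId)
  args.push('--port', String(port))
  if (modelConfig.mmproj_path) {
    args.push('--mmproj', await joinPath([janDataFolderPath, modelConfig.mmproj_path]))
  }
  return args
}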

View File

@@ -1,12 +1,12 @@
+use base64::{engine::general_purpose, Engine as _};
+use hmac::{Hmac, Mac};
+use serde::{Deserialize, Serialize};
+use sha2::Sha256;
use std::path::PathBuf;
-use serde::{Serialize, Deserialize};
use tauri::{AppHandle, State}; // Import Manager trait
+use thiserror;
use tokio::process::Command;
use uuid::Uuid;
-use thiserror;
-use hmac::{Hmac, Mac};
-use sha2::Sha256;
-use base64::{Engine as _, engine::general_purpose};
use crate::core::state::AppState;
@@ -16,8 +16,8 @@ type HmacSha256 = Hmac<Sha256>;
pub enum serverError {
#[error("Server is already running")]
AlreadyRunning,
-// #[error("Server is not running")]
-// NotRunning,
+// #[error("Server is not running")]
+// NotRunning,
#[error("Failed to locate server binary: {0}")]
BinaryNotFound(String),
#[error("IO error: {0}")]
@@ -40,10 +40,10 @@ type ServerResult<T> = Result<T, serverError>;
#[derive(Debug, Serialize, Deserialize)]
pub struct sessionInfo {
-pub pid: String, // opaque handle for unload/chat
-pub port: u16, // llama-server output port
+pub pid: String, // opaque handle for unload/chat
+pub port: u16, // llama-server output port
pub modelId: String,
-pub modelPath: String, // path of the loaded model
+pub modelPath: String, // path of the loaded model
pub apiKey: String,
}
@@ -56,10 +56,10 @@ pub struct unloadResult {
// --- Load Command ---
#[tauri::command]
pub async fn load_llama_model(
-_app_handle: AppHandle, // Get the AppHandle
+_app_handle: AppHandle, // Get the AppHandle
state: State<'_, AppState>, // Access the shared state
-backendPath: String,
-args: Vec<String>, // Arguments from the frontend
+backend_path: &str,
+args: Vec<String>, // Arguments from the frontend
) -> ServerResult<sessionInfo> {
let mut process_lock = state.llama_server_process.lock().await;
@@ -68,25 +68,25 @@ pub async fn load_llama_model(
return Err(serverError::AlreadyRunning);
}
log::info!("Attempting to launch server at path: {:?}", engineBasePath);
log::info!("Attempting to launch server at path: {:?}", backend_path);
log::info!("Using arguments: {:?}", args);
-let server_path_buf = PathBuf::from(&engineBasePath);
+let server_path_buf = PathBuf::from(backend_path);
if !server_path_buf.exists() {
log::error!(
"Server binary not found at expected path: {:?}",
-engineBasePath
+backend_path
);
return Err(serverError::BinaryNotFound(format!(
"Binary not found at {:?}",
-engineBasePath
+backend_path
)));
}
let port = 8080; // Default port
// Configure the command to run the server
-let mut command = Command::new(engineBasePath);
+let mut command = Command::new(backend_path);
let modelPath = args[2].replace("-m", "");
let apiKey = args[1].replace("--api-key", "");
@@ -124,7 +124,10 @@ pub async fn load_llama_model(
// --- Unload Command ---
#[tauri::command]
-pub async fn unload_llama_model(session_id: String, state: State<'_, AppState>) -> ServerResult<unloadResult> {
+pub async fn unload_llama_model(
+session_id: String,
+state: State<'_, AppState>,
+) -> ServerResult<unloadResult> {
let mut process_lock = state.llama_server_process.lock().await;
// Take the child process out of the Option, leaving None in its place
if let Some(mut child) = process_lock.take() {
@@ -144,8 +147,10 @@ pub async fn unload_llama_model(session_id: String, state: State<'_, AppState>)
return Ok(unloadResult {
success: false,
error: Some(format!("Session ID mismatch: provided {} doesn't match process {}",
session_id, process_pid)),
error: Some(format!(
"Session ID mismatch: provided {} doesn't match process {}",
session_id, process_pid
)),
});
}
@@ -177,7 +182,7 @@ pub async fn unload_llama_model(session_id: String, state: State<'_, AppState>)
} else {
log::warn!("Attempted to unload server, but no process was running");
-// If no process is running but client thinks there is,
+// If no process is running but client thinks there is,
// still report success since the end state is what they wanted
Ok(unloadResult {
success: true,
@@ -198,4 +203,3 @@ pub fn generate_api_key(modelId: String, apiSecret: String) -> Result<String, St
let hash = general_purpose::STANDARD.encode(code_bytes);
Ok(hash)
}
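For context on the last hunk: `generate_api_key` produces the key by base64-encoding an HMAC-SHA256 digest (see the `Hmac<Sha256>` alias and the `general_purpose::STANDARD.encode` call). A conceptual TypeScript equivalent, assuming the MAC is computed over the model id with the API secret as the key, an assignment not visible in this excerpt:

import { createHmac } from 'node:crypto'

// Conceptual sketch of generate_api_key: HMAC-SHA256 over the model id,
// keyed by the API secret, encoded with standard base64.
function generateApiKey(modelId: string, apiSecret: string): string {
  return createHmac('sha256', apiSecret).update(modelId).digest('base64')
}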