update version/backend format. fix bugs around load()

Thien Tran 2025-05-30 13:55:31 +08:00 committed by Louis
parent fd9e034461
commit 1ae7c0b59a
No known key found for this signature in database
GPG Key ID: 44FA9F4D33C37DE2
3 changed files with 58 additions and 51 deletions

View File

@@ -1,8 +1,8 @@
 [
   {
-    "key": "backend",
-    "title": "Backend",
-    "description": "Backend for llama.cpp",
+    "key": "version_backend",
+    "title": "Version & Backend",
+    "description": "Version and Backend for llama.cpp",
     "controllerType": "dropdown",
     "controllerProps": {
       "value": "none",

View File

@@ -29,7 +29,7 @@ import {
 import { invoke } from '@tauri-apps/api/core'
 
 type LlamacppConfig = {
-  backend: string
+  version_backend: string
   n_gpu_layers: number
   ctx_size: number
   threads: number
@@ -100,14 +100,14 @@ export default class llamacpp_extension extends AIEngine {
     // update backend settings
     for (let item of settings) {
-      if (item.key === 'backend') {
+      if (item.key === 'version_backend') {
         // NOTE: is there a race condition between when tauri IPC is available
         // and when the extension is loaded?
-        const backends = await listSupportedBackends()
-        console.log('Available backends:', backends)
-        item.controllerProps.options = backends.map((b) => {
+        const version_backends = await listSupportedBackends()
+        console.log('Available version/backends:', version_backends)
+        item.controllerProps.options = version_backends.map((b) => {
           const { version, backend } = b
-          const key = `${version}-${backend}`
+          const key = `${version}/${backend}`
           return { value: key, name: key }
         })
       }
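For reference, each entry returned by listSupportedBackends() carries a version and a backend name, so after this mapping the dropdown options take the combined `<version>/<backend>` form. A minimal sketch with made-up values:

// Hypothetical result of the mapping above, assuming listSupportedBackends()
// returned a single entry { version: 'b5215', backend: 'win-avx2-x64' }:
const options = [{ value: 'b5215/win-avx2-x64', name: 'b5215/win-avx2-x64' }]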
@@ -156,9 +156,7 @@ export default class llamacpp_extension extends AIEngine {
     if (key === 'backend') {
       const valueStr = value as string
-      const idx = valueStr.indexOf('-')
-      const version = valueStr.slice(0, idx)
-      const backend = valueStr.slice(idx + 1)
+      const [version, backend] = valueStr.split('/')
 
       const closure = async () => {
         const isInstalled = await isBackendInstalled(backend, version)
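The `<version>/<backend>` key is split back into its parts wherever it is consumed. A small sketch of that round trip, using a hypothetical helper name:

// Sketch only: parse a '<version>/<backend>' key back into its parts.
// A key without a '/' leaves `backend` undefined, which is what the
// validation added to load() below guards against.
function parseVersionBackend(key: string): { version?: string; backend?: string } {
  const [version, backend] = key.split('/')
  return { version, backend }
}

// parseVersionBackend('b5215/win-avx2-x64')
//   => { version: 'b5215', backend: 'win-avx2-x64' }
// parseVersionBackend('b5215') => { version: 'b5215', backend: undefined }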
@@ -391,11 +389,19 @@ export default class llamacpp_extension extends AIEngine {
     const args: string[] = []
     const cfg = this.config
     const sysInfo = await window.core.api.getSystemInfo()
-    const [backend, version] = cfg.backend.split('-')
+    const [version, backend] = cfg.version_backend.split('/')
+    if (!version || !backend) {
+      // TODO: sometimes version_backend is not set correctly. to investigate
+      throw new Error(
+        `Invalid version/backend format: ${cfg.version_backend}. Expected format: <version>/<backend>`
+      )
+    }
     const exe_name =
       sysInfo.os_type === 'windows' ? 'llama-server.exe' : 'llama-server'
+    const janDataFolderPath = await getJanDataFolderPath()
     const backendPath = await joinPath([
-      await getJanDataFolderPath(),
+      janDataFolderPath,
       'llamacpp',
       'backends',
       backend,
@@ -404,30 +410,30 @@
       'bin',
       exe_name,
     ])
-    const modelPath = await joinPath([
+    const modelConfigPath = await joinPath([
       this.modelsBasePath,
       this.provider,
       modelId,
+      'model.yml',
     ])
-    const modelConfigPath = await joinPath([modelPath, 'model.yml'])
-    const modelConfig = await invoke<ModelConfig>('read_yaml', {
-      modelConfigPath,
-    })
+    const modelConfig = await invoke<ModelConfig>('read_yaml', { path: modelConfigPath })
     const port = await this.getRandomPort()
 
     // disable llama-server webui
     args.push('--no-webui')
     // update key for security; TODO: (qnixsynapse) Make it more secure
     const api_key = await this.generateApiKey(modelId)
-    args.push(`--api-key ${api_key}`)
+    args.push('--api-key', api_key)
     // model option is required
-    // TODO: llama.cpp extension lookup model path based on modelId
-    args.push('-m', modelConfig.model_path)
+    // NOTE: model_path and mmproj_path can be either relative to Jan's data folder or absolute path
+    const modelPath = await joinPath([janDataFolderPath, modelConfig.model_path])
+    args.push('-m', modelPath)
     args.push('-a', modelId)
-    args.push('--port', String(port)) // Default port if not specified
+    args.push('--port', String(port))
     if (modelConfig.mmproj_path) {
-      args.push('--mmproj', modelConfig.mmproj_path)
+      const mmprojPath = await joinPath([janDataFolderPath, modelConfig.mmproj_path])
+      args.push('--mmproj', mmprojPath)
     }
 
     if (cfg.ctx_size !== undefined) {
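With these changes every flag and its value are pushed as separate argv entries (notably --api-key is no longer a single joined string), and model paths are resolved against the Jan data folder. Assuming made-up values, the assembled array looks roughly like this before the optional config flags are appended:

// Illustrative only — the key, path, model id and port are invented values.
const exampleArgs = [
  '--no-webui',
  '--api-key', 'dGhpcy1pcy1ub3QtYS1yZWFsLWtleQ==',
  '-m', '/home/user/jan/models/qwen3-4b/model.gguf',
  '-a', 'qwen3-4b',
  '--port', '32768',
]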
@@ -468,10 +474,7 @@
     console.log('Calling Tauri command llama_load with args:', args)
     try {
-      const sInfo = await invoke<sessionInfo>('load_llama_model', {
-        backendPath: backendPath,
-        args: args,
-      })
+      const sInfo = await invoke<sessionInfo>('load_llama_model', { backendPath, args })
       // Store the session info for later use
       this.activeSessions.set(sInfo.sessionId, sInfo)
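A note on the shortened invoke call: Tauri maps camelCase argument keys from the JavaScript side onto the snake_case parameter names of the Rust command by default, so the shorthand object still reaches the renamed backend_path parameter. Sketch of the pairing:

// TypeScript side: camelCase keys in the argument object.
const sInfo = await invoke<sessionInfo>('load_llama_model', { backendPath, args })
// Tauri resolves `backendPath` to the `backend_path` parameter of the
// load_llama_model command defined in the next file.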

View File

@@ -1,12 +1,12 @@
+use base64::{engine::general_purpose, Engine as _};
+use hmac::{Hmac, Mac};
+use serde::{Deserialize, Serialize};
+use sha2::Sha256;
 use std::path::PathBuf;
-use serde::{Serialize, Deserialize};
 use tauri::{AppHandle, State}; // Import Manager trait
+use thiserror;
 use tokio::process::Command;
 use uuid::Uuid;
-use thiserror;
-use hmac::{Hmac, Mac};
-use sha2::Sha256;
-use base64::{Engine as _, engine::general_purpose};
 
 use crate::core::state::AppState;
@@ -58,7 +58,7 @@ pub struct unloadResult {
 pub async fn load_llama_model(
     _app_handle: AppHandle, // Get the AppHandle
     state: State<'_, AppState>, // Access the shared state
-    backendPath: String,
+    backend_path: &str,
     args: Vec<String>, // Arguments from the frontend
 ) -> ServerResult<sessionInfo> {
     let mut process_lock = state.llama_server_process.lock().await;
@@ -68,25 +68,25 @@ pub async fn load_llama_model(
         return Err(serverError::AlreadyRunning);
     }
 
-    log::info!("Attempting to launch server at path: {:?}", engineBasePath);
+    log::info!("Attempting to launch server at path: {:?}", backend_path);
     log::info!("Using arguments: {:?}", args);
 
-    let server_path_buf = PathBuf::from(&engineBasePath);
+    let server_path_buf = PathBuf::from(backend_path);
     if !server_path_buf.exists() {
         log::error!(
             "Server binary not found at expected path: {:?}",
-            engineBasePath
+            backend_path
         );
         return Err(serverError::BinaryNotFound(format!(
             "Binary not found at {:?}",
-            engineBasePath
+            backend_path
         )));
     }
 
     let port = 8080; // Default port
 
     // Configure the command to run the server
-    let mut command = Command::new(engineBasePath);
+    let mut command = Command::new(backend_path);
     let modelPath = args[2].replace("-m", "");
     let apiKey = args[1].replace("--api-key", "");
@@ -124,7 +124,10 @@ pub async fn load_llama_model(
 // --- Unload Command ---
 #[tauri::command]
-pub async fn unload_llama_model(session_id: String, state: State<'_, AppState>) -> ServerResult<unloadResult> {
+pub async fn unload_llama_model(
+    session_id: String,
+    state: State<'_, AppState>,
+) -> ServerResult<unloadResult> {
     let mut process_lock = state.llama_server_process.lock().await;
     // Take the child process out of the Option, leaving None in its place
     if let Some(mut child) = process_lock.take() {
@@ -144,8 +147,10 @@ pub async fn unload_llama_model(session_id: String, state: State<'_, AppState>)
             return Ok(unloadResult {
                 success: false,
-                error: Some(format!("Session ID mismatch: provided {} doesn't match process {}",
-                    session_id, process_pid)),
+                error: Some(format!(
+                    "Session ID mismatch: provided {} doesn't match process {}",
+                    session_id, process_pid
+                )),
             });
         }
@@ -198,4 +203,3 @@ pub fn generate_api_key(modelId: String, apiSecret: String) -> Result<String, St
     let hash = general_purpose::STANDARD.encode(code_bytes);
     Ok(hash)
 }
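The generate_api_key command appears to derive the key as an HMAC-SHA256 of the model id keyed by apiSecret, base64-encoded (matching the hmac, sha2 and base64 imports above). A minimal TypeScript equivalent, as a sketch using Node's crypto module rather than the extension's actual code path:

import { createHmac } from 'node:crypto'

// Sketch: HMAC-SHA256 over modelId, keyed with apiSecret, base64-encoded,
// mirroring what the Rust command above computes with hmac/sha2/base64.
function generateApiKey(modelId: string, apiSecret: string): string {
  return createHmac('sha256', apiSecret).update(modelId).digest('base64')
}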