update version/backend format. fix bugs around load()
parent fd9e034461
commit 1ae7c0b59a
@@ -1,8 +1,8 @@
 [
   {
-    "key": "backend",
-    "title": "Backend",
-    "description": "Backend for llama.cpp",
+    "key": "version_backend",
+    "title": "Version & Backend",
+    "description": "Version and Backend for llama.cpp",
     "controllerType": "dropdown",
     "controllerProps": {
       "value": "none",
@@ -29,7 +29,7 @@ import {
 import { invoke } from '@tauri-apps/api/core'
 
 type LlamacppConfig = {
-  backend: string
+  version_backend: string
   n_gpu_layers: number
   ctx_size: number
   threads: number
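For reference, a config value under the new key format looks roughly like this. A minimal sketch: the field names come from the `LlamacppConfig` type above, the concrete values are illustrative only.

```ts
// Illustrative values only; field names follow the LlamacppConfig type above.
const cfg: Partial<LlamacppConfig> = {
  version_backend: 'b5219/win-avx2-x64', // '<version>/<backend>'
  n_gpu_layers: 99,
  ctx_size: 4096,
  threads: 8,
}
```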
@@ -100,14 +100,14 @@ export default class llamacpp_extension extends AIEngine {
 
     // update backend settings
     for (let item of settings) {
-      if (item.key === 'backend') {
+      if (item.key === 'version_backend') {
         // NOTE: is there a race condition between when tauri IPC is available
         // and when the extension is loaded?
-        const backends = await listSupportedBackends()
-        console.log('Available backends:', backends)
-        item.controllerProps.options = backends.map((b) => {
+        const version_backends = await listSupportedBackends()
+        console.log('Available version/backends:', version_backends)
+        item.controllerProps.options = version_backends.map((b) => {
           const { version, backend } = b
-          const key = `${version}-${backend}`
+          const key = `${version}/${backend}`
           return { value: key, name: key }
         })
       }
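The dropdown options built in this hunk end up as `{ value, name }` pairs keyed by `<version>/<backend>`. A minimal sketch, assuming `listSupportedBackends()` resolves to an array of `{ version, backend }` objects as the destructuring above implies; the concrete values are made up.

```ts
// Assumed return shape of listSupportedBackends(); values are illustrative.
const version_backends = [
  { version: 'b5219', backend: 'win-avx2-x64' },
  { version: 'b5219', backend: 'linux-cuda-x64' },
]
const options = version_backends.map(({ version, backend }) => {
  const key = `${version}/${backend}`
  return { value: key, name: key }
})
// options[0] -> { value: 'b5219/win-avx2-x64', name: 'b5219/win-avx2-x64' }
```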
@@ -156,9 +156,7 @@ export default class llamacpp_extension extends AIEngine {
 
     if (key === 'backend') {
       const valueStr = value as string
-      const idx = valueStr.indexOf('-')
-      const version = valueStr.slice(0, idx)
-      const backend = valueStr.slice(idx + 1)
+      const [version, backend] = valueStr.split('/')
 
       const closure = async () => {
         const isInstalled = await isBackendInstalled(backend, version)
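Splitting on `/` avoids the ambiguity of the old `-` separator, since backend identifiers can themselves contain dashes. A minimal sketch of the same parsing as a standalone helper; the helper name and example values are hypothetical, not part of this change.

```ts
// Hypothetical helper; mirrors the '<version>/<backend>' format used in this commit.
function parseVersionBackend(key: string): { version: string; backend: string } {
  const [version, backend] = key.split('/')
  if (!version || !backend) {
    throw new Error(
      `Invalid version/backend format: ${key}. Expected format: <version>/<backend>`
    )
  }
  return { version, backend }
}

// parseVersionBackend('b5219/win-avx2-x64') -> { version: 'b5219', backend: 'win-avx2-x64' }
```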
@@ -391,11 +389,19 @@ export default class llamacpp_extension extends AIEngine {
     const args: string[] = []
     const cfg = this.config
     const sysInfo = await window.core.api.getSystemInfo()
-    const [backend, version] = cfg.backend.split('-')
+    const [version, backend] = cfg.version_backend.split('/')
+    if (!version || !backend) {
+      // TODO: sometimes version_backend is not set correctly. to investigate
+      throw new Error(
+        `Invalid version/backend format: ${cfg.version_backend}. Expected format: <version>/<backend>`
+      )
+    }
 
     const exe_name =
       sysInfo.os_type === 'windows' ? 'llama-server.exe' : 'llama-server'
+    const janDataFolderPath = await getJanDataFolderPath()
     const backendPath = await joinPath([
-      await getJanDataFolderPath(),
+      janDataFolderPath,
       'llamacpp',
       'backends',
       backend,
@@ -404,30 +410,30 @@ export default class llamacpp_extension extends AIEngine {
       'bin',
       exe_name,
     ])
-    const modelPath = await joinPath([
+    const modelConfigPath = await joinPath([
       this.modelsBasePath,
       this.provider,
       modelId,
+      'model.yml',
     ])
-    const modelConfigPath = await joinPath([modelPath, 'model.yml'])
-    const modelConfig = await invoke<ModelConfig>('read_yaml', {
-      modelConfigPath,
-    })
+    const modelConfig = await invoke<ModelConfig>('read_yaml', { path: modelConfigPath })
     const port = await this.getRandomPort()
 
     // disable llama-server webui
     args.push('--no-webui')
     // update key for security; TODO: (qnixsynapse) Make it more secure
     const api_key = await this.generateApiKey(modelId)
-    args.push(`--api-key ${api_key}`)
+    args.push('--api-key', api_key)
 
     // model option is required
-    // TODO: llama.cpp extension lookup model path based on modelId
-    args.push('-m', modelConfig.model_path)
+    // NOTE: model_path and mmproj_path can be either relative to Jan's data folder or absolute path
+    const modelPath = await joinPath([janDataFolderPath, modelConfig.model_path])
+    args.push('-m', modelPath)
     args.push('-a', modelId)
-    args.push('--port', String(port)) // Default port if not specified
+    args.push('--port', String(port))
     if (modelConfig.mmproj_path) {
-      args.push('--mmproj', modelConfig.mmproj_path)
+      const mmprojPath = await joinPath([janDataFolderPath, modelConfig.mmproj_path])
+      args.push('--mmproj', mmprojPath)
     }
 
     if (cfg.ctx_size !== undefined) {
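The NOTE above says model_path and mmproj_path may be either relative to Jan's data folder or absolute, while the new code always joins them with janDataFolderPath. A small sketch of a resolver that honors both cases; the helper and its absolute-path heuristic are assumptions for illustration, not part of this diff, and `joinPath` is passed in as the same path-join utility this file already uses.

```ts
// Sketch only: handle both relative-to-data-folder and absolute model paths.
// The absolute-path check below is an assumption, not behavior from this commit.
async function resolveModelFile(
  janDataFolderPath: string,
  configuredPath: string,
  joinPath: (parts: string[]) => Promise<string>
): Promise<string> {
  const isAbsolute =
    configuredPath.startsWith('/') || /^[A-Za-z]:[\\/]/.test(configuredPath)
  return isAbsolute ? configuredPath : await joinPath([janDataFolderPath, configuredPath])
}
```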
@@ -468,10 +474,7 @@ export default class llamacpp_extension extends AIEngine {
     console.log('Calling Tauri command llama_load with args:', args)
 
     try {
-      const sInfo = await invoke<sessionInfo>('load_llama_model', {
-        backendPath: backendPath,
-        args: args,
-      })
+      const sInfo = await invoke<sessionInfo>('load_llama_model', { backendPath, args })
 
       // Store the session info for later use
       this.activeSessions.set(sInfo.sessionId, sInfo)
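Further down in this commit the Rust command parameter is renamed to `backend_path: &str`, while the invoke call keeps the camelCase key. That works because Tauri converts camelCase argument keys from JavaScript to snake_case command parameters by default. A minimal sketch of how the call site lines up with that signature:

```ts
// Rust side (from the hunks below):
//   pub async fn load_llama_model(..., backend_path: &str, args: Vec<String>) -> ServerResult<sessionInfo>
// Tauri maps camelCase invoke keys to snake_case parameters by default,
// so the shorthand object below reaches backend_path / args on the Rust side.
const sInfo = await invoke<sessionInfo>('load_llama_model', { backendPath, args })
```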
@@ -1,12 +1,12 @@
+use base64::{engine::general_purpose, Engine as _};
+use hmac::{Hmac, Mac};
+use serde::{Deserialize, Serialize};
+use sha2::Sha256;
 use std::path::PathBuf;
-use serde::{Serialize, Deserialize};
 use tauri::{AppHandle, State}; // Import Manager trait
+use thiserror;
 use tokio::process::Command;
 use uuid::Uuid;
-use thiserror;
-use hmac::{Hmac, Mac};
-use sha2::Sha256;
-use base64::{Engine as _, engine::general_purpose};
 
 use crate::core::state::AppState;
@@ -16,8 +16,8 @@ type HmacSha256 = Hmac<Sha256>;
 pub enum serverError {
     #[error("Server is already running")]
     AlreadyRunning,
-    // #[error("Server is not running")]
-    // NotRunning,
+    // #[error("Server is not running")]
+    // NotRunning,
     #[error("Failed to locate server binary: {0}")]
     BinaryNotFound(String),
     #[error("IO error: {0}")]
@@ -40,10 +40,10 @@ type ServerResult<T> = Result<T, serverError>;
 
 #[derive(Debug, Serialize, Deserialize)]
 pub struct sessionInfo {
-    pub pid: String, // opaque handle for unload/chat
-    pub port: u16, // llama-server output port
+    pub pid: String, // opaque handle for unload/chat
+    pub port: u16, // llama-server output port
     pub modelId: String,
-    pub modelPath: String, // path of the loaded model
+    pub modelPath: String, // path of the loaded model
     pub apiKey: String,
 }
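On the frontend, `invoke<sessionInfo>` in the extension code above expects an object with these fields once the struct is serialized. A sketch of the matching TypeScript shape, assuming the field names serialize exactly as declared (no serde renaming is visible in this hunk):

```ts
// Assumed frontend mirror of the Rust sessionInfo struct in this diff.
interface sessionInfo {
  pid: string       // opaque handle for unload/chat
  port: number      // llama-server output port
  modelId: string
  modelPath: string // path of the loaded model
  apiKey: string
}
```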
@@ -56,10 +56,10 @@ pub struct unloadResult {
 // --- Load Command ---
 #[tauri::command]
 pub async fn load_llama_model(
-    _app_handle: AppHandle, // Get the AppHandle
+    _app_handle: AppHandle, // Get the AppHandle
     state: State<'_, AppState>, // Access the shared state
-    backendPath: String,
-    args: Vec<String>, // Arguments from the frontend
+    backend_path: &str,
+    args: Vec<String>, // Arguments from the frontend
 ) -> ServerResult<sessionInfo> {
     let mut process_lock = state.llama_server_process.lock().await;
@@ -68,25 +68,25 @@ pub async fn load_llama_model(
         return Err(serverError::AlreadyRunning);
     }
 
-    log::info!("Attempting to launch server at path: {:?}", engineBasePath);
+    log::info!("Attempting to launch server at path: {:?}", backend_path);
     log::info!("Using arguments: {:?}", args);
 
-    let server_path_buf = PathBuf::from(&engineBasePath);
+    let server_path_buf = PathBuf::from(backend_path);
     if !server_path_buf.exists() {
         log::error!(
             "Server binary not found at expected path: {:?}",
-            engineBasePath
+            backend_path
         );
         return Err(serverError::BinaryNotFound(format!(
             "Binary not found at {:?}",
-            engineBasePath
+            backend_path
         )));
     }
 
     let port = 8080; // Default port
 
     // Configure the command to run the server
-    let mut command = Command::new(engineBasePath);
+    let mut command = Command::new(backend_path);
 
     let modelPath = args[2].replace("-m", "");
     let apiKey = args[1].replace("--api-key", "");
@@ -124,7 +124,10 @@ pub async fn load_llama_model(
 
 // --- Unload Command ---
 #[tauri::command]
-pub async fn unload_llama_model(session_id: String, state: State<'_, AppState>) -> ServerResult<unloadResult> {
+pub async fn unload_llama_model(
+    session_id: String,
+    state: State<'_, AppState>,
+) -> ServerResult<unloadResult> {
     let mut process_lock = state.llama_server_process.lock().await;
     // Take the child process out of the Option, leaving None in its place
     if let Some(mut child) = process_lock.take() {
@@ -144,8 +147,10 @@ pub async fn unload_llama_model(session_id: String, state: State<'_, AppState>)
 
         return Ok(unloadResult {
             success: false,
-            error: Some(format!("Session ID mismatch: provided {} doesn't match process {}",
-                session_id, process_pid)),
+            error: Some(format!(
+                "Session ID mismatch: provided {} doesn't match process {}",
+                session_id, process_pid
+            )),
         });
     }
@@ -198,4 +203,3 @@ pub fn generate_api_key(modelId: String, apiSecret: String) -> Result<String, St
     let hash = general_purpose::STANDARD.encode(code_bytes);
     Ok(hash)
 }
-