update version/backend format. fix bugs around load()
parent fd9e034461
commit 1ae7c0b59a

@@ -1,8 +1,8 @@
 [
   {
-    "key": "backend",
-    "title": "Backend",
-    "description": "Backend for llama.cpp",
+    "key": "version_backend",
+    "title": "Version & Backend",
+    "description": "Version and Backend for llama.cpp",
     "controllerType": "dropdown",
     "controllerProps": {
       "value": "none",
@@ -29,7 +29,7 @@ import {
 import { invoke } from '@tauri-apps/api/core'

 type LlamacppConfig = {
-  backend: string
+  version_backend: string
   n_gpu_layers: number
   ctx_size: number
   threads: number
@@ -100,14 +100,14 @@ export default class llamacpp_extension extends AIEngine {

     // update backend settings
     for (let item of settings) {
-      if (item.key === 'backend') {
+      if (item.key === 'version_backend') {
         // NOTE: is there a race condition between when tauri IPC is available
         // and when the extension is loaded?
-        const backends = await listSupportedBackends()
-        console.log('Available backends:', backends)
-        item.controllerProps.options = backends.map((b) => {
+        const version_backends = await listSupportedBackends()
+        console.log('Available version/backends:', version_backends)
+        item.controllerProps.options = version_backends.map((b) => {
           const { version, backend } = b
-          const key = `${version}-${backend}`
+          const key = `${version}/${backend}`
           return { value: key, name: key }
         })
       }
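Note: each entry returned by listSupportedBackends() is collapsed into a single dropdown key of the form <version>/<backend>. A minimal sketch of that mapping, using made-up version and backend names (only the { version, backend } shape is taken from the code above):

    // Sketch only; 'b4932' and 'win-avx2-x64' are hypothetical example values.
    type SupportedBackend = { version: string; backend: string }

    const toOption = ({ version, backend }: SupportedBackend) => {
      const key = `${version}/${backend}` // e.g. 'b4932/win-avx2-x64'
      return { value: key, name: key }
    }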
@@ -156,9 +156,7 @@ export default class llamacpp_extension extends AIEngine {

     if (key === 'backend') {
       const valueStr = value as string
-      const idx = valueStr.indexOf('-')
-      const version = valueStr.slice(0, idx)
-      const backend = valueStr.slice(idx + 1)
+      const [version, backend] = valueStr.split('/')

       const closure = async () => {
         const isInstalled = await isBackendInstalled(backend, version)
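Note: the separator changes from '-' to '/'. Backend names themselves typically contain dashes, so the old load path, which split the stored key on '-' (see the next hunk), could not recover the two parts reliably; '/' never appears in either part, so a plain split is unambiguous. A small worked example with a hypothetical key:

    const valueStr = 'b4932/win-avx2-x64' // hypothetical stored setting value

    // New format: exactly one '/' separates the parts.
    const [version, backend] = valueStr.split('/') // 'b4932', 'win-avx2-x64'

    // An old-style key 'b4932-win-avx2-x64' split on '-' would yield
    // ['b4932', 'win', 'avx2', 'x64'] and drop most of the backend name.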
@@ -391,11 +389,19 @@ export default class llamacpp_extension extends AIEngine {
     const args: string[] = []
     const cfg = this.config
     const sysInfo = await window.core.api.getSystemInfo()
-    const [backend, version] = cfg.backend.split('-')
+    const [version, backend] = cfg.version_backend.split('/')
+    if (!version || !backend) {
+      // TODO: sometimes version_backend is not set correctly. to investigate
+      throw new Error(
+        `Invalid version/backend format: ${cfg.version_backend}. Expected format: <version>/<backend>`
+      )
+    }
+
     const exe_name =
       sysInfo.os_type === 'windows' ? 'llama-server.exe' : 'llama-server'
+    const janDataFolderPath = await getJanDataFolderPath()
     const backendPath = await joinPath([
-      await getJanDataFolderPath(),
+      janDataFolderPath,
       'llamacpp',
       'backends',
       backend,
@@ -404,30 +410,30 @@ export default class llamacpp_extension extends AIEngine {
       'bin',
       exe_name,
     ])
-    const modelPath = await joinPath([
+    const modelConfigPath = await joinPath([
       this.modelsBasePath,
       this.provider,
       modelId,
+      'model.yml',
     ])
-    const modelConfigPath = await joinPath([modelPath, 'model.yml'])
-    const modelConfig = await invoke<ModelConfig>('read_yaml', {
-      modelConfigPath,
-    })
+    const modelConfig = await invoke<ModelConfig>('read_yaml', { path: modelConfigPath })
     const port = await this.getRandomPort()

     // disable llama-server webui
     args.push('--no-webui')
     // update key for security; TODO: (qnixsynapse) Make it more secure
     const api_key = await this.generateApiKey(modelId)
-    args.push(`--api-key ${api_key}`)
+    args.push('--api-key', api_key)

     // model option is required
-    // TODO: llama.cpp extension lookup model path based on modelId
-    args.push('-m', modelConfig.model_path)
+    // NOTE: model_path and mmproj_path can be either relative to Jan's data folder or absolute path
+    const modelPath = await joinPath([janDataFolderPath, modelConfig.model_path])
+    args.push('-m', modelPath)
     args.push('-a', modelId)
-    args.push('--port', String(port)) // Default port if not specified
+    args.push('--port', String(port))
     if (modelConfig.mmproj_path) {
-      args.push('--mmproj', modelConfig.mmproj_path)
+      const mmprojPath = await joinPath([janDataFolderPath, modelConfig.mmproj_path])
+      args.push('--mmproj', mmprojPath)
     }

     if (cfg.ctx_size !== undefined) {
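Note: two of the fixes above concern how the argument vector is built. The API key is now pushed as two separate elements; the server binary is launched directly (no shell), so a single string '--api-key <key>' would reach llama-server as one token instead of a flag plus value. Relative model_path and mmproj_path values are also resolved against Jan's data folder before being passed to -m/--mmproj. A sketch of the resulting vector, with made-up paths, model name, key, and port:

    // All values below are hypothetical examples.
    const args = [
      '--no-webui',
      '--api-key', 'abc123',
      '-m', '/home/user/jan/llamacpp/models/qwen3/model.gguf',
      '-a', 'qwen3',
      '--port', '3017',
    ]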
@@ -468,10 +474,7 @@ export default class llamacpp_extension extends AIEngine {
     console.log('Calling Tauri command llama_load with args:', args)

     try {
-      const sInfo = await invoke<sessionInfo>('load_llama_model', {
-        backendPath: backendPath,
-        args: args,
-      })
+      const sInfo = await invoke<sessionInfo>('load_llama_model', { backendPath, args })

       // Store the session info for later use
       this.activeSessions.set(sInfo.sessionId, sInfo)
@@ -1,12 +1,12 @@
+use base64::{engine::general_purpose, Engine as _};
+use hmac::{Hmac, Mac};
+use serde::{Deserialize, Serialize};
+use sha2::Sha256;
 use std::path::PathBuf;
-use serde::{Serialize, Deserialize};
 use tauri::{AppHandle, State}; // Import Manager trait
+use thiserror;
 use tokio::process::Command;
 use uuid::Uuid;
-use thiserror;
-use hmac::{Hmac, Mac};
-use sha2::Sha256;
-use base64::{Engine as _, engine::general_purpose};

 use crate::core::state::AppState;

@@ -58,7 +58,7 @@ pub struct unloadResult {
 pub async fn load_llama_model(
     _app_handle: AppHandle, // Get the AppHandle
     state: State<'_, AppState>, // Access the shared state
-    backendPath: String,
+    backend_path: &str,
     args: Vec<String>, // Arguments from the frontend
 ) -> ServerResult<sessionInfo> {
     let mut process_lock = state.llama_server_process.lock().await;
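Note: the parameter is renamed to idiomatic snake_case on the Rust side while the extension still invokes the command with a camelCase key (see the +474 hunk above). This lines up under Tauri's default argument-key conversion, which maps camelCase keys from JavaScript onto snake_case command parameters; the sketch below assumes that default is not overridden:

    // TypeScript side (camelCase key) ...
    const sInfo = await invoke<sessionInfo>('load_llama_model', { backendPath, args })
    // ... which Tauri matches to the Rust parameter `backend_path: &str`.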
@@ -68,25 +68,25 @@ pub async fn load_llama_model(
         return Err(serverError::AlreadyRunning);
     }

-    log::info!("Attempting to launch server at path: {:?}", engineBasePath);
+    log::info!("Attempting to launch server at path: {:?}", backend_path);
     log::info!("Using arguments: {:?}", args);

-    let server_path_buf = PathBuf::from(&engineBasePath);
+    let server_path_buf = PathBuf::from(backend_path);
     if !server_path_buf.exists() {
         log::error!(
             "Server binary not found at expected path: {:?}",
-            engineBasePath
+            backend_path
         );
         return Err(serverError::BinaryNotFound(format!(
             "Binary not found at {:?}",
-            engineBasePath
+            backend_path
         )));
     }

     let port = 8080; // Default port

     // Configure the command to run the server
-    let mut command = Command::new(engineBasePath);
+    let mut command = Command::new(backend_path);

     let modelPath = args[2].replace("-m", "");
     let apiKey = args[1].replace("--api-key", "");
@@ -124,7 +124,10 @@

 // --- Unload Command ---
 #[tauri::command]
-pub async fn unload_llama_model(session_id: String, state: State<'_, AppState>) -> ServerResult<unloadResult> {
+pub async fn unload_llama_model(
+    session_id: String,
+    state: State<'_, AppState>,
+) -> ServerResult<unloadResult> {
     let mut process_lock = state.llama_server_process.lock().await;
     // Take the child process out of the Option, leaving None in its place
     if let Some(mut child) = process_lock.take() {
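Note: the unload command takes the session id issued by load_llama_model. The matching call from the extension is not part of this diff; a hypothetical sketch, assuming an unloadResult type mirroring the Rust struct and the same camelCase key convention:

    // Hypothetical caller; names mirror the Rust side but are assumptions here.
    const result = await invoke<unloadResult>('unload_llama_model', {
      sessionId: sInfo.sessionId,
    })
    if (!result.success) console.error('unload failed:', result.error)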
@@ -144,8 +147,10 @@ pub async fn unload_llama_model(session_id: String, state: State<'_, AppState>)

             return Ok(unloadResult {
                 success: false,
-                error: Some(format!("Session ID mismatch: provided {} doesn't match process {}",
-                    session_id, process_pid)),
+                error: Some(format!(
+                    "Session ID mismatch: provided {} doesn't match process {}",
+                    session_id, process_pid
+                )),
             });
         }

@@ -198,4 +203,3 @@ pub fn generate_api_key(modelId: String, apiSecret: String) -> Result<String, St
     let hash = general_purpose::STANDARD.encode(code_bytes);
     Ok(hash)
 }
-