feat: Add support for custom environmental variables to llama.cpp (#6256)
This commit adds a new setting, `llamacpp_env`, to the llama.cpp extension, allowing users to specify custom environment variables that are passed to the backend process when it starts. A new function, `parseEnvFromString`, parses the semicolon-separated KEY=VALUE pairs from the user input. The resulting environment variables are applied in the `load` function and when listing available devices, enabling more flexible configuration of the llama.cpp backend, such as selecting the visible GPUs for Vulkan. The Tauri command `get_devices` is also updated to accept environment variables, so device discovery respects the user's settings.
parent 5c4deff215
commit 5c3a6fec32
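
For context, here is a minimal sketch (not part of the commit) of how a value entered in the new `llamacpp_env` setting is expected to be parsed, mirroring the `parseEnvFromString` logic introduced in the diff below. The standalone `parseEnv` helper and the sample input string are illustrative assumptions only:

function parseEnv(target: Record<string, string>, envString: string): void {
  envString
    .split(';')
    .filter((pair) => pair.trim())
    .forEach((pair) => {
      const [key, ...valueParts] = pair.split('=')
      const cleanKey = key?.trim()
      // Keys starting with "LLAMA" are skipped, so user input cannot override
      // internally managed variables such as LLAMA_API_KEY.
      if (cleanKey && valueParts.length > 0 && !cleanKey.startsWith('LLAMA')) {
        // Re-join on '=' so values that themselves contain '=' survive intact.
        target[cleanKey] = valueParts.join('=').trim()
      }
    })
}

const envs: Record<string, string> = {}
parseEnv(envs, "GGML_VK_VISIBLE_DEVICES='0,1';CUDA_VISIBLE_DEVICES=0;LLAMA_ARG_THREADS=8")
// envs is now { GGML_VK_VISIBLE_DEVICES: "'0,1'", CUDA_VISIBLE_DEVICES: "0" };
// LLAMA_ARG_THREADS is dropped by the LLAMA prefix filter.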
@@ -10,7 +10,18 @@
       "recommended": ""
     }
   },
+  {
+    "key": "llamacpp_env",
+    "title": "Environmental variables",
+    "description": "Environmental variables for llama.cpp(KEY=VALUE), separated by ';'",
+    "controllerType": "input",
+    "controllerProps": {
+      "value": "none",
+      "placeholder": "Eg, GGML_VK_VISIBLE_DEVICES='0,1'",
+      "type": "text",
+      "textAlign": "right"
+    }
+  },
   {
     "key": "auto_update_engine",
     "title": "Auto update engine",
@@ -41,6 +41,7 @@ type LlamacppConfig = {
   version_backend: string
   auto_update_engine: boolean
   auto_unload: boolean
+  llamacpp_env: string
   chat_template: string
   n_gpu_layers: number
   offload_mmproj: boolean
@@ -153,6 +154,7 @@ const logger = {
 export default class llamacpp_extension extends AIEngine {
   provider: string = 'llamacpp'
   autoUnload: boolean = true
+  llamacpp_env: string = ''
   readonly providerId: string = 'llamacpp'
 
   private config: LlamacppConfig
@@ -183,6 +185,7 @@ export default class llamacpp_extension extends AIEngine {
     this.config = loadedConfig as LlamacppConfig
 
     this.autoUnload = this.config.auto_unload
+    this.llamacpp_env = this.config.llamacpp_env
 
     // This sets the base directory where model files for this provider are stored.
     this.providerPath = await joinPath([
@@ -827,6 +830,8 @@ export default class llamacpp_extension extends AIEngine {
       closure()
     } else if (key === 'auto_unload') {
       this.autoUnload = value as boolean
+    } else if (key === 'llamacpp_env') {
+      this.llamacpp_env = value as string
     }
   }
 
@@ -1253,6 +1258,27 @@ export default class llamacpp_extension extends AIEngine {
     }
   }
 
+  private parseEnvFromString(
+    target: Record<string, string>,
+    envString: string
+  ): void {
+    envString
+      .split(';')
+      .filter((pair) => pair.trim())
+      .forEach((pair) => {
+        const [key, ...valueParts] = pair.split('=')
+        const cleanKey = key?.trim()
+
+        if (
+          cleanKey &&
+          valueParts.length > 0 &&
+          !cleanKey.startsWith('LLAMA')
+        ) {
+          target[cleanKey] = valueParts.join('=').trim()
+        }
+      })
+  }
+
   override async load(
     modelId: string,
     overrideSettings?: Partial<LlamacppConfig>,
@@ -1341,6 +1367,9 @@ export default class llamacpp_extension extends AIEngine {
     const api_key = await this.generateApiKey(modelId, String(port))
     envs['LLAMA_API_KEY'] = api_key
 
+    // set user envs
+    this.parseEnvFromString(envs, this.llamacpp_env)
+
     // model option is required
     // NOTE: model_path and mmproj_path can be either relative to Jan's data folder or absolute path
     const modelPath = await joinPath([
@@ -1716,6 +1745,9 @@ export default class llamacpp_extension extends AIEngine {
         `Invalid version/backend format: ${cfg.version_backend}. Expected format: <version>/<backend>`
       )
     }
+    // set envs
+    const envs: Record<string, string> = {}
+    this.parseEnvFromString(envs, this.llamacpp_env)
 
     // Ensure backend is downloaded and ready before proceeding
     await this.ensureBackendReady(backend, version)
@@ -1726,6 +1758,7 @@ export default class llamacpp_extension extends AIEngine {
       const dList = await invoke<DeviceList[]>('plugin:llamacpp|get_devices', {
         backendPath,
         libraryPath,
+        envs,
       })
       return dList
     } catch (error) {
@@ -265,8 +265,9 @@ pub async fn unload_llama_model<R: Runtime>(
 pub async fn get_devices(
     backend_path: &str,
     library_path: Option<&str>,
+    envs: HashMap<String, String>
 ) -> ServerResult<Vec<DeviceInfo>> {
-    get_devices_from_backend(backend_path, library_path).await
+    get_devices_from_backend(backend_path, library_path, envs).await
 }
 
 /// Generate API key using HMAC-SHA256
@@ -1,4 +1,5 @@
 use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
 use std::process::Stdio;
 use std::time::Duration;
 use tokio::process::Command;
@@ -19,6 +20,7 @@ pub struct DeviceInfo {
 pub async fn get_devices_from_backend(
     backend_path: &str,
     library_path: Option<&str>,
+    envs: HashMap<String, String>,
 ) -> ServerResult<Vec<DeviceInfo>> {
     log::info!("Getting devices from server at path: {:?}", backend_path);
 
@@ -27,6 +29,7 @@ pub async fn get_devices_from_backend(
     // Configure the command to run the server with --list-devices
     let mut command = Command::new(backend_path);
     command.arg("--list-devices");
+    command.envs(envs);
 
     // Set up library path
     setup_library_path(library_path, &mut command);