Compare commits

dev...refactor/b (5 commits)

| Author | SHA1 | Date |
|---|---|---|
| | ea231676bf | |
| | 1f4977c1d1 | |
| | 7b6e4cd172 | |
| | 8b15fe4ef2 | |
| | 0c5fbc102c | |
```diff
@@ -149,9 +149,14 @@
       "key": "flash_attn",
       "title": "Flash Attention",
       "description": "Enable Flash Attention for optimized performance.",
-      "controllerType": "checkbox",
+      "controllerType": "dropdown",
       "controllerProps": {
-        "value": false
+        "value": "auto",
+        "options": [
+          { "value": "auto", "name": "Auto" },
+          { "value": "on", "name": "ON" },
+          { "value": "off", "name": "OFF" }
+        ]
       }
     },
     {
```
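The `flash_attn` setting moves from a boolean checkbox to a tri-state dropdown. A minimal sketch of how the extension side could type that value; the literal union is an assumption, the diff only lists the option values `auto`, `on`, and `off`:

```ts
// Hedged sketch: typing the new tri-state setting on the extension side.
// The union type is an assumption; the diff only shows these option values.
type FlashAttnSetting = 'auto' | 'on' | 'off'

// Matches "value": "auto" in controllerProps above.
const defaultFlashAttn: FlashAttnSetting = 'auto'
```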
```diff
@@ -102,50 +102,27 @@ export async function listSupportedBackends(): Promise<
   // TODO: fetch versions from the server?
   // TODO: select CUDA version based on driver version
   if (sysType == 'windows-x86_64') {
-    // NOTE: if a machine supports AVX2, should we include noavx and avx?
-    supportedBackends.push('win-noavx-x64')
-    if (features.avx) supportedBackends.push('win-avx-x64')
-    if (features.avx2) supportedBackends.push('win-avx2-x64')
-    if (features.avx512) supportedBackends.push('win-avx512-x64')
+    supportedBackends.push('win-common_cpus-x64')
     if (features.cuda11) {
-      if (features.avx512) supportedBackends.push('win-avx512-cuda-cu11.7-x64')
-      else if (features.avx2) supportedBackends.push('win-avx2-cuda-cu11.7-x64')
-      else if (features.avx) supportedBackends.push('win-avx-cuda-cu11.7-x64')
-      else supportedBackends.push('win-noavx-cuda-cu11.7-x64')
+      supportedBackends.push('win-cuda-11-common_cpus-x64')
     }
     if (features.cuda12) {
-      if (features.avx512) supportedBackends.push('win-avx512-cuda-cu12.0-x64')
-      else if (features.avx2) supportedBackends.push('win-avx2-cuda-cu12.0-x64')
-      else if (features.avx) supportedBackends.push('win-avx-cuda-cu12.0-x64')
-      else supportedBackends.push('win-noavx-cuda-cu12.0-x64')
+      supportedBackends.push('win-cuda-12-common_cpus-x64')
     }
-    if (features.vulkan) supportedBackends.push('win-vulkan-x64')
+    if (features.vulkan) supportedBackends.push('win-vulkan-common_cpus-x64')
   }
   // not available yet, placeholder for future
   else if (sysType === 'windows-aarch64' || sysType === 'windows-arm64') {
     supportedBackends.push('win-arm64')
   } else if (sysType === 'linux-x86_64' || sysType === 'linux-x86') {
-    supportedBackends.push('linux-noavx-x64')
-    if (features.avx) supportedBackends.push('linux-avx-x64')
-    if (features.avx2) supportedBackends.push('linux-avx2-x64')
-    if (features.avx512) supportedBackends.push('linux-avx512-x64')
+    supportedBackends.push('linux-common_cpus-x64')
     if (features.cuda11) {
-      if (features.avx512)
-        supportedBackends.push('linux-avx512-cuda-cu11.7-x64')
-      else if (features.avx2)
-        supportedBackends.push('linux-avx2-cuda-cu11.7-x64')
-      else if (features.avx) supportedBackends.push('linux-avx-cuda-cu11.7-x64')
-      else supportedBackends.push('linux-noavx-cuda-cu11.7-x64')
+      supportedBackends.push('linux-cuda-11-common_cpus-x64')
     }
     if (features.cuda12) {
-      if (features.avx512)
-        supportedBackends.push('linux-avx512-cuda-cu12.0-x64')
-      else if (features.avx2)
-        supportedBackends.push('linux-avx2-cuda-cu12.0-x64')
-      else if (features.avx) supportedBackends.push('linux-avx-cuda-cu12.0-x64')
-      else supportedBackends.push('linux-noavx-cuda-cu12.0-x64')
+      supportedBackends.push('linux-cuda-12-common_cpus-x64')
     }
-    if (features.vulkan) supportedBackends.push('linux-vulkan-x64')
+    if (features.vulkan) supportedBackends.push('linux-vulkan-common_cpus-x64')
   }
   // not available yet, placeholder for future
   else if (sysType === 'linux-aarch64' || sysType === 'linux-arm64') {
```
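The per-CPU-feature variants (noavx/avx/avx2/avx512) collapse into single `common_cpus` builds. An illustrative sketch of what `listSupportedBackends()` would now return for a hypothetical `windows-x86_64` machine with CUDA 12 and Vulkan, using only names from the `+` lines above:

```ts
// Illustrative only: expected result for a windows-x86_64 machine whose
// feature detection reports cuda12 and vulkan.
const expectedBackends = [
  'win-common_cpus-x64',
  'win-cuda-12-common_cpus-x64',
  'win-vulkan-common_cpus-x64',
]
```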
```diff
@@ -230,10 +207,7 @@ export async function downloadBackend(
   version: string,
   source: 'github' | 'cdn' = 'github'
 ): Promise<void> {
-  const janDataFolderPath = await getJanDataFolderPath()
-  const llamacppPath = await joinPath([janDataFolderPath, 'llamacpp'])
   const backendDir = await getBackendDir(backend, version)
-  const libDir = await joinPath([llamacppPath, 'lib'])
 
   const downloadManager = window.core.extensionManager.getByName(
     '@janhq/download-extension'
@@ -265,7 +239,7 @@ export async function downloadBackend(
       source === 'github'
         ? `https://github.com/janhq/llama.cpp/releases/download/${version}/cudart-llama-bin-${platformName}-cu11.7-x64.tar.gz`
         : `https://catalog.jan.ai/llama.cpp/releases/${version}/cudart-llama-bin-${platformName}-cu11.7-x64.tar.gz`,
-      save_path: await joinPath([libDir, 'cuda11.tar.gz']),
+      save_path: await joinPath([backendDir, 'build', 'bin', 'cuda11.tar.gz']),
       proxy: proxyConfig,
     })
   } else if (backend.includes('cu12.0') && !(await _isCudaInstalled('12.0'))) {
@@ -274,7 +248,7 @@ export async function downloadBackend(
       source === 'github'
         ? `https://github.com/janhq/llama.cpp/releases/download/${version}/cudart-llama-bin-${platformName}-cu12.0-x64.tar.gz`
         : `https://catalog.jan.ai/llama.cpp/releases/${version}/cudart-llama-bin-${platformName}-cu12.0-x64.tar.gz`,
-      save_path: await joinPath([libDir, 'cuda12.tar.gz']),
+      save_path: await joinPath([backendDir, 'build', 'bin', 'cuda12.tar.gz']),
       proxy: proxyConfig,
     })
   }
```
```diff
@@ -344,8 +318,8 @@ async function _getSupportedFeatures() {
   }
 
   // https://docs.nvidia.com/deploy/cuda-compatibility/#cuda-11-and-later-defaults-to-minor-version-compatibility
-  let minCuda11DriverVersion
-  let minCuda12DriverVersion
+  let minCuda11DriverVersion: string
+  let minCuda12DriverVersion: string
   if (sysInfo.os_type === 'linux') {
     minCuda11DriverVersion = '450.80.02'
     minCuda12DriverVersion = '525.60.13'
```
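For context, a minimal sketch of how the minimum driver versions above could be checked against an installed NVIDIA driver string; the `atLeast` helper is an assumption for illustration and is not part of the source, which only defines the minimum versions:

```ts
// Hedged sketch: dotted-version comparison against the Linux minimums above.
const atLeast = (installed: string, min: string): boolean => {
  const a = installed.split('.').map(Number)
  const b = min.split('.').map(Number)
  for (let i = 0; i < Math.max(a.length, b.length); i++) {
    const diff = (a[i] ?? 0) - (b[i] ?? 0)
    if (diff !== 0) return diff > 0
  }
  return true
}

atLeast('535.104.05', '525.60.13') // true  -> CUDA 12 runtime usable
atLeast('515.65.01', '525.60.13')  // false -> fall back to CUDA 11
```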
```diff
@@ -38,10 +38,12 @@ import { invoke } from '@tauri-apps/api/core'
 import { getProxyConfig } from './util'
 import { basename } from '@tauri-apps/api/path'
 import {
+  loadLlamaModel,
   readGgufMetadata,
   getModelSize,
   isModelSupported,
   planModelLoadInternal,
+  unloadLlamaModel,
 } from '@janhq/tauri-plugin-llamacpp-api'
 import { getSystemUsage, getSystemInfo } from '@janhq/tauri-plugin-hardware-api'
 
@@ -69,7 +71,7 @@ type LlamacppConfig = {
   device: string
   split_mode: string
   main_gpu: number
-  flash_attn: boolean
+  flash_attn: string
   cont_batching: boolean
   no_mmap: boolean
   mlock: boolean
@@ -549,9 +551,9 @@ export default class llamacpp_extension extends AIEngine {
 
     // Helper to map backend string to a priority category
     const getBackendCategory = (backendString: string): string | undefined => {
-      if (backendString.includes('cu12.0')) return 'cuda-cu12.0'
-      if (backendString.includes('cu11.7')) return 'cuda-cu11.7'
-      if (backendString.includes('vulkan')) return 'vulkan'
+      if (backendString.includes('cuda-12-common_cpus')) return 'cuda-cu12.0'
+      if (backendString.includes('cuda-11-common_cpus')) return 'cuda-cu11.7'
+      if (backendString.includes('vulkan-common_cpus')) return 'vulkan'
       if (backendString.includes('avx512')) return 'avx512'
       if (backendString.includes('avx2')) return 'avx2'
       if (
```
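A hedged, standalone sketch of the updated category mapping applied to the new backend names. The branches mirror the `+` lines above; the trailing branches are cut off in the diff, so the `undefined` fallback here is an assumption:

```ts
// Standalone copy for illustration; in the source this helper is local to
// the extension class.
const getBackendCategory = (backendString: string): string | undefined => {
  if (backendString.includes('cuda-12-common_cpus')) return 'cuda-cu12.0'
  if (backendString.includes('cuda-11-common_cpus')) return 'cuda-cu11.7'
  if (backendString.includes('vulkan-common_cpus')) return 'vulkan'
  if (backendString.includes('avx512')) return 'avx512'
  if (backendString.includes('avx2')) return 'avx2'
  return undefined // assumed fallback; remaining branches not shown in the diff
}

getBackendCategory('win-cuda-12-common_cpus-x64')   // 'cuda-cu12.0'
getBackendCategory('linux-vulkan-common_cpus-x64')  // 'vulkan'
```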
```diff
@@ -1644,18 +1646,20 @@ export default class llamacpp_extension extends AIEngine {
     if (cfg.device.length > 0) args.push('--device', cfg.device)
     if (cfg.split_mode.length > 0 && cfg.split_mode != 'layer')
       args.push('--split-mode', cfg.split_mode)
-    if (cfg.main_gpu !== undefined && cfg.main_gpu != 0)
+    if (cfg.main_gpu !== undefined && cfg.main_gpu !== 0)
       args.push('--main-gpu', String(cfg.main_gpu))
+    // Note: Older llama.cpp versions are no longer supported
+    if (
+      cfg.flash_attn !== undefined ||
+      !cfg.flash_attn ||
+      cfg.flash_attn !== ''
+    )
+      args.push('--flash-attn', String(cfg.flash_attn)) //default: auto = ON when supported
 
     // Boolean flags
     if (cfg.ctx_shift) args.push('--context-shift')
-    if (Number(version.replace(/^b/, '')) >= 6325) {
-      if (!cfg.flash_attn) args.push('--flash-attn', 'off') //default: auto = ON when supported
-    } else {
-      if (cfg.flash_attn) args.push('--flash-attn')
-    }
     if (cfg.cont_batching) args.push('--cont-batching')
-    args.push('--no-mmap')
+    if (cfg.no_mmap) args.push('--no-mmap')
     if (cfg.mlock) args.push('--mlock')
     if (cfg.no_kv_offload) args.push('--no-kv-offload')
     if (isEmbedding) {
```
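With this change, `--flash-attn` is passed with an explicit value (the guard above is effectively always true) instead of being toggled by version-dependent boolean logic. A minimal illustration of the resulting arguments, assuming `cfg.flash_attn` holds one of the dropdown values:

```ts
// Illustrative: arguments produced by the '+' line
// args.push('--flash-attn', String(cfg.flash_attn)) for each setting value.
const flashAttnArgs = (value: string): string[] => ['--flash-attn', value]

flashAttnArgs('auto') // ['--flash-attn', 'auto'] - ON when supported
flashAttnArgs('on')   // ['--flash-attn', 'on']
flashAttnArgs('off')  // ['--flash-attn', 'off']
```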
```diff
@@ -1667,7 +1671,7 @@ export default class llamacpp_extension extends AIEngine {
     if (cfg.cache_type_k && cfg.cache_type_k != 'f16')
       args.push('--cache-type-k', cfg.cache_type_k)
     if (
-      cfg.flash_attn &&
+      cfg.flash_attn !== 'on' &&
       cfg.cache_type_v != 'f16' &&
       cfg.cache_type_v != 'f32'
     ) {
@@ -1688,20 +1692,9 @@ export default class llamacpp_extension extends AIEngine {
 
     logger.info('Calling Tauri command llama_load with args:', args)
     const backendPath = await getBackendExePath(backend, version)
-    const libraryPath = await joinPath([await this.getProviderPath(), 'lib'])
 
     try {
-      // TODO: add LIBRARY_PATH
-      const sInfo = await invoke<SessionInfo>(
-        'plugin:llamacpp|load_llama_model',
-        {
-          backendPath,
-          libraryPath,
-          args,
-          envs,
-          isEmbedding,
-        }
-      )
+      const sInfo = await loadLlamaModel(backendPath, args, envs, isEmbedding)
       return sInfo
     } catch (error) {
       logger.error('Error in load command:\n', error)
@@ -1717,12 +1710,7 @@ export default class llamacpp_extension extends AIEngine {
     const pid = sInfo.pid
     try {
       // Pass the PID as the session_id
-      const result = await invoke<UnloadResult>(
-        'plugin:llamacpp|unload_llama_model',
-        {
-          pid: pid,
-        }
-      )
+      const result = await unloadLlamaModel(pid)
 
       // If successful, remove from active sessions
       if (result.success) {
@@ -2042,7 +2030,10 @@ export default class llamacpp_extension extends AIEngine {
     if (sysInfo?.os_type === 'linux' && Array.isArray(sysInfo.gpus)) {
       const usage = await getSystemUsage()
       if (usage && Array.isArray(usage.gpus)) {
-        const uuidToUsage: Record<string, { total_memory: number; used_memory: number }> = {}
+        const uuidToUsage: Record<
+          string,
+          { total_memory: number; used_memory: number }
+        > = {}
         for (const u of usage.gpus as any[]) {
           if (u && typeof u.uuid === 'string') {
             uuidToUsage[u.uuid] = u
@@ -2082,7 +2073,10 @@ export default class llamacpp_extension extends AIEngine {
             typeof u.used_memory === 'number'
           ) {
             const total = Math.max(0, Math.floor(u.total_memory))
-            const free = Math.max(0, Math.floor(u.total_memory - u.used_memory))
+            const free = Math.max(
+              0,
+              Math.floor(u.total_memory - u.used_memory)
+            )
             return { ...dev, mem: total, free }
           }
         }
```
```diff
@@ -2,11 +2,18 @@ import { invoke } from '@tauri-apps/api/core'
 
 // Types
 export interface SessionInfo {
-  pid: number
-  port: number
-  model_id: string
-  model_path: string
-  api_key: string
+  pid: number;
+  port: number;
+  model_id: string;
+  model_path: string;
+  is_embedding: boolean
+  api_key: string;
+  mmproj_path?: string;
+}
+
+export interface UnloadResult {
+  success: boolean;
+  error?: string;
 }
 
 export interface DeviceInfo {
@@ -29,19 +36,19 @@ export async function cleanupLlamaProcesses(): Promise<void> {
 // LlamaCpp server commands
 export async function loadLlamaModel(
   backendPath: string,
-  libraryPath?: string,
-  args: string[] = [],
-  isEmbedding: boolean = false
+  args: string[],
+  envs: Record<string, string>,
+  isEmbedding: boolean
 ): Promise<SessionInfo> {
   return await invoke('plugin:llamacpp|load_llama_model', {
     backendPath,
-    libraryPath,
     args,
-    isEmbedding,
+    envs,
+    isEmbedding
   })
 }
 
-export async function unloadLlamaModel(pid: number): Promise<void> {
+export async function unloadLlamaModel(pid: number): Promise<UnloadResult> {
   return await invoke('plugin:llamacpp|unload_llama_model', { pid })
 }
 
```
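For reference, a hedged usage sketch of the updated wrappers with their new signatures from the diff above; the model path, port, and environment values are placeholders:

```ts
import {
  loadLlamaModel,
  unloadLlamaModel,
} from '@janhq/tauri-plugin-llamacpp-api'

// Sketch only: argument values are placeholders; the signatures follow the
// diff above (backendPath, args, envs, isEmbedding) -> SessionInfo.
async function loadThenUnload(backendPath: string): Promise<void> {
  const session = await loadLlamaModel(
    backendPath,
    ['-m', '/path/to/model.gguf', '--port', '8080'],
    { CUDA_VISIBLE_DEVICES: '0' },
    false // isEmbedding
  )

  const result = await unloadLlamaModel(session.pid)
  if (!result.success) {
    console.error('Unload failed:', result.error)
  }
}
```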
```diff
@@ -41,7 +41,6 @@ pub struct UnloadResult {
 pub async fn load_llama_model<R: Runtime>(
     app_handle: tauri::AppHandle<R>,
     backend_path: &str,
-    library_path: Option<&str>,
     mut args: Vec<String>,
     envs: HashMap<String, String>,
     is_embedding: bool,
@@ -52,7 +51,7 @@ pub async fn load_llama_model<R: Runtime>(
     log::info!("Attempting to launch server at path: {:?}", backend_path);
     log::info!("Using arguments: {:?}", args);
 
-    validate_binary_path(backend_path)?;
+    let bin_path = validate_binary_path(backend_path)?;
 
     let port = parse_port_from_args(&args);
     let model_path_pb = validate_model_path(&mut args)?;
@@ -83,11 +82,11 @@ pub async fn load_llama_model<R: Runtime>(
     let model_id = extract_arg_value(&args, "-a");
 
     // Configure the command to run the server
-    let mut command = Command::new(backend_path);
+    let mut command = Command::new(&bin_path);
     command.args(args);
     command.envs(envs);
 
-    setup_library_path(library_path, &mut command);
+    setup_library_path(bin_path.parent().and_then(|p| p.to_str()), &mut command);
     command.stdout(Stdio::piped());
     command.stderr(Stdio::piped());
     setup_windows_process_flags(&mut command);
@@ -280,10 +279,9 @@ pub async fn unload_llama_model<R: Runtime>(
 #[tauri::command]
 pub async fn get_devices(
     backend_path: &str,
-    library_path: Option<&str>,
     envs: HashMap<String, String>,
 ) -> ServerResult<Vec<DeviceInfo>> {
-    get_devices_from_backend(backend_path, library_path, envs).await
+    get_devices_from_backend(backend_path, envs).await
 }
 
 /// Generate API key using HMAC-SHA256
@@ -19,20 +19,19 @@ pub struct DeviceInfo {
 
 pub async fn get_devices_from_backend(
     backend_path: &str,
-    library_path: Option<&str>,
     envs: HashMap<String, String>,
 ) -> ServerResult<Vec<DeviceInfo>> {
     log::info!("Getting devices from server at path: {:?}", backend_path);
 
-    validate_binary_path(backend_path)?;
+    let bin_path = validate_binary_path(backend_path)?;
 
     // Configure the command to run the server with --list-devices
-    let mut command = Command::new(backend_path);
+    let mut command = Command::new(&bin_path);
     command.arg("--list-devices");
     command.envs(envs);
 
     // Set up library path
-    setup_library_path(library_path, &mut command);
+    setup_library_path(bin_path.parent().and_then(|p| p.to_str()), &mut command);
 
     command.stdout(Stdio::piped());
     command.stderr(Stdio::piped());
@@ -410,4 +409,4 @@ AnotherInvalid
         assert_eq!(result[0].id, "Vulkan0");
         assert_eq!(result[1].id, "CUDA0");
     }
 }
```
```diff
@@ -62,7 +62,6 @@ pub async fn estimate_kv_cache_internal(
     ctx_size: Option<u64>,
 ) -> Result<KVCacheEstimate, KVCacheError> {
     log::info!("Received ctx_size parameter: {:?}", ctx_size);
-    log::info!("Received model metadata:\n{:?}", &meta);
     let arch = meta
         .get("general.architecture")
         .ok_or(KVCacheError::ArchitectureNotFound)?;
```