From 8b15fe4ef25a81cc17e713a55081ffd0d23a1f56 Mon Sep 17 00:00:00 2001
From: Akarshan
Date: Wed, 15 Oct 2025 10:57:45 +0530
Subject: [PATCH] feat: Simplify backend architecture

This commit introduces a functional flag for embedding models and refactors
the backend detection logic into a cleaner implementation.

Key changes:

- Embedding Support: The loadLlamaModel API and SessionInfo now include an
  isEmbedding: boolean flag, allowing the core process to differentiate and
  correctly initialize models intended for embedding tasks (a usage sketch
  follows the patch below).
- Backend Naming Simplification (Refactor): Consolidated the CPU-specific
  backend tags (e.g. win-noavx-x64, win-avx2-x64) into generic
  *-common_cpus-x64 variants (e.g. win-common_cpus-x64), streamlining
  supported-backend detection.
- File Structure Update: Changed the download path for the CUDA runtime
  libraries (cudart) to place them inside the specific backend's directory
  (/build/bin/) rather than a shared lib folder, improving asset isolation.
---
 extensions/llamacpp-extension/src/backend.ts   | 50 +++++--------------
 extensions/llamacpp-extension/src/index.ts     |  8 +--
 .../tauri-plugin-llamacpp/guest-js/index.ts    |  7 ++-
 3 files changed, 21 insertions(+), 44 deletions(-)

diff --git a/extensions/llamacpp-extension/src/backend.ts b/extensions/llamacpp-extension/src/backend.ts
index 5a76a74f0..3e69c9f76 100644
--- a/extensions/llamacpp-extension/src/backend.ts
+++ b/extensions/llamacpp-extension/src/backend.ts
@@ -102,50 +102,27 @@ export async function listSupportedBackends(): Promise<
   // TODO: fetch versions from the server?
   // TODO: select CUDA version based on driver version
   if (sysType == 'windows-x86_64') {
-    // NOTE: if a machine supports AVX2, should we include noavx and avx?
-    supportedBackends.push('win-noavx-x64')
-    if (features.avx) supportedBackends.push('win-avx-x64')
-    if (features.avx2) supportedBackends.push('win-avx2-x64')
-    if (features.avx512) supportedBackends.push('win-avx512-x64')
+    supportedBackends.push('win-common_cpus-x64')
     if (features.cuda11) {
-      if (features.avx512) supportedBackends.push('win-avx512-cuda-cu11.7-x64')
-      else if (features.avx2) supportedBackends.push('win-avx2-cuda-cu11.7-x64')
-      else if (features.avx) supportedBackends.push('win-avx-cuda-cu11.7-x64')
-      else supportedBackends.push('win-noavx-cuda-cu11.7-x64')
+      supportedBackends.push('win-cuda-11-common_cpus-x64')
     }
     if (features.cuda12) {
-      if (features.avx512) supportedBackends.push('win-avx512-cuda-cu12.0-x64')
-      else if (features.avx2) supportedBackends.push('win-avx2-cuda-cu12.0-x64')
-      else if (features.avx) supportedBackends.push('win-avx-cuda-cu12.0-x64')
-      else supportedBackends.push('win-noavx-cuda-cu12.0-x64')
+      supportedBackends.push('win-cuda-12-common_cpus-x64')
     }
-    if (features.vulkan) supportedBackends.push('win-vulkan-x64')
+    if (features.vulkan) supportedBackends.push('win-vulkan-common_cpus-x64')
   }
   // not available yet, placeholder for future
   else if (sysType === 'windows-aarch64' || sysType === 'windows-arm64') {
     supportedBackends.push('win-arm64')
   } else if (sysType === 'linux-x86_64' || sysType === 'linux-x86') {
-    supportedBackends.push('linux-noavx-x64')
-    if (features.avx) supportedBackends.push('linux-avx-x64')
-    if (features.avx2) supportedBackends.push('linux-avx2-x64')
-    if (features.avx512) supportedBackends.push('linux-avx512-x64')
+    supportedBackends.push('linux-common_cpus-x64')
     if (features.cuda11) {
-      if (features.avx512)
-        supportedBackends.push('linux-avx512-cuda-cu11.7-x64')
-      else if (features.avx2)
-        supportedBackends.push('linux-avx2-cuda-cu11.7-x64')
-      else if (features.avx) supportedBackends.push('linux-avx-cuda-cu11.7-x64')
-      else supportedBackends.push('linux-noavx-cuda-cu11.7-x64')
+      supportedBackends.push('linux-cuda-11-common_cpus-x64')
     }
     if (features.cuda12) {
-      if (features.avx512)
-        supportedBackends.push('linux-avx512-cuda-cu12.0-x64')
-      else if (features.avx2)
-        supportedBackends.push('linux-avx2-cuda-cu12.0-x64')
-      else if (features.avx) supportedBackends.push('linux-avx-cuda-cu12.0-x64')
-      else supportedBackends.push('linux-noavx-cuda-cu12.0-x64')
+      supportedBackends.push('linux-cuda-12-common_cpus-x64')
     }
-    if (features.vulkan) supportedBackends.push('linux-vulkan-x64')
+    if (features.vulkan) supportedBackends.push('linux-vulkan-common_cpus-x64')
   }
   // not available yet, placeholder for future
   else if (sysType === 'linux-aarch64' || sysType === 'linux-arm64') {
@@ -230,10 +207,7 @@ export async function downloadBackend(
   version: string,
   source: 'github' | 'cdn' = 'github'
 ): Promise<void> {
-  const janDataFolderPath = await getJanDataFolderPath()
-  const llamacppPath = await joinPath([janDataFolderPath, 'llamacpp'])
   const backendDir = await getBackendDir(backend, version)
-  const libDir = await joinPath([llamacppPath, 'lib'])

   const downloadManager = window.core.extensionManager.getByName(
     '@janhq/download-extension'
@@ -265,7 +239,7 @@ export async function downloadBackend(
       source === 'github'
         ? `https://github.com/janhq/llama.cpp/releases/download/${version}/cudart-llama-bin-${platformName}-cu11.7-x64.tar.gz`
         : `https://catalog.jan.ai/llama.cpp/releases/${version}/cudart-llama-bin-${platformName}-cu11.7-x64.tar.gz`,
-      save_path: await joinPath([libDir, 'cuda11.tar.gz']),
+      save_path: await joinPath([backendDir, 'build', 'bin', 'cuda11.tar.gz']),
       proxy: proxyConfig,
     })
   } else if (backend.includes('cu12.0') && !(await _isCudaInstalled('12.0'))) {
@@ -274,7 +248,7 @@ export async function downloadBackend(
       source === 'github'
         ? `https://github.com/janhq/llama.cpp/releases/download/${version}/cudart-llama-bin-${platformName}-cu12.0-x64.tar.gz`
         : `https://catalog.jan.ai/llama.cpp/releases/${version}/cudart-llama-bin-${platformName}-cu12.0-x64.tar.gz`,
-      save_path: await joinPath([libDir, 'cuda12.tar.gz']),
+      save_path: await joinPath([backendDir, 'build', 'bin', 'cuda12.tar.gz']),
       proxy: proxyConfig,
     })
   }
@@ -344,8 +318,8 @@ async function _getSupportedFeatures() {
   }

   // https://docs.nvidia.com/deploy/cuda-compatibility/#cuda-11-and-later-defaults-to-minor-version-compatibility
-  let minCuda11DriverVersion
-  let minCuda12DriverVersion
+  let minCuda11DriverVersion: string
+  let minCuda12DriverVersion: string
   if (sysInfo.os_type === 'linux') {
     minCuda11DriverVersion = '450.80.02'
     minCuda12DriverVersion = '525.60.13'
diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts
index 631220a92..cfda2a403 100644
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@@ -551,9 +551,9 @@ export default class llamacpp_extension extends AIEngine {

     // Helper to map backend string to a priority category
     const getBackendCategory = (backendString: string): string | undefined => {
-      if (backendString.includes('cu12.0')) return 'cuda-cu12.0'
-      if (backendString.includes('cu11.7')) return 'cuda-cu11.7'
-      if (backendString.includes('vulkan')) return 'vulkan'
+      if (backendString.includes('cuda-12-common_cpus')) return 'cuda-cu12.0'
+      if (backendString.includes('cuda-11-common_cpus')) return 'cuda-cu11.7'
+      if (backendString.includes('vulkan-common_cpus')) return 'vulkan'
       if (backendString.includes('avx512')) return 'avx512'
       if (backendString.includes('avx2')) return 'avx2'
       if (
@@ -1689,7 +1689,7 @@ export default class llamacpp_extension extends AIEngine {
     const backendPath = await getBackendExePath(backend, version)

     try {
-      const sInfo = await loadLlamaModel(backendPath, args, envs)
+      const sInfo = await loadLlamaModel(backendPath, args, envs, isEmbedding)
       return sInfo
     } catch (error) {
       logger.error('Error in load command:\n', error)
diff --git a/src-tauri/plugins/tauri-plugin-llamacpp/guest-js/index.ts b/src-tauri/plugins/tauri-plugin-llamacpp/guest-js/index.ts
index b31133da5..c57680308 100644
--- a/src-tauri/plugins/tauri-plugin-llamacpp/guest-js/index.ts
+++ b/src-tauri/plugins/tauri-plugin-llamacpp/guest-js/index.ts
@@ -6,6 +6,7 @@ export interface SessionInfo {
   port: number;
   model_id: string;
   model_path: string;
+  is_embedding: boolean
   api_key: string;
   mmproj_path?: string;
 }
@@ -36,12 +37,14 @@ export async function cleanupLlamaProcesses(): Promise<void> {

 export async function loadLlamaModel(
   backendPath: string,
   args: string[],
-  envs: Record<string, string>
+  envs: Record<string, string>,
+  isEmbedding: boolean
 ): Promise<SessionInfo> {
   return await invoke('plugin:llamacpp|load_llama_model', {
     backendPath,
     args,
-    envs
+    envs,
+    isEmbedding
   })
 }
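
Appended editor's note (not part of the patch): the sketch below illustrates the extended loadLlamaModel signature on the caller side. The import path, model path, and server arguments are assumptions for illustration; only the extra isEmbedding parameter and the is_embedding field come from this change.

```ts
// Hypothetical usage of the new isEmbedding parameter; the import path and
// the argument list are placeholders, not values introduced by this patch.
import { loadLlamaModel, SessionInfo } from 'tauri-plugin-llamacpp-api'

async function startEmbeddingSession(
  backendPath: string,
  modelPath: string
): Promise<SessionInfo> {
  // Example llama-server style arguments; real callers assemble these from
  // the model's settings.
  const args = ['-m', modelPath, '--embedding']
  const envs: Record<string, string> = {}

  // Passing true lets the core process initialize the model for embedding
  // tasks; the returned SessionInfo carries the new is_embedding field.
  const session = await loadLlamaModel(backendPath, args, envs, true)
  return session
}
```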