diff --git a/extensions/llamacpp-extension/src/backend.ts b/extensions/llamacpp-extension/src/backend.ts
index 5a76a74f0..3e69c9f76 100644
--- a/extensions/llamacpp-extension/src/backend.ts
+++ b/extensions/llamacpp-extension/src/backend.ts
@@ -102,50 +102,27 @@ export async function listSupportedBackends(): Promise<
   // TODO: fetch versions from the server?
   // TODO: select CUDA version based on driver version
   if (sysType == 'windows-x86_64') {
-    // NOTE: if a machine supports AVX2, should we include noavx and avx?
-    supportedBackends.push('win-noavx-x64')
-    if (features.avx) supportedBackends.push('win-avx-x64')
-    if (features.avx2) supportedBackends.push('win-avx2-x64')
-    if (features.avx512) supportedBackends.push('win-avx512-x64')
+    supportedBackends.push('win-common_cpus-x64')
     if (features.cuda11) {
-      if (features.avx512) supportedBackends.push('win-avx512-cuda-cu11.7-x64')
-      else if (features.avx2) supportedBackends.push('win-avx2-cuda-cu11.7-x64')
-      else if (features.avx) supportedBackends.push('win-avx-cuda-cu11.7-x64')
-      else supportedBackends.push('win-noavx-cuda-cu11.7-x64')
+      supportedBackends.push('win-cuda-11-common_cpus-x64')
     }
     if (features.cuda12) {
-      if (features.avx512) supportedBackends.push('win-avx512-cuda-cu12.0-x64')
-      else if (features.avx2) supportedBackends.push('win-avx2-cuda-cu12.0-x64')
-      else if (features.avx) supportedBackends.push('win-avx-cuda-cu12.0-x64')
-      else supportedBackends.push('win-noavx-cuda-cu12.0-x64')
+      supportedBackends.push('win-cuda-12-common_cpus-x64')
     }
-    if (features.vulkan) supportedBackends.push('win-vulkan-x64')
+    if (features.vulkan) supportedBackends.push('win-vulkan-common_cpus-x64')
   }
   // not available yet, placeholder for future
   else if (sysType === 'windows-aarch64' || sysType === 'windows-arm64') {
     supportedBackends.push('win-arm64')
   } else if (sysType === 'linux-x86_64' || sysType === 'linux-x86') {
-    supportedBackends.push('linux-noavx-x64')
-    if (features.avx) supportedBackends.push('linux-avx-x64')
-    if (features.avx2) supportedBackends.push('linux-avx2-x64')
-    if (features.avx512) supportedBackends.push('linux-avx512-x64')
+    supportedBackends.push('linux-common_cpus-x64')
     if (features.cuda11) {
-      if (features.avx512)
-        supportedBackends.push('linux-avx512-cuda-cu11.7-x64')
-      else if (features.avx2)
-        supportedBackends.push('linux-avx2-cuda-cu11.7-x64')
-      else if (features.avx) supportedBackends.push('linux-avx-cuda-cu11.7-x64')
-      else supportedBackends.push('linux-noavx-cuda-cu11.7-x64')
+      supportedBackends.push('linux-cuda-11-common_cpus-x64')
     }
     if (features.cuda12) {
-      if (features.avx512)
-        supportedBackends.push('linux-avx512-cuda-cu12.0-x64')
-      else if (features.avx2)
-        supportedBackends.push('linux-avx2-cuda-cu12.0-x64')
-      else if (features.avx) supportedBackends.push('linux-avx-cuda-cu12.0-x64')
-      else supportedBackends.push('linux-noavx-cuda-cu12.0-x64')
+      supportedBackends.push('linux-cuda-12-common_cpus-x64')
    }
-    if (features.vulkan) supportedBackends.push('linux-vulkan-x64')
+    if (features.vulkan) supportedBackends.push('linux-vulkan-common_cpus-x64')
  }
   // not available yet, placeholder for future
   else if (sysType === 'linux-aarch64' || sysType === 'linux-arm64') {
@@ -230,10 +207,7 @@ export async function downloadBackend(
   version: string,
   source: 'github' | 'cdn' = 'github'
 ): Promise<void> {
-  const janDataFolderPath = await getJanDataFolderPath()
-  const llamacppPath = await joinPath([janDataFolderPath, 'llamacpp'])
   const backendDir = await getBackendDir(backend, version)
-  const libDir = await joinPath([llamacppPath, 'lib'])
 
   const downloadManager = window.core.extensionManager.getByName(
     '@janhq/download-extension'
@@ -265,7 +239,7 @@ export async function downloadBackend(
       source === 'github'
         ? `https://github.com/janhq/llama.cpp/releases/download/${version}/cudart-llama-bin-${platformName}-cu11.7-x64.tar.gz`
         : `https://catalog.jan.ai/llama.cpp/releases/${version}/cudart-llama-bin-${platformName}-cu11.7-x64.tar.gz`,
-      save_path: await joinPath([libDir, 'cuda11.tar.gz']),
+      save_path: await joinPath([backendDir, 'build', 'bin', 'cuda11.tar.gz']),
       proxy: proxyConfig,
     })
   } else if (backend.includes('cu12.0') && !(await _isCudaInstalled('12.0'))) {
@@ -274,7 +248,7 @@ export async function downloadBackend(
       source === 'github'
         ? `https://github.com/janhq/llama.cpp/releases/download/${version}/cudart-llama-bin-${platformName}-cu12.0-x64.tar.gz`
         : `https://catalog.jan.ai/llama.cpp/releases/${version}/cudart-llama-bin-${platformName}-cu12.0-x64.tar.gz`,
-      save_path: await joinPath([libDir, 'cuda12.tar.gz']),
+      save_path: await joinPath([backendDir, 'build', 'bin', 'cuda12.tar.gz']),
       proxy: proxyConfig,
     })
   }
@@ -344,8 +318,8 @@ async function _getSupportedFeatures() {
   }
 
   // https://docs.nvidia.com/deploy/cuda-compatibility/#cuda-11-and-later-defaults-to-minor-version-compatibility
-  let minCuda11DriverVersion
-  let minCuda12DriverVersion
+  let minCuda11DriverVersion: string
+  let minCuda12DriverVersion: string
   if (sysInfo.os_type === 'linux') {
     minCuda11DriverVersion = '450.80.02'
     minCuda12DriverVersion = '525.60.13'
diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts
index 631220a92..cfda2a403 100644
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@@ -551,9 +551,9 @@ export default class llamacpp_extension extends AIEngine {
 
     // Helper to map backend string to a priority category
     const getBackendCategory = (backendString: string): string | undefined => {
-      if (backendString.includes('cu12.0')) return 'cuda-cu12.0'
-      if (backendString.includes('cu11.7')) return 'cuda-cu11.7'
-      if (backendString.includes('vulkan')) return 'vulkan'
+      if (backendString.includes('cuda-12-common_cpus')) return 'cuda-cu12.0'
+      if (backendString.includes('cuda-11-common_cpus')) return 'cuda-cu11.7'
+      if (backendString.includes('vulkan-common_cpus')) return 'vulkan'
       if (backendString.includes('avx512')) return 'avx512'
       if (backendString.includes('avx2')) return 'avx2'
       if (
@@ -1689,7 +1689,7 @@ export default class llamacpp_extension extends AIEngine {
     const backendPath = await getBackendExePath(backend, version)
 
     try {
-      const sInfo = await loadLlamaModel(backendPath, args, envs)
+      const sInfo = await loadLlamaModel(backendPath, args, envs, isEmbedding)
       return sInfo
     } catch (error) {
       logger.error('Error in load command:\n', error)
diff --git a/src-tauri/plugins/tauri-plugin-llamacpp/guest-js/index.ts b/src-tauri/plugins/tauri-plugin-llamacpp/guest-js/index.ts
index b31133da5..c57680308 100644
--- a/src-tauri/plugins/tauri-plugin-llamacpp/guest-js/index.ts
+++ b/src-tauri/plugins/tauri-plugin-llamacpp/guest-js/index.ts
@@ -6,6 +6,7 @@ export interface SessionInfo {
   port: number;
   model_id: string;
   model_path: string;
+  is_embedding: boolean
   api_key: string;
   mmproj_path?: string;
 }
@@ -36,12 +37,14 @@ export async function cleanupLlamaProcesses(): Promise<void> {
 export async function loadLlamaModel(
   backendPath: string,
   args: string[],
-  envs: Record<string, string>
+  envs: Record<string, string>,
+  isEmbedding: boolean
 ): Promise<SessionInfo> {
   return await invoke('plugin:llamacpp|load_llama_model', {
     backendPath,
     args,
-    envs
+    envs,
+    isEmbedding
   })
 }