feat: Simplify backend architecture

This commit introduces a boolean embedding flag for model loading and refactors the backend detection logic for a cleaner implementation.

Key changes:

 - Embedding Support: The loadLlamaModel API now accepts an isEmbedding: boolean parameter, and SessionInfo gains a corresponding is_embedding: boolean field. This allows the core process to differentiate and correctly initialize models intended for embedding tasks.

 - Backend Naming Simplification (Refactor): Consolidated the CPU-specific backend tags (e.g., win-noavx-x64, win-avx2-x64) into generic *-common_cpus-x64 variants (e.g., win-common_cpus-x64). This streamlines supported backend detection.

 - File Structure Update: Changed the download path for CUDA runtime libraries (cudart) to place them inside the specific backend's build/bin directory (<backendDir>/build/bin/) rather than the shared llamacpp/lib folder, improving asset isolation.
This commit is contained in:
Akarshan 2025-10-15 10:57:45 +05:30
parent 0c5fbc102c
commit 8b15fe4ef2
No known key found for this signature in database
GPG Key ID: D75C9634A870665F
3 changed files with 21 additions and 44 deletions

View File

@ -102,50 +102,27 @@ export async function listSupportedBackends(): Promise<
// TODO: fetch versions from the server? // TODO: fetch versions from the server?
// TODO: select CUDA version based on driver version // TODO: select CUDA version based on driver version
if (sysType == 'windows-x86_64') { if (sysType == 'windows-x86_64') {
// NOTE: if a machine supports AVX2, should we include noavx and avx? supportedBackends.push('win-common_cpus-x64')
supportedBackends.push('win-noavx-x64')
if (features.avx) supportedBackends.push('win-avx-x64')
if (features.avx2) supportedBackends.push('win-avx2-x64')
if (features.avx512) supportedBackends.push('win-avx512-x64')
if (features.cuda11) { if (features.cuda11) {
if (features.avx512) supportedBackends.push('win-avx512-cuda-cu11.7-x64') supportedBackends.push('win-cuda-11-common_cpus-x64')
else if (features.avx2) supportedBackends.push('win-avx2-cuda-cu11.7-x64')
else if (features.avx) supportedBackends.push('win-avx-cuda-cu11.7-x64')
else supportedBackends.push('win-noavx-cuda-cu11.7-x64')
} }
if (features.cuda12) { if (features.cuda12) {
if (features.avx512) supportedBackends.push('win-avx512-cuda-cu12.0-x64') supportedBackends.push('win-cuda-12-common_cpus-x64')
else if (features.avx2) supportedBackends.push('win-avx2-cuda-cu12.0-x64')
else if (features.avx) supportedBackends.push('win-avx-cuda-cu12.0-x64')
else supportedBackends.push('win-noavx-cuda-cu12.0-x64')
} }
if (features.vulkan) supportedBackends.push('win-vulkan-x64') if (features.vulkan) supportedBackends.push('win-vulkan-common_cpus-x64')
} }
// not available yet, placeholder for future // not available yet, placeholder for future
else if (sysType === 'windows-aarch64' || sysType === 'windows-arm64') { else if (sysType === 'windows-aarch64' || sysType === 'windows-arm64') {
supportedBackends.push('win-arm64') supportedBackends.push('win-arm64')
} else if (sysType === 'linux-x86_64' || sysType === 'linux-x86') { } else if (sysType === 'linux-x86_64' || sysType === 'linux-x86') {
supportedBackends.push('linux-noavx-x64') supportedBackends.push('linux-common_cpus-x64')
if (features.avx) supportedBackends.push('linux-avx-x64')
if (features.avx2) supportedBackends.push('linux-avx2-x64')
if (features.avx512) supportedBackends.push('linux-avx512-x64')
if (features.cuda11) { if (features.cuda11) {
if (features.avx512) supportedBackends.push('linux-cuda-11-common_cpus-x64')
supportedBackends.push('linux-avx512-cuda-cu11.7-x64')
else if (features.avx2)
supportedBackends.push('linux-avx2-cuda-cu11.7-x64')
else if (features.avx) supportedBackends.push('linux-avx-cuda-cu11.7-x64')
else supportedBackends.push('linux-noavx-cuda-cu11.7-x64')
} }
if (features.cuda12) { if (features.cuda12) {
if (features.avx512) supportedBackends.push('linux-cuda-12-common_cpus-x64')
supportedBackends.push('linux-avx512-cuda-cu12.0-x64')
else if (features.avx2)
supportedBackends.push('linux-avx2-cuda-cu12.0-x64')
else if (features.avx) supportedBackends.push('linux-avx-cuda-cu12.0-x64')
else supportedBackends.push('linux-noavx-cuda-cu12.0-x64')
} }
if (features.vulkan) supportedBackends.push('linux-vulkan-x64') if (features.vulkan) supportedBackends.push('linux-vulkan-common_cpus-x64')
} }
// not available yet, placeholder for future // not available yet, placeholder for future
else if (sysType === 'linux-aarch64' || sysType === 'linux-arm64') { else if (sysType === 'linux-aarch64' || sysType === 'linux-arm64') {
@ -230,10 +207,7 @@ export async function downloadBackend(
version: string, version: string,
source: 'github' | 'cdn' = 'github' source: 'github' | 'cdn' = 'github'
): Promise<void> { ): Promise<void> {
const janDataFolderPath = await getJanDataFolderPath()
const llamacppPath = await joinPath([janDataFolderPath, 'llamacpp'])
const backendDir = await getBackendDir(backend, version) const backendDir = await getBackendDir(backend, version)
const libDir = await joinPath([llamacppPath, 'lib'])
const downloadManager = window.core.extensionManager.getByName( const downloadManager = window.core.extensionManager.getByName(
'@janhq/download-extension' '@janhq/download-extension'
@ -265,7 +239,7 @@ export async function downloadBackend(
source === 'github' source === 'github'
? `https://github.com/janhq/llama.cpp/releases/download/${version}/cudart-llama-bin-${platformName}-cu11.7-x64.tar.gz` ? `https://github.com/janhq/llama.cpp/releases/download/${version}/cudart-llama-bin-${platformName}-cu11.7-x64.tar.gz`
: `https://catalog.jan.ai/llama.cpp/releases/${version}/cudart-llama-bin-${platformName}-cu11.7-x64.tar.gz`, : `https://catalog.jan.ai/llama.cpp/releases/${version}/cudart-llama-bin-${platformName}-cu11.7-x64.tar.gz`,
save_path: await joinPath([libDir, 'cuda11.tar.gz']), save_path: await joinPath([backendDir, 'build', 'bin', 'cuda11.tar.gz']),
proxy: proxyConfig, proxy: proxyConfig,
}) })
} else if (backend.includes('cu12.0') && !(await _isCudaInstalled('12.0'))) { } else if (backend.includes('cu12.0') && !(await _isCudaInstalled('12.0'))) {
@ -274,7 +248,7 @@ export async function downloadBackend(
source === 'github' source === 'github'
? `https://github.com/janhq/llama.cpp/releases/download/${version}/cudart-llama-bin-${platformName}-cu12.0-x64.tar.gz` ? `https://github.com/janhq/llama.cpp/releases/download/${version}/cudart-llama-bin-${platformName}-cu12.0-x64.tar.gz`
: `https://catalog.jan.ai/llama.cpp/releases/${version}/cudart-llama-bin-${platformName}-cu12.0-x64.tar.gz`, : `https://catalog.jan.ai/llama.cpp/releases/${version}/cudart-llama-bin-${platformName}-cu12.0-x64.tar.gz`,
save_path: await joinPath([libDir, 'cuda12.tar.gz']), save_path: await joinPath([backendDir, 'build', 'bin', 'cuda12.tar.gz']),
proxy: proxyConfig, proxy: proxyConfig,
}) })
} }
@ -344,8 +318,8 @@ async function _getSupportedFeatures() {
} }
// https://docs.nvidia.com/deploy/cuda-compatibility/#cuda-11-and-later-defaults-to-minor-version-compatibility // https://docs.nvidia.com/deploy/cuda-compatibility/#cuda-11-and-later-defaults-to-minor-version-compatibility
let minCuda11DriverVersion let minCuda11DriverVersion: string
let minCuda12DriverVersion let minCuda12DriverVersion: string
if (sysInfo.os_type === 'linux') { if (sysInfo.os_type === 'linux') {
minCuda11DriverVersion = '450.80.02' minCuda11DriverVersion = '450.80.02'
minCuda12DriverVersion = '525.60.13' minCuda12DriverVersion = '525.60.13'

View File

@ -551,9 +551,9 @@ export default class llamacpp_extension extends AIEngine {
// Helper to map backend string to a priority category // Helper to map backend string to a priority category
const getBackendCategory = (backendString: string): string | undefined => { const getBackendCategory = (backendString: string): string | undefined => {
if (backendString.includes('cu12.0')) return 'cuda-cu12.0' if (backendString.includes('cuda-12-common_cpus')) return 'cuda-cu12.0'
if (backendString.includes('cu11.7')) return 'cuda-cu11.7' if (backendString.includes('cuda-11-common_cpus')) return 'cuda-cu11.7'
if (backendString.includes('vulkan')) return 'vulkan' if (backendString.includes('vulkan-common_cpus')) return 'vulkan'
if (backendString.includes('avx512')) return 'avx512' if (backendString.includes('avx512')) return 'avx512'
if (backendString.includes('avx2')) return 'avx2' if (backendString.includes('avx2')) return 'avx2'
if ( if (
@ -1689,7 +1689,7 @@ export default class llamacpp_extension extends AIEngine {
const backendPath = await getBackendExePath(backend, version) const backendPath = await getBackendExePath(backend, version)
try { try {
const sInfo = await loadLlamaModel(backendPath, args, envs) const sInfo = await loadLlamaModel(backendPath, args, envs, isEmbedding)
return sInfo return sInfo
} catch (error) { } catch (error) {
logger.error('Error in load command:\n', error) logger.error('Error in load command:\n', error)

View File

@ -6,6 +6,7 @@ export interface SessionInfo {
port: number; port: number;
model_id: string; model_id: string;
model_path: string; model_path: string;
is_embedding: boolean
api_key: string; api_key: string;
mmproj_path?: string; mmproj_path?: string;
} }
@ -36,12 +37,14 @@ export async function cleanupLlamaProcesses(): Promise<void> {
export async function loadLlamaModel( export async function loadLlamaModel(
backendPath: string, backendPath: string,
args: string[], args: string[],
envs: Record<string, string> envs: Record<string, string>,
isEmbedding: boolean
): Promise<SessionInfo> { ): Promise<SessionInfo> {
return await invoke('plugin:llamacpp|load_llama_model', { return await invoke('plugin:llamacpp|load_llama_model', {
backendPath, backendPath,
args, args,
envs envs,
isEmbedding
}) })
} }