feat: Simplify backend architecture
This commit introduces a boolean embedding flag for models and refactors the backend detection logic for a cleaner implementation. Key changes: - Embedding Support: The loadLlamaModel API now accepts an isEmbedding: boolean parameter, and SessionInfo gains a corresponding is_embedding: boolean field. This allows the core process to differentiate and correctly initialize models intended for embedding tasks. - Backend Naming Simplification (Refactor): Consolidated the CPU-specific backend tags (e.g., win-noavx-x64, win-avx2-x64) into generic *-common_cpus-x64 variants (e.g., win-common_cpus-x64). This streamlines supported-backend detection. - File Structure Update: Changed the download path for CUDA runtime libraries (cudart) so they are placed inside the specific backend's directory (/build/bin/) rather than a shared lib folder, improving asset isolation.
This commit is contained in:
parent
0c5fbc102c
commit
8b15fe4ef2
@ -102,50 +102,27 @@ export async function listSupportedBackends(): Promise<
|
|||||||
// TODO: fetch versions from the server?
|
// TODO: fetch versions from the server?
|
||||||
// TODO: select CUDA version based on driver version
|
// TODO: select CUDA version based on driver version
|
||||||
if (sysType == 'windows-x86_64') {
|
if (sysType == 'windows-x86_64') {
|
||||||
// NOTE: if a machine supports AVX2, should we include noavx and avx?
|
supportedBackends.push('win-common_cpus-x64')
|
||||||
supportedBackends.push('win-noavx-x64')
|
|
||||||
if (features.avx) supportedBackends.push('win-avx-x64')
|
|
||||||
if (features.avx2) supportedBackends.push('win-avx2-x64')
|
|
||||||
if (features.avx512) supportedBackends.push('win-avx512-x64')
|
|
||||||
if (features.cuda11) {
|
if (features.cuda11) {
|
||||||
if (features.avx512) supportedBackends.push('win-avx512-cuda-cu11.7-x64')
|
supportedBackends.push('win-cuda-11-common_cpus-x64')
|
||||||
else if (features.avx2) supportedBackends.push('win-avx2-cuda-cu11.7-x64')
|
|
||||||
else if (features.avx) supportedBackends.push('win-avx-cuda-cu11.7-x64')
|
|
||||||
else supportedBackends.push('win-noavx-cuda-cu11.7-x64')
|
|
||||||
}
|
}
|
||||||
if (features.cuda12) {
|
if (features.cuda12) {
|
||||||
if (features.avx512) supportedBackends.push('win-avx512-cuda-cu12.0-x64')
|
supportedBackends.push('win-cuda-12-common_cpus-x64')
|
||||||
else if (features.avx2) supportedBackends.push('win-avx2-cuda-cu12.0-x64')
|
|
||||||
else if (features.avx) supportedBackends.push('win-avx-cuda-cu12.0-x64')
|
|
||||||
else supportedBackends.push('win-noavx-cuda-cu12.0-x64')
|
|
||||||
}
|
}
|
||||||
if (features.vulkan) supportedBackends.push('win-vulkan-x64')
|
if (features.vulkan) supportedBackends.push('win-vulkan-common_cpus-x64')
|
||||||
}
|
}
|
||||||
// not available yet, placeholder for future
|
// not available yet, placeholder for future
|
||||||
else if (sysType === 'windows-aarch64' || sysType === 'windows-arm64') {
|
else if (sysType === 'windows-aarch64' || sysType === 'windows-arm64') {
|
||||||
supportedBackends.push('win-arm64')
|
supportedBackends.push('win-arm64')
|
||||||
} else if (sysType === 'linux-x86_64' || sysType === 'linux-x86') {
|
} else if (sysType === 'linux-x86_64' || sysType === 'linux-x86') {
|
||||||
supportedBackends.push('linux-noavx-x64')
|
supportedBackends.push('linux-common_cpus-x64')
|
||||||
if (features.avx) supportedBackends.push('linux-avx-x64')
|
|
||||||
if (features.avx2) supportedBackends.push('linux-avx2-x64')
|
|
||||||
if (features.avx512) supportedBackends.push('linux-avx512-x64')
|
|
||||||
if (features.cuda11) {
|
if (features.cuda11) {
|
||||||
if (features.avx512)
|
supportedBackends.push('linux-cuda-11-common_cpus-x64')
|
||||||
supportedBackends.push('linux-avx512-cuda-cu11.7-x64')
|
|
||||||
else if (features.avx2)
|
|
||||||
supportedBackends.push('linux-avx2-cuda-cu11.7-x64')
|
|
||||||
else if (features.avx) supportedBackends.push('linux-avx-cuda-cu11.7-x64')
|
|
||||||
else supportedBackends.push('linux-noavx-cuda-cu11.7-x64')
|
|
||||||
}
|
}
|
||||||
if (features.cuda12) {
|
if (features.cuda12) {
|
||||||
if (features.avx512)
|
supportedBackends.push('linux-cuda-12-common_cpus-x64')
|
||||||
supportedBackends.push('linux-avx512-cuda-cu12.0-x64')
|
|
||||||
else if (features.avx2)
|
|
||||||
supportedBackends.push('linux-avx2-cuda-cu12.0-x64')
|
|
||||||
else if (features.avx) supportedBackends.push('linux-avx-cuda-cu12.0-x64')
|
|
||||||
else supportedBackends.push('linux-noavx-cuda-cu12.0-x64')
|
|
||||||
}
|
}
|
||||||
if (features.vulkan) supportedBackends.push('linux-vulkan-x64')
|
if (features.vulkan) supportedBackends.push('linux-vulkan-common_cpus-x64')
|
||||||
}
|
}
|
||||||
// not available yet, placeholder for future
|
// not available yet, placeholder for future
|
||||||
else if (sysType === 'linux-aarch64' || sysType === 'linux-arm64') {
|
else if (sysType === 'linux-aarch64' || sysType === 'linux-arm64') {
|
||||||
@ -230,10 +207,7 @@ export async function downloadBackend(
|
|||||||
version: string,
|
version: string,
|
||||||
source: 'github' | 'cdn' = 'github'
|
source: 'github' | 'cdn' = 'github'
|
||||||
): Promise<void> {
|
): Promise<void> {
|
||||||
const janDataFolderPath = await getJanDataFolderPath()
|
|
||||||
const llamacppPath = await joinPath([janDataFolderPath, 'llamacpp'])
|
|
||||||
const backendDir = await getBackendDir(backend, version)
|
const backendDir = await getBackendDir(backend, version)
|
||||||
const libDir = await joinPath([llamacppPath, 'lib'])
|
|
||||||
|
|
||||||
const downloadManager = window.core.extensionManager.getByName(
|
const downloadManager = window.core.extensionManager.getByName(
|
||||||
'@janhq/download-extension'
|
'@janhq/download-extension'
|
||||||
@ -265,7 +239,7 @@ export async function downloadBackend(
|
|||||||
source === 'github'
|
source === 'github'
|
||||||
? `https://github.com/janhq/llama.cpp/releases/download/${version}/cudart-llama-bin-${platformName}-cu11.7-x64.tar.gz`
|
? `https://github.com/janhq/llama.cpp/releases/download/${version}/cudart-llama-bin-${platformName}-cu11.7-x64.tar.gz`
|
||||||
: `https://catalog.jan.ai/llama.cpp/releases/${version}/cudart-llama-bin-${platformName}-cu11.7-x64.tar.gz`,
|
: `https://catalog.jan.ai/llama.cpp/releases/${version}/cudart-llama-bin-${platformName}-cu11.7-x64.tar.gz`,
|
||||||
save_path: await joinPath([libDir, 'cuda11.tar.gz']),
|
save_path: await joinPath([backendDir, 'build', 'bin', 'cuda11.tar.gz']),
|
||||||
proxy: proxyConfig,
|
proxy: proxyConfig,
|
||||||
})
|
})
|
||||||
} else if (backend.includes('cu12.0') && !(await _isCudaInstalled('12.0'))) {
|
} else if (backend.includes('cu12.0') && !(await _isCudaInstalled('12.0'))) {
|
||||||
@ -274,7 +248,7 @@ export async function downloadBackend(
|
|||||||
source === 'github'
|
source === 'github'
|
||||||
? `https://github.com/janhq/llama.cpp/releases/download/${version}/cudart-llama-bin-${platformName}-cu12.0-x64.tar.gz`
|
? `https://github.com/janhq/llama.cpp/releases/download/${version}/cudart-llama-bin-${platformName}-cu12.0-x64.tar.gz`
|
||||||
: `https://catalog.jan.ai/llama.cpp/releases/${version}/cudart-llama-bin-${platformName}-cu12.0-x64.tar.gz`,
|
: `https://catalog.jan.ai/llama.cpp/releases/${version}/cudart-llama-bin-${platformName}-cu12.0-x64.tar.gz`,
|
||||||
save_path: await joinPath([libDir, 'cuda12.tar.gz']),
|
save_path: await joinPath([backendDir, 'build', 'bin', 'cuda12.tar.gz']),
|
||||||
proxy: proxyConfig,
|
proxy: proxyConfig,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@ -344,8 +318,8 @@ async function _getSupportedFeatures() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// https://docs.nvidia.com/deploy/cuda-compatibility/#cuda-11-and-later-defaults-to-minor-version-compatibility
|
// https://docs.nvidia.com/deploy/cuda-compatibility/#cuda-11-and-later-defaults-to-minor-version-compatibility
|
||||||
let minCuda11DriverVersion
|
let minCuda11DriverVersion: string
|
||||||
let minCuda12DriverVersion
|
let minCuda12DriverVersion: string
|
||||||
if (sysInfo.os_type === 'linux') {
|
if (sysInfo.os_type === 'linux') {
|
||||||
minCuda11DriverVersion = '450.80.02'
|
minCuda11DriverVersion = '450.80.02'
|
||||||
minCuda12DriverVersion = '525.60.13'
|
minCuda12DriverVersion = '525.60.13'
|
||||||
|
|||||||
@ -551,9 +551,9 @@ export default class llamacpp_extension extends AIEngine {
|
|||||||
|
|
||||||
// Helper to map backend string to a priority category
|
// Helper to map backend string to a priority category
|
||||||
const getBackendCategory = (backendString: string): string | undefined => {
|
const getBackendCategory = (backendString: string): string | undefined => {
|
||||||
if (backendString.includes('cu12.0')) return 'cuda-cu12.0'
|
if (backendString.includes('cuda-12-common_cpus')) return 'cuda-cu12.0'
|
||||||
if (backendString.includes('cu11.7')) return 'cuda-cu11.7'
|
if (backendString.includes('cuda-11-common_cpus')) return 'cuda-cu11.7'
|
||||||
if (backendString.includes('vulkan')) return 'vulkan'
|
if (backendString.includes('vulkan-common_cpus')) return 'vulkan'
|
||||||
if (backendString.includes('avx512')) return 'avx512'
|
if (backendString.includes('avx512')) return 'avx512'
|
||||||
if (backendString.includes('avx2')) return 'avx2'
|
if (backendString.includes('avx2')) return 'avx2'
|
||||||
if (
|
if (
|
||||||
@ -1689,7 +1689,7 @@ export default class llamacpp_extension extends AIEngine {
|
|||||||
const backendPath = await getBackendExePath(backend, version)
|
const backendPath = await getBackendExePath(backend, version)
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const sInfo = await loadLlamaModel(backendPath, args, envs)
|
const sInfo = await loadLlamaModel(backendPath, args, envs, isEmbedding)
|
||||||
return sInfo
|
return sInfo
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error('Error in load command:\n', error)
|
logger.error('Error in load command:\n', error)
|
||||||
|
|||||||
@ -6,6 +6,7 @@ export interface SessionInfo {
|
|||||||
port: number;
|
port: number;
|
||||||
model_id: string;
|
model_id: string;
|
||||||
model_path: string;
|
model_path: string;
|
||||||
|
is_embedding: boolean
|
||||||
api_key: string;
|
api_key: string;
|
||||||
mmproj_path?: string;
|
mmproj_path?: string;
|
||||||
}
|
}
|
||||||
@ -36,12 +37,14 @@ export async function cleanupLlamaProcesses(): Promise<void> {
|
|||||||
export async function loadLlamaModel(
|
export async function loadLlamaModel(
|
||||||
backendPath: string,
|
backendPath: string,
|
||||||
args: string[],
|
args: string[],
|
||||||
envs: Record<string, string>
|
envs: Record<string, string>,
|
||||||
|
isEmbedding: boolean
|
||||||
): Promise<SessionInfo> {
|
): Promise<SessionInfo> {
|
||||||
return await invoke('plugin:llamacpp|load_llama_model', {
|
return await invoke('plugin:llamacpp|load_llama_model', {
|
||||||
backendPath,
|
backendPath,
|
||||||
args,
|
args,
|
||||||
envs
|
envs,
|
||||||
|
isEmbedding
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user