feat: prioritize Vulkan backend only when GPU has ≥6 GB VRAM

Added a GPU memory check using `getSystemInfo` to ensure Vulkan is prioritized only on systems where at least one GPU reports 6 GB or more of total VRAM; if system info cannot be retrieved, the check conservatively assumes insufficient memory.
* Made `determineBestBackend` asynchronous and updated all callers to `await` it.
* Adjusted backend priority list to include or demote Vulkan based on the memory check.
* Updated Vulkan support detection in `backend.ts` to rely solely on API version (memory check moved to selection logic).
* Imported `getSystemInfo` (along with `GgufMetadata`), added a log message when falling back to the newest backend version, and refined file‑existence validation when installing a backend from a local path.

These changes prevent sub‑optimal Vulkan usage on low‑memory GPUs and improve backend selection reliability.
This commit is contained in:
Akarshan 2025-09-10 16:44:31 +05:30
parent 84874c6039
commit 42411b5f33
No known key found for this signature in database
GPG Key ID: D75C9634A870665F
2 changed files with 59 additions and 22 deletions

View File

@ -358,9 +358,8 @@ async function _getSupportedFeatures() {
if (compareVersions(driverVersion, minCuda12DriverVersion) >= 0) if (compareVersions(driverVersion, minCuda12DriverVersion) >= 0)
features.cuda12 = true features.cuda12 = true
} }
// Vulkan support check - only discrete GPUs with 6GB+ VRAM // Vulkan support check
if (gpuInfo.vulkan_info?.api_version && gpuInfo.total_memory >= 6 * 1024) { if (gpuInfo.vulkan_info?.api_version) {
// 6GB (total_memory is in MB)
features.vulkan = true features.vulkan = true
} }
} }

View File

@ -36,8 +36,11 @@ import {
import { invoke } from '@tauri-apps/api/core' import { invoke } from '@tauri-apps/api/core'
import { getProxyConfig } from './util' import { getProxyConfig } from './util'
import { basename } from '@tauri-apps/api/path' import { basename } from '@tauri-apps/api/path'
import { readGgufMetadata } from '@janhq/tauri-plugin-llamacpp-api' import {
import { getSystemUsage } from '@janhq/tauri-plugin-hardware-api' GgufMetadata,
readGgufMetadata,
} from '@janhq/tauri-plugin-llamacpp-api'
import { getSystemUsage, getSystemInfo } from '@janhq/tauri-plugin-hardware-api'
type LlamacppConfig = { type LlamacppConfig = {
version_backend: string version_backend: string
@ -322,10 +325,10 @@ export default class llamacpp_extension extends AIEngine {
// Clear the invalid stored preference // Clear the invalid stored preference
this.clearStoredBackendType() this.clearStoredBackendType()
bestAvailableBackendString = bestAvailableBackendString =
this.determineBestBackend(version_backends) await this.determineBestBackend(version_backends)
} }
} else { } else {
bestAvailableBackendString = this.determineBestBackend(version_backends) bestAvailableBackendString = await this.determineBestBackend(version_backends)
} }
let settings = structuredClone(SETTINGS) let settings = structuredClone(SETTINGS)
@ -487,29 +490,61 @@ export default class llamacpp_extension extends AIEngine {
} }
} }
private determineBestBackend( private async determineBestBackend(
version_backends: { version: string; backend: string }[] version_backends: { version: string; backend: string }[]
): string { ): Promise<string> {
if (version_backends.length === 0) return '' if (version_backends.length === 0) return ''
// Check GPU memory availability
let hasEnoughGpuMemory = false
try {
const sysInfo = await getSystemInfo()
for (const gpuInfo of sysInfo.gpus) {
if (gpuInfo.total_memory >= 6 * 1024) {
hasEnoughGpuMemory = true
break
}
}
} catch (error) {
logger.warn('Failed to get system info for GPU memory check:', error)
// Default to false if we can't determine GPU memory
hasEnoughGpuMemory = false
}
// Priority list for backend types (more specific/performant ones first) // Priority list for backend types (more specific/performant ones first)
const backendPriorities: string[] = [ // Vulkan will be conditionally prioritized based on GPU memory
'cuda-cu12.0', const backendPriorities: string[] = hasEnoughGpuMemory
'cuda-cu11.7', ? [
'vulkan', 'cuda-cu12.0',
'avx512', 'cuda-cu11.7',
'avx2', 'vulkan', // Include vulkan if we have enough GPU memory
'avx', 'avx512',
'noavx', 'avx2',
'arm64', 'avx',
'x64', 'noavx',
] 'arm64',
'x64',
]
: [
'cuda-cu12.0',
'cuda-cu11.7',
'avx512',
'avx2',
'avx',
'noavx',
'arm64',
'x64',
'vulkan', // demote to last if we don't have enough memory
]
// Helper to map backend string to a priority category // Helper to map backend string to a priority category
const getBackendCategory = (backendString: string): string | undefined => { const getBackendCategory = (backendString: string): string | undefined => {
if (backendString.includes('cu12.0')) return 'cuda-cu12.0' if (backendString.includes('cu12.0')) return 'cuda-cu12.0'
if (backendString.includes('cu11.7')) return 'cuda-cu11.7' if (backendString.includes('cu11.7')) return 'cuda-cu11.7'
if (backendString.includes('vulkan')) return 'vulkan' if (backendString.includes('vulkan')) {
// Only return vulkan category if we have enough GPU memory
return hasEnoughGpuMemory ? 'vulkan' : undefined
}
if (backendString.includes('avx512')) return 'avx512' if (backendString.includes('avx512')) return 'avx512'
if (backendString.includes('avx2')) return 'avx2' if (backendString.includes('avx2')) return 'avx2'
if ( if (
@ -544,6 +579,9 @@ export default class llamacpp_extension extends AIEngine {
return `${foundBestBackend.version}/${foundBestBackend.backend}` return `${foundBestBackend.version}/${foundBestBackend.backend}`
} else { } else {
// Fallback to newest version // Fallback to newest version
logger.info(
`Fallback to: ${version_backends[0].version}/${version_backends[0].backend}`
)
return `${version_backends[0].version}/${version_backends[0].backend}` return `${version_backends[0].version}/${version_backends[0].backend}`
} }
} }
@ -1048,7 +1086,7 @@ export default class llamacpp_extension extends AIEngine {
const archiveName = await basename(path) const archiveName = await basename(path)
logger.info(`Installing backend from path: ${path}`) logger.info(`Installing backend from path: ${path}`)
if (!(await fs.existsSync(path)) && !(path.endsWith('tar.gz'))) { if (!(await fs.existsSync(path)) && !path.endsWith('tar.gz')) {
logger.error(`Invalid path or file ${path}`) logger.error(`Invalid path or file ${path}`)
throw new Error(`Invalid path or file ${path}`) throw new Error(`Invalid path or file ${path}`)
} }