feat: prioritize Vulkan backend only when GPU has ≥6 GB VRAM
Added a GPU memory check using `getSystemInfo` to ensure Vulkan is selected only on systems with at least 6 GB of VRAM.

* Made `determineBestBackend` asynchronous and updated all callers to `await` it.
* Adjusted the backend priority list to include or demote Vulkan based on the memory check.
* Updated Vulkan support detection in `backend.ts` to rely solely on the API version (the memory check moved into the selection logic).
* Imported `getSystemInfo` and refined the file-existence validation.

These changes prevent sub-optimal Vulkan usage on low-memory GPUs and improve backend selection reliability.
parent: 84874c6039
commit: 42411b5f33
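In short, backend selection now queries the hardware once and then builds the priority list with Vulkan either kept near the top or demoted to last. The sketch below illustrates that flow as a standalone TypeScript helper: `pickBackend`, `MIN_VULKAN_VRAM_MB`, and the example backend names are hypothetical, and it assumes `getSystemInfo()` resolves to an object with a `gpus` array whose `total_memory` is reported in MB, as the diff below reads it.

```ts
import { getSystemInfo } from '@janhq/tauri-plugin-hardware-api'

const MIN_VULKAN_VRAM_MB = 6 * 1024 // 6 GB; total_memory is reported in MB

// Hypothetical standalone helper mirroring the memory-aware priority ordering.
async function pickBackend(available: string[]): Promise<string | undefined> {
  let hasEnoughGpuMemory = false
  try {
    const sysInfo = await getSystemInfo()
    hasEnoughGpuMemory = sysInfo.gpus.some(
      (gpu) => gpu.total_memory >= MIN_VULKAN_VRAM_MB
    )
  } catch {
    // If hardware info is unavailable, stay conservative and demote Vulkan.
    hasEnoughGpuMemory = false
  }

  // Vulkan stays near the top only when at least one GPU has >= 6 GB VRAM;
  // otherwise it drops to the end so CUDA/CPU builds are tried first.
  const priorities = hasEnoughGpuMemory
    ? ['cu12.0', 'cu11.7', 'vulkan', 'avx512', 'avx2', 'avx', 'noavx']
    : ['cu12.0', 'cu11.7', 'avx512', 'avx2', 'avx', 'noavx', 'vulkan']

  for (const category of priorities) {
    const match = available.find((backend) => backend.includes(category))
    if (match) return match
  }
  // Fall back to the first (newest) entry when nothing matches a known category.
  return available[0]
}
```

For example (with made-up backend names), `await pickBackend(['b4567/win-vulkan-x64', 'b4567/win-avx2-x64'])` would resolve to the AVX2 build on a 4 GB GPU and to the Vulkan build on an 8 GB one. In this sketch Vulkan is demoted rather than dropped, so it can still be picked when nothing else matches.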
@@ -358,9 +358,8 @@ async function _getSupportedFeatures() {
       if (compareVersions(driverVersion, minCuda12DriverVersion) >= 0)
         features.cuda12 = true
     }
-    // Vulkan support check - only discrete GPUs with 6GB+ VRAM
-    if (gpuInfo.vulkan_info?.api_version && gpuInfo.total_memory >= 6 * 1024) {
-      // 6GB (total_memory is in MB)
+    // Vulkan support check
+    if (gpuInfo.vulkan_info?.api_version) {
       features.vulkan = true
     }
   }
@@ -36,8 +36,11 @@ import {
 import { invoke } from '@tauri-apps/api/core'
 import { getProxyConfig } from './util'
 import { basename } from '@tauri-apps/api/path'
-import { readGgufMetadata } from '@janhq/tauri-plugin-llamacpp-api'
-import { getSystemUsage } from '@janhq/tauri-plugin-hardware-api'
+import {
+  GgufMetadata,
+  readGgufMetadata,
+} from '@janhq/tauri-plugin-llamacpp-api'
+import { getSystemUsage, getSystemInfo } from '@janhq/tauri-plugin-hardware-api'

 type LlamacppConfig = {
   version_backend: string
@@ -322,10 +325,10 @@ export default class llamacpp_extension extends AIEngine {
         // Clear the invalid stored preference
         this.clearStoredBackendType()
         bestAvailableBackendString =
-          this.determineBestBackend(version_backends)
+          await this.determineBestBackend(version_backends)
       }
     } else {
-      bestAvailableBackendString = this.determineBestBackend(version_backends)
+      bestAvailableBackendString = await this.determineBestBackend(version_backends)
     }

     let settings = structuredClone(SETTINGS)
@@ -487,16 +490,34 @@ export default class llamacpp_extension extends AIEngine {
     }
   }

-  private determineBestBackend(
+  private async determineBestBackend(
     version_backends: { version: string; backend: string }[]
-  ): string {
+  ): Promise<string> {
     if (version_backends.length === 0) return ''

+    // Check GPU memory availability
+    let hasEnoughGpuMemory = false
+    try {
+      const sysInfo = await getSystemInfo()
+      for (const gpuInfo of sysInfo.gpus) {
+        if (gpuInfo.total_memory >= 6 * 1024) {
+          hasEnoughGpuMemory = true
+          break
+        }
+      }
+    } catch (error) {
+      logger.warn('Failed to get system info for GPU memory check:', error)
+      // Default to false if we can't determine GPU memory
+      hasEnoughGpuMemory = false
+    }
+
     // Priority list for backend types (more specific/performant ones first)
-    const backendPriorities: string[] = [
+    // Vulkan will be conditionally prioritized based on GPU memory
+    const backendPriorities: string[] = hasEnoughGpuMemory
+      ? [
       'cuda-cu12.0',
       'cuda-cu11.7',
-      'vulkan',
+      'vulkan', // Include vulkan if we have enough GPU memory
       'avx512',
       'avx2',
       'avx',
@@ -504,12 +525,26 @@ export default class llamacpp_extension extends AIEngine {
       'arm64',
       'x64',
     ]
+      : [
+          'cuda-cu12.0',
+          'cuda-cu11.7',
+          'avx512',
+          'avx2',
+          'avx',
+          'noavx',
+          'arm64',
+          'x64',
+          'vulkan', // demote to last if we don't have enough memory
+        ]

     // Helper to map backend string to a priority category
     const getBackendCategory = (backendString: string): string | undefined => {
       if (backendString.includes('cu12.0')) return 'cuda-cu12.0'
       if (backendString.includes('cu11.7')) return 'cuda-cu11.7'
-      if (backendString.includes('vulkan')) return 'vulkan'
+      if (backendString.includes('vulkan')) {
+        // Only return vulkan category if we have enough GPU memory
+        return hasEnoughGpuMemory ? 'vulkan' : undefined
+      }
       if (backendString.includes('avx512')) return 'avx512'
       if (backendString.includes('avx2')) return 'avx2'
       if (
@@ -544,6 +579,9 @@ export default class llamacpp_extension extends AIEngine {
       return `${foundBestBackend.version}/${foundBestBackend.backend}`
     } else {
       // Fallback to newest version
+      logger.info(
+        `Fallback to: ${version_backends[0].version}/${version_backends[0].backend}`
+      )
       return `${version_backends[0].version}/${version_backends[0].backend}`
     }
   }
@@ -1048,7 +1086,7 @@ export default class llamacpp_extension extends AIEngine {
     const archiveName = await basename(path)
     logger.info(`Installing backend from path: ${path}`)

-    if (!(await fs.existsSync(path)) && !(path.endsWith('tar.gz'))) {
+    if (!(await fs.existsSync(path)) && !path.endsWith('tar.gz')) {
       logger.error(`Invalid path or file ${path}`)
       throw new Error(`Invalid path or file ${path}`)
     }