diff --git a/extensions/llamacpp-extension/src/backend.ts b/extensions/llamacpp-extension/src/backend.ts
index 6e009e050..9340aa4c0 100644
--- a/extensions/llamacpp-extension/src/backend.ts
+++ b/extensions/llamacpp-extension/src/backend.ts
@@ -358,9 +358,8 @@ async function _getSupportedFeatures() {
       if (compareVersions(driverVersion, minCuda12DriverVersion) >= 0)
         features.cuda12 = true
     }
-    // Vulkan support check - only discrete GPUs with 6GB+ VRAM
-    if (gpuInfo.vulkan_info?.api_version && gpuInfo.total_memory >= 6 * 1024) {
-      // 6GB (total_memory is in MB)
+    // Vulkan support check
+    if (gpuInfo.vulkan_info?.api_version) {
       features.vulkan = true
     }
   }
diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts
index 56fa4a644..88a736588 100644
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@@ -36,8 +36,11 @@ import {
 import { invoke } from '@tauri-apps/api/core'
 import { getProxyConfig } from './util'
 import { basename } from '@tauri-apps/api/path'
-import { readGgufMetadata } from '@janhq/tauri-plugin-llamacpp-api'
-import { getSystemUsage } from '@janhq/tauri-plugin-hardware-api'
+import {
+  GgufMetadata,
+  readGgufMetadata,
+} from '@janhq/tauri-plugin-llamacpp-api'
+import { getSystemUsage, getSystemInfo } from '@janhq/tauri-plugin-hardware-api'
 
 type LlamacppConfig = {
   version_backend: string
@@ -322,10 +325,10 @@ export default class llamacpp_extension extends AIEngine {
         // Clear the invalid stored preference
         this.clearStoredBackendType()
         bestAvailableBackendString =
-          this.determineBestBackend(version_backends)
+          await this.determineBestBackend(version_backends)
       }
     } else {
-      bestAvailableBackendString = this.determineBestBackend(version_backends)
+      bestAvailableBackendString = await this.determineBestBackend(version_backends)
     }
 
     let settings = structuredClone(SETTINGS)
@@ -487,29 +490,61 @@ export default class llamacpp_extension extends AIEngine {
     }
   }
 
-  private determineBestBackend(
+  private async determineBestBackend(
     version_backends: { version: string; backend: string }[]
-  ): string {
+  ): Promise<string> {
     if (version_backends.length === 0) return ''
 
+    // Check GPU memory availability
+    let hasEnoughGpuMemory = false
+    try {
+      const sysInfo = await getSystemInfo()
+      for (const gpuInfo of sysInfo.gpus) {
+        if (gpuInfo.total_memory >= 6 * 1024) {
+          hasEnoughGpuMemory = true
+          break
+        }
+      }
+    } catch (error) {
+      logger.warn('Failed to get system info for GPU memory check:', error)
+      // Default to false if we can't determine GPU memory
+      hasEnoughGpuMemory = false
+    }
+
     // Priority list for backend types (more specific/performant ones first)
-    const backendPriorities: string[] = [
-      'cuda-cu12.0',
-      'cuda-cu11.7',
-      'vulkan',
-      'avx512',
-      'avx2',
-      'avx',
-      'noavx',
-      'arm64',
-      'x64',
-    ]
+    // Vulkan will be conditionally prioritized based on GPU memory
+    const backendPriorities: string[] = hasEnoughGpuMemory
+      ? [
+          'cuda-cu12.0',
+          'cuda-cu11.7',
+          'vulkan', // Include vulkan if we have enough GPU memory
+          'avx512',
+          'avx2',
+          'avx',
+          'noavx',
+          'arm64',
+          'x64',
+        ]
+      : [
+          'cuda-cu12.0',
+          'cuda-cu11.7',
+          'avx512',
+          'avx2',
+          'avx',
+          'noavx',
+          'arm64',
+          'x64',
+          'vulkan', // demote to last if we don't have enough memory
+        ]
 
     // Helper to map backend string to a priority category
     const getBackendCategory = (backendString: string): string | undefined => {
       if (backendString.includes('cu12.0')) return 'cuda-cu12.0'
       if (backendString.includes('cu11.7')) return 'cuda-cu11.7'
-      if (backendString.includes('vulkan')) return 'vulkan'
+      if (backendString.includes('vulkan')) {
+        // Only return vulkan category if we have enough GPU memory
+        return hasEnoughGpuMemory ? 'vulkan' : undefined
+      }
       if (backendString.includes('avx512')) return 'avx512'
       if (backendString.includes('avx2')) return 'avx2'
       if (
@@ -544,6 +579,9 @@ export default class llamacpp_extension extends AIEngine {
       return `${foundBestBackend.version}/${foundBestBackend.backend}`
     } else {
       // Fallback to newest version
+      logger.info(
+        `Fallback to: ${version_backends[0].version}/${version_backends[0].backend}`
+      )
       return `${version_backends[0].version}/${version_backends[0].backend}`
     }
   }
@@ -1048,7 +1086,7 @@ export default class llamacpp_extension extends AIEngine {
     const archiveName = await basename(path)
     logger.info(`Installing backend from path: ${path}`)
 
-    if (!(await fs.existsSync(path)) && !(path.endsWith('tar.gz'))) {
+    if (!(await fs.existsSync(path)) && !path.endsWith('tar.gz')) {
       logger.error(`Invalid path or file ${path}`)
       throw new Error(`Invalid path or file ${path}`)
     }
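
For context, below is a minimal standalone sketch of the selection behaviour the determineBestBackend hunk above introduces. Everything in it is illustrative rather than taken from the extension: the pickBackend name, the plain totalGpuMemoryMiB parameter, and the example backend strings are hypothetical stand-ins, and category matching is simplified to a substring check; the real method reads GPU memory via getSystemInfo() and maps backends through getBackendCategory.

// sketch.ts — illustrative only, not part of the diff above
type VersionBackend = { version: string; backend: string }

function pickBackend(
  versionBackends: VersionBackend[],
  totalGpuMemoryMiB: number
): string {
  if (versionBackends.length === 0) return ''

  // Same 6 GB threshold as the diff (total_memory is reported in MB there)
  const hasEnoughGpuMemory = totalGpuMemoryMiB >= 6 * 1024

  // Vulkan is promoted or demoted depending on available GPU memory
  const priorities = hasEnoughGpuMemory
    ? ['cuda-cu12.0', 'cuda-cu11.7', 'vulkan', 'avx512', 'avx2', 'avx', 'noavx', 'arm64', 'x64']
    : ['cuda-cu12.0', 'cuda-cu11.7', 'avx512', 'avx2', 'avx', 'noavx', 'arm64', 'x64', 'vulkan']

  for (const category of priorities) {
    // Simplified stand-in for getBackendCategory: first backend whose name
    // contains the current category wins
    const match = versionBackends.find((vb) => vb.backend.includes(category))
    if (match) return `${match.version}/${match.backend}`
  }

  // Fall back to the newest (first) entry, mirroring the diff's fallback branch
  return `${versionBackends[0].version}/${versionBackends[0].backend}`
}

// With only 4 GB of VRAM the avx2 build wins over the vulkan build:
console.log(
  pickBackend(
    [
      { version: 'b4567', backend: 'win-vulkan-x64' },
      { version: 'b4567', backend: 'win-avx2-x64' },
    ],
    4 * 1024
  )
) // "b4567/win-avx2-x64"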