Merge pull request #6046 from menloresearch/fix/support-missing-llamacpp-cuda-backends
fix: support missing llamacpp cuda backends
commit 813c911487
@@ -27,8 +27,18 @@ export async function listSupportedBackends(): Promise<
     if (features.avx) supportedBackends.push('win-avx-x64')
     if (features.avx2) supportedBackends.push('win-avx2-x64')
     if (features.avx512) supportedBackends.push('win-avx512-x64')
-    if (features.cuda11) supportedBackends.push('win-avx2-cuda-cu11.7-x64')
-    if (features.cuda12) supportedBackends.push('win-avx2-cuda-cu12.0-x64')
+    if (features.cuda11) {
+      if (features.avx512) supportedBackends.push('win-avx512-cuda-cu11.7-x64')
+      else if (features.avx2) supportedBackends.push('win-avx2-cuda-cu11.7-x64')
+      else if (features.avx) supportedBackends.push('win-avx-cuda-cu11.7-x64')
+      else supportedBackends.push('win-noavx-cuda-cu11.7-x64')
+    }
+    if (features.cuda12) {
+      if (features.avx512) supportedBackends.push('win-avx512-cuda-cu12.0-x64')
+      else if (features.avx2) supportedBackends.push('win-avx2-cuda-cu12.0-x64')
+      else if (features.avx) supportedBackends.push('win-avx-cuda-cu12.0-x64')
+      else supportedBackends.push('win-noavx-cuda-cu12.0-x64')
+    }
     if (features.vulkan) supportedBackends.push('win-vulkan-x64')
   }
   // not available yet, placeholder for future
@@ -39,8 +49,18 @@ export async function listSupportedBackends(): Promise<
     if (features.avx) supportedBackends.push('linux-avx-x64')
     if (features.avx2) supportedBackends.push('linux-avx2-x64')
     if (features.avx512) supportedBackends.push('linux-avx512-x64')
-    if (features.cuda11) supportedBackends.push('linux-avx2-cuda-cu11.7-x64')
-    if (features.cuda12) supportedBackends.push('linux-avx2-cuda-cu12.0-x64')
+    if (features.cuda11) {
+      if (features.avx512) supportedBackends.push('linux-avx512-cuda-cu11.7-x64')
+      else if (features.avx2) supportedBackends.push('linux-avx2-cuda-cu11.7-x64')
+      else if (features.avx) supportedBackends.push('linux-avx-cuda-cu11.7-x64')
+      else supportedBackends.push('linux-noavx-cuda-cu11.7-x64')
+    }
+    if (features.cuda12) {
+      if (features.avx512) supportedBackends.push('linux-avx512-cuda-cu12.0-x64')
+      else if (features.avx2) supportedBackends.push('linux-avx2-cuda-cu12.0-x64')
+      else if (features.avx) supportedBackends.push('linux-avx-cuda-cu12.0-x64')
+      else supportedBackends.push('linux-noavx-cuda-cu12.0-x64')
+    }
     if (features.vulkan) supportedBackends.push('linux-vulkan-x64')
   }
   // not available yet, placeholder for future
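The two hunks above replace the single hard-coded avx2 CUDA entries with a fallback across AVX tiers, for both CUDA 11.7 and 12.0 and for both Windows and Linux. A minimal sketch of that selection logic, with illustrative names (not the extension's actual helpers), assuming boolean feature flags like the ones used in the diff:

    // Sketch only: picks the CUDA backend variant for one CUDA version,
    // falling through the same AVX tiers as the diff above.
    type CpuGpuFeatures = {
      avx: boolean
      avx2: boolean
      avx512: boolean
      cuda11: boolean
      cuda12: boolean
      vulkan: boolean
    }

    function cudaBackendFor(
      os: 'win' | 'linux',
      cudaVersion: '11.7' | '12.0',
      features: CpuGpuFeatures
    ): string {
      // Highest available instruction set wins; noavx is the last resort.
      const tier = features.avx512
        ? 'avx512'
        : features.avx2
          ? 'avx2'
          : features.avx
            ? 'avx'
            : 'noavx'
      return `${os}-${tier}-cuda-cu${cudaVersion}-x64`
    }

    // Example: an AVX-only Linux machine with CUDA 12 resolves to
    // cudaBackendFor('linux', '12.0', features) === 'linux-avx-cuda-cu12.0-x64'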
@@ -357,9 +357,16 @@ export default class llamacpp_extension extends AIEngine {
 
     // Handle fresh installation case where version_backend might be 'none' or invalid
     if (
-      !effectiveBackendString ||
+      (!effectiveBackendString ||
         effectiveBackendString === 'none' ||
-        !effectiveBackendString.includes('/')
+        !effectiveBackendString.includes('/') ||
+        // If the selected backend is not in the list of supported backends
+        // Need to reset too
+        !version_backends.some(
+          (e) => `${e.version}/${e.backend}` === effectiveBackendString
+        )) &&
+      // Ensure we have a valid best available backend
+      bestAvailableBackendString
     ) {
       effectiveBackendString = bestAvailableBackendString
       logger.info(
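The widened condition also resets the saved setting when the stored version/backend string no longer matches any entry in the supported-backends list. A sketch of that validity check in isolation, assuming entries shaped like { version, backend } as in the diff; the helper name is made up for illustration:

    type VersionBackend = { version: string; backend: string }

    // A saved 'version/backend' string is usable only if it is non-empty, not the
    // placeholder 'none', well-formed, and still present in the supported list.
    function isUsableBackendString(
      saved: string | undefined,
      supported: VersionBackend[]
    ): boolean {
      if (!saved || saved === 'none' || !saved.includes('/')) return false
      return supported.some((e) => `${e.version}/${e.backend}` === saved)
    }

    // Example: a stored 'v1.0.0/win-avx2-cuda-cu12.0-x64' stops being usable once
    // that asset disappears from the release list, so the extension falls back to
    // the best available backend instead.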
@@ -380,6 +387,17 @@ export default class llamacpp_extension extends AIEngine {
         })
       )
       logger.info(`Updated UI settings to show: ${effectiveBackendString}`)
+
+      // Emit for updating fe
+      if (events && typeof events.emit === 'function') {
+        logger.info(
+          `Emitting settingsChanged event for version_backend with value: ${effectiveBackendString}`
+        )
+        events.emit('settingsChanged', {
+          key: 'version_backend',
+          value: effectiveBackendString,
+        })
+      }
     }
 
     // Download and install the backend if not already present
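The new emit notifies the frontend so the backend dropdown reflects the auto-selected value. A hedged sketch of what a subscriber could look like; the event name and payload come from the diff, but the emitter interface below is a stand-in, not the actual @janhq/core events API:

    type SettingsChangedPayload = { key: string; value: string }

    // Stand-in emitter interface for illustration only.
    interface SettingsEmitter {
      on(
        event: 'settingsChanged',
        handler: (payload: SettingsChangedPayload) => void
      ): void
    }

    function bindBackendDropdown(
      events: SettingsEmitter,
      render: (backend: string) => void
    ): void {
      events.on('settingsChanged', ({ key, value }) => {
        // Only the llama.cpp backend setting is relevant here.
        if (key === 'version_backend') render(value)
      })
    }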
@@ -51,6 +51,164 @@ describe('Backend functions', () => {
     ])
   })
 
+  it('should return CUDA backends with proper CPU instruction detection for Windows', async () => {
+    // Mock system info with CUDA support and AVX512
+    window.core.api.getSystemInfo = vi.fn().mockResolvedValue({
+      os_type: 'windows',
+      cpu: {
+        arch: 'x86_64',
+        extensions: ['avx', 'avx2', 'avx512'],
+      },
+      gpus: [
+        {
+          driver_version: '530.41',
+          nvidia_info: { compute_capability: '8.6' },
+        },
+      ],
+    })
+
+    // Mock GitHub releases with CUDA backends
+    const mockReleases = [
+      {
+        tag_name: 'v1.0.0',
+        assets: [
+          { name: 'llama-v1.0.0-bin-win-avx512-cuda-cu12.0-x64.tar.gz' },
+          { name: 'llama-v1.0.0-bin-win-avx2-cuda-cu12.0-x64.tar.gz' },
+          { name: 'llama-v1.0.0-bin-win-avx-cuda-cu12.0-x64.tar.gz' },
+          { name: 'llama-v1.0.0-bin-win-noavx-cuda-cu12.0-x64.tar.gz' },
+        ],
+      },
+    ]
+
+    global.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      json: () => Promise.resolve(mockReleases),
+    })
+
+    const result = await listSupportedBackends()
+
+    expect(result).toContain({ version: 'v1.0.0', backend: 'win-avx512-cuda-cu12.0-x64' })
+  })
+
+  it('should select appropriate CUDA backend based on CPU features - AVX2 only', async () => {
+    // Mock system info with CUDA support but only AVX2
+    window.core.api.getSystemInfo = vi.fn().mockResolvedValue({
+      os_type: 'windows',
+      cpu: {
+        arch: 'x86_64',
+        extensions: ['avx', 'avx2'], // No AVX512
+      },
+      gpus: [
+        {
+          driver_version: '530.41',
+          nvidia_info: { compute_capability: '8.6' },
+        },
+      ],
+    })
+
+    const mockReleases = [
+      {
+        tag_name: 'v1.0.0',
+        assets: [
+          { name: 'llama-v1.0.0-bin-win-avx512-cuda-cu12.0-x64.tar.gz' },
+          { name: 'llama-v1.0.0-bin-win-avx2-cuda-cu12.0-x64.tar.gz' },
+          { name: 'llama-v1.0.0-bin-win-avx-cuda-cu12.0-x64.tar.gz' },
+          { name: 'llama-v1.0.0-bin-win-noavx-cuda-cu12.0-x64.tar.gz' },
+        ],
+      },
+    ]
+
+    global.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      json: () => Promise.resolve(mockReleases),
+    })
+
+    const result = await listSupportedBackends()
+
+    expect(result).toContain({ version: 'v1.0.0', backend: 'win-avx2-cuda-cu12.0-x64' })
+    expect(result).not.toContain({ version: 'v1.0.0', backend: 'win-avx512-cuda-cu12.0-x64' })
+  })
+
+  it('should select appropriate CUDA backend based on CPU features - no AVX', async () => {
+    // Mock system info with CUDA support but no AVX
+    window.core.api.getSystemInfo = vi.fn().mockResolvedValue({
+      os_type: 'windows',
+      cpu: {
+        arch: 'x86_64',
+        extensions: [], // No AVX extensions
+      },
+      gpus: [
+        {
+          driver_version: '530.41',
+          nvidia_info: { compute_capability: '8.6' },
+        },
+      ],
+    })
+
+    const mockReleases = [
+      {
+        tag_name: 'v1.0.0',
+        assets: [
+          { name: 'llama-v1.0.0-bin-win-avx512-cuda-cu12.0-x64.tar.gz' },
+          { name: 'llama-v1.0.0-bin-win-avx2-cuda-cu12.0-x64.tar.gz' },
+          { name: 'llama-v1.0.0-bin-win-avx-cuda-cu12.0-x64.tar.gz' },
+          { name: 'llama-v1.0.0-bin-win-noavx-cuda-cu12.0-x64.tar.gz' },
+        ],
+      },
+    ]
+
+    global.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      json: () => Promise.resolve(mockReleases),
+    })
+
+    const result = await listSupportedBackends()
+
+    expect(result).toContain({ version: 'v1.0.0', backend: 'win-noavx-cuda-cu12.0-x64' })
+    expect(result).not.toContain({ version: 'v1.0.0', backend: 'win-avx2-cuda-cu12.0-x64' })
+    expect(result).not.toContain({ version: 'v1.0.0', backend: 'win-avx512-cuda-cu12.0-x64' })
+  })
+
+  it('should return CUDA backends with proper CPU instruction detection for Linux', async () => {
+    // Mock system info with CUDA support and AVX support
+    window.core.api.getSystemInfo = vi.fn().mockResolvedValue({
+      os_type: 'linux',
+      cpu: {
+        arch: 'x86_64',
+        extensions: ['avx'], // Only AVX, no AVX2
+      },
+      gpus: [
+        {
+          driver_version: '530.60.13',
+          nvidia_info: { compute_capability: '8.6' },
+        },
+      ],
+    })
+
+    const mockReleases = [
+      {
+        tag_name: 'v1.0.0',
+        assets: [
+          { name: 'llama-v1.0.0-bin-linux-avx512-cuda-cu12.0-x64.tar.gz' },
+          { name: 'llama-v1.0.0-bin-linux-avx2-cuda-cu12.0-x64.tar.gz' },
+          { name: 'llama-v1.0.0-bin-linux-avx-cuda-cu12.0-x64.tar.gz' },
+          { name: 'llama-v1.0.0-bin-linux-noavx-cuda-cu12.0-x64.tar.gz' },
+        ],
+      },
+    ]
+
+    global.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      json: () => Promise.resolve(mockReleases),
+    })
+
+    const result = await listSupportedBackends()
+
+    expect(result).toContain({ version: 'v1.0.0', backend: 'linux-avx-cuda-cu12.0-x64' })
+    expect(result).not.toContain({ version: 'v1.0.0', backend: 'linux-avx2-cuda-cu12.0-x64' })
+    expect(result).not.toContain({ version: 'v1.0.0', backend: 'linux-avx512-cuda-cu12.0-x64' })
+  })
+
   it('should return supported backends for macOS arm64', async () => {
     window.core.api.getSystemInfo = vi.fn().mockResolvedValue({
       os_type: 'macos',
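All of the new tests mock the same system-info payload. A sketch of that shape as a TypeScript type, inferred from the mocks above (field names come from the test data; the type name itself is illustrative):

    // Shape of the mocked getSystemInfo payload used by the tests above.
    type MockedSystemInfo = {
      os_type: 'windows' | 'linux' | 'macos'
      cpu: {
        arch: string          // e.g. 'x86_64'
        extensions: string[]  // e.g. ['avx', 'avx2', 'avx512']
      }
      gpus: Array<{
        driver_version: string                       // e.g. '530.41'
        nvidia_info: { compute_capability: string }  // e.g. '8.6'
      }>
    }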
@@ -17,7 +17,7 @@
     "test:coverage": "vitest run --coverage",
     "test:prepare": "yarn build:icon && yarn copy:assets:tauri && yarn build --no-bundle ",
     "dev:web": "yarn workspace @janhq/web-app dev",
-    "dev:tauri": "yarn build:icon && yarn copy:assets:tauri && tauri dev",
+    "dev:tauri": "yarn build:icon && yarn copy:assets:tauri && cross-env IS_CLEAN=true tauri dev",
     "copy:assets:tauri": "cpx \"pre-install/*.tgz\" \"src-tauri/resources/pre-install/\"",
     "download:lib": "node ./scripts/download-lib.mjs",
     "download:bin": "node ./scripts/download-bin.mjs",
@@ -43,8 +43,8 @@ pub fn install_extensions(app: tauri::AppHandle, force: bool) -> Result<(), Stri
 
     let mut clean_up = force;
 
-    // Check CLEAN environment variable to optionally skip extension install
-    if std::env::var("CLEAN").is_ok() {
+    // Check IS_CLEAN environment variable to optionally skip extension install
+    if std::env::var("IS_CLEAN").is_ok() {
         clean_up = true;
     }
     log::info!(
@@ -6,7 +6,7 @@
   "build": {
     "frontendDist": "../web-app/dist",
     "devUrl": "http://localhost:1420",
-    "beforeDevCommand": "cross-env IS_TAURI=true CLEAN=true yarn dev:web",
+    "beforeDevCommand": "cross-env IS_TAURI=true yarn dev:web",
     "beforeBuildCommand": "cross-env IS_TAURI=true yarn build:web"
   },
   "app": {