From 8b15fe4ef25a81cc17e713a55081ffd0d23a1f56 Mon Sep 17 00:00:00 2001
From: Akarshan
Date: Wed, 15 Oct 2025 10:57:45 +0530
Subject: [PATCH] feat: Simplify backend architecture

This commit introduces a functional flag for embedding models and refactors
the backend detection logic into a cleaner implementation.

Key changes:

- Embedding Support: The loadLlamaModel API and SessionInfo now include an
  isEmbedding: boolean flag, allowing the core process to differentiate and
  correctly initialize models intended for embedding tasks (a usage sketch
  follows the patch below).
- Backend Naming Simplification (Refactor): Consolidated the CPU-specific
  backend tags (e.g. win-noavx-x64, win-avx2-x64) into generic
  *-common_cpus-x64 variants (e.g. win-common_cpus-x64), streamlining
  supported-backend detection.
- File Structure Update: Changed the download path for the CUDA runtime
  libraries (cudart) to place them inside the specific backend's directory
  (/build/bin/) rather than a shared lib folder, improving asset isolation.
---
 extensions/llamacpp-extension/src/backend.ts   | 50 +++++--------------
 extensions/llamacpp-extension/src/index.ts     |  8 +--
 .../tauri-plugin-llamacpp/guest-js/index.ts    |  7 ++-
 3 files changed, 21 insertions(+), 44 deletions(-)

diff --git a/extensions/llamacpp-extension/src/backend.ts b/extensions/llamacpp-extension/src/backend.ts
index 5a76a74f0..3e69c9f76 100644
--- a/extensions/llamacpp-extension/src/backend.ts
+++ b/extensions/llamacpp-extension/src/backend.ts
@@ -102,50 +102,27 @@ export async function listSupportedBackends(): Promise<
   // TODO: fetch versions from the server?
   // TODO: select CUDA version based on driver version
   if (sysType == 'windows-x86_64') {
-    // NOTE: if a machine supports AVX2, should we include noavx and avx?
-    supportedBackends.push('win-noavx-x64')
-    if (features.avx) supportedBackends.push('win-avx-x64')
-    if (features.avx2) supportedBackends.push('win-avx2-x64')
-    if (features.avx512) supportedBackends.push('win-avx512-x64')
+    supportedBackends.push('win-common_cpus-x64')
     if (features.cuda11) {
-      if (features.avx512) supportedBackends.push('win-avx512-cuda-cu11.7-x64')
-      else if (features.avx2) supportedBackends.push('win-avx2-cuda-cu11.7-x64')
-      else if (features.avx) supportedBackends.push('win-avx-cuda-cu11.7-x64')
-      else supportedBackends.push('win-noavx-cuda-cu11.7-x64')
+      supportedBackends.push('win-cuda-11-common_cpus-x64')
     }
     if (features.cuda12) {
-      if (features.avx512) supportedBackends.push('win-avx512-cuda-cu12.0-x64')
-      else if (features.avx2) supportedBackends.push('win-avx2-cuda-cu12.0-x64')
-      else if (features.avx) supportedBackends.push('win-avx-cuda-cu12.0-x64')
-      else supportedBackends.push('win-noavx-cuda-cu12.0-x64')
+      supportedBackends.push('win-cuda-12-common_cpus-x64')
     }
-    if (features.vulkan) supportedBackends.push('win-vulkan-x64')
+    if (features.vulkan) supportedBackends.push('win-vulkan-common_cpus-x64')
   }
   // not available yet, placeholder for future
   else if (sysType === 'windows-aarch64' || sysType === 'windows-arm64') {
     supportedBackends.push('win-arm64')
   } else if (sysType === 'linux-x86_64' || sysType === 'linux-x86') {
-    supportedBackends.push('linux-noavx-x64')
-    if (features.avx) supportedBackends.push('linux-avx-x64')
-    if (features.avx2) supportedBackends.push('linux-avx2-x64')
-    if (features.avx512) supportedBackends.push('linux-avx512-x64')
+    supportedBackends.push('linux-common_cpus-x64')
     if (features.cuda11) {
-      if (features.avx512)
-        supportedBackends.push('linux-avx512-cuda-cu11.7-x64')
-      else if (features.avx2)
-        supportedBackends.push('linux-avx2-cuda-cu11.7-x64')
-      else if (features.avx) supportedBackends.push('linux-avx-cuda-cu11.7-x64')
-      else supportedBackends.push('linux-noavx-cuda-cu11.7-x64')
+      supportedBackends.push('linux-cuda-11-common_cpus-x64')
     }
     if (features.cuda12) {
-      if (features.avx512)
-        supportedBackends.push('linux-avx512-cuda-cu12.0-x64')
-      else if (features.avx2)
-        supportedBackends.push('linux-avx2-cuda-cu12.0-x64')
-      else if (features.avx) supportedBackends.push('linux-avx-cuda-cu12.0-x64')
-      else supportedBackends.push('linux-noavx-cuda-cu12.0-x64')
+      supportedBackends.push('linux-cuda-12-common_cpus-x64')
     }
-    if (features.vulkan) supportedBackends.push('linux-vulkan-x64')
+    if (features.vulkan) supportedBackends.push('linux-vulkan-common_cpus-x64')
   }
   // not available yet, placeholder for future
   else if (sysType === 'linux-aarch64' || sysType === 'linux-arm64') {
@@ -230,10 +207,7 @@ export async function downloadBackend(
   version: string,
   source: 'github' | 'cdn' = 'github'
 ): Promise<void> {
-  const janDataFolderPath = await getJanDataFolderPath()
-  const llamacppPath = await joinPath([janDataFolderPath, 'llamacpp'])
   const backendDir = await getBackendDir(backend, version)
-  const libDir = await joinPath([llamacppPath, 'lib'])

   const downloadManager = window.core.extensionManager.getByName(
     '@janhq/download-extension'
@@ -265,7 +239,7 @@ export async function downloadBackend(
       source === 'github'
         ? `https://github.com/janhq/llama.cpp/releases/download/${version}/cudart-llama-bin-${platformName}-cu11.7-x64.tar.gz`
         : `https://catalog.jan.ai/llama.cpp/releases/${version}/cudart-llama-bin-${platformName}-cu11.7-x64.tar.gz`,
-      save_path: await joinPath([libDir, 'cuda11.tar.gz']),
+      save_path: await joinPath([backendDir, 'build', 'bin', 'cuda11.tar.gz']),
       proxy: proxyConfig,
     })
   } else if (backend.includes('cu12.0') && !(await _isCudaInstalled('12.0'))) {
@@ -274,7 +248,7 @@ export async function downloadBackend(
       source === 'github'
         ? `https://github.com/janhq/llama.cpp/releases/download/${version}/cudart-llama-bin-${platformName}-cu12.0-x64.tar.gz`
         : `https://catalog.jan.ai/llama.cpp/releases/${version}/cudart-llama-bin-${platformName}-cu12.0-x64.tar.gz`,
-      save_path: await joinPath([libDir, 'cuda12.tar.gz']),
+      save_path: await joinPath([backendDir, 'build', 'bin', 'cuda12.tar.gz']),
       proxy: proxyConfig,
     })
   }
@@ -344,8 +318,8 @@ async function _getSupportedFeatures() {
   }

   // https://docs.nvidia.com/deploy/cuda-compatibility/#cuda-11-and-later-defaults-to-minor-version-compatibility
-  let minCuda11DriverVersion
-  let minCuda12DriverVersion
+  let minCuda11DriverVersion: string
+  let minCuda12DriverVersion: string
   if (sysInfo.os_type === 'linux') {
     minCuda11DriverVersion = '450.80.02'
     minCuda12DriverVersion = '525.60.13'
diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts
index 631220a92..cfda2a403 100644
--- a/extensions/llamacpp-extension/src/index.ts
+++ b/extensions/llamacpp-extension/src/index.ts
@@ -551,9 +551,9 @@ export default class llamacpp_extension extends AIEngine {

     // Helper to map backend string to a priority category
     const getBackendCategory = (backendString: string): string | undefined => {
-      if (backendString.includes('cu12.0')) return 'cuda-cu12.0'
-      if (backendString.includes('cu11.7')) return 'cuda-cu11.7'
-      if (backendString.includes('vulkan')) return 'vulkan'
+      if (backendString.includes('cuda-12-common_cpus')) return 'cuda-cu12.0'
+      if (backendString.includes('cuda-11-common_cpus')) return 'cuda-cu11.7'
+      if (backendString.includes('vulkan-common_cpus')) return 'vulkan'
       if (backendString.includes('avx512')) return 'avx512'
       if (backendString.includes('avx2')) return 'avx2'
       if (
@@ -1689,7 +1689,7 @@ export default class llamacpp_extension extends AIEngine {
     const backendPath = await getBackendExePath(backend, version)

     try {
-      const sInfo = await loadLlamaModel(backendPath, args, envs)
+      const sInfo = await loadLlamaModel(backendPath, args, envs, isEmbedding)
       return sInfo
     } catch (error) {
       logger.error('Error in load command:\n', error)
diff --git a/src-tauri/plugins/tauri-plugin-llamacpp/guest-js/index.ts b/src-tauri/plugins/tauri-plugin-llamacpp/guest-js/index.ts
index b31133da5..c57680308 100644
--- a/src-tauri/plugins/tauri-plugin-llamacpp/guest-js/index.ts
+++ b/src-tauri/plugins/tauri-plugin-llamacpp/guest-js/index.ts
@@ -6,6 +6,7 @@ export interface SessionInfo {
   port: number;
   model_id: string;
   model_path: string;
+  is_embedding: boolean
   api_key: string;
   mmproj_path?: string;
 }
@@ -36,12 +37,14 @@ export async function cleanupLlamaProcesses(): Promise<void> {

 export async function loadLlamaModel(
   backendPath: string,
   args: string[],
-  envs: Record<string, string>
+  envs: Record<string, string>,
+  isEmbedding: boolean
 ): Promise<SessionInfo> {
   return await invoke('plugin:llamacpp|load_llama_model', {
     backendPath,
     args,
-    envs
+    envs,
+    isEmbedding
   })
 }
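
Appended editor's note (not part of the patch): the sketch below illustrates the extended loadLlamaModel signature on the caller side. The import path, model path, and server arguments are assumptions for illustration; only the extra isEmbedding parameter and the is_embedding field come from this change.

```ts
// Hypothetical usage of the new isEmbedding parameter; the import path and
// the argument list are placeholders, not values introduced by this patch.
import { loadLlamaModel, SessionInfo } from 'tauri-plugin-llamacpp-api'

async function startEmbeddingSession(
  backendPath: string,
  modelPath: string
): Promise<SessionInfo> {
  // Example llama-server style arguments; real callers assemble these from
  // the model's settings.
  const args = ['-m', modelPath, '--embedding']
  const envs: Record<string, string> = {}

  // Passing true lets the core process initialize the model for embedding
  // tasks; the returned SessionInfo carries the new is_embedding field.
  const session = await loadLlamaModel(backendPath, args, envs, true)
  return session
}
```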