feat: Enhance Llama.cpp backend management with persistence (#5886)

* feat: Enhance Llama.cpp backend management with persistence

This commit introduces significant improvements to how the Llama.cpp extension manages and updates its backend installations, focusing on user preference persistence and smarter auto-updates.

Key changes include:

* **Persistent Backend Type Preference:** The extension now stores the user's preferred backend type (e.g., `cuda`, `cpu`, `metal`) in `localStorage`. This ensures that even after updates or restarts, the system attempts to use the user's previously selected backend type, if available.
* **Intelligent Auto-Update:** The auto-update mechanism has been refined to prioritize updating to the **latest version of the *currently selected backend type*** rather than always defaulting to the "best available" backend (which might change). This respects user choice while keeping the chosen backend type up-to-date.
* **Improved Initial Installation/Configuration:** For fresh installations or cases where the `version_backend` setting is invalid, the system now intelligently determines and installs the best available backend, then persists its type.
* **Refined Old Backend Cleanup:** The `removeOldBackends` function has been renamed to `removeOldBackend` and modified to specifically clean up *older versions of the currently selected backend type*, preventing the accumulation of unnecessary files while preserving other backend types the user might switch to.
* **Robust Local Storage Handling:** New private methods (`getStoredBackendType`, `setStoredBackendType`, `clearStoredBackendType`) are introduced to safely interact with `localStorage`, including error handling for potential `localStorage` access issues.
* **Version Filtering Utility:** A new utility `findLatestVersionForBackend` helps in identifying the latest available version for a specific backend type from a list of supported backends.
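The version-filtering utility above can be sketched as a standalone function. The `VersionBackend` shape and sample data are illustrative; note the use of the `numeric` collation option so multi-digit versions (e.g. `b5.10` vs `b5.9`) sort correctly, since plain lexicographic comparison would order them the other way:

```typescript
type VersionBackend = { version: string; backend: string }

// Sketch of findLatestVersionForBackend: pick the newest version of one
// backend type from the list of supported backends.
function findLatestVersionForBackend(
  versionBackends: VersionBackend[],
  backendType: string
): string | null {
  const matching = versionBackends.filter((vb) => vb.backend === backendType)
  if (matching.length === 0) return null
  // Numeric-aware descending sort: "b5.10" ranks above "b5.9"
  matching.sort((a, b) =>
    b.version.localeCompare(a.version, undefined, { numeric: true })
  )
  return `${matching[0].version}/${matching[0].backend}`
}

const backends: VersionBackend[] = [
  { version: 'b5.9', backend: 'cuda' },
  { version: 'b5.10', backend: 'cuda' },
  { version: 'b5.10', backend: 'cpu' },
]
console.log(findLatestVersionForBackend(backends, 'cuda')) // "b5.10/cuda"
console.log(findLatestVersionForBackend(backends, 'metal')) // null
```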

These changes provide a more stable, user-friendly, and maintainable backend management experience for the Llama.cpp extension.
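The auto-update policy can be summarized as a pure decision function: keep the user's backend *type*, bump only the *version*. The names and shapes below are illustrative, not the extension's actual API:

```typescript
type VersionBackend = { version: string; backend: string }

// Decide whether to update, given the current "version/backend" string and
// the list of supported backends. A newer version of a *different* backend
// type never triggers an update.
function decideUpdate(
  current: string, // e.g. "b5.8/cuda"
  available: VersionBackend[]
): { update: boolean; target: string } {
  const [version, backend] = current.split('/')
  if (!version || !backend) return { update: false, target: current }
  const candidates = available
    .filter((vb) => vb.backend === backend)
    .map((vb) => vb.version)
    .sort((a, b) => b.localeCompare(a, undefined, { numeric: true }))
  const latest = candidates[0]
  if (!latest || latest === version) return { update: false, target: current }
  return { update: true, target: `${latest}/${backend}` }
}

console.log(
  decideUpdate('b5.8/cuda', [
    { version: 'b5.9', backend: 'cuda' },
    { version: 'b6.0', backend: 'vulkan' }, // newer, but different type: ignored
  ])
) // { update: true, target: "b5.9/cuda" }
```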

Fixes: #5883

* fix: cortex models migration should be done once

* feat: Optimize Llama.cpp backend preference storage and UI updates

This commit refines the Llama.cpp extension's backend management by:

* **Optimizing `localStorage` Writes:** The system now only writes the backend type preference to `localStorage` if the new value is different from the currently stored one. This reduces unnecessary `localStorage` operations.
* **Ensuring UI Consistency on Initial Setup:** When a fresh installation or an invalid backend configuration is detected, the UI settings are now explicitly updated to reflect the newly determined `effectiveBackendString`, ensuring the displayed setting matches the active configuration.

These changes improve performance by reducing redundant storage operations and enhance user experience by maintaining UI synchronization with the backend state.
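The write-if-changed pattern can be sketched as below. The storage interface is injected here so the same logic works against `window.localStorage` or an in-memory stand-in; the injection and the numeric return value are illustrative conveniences, not the extension's actual signature:

```typescript
interface KVStore {
  getItem(key: string): string | null
  setItem(key: string, value: string): void
}

const BACKEND_TYPE_KEY = 'llama_cpp_backend_type'

// Returns 1 if a write happened, 0 if it was skipped (for illustration).
function setStoredBackendType(store: KVStore, backendType: string): number {
  try {
    if (store.getItem(BACKEND_TYPE_KEY) === backendType) return 0 // unchanged
    store.setItem(BACKEND_TYPE_KEY, backendType)
    return 1
  } catch {
    return 0 // storage unavailable: persistence is best-effort
  }
}

// In-memory stand-in for localStorage:
const mem = new Map<string, string>()
const store: KVStore = {
  getItem: (k) => mem.get(k) ?? null,
  setItem: (k, v) => void mem.set(k, v),
}
console.log(setStoredBackendType(store, 'cuda')) // 1: first write
console.log(setStoredBackendType(store, 'cuda')) // 0: unchanged, skipped
console.log(setStoredBackendType(store, 'vulkan')) // 1: value changed
```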

* Revert "fix: provider settings should be refreshed on page load (#5887)"

This reverts commit ce6af62c7df4a7e7ea8c0896f307309d6bf38771.

* fix: add loader version backend llamacpp

* fix: wrong key name

* fix: model setting issues

* fix: virtual dom hub

* chore: cleanup

* chore: hide device offload setting

---------

Co-authored-by: Louis <louis@jan.ai>
Co-authored-by: Faisal Amir <urmauur@gmail.com>
Akarshan Biswas 2025-07-24 17:03:35 +05:30 committed by GitHub
parent d51f904826
commit a1af70f7a9
GPG Key ID: B5690EEEBB952194
10 changed files with 677 additions and 386 deletions

View File

@@ -60,19 +60,7 @@
}
},
{
"key": "ctx_size",
"title": "Context Size",
"description": "Size of the prompt context (0 = loaded from model).",
"controllerType": "input",
"controllerProps": {
"value": 8192,
"placeholder": "8192",
"type": "number",
"textAlign": "right"
}
},
{
"key": "context_shift",
"key": "ctx_shift",
"title": "Context Shift",
"description": "Allow model to cut text in the beginning to accommodate new text in its memory",
"controllerType": "checkbox",
@@ -116,18 +104,6 @@
"textAlign": "right"
}
},
{
"key": "n_gpu_layers",
"title": "GPU Layers",
"description": "Number of model layers to offload to the GPU (-1 for all layers, 0 for CPU only).",
"controllerType": "input",
"controllerProps": {
"value": -1,
"placeholder": "-1",
"type": "number",
"textAlign": "right"
}
},
{
"key": "device",
"title": "Devices for Offload",

View File

@@ -177,6 +177,49 @@ export default class llamacpp_extension extends AIEngine {
this.configureBackends()
}
private getStoredBackendType(): string | null {
try {
return localStorage.getItem('llama_cpp_backend_type')
} catch (error) {
logger.warn('Failed to read backend type from localStorage:', error)
return null
}
}
private setStoredBackendType(backendType: string): void {
try {
localStorage.setItem('llama_cpp_backend_type', backendType)
logger.info(`Stored backend type preference: ${backendType}`)
} catch (error) {
logger.warn('Failed to store backend type in localStorage:', error)
}
}
private clearStoredBackendType(): void {
try {
localStorage.removeItem('llama_cpp_backend_type')
logger.info('Cleared stored backend type preference')
} catch (error) {
logger.warn('Failed to clear backend type from localStorage:', error)
}
}
private findLatestVersionForBackend(
version_backends: { version: string; backend: string }[],
backendType: string
): string | null {
const matchingBackends = version_backends.filter(
(vb) => vb.backend === backendType
)
if (matchingBackends.length === 0) {
return null
}
// Sort by version (newest first) and get the latest
matchingBackends.sort((a, b) => b.version.localeCompare(a.version))
return `${matchingBackends[0].version}/${matchingBackends[0].backend}`
}
async configureBackends(): Promise<void> {
if (this.isConfiguringBackends) {
logger.info(
@@ -207,8 +250,33 @@ export default class llamacpp_extension extends AIEngine {
)
}
let bestAvailableBackendString =
this.determineBestBackend(version_backends)
// Get stored backend preference
const storedBackendType = this.getStoredBackendType()
let bestAvailableBackendString = ''
if (storedBackendType) {
// Find the latest version of the stored backend type
const preferredBackendString = this.findLatestVersionForBackend(
version_backends,
storedBackendType
)
if (preferredBackendString) {
bestAvailableBackendString = preferredBackendString
logger.info(
`Using stored backend preference: ${bestAvailableBackendString}`
)
} else {
logger.warn(
`Stored backend type '${storedBackendType}' not available, falling back to best backend`
)
// Clear the invalid stored preference
this.clearStoredBackendType()
bestAvailableBackendString =
this.determineBestBackend(version_backends)
}
} else {
bestAvailableBackendString = this.determineBestBackend(version_backends)
}
let settings = structuredClone(SETTINGS)
const backendSettingIndex = settings.findIndex(
@@ -231,11 +299,42 @@ export default class llamacpp_extension extends AIEngine {
originalDefaultBackendValue
)
const initialUiDefault =
// Determine initial UI default based on priority:
// 1. Saved setting (if valid and not original default)
// 2. Best available for stored backend type
// 3. Original default
let initialUiDefault = originalDefaultBackendValue
if (
savedBackendSetting &&
savedBackendSetting !== originalDefaultBackendValue
? savedBackendSetting
: bestAvailableBackendString || originalDefaultBackendValue
) {
initialUiDefault = savedBackendSetting
// Store the backend type from the saved setting only if different
const [, backendType] = savedBackendSetting.split('/')
if (backendType) {
const currentStoredBackend = this.getStoredBackendType()
if (currentStoredBackend !== backendType) {
this.setStoredBackendType(backendType)
logger.info(
`Stored backend type preference from saved setting: ${backendType}`
)
}
}
} else if (bestAvailableBackendString) {
initialUiDefault = bestAvailableBackendString
// Store the backend type from the best available only if different
const [, backendType] = bestAvailableBackendString.split('/')
if (backendType) {
const currentStoredBackend = this.getStoredBackendType()
if (currentStoredBackend !== backendType) {
this.setStoredBackendType(backendType)
logger.info(
`Stored backend type preference from best available: ${backendType}`
)
}
}
}
backendSetting.controllerProps.value = initialUiDefault
logger.info(
@@ -253,6 +352,49 @@ export default class llamacpp_extension extends AIEngine {
let effectiveBackendString = this.config.version_backend
let backendWasDownloaded = false
// Handle fresh installation case where version_backend might be 'none' or invalid
if (
!effectiveBackendString ||
effectiveBackendString === 'none' ||
!effectiveBackendString.includes('/')
) {
effectiveBackendString = bestAvailableBackendString
logger.info(
`Fresh installation or invalid backend detected, using: ${effectiveBackendString}`
)
// Update the config immediately
this.config.version_backend = effectiveBackendString
// Update the settings to reflect the change in UI
const updatedSettings = await this.getSettings()
await this.updateSettings(
updatedSettings.map((item) => {
if (item.key === 'version_backend') {
item.controllerProps.value = effectiveBackendString
}
return item
})
)
logger.info(`Updated UI settings to show: ${effectiveBackendString}`)
}
// Download and install the backend if not already present
if (effectiveBackendString) {
const [version, backend] = effectiveBackendString.split('/')
if (version && backend) {
const isInstalled = await isBackendInstalled(backend, version)
if (!isInstalled) {
logger.info(`Installing initial backend: ${effectiveBackendString}`)
await this.ensureBackendReady(backend, version)
backendWasDownloaded = true
logger.info(
`Successfully installed initial backend: ${effectiveBackendString}`
)
}
}
}
if (this.config.auto_update_engine) {
const updateResult = await this.handleAutoUpdate(
bestAvailableBackendString
@@ -263,12 +405,8 @@ export default class llamacpp_extension extends AIEngine {
}
}
if (!backendWasDownloaded) {
if (!backendWasDownloaded && effectiveBackendString) {
await this.ensureFinalBackendInstallation(effectiveBackendString)
} else {
logger.info(
'Skipping final installation check - backend was just downloaded during auto-update'
)
}
} finally {
this.isConfiguringBackends = false
@@ -350,65 +488,149 @@ export default class llamacpp_extension extends AIEngine {
return { wasUpdated: false, newBackend: this.config.version_backend }
}
// If version_backend is empty, invalid, or 'none', use the best available backend
if (
!this.config.version_backend ||
this.config.version_backend === '' ||
this.config.version_backend === 'none' ||
!this.config.version_backend.includes('/')
) {
logger.info(
'No valid backend currently selected, using best available backend'
)
try {
const [bestVersion, bestBackend] = bestAvailableBackendString.split('/')
// Download new backend
await this.ensureBackendReady(bestBackend, bestVersion)
// Add delay on Windows
if (IS_WINDOWS) {
await new Promise((resolve) => setTimeout(resolve, 1000))
}
// Update configuration
this.config.version_backend = bestAvailableBackendString
// Store the backend type preference only if it changed
const currentStoredBackend = this.getStoredBackendType()
if (currentStoredBackend !== bestBackend) {
this.setStoredBackendType(bestBackend)
logger.info(`Stored new backend type preference: ${bestBackend}`)
}
// Update settings
const settings = await this.getSettings()
await this.updateSettings(
settings.map((item) => {
if (item.key === 'version_backend') {
item.controllerProps.value = bestAvailableBackendString
}
return item
})
)
logger.info(
`Successfully set initial backend: ${bestAvailableBackendString}`
)
return { wasUpdated: true, newBackend: bestAvailableBackendString }
} catch (error) {
logger.error('Failed to set initial backend:', error)
return { wasUpdated: false, newBackend: this.config.version_backend }
}
}
// Parse current backend configuration
const [currentVersion, currentBackend] = (
this.config.version_backend || ''
).split('/')
const [bestVersion, bestBackend] = bestAvailableBackendString.split('/')
// Check if update is needed
if (currentBackend === bestBackend && currentVersion === bestVersion) {
logger.info('Auto-update: Already using the best available backend')
if (!currentVersion || !currentBackend) {
logger.warn(
`Invalid current backend format: ${this.config.version_backend}`
)
return { wasUpdated: false, newBackend: this.config.version_backend }
}
// Perform update
// Find the latest version for the currently selected backend type
const version_backends = await listSupportedBackends()
const targetBackendString = this.findLatestVersionForBackend(
version_backends,
currentBackend
)
if (!targetBackendString) {
logger.warn(
`No available versions found for current backend type: ${currentBackend}`
)
return { wasUpdated: false, newBackend: this.config.version_backend }
}
const [latestVersion] = targetBackendString.split('/')
// Check if update is needed (only version comparison for same backend type)
if (currentVersion === latestVersion) {
logger.info(
'Auto-update: Already using the latest version of the selected backend'
)
return { wasUpdated: false, newBackend: this.config.version_backend }
}
// Perform version update for the same backend type
try {
logger.info(
`Auto-updating from ${this.config.version_backend} to ${bestAvailableBackendString}`
`Auto-updating from ${this.config.version_backend} to ${targetBackendString} (preserving backend type)`
)
// Download new backend first
await this.ensureBackendReady(bestBackend, bestVersion)
// Download new version of the same backend type
await this.ensureBackendReady(currentBackend, latestVersion)
// Add a small delay on Windows to ensure file operations complete
// Add delay on Windows
if (IS_WINDOWS) {
await new Promise((resolve) => setTimeout(resolve, 1000))
}
// Update configuration
this.config.version_backend = bestAvailableBackendString
this.config.version_backend = targetBackendString
// Update stored backend type preference only if it changed
const currentStoredBackend = this.getStoredBackendType()
if (currentStoredBackend !== currentBackend) {
this.setStoredBackendType(currentBackend)
logger.info(`Updated stored backend type preference: ${currentBackend}`)
}
// Update settings
const settings = await this.getSettings()
await this.updateSettings(
settings.map((item) => {
if (item.key === 'version_backend') {
item.controllerProps.value = bestAvailableBackendString
item.controllerProps.value = targetBackendString
}
return item
})
)
logger.info(
`Successfully updated to backend: ${bestAvailableBackendString}`
`Successfully updated to backend: ${targetBackendString} (preserved backend type: ${currentBackend})`
)
// Clean up old backends (with additional delay on Windows)
// Clean up old versions of the same backend type
if (IS_WINDOWS) {
await new Promise((resolve) => setTimeout(resolve, 500))
}
await this.removeOldBackends(bestVersion, bestBackend)
await this.removeOldBackend(latestVersion, currentBackend)
return { wasUpdated: true, newBackend: bestAvailableBackendString }
return { wasUpdated: true, newBackend: targetBackendString }
} catch (error) {
logger.error('Auto-update failed:', error)
return { wasUpdated: false, newBackend: this.config.version_backend }
}
}
private async removeOldBackends(
bestVersion: string,
bestBackend: string
private async removeOldBackend(
latestVersion: string,
backendType: string
): Promise<void> {
try {
const janDataFolderPath = await getJanDataFolderPath()
@@ -426,32 +648,35 @@ export default class llamacpp_extension extends AIEngine {
for (const versionDir of versionDirs) {
const versionPath = await joinPath([backendsDir, versionDir])
const backendTypeDirs = await fs.readdirSync(versionPath)
const versionName = await basename(versionDir)
for (const backendTypeDir of backendTypeDirs) {
const versionName = await basename(versionDir)
const backendName = await basename(backendTypeDir)
// Skip the latest version
if (versionName === latestVersion) {
continue
}
// Skip if it's the best version/backend
if (versionName === bestVersion && backendName === bestBackend) {
continue
}
// Check if this version has the specific backend type we're interested in
const backendTypePath = await joinPath([versionPath, backendType])
// If this other backend is installed, remove it
const isInstalled = await isBackendInstalled(backendName, versionName)
if (await fs.existsSync(backendTypePath)) {
const isInstalled = await isBackendInstalled(backendType, versionName)
if (isInstalled) {
const toRemove = await joinPath([versionPath, backendTypeDir])
try {
await fs.rm(toRemove)
logger.info(`Removed old backend: ${toRemove}`)
await fs.rm(backendTypePath)
logger.info(
`Removed old version of ${backendType}: ${backendTypePath}`
)
} catch (e) {
logger.warn(`Failed to remove old backend: ${toRemove}`, e)
logger.warn(
`Failed to remove old backend version: ${backendTypePath}`,
e
)
}
}
}
}
} catch (error) {
logger.error('Error during old backend cleanup:', error)
logger.error('Error during old backend version cleanup:', error)
}
}
@@ -526,6 +751,15 @@ export default class llamacpp_extension extends AIEngine {
const valueStr = value as string
const [version, backend] = valueStr.split('/')
// Store the backend type preference in localStorage only if it changed
if (backend) {
const currentStoredBackend = this.getStoredBackendType()
if (currentStoredBackend !== backend) {
this.setStoredBackendType(backend)
logger.info(`Updated backend type preference to: ${backend}`)
}
}
// Reset device setting when backend changes
this.config.device = ''
@@ -602,6 +836,9 @@ export default class llamacpp_extension extends AIEngine {
}
private async migrateLegacyModels() {
// Attempt to migrate only once
if (localStorage.getItem('cortex_models_migrated') === 'true') return
const janDataFolderPath = await getJanDataFolderPath()
const modelsDir = await joinPath([janDataFolderPath, 'models'])
if (!(await fs.existsSync(modelsDir))) return
@@ -687,6 +924,7 @@ export default class llamacpp_extension extends AIEngine {
stack.push(child)
}
}
localStorage.setItem('cortex_models_migrated', 'true')
}
override async import(modelId: string, opts: ImportOptions): Promise<void> {

View File

@@ -57,6 +57,7 @@
},
"packageManager": "yarn@4.5.3",
"dependencies": {
"@tanstack/react-virtual": "^3.13.12",
"download-cli": "^1.1.1"
}
}

View File

@@ -11,8 +11,7 @@ import {
} from '@/components/ui/sheet'
import { DynamicControllerSetting } from '@/containers/dynamicControllerSetting'
import { useModelProvider } from '@/hooks/useModelProvider'
import { updateModel, stopModel } from '@/services/models'
import { ModelSettingParams } from '@janhq/core'
import { stopModel } from '@/services/models'
import { cn } from '@/lib/utils'
import { useTranslation } from '@/i18n/react-i18next-compat'
@@ -71,22 +70,6 @@ export function ModelSetting({
models: updatedModels,
})
const params = Object.entries(updatedModel.settings).reduce(
(acc, [key, value]) => {
const rawVal = value.controller_props?.value
const num = parseFloat(rawVal as string)
acc[key] = !isNaN(num) ? num : rawVal
return acc
},
{} as Record<string, unknown>
) as ModelSettingParams
updateModel({
id: model.id,
settings: params,
...(params as unknown as object),
})
// Call debounced stopModel only when updating ctx_len or ngl
if (key === 'ctx_len' || key === 'ngl') {
debouncedStopModel(model.id)

View File

@@ -183,7 +183,7 @@ export const useChat = () => {
async (modelId: string, provider: ProviderObject) => {
const providerName = provider.provider
const newSettings = [...provider.settings]
const settingKey = 'context_shift'
const settingKey = 'ctx_shift'
// Handle different value types by forcing the type
// Use type assertion to bypass type checking
const settingIndex = provider.settings.findIndex(

View File

@@ -74,9 +74,17 @@ export const useModelProvider = create<ModelProviderState>()(
),
...models,
]
const updatedModels = provider.models?.map((model) => {
return {
...model,
settings:
models.find((m) => m.id === model.id)?.settings ||
model.settings,
}
})
return {
...provider,
models: provider.persist ? provider?.models : mergedModels,
models: provider.persist ? updatedModels : mergedModels,
settings: provider.settings.map((setting) => {
const existingSetting = provider.persist
? undefined

View File

@@ -119,6 +119,7 @@
"createAssistant": "Create Assistant",
"enterApiKey": "Enter API Key",
"scrollToBottom": "Scroll to bottom",
"generateAiResponse": "Generate AI Response",
"addModel": {
"title": "Add Model",
"modelId": "Model ID",

View File

@@ -1,4 +1,5 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
import { useVirtualizer } from '@tanstack/react-virtual'
import { createFileRoute, useNavigate, useSearch } from '@tanstack/react-router'
import { route } from '@/constants/routes'
import { useModelSources } from '@/hooks/useModelSources'
@@ -54,6 +55,8 @@ export const Route = createFileRoute(route.hub.index as any)({
})
function Hub() {
const parentRef = useRef(null)
const { t } = useTranslation()
const sortOptions = [
{ value: 'newest', name: t('hub:sortNewest') },
@@ -173,7 +176,6 @@ function Hub() {
// Filtered models
const filteredModels = useMemo(() => {
let filtered = sortedModels
// Apply search filter
if (searchValue.length) {
filtered = filtered?.filter(
@@ -190,7 +192,6 @@
)
)
}
// Apply downloaded filter
if (showOnlyDownloaded) {
filtered = filtered?.filter((model) =>
@@ -201,12 +202,10 @@
)
)
}
// Add HuggingFace repo at the beginning if available
if (huggingFaceRepo) {
filtered = [huggingFaceRepo, ...filtered]
}
return filtered
}, [
searchValue,
@@ -216,6 +215,13 @@
huggingFaceRepo,
])
// The virtualizer
const rowVirtualizer = useVirtualizer({
count: filteredModels.length,
getScrollElement: () => parentRef.current,
estimateSize: () => 35,
})
useEffect(() => {
fetchSources()
}, [fetchSources])
@@ -566,213 +572,259 @@ function Hub() {
</div>
</div>
) : (
<div className="flex flex-col pb-2 mb-2 gap-2 ">
<div className="flex flex-col pb-2 mb-2 gap-2" ref={parentRef}>
<div className="flex items-center gap-2 justify-end sm:hidden">
{renderFilter()}
</div>
{filteredModels.map((model, i) => (
<div key={`${model.model_name}-${i}`}>
<Card
header={
<div className="flex items-center justify-between gap-x-2">
<div
className="cursor-pointer"
onClick={() => {
console.log(model.model_name)
navigate({
to: route.hub.model,
params: {
modelId: model.model_name,
},
})
}}
>
<h1
className={cn(
'text-main-view-fg font-medium text-base capitalize sm:max-w-none',
isRecommendedModel(model.model_name)
? 'hub-model-card-step'
: ''
)}
title={extractModelName(model.model_name) || ''}
<div
style={{
height: `${rowVirtualizer.getTotalSize()}px`,
width: '100%',
position: 'relative',
}}
>
{rowVirtualizer.getVirtualItems().map((virtualItem) => (
<div key={virtualItem.key} className="mb-2">
<Card
header={
<div className="flex items-center justify-between gap-x-2">
<div
className="cursor-pointer"
onClick={() => {
navigate({
to: route.hub.model,
params: {
modelId:
filteredModels[virtualItem.index]
.model_name,
},
})
}}
>
{extractModelName(model.model_name) || ''}
</h1>
</div>
<div className="shrink-0 space-x-3 flex items-center">
<span className="text-main-view-fg/70 font-medium text-xs">
{
(
model.quants.find((m) =>
defaultModelQuantizations.some((e) =>
m.model_id.toLowerCase().includes(e)
)
) ?? model.quants?.[0]
)?.file_size
}
</span>
<DownloadButtonPlaceholder model={model} />
</div>
</div>
}
>
<div className="line-clamp-2 mt-3 text-main-view-fg/60">
<RenderMarkdown
enableRawHtml={true}
className="select-none reset-heading"
components={{
a: ({ ...props }) => (
<a
{...props}
target="_blank"
rel="noopener noreferrer"
/>
),
}}
content={
extractDescription(model?.description) || ''
}
/>
</div>
<div className="flex items-center gap-2 mt-2">
<span className="capitalize text-main-view-fg/80">
{t('hub:by')} {model?.developer}
</span>
<div className="flex items-center gap-4 ml-2">
<div className="flex items-center gap-1">
<IconDownload
size={18}
className="text-main-view-fg/50"
title={t('hub:downloads')}
/>
<span className="text-main-view-fg/80">
{model.downloads || 0}
</span>
</div>
<div className="flex items-center gap-1">
<IconFileCode
size={20}
className="text-main-view-fg/50"
title={t('hub:variants')}
/>
<span className="text-main-view-fg/80">
{model.quants?.length || 0}
</span>
</div>
{model.quants.length > 1 && (
<div className="flex items-center gap-2 hub-show-variants-step">
<Switch
checked={!!expandedModels[model.model_name]}
onCheckedChange={() =>
toggleModelExpansion(model.model_name)
<h1
className={cn(
'text-main-view-fg font-medium text-base capitalize sm:max-w-none',
isRecommendedModel(
filteredModels[virtualItem.index]
.model_name
)
? 'hub-model-card-step'
: ''
)}
title={
extractModelName(
filteredModels[virtualItem.index]
.model_name
) || ''
}
/>
<p className="text-main-view-fg/70">
{t('hub:showVariants')}
</p>
>
{extractModelName(
filteredModels[virtualItem.index].model_name
) || ''}
</h1>
</div>
)}
<div className="shrink-0 space-x-3 flex items-center">
<span className="text-main-view-fg/70 font-medium text-xs">
{
(
filteredModels[
virtualItem.index
].quants.find((m) =>
defaultModelQuantizations.some((e) =>
m.model_id.toLowerCase().includes(e)
)
) ??
filteredModels[virtualItem.index]
.quants?.[0]
)?.file_size
}
</span>
<DownloadButtonPlaceholder
model={filteredModels[virtualItem.index]}
/>
</div>
</div>
}
>
<div className="line-clamp-2 mt-3 text-main-view-fg/60">
<RenderMarkdown
enableRawHtml={true}
className="select-none reset-heading"
components={{
a: ({ ...props }) => (
<a
{...props}
target="_blank"
rel="noopener noreferrer"
/>
),
}}
content={
extractDescription(
filteredModels[virtualItem.index]?.description
) || ''
}
/>
</div>
</div>
{expandedModels[model.model_name] &&
model.quants.length > 0 && (
<div className="mt-5">
{model.quants.map((variant) => (
<CardItem
key={variant.model_id}
title={variant.model_id}
actions={
<div className="flex items-center gap-2">
<p className="text-main-view-fg/70 font-medium text-xs">
{variant.file_size}
</p>
{(() => {
const isDownloading =
localDownloadingModels.has(
variant.model_id
) ||
downloadProcesses.some(
(e) => e.id === variant.model_id
)
const downloadProgress =
downloadProcesses.find(
(e) => e.id === variant.model_id
)?.progress || 0
const isDownloaded =
llamaProvider?.models.some(
(m: { id: string }) =>
m.id === variant.model_id
)
<div className="flex items-center gap-2 mt-2">
<span className="capitalize text-main-view-fg/80">
{t('hub:by')}{' '}
{filteredModels[virtualItem.index]?.developer}
</span>
<div className="flex items-center gap-4 ml-2">
<div className="flex items-center gap-1">
<IconDownload
size={18}
className="text-main-view-fg/50"
title={t('hub:downloads')}
/>
<span className="text-main-view-fg/80">
{filteredModels[virtualItem.index]
.downloads || 0}
</span>
</div>
<div className="flex items-center gap-1">
<IconFileCode
size={20}
className="text-main-view-fg/50"
title={t('hub:variants')}
/>
<span className="text-main-view-fg/80">
{filteredModels[virtualItem.index].quants
?.length || 0}
</span>
</div>
{filteredModels[virtualItem.index].quants.length >
1 && (
<div className="flex items-center gap-2 hub-show-variants-step">
<Switch
checked={
!!expandedModels[
filteredModels[virtualItem.index]
.model_name
]
}
onCheckedChange={() =>
toggleModelExpansion(
filteredModels[virtualItem.index]
.model_name
)
}
/>
<p className="text-main-view-fg/70">
{t('hub:showVariants')}
</p>
</div>
)}
</div>
</div>
{expandedModels[
filteredModels[virtualItem.index].model_name
] &&
filteredModels[virtualItem.index].quants.length >
0 && (
<div className="mt-5">
{filteredModels[virtualItem.index].quants.map(
(variant) => (
<CardItem
key={variant.model_id}
title={variant.model_id}
actions={
<div className="flex items-center gap-2">
<p className="text-main-view-fg/70 font-medium text-xs">
{variant.file_size}
</p>
{(() => {
const isDownloading =
localDownloadingModels.has(
variant.model_id
) ||
downloadProcesses.some(
(e) => e.id === variant.model_id
)
const downloadProgress =
downloadProcesses.find(
(e) => e.id === variant.model_id
)?.progress || 0
const isDownloaded =
llamaProvider?.models.some(
(m: { id: string }) =>
m.id === variant.model_id
)
if (isDownloading) {
return (
<>
<div className="flex items-center gap-2 w-20">
<Progress
value={downloadProgress * 100}
/>
<span className="text-xs text-center text-main-view-fg/70">
{Math.round(
downloadProgress * 100
)}
%
</span>
</div>
</>
)
}
if (isDownloading) {
return (
<>
<div className="flex items-center gap-2 w-20">
<Progress
value={
downloadProgress * 100
}
/>
<span className="text-xs text-center text-main-view-fg/70">
{Math.round(
downloadProgress * 100
)}
%
</span>
</div>
</>
)
}
if (isDownloaded) {
return (
<div
className="flex items-center justify-center rounded bg-main-view-fg/10"
title={t('hub:useModel')}
>
<Button
variant="link"
size="sm"
onClick={() =>
handleUseModel(
if (isDownloaded) {
return (
<div
className="flex items-center justify-center rounded bg-main-view-fg/10"
title={t('hub:useModel')}
>
<Button
variant="link"
size="sm"
onClick={() =>
handleUseModel(
variant.model_id
)
}
>
{t('hub:use')}
</Button>
</div>
)
}
return (
<div
className="size-6 cursor-pointer flex items-center justify-center rounded hover:bg-main-view-fg/10 transition-all duration-200 ease-in-out"
title={t('hub:downloadModel')}
onClick={() => {
addLocalDownloadingModel(
variant.model_id
)
}
pullModel(
variant.model_id,
variant.path
)
}}
>
{t('hub:use')}
</Button>
</div>
)
}
return (
<div
className="size-6 cursor-pointer flex items-center justify-center rounded hover:bg-main-view-fg/10 transition-all duration-200 ease-in-out"
title={t('hub:downloadModel')}
onClick={() => {
addLocalDownloadingModel(
variant.model_id
)
pullModel(
variant.model_id,
variant.path
)
}}
>
<IconDownload
size={16}
className="text-main-view-fg/80"
/>
</div>
)
})()}
</div>
}
/>
))}
</div>
)}
</Card>
</div>
))}
<IconDownload
size={16}
className="text-main-view-fg/80"
/>
</div>
)
})()}
</div>
}
/>
)
)}
</div>
)}
</Card>
</div>
))}
</div>
</div>
)}
</div>

View File

@@ -1,3 +1,4 @@
/* eslint-disable react-hooks/exhaustive-deps */
import { Card, CardItem } from '@/containers/Card'
import HeaderPage from '@/containers/HeaderPage'
import SettingsMenu from '@/containers/SettingsMenu'
@@ -39,7 +40,6 @@ import { useEffect, useState } from 'react'
import { predefinedProviders } from '@/consts/providers'
import { useModelLoad } from '@/hooks/useModelLoad'
import { useLlamacppDevices } from '@/hooks/useLlamacppDevices'
import { EngineManager } from '@janhq/core'
// as route.threadsDetail
export const Route = createFileRoute('/settings/providers/$providerName')({
@@ -82,10 +82,20 @@ function ProviderDetail() {
const { providerName } = useParams({ from: Route.id })
const { getProviderByName, setProviders, updateProvider } = useModelProvider()
const provider = getProviderByName(providerName)
const [settings, setSettings] = useState<ProviderSetting[]>([])
const isSetup = step === 'setup_remote_provider'
const navigate = useNavigate()
// Check if llamacpp provider needs backend configuration
const needsBackendConfig =
provider?.provider === 'llamacpp' &&
provider.settings?.some(
(setting) =>
setting.key === 'version_backend' &&
(setting.controller_props.value === 'none' ||
setting.controller_props.value === '' ||
!setting.controller_props.value)
)
useEffect(() => {
// Initial data fetch
getActiveModels().then((models) => setActiveModels(models || []))
@@ -98,6 +108,44 @@
return () => clearInterval(intervalId)
}, [setActiveModels])
// Auto-refresh provider settings to get updated backend configuration
const refreshSettings = async () => {
if (!provider) return
try {
// Refresh providers to get updated settings from the extension
const updatedProviders = await getProviders()
setProviders(updatedProviders)
} catch (error) {
console.error('Failed to refresh settings:', error)
}
}
// Auto-refresh settings when provider changes or when llamacpp needs backend config
useEffect(() => {
if (provider && needsBackendConfig) {
// Auto-refresh every 3 seconds when backend is being configured
const intervalId = setInterval(refreshSettings, 3000)
return () => clearInterval(intervalId)
}
}, [provider, needsBackendConfig])
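The `needsBackendConfig` guard used above (both for the loading placeholder and for gating this polling effect) boils down to a small pure predicate. A hypothetical standalone sketch, with minimal stand-in types for the provider settings shapes used in the component:

```typescript
// Minimal stand-in shapes for the provider settings used in the component.
interface ControllerProps { value?: string | boolean | number }
interface ProviderSetting { key: string; controller_props: ControllerProps }

// True when the llamacpp `version_backend` setting is missing, empty,
// or explicitly 'none' -- i.e. the backend still needs configuration.
function needsBackendConfig(
  providerId: string | undefined,
  settings: ProviderSetting[] | undefined
): boolean {
  if (providerId !== 'llamacpp') return false
  return (settings ?? []).some(
    (s) =>
      s.key === 'version_backend' &&
      (s.controller_props.value === 'none' ||
        s.controller_props.value === '' ||
        !s.controller_props.value)
  )
}
```

Factored out this way, the three "unconfigured" cases (`'none'`, empty string, missing value) become trivially unit-testable instead of living inline in the component body.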
// Auto-refresh models for non-predefined providers
useEffect(() => {
if (
provider &&
provider.provider !== 'llamacpp' &&
!predefinedProviders.some((p) => p.provider === provider.provider) &&
provider.base_url
) {
// Auto-refresh models every 10 seconds for remote providers
const intervalId = setInterval(() => {
handleRefreshModels()
}, 10000)
return () => clearInterval(intervalId)
}
}, [provider])
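The guard on the models auto-refresh effect above can likewise be read as a pure predicate (a hypothetical sketch, not code from the PR; `predefined` stands in for the `predefinedProviders` constant, reduced to a list of provider ids):

```typescript
// Minimal stand-in for the provider object used in the effect's condition.
interface ProviderLike { provider: string; base_url?: string }

// Only custom remote providers are polled: not llamacpp, not one of the
// predefined providers, and only once a base_url has been configured.
function shouldAutoRefreshModels(
  provider: ProviderLike | undefined,
  predefined: string[]
): boolean {
  return Boolean(
    provider &&
      provider.provider !== 'llamacpp' &&
      !predefined.includes(provider.provider) &&
      provider.base_url
  )
}
```

Keeping the condition pure makes it obvious that the 10-second interval is only ever created for custom remote providers with a configured endpoint.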
const handleJoyrideCallback = (data: CallBackProps) => {
const { status } = data
@@ -108,32 +156,6 @@ function ProviderDetail() {
     }
   }
-  useEffect(() => {
-    async function getSettings() {
-      // TODO: Replace this hardcoded check with engine check later (and the rest below)
-      if (provider?.provider === 'llamacpp') {
-        setSettings(
-          ((
-            await EngineManager.instance()
-              .get(provider?.provider)
-              ?.getSettings()
-          )?.map((setting) => {
-            return {
-              key: setting.key,
-              title: setting.title,
-              description: setting.description,
-              controller_type: setting.controllerType as unknown,
-              controller_props: setting.controllerProps as unknown,
-            }
-          }) as ProviderSetting[]) ?? []
-        )
-      } else {
-        setSettings(provider?.settings ?? [])
-      }
-    }
-    getSettings()
-  }, [provider])
   const handleRefreshModels = async () => {
     if (!provider || !provider.base_url) {
       toast.error(t('providers:models'), {
@@ -274,85 +296,94 @@ function ProviderDetail() {
>
{/* Settings */}
<Card>
-{settings.map((setting, settingIndex) => {
+{provider?.settings.map((setting, settingIndex) => {
// Use the DynamicController component
const actionComponent = (
<div className="mt-2">
<DynamicControllerSetting
controllerType={setting.controller_type}
controllerProps={setting.controller_props}
className={cn(
setting.key === 'api-key' &&
'third-step-setup-remote-provider',
setting.key === 'device' && 'hidden'
)}
onChange={(newValue) => {
if (provider) {
const newSettings = [...provider.settings]
// Handle different value types by forcing the type
// Use type assertion to bypass type checking
{needsBackendConfig &&
setting.key === 'version_backend' ? (
<div className="flex items-center gap-1 text-sm text-main-view-fg/70">
<IconLoader size={16} className="animate-spin" />
<span>loading</span>
</div>
) : (
<DynamicControllerSetting
controllerType={setting.controller_type}
controllerProps={setting.controller_props}
className={cn(
setting.key === 'api-key' &&
'third-step-setup-remote-provider',
setting.key === 'device' && 'hidden'
)}
onChange={(newValue) => {
if (provider) {
const newSettings = [...provider.settings]
// Handle different value types by forcing the type
// Use type assertion to bypass type checking
;(
newSettings[settingIndex].controller_props as {
value: string | boolean | number
;(
newSettings[settingIndex]
.controller_props as {
value: string | boolean | number
}
).value = newValue
// Create update object with updated settings
const updateObj: Partial<ModelProvider> = {
settings: newSettings,
}
).value = newValue
// Create update object with updated settings
const updateObj: Partial<ModelProvider> = {
settings: newSettings,
}
// Check if this is an API key or base URL setting and update the corresponding top-level field
const settingKey = setting.key
if (
settingKey === 'api-key' &&
typeof newValue === 'string'
) {
updateObj.api_key = newValue
} else if (
settingKey === 'base-url' &&
typeof newValue === 'string'
) {
updateObj.base_url = newValue
}
// Reset device setting to empty when backend version changes
if (settingKey === 'version_backend') {
const deviceSettingIndex =
newSettings.findIndex(
(s) => s.key === 'device'
)
if (deviceSettingIndex !== -1) {
;(
newSettings[deviceSettingIndex]
.controller_props as {
value: string
}
).value = ''
// Check if this is an API key or base URL setting and update the corresponding top-level field
const settingKey = setting.key
if (
settingKey === 'api-key' &&
typeof newValue === 'string'
) {
updateObj.api_key = newValue
} else if (
settingKey === 'base-url' &&
typeof newValue === 'string'
) {
updateObj.base_url = newValue
}
// Reset llamacpp device activations when backend version changes
if (providerName === 'llamacpp') {
const { setActivatedDevices } =
useLlamacppDevices.getState()
setActivatedDevices([])
// Reset device setting to empty when backend version changes
if (settingKey === 'version_backend') {
const deviceSettingIndex =
newSettings.findIndex(
(s) => s.key === 'device'
)
if (deviceSettingIndex !== -1) {
;(
newSettings[deviceSettingIndex]
.controller_props as {
value: string
}
).value = ''
}
// Reset llamacpp device activations when backend version changes
if (providerName === 'llamacpp') {
const { setActivatedDevices } =
useLlamacppDevices.getState()
setActivatedDevices([])
}
}
updateSettings(
providerName,
updateObj.settings ?? []
)
updateProvider(providerName, {
...provider,
...updateObj,
})
stopAllModels()
}
updateSettings(
providerName,
updateObj.settings ?? []
)
updateProvider(providerName, {
...provider,
...updateObj,
})
stopAllModels()
}
}}
/>
}}
/>
)}
</div>
)
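The onChange handler above mutates the settings array in place and mixes several concerns; its core rule — update the changed setting, and clear the stale `device` selection whenever `version_backend` changes — can be sketched as a pure update (a hypothetical helper, not code from the PR):

```typescript
// Minimal stand-in shapes for the settings handled by the onChange above.
interface ControllerProps { value: string | boolean | number }
interface Setting { key: string; controller_props: ControllerProps }

// Returns a new settings array with `key` set to `value`. When the backend
// version changes, the `device` selection is reset to '' so a stale device
// choice cannot survive a switch to a backend with different devices.
function applySettingChange(
  settings: Setting[],
  key: string,
  value: string | boolean | number
): Setting[] {
  return settings.map((s) => {
    if (s.key === key) {
      return { ...s, controller_props: { ...s.controller_props, value } }
    }
    if (key === 'version_backend' && s.key === 'device') {
      return { ...s, controller_props: { ...s.controller_props, value: '' } }
    }
    return s
  })
}
```

Expressing the update as a pure function also avoids the in-place mutation of `provider.settings` that the current handler performs before calling `updateProvider`.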
@@ -360,6 +391,7 @@ function ProviderDetail() {
 <CardItem
   key={settingIndex}
   title={setting.title}
+  className={cn(setting.key === 'device' && 'hidden')}
   column={
     setting.controller_type === 'input' &&
     setting.controller_props.type !== 'number'


@@ -325,7 +325,7 @@ function ThreadDetail() {
           className="bg-main-view-fg/10 px-4 border border-main-view-fg/5 flex items-center justify-center rounded-xl gap-x-2 cursor-pointer pointer-events-auto"
           onClick={generateAIResponse}
         >
-          <p className="text-xs">{t('Generate AI Response')}</p>
+          <p className="text-xs">{t('common:generateAiResponse')}</p>
           <Play size={12} />
         </div>
       )}