feat: Add model compatibility check and memory estimation (#6243)

* feat: Add model compatibility check and memory estimation

This commit introduces a new feature to check if a given model is supported based on available device memory.

The change includes:
- A new `estimateKVCache` method that calculates the required memory for the model's KV cache. It uses GGUF metadata such as `block_count`, `head_count`, `key_length`, and `value_length` to perform the calculation.
- An `isModelSupported` method that combines the model file size and the estimated KV cache size to determine the total memory required. It then checks if any available device has sufficient free memory to load the model.
- An updated error message for the `version_backend` check to be more user-friendly, suggesting a stable internet connection as a potential solution for backend setup failures.

This functionality helps prevent the application from attempting to load models that would exceed the device's memory capacity, leading to more stable and predictable behavior.

fixes: #5505
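
For context, the estimate described above reduces to one calculation. A minimal sketch (the function name is illustrative; it assumes the default fp16 cache, i.e. 2 bytes per element, and the metadata keys follow the GGUF names used in the diff below):

```ts
// Rough KV-cache size (bytes) for a GGUF model, assuming an fp16 cache.
// `meta` maps GGUF keys (e.g. 'llama.block_count') to string values.
function estimateKvCacheBytes(
  meta: Record<string, string>,
  ctxSize?: number
): number {
  const arch = meta['general.architecture']
  const nLayer = Number(meta[`${arch}.block_count`])
  const nHead = Number(meta[`${arch}.attention.head_count`])
  // Prefer explicit per-head key/value lengths; otherwise derive them
  // from embedding_length / head_count (K and V each need head_dim).
  const keyLen = Number(meta[`${arch}.attention.key_length`])
  const valLen = Number(meta[`${arch}.attention.value_length`])
  const kvPerHead =
    keyLen && valLen
      ? keyLen + valLen
      : (Number(meta[`${arch}.embedding_length`]) / nHead) * 2
  const ctxLen = ctxSize ?? Number(meta[`${arch}.context_length`])
  const bytesPerElement = 2 // fp16
  return ctxLen * nLayer * nHead * kvPerHead * bytesPerElement
}
```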

* Update extensions/llamacpp-extension/src/index.ts

Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>

* Update extensions/llamacpp-extension/src/index.ts

Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>

* Extend this to available system RAM if no GGML device is available

* fix: Improve model metadata and memory checks

This commit refactors the logic for checking if a model is supported by a system's available memory.

**Key changes:**
- **Remote model support**: The `read_gguf_metadata` function can now fetch metadata from a remote URL by reading the file in chunks.
- **Improved KV cache size calculation**: The KV cache size is now estimated more accurately by using `attention.key_length` and `attention.value_length` from the GGUF metadata, with a fallback to `embedding_length`.
- **Granular memory check statuses**: The `isModelSupported` function now returns a more specific status (`'RED'`, `'YELLOW'`, `'GREEN'`) to indicate whether the model weights or the KV cache are too large for the available memory.
- **Consolidated logic**: The logic for checking local and remote models has been consolidated into a single `isModelSupported` function, improving code clarity and maintainability.

These changes provide more robust and informative model compatibility checks, especially for models hosted on remote servers.
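
Put differently, the status reported by `isModelSupported` comes down to comparing two memory requirements against the free memory summed across GGML devices (or free system RAM as a fallback). A minimal sketch of that decision, with the three byte counts assumed to come from the checks described above:

```ts
type SupportStatus = 'RED' | 'YELLOW' | 'GREEN'

// Classify model compatibility from memory requirements (all values in bytes).
function classifySupport(
  modelSizeBytes: number, // GGUF weights: local file size or remote Content-Length
  kvCacheBytes: number, // estimated KV cache for the requested context length
  freeMemoryBytes: number // sum of free memory across GGML devices, or free system RAM
): SupportStatus {
  if (modelSizeBytes > freeMemoryBytes) return 'RED' // weights alone don't fit
  if (modelSizeBytes + kvCacheBytes > freeMemoryBytes) return 'YELLOW' // weights fit, KV cache doesn't
  return 'GREEN' // both fit
}
```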

* Update extensions/llamacpp-extension/src/index.ts

Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>

* Make ctx_size optional and use sum free memory across ggml devices

* feat: hub and dropdown model selection handle model compatibility

* feat: update badge model info color

* chore: enable detail page to get model compatibility

* chore: update copy

* chore: update shrink indicator UI

---------

Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>
Co-authored-by: Faisal Amir <urmauur@gmail.com>
Akarshan Biswas 2025-08-21 16:13:50 +05:30 committed by GitHub
parent 9c25480c7b
commit 510c70bdf7
13 changed files with 978 additions and 76 deletions

View File

@@ -35,7 +35,11 @@ import {
import { invoke } from '@tauri-apps/api/core'
import { getProxyConfig } from './util'
import { basename } from '@tauri-apps/api/path'
import { readGgufMetadata } from '@janhq/tauri-plugin-llamacpp-api'
import {
GgufMetadata,
readGgufMetadata,
} from '@janhq/tauri-plugin-llamacpp-api'
import { getSystemUsage } from '@janhq/tauri-plugin-hardware-api'
type LlamacppConfig = {
version_backend: string
@@ -1742,7 +1746,7 @@ export default class llamacpp_extension extends AIEngine {
const [version, backend] = cfg.version_backend.split('/')
if (!version || !backend) {
throw new Error(
`Invalid version/backend format: ${cfg.version_backend}. Expected format: <version>/<backend>`
'Backend setup was not successful. Please restart the app with a stable internet connection.'
)
}
// set envs
@@ -1843,4 +1847,134 @@ export default class llamacpp_extension extends AIEngine {
'tokenizer.chat_template'
]?.includes('tools')
}
/**
* Estimate the KV cache size from the given GGUF metadata
*
*/
private async estimateKVCache(
meta: Record<string, string>,
ctx_size?: number
): Promise<number> {
const arch = meta['general.architecture']
if (!arch) throw new Error('Invalid metadata: architecture not found')
const nLayer = Number(meta[`${arch}.block_count`])
if (!nLayer) throw new Error('Invalid metadata: block_count not found')
const nHead = Number(meta[`${arch}.attention.head_count`])
if (!nHead) throw new Error('Invalid metadata: head_count not found')
// Try to get key/value lengths first (more accurate)
const keyLen = Number(meta[`${arch}.attention.key_length`])
const valLen = Number(meta[`${arch}.attention.value_length`])
let headDim: number
if (keyLen && valLen) {
// Use explicit key/value lengths if available
logger.info(
`Using explicit key_length: ${keyLen}, value_length: ${valLen}`
)
headDim = (keyLen + valLen)
} else {
// Fall back to embedding_length estimation
const embeddingLen = Number(meta[`${arch}.embedding_length`])
if (!embeddingLen)
throw new Error('Invalid metadata: embedding_length not found')
// Standard transformer: head_dim = embedding_dim / num_heads
// For KV cache: we need both K and V, so 2 * head_dim per head
headDim = (embeddingLen / nHead) * 2
logger.info(
`Using embedding_length estimation: ${embeddingLen}, calculated head_dim: ${headDim}`
)
}
let ctxLen: number
if (!ctx_size) {
ctxLen = Number(meta[`${arch}.context_length`])
} else {
ctxLen = ctx_size
}
logger.info(`ctxLen: ${ctxLen}`)
logger.info(`nLayer: ${nLayer}`)
logger.info(`nHead: ${nHead}`)
logger.info(`headDim: ${headDim}`)
// Consider f16 by default
// Can be extended by checking cache-type-v and cache-type-k
// but we are checking overall compatibility with the default settings
// fp16 = 2 bytes per element (16 bits)
const bytesPerElement = 2
// Total KV cache size per token = nHead * headDim * bytesPerElement
const kvPerToken = nHead * headDim * bytesPerElement
return ctxLen * nLayer * kvPerToken
}
private async getModelSize(path: string): Promise<number> {
if (path.startsWith('https://')) {
const res = await fetch(path, { method: 'HEAD' })
const len = res.headers.get('content-length')
return len ? parseInt(len, 10) : 0
} else {
return (await fs.fileStat(path)).size
}
}
/*
* check the support status of a model by its path (local/remote)
*
* Returns:
* - "RED" weights don't fit
* - "YELLOW" weights fit, KV cache doesn't
* - "GREEN" both weights + KV cache fit
*/
async isModelSupported(
path: string,
ctx_size?: number
): Promise<'RED' | 'YELLOW' | 'GREEN'> {
try {
const modelSize = await this.getModelSize(path)
logger.info(`modelSize: ${modelSize}`)
let gguf: GgufMetadata
gguf = await readGgufMetadata(path)
let kvCacheSize: number
if (ctx_size) {
kvCacheSize = await this.estimateKVCache(gguf.metadata, ctx_size)
} else {
kvCacheSize = await this.estimateKVCache(gguf.metadata)
}
// total memory consumption = model weights + kvcache + a small buffer for outputs
// output buffer is small so not considering here
const totalRequired = modelSize + kvCacheSize
logger.info(
`isModelSupported: Total memory requirement: ${totalRequired} for ${path}`
)
let availableMemBytes: number
const devices = await this.getDevices()
if (devices.length > 0) {
// Sum free memory across all GPUs
availableMemBytes = devices
.map((d) => d.free * 1024 * 1024)
.reduce((a, b) => a + b, 0)
} else {
// CPU fallback
const sys = await getSystemUsage()
availableMemBytes = (sys.total_memory - sys.used_memory) * 1024 * 1024
}
// check model size wrt system memory
if (modelSize > availableMemBytes) {
return 'RED'
} else if (modelSize + kvCacheSize > availableMemBytes) {
return 'YELLOW'
} else {
return 'GREEN'
}
} catch (e) {
throw new Error(String(e))
}
}
}

View File

@@ -23,12 +23,13 @@ sysinfo = "0.34.2"
tauri = { version = "2.5.0", default-features = false, features = [] }
thiserror = "2.0.12"
tokio = { version = "1", features = ["full"] }
reqwest = { version = "0.11", features = ["json", "blocking", "stream"] }
# Windows-specific dependencies
[target.'cfg(windows)'.dependencies]
windows-sys = { version = "0.60.2", features = ["Win32_Storage_FileSystem"] }
# Unix-specific dependencies
# Unix-specific dependencies
[target.'cfg(unix)'.dependencies]
nix = { version = "=0.30.1", features = ["signal", "process"] }

View File

@@ -1,8 +1,58 @@
use super::helpers;
use super::types::GgufMetadata;
use reqwest;
use std::fs::File;
use std::io::BufReader;
/// Read GGUF metadata from a model file
#[tauri::command]
pub async fn read_gguf_metadata(path: String) -> Result<GgufMetadata, String> {
helpers::read_gguf_metadata(&path).map_err(|e| format!("Failed to read GGUF metadata: {}", e))
if path.starts_with("http://") || path.starts_with("https://") {
// Remote: read in 2MB chunks until successful
let client = reqwest::Client::new();
let chunk_size = 2 * 1024 * 1024; // Fixed 2MB chunks
let max_total_size = 120 * 1024 * 1024; // Don't exceed 120MB total
let mut total_downloaded = 0;
let mut accumulated_data = Vec::new();
while total_downloaded < max_total_size {
let start = total_downloaded;
let end = std::cmp::min(start + chunk_size - 1, max_total_size - 1);
let resp = client
.get(&path)
.header("Range", format!("bytes={}-{}", start, end))
.send()
.await
.map_err(|e| format!("Failed to fetch chunk {}-{}: {}", start, end, e))?;
let chunk_data = resp
.bytes()
.await
.map_err(|e| format!("Failed to read chunk response: {}", e))?;
accumulated_data.extend_from_slice(&chunk_data);
total_downloaded += chunk_data.len();
// Try parsing after each chunk
let cursor = std::io::Cursor::new(&accumulated_data);
if let Ok(metadata) = helpers::read_gguf_metadata(cursor) {
return Ok(metadata);
}
// If we got less data than expected, we've reached EOF
if chunk_data.len() < chunk_size {
break;
}
}
Err("Could not parse GGUF metadata from downloaded data".to_string())
} else {
// Local: use streaming file reader
let file =
File::open(&path).map_err(|e| format!("Failed to open local file {}: {}", path, e))?;
let reader = BufReader::new(file);
helpers::read_gguf_metadata(reader)
.map_err(|e| format!("Failed to parse GGUF metadata: {}", e))
}
}

View File

@@ -1,13 +1,11 @@
use byteorder::{LittleEndian, ReadBytesExt};
use std::convert::TryFrom;
use std::fs::File;
use std::io::{self, BufReader, Read, Seek};
use std::path::Path;
use super::types::{GgufMetadata, GgufValueType};
pub fn read_gguf_metadata<P: AsRef<Path>>(path: P) -> io::Result<GgufMetadata> {
let mut file = BufReader::new(File::open(path)?);
pub fn read_gguf_metadata<R: Read + Seek>(reader: R) -> io::Result<GgufMetadata> {
let mut file = BufReader::new(reader);
let mut magic = [0u8; 4];
file.read_exact(&mut magic)?;

View File

@@ -17,7 +17,6 @@ import {
IconPhoto,
IconWorld,
IconAtom,
IconEye,
IconTool,
IconCodeCircle2,
IconPlayerStopFilled,
@@ -537,7 +536,7 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
{/* File attachment - show only for models with mmproj */}
{hasMmproj && (
<div
className="h-6 p-1 ml-1 flex items-center justify-center rounded-sm hover:bg-main-view-fg/10 transition-all duration-200 ease-in-out gap-1"
className="h-6 p-1 flex items-center justify-center rounded-sm hover:bg-main-view-fg/10 transition-all duration-200 ease-in-out gap-1"
onClick={handleAttachmentClick}
>
<IconPhoto size={18} className="text-main-view-fg/50" />
@@ -554,20 +553,6 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
{/* <div className="h-6 p-1 flex items-center justify-center rounded-sm hover:bg-main-view-fg/10 transition-all duration-200 ease-in-out gap-1">
<IconMicrophone size={18} className="text-main-view-fg/50" />
</div> */}
{selectedModel?.capabilities?.includes('vision') && (
<TooltipProvider>
<Tooltip>
<TooltipTrigger disabled={dropdownToolsAvailable}>
<div className="h-6 p-1 flex items-center justify-center rounded-sm hover:bg-main-view-fg/10 transition-all duration-200 ease-in-out gap-1">
<IconEye size={18} className="text-main-view-fg/50" />
</div>
</TooltipTrigger>
<TooltipContent>
<p>{t('vision')}</p>
</TooltipContent>
</Tooltip>
</TooltipProvider>
)}
{selectedModel?.capabilities?.includes('embeddings') && (
<TooltipProvider>
<Tooltip>

View File

@@ -14,12 +14,16 @@ import { route } from '@/constants/routes'
import { useThreads } from '@/hooks/useThreads'
import { ModelSetting } from '@/containers/ModelSetting'
import ProvidersAvatar from '@/containers/ProvidersAvatar'
import { ModelSupportStatus } from '@/containers/ModelSupportStatus'
import { Fzf } from 'fzf'
import { localStorageKey } from '@/constants/localStorage'
import { useTranslation } from '@/i18n/react-i18next-compat'
import { useFavoriteModel } from '@/hooks/useFavoriteModel'
import { predefinedProviders } from '@/consts/providers'
import { checkMmprojExistsAndUpdateOffloadMMprojSetting } from '@/services/models'
import {
checkMmprojExistsAndUpdateOffloadMMprojSetting,
checkMmprojExists,
} from '@/services/models'
type DropdownModelProviderProps = {
model?: ThreadModel
@@ -91,6 +95,50 @@ const DropdownModelProvider = ({
[providers]
)
// Helper function to get context size from model settings
const getContextSize = useCallback((): number => {
if (!selectedModel?.settings?.ctx_len?.controller_props?.value) {
return 8192 // Default context size
}
return selectedModel.settings.ctx_len.controller_props.value as number
}, [selectedModel?.settings?.ctx_len?.controller_props?.value])
// Function to check if a llamacpp model has vision capabilities and update model capabilities
const checkAndUpdateModelVisionCapability = useCallback(
async (modelId: string) => {
try {
const hasVision = await checkMmprojExists(modelId)
if (hasVision) {
// Update the model capabilities to include 'vision'
const provider = getProviderByName('llamacpp')
if (provider) {
const modelIndex = provider.models.findIndex(
(m) => m.id === modelId
)
if (modelIndex !== -1) {
const model = provider.models[modelIndex]
const capabilities = model.capabilities || []
// Add 'vision' capability if not already present
if (!capabilities.includes('vision')) {
const updatedModels = [...provider.models]
updatedModels[modelIndex] = {
...model,
capabilities: [...capabilities, 'vision'],
}
updateProvider('llamacpp', { models: updatedModels })
}
}
}
}
} catch (error) {
console.debug('Error checking mmproj for model:', modelId, error)
}
},
[getProviderByName, updateProvider]
)
// Initialize model provider only once
useEffect(() => {
const initializeModel = async () => {
@@ -107,6 +155,8 @@ const DropdownModelProvider = ({
updateProvider,
getProviderByName
)
// Also check vision capability
await checkAndUpdateModelVisionCapability(model.id as string)
}
} else if (useLastUsedModel) {
// Try to use last used model only when explicitly requested (for new chat)
@@ -119,6 +169,8 @@ const DropdownModelProvider = ({
updateProvider,
getProviderByName
)
// Also check vision capability
await checkAndUpdateModelVisionCapability(lastUsed.model)
}
} else {
selectModelProvider('', '')
@@ -136,6 +188,7 @@ const DropdownModelProvider = ({
checkModelExists,
updateProvider,
getProviderByName,
checkAndUpdateModelVisionCapability,
])
// Update display model when selection changes
@@ -147,6 +200,25 @@ const DropdownModelProvider = ({
}
}, [selectedProvider, selectedModel, t])
// Check vision capabilities for all llamacpp models
useEffect(() => {
const checkAllLlamacppModelsForVision = async () => {
const llamacppProvider = providers.find(
(p) => p.provider === 'llamacpp' && p.active
)
if (llamacppProvider) {
const checkPromises = llamacppProvider.models.map((model) =>
checkAndUpdateModelVisionCapability(model.id)
)
await Promise.allSettled(checkPromises)
}
}
if (open) {
checkAllLlamacppModelsForVision()
}
}, [open, providers, checkAndUpdateModelVisionCapability])
// Reset search value when dropdown closes
const onOpenChange = useCallback((open: boolean) => {
setOpen(open)
@@ -287,6 +359,8 @@ const DropdownModelProvider = ({
updateProvider,
getProviderByName
)
// Also check vision capability
await checkAndUpdateModelVisionCapability(searchableModel.model.id)
}
// Store the selected model as last used
@@ -305,6 +379,7 @@ const DropdownModelProvider = ({
useLastUsedModel,
updateProvider,
getProviderByName,
checkAndUpdateModelVisionCapability,
]
)
@@ -318,7 +393,7 @@ const DropdownModelProvider = ({
return (
<Popover open={open} onOpenChange={onOpenChange}>
<div className="bg-main-view-fg/5 hover:bg-main-view-fg/8 px-2 py-1 flex items-center gap-1.5 rounded-sm max-h-[32px] ">
<div className="bg-main-view-fg/5 hover:bg-main-view-fg/8 px-2 py-1 flex items-center gap-1.5 rounded-sm max-h-[32px] mr-0.5">
<PopoverTrigger asChild>
<button
title={displayModel}
@@ -346,6 +421,12 @@ const DropdownModelProvider = ({
smallIcon
/>
)}
<ModelSupportStatus
modelId={selectedModel?.id}
provider={selectedProvider}
contextSize={getContextSize()}
className="ml-0.5 flex-shrink-0"
/>
</div>
<PopoverContent

View File

@@ -0,0 +1,226 @@
import {
HoverCard,
HoverCardContent,
HoverCardTrigger,
} from '@/components/ui/hover-card'
import { IconInfoCircle } from '@tabler/icons-react'
import { CatalogModel, ModelQuant } from '@/services/models'
import { extractDescription } from '@/lib/models'
interface ModelInfoHoverCardProps {
model: CatalogModel
variant?: ModelQuant
defaultModelQuantizations: string[]
modelSupportStatus: Record<string, string>
onCheckModelSupport: (variant: ModelQuant) => void
children?: React.ReactNode
}
export const ModelInfoHoverCard = ({
model,
variant,
defaultModelQuantizations,
modelSupportStatus,
onCheckModelSupport,
children,
}: ModelInfoHoverCardProps) => {
const isVariantMode = !!variant
const displayVariant =
variant ||
model.quants.find((m) =>
defaultModelQuantizations.some((e) =>
m.model_id.toLowerCase().includes(e)
)
) ||
model.quants?.[0]
const handleMouseEnter = () => {
if (displayVariant) {
onCheckModelSupport(displayVariant)
}
}
const getCompatibilityStatus = () => {
const status = displayVariant
? modelSupportStatus[displayVariant.model_id]
: null
if (status === 'LOADING') {
return (
<div className="flex items-start gap-2">
<div className="size-2 shrink-0 border border-main-view-fg/50 border-t-transparent rounded-full animate-spin mt-1"></div>
<span className="text-main-view-fg/50">Checking...</span>
</div>
)
} else if (status === 'GREEN') {
return (
<div className="flex items-start gap-2">
<div className="size-2 shrink-0 bg-green-500 rounded-full mt-1"></div>
<span className="text-green-500 font-medium">
Recommended for your device
</span>
</div>
)
} else if (status === 'YELLOW') {
return (
<div className="flex items-start gap-2">
<div className="size-2 shrink-0 bg-yellow-500 rounded-full mt-1"></div>
<span className="text-yellow-500 font-medium">
May be slow on your device
</span>
</div>
)
} else if (status === 'RED') {
return (
<div className="flex items-start gap-2">
<div className="size-2 shrink-0 bg-red-500 rounded-full mt-1"></div>
<span className="text-red-500 font-medium">
May be incompatible with your device
</span>
</div>
)
} else {
return (
<div className="flex items-start gap-2">
<div className="size-2 shrink-0 bg-gray-400 rounded-full mt-1"></div>
<span className="text-gray-500">Unknown</span>
</div>
)
}
}
return (
<HoverCard>
<HoverCardTrigger asChild onMouseEnter={handleMouseEnter}>
{children || (
<div className="cursor-pointer">
<IconInfoCircle
size={14}
className="mt-0.5 text-main-view-fg/50 hover:text-main-view-fg/80 transition-colors"
/>
</div>
)}
</HoverCardTrigger>
<HoverCardContent className="w-96 p-4" side="left">
<div className="space-y-4">
{/* Header */}
<div className="border-b border-main-view-fg/10 pb-3">
<h4 className="text-sm font-semibold text-main-view-fg">
{isVariantMode ? variant.model_id : model.model_name}
</h4>
<p className="text-xs text-main-view-fg/60 mt-1">
{isVariantMode
? 'Model Variant Information'
: 'Model Information'}
</p>
</div>
{/* Main Info Grid */}
<div className="grid grid-cols-2 gap-3 text-xs">
<div className="space-y-2">
{isVariantMode ? (
<>
<div>
<span className="text-main-view-fg/50 block">
File Size
</span>
<span className="text-main-view-fg font-medium mt-1 inline-block">
{variant.file_size}
</span>
</div>
<div>
<span className="text-main-view-fg/50 block">
Quantization
</span>
<span className="text-main-view-fg font-medium mt-1 inline-block">
{variant.model_id.split('-').pop()?.toUpperCase() ||
'N/A'}
</span>
</div>
</>
) : (
<>
<div>
<span className="text-main-view-fg/50 block">
Downloads
</span>
<span className="text-main-view-fg font-medium mt-1 inline-block">
{model.downloads?.toLocaleString() || '0'}
</span>
</div>
<div>
<span className="text-main-view-fg/50 block">Variants</span>
<span className="text-main-view-fg font-medium mt-1 inline-block">
{model.quants?.length || 0}
</span>
</div>
</>
)}
</div>
<div className="space-y-2">
{!isVariantMode && (
<div>
<span className="text-main-view-fg/50 block">
Default Size
</span>
<span className="text-main-view-fg font-medium mt-1 inline-block">
{displayVariant?.file_size || 'N/A'}
</span>
</div>
)}
<div>
<span className="text-main-view-fg/50 block">
Compatibility
</span>
<div className="flex items-center gap-1.5 mt-1">
{getCompatibilityStatus()}
</div>
</div>
</div>
</div>
{/* Features Section */}
{(model.num_mmproj > 0 || model.tools) && (
<div className="border-t border-main-view-fg/10 pt-3">
<h5 className="text-xs font-medium text-main-view-fg/70 mb-2">
Features
</h5>
<div className="flex flex-wrap gap-2">
{model.num_mmproj > 0 && (
<div className="flex items-center gap-1.5 px-2 py-1 bg-main-view-fg/10 rounded-md">
<span className="text-xs text-main-view-fg font-medium">
Vision
</span>
</div>
)}
{model.tools && (
<div className="flex items-center gap-1.5 px-2 py-1 bg-main-view-fg/10 rounded-md">
<span className="text-xs text-main-view-fg font-medium">
Tools
</span>
</div>
)}
</div>
</div>
)}
{/* Content Section */}
<div className="border-t border-main-view-fg/10 pt-3">
<h5 className="text-xs font-medium text-main-view-fg/70 mb-1">
{isVariantMode ? 'Download URL' : 'Description'}
</h5>
<div className="text-xs text-main-view-fg/60 bg-main-view-fg/5 rounded p-2">
{isVariantMode ? (
<div className="font-mono break-all">{variant.path}</div>
) : (
extractDescription(model?.description) ||
'No description available'
)}
</div>
</div>
</div>
</HoverCardContent>
</HoverCard>
)
}

View File

@@ -0,0 +1,142 @@
import { useCallback, useEffect, useState } from 'react'
import { cn } from '@/lib/utils'
import {
Tooltip,
TooltipContent,
TooltipProvider,
TooltipTrigger,
} from '@/components/ui/tooltip'
import { isModelSupported } from '@/services/models'
import { getJanDataFolderPath, joinPath } from '@janhq/core'
interface ModelSupportStatusProps {
modelId: string | undefined
provider: string | undefined
contextSize: number
className?: string
}
export const ModelSupportStatus = ({
modelId,
provider,
contextSize,
className,
}: ModelSupportStatusProps) => {
const [modelSupportStatus, setModelSupportStatus] = useState<
'RED' | 'YELLOW' | 'GREEN' | 'LOADING' | null
>(null)
// Helper function to check model support with proper path resolution
const checkModelSupportWithPath = useCallback(
async (
id: string,
ctxSize: number
): Promise<'RED' | 'YELLOW' | 'GREEN'> => {
try {
// Get Jan's data folder path and construct the full model file path
// Following the llamacpp extension structure: <Jan's data folder>/llamacpp/models/<modelId>/model.gguf
const janDataFolder = await getJanDataFolderPath()
const modelFilePath = await joinPath([
janDataFolder,
'llamacpp',
'models',
id,
'model.gguf',
])
return await isModelSupported(modelFilePath, ctxSize)
} catch (error) {
console.error(
'Error checking model support with constructed path:',
error
)
// If path construction or model support check fails, assume not supported
return 'RED'
}
},
[]
)
// Helper function to get icon color based on model support status
const getStatusColor = (): string => {
switch (modelSupportStatus) {
case 'GREEN':
return 'bg-green-500'
case 'YELLOW':
return 'bg-yellow-500'
case 'RED':
return 'bg-red-500'
case 'LOADING':
return 'bg-main-view-fg/50'
default:
return 'bg-main-view-fg/50'
}
}
// Helper function to get tooltip text based on model support status
const getStatusTooltip = (): string => {
switch (modelSupportStatus) {
case 'GREEN':
return `Works well on your device (ctx: ${contextSize})`
case 'YELLOW':
return `Might work on your device (ctx: ${contextSize})`
case 'RED':
return `Doesn't work on your device (ctx: ${contextSize})`
case 'LOADING':
return 'Checking device compatibility...'
default:
return 'Unknown'
}
}
// Check model support when model changes
useEffect(() => {
const checkModelSupport = async () => {
if (modelId && provider === 'llamacpp') {
// Set loading state immediately
setModelSupportStatus('LOADING')
try {
const supportStatus = await checkModelSupportWithPath(
modelId,
contextSize
)
setModelSupportStatus(supportStatus)
} catch (error) {
console.error('Error checking model support:', error)
setModelSupportStatus('RED')
}
} else {
// Only show status for llamacpp models since isModelSupported is specific to llamacpp
setModelSupportStatus(null)
}
}
checkModelSupport()
}, [modelId, provider, contextSize, checkModelSupportWithPath])
// Don't render anything if no status or not llamacpp
if (!modelSupportStatus || provider !== 'llamacpp') {
return null
}
return (
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>
<div
className={cn(
'size-2 flex items-center justify-center rounded-full',
modelSupportStatus === 'LOADING'
? 'size-2.5 border border-main-view-fg/50 border-t-transparent animate-spin'
: getStatusColor(),
className
)}
/>
</TooltipTrigger>
<TooltipContent>
<p>{getStatusTooltip()}</p>
</TooltipContent>
</Tooltip>
</TooltipProvider>
)
}

View File

@@ -291,15 +291,6 @@ describe('ChatInput', () => {
expect(stopButton).toBeInTheDocument()
})
it('shows capability icons when model supports them', () => {
act(() => {
renderWithRouter()
})
// Should show vision icon (rendered as SVG with tabler-icon-eye class)
const visionIcon = document.querySelector('.tabler-icon-eye')
expect(visionIcon).toBeInTheDocument()
})
it('shows model selection dropdown', () => {
act(() => {

View File

@@ -20,19 +20,24 @@ import { useModelProvider } from '@/hooks/useModelProvider'
import { useDownloadStore } from '@/hooks/useDownloadStore'
import {
CatalogModel,
ModelQuant,
convertHfRepoToCatalogModel,
fetchHuggingFaceRepo,
pullModelWithMetadata,
isModelSupported,
} from '@/services/models'
import { Progress } from '@/components/ui/progress'
import { Button } from '@/components/ui/button'
import { cn } from '@/lib/utils'
import { useGeneralSetting } from '@/hooks/useGeneralSetting'
import { ModelInfoHoverCard } from '@/containers/ModelInfoHoverCard'
type SearchParams = {
repo: string
}
const defaultModelQuantizations = ['iq4_xs', 'q4_k_m']
export const Route = createFileRoute('/hub/$modelId')({
component: HubModelDetail,
validateSearch: (search: Record<string, unknown>): SearchParams => ({
@@ -57,6 +62,11 @@ function HubModelDetail() {
const [readmeContent, setReadmeContent] = useState<string>('')
const [isLoadingReadme, setIsLoadingReadme] = useState(false)
// State for model support status
const [modelSupportStatus, setModelSupportStatus] = useState<
Record<string, 'RED' | 'YELLOW' | 'GREEN' | 'LOADING'>
>({})
useEffect(() => {
fetchSources()
}, [fetchSources])
@@ -131,6 +141,41 @@ function HubModelDetail() {
}
}
// Check model support function
const checkModelSupport = useCallback(
async (variant: ModelQuant) => {
const modelKey = variant.model_id
// Don't check again if already checking or checked
if (modelSupportStatus[modelKey]) {
return
}
// Set loading state
setModelSupportStatus((prev) => ({
...prev,
[modelKey]: 'LOADING',
}))
try {
// Use the HuggingFace path for the model
const modelPath = variant.path
const supported = await isModelSupported(modelPath, 8192)
setModelSupportStatus((prev) => ({
...prev,
[modelKey]: supported,
}))
} catch (error) {
console.error('Error checking model support:', error)
setModelSupportStatus((prev) => ({
...prev,
[modelKey]: 'RED',
}))
}
},
[modelSupportStatus]
)
// Extract tags from quants (model variants)
const tags = useMemo(() => {
if (!modelData?.quants) return []
@@ -318,6 +363,7 @@ function HubModelDetail() {
<th className="text-left py-3 px-2 text-sm font-medium text-main-view-fg/70">
Size
</th>
<th></th>
<th className="text-right py-3 px-2 text-sm font-medium text-main-view-fg/70">
Action
</th>
@@ -372,7 +418,18 @@ function HubModelDetail() {
{variant.file_size}
</span>
</td>
<td className="py-3 px-2 text-right">
<td>
<ModelInfoHoverCard
model={modelData}
variant={variant}
defaultModelQuantizations={
defaultModelQuantizations
}
modelSupportStatus={modelSupportStatus}
onCheckModelSupport={checkModelSupport}
/>
</td>
<td className="py-3 px-2 text-right ml-auto">
{(() => {
if (isDownloading && !isDownloaded) {
return (

View File

@@ -31,6 +31,7 @@ import {
TooltipProvider,
TooltipTrigger,
} from '@/components/ui/tooltip'
import { ModelInfoHoverCard } from '@/containers/ModelInfoHoverCard'
import Joyride, { CallBackProps, STATUS } from 'react-joyride'
import { CustomTooltipJoyRide } from '@/containers/CustomeTooltipJoyRide'
import {
@@ -44,6 +45,7 @@ import {
pullModelWithMetadata,
fetchHuggingFaceRepo,
convertHfRepoToCatalogModel,
isModelSupported,
} from '@/services/models'
import { useDownloadStore } from '@/hooks/useDownloadStore'
import { Progress } from '@/components/ui/progress'
@@ -97,6 +99,9 @@ function Hub() {
const [huggingFaceRepo, setHuggingFaceRepo] = useState<CatalogModel | null>(
null
)
const [modelSupportStatus, setModelSupportStatus] = useState<
Record<string, 'RED' | 'YELLOW' | 'GREEN' | 'LOADING'>
>({})
const [joyrideReady, setJoyrideReady] = useState(false)
const [currentStepIndex, setCurrentStepIndex] = useState(0)
const addModelSourceTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(
@@ -270,6 +275,41 @@ function Hub() {
[navigate]
)
const checkModelSupport = useCallback(
async (variant: any) => {
const modelKey = variant.model_id
// Don't check again if already checking or checked
if (modelSupportStatus[modelKey]) {
return
}
// Set loading state
setModelSupportStatus((prev) => ({
...prev,
[modelKey]: 'LOADING',
}))
try {
// Use the HuggingFace path for the model
const modelPath = variant.path
const supportStatus = await isModelSupported(modelPath, 8192)
setModelSupportStatus((prev) => ({
...prev,
[modelKey]: supportStatus,
}))
} catch (error) {
console.error('Error checking model support:', error)
setModelSupportStatus((prev) => ({
...prev,
[modelKey]: 'RED',
}))
}
},
[modelSupportStatus]
)
const DownloadButtonPlaceholder = useMemo(() => {
return ({ model }: ModelProps) => {
// Check if this is a HuggingFace repository (no quants)
@@ -616,6 +656,14 @@ function Hub() {
)?.file_size
}
</span>
<ModelInfoHoverCard
model={filteredModels[virtualItem.index]}
defaultModelQuantizations={
defaultModelQuantizations
}
modelSupportStatus={modelSupportStatus}
onCheckModelSupport={checkModelSupport}
/>
<DownloadButtonPlaceholder
model={filteredModels[virtualItem.index]}
/>
@@ -671,45 +719,47 @@ function Hub() {
?.length || 0}
</span>
</div>
{filteredModels[virtualItem.index].tools && (
<div className="flex items-center gap-1">
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>
<div>
<IconTool
size={17}
className="text-main-view-fg/50"
/>
</div>
</TooltipTrigger>
<TooltipContent>
<p>{t('tools')}</p>
</TooltipContent>
</Tooltip>
</TooltipProvider>
</div>
)}
{filteredModels[virtualItem.index].num_mmproj >
0 && (
<div className="flex items-center gap-1">
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>
<div>
<IconEye
size={17}
className="text-main-view-fg/50"
/>
</div>
</TooltipTrigger>
<TooltipContent>
<p>{t('vision')}</p>
</TooltipContent>
</Tooltip>
</TooltipProvider>
</div>
)}
<div className="flex gap-1.5 items-center">
{filteredModels[virtualItem.index].num_mmproj >
0 && (
<div className="flex items-center gap-1">
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>
<div>
<IconEye
size={17}
className="text-main-view-fg/50"
/>
</div>
</TooltipTrigger>
<TooltipContent>
<p>{t('vision')}</p>
</TooltipContent>
</Tooltip>
</TooltipProvider>
</div>
)}
{filteredModels[virtualItem.index].tools && (
<div className="flex items-center gap-1">
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>
<div>
<IconTool
size={17}
className="text-main-view-fg/50"
/>
</div>
</TooltipTrigger>
<TooltipContent>
<p>{t('tools')}</p>
</TooltipContent>
</Tooltip>
</TooltipProvider>
</div>
)}
</div>
{filteredModels[virtualItem.index].quants.length >
1 && (
<div className="flex items-center gap-2 hub-show-variants-step">
@@ -744,12 +794,75 @@ function Hub() {
(variant) => (
<CardItem
key={variant.model_id}
title={variant.model_id}
title={
<>
<div className="flex items-center gap-1">
<span className="mr-2">
{variant.model_id}
</span>
{filteredModels[virtualItem.index]
.num_mmproj > 0 && (
<div className="flex items-center gap-1">
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>
<div>
<IconEye
size={17}
className="text-main-view-fg/50"
/>
</div>
</TooltipTrigger>
<TooltipContent>
<p>{t('vision')}</p>
</TooltipContent>
</Tooltip>
</TooltipProvider>
</div>
)}
{filteredModels[virtualItem.index]
.tools && (
<div className="flex items-center gap-1">
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>
<div>
<IconTool
size={17}
className="text-main-view-fg/50"
/>
</div>
</TooltipTrigger>
<TooltipContent>
<p>{t('tools')}</p>
</TooltipContent>
</Tooltip>
</TooltipProvider>
</div>
)}
</div>
</>
}
actions={
<div className="flex items-center gap-2">
<p className="text-main-view-fg/70 font-medium text-xs">
{variant.file_size}
</p>
<ModelInfoHoverCard
model={
filteredModels[virtualItem.index]
}
variant={variant}
defaultModelQuantizations={
defaultModelQuantizations
}
modelSupportStatus={
modelSupportStatus
}
onCheckModelSupport={
checkModelSupport
}
/>
{(() => {
const isDownloading =
localDownloadingModels.has(

View File

@@ -13,6 +13,7 @@ import {
stopModel,
stopAllModels,
startModel,
isModelSupported,
HuggingFaceRepo,
CatalogModel,
} from '../models'
@@ -845,4 +846,95 @@ describe('models service', () => {
expect(result.quants[0].file_size).toBe('Unknown size')
})
})
describe('isModelSupported', () => {
beforeEach(() => {
vi.clearAllMocks()
})
it('should return GREEN when model is fully supported', async () => {
const mockEngineWithSupport = {
...mockEngine,
isModelSupported: vi.fn().mockResolvedValue('GREEN'),
}
mockEngineManager.get.mockReturnValue(mockEngineWithSupport)
const result = await isModelSupported('/path/to/model.gguf', 4096)
expect(result).toBe('GREEN')
expect(mockEngineWithSupport.isModelSupported).toHaveBeenCalledWith(
'/path/to/model.gguf',
4096
)
})
it('should return YELLOW when model weights fit but KV cache does not', async () => {
const mockEngineWithSupport = {
...mockEngine,
isModelSupported: vi.fn().mockResolvedValue('YELLOW'),
}
mockEngineManager.get.mockReturnValue(mockEngineWithSupport)
const result = await isModelSupported('/path/to/model.gguf', 8192)
expect(result).toBe('YELLOW')
expect(mockEngineWithSupport.isModelSupported).toHaveBeenCalledWith(
'/path/to/model.gguf',
8192
)
})
it('should return RED when model is not supported', async () => {
const mockEngineWithSupport = {
...mockEngine,
isModelSupported: vi.fn().mockResolvedValue('RED'),
}
mockEngineManager.get.mockReturnValue(mockEngineWithSupport)
const result = await isModelSupported('/path/to/large-model.gguf')
expect(result).toBe('RED')
expect(mockEngineWithSupport.isModelSupported).toHaveBeenCalledWith(
'/path/to/large-model.gguf',
undefined
)
})
it('should return YELLOW as fallback when engine method is not available', async () => {
const mockEngineWithoutSupport = {
...mockEngine,
// isModelSupported method not available
}
mockEngineManager.get.mockReturnValue(mockEngineWithoutSupport)
const result = await isModelSupported('/path/to/model.gguf')
expect(result).toBe('YELLOW')
})
it('should return YELLOW as fallback when engine is not available', async () => {
mockEngineManager.get.mockReturnValue(null)
const result = await isModelSupported('/path/to/model.gguf')
expect(result).toBe('YELLOW') // Should use fallback
})
it('should return RED when there is an error', async () => {
const mockEngineWithError = {
...mockEngine,
isModelSupported: vi.fn().mockRejectedValue(new Error('Test error')),
}
mockEngineManager.get.mockReturnValue(mockEngineWithError)
const result = await isModelSupported('/path/to/model.gguf')
expect(result).toBe('RED')
})
})
})

View File

@@ -579,3 +579,35 @@ export const checkMmprojExists = async (modelId: string): Promise<boolean> => {
}
return false
}
/**
* Checks if a model is supported by analyzing memory requirements and system resources.
* @param modelPath - The path to the model file (local path or URL)
* @param ctxSize - Optional context size for the model; if omitted, the engine falls back to the model's default context length
* @returns Promise<'RED' | 'YELLOW' | 'GREEN'> - Support status:
* - 'RED': Model weights don't fit in available memory
* - 'YELLOW': Model weights fit, but KV cache doesn't
* - 'GREEN': Both model weights and KV cache fit in available memory
*/
export const isModelSupported = async (
modelPath: string,
ctxSize?: number
): Promise<'RED' | 'YELLOW' | 'GREEN'> => {
try {
const engine = getEngine('llamacpp') as AIEngine & {
isModelSupported?: (
path: string,
ctx_size?: number
) => Promise<'RED' | 'YELLOW' | 'GREEN'>
}
if (engine && typeof engine.isModelSupported === 'function') {
return await engine.isModelSupported(modelPath, ctxSize)
}
// Fallback if method is not available
console.warn('isModelSupported method not available in llamacpp engine')
return 'YELLOW' // Conservative fallback
} catch (error) {
console.error(`Error checking model support for ${modelPath}:`, error)
return 'RED' // Error state, assume not supported
}
}
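
For reference, a hypothetical call site for the service helper above (the import path matches the one used elsewhere in this change; the URL placeholders and the 8192 context size are illustrative):

```ts
import { isModelSupported } from '@/services/models'

// Check a remote GGUF before offering it for download; 8192 is an example context size.
const status = await isModelSupported(
  'https://huggingface.co/<org>/<repo>/resolve/main/model.gguf',
  8192
)
if (status === 'RED') {
  console.warn('Model likely exceeds available device memory')
}
```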