Merge dev branch while preserving website directory

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
Commit 72588db776 by Ramon Perez, 2025-08-11 13:19:17 +10:00
68 changed files with 8,432 additions and 1,407 deletions

View File

@@ -134,7 +134,7 @@ jobs:
   test-on-windows-pr:
     if: github.event_name == 'pull_request' || github.event_name == 'workflow_dispatch'
-    runs-on: ${{ (github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name != github.repository) && 'windows-latest' || 'WINDOWS-11' }}
+    runs-on: 'windows-latest'
     steps:
       - name: Getting the repo
         uses: actions/checkout@v3

View File

@@ -105,8 +105,7 @@ jobs:
           jq --arg version "${{ inputs.new_version }}" '.version = $version | .bundle.createUpdaterArtifacts = true' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json
           mv /tmp/tauri.conf.json ./src-tauri/tauri.conf.json
           if [ "${{ inputs.channel }}" != "stable" ]; then
-            jq '.bundle.linux.deb.files = {"usr/bin/bun": "resources/bin/bun",
-              "usr/lib/Jan-${{ inputs.channel }}/resources/lib/libvulkan.so": "resources/lib/libvulkan.so"}' ./src-tauri/tauri.linux.conf.json > /tmp/tauri.linux.conf.json
+            jq '.bundle.linux.deb.files = {"usr/bin/bun": "resources/bin/bun"}' ./src-tauri/tauri.linux.conf.json > /tmp/tauri.linux.conf.json
             mv /tmp/tauri.linux.conf.json ./src-tauri/tauri.linux.conf.json
           fi
           jq --arg version "${{ inputs.new_version }}" '.version = $version' web-app/package.json > /tmp/package.json

.gitignore (vendored): 33 lines changed
View File

@@ -1,46 +1,22 @@
+.idea
 .env
-.idea
-# Jan inference
 error.log
 node_modules
 *.tgz
-!charts/server/charts/*.tgz
 dist
 build
 .DS_Store
-electron/renderer
-electron/models
-electron/docs
-electron/engines
-electron/themes
-electron/playwright-report
-server/pre-install
 package-lock.json
 coverage
 *.log
 core/lib/**
-# Turborepo
-.turbo
-electron/test-data
-electron/test-results
-core/test_results.html
-coverage
 .yarn
 .yarnrc
-test_results.html
 *.tsbuildinfo
-electron/shared/**
+test_results.html
 # docs
 docs/yarn.lock
-electron/.version.bak
-src-tauri/binaries/engines/cortex.llamacpp
-src-tauri/resources/themes
 src-tauri/resources/lib
-src-tauri/Cargo.lock
 src-tauri/icons
 !src-tauri/icons/icon.png
 src-tauri/gen/apple
@@ -75,5 +51,8 @@ docs/.next/
 **/yarn-error.log*
 **/pnpm-debug.log*
-# Combined output for local testing
-combined-output/
+## cargo
+target
+## test
+test-data

View File

@@ -31,7 +31,6 @@ endif
 dev: install-and-build
 	yarn download:bin
-	yarn download:lib
 	yarn dev
 
 # Linting
@@ -41,8 +40,10 @@ lint: install-and-build
 # Testing
 test: lint
 	yarn download:bin
-	yarn download:lib
 	yarn test
+	yarn copy:assets:tauri
+	yarn build:icon
+	cargo test --manifest-path src-tauri/Cargo.toml --no-default-features --features test-tauri -- --test-threads=1
 
 # Builds and publishes the app
 build-and-publish: install-and-build
@@ -50,7 +51,6 @@ build-and-publish: install-and-build
 # Build
 build: install-and-build
-	yarn download:lib
 	yarn build
 
 clean:

[Binary files added: two images (57 KiB and 947 KiB) and one additional binary file]

View File

@@ -0,0 +1,92 @@
---
title: "Jan v0.6.7: OpenAI gpt-oss support and enhanced MCP tutorials"
version: 0.6.7
description: "Full support for OpenAI's open-weight gpt-oss models and new Jupyter MCP integration guide"
date: 2025-08-07
ogImage: "/assets/images/changelog/gpt-oss-serper.png"
---
import ChangelogHeader from "@/components/Changelog/ChangelogHeader"
import { Callout } from 'nextra/components'
<ChangelogHeader title="Jan v0.6.7: OpenAI gpt-oss support and enhanced MCP tutorials" date="2025-08-07" ogImage="/assets/images/changelog/gpt-oss-serper.png"/>
## Highlights 🎉
Jan v0.6.7 brings full support for OpenAI's groundbreaking open-weight models - gpt-oss-120b and gpt-oss-20b - along with enhanced MCP documentation and critical bug fixes for reasoning models.
### 🚀 OpenAI gpt-oss Models Now Supported
Jan now fully supports OpenAI's first open-weight language models since GPT-2:
**gpt-oss-120b:**
- 117B total parameters, 5.1B active per token
- Runs efficiently on a single 80GB GPU
- Near-parity with OpenAI o4-mini on reasoning benchmarks
- Exceptional tool use and function calling capabilities
**gpt-oss-20b:**
- 21B total parameters, 3.6B active per token
- Runs on edge devices with just 16GB memory
- Similar performance to OpenAI o3-mini
- Perfect for local inference and rapid iteration
<Callout type="info">
Both models use Mixture-of-Experts (MoE) architecture and support context lengths up to 128k tokens. They come natively quantized in MXFP4 format for efficient memory usage.
</Callout>
### 🎮 GPU Layer Configuration
Due to the models' size, you may need to adjust GPU layers based on your hardware:
![GPU layers setting adjusted for optimal performance](/assets/images/changelog/jupyter5.png)
Start with default settings and reduce layers if you encounter out-of-memory errors. Each system requires different configurations based on available VRAM.
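If you prefer to experiment outside Jan's settings UI, the same trade-off can be sketched with llama.cpp's standalone server, which Jan's local engine builds on. This is only an illustration: the model path and layer count below are placeholders, not recommended values.
```bash
# Sketch: offload fewer layers to the GPU when you hit out-of-memory errors.
# --n-gpu-layers controls how many layers are kept in VRAM; lower it until the model fits.
llama-server -m ./gpt-oss-20b.gguf --n-gpu-layers 24 --ctx-size 8192
```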
### 📚 New Jupyter MCP Tutorial
We've added comprehensive documentation for the Jupyter MCP integration:
- Real-time notebook interaction and code execution
- Step-by-step setup with Python environment management
- Example workflows for data analysis and visualization
- Security best practices for code execution
- Performance optimization tips
The tutorial demonstrates how to turn Jan into a capable data science partner that can execute analysis, create visualizations, and iterate based on actual results.
### 🔧 Bug Fixes
Critical fixes for reasoning model support:
- **Fixed reasoning text inclusion**: Reasoning text is no longer incorrectly included in chat completion requests
- **Fixed thinking block display**: gpt-oss thinking blocks now render properly in the UI
- **Fixed React state loop**: Resolved infinite re-render issue with useMediaQuery hook
## Using gpt-oss Models
### Download from Hub
All gpt-oss GGUF variants are available in the Jan Hub. Simply search for "gpt-oss" and choose the quantization that fits your hardware.
### Model Capabilities
Both models excel at:
- **Reasoning tasks**: Competition coding, mathematics, and problem solving
- **Tool use**: Web search, code execution, and function calling
- **CoT reasoning**: Full chain-of-thought visibility for monitoring
- **Structured outputs**: JSON schema enforcement and grammar constraints (see the sketch below)
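As a rough sketch of the structured-output point above, a schema-constrained request to an OpenAI-compatible endpoint (such as the one llama.cpp's server exposes) can look like the following. The port, model name, and exact `response_format` support are assumptions that depend on your local setup.
```bash
# Sketch: request JSON constrained by a schema from an OpenAI-compatible server.
# Port, model name, and schema support are placeholders for your own setup.
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gpt-oss-20b",
    "messages": [{"role": "user", "content": "Name a city and its country."}],
    "response_format": {
      "type": "json_schema",
      "json_schema": {
        "name": "city_answer",
        "schema": {
          "type": "object",
          "properties": {
            "city": {"type": "string"},
            "country": {"type": "string"}
          },
          "required": ["city", "country"]
        }
      }
    }
  }'
```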
### Performance Tips
- **Memory requirements**: gpt-oss-120b needs ~80GB, gpt-oss-20b needs ~16GB
- **GPU layers**: Adjust based on your VRAM (start high, reduce if needed)
- **Context size**: Both models support up to 128k tokens
- **Quantization**: Choose lower quantization for smaller memory footprint
## Coming Next
We're continuing to optimize performance for large models, expand MCP integrations, and improve the overall experience for running cutting-edge open models locally.
Update your Jan or [download the latest](https://jan.ai/).
For the complete list of changes, see the [GitHub release notes](https://github.com/menloresearch/jan/releases/tag/v0.6.7).

[15 binary image files added (35 KiB to 947 KiB)]

View File

@@ -0,0 +1,337 @@
---
title: Jupyter MCP
description: Real-time Jupyter notebook interaction and code execution through MCP integration.
keywords:
[
Jan,
MCP,
Model Context Protocol,
Jupyter,
data analysis,
code execution,
notebooks,
Python,
visualization,
tool calling,
GPT-5,
OpenAI,
]
---
import { Callout } from 'nextra/components'
# Jupyter MCP
[Jupyter MCP Server](https://jupyter-mcp-server.datalayer.tech/) enables real-time interaction with Jupyter notebooks, allowing AI models to edit, execute, and document code for data analysis and visualization. Instead of just generating code suggestions, AI can actually run Python code and see the results.
This integration gives Jan the ability to execute analysis, create visualizations, and iterate based on actual results - turning your AI assistant into a capable data science partner.
<Callout type="info">
**Breaking Change**: Version 0.11.0+ renamed `room` to `document`. Check the [release notes](https://jupyter-mcp-server.datalayer.tech/releases) for details.
</Callout>
## Available Tools
The Jupyter MCP Server provides [12 comprehensive tools](https://jupyter-mcp-server.datalayer.tech/tools/):
### Core Operations
- `append_execute_code_cell`: Add and run code cells at notebook end
- `insert_execute_code_cell`: Insert and run code at specific positions
- `execute_cell_simple_timeout`: Execute cells with timeout control
- `execute_cell_streaming`: Long-running cells with progress updates
- `execute_cell_with_progress`: Execute with timeout and monitoring
### Cell Management
- `append_markdown_cell`: Add documentation cells
- `insert_markdown_cell`: Insert markdown at specific positions
- `delete_cell`: Remove cells from notebook
- `overwrite_cell_source`: Update existing cell content
### Information & Reading
- `get_notebook_info`: Retrieve notebook metadata
- `read_cell`: Examine specific cell content
- `read_all_cells`: Get complete notebook state
<Callout type="warning">
The MCP connects to **one notebook at a time**, not multiple notebooks. Specify your target notebook in the configuration.
</Callout>
## Prerequisites
- Jan with MCP enabled
- Python 3.8+ with uv package manager
- Docker installed
- OpenAI API key for GPT-5 access
- Basic understanding of Jupyter notebooks
## Setup
### Enable MCP
1. Go to **Settings** > **MCP Servers**
2. Toggle **Allow All MCP Tool Permission** ON
![MCP settings page with toggle enabled](../../_assets/mcp-on.png)
### Install uv Package Manager
If you don't have uv installed:
```bash
# macOS and Linux
curl -LsSf https://astral.sh/uv/install.sh | sh
# Windows
powershell -c "irm https://astral.sh/uv/install.ps1 | iex"
```
### Create Python Environment
Set up an isolated environment for Jupyter:
```bash
# Create environment with Python 3.13
uv venv .venv --python 3.13
# Activate environment
source .venv/bin/activate # Linux/macOS
# or
.venv\Scripts\activate # Windows
# Install Jupyter dependencies
uv pip install jupyterlab==4.4.1 jupyter-collaboration==4.0.2 ipykernel
uv pip uninstall pycrdt datalayer_pycrdt
uv pip install datalayer_pycrdt==0.12.17
# Add data science libraries
uv pip install pandas numpy matplotlib altair
```
### Start JupyterLab Server
Launch JupyterLab with authentication:
```bash
jupyter lab --port 8888 --IdentityProvider.token heyheyyou --ip 0.0.0.0
```
![Terminal showing JupyterLab startup](../../_assets/jupyter1.png)
The server opens in your browser:
![JupyterLab interface in browser](../../_assets/jupyter.png)
### Create Target Notebook
Create a new notebook named `for_jan.ipynb`:
![Notebook created in JupyterLab](../../_assets/jupyter2.png)
### Configure MCP Server in Jan
Click `+` in MCP Servers section:
**Configuration for macOS/Windows:**
- **Server Name**: `jupyter`
- **Command**: `docker`
- **Arguments**:
```
run -i --rm -e DOCUMENT_URL -e DOCUMENT_TOKEN -e DOCUMENT_ID -e RUNTIME_URL -e RUNTIME_TOKEN datalayer/jupyter-mcp-server:latest
```
- **Environment Variables**:
- Key: `DOCUMENT_URL`, Value: `http://host.docker.internal:8888`
- Key: `DOCUMENT_TOKEN`, Value: `heyheyyou`
- Key: `DOCUMENT_ID`, Value: `for_jan.ipynb`
- Key: `RUNTIME_URL`, Value: `http://host.docker.internal:8888`
- Key: `RUNTIME_TOKEN`, Value: `heyheyyou`
![Jan MCP server configuration](../../_assets/jupyter3.png)
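For reference, the settings above amount to roughly the following standalone `docker run` command, using the example token and notebook name from this guide (`host.docker.internal` resolves to your machine from inside the container on macOS and Windows):
```bash
# Sketch: the container invocation Jan assembles from the configuration above.
# Token and notebook name are the example values from this guide; replace with your own.
docker run -i --rm \
  -e DOCUMENT_URL=http://host.docker.internal:8888 \
  -e DOCUMENT_TOKEN=heyheyyou \
  -e DOCUMENT_ID=for_jan.ipynb \
  -e RUNTIME_URL=http://host.docker.internal:8888 \
  -e RUNTIME_TOKEN=heyheyyou \
  datalayer/jupyter-mcp-server:latest
```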
## Using OpenAI's GPT-5
### Configure OpenAI Provider
Navigate to **Settings** > **Model Providers** > **OpenAI**:
![OpenAI settings page](../../_assets/openai-settings.png)
### Add GPT-5 Model
Since GPT-5 is new, you'll need to manually add it to Jan:
![Manually adding GPT-5 model name](../../_assets/gpt5-add.png)
<Callout type="info">
**About GPT-5**: OpenAI's smartest, fastest, most useful model yet. It features built-in thinking capabilities, state-of-the-art performance across coding, math, and writing, and exceptional tool use abilities. GPT-5 automatically decides when to respond quickly versus when to think longer for expert-level responses.
</Callout>
### Enable Tool Calling
Ensure tools are enabled for GPT-5:
![Enabling tools for GPT-5](../../_assets/gpt5-tools.png)
## Usage
### Verify Tool Availability
Start a new chat with GPT-5. The tools bubble shows all available Jupyter operations:
![GPT-5 ready in chat with Jupyter tools visible](../../_assets/gpt5-chat.png)
### Initial Test
Start with establishing the notebook as your workspace:
```
You have access to a jupyter notebook, please use it as our data analysis scratchpad. Let's start by printing "Hello Jan" in a new cell.
```
GPT-5 creates and executes the code successfully:
![First message showing successful tool use](../../_assets/gpt5-msg.png)
### Advanced Data Analysis
Try a more complex task combining multiple operations:
```
Generate synthetic data with numpy, move it to a pandas dataframe and create a pivot table, and then make a cool animated plot using matplotlib. Your use case will be sales analysis in the luxury fashion industry.
```
![Complex analysis with luxury fashion sales data](../../_assets/gpt5-msg2.png)
Watch the complete output unfold:
<video width="100%" controls>
<source src="/assets/videos/mcpjupyter.mp4" type="video/mp4" />
Your browser does not support the video tag.
</video>
## Example Prompts to Try
### Financial Analysis
```
Create a Monte Carlo simulation for portfolio risk analysis. Generate 10,000 scenarios, calculate VaR at 95% confidence, and visualize the distribution.
```
### Time Series Forecasting
```
Generate synthetic time series data representing daily website traffic over 2 years with weekly seasonality and trend. Build an ARIMA model and forecast the next 30 days.
```
### Machine Learning Pipeline
```
Build a complete classification pipeline: generate a dataset with 3 classes and 5 features, split the data, try multiple algorithms (RF, SVM, XGBoost), and create a comparison chart of their performance.
```
### Interactive Dashboards
```
Create an interactive visualization using matplotlib widgets showing how changing interest rates affects loan payments over different time periods.
```
### Statistical Testing
```
Generate two datasets representing A/B test results for an e-commerce site. Perform appropriate statistical tests and create visualizations to determine if the difference is significant.
```
## Performance Considerations
<Callout type="warning">
Multiple tools can quickly consume context windows, especially for local models. GPT-5's unified system with smart routing helps manage this, but local models may struggle with speed and context limitations.
</Callout>
### Context Management
- Each tool call adds to conversation history
- 12 available tools means substantial system prompt overhead
- Local models may need reduced tool sets for reasonable performance
- Consider disabling unused tools to conserve context
### Cloud vs Local Trade-offs
- **Cloud models (GPT-5)**: Handle multiple tools efficiently with large context windows
- **Local models**: May require optimization, reduced tool sets, or smaller context sizes
- **Hybrid approach**: Use cloud for complex multi-tool workflows, local for simple tasks
## Security Considerations
<Callout type="warning">
MCP provides powerful capabilities but requires careful security practices.
</Callout>
### Authentication Tokens
- **Always use strong tokens** - avoid simple passwords
- **Never commit tokens** to version control
- **Rotate tokens regularly** for production use
- **Use different tokens** for different environments
### Network Security
- JupyterLab is network-accessible with `--ip 0.0.0.0`
- Consider using `--ip 127.0.0.1` for local-only access (see the sketch after this list)
- Implement firewall rules to restrict access
- Use HTTPS in production environments
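A minimal sketch of the local-only variant of the launch command used earlier in this guide (example port and token shown; replace them with your own):
```bash
# Sketch: bind JupyterLab to localhost only so it is not reachable from the network.
jupyter lab --port 8888 --IdentityProvider.token heyheyyou --ip 127.0.0.1
```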
### Code Execution Risks
- AI has full Python execution capabilities
- Review generated code before execution
- Use isolated environments for sensitive work
- Monitor resource usage and set limits
### Data Privacy
- Notebook content is processed by AI models
- When using cloud models like GPT-5, data leaves your system
- Keep sensitive data in secure environments
- Consider model provider's data policies
## Best Practices
### Environment Management
- Use virtual environments for isolation
- Document required dependencies
- Version control your notebooks
- Regular environment cleanup
### Performance Optimization
- Start with simple operations
- Monitor memory usage during execution
- Close unused notebooks
- Restart kernels when needed
### Effective Prompting
- Be specific about desired outputs
- Break complex tasks into steps
- Ask for explanations with code
- Request error handling in critical operations
## Troubleshooting
**Connection Problems:**
- Verify JupyterLab is running
- Check token matches configuration
- Confirm Docker can reach host
- Test with curl to verify connectivity (see the sketch below)
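A quick connectivity check using the example port and token from this guide; `/api/status` is part of the Jupyter Server REST API, but adjust the URL if your setup differs:
```bash
# Sketch: confirm the Jupyter server is reachable and the token is accepted.
curl -H "Authorization: token heyheyyou" http://localhost:8888/api/status
```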
**Execution Failures:**
- Check Python package availability
- Verify kernel is running
- Look for syntax errors in generated code
- Restart kernel if stuck
**Tool Calling Errors:**
- Ensure model supports tool calling
- Verify all 12 tools appear in chat
- Check MCP server is active
- Review Docker logs for errors
**API Rate Limits:**
- Monitor OpenAI usage dashboard
- Implement retry logic for transient errors
- Consider fallback to local models
- Cache results when possible
## Conclusion
The Jupyter MCP integration combined with GPT-5's advanced capabilities creates an exceptionally powerful data science environment. With GPT-5's built-in reasoning and expert-level intelligence, complex analyses that once required extensive manual coding can now be accomplished through natural conversation.
Whether you're exploring data, building models, or creating visualizations, this integration provides the computational power of Jupyter with the intelligence of GPT-5 - all within Jan's privacy-conscious interface.
Remember: with great computational power comes the responsibility to use it securely. Always validate generated code, use strong authentication, and be mindful of data privacy when using cloud-based models.

[4 binary image files added (233 KiB to 470 KiB)]

View File

@@ -0,0 +1,211 @@
---
title: "Run OpenAI's gpt-oss locally in 5 mins (Beginner Guide)"
description: "Complete 5-minute beginner guide to running OpenAI's gpt-oss locally. Step-by-step setup with Jan AI for private, offline AI conversations."
tags: OpenAI, gpt-oss, local AI, Jan, privacy, Apache-2.0, llama.cpp, Ollama, LM Studio
categories: guides
date: 2025-08-06
ogImage: assets/gpt-oss%20locally.jpeg
twitter:
card: summary_large_image
site: "@jandotai"
title: "Run OpenAI's gpt-oss Locally in 5 Minutes (Beginner Guide)"
description: "Complete 5-minute beginner guide to running OpenAI's gpt-oss locally with Jan AI for private, offline conversations."
image: assets/gpt-oss%20locally.jpeg
---
import { Callout } from 'nextra/components'
import CTABlog from '@/components/Blog/CTA'
# Run OpenAI's gpt-oss Locally in 5 mins
![gpt-oss running locally in Jan interface](./_assets/gpt-oss%20locally.jpeg)
OpenAI launched [gpt-oss](https://openai.com/index/introducing-gpt-oss/), its first open-weight model release since GPT-2. The model is designed to run locally on consumer hardware, and this guide shows you how to install and run gpt-oss on your computer for private, offline AI conversations.
## What is gpt-oss?
gpt-oss is OpenAI's open-weight large language model, released under the Apache-2.0 license. Unlike ChatGPT, gpt-oss:
- Runs completely offline - No internet required after setup
- 100% private - Your conversations never leave your device
- Unlimited usage - No token limits or rate limiting
- Free forever - No subscription fees
- Commercial use allowed - Apache-2.0 license permits business use
Running AI models locally means everything happens on your own hardware, giving you complete control over your data and conversations.
## gpt-oss System Requirements
| Component | Minimum | Recommended |
|-----------|---------|-------------|
| **RAM** | 16 GB | 32 GB+ |
| **Storage** | 11+ GB free | 25 GB+ free |
| **CPU** | 4 cores | 8+ cores |
| **GPU** | Optional | Modern GPU with 6GB+ VRAM recommended |
| **OS** | Windows 10+, macOS 11+, Linux | Latest versions |
**Installation apps available:**
- **Jan** (Recommended - easiest setup)
- **llama.cpp** (Command line)
- **Ollama** (Docker-style CLI)
- **LM Studio** (GUI alternative)
## How to install gpt-oss locally with Jan (5 mins)
### Step 1: Download Jan
First download Jan to run gpt-oss locally: [Download Jan AI](https://jan.ai/)
<Callout type="info">
Jan is the simplest way to run AI models locally. It automatically handles CPU/GPU optimization, provides a clean chat interface, and requires zero technical knowledge.
</Callout>
### Step 2: Install gpt-oss Model (2-3 minutes)
![Jan Hub showing gpt-oss model in the hub](./_assets/jan%20hub%20gpt-oss%20locally.jpeg)
1. Open Jan Hub -> search "gpt-oss" (it appears at the top)
2. Click Download and wait for completion (~11GB download)
3. Installation is automatic - Jan handles everything
### Step 3: Start using gpt-oss offline (30 seconds)
![Jan interface with gpt-oss model selected and ready to chat](./_assets/jan%20gpt-oss.jpeg)
1. Go to New Chat → select gpt-oss-20b from model picker
2. Start chatting - Jan automatically optimizes for your hardware
3. You're done! Your AI conversations now stay completely private
Success: Your gpt-oss setup is complete. No internet required for chatting, unlimited usage, zero subscription fees.
## Jan with gpt-oss vs ChatGPT vs other Local AI Models
| Feature | gpt-oss (Local) | ChatGPT Plus | Claude Pro | Other Local Models |
|---------|----------------|--------------|------------|-------------------|
| Cost | Free forever | $20/month | $20/month | Free |
| Privacy | 100% private | Data sent to OpenAI | Data sent to Anthropic | 100% private |
| Internet | Offline after setup | Requires internet | Requires internet | Offline |
| Usage limits | Unlimited | Rate limited | Rate limited | Unlimited |
| Performance | Good (hardware dependent) | Excellent | Excellent | Varies |
| Setup difficulty | Easy with Jan | None | None | Varies |
## Alternative Installation Methods
### Option 1: Jan (Recommended)
- Best for: Complete beginners, users wanting GUI interface
- Setup time: 5 minutes
- Difficulty: Very Easy
Already covered above - [Download Jan](https://jan.ai/)
### Option 2: llama.cpp (Command Line)
- Best for: Developers, terminal users, custom integrations
- Setup time: 10-15 minutes
- Difficulty: Intermediate
```bash
# macOS
brew install llama-cpp
# Windows: grab Windows exe from releases
curl -L -o gpt-oss-20b.gguf https://huggingface.co/openai/gpt-oss-20b-gguf/resolve/main/gpt-oss-20b.gguf
./main -m gpt-oss-20b.gguf --chat-simple
# Add GPU acceleration (adjust -ngl value based on your GPU VRAM)
./main -m gpt-oss-20b.gguf --chat-simple -ngl 20
```
### Option 3: Ollama (CLI Alternative)
Best for: users who prefer a Docker-style CLI, server deployments
Setup time: 5-10 minutes
Difficulty: Intermediate
```bash
# Install from https://ollama.com
ollama run gpt-oss:20b
```
### Option 4: LM Studio (GUI Alternative)
Best for: Users wanting GUI but not Jan
Setup time: 10 minutes
Difficulty: Easy
1. Download LM Studio from official website
2. Go to Models → search "gpt-oss-20b (GGUF)"
3. Download the model (wait for completion)
4. Go to Chat tab → select the model and start chatting
## gpt-oss Performance & Troubleshooting
### Expected Performance Benchmarks
| Hardware Setup | First Response | Subsequent Responses | Tokens/Second |
|---------------|---------------|---------------------|---------------|
| **16GB RAM + CPU only** | 30-45 seconds | 3-6 seconds | 3-8 tokens/sec |
| **32GB RAM + RTX 3060** | 15-25 seconds | 1-3 seconds | 15-25 tokens/sec |
| **32GB RAM + RTX 4080+** | 8-15 seconds | 1-2 seconds | 25-45 tokens/sec |
### Common Issues & Solutions
Performance optimization tips:
- First response is slow: Normal - kernels compile once, then speed up dramatically
- Out of VRAM error: Reduce context length in settings or switch to CPU mode
- Out of memory: Close memory-heavy apps (Chrome, games, video editors)
- Slow responses: Check if other apps are using GPU/CPU heavily
Quick fixes:
1. Restart Jan if responses become slow
2. Lower context window from 4096 to 2048 tokens
3. Enable CPU mode if GPU issues persist
4. Free up RAM by closing unused applications
## Frequently Asked Questions (FAQ)
### Is gpt-oss completely free?
Yes! gpt-oss is 100% free under Apache-2.0 license. No subscription fees, no token limits, no hidden costs.
### How much internet data does gpt-oss use?
Only for the initial 11GB download. After installation, gpt-oss works completely offline with zero internet usage.
### Can I use gpt-oss for commercial projects?
Absolutely! The Apache-2.0 license permits commercial use, modification, and distribution.
### Is gpt-oss better than ChatGPT?
gpt-oss offers different advantages: complete privacy, unlimited usage, offline capability, and zero cost. ChatGPT may offer stronger performance, but it requires an internet connection and a paid plan for full access.
### What happens to my conversations with gpt-oss?
Your conversations stay 100% on your device. Nothing is sent to OpenAI, Jan, or any external servers.
### Can I run gpt-oss on a Mac with 8GB RAM?
No. gpt-oss requires at least 16 GB of RAM, so consider upgrading your RAM or using a cloud-based alternative.
### How do I update gpt-oss to newer versions?
Jan automatically notifies you of updates. Simply click update in Jan Hub when new versions are available.
## Why Choose gpt-oss Over ChatGPT Plus?
gpt-oss advantages:
- $0/month vs $20/month for ChatGPT Plus
- 100% private - no data leaves your device
- Unlimited usage - no rate limits or restrictions
- Works offline - no internet required after setup
- Commercial use allowed - build businesses with it
When to choose ChatGPT Plus instead:
- You need the absolute best performance
- You don't want to manage local installation
- You have less than 16GB RAM
## Get started with gpt-oss today
![gpt-oss running locally with complete privacy](./_assets/run%20gpt-oss%20locally%20in%20jan.jpeg)
Ready to try gpt-oss?
- Download Jan: [https://jan.ai/](https://jan.ai/)
- View source code: [https://github.com/menloresearch/jan](https://github.com/menloresearch/jan)
- Need help? Check our [local AI guide](/post/run-ai-models-locally) for beginners
<CTABlog />

View File

@ -39,6 +39,7 @@ type LlamacppConfig = {
auto_unload: boolean auto_unload: boolean
chat_template: string chat_template: string
n_gpu_layers: number n_gpu_layers: number
override_tensor_buffer_t: string
ctx_size: number ctx_size: number
threads: number threads: number
threads_batch: number threads_batch: number
@ -144,7 +145,6 @@ export default class llamacpp_extension extends AIEngine {
readonly providerId: string = 'llamacpp' readonly providerId: string = 'llamacpp'
private config: LlamacppConfig private config: LlamacppConfig
private activeSessions: Map<number, SessionInfo> = new Map()
private providerPath!: string private providerPath!: string
private apiSecret: string = 'JustAskNow' private apiSecret: string = 'JustAskNow'
private pendingDownloads: Map<string, Promise<void>> = new Map() private pendingDownloads: Map<string, Promise<void>> = new Map()
@ -770,16 +770,6 @@ export default class llamacpp_extension extends AIEngine {
override async onUnload(): Promise<void> { override async onUnload(): Promise<void> {
// Terminate all active sessions // Terminate all active sessions
for (const [_, sInfo] of this.activeSessions) {
try {
await this.unload(sInfo.model_id)
} catch (error) {
logger.error(`Failed to unload model ${sInfo.model_id}:`, error)
}
}
// Clear the sessions map
this.activeSessions.clear()
} }
onSettingUpdate<T>(key: string, value: T): void { onSettingUpdate<T>(key: string, value: T): void {
@ -1103,67 +1093,13 @@ export default class llamacpp_extension extends AIEngine {
* Function to find a random port * Function to find a random port
*/ */
private async getRandomPort(): Promise<number> { private async getRandomPort(): Promise<number> {
const MAX_ATTEMPTS = 20000
let attempts = 0
while (attempts < MAX_ATTEMPTS) {
const port = Math.floor(Math.random() * 1000) + 3000
const isAlreadyUsed = Array.from(this.activeSessions.values()).some(
(info) => info.port === port
)
if (!isAlreadyUsed) {
const isAvailable = await invoke<boolean>('is_port_available', { port })
if (isAvailable) return port
}
attempts++
}
throw new Error('Failed to find an available port for the model to load')
}
private async sleep(ms: number): Promise<void> {
return new Promise((resolve) => setTimeout(resolve, ms))
}
private async waitForModelLoad(
sInfo: SessionInfo,
timeoutMs = 240_000
): Promise<void> {
await this.sleep(500) // Wait before first check
const start = Date.now()
while (Date.now() - start < timeoutMs) {
try { try {
const res = await fetch(`http://localhost:${sInfo.port}/health`) const port = await invoke<number>('get_random_port')
return port
if (res.status === 503) { } catch {
const body = await res.json() logger.error('Unable to find a suitable port')
const msg = body?.error?.message ?? 'Model loading' throw new Error('Unable to find a suitable port for model')
logger.info(`waiting for model load... (${msg})`)
} else if (res.ok) {
const body = await res.json()
if (body.status === 'ok') {
return
} else {
logger.warn('Unexpected OK response from /health:', body)
} }
} else {
logger.warn(`Unexpected status ${res.status} from /health`)
}
} catch (e) {
await this.unload(sInfo.model_id)
throw new Error(`Model appears to have crashed: ${e}`)
}
await this.sleep(800) // Retry interval
}
await this.unload(sInfo.model_id)
throw new Error(
`Timed out loading model after ${timeoutMs}... killing llamacpp`
)
} }
override async load( override async load(
@ -1171,7 +1107,7 @@ export default class llamacpp_extension extends AIEngine {
overrideSettings?: Partial<LlamacppConfig>, overrideSettings?: Partial<LlamacppConfig>,
isEmbedding: boolean = false isEmbedding: boolean = false
): Promise<SessionInfo> { ): Promise<SessionInfo> {
const sInfo = this.findSessionByModel(modelId) const sInfo = await this.findSessionByModel(modelId)
if (sInfo) { if (sInfo) {
throw new Error('Model already loaded!!') throw new Error('Model already loaded!!')
} }
@ -1262,6 +1198,14 @@ export default class llamacpp_extension extends AIEngine {
args.push('--jinja') args.push('--jinja')
args.push('--reasoning-format', 'none') args.push('--reasoning-format', 'none')
args.push('-m', modelPath) args.push('-m', modelPath)
// For overriding tensor buffer type, useful where
// massive MOE models can be made faster by keeping attention on the GPU
// and offloading the expert FFNs to the CPU.
// This is an expert level settings and should only be used by people
// who knows what they are doing.
// Takes a regex with matching tensor name as input
if (cfg.override_tensor_buffer_t)
args.push('--override-tensor', cfg.override_tensor_buffer_t)
args.push('-a', modelId) args.push('-a', modelId)
args.push('--port', String(port)) args.push('--port', String(port))
if (modelConfig.mmproj_path) { if (modelConfig.mmproj_path) {
@ -1333,27 +1277,20 @@ export default class llamacpp_extension extends AIEngine {
libraryPath, libraryPath,
args, args,
}) })
// Store the session info for later use
this.activeSessions.set(sInfo.pid, sInfo)
await this.waitForModelLoad(sInfo)
return sInfo return sInfo
} catch (error) { } catch (error) {
logger.error('Error loading llama-server:\n', error) logger.error('Error in load command:\n', error)
throw new Error(`Failed to load llama-server: ${error}`) throw error
} }
} }
override async unload(modelId: string): Promise<UnloadResult> { override async unload(modelId: string): Promise<UnloadResult> {
const sInfo: SessionInfo = this.findSessionByModel(modelId) const sInfo: SessionInfo = await this.findSessionByModel(modelId)
if (!sInfo) { if (!sInfo) {
throw new Error(`No active session found for model: ${modelId}`) throw new Error(`No active session found for model: ${modelId}`)
} }
const pid = sInfo.pid const pid = sInfo.pid
try { try {
this.activeSessions.delete(pid)
// Pass the PID as the session_id // Pass the PID as the session_id
const result = await invoke<UnloadResult>('unload_llama_model', { const result = await invoke<UnloadResult>('unload_llama_model', {
pid: pid, pid: pid,
@ -1364,13 +1301,11 @@ export default class llamacpp_extension extends AIEngine {
logger.info(`Successfully unloaded model with PID ${pid}`) logger.info(`Successfully unloaded model with PID ${pid}`)
} else { } else {
logger.warn(`Failed to unload model: ${result.error}`) logger.warn(`Failed to unload model: ${result.error}`)
this.activeSessions.set(sInfo.pid, sInfo)
} }
return result return result
} catch (error) { } catch (error) {
logger.error('Error in unload command:', error) logger.error('Error in unload command:', error)
this.activeSessions.set(sInfo.pid, sInfo)
return { return {
success: false, success: false,
error: `Failed to unload model: ${error}`, error: `Failed to unload model: ${error}`,
@ -1493,17 +1428,23 @@ export default class llamacpp_extension extends AIEngine {
} }
} }
private findSessionByModel(modelId: string): SessionInfo | undefined { private async findSessionByModel(modelId: string): Promise<SessionInfo> {
return Array.from(this.activeSessions.values()).find( try {
(session) => session.model_id === modelId let sInfo = await invoke<SessionInfo>('find_session_by_model', {
) modelId,
})
return sInfo
} catch (e) {
logger.error(e)
throw new Error(String(e))
}
} }
override async chat( override async chat(
opts: chatCompletionRequest, opts: chatCompletionRequest,
abortController?: AbortController abortController?: AbortController
): Promise<chatCompletion | AsyncIterable<chatCompletionChunk>> { ): Promise<chatCompletion | AsyncIterable<chatCompletionChunk>> {
const sessionInfo = this.findSessionByModel(opts.model) const sessionInfo = await this.findSessionByModel(opts.model)
if (!sessionInfo) { if (!sessionInfo) {
throw new Error(`No active session found for model: ${opts.model}`) throw new Error(`No active session found for model: ${opts.model}`)
} }
@ -1519,7 +1460,6 @@ export default class llamacpp_extension extends AIEngine {
throw new Error('Model appears to have crashed! Please reload!') throw new Error('Model appears to have crashed! Please reload!')
} }
} else { } else {
this.activeSessions.delete(sessionInfo.pid)
throw new Error('Model have crashed! Please reload!') throw new Error('Model have crashed! Please reload!')
} }
const baseUrl = `http://localhost:${sessionInfo.port}/v1` const baseUrl = `http://localhost:${sessionInfo.port}/v1`
@ -1568,11 +1508,13 @@ export default class llamacpp_extension extends AIEngine {
} }
override async getLoadedModels(): Promise<string[]> { override async getLoadedModels(): Promise<string[]> {
let lmodels: string[] = [] try {
for (const [_, sInfo] of this.activeSessions) { let models: string[] = await invoke<string[]>('get_loaded_models')
lmodels.push(sInfo.model_id) return models
} catch (e) {
logger.error(e)
throw new Error(e)
} }
return lmodels
} }
async getDevices(): Promise<DeviceList[]> { async getDevices(): Promise<DeviceList[]> {
@ -1602,7 +1544,7 @@ export default class llamacpp_extension extends AIEngine {
} }
async embed(text: string[]): Promise<EmbeddingResponse> { async embed(text: string[]): Promise<EmbeddingResponse> {
let sInfo = this.findSessionByModel('sentence-transformer-mini') let sInfo = await this.findSessionByModel('sentence-transformer-mini')
if (!sInfo) { if (!sInfo) {
const downloadedModelList = await this.list() const downloadedModelList = await this.list()
if ( if (

View File

@@ -19,7 +19,6 @@
     "dev:web": "yarn workspace @janhq/web-app dev",
     "dev:tauri": "yarn build:icon && yarn copy:assets:tauri && cross-env IS_CLEAN=true tauri dev",
     "copy:assets:tauri": "cpx \"pre-install/*.tgz\" \"src-tauri/resources/pre-install/\"",
-    "download:lib": "node ./scripts/download-lib.mjs",
     "download:bin": "node ./scripts/download-bin.mjs",
     "build:tauri:win32": "yarn download:bin && yarn tauri build",
     "build:tauri:linux": "yarn download:bin && ./src-tauri/build-utils/shim-linuxdeploy.sh yarn tauri build && ./src-tauri/build-utils/buildAppImage.sh",

View File

@@ -1,86 +0,0 @@
console.log('Script is running')
// scripts/download-lib.mjs
import https from 'https'
import fs, { mkdirSync } from 'fs'
import os from 'os'
import path from 'path'
import { copySync } from 'cpx'
function download(url, dest) {
return new Promise((resolve, reject) => {
console.log(`Downloading ${url} to ${dest}`)
const file = fs.createWriteStream(dest)
https
.get(url, (response) => {
console.log(`Response status code: ${response.statusCode}`)
if (
response.statusCode >= 300 &&
response.statusCode < 400 &&
response.headers.location
) {
// Handle redirect
const redirectURL = response.headers.location
console.log(`Redirecting to ${redirectURL}`)
download(redirectURL, dest).then(resolve, reject) // Recursive call
return
} else if (response.statusCode !== 200) {
reject(`Failed to get '${url}' (${response.statusCode})`)
return
}
response.pipe(file)
file.on('finish', () => {
file.close(resolve)
})
})
.on('error', (err) => {
fs.unlink(dest, () => reject(err.message))
})
})
}
async function main() {
console.log('Starting main function')
const platform = os.platform() // 'darwin', 'linux', 'win32'
const arch = os.arch() // 'x64', 'arm64', etc.
if (arch != 'x64') return
let filename
if (platform == 'linux')
filename = 'libvulkan.so'
else if (platform == 'win32')
filename = 'vulkan-1.dll'
else
return
const url = `https://catalog.jan.ai/${filename}`
const libDir = 'src-tauri/resources/lib'
const tempDir = 'scripts/dist'
try {
mkdirSync('scripts/dist')
} catch (err) {
// Expect EEXIST error if the directory already exists
}
console.log(`Downloading libvulkan...`)
const savePath = path.join(tempDir, filename)
if (!fs.existsSync(savePath)) {
await download(url, savePath)
}
// copy to tauri resources
try {
copySync(savePath, libDir)
} catch (err) {
// Expect EEXIST error
}
console.log('Downloads completed.')
}
main().catch((err) => {
console.error('Error:', err)
process.exit(1)
})

View File

@@ -0,0 +1,4 @@
[env]
# workaround needed to prevent `STATUS_ENTRYPOINT_NOT_FOUND` error in tests
# see https://github.com/tauri-apps/tauri/pull/4383#issuecomment-1212221864
__TAURI_WORKSPACE__ = "true"

src-tauri/Cargo.lock (generated, new file, 7,174 lines): diff suppressed because it is too large

View File

@@ -7,11 +7,29 @@ license = "MIT"
 repository = "https://github.com/menloresearch/jan"
 edition = "2021"
 rust-version = "1.77.2"
+resolver = "2"
 
 [lib]
 name = "app_lib"
 crate-type = ["staticlib", "cdylib", "rlib"]
 
+[features]
+default = [
+    "tauri/wry",
+    "tauri/common-controls-v6",
+    "tauri/x11",
+    "tauri/protocol-asset",
+    "tauri/macos-private-api",
+    "tauri/test",
+]
+test-tauri = [
+    "tauri/wry",
+    "tauri/x11",
+    "tauri/protocol-asset",
+    "tauri/macos-private-api",
+    "tauri/test",
+]
+
 [build-dependencies]
 tauri-build = { version = "2.0.2", features = [] }
@@ -19,9 +37,6 @@ tauri-build = { version = "2.0.2", features = [] }
 serde_json = "1.0"
 serde = { version = "1.0", features = ["derive"] }
 log = "0.4"
-tauri = { version = "2.5.0", features = [ "protocol-asset", "macos-private-api",
-    "test"
-] }
 tauri-plugin-log = "2.0.0-rc"
 tauri-plugin-shell = "2.2.0"
 tauri-plugin-os = "2.2.1"
@@ -59,6 +74,20 @@ sha2 = "0.10.9"
 base64 = "0.22.1"
 libloading = "0.8.7"
 thiserror = "2.0.12"
+
+[dependencies.tauri]
+version = "2.5.0"
+default-features = false
+features = [
+    "protocol-asset",
+    "macos-private-api",
+    "test",
+]
+
+[target.'cfg(windows)'.dev-dependencies]
+tempfile = "3.20.0"
+
 [target.'cfg(unix)'.dependencies]
 nix = "=0.30.1"
 
 [target.'cfg(windows)'.dependencies]
@@ -69,6 +98,3 @@ windows-sys = { version = "0.60.2", features = ["Win32_Storage_FileSystem"] }
 tauri-plugin-updater = "2"
 once_cell = "1.18"
 tauri-plugin-single-instance = { version = "2.0.0", features = ["deep-link"] }
-
-[target.'cfg(windows)'.dev-dependencies]
-tempfile = "3.20.0"

[Binary image file modified: 38 KiB before, 36 KiB after]

View File

@@ -94,7 +94,13 @@ pub fn update_app_configuration(
 #[tauri::command]
 pub fn get_jan_data_folder_path<R: Runtime>(app_handle: tauri::AppHandle<R>) -> PathBuf {
     if cfg!(test) {
-        return PathBuf::from("./data");
+        let path = std::env::current_dir()
+            .unwrap_or_else(|_| PathBuf::from("."))
+            .join("test-data");
+        if !path.exists() {
+            let _ = fs::create_dir_all(&path);
+        }
+        return path;
     }
 
     let app_configurations = get_app_configurations(app_handle);

View File

@@ -182,7 +182,7 @@ mod tests {
         assert!(get_jan_data_folder_path(app.handle().clone())
             .join(path)
             .exists());
-        fs::remove_dir_all(get_jan_data_folder_path(app.handle().clone()).join(path)).unwrap();
+        let _ = fs::remove_dir_all(get_jan_data_folder_path(app.handle().clone()).join(path));
     }
 
     #[test]
@@ -194,7 +194,7 @@ mod tests {
         assert_eq!(
             result,
             get_jan_data_folder_path(app.handle().clone())
-                .join("test_dir/test_file")
+                .join(&format!("test_dir{}test_file", std::path::MAIN_SEPARATOR))
                 .to_string_lossy()
                 .to_string()
         );
@@ -232,8 +232,7 @@ mod tests {
     #[test]
     fn test_readdir_sync() {
         let app = mock_app();
-        let path = "file://test_readdir_sync_dir";
-        let dir_path = get_jan_data_folder_path(app.handle().clone()).join(path);
+        let dir_path = get_jan_data_folder_path(app.handle().clone()).join("test_readdir_sync_dir");
         fs::create_dir_all(&dir_path).unwrap();
         File::create(dir_path.join("file1.txt")).unwrap();
         File::create(dir_path.join("file2.txt")).unwrap();
@@ -242,6 +241,6 @@ mod tests {
         let result = readdir_sync(app.handle().clone(), args).unwrap();
         assert_eq!(result.len(), 2);
-        fs::remove_dir_all(dir_path).unwrap();
+        let _ = fs::remove_dir_all(dir_path);
     }
 }

View File

@ -1,10 +1,6 @@
pub mod amd;
pub mod nvidia;
pub mod vulkan;
use std::sync::OnceLock; use std::sync::OnceLock;
use sysinfo::System; use sysinfo::System;
use tauri::{path::BaseDirectory, Manager}; use tauri;
static SYSTEM_INFO: OnceLock<SystemInfo> = OnceLock::new(); static SYSTEM_INFO: OnceLock<SystemInfo> = OnceLock::new();
@ -143,90 +139,12 @@ impl CpuStaticInfo {
} }
} }
// https://devicehunt.com/all-pci-vendors
pub const VENDOR_ID_AMD: u32 = 0x1002;
pub const VENDOR_ID_NVIDIA: u32 = 0x10DE;
pub const VENDOR_ID_INTEL: u32 = 0x8086;
#[derive(Debug, Clone)]
pub enum Vendor {
AMD,
NVIDIA,
Intel,
Unknown(u32),
}
impl serde::Serialize for Vendor {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
match self {
Vendor::AMD => "AMD".serialize(serializer),
Vendor::NVIDIA => "NVIDIA".serialize(serializer),
Vendor::Intel => "Intel".serialize(serializer),
Vendor::Unknown(vendor_id) => {
let formatted = format!("Unknown (vendor_id: {})", vendor_id);
serializer.serialize_str(&formatted)
}
}
}
}
impl Vendor {
pub fn from_vendor_id(vendor_id: u32) -> Self {
match vendor_id {
VENDOR_ID_AMD => Vendor::AMD,
VENDOR_ID_NVIDIA => Vendor::NVIDIA,
VENDOR_ID_INTEL => Vendor::Intel,
_ => Vendor::Unknown(vendor_id),
}
}
}
#[derive(Clone, Debug, serde::Serialize)]
pub struct GpuInfo {
pub name: String,
pub total_memory: u64,
pub vendor: Vendor,
pub uuid: String,
pub driver_version: String,
pub nvidia_info: Option<nvidia::NvidiaInfo>,
pub vulkan_info: Option<vulkan::VulkanInfo>,
}
impl GpuInfo {
pub fn get_usage(&self) -> GpuUsage {
match self.vendor {
Vendor::NVIDIA => self.get_usage_nvidia(),
Vendor::AMD => self.get_usage_amd(),
_ => self.get_usage_unsupported(),
}
}
pub fn get_usage_unsupported(&self) -> GpuUsage {
GpuUsage {
uuid: self.uuid.clone(),
used_memory: 0,
total_memory: 0,
}
}
}
#[derive(serde::Serialize, Clone, Debug)] #[derive(serde::Serialize, Clone, Debug)]
pub struct SystemInfo { pub struct SystemInfo {
cpu: CpuStaticInfo, cpu: CpuStaticInfo,
os_type: String, os_type: String,
os_name: String, os_name: String,
total_memory: u64, total_memory: u64,
gpus: Vec<GpuInfo>,
}
#[derive(serde::Serialize, Clone, Debug)]
pub struct GpuUsage {
uuid: String,
used_memory: u64,
total_memory: u64,
} }
#[derive(serde::Serialize, Clone, Debug)] #[derive(serde::Serialize, Clone, Debug)]
@ -234,62 +152,15 @@ pub struct SystemUsage {
cpu: f32, cpu: f32,
used_memory: u64, used_memory: u64,
total_memory: u64, total_memory: u64,
gpus: Vec<GpuUsage>,
}
fn get_jan_libvulkan_path<R: tauri::Runtime>(app: tauri::AppHandle<R>) -> String {
let lib_name = if cfg!(target_os = "windows") {
"vulkan-1.dll"
} else if cfg!(target_os = "linux") {
"libvulkan.so"
} else {
return "".to_string();
};
// NOTE: this does not work in test mode (mock app)
match app.path().resolve(
format!("resources/lib/{}", lib_name),
BaseDirectory::Resource,
) {
Ok(lib_path) => lib_path.to_string_lossy().to_string(),
Err(_) => "".to_string(),
}
} }
#[tauri::command] #[tauri::command]
pub fn get_system_info<R: tauri::Runtime>(app: tauri::AppHandle<R>) -> SystemInfo { pub fn get_system_info() -> SystemInfo {
SYSTEM_INFO SYSTEM_INFO
.get_or_init(|| { .get_or_init(|| {
let mut system = System::new(); let mut system = System::new();
system.refresh_memory(); system.refresh_memory();
let mut gpu_map = std::collections::HashMap::new();
for gpu in nvidia::get_nvidia_gpus() {
gpu_map.insert(gpu.uuid.clone(), gpu);
}
// try system vulkan first
let paths = vec!["".to_string(), get_jan_libvulkan_path(app.clone())];
let mut vulkan_gpus = vec![];
for path in paths {
vulkan_gpus = vulkan::get_vulkan_gpus(&path);
if !vulkan_gpus.is_empty() {
break;
}
}
for gpu in vulkan_gpus {
match gpu_map.get_mut(&gpu.uuid) {
// for existing NVIDIA GPUs, add Vulkan info
Some(nvidia_gpu) => {
nvidia_gpu.vulkan_info = gpu.vulkan_info;
}
None => {
gpu_map.insert(gpu.uuid.clone(), gpu);
}
}
}
let os_type = if cfg!(target_os = "windows") { let os_type = if cfg!(target_os = "windows") {
"windows" "windows"
} else if cfg!(target_os = "macos") { } else if cfg!(target_os = "macos") {
@ -306,14 +177,13 @@ pub fn get_system_info<R: tauri::Runtime>(app: tauri::AppHandle<R>) -> SystemInf
os_type: os_type.to_string(), os_type: os_type.to_string(),
os_name, os_name,
total_memory: system.total_memory() / 1024 / 1024, // bytes to MiB total_memory: system.total_memory() / 1024 / 1024, // bytes to MiB
gpus: gpu_map.into_values().collect(),
} }
}) })
.clone() .clone()
} }
#[tauri::command] #[tauri::command]
pub fn get_system_usage<R: tauri::Runtime>(app: tauri::AppHandle<R>) -> SystemUsage { pub fn get_system_usage() -> SystemUsage {
let mut system = System::new(); let mut system = System::new();
system.refresh_memory(); system.refresh_memory();
@ -330,30 +200,22 @@ pub fn get_system_usage<R: tauri::Runtime>(app: tauri::AppHandle<R>) -> SystemUs
cpu: cpu_usage, cpu: cpu_usage,
used_memory: system.used_memory() / 1024 / 1024, // bytes to MiB, used_memory: system.used_memory() / 1024 / 1024, // bytes to MiB,
total_memory: system.total_memory() / 1024 / 1024, // bytes to MiB, total_memory: system.total_memory() / 1024 / 1024, // bytes to MiB,
gpus: get_system_info(app.clone())
.gpus
.iter()
.map(|gpu| gpu.get_usage())
.collect(),
} }
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use tauri::test::mock_app;
#[test] #[test]
fn test_system_info() { fn test_system_info() {
let app = mock_app(); let info = get_system_info();
let info = get_system_info(app.handle().clone());
println!("System Static Info: {:?}", info); println!("System Static Info: {:?}", info);
} }
#[test] #[test]
fn test_system_usage() { fn test_system_usage() {
let app = mock_app(); let usage = get_system_usage();
let usage = get_system_usage(app.handle().clone());
println!("System Usage Info: {:?}", usage); println!("System Usage Info: {:?}", usage);
} }
} }

View File

@@ -1,210 +0,0 @@
use super::{GpuInfo, GpuUsage};
impl GpuInfo {
#[cfg(not(target_os = "linux"))]
#[cfg(not(target_os = "windows"))]
pub fn get_usage_amd(&self) -> GpuUsage {
self.get_usage_unsupported()
}
#[cfg(target_os = "linux")]
pub fn get_usage_amd(&self) -> GpuUsage {
use std::fs;
use std::path::Path;
let device_id = match &self.vulkan_info {
Some(vulkan_info) => vulkan_info.device_id,
None => {
log::error!("get_usage_amd called without Vulkan info");
return self.get_usage_unsupported();
}
};
let closure = || -> Result<GpuUsage, Box<dyn std::error::Error>> {
for subdir in fs::read_dir("/sys/class/drm")? {
let device_path = subdir?.path().join("device");
// Check if this is an AMD GPU by looking for amdgpu directory
if !device_path
.join("driver/module/drivers/pci:amdgpu")
.exists()
{
continue;
}
// match device_id from Vulkan info
let this_device_id_str = fs::read_to_string(device_path.join("device"))?;
let this_device_id = u32::from_str_radix(
this_device_id_str
.strip_prefix("0x")
.unwrap_or(&this_device_id_str)
.trim(),
16,
)?;
if this_device_id != device_id {
continue;
}
let read_mem = |path: &Path| -> u64 {
fs::read_to_string(path)
.map(|content| content.trim().parse::<u64>().unwrap_or(0))
.unwrap_or(0)
/ 1024
/ 1024 // Convert bytes to MiB
};
return Ok(GpuUsage {
uuid: self.uuid.clone(),
total_memory: read_mem(&device_path.join("mem_info_vram_total")),
used_memory: read_mem(&device_path.join("mem_info_vram_used")),
});
}
Err(format!("GPU not found").into())
};
match closure() {
Ok(usage) => usage,
Err(e) => {
log::error!(
"Failed to get memory usage for AMD GPU {:#x}: {}",
device_id,
e
);
self.get_usage_unsupported()
}
}
}
#[cfg(target_os = "windows")]
pub fn get_usage_amd(&self) -> GpuUsage {
use std::collections::HashMap;
let memory_usage_map = windows_impl::get_gpu_usage().unwrap_or_else(|_| {
log::error!("Failed to get AMD GPU memory usage");
HashMap::new()
});
match memory_usage_map.get(&self.name) {
Some(&used_memory) => GpuUsage {
uuid: self.uuid.clone(),
used_memory: used_memory as u64,
total_memory: self.total_memory,
},
None => self.get_usage_unsupported(),
}
}
}
// TODO: refactor this into a more egonomic API
#[cfg(target_os = "windows")]
mod windows_impl {
use libc;
use libloading::{Library, Symbol};
use std::collections::HashMap;
use std::ffi::{c_char, c_int, c_void, CStr};
use std::mem::{self, MaybeUninit};
use std::ptr;
// === FFI Struct Definitions ===
#[repr(C)]
#[allow(non_snake_case)]
#[derive(Debug, Copy, Clone)]
pub struct AdapterInfo {
pub iSize: c_int,
pub iAdapterIndex: c_int,
pub strUDID: [c_char; 256],
pub iBusNumber: c_int,
pub iDeviceNumber: c_int,
pub iFunctionNumber: c_int,
pub iVendorID: c_int,
pub strAdapterName: [c_char; 256],
pub strDisplayName: [c_char; 256],
pub iPresent: c_int,
pub iExist: c_int,
pub strDriverPath: [c_char; 256],
pub strDriverPathExt: [c_char; 256],
pub strPNPString: [c_char; 256],
pub iOSDisplayIndex: c_int,
}
type ADL_MAIN_MALLOC_CALLBACK = Option<unsafe extern "C" fn(i32) -> *mut c_void>;
type ADL_MAIN_CONTROL_CREATE = unsafe extern "C" fn(ADL_MAIN_MALLOC_CALLBACK, c_int) -> c_int;
type ADL_MAIN_CONTROL_DESTROY = unsafe extern "C" fn() -> c_int;
type ADL_ADAPTER_NUMBEROFADAPTERS_GET = unsafe extern "C" fn(*mut c_int) -> c_int;
type ADL_ADAPTER_ADAPTERINFO_GET = unsafe extern "C" fn(*mut AdapterInfo, c_int) -> c_int;
type ADL_ADAPTER_ACTIVE_GET = unsafe extern "C" fn(c_int, *mut c_int) -> c_int;
type ADL_GET_DEDICATED_VRAM_USAGE =
unsafe extern "C" fn(*mut c_void, c_int, *mut c_int) -> c_int;
// === ADL Memory Allocator ===
unsafe extern "C" fn adl_malloc(i_size: i32) -> *mut c_void {
libc::malloc(i_size as usize)
}
pub fn get_gpu_usage() -> Result<HashMap<String, i32>, Box<dyn std::error::Error>> {
unsafe {
let lib = Library::new("atiadlxx.dll").or_else(|_| Library::new("atiadlxy.dll"))?;
let adl_main_control_create: Symbol<ADL_MAIN_CONTROL_CREATE> =
lib.get(b"ADL_Main_Control_Create")?;
let adl_main_control_destroy: Symbol<ADL_MAIN_CONTROL_DESTROY> =
lib.get(b"ADL_Main_Control_Destroy")?;
let adl_adapter_number_of_adapters_get: Symbol<ADL_ADAPTER_NUMBEROFADAPTERS_GET> =
lib.get(b"ADL_Adapter_NumberOfAdapters_Get")?;
let adl_adapter_adapter_info_get: Symbol<ADL_ADAPTER_ADAPTERINFO_GET> =
lib.get(b"ADL_Adapter_AdapterInfo_Get")?;
let adl_adapter_active_get: Symbol<ADL_ADAPTER_ACTIVE_GET> =
lib.get(b"ADL_Adapter_Active_Get")?;
let adl_get_dedicated_vram_usage: Symbol<ADL_GET_DEDICATED_VRAM_USAGE> =
lib.get(b"ADL2_Adapter_DedicatedVRAMUsage_Get")?;
// TODO: try to put nullptr here. then we don't need direct libc dep
if adl_main_control_create(Some(adl_malloc), 1) != 0 {
return Err("ADL initialization error!".into());
}
// NOTE: after this call, we must call ADL_Main_Control_Destroy
// whenver we encounter an error
let mut num_adapters: c_int = 0;
if adl_adapter_number_of_adapters_get(&mut num_adapters as *mut _) != 0 {
return Err("Cannot get number of adapters".into());
}
let mut vram_usages = HashMap::new();
if num_adapters > 0 {
let mut adapter_info: Vec<AdapterInfo> =
vec![MaybeUninit::zeroed().assume_init(); num_adapters as usize];
let ret = adl_adapter_adapter_info_get(
adapter_info.as_mut_ptr(),
mem::size_of::<AdapterInfo>() as i32 * num_adapters,
);
if ret != 0 {
return Err("Cannot get adapter info".into());
}
for adapter in adapter_info.iter() {
let mut is_active = 0;
adl_adapter_active_get(adapter.iAdapterIndex, &mut is_active);
if is_active != 0 {
let mut vram_mb = 0;
let _ = adl_get_dedicated_vram_usage(
ptr::null_mut(),
adapter.iAdapterIndex,
&mut vram_mb,
);
// NOTE: adapter name might not be unique?
let name = CStr::from_ptr(adapter.strAdapterName.as_ptr())
.to_string_lossy()
.into_owned();
vram_usages.insert(name, vram_mb);
}
}
}
adl_main_control_destroy();
Ok(vram_usages)
}
}
}
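For reference, the ADL helper above reports per-adapter dedicated VRAM usage keyed by adapter name. A minimal sketch of exercising it in isolation (Windows only; the example module and test name are illustrative and not part of this change):

#[cfg(all(test, target_os = "windows"))]
mod adl_usage_example {
    // Illustrative smoke test for the windows_impl helper above (not part of the commit).
    #[test]
    fn print_amd_vram_usage() {
        match super::windows_impl::get_gpu_usage() {
            Ok(usage) => {
                for (adapter_name, vram) in usage {
                    println!("{}: {} (dedicated VRAM in use, as reported by ADL)", adapter_name, vram);
                }
            }
            Err(e) => eprintln!("ADL unavailable (likely no AMD driver present): {}", e),
        }
    }
}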

View File

@ -1,120 +0,0 @@
use super::{GpuInfo, GpuUsage, Vendor};
use nvml_wrapper::{error::NvmlError, Nvml};
use std::sync::OnceLock;
static NVML: OnceLock<Option<Nvml>> = OnceLock::new();
#[derive(Debug, Clone, serde::Serialize)]
pub struct NvidiaInfo {
pub index: u32,
pub compute_capability: String,
}
fn get_nvml() -> Option<&'static Nvml> {
NVML.get_or_init(|| {
let result = Nvml::init().or_else(|e| {
// fallback
if cfg!(target_os = "linux") {
let lib_path = std::ffi::OsStr::new("libnvidia-ml.so.1");
Nvml::builder().lib_path(lib_path).init()
} else {
Err(e)
}
});
// NvmlError doesn't implement Copy, so we have to store an Option in OnceLock
match result {
Ok(nvml) => Some(nvml),
Err(e) => {
log::error!("Unable to initialize NVML: {}", e);
None
}
}
})
.as_ref()
}
impl GpuInfo {
pub fn get_usage_nvidia(&self) -> GpuUsage {
let index = match self.nvidia_info {
Some(ref nvidia_info) => nvidia_info.index,
None => {
log::error!("get_usage_nvidia() called on non-NVIDIA GPU");
return self.get_usage_unsupported();
}
};
let closure = || -> Result<GpuUsage, NvmlError> {
let nvml = get_nvml().ok_or(NvmlError::Unknown)?;
let device = nvml.device_by_index(index)?;
let mem_info = device.memory_info()?;
Ok(GpuUsage {
uuid: self.uuid.clone(),
used_memory: mem_info.used / 1024 / 1024, // bytes to MiB
total_memory: mem_info.total / 1024 / 1024, // bytes to MiB
})
};
closure().unwrap_or_else(|e| {
log::error!("Failed to get memory usage for NVIDIA GPU {}: {}", index, e);
self.get_usage_unsupported()
})
}
}
pub fn get_nvidia_gpus() -> Vec<GpuInfo> {
let closure = || -> Result<Vec<GpuInfo>, NvmlError> {
let nvml = get_nvml().ok_or(NvmlError::Unknown)?;
let num_gpus = nvml.device_count()?;
let driver_version = nvml.sys_driver_version()?;
let mut gpus = Vec::with_capacity(num_gpus as usize);
for i in 0..num_gpus {
let device = nvml.device_by_index(i)?;
gpus.push(GpuInfo {
name: device.name()?,
total_memory: device.memory_info()?.total / 1024 / 1024, // bytes to MiB
vendor: Vendor::NVIDIA,
uuid: {
let mut uuid = device.uuid()?;
if uuid.starts_with("GPU-") {
uuid = uuid[4..].to_string();
}
uuid
},
driver_version: driver_version.clone(),
nvidia_info: Some(NvidiaInfo {
index: i,
compute_capability: {
let cc = device.cuda_compute_capability()?;
format!("{}.{}", cc.major, cc.minor)
},
}),
vulkan_info: None,
});
}
Ok(gpus)
};
match closure() {
Ok(gpus) => gpus,
Err(e) => {
log::error!("Failed to get NVIDIA GPUs: {}", e);
vec![]
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_get_nvidia_gpus() {
let gpus = get_nvidia_gpus();
for (i, gpu) in gpus.iter().enumerate() {
println!("GPU {}:", i);
println!(" {:?}", gpu);
println!(" {:?}", gpu.get_usage());
}
}
}

View File

@ -1,145 +0,0 @@
use super::{GpuInfo, Vendor};
use ash::{vk, Entry};
#[derive(Debug, Clone, serde::Serialize)]
pub struct VulkanInfo {
pub index: u64,
pub device_type: String,
pub api_version: String,
pub device_id: u32,
}
fn parse_uuid(bytes: &[u8; 16]) -> String {
format!(
"{:02x}{:02x}{:02x}{:02x}-\
{:02x}{:02x}-\
{:02x}{:02x}-\
{:02x}{:02x}-\
{:02x}{:02x}{:02x}{:02x}{:02x}{:02x}",
bytes[0],
bytes[1],
bytes[2],
bytes[3],
bytes[4],
bytes[5],
bytes[6],
bytes[7],
bytes[8],
bytes[9],
bytes[10],
bytes[11],
bytes[12],
bytes[13],
bytes[14],
bytes[15],
)
}
pub fn get_vulkan_gpus(lib_path: &str) -> Vec<GpuInfo> {
match get_vulkan_gpus_internal(lib_path) {
Ok(gpus) => gpus,
Err(e) => {
log::error!("Failed to get Vulkan GPUs: {:?}", e);
vec![]
}
}
}
fn parse_c_string(buf: &[i8]) -> String {
unsafe { std::ffi::CStr::from_ptr(buf.as_ptr()) }
.to_str()
.unwrap_or_default()
.to_string()
}
fn get_vulkan_gpus_internal(lib_path: &str) -> Result<Vec<GpuInfo>, Box<dyn std::error::Error>> {
let entry = if lib_path.is_empty() {
unsafe { Entry::load()? }
} else {
unsafe { Entry::load_from(lib_path)? }
};
let app_info = vk::ApplicationInfo {
api_version: vk::make_api_version(0, 1, 1, 0),
..Default::default()
};
let create_info = vk::InstanceCreateInfo {
p_application_info: &app_info,
..Default::default()
};
let instance = unsafe { entry.create_instance(&create_info, None)? };
let mut device_info_list = vec![];
for (i, device) in unsafe { instance.enumerate_physical_devices()? }
.iter()
.enumerate()
{
// create a chain of three property structs for VkPhysicalDeviceProperties2
// https://registry.khronos.org/vulkan/specs/latest/man/html/VkPhysicalDeviceProperties2.html
// props2 -> driver_props -> id_props
let mut id_props = vk::PhysicalDeviceIDProperties::default();
let mut driver_props = vk::PhysicalDeviceDriverProperties {
p_next: &mut id_props as *mut _ as *mut std::ffi::c_void,
..Default::default()
};
let mut props2 = vk::PhysicalDeviceProperties2 {
p_next: &mut driver_props as *mut _ as *mut std::ffi::c_void,
..Default::default()
};
unsafe {
instance.get_physical_device_properties2(*device, &mut props2);
}
let props = props2.properties;
if props.device_type == vk::PhysicalDeviceType::CPU {
continue;
}
let device_info = GpuInfo {
name: parse_c_string(&props.device_name),
total_memory: unsafe { instance.get_physical_device_memory_properties(*device) }
.memory_heaps
.iter()
.filter(|heap| heap.flags.contains(vk::MemoryHeapFlags::DEVICE_LOCAL))
.map(|heap| heap.size / (1024 * 1024))
.sum(),
vendor: Vendor::from_vendor_id(props.vendor_id),
uuid: parse_uuid(&id_props.device_uuid),
driver_version: parse_c_string(&driver_props.driver_info),
nvidia_info: None,
vulkan_info: Some(VulkanInfo {
index: i as u64,
device_type: format!("{:?}", props.device_type),
api_version: format!(
"{}.{}.{}",
vk::api_version_major(props.api_version),
vk::api_version_minor(props.api_version),
vk::api_version_patch(props.api_version)
),
device_id: props.device_id,
}),
};
device_info_list.push(device_info);
}
unsafe {
instance.destroy_instance(None);
}
Ok(device_info_list)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_get_vulkan_gpus() {
let gpus = get_vulkan_gpus("");
for (i, gpu) in gpus.iter().enumerate() {
println!("GPU {}:", i);
println!(" {:?}", gpu);
println!(" {:?}", gpu.get_usage());
}
}
}

View File

@ -1003,9 +1003,18 @@ mod tests {
#[tokio::test] #[tokio::test]
async fn test_run_mcp_commands() { async fn test_run_mcp_commands() {
let app = mock_app(); let app = mock_app();
// Create a mock mcp_config.json file
let config_path = "mcp_config.json"; // Get the app path where the config should be created
let mut file: File = File::create(config_path).expect("Failed to create config file"); let app_path = get_jan_data_folder_path(app.handle().clone());
let config_path = app_path.join("mcp_config.json");
// Ensure the directory exists
if let Some(parent) = config_path.parent() {
std::fs::create_dir_all(parent).expect("Failed to create parent directory");
}
// Create a mock mcp_config.json file at the correct location
let mut file: File = File::create(&config_path).expect("Failed to create config file");
file.write_all(b"{\"mcpServers\":{}}") file.write_all(b"{\"mcpServers\":{}}")
.expect("Failed to write to config file"); .expect("Failed to write to config file");
@ -1018,6 +1027,6 @@ mod tests {
assert!(result.is_ok()); assert!(result.is_ok());
// Clean up the mock config file // Clean up the mock config file
std::fs::remove_file(config_path).expect("Failed to remove config file"); std::fs::remove_file(&config_path).expect("Failed to remove config file");
} }
} }

View File

@ -348,7 +348,7 @@ async fn proxy_request(
let sessions_guard = sessions.lock().await; let sessions_guard = sessions.lock().await;
if sessions_guard.is_empty() { if sessions_guard.is_empty() {
log::warn!("Request for model '{}' but no backend servers are running.", model_id); log::warn!("Request for model '{}' but no models are running.", model_id);
let mut error_response = Response::builder().status(StatusCode::SERVICE_UNAVAILABLE); let mut error_response = Response::builder().status(StatusCode::SERVICE_UNAVAILABLE);
error_response = add_cors_headers_with_host_and_origin( error_response = add_cors_headers_with_host_and_origin(
error_response, error_response,
@ -356,7 +356,7 @@ async fn proxy_request(
&origin_header, &origin_header,
&config.trusted_hosts, &config.trusted_hosts,
); );
return Ok(error_response.body(Body::from("No backend model servers are available")).unwrap()); return Ok(error_response.body(Body::from("No models are available")).unwrap());
} }
if let Some(session) = sessions_guard if let Some(session) = sessions_guard
@ -366,9 +366,8 @@ async fn proxy_request(
target_port = Some(session.info.port); target_port = Some(session.info.port);
session_api_key = Some(session.info.api_key.clone()); session_api_key = Some(session.info.api_key.clone());
log::debug!( log::debug!(
"Found session for model_id {} on port {}", "Found session for model_id {}",
model_id, model_id,
session.info.port
); );
} else { } else {
log::warn!("No running session found for model_id: {}", model_id); log::warn!("No running session found for model_id: {}", model_id);
@ -382,7 +381,7 @@ async fn proxy_request(
); );
return Ok(error_response return Ok(error_response
.body(Body::from(format!( .body(Body::from(format!(
"No running server found for model '{}'", "No running session found for model '{}'",
model_id model_id
))) )))
.unwrap()); .unwrap());
@ -494,7 +493,7 @@ async fn proxy_request(
let port = match target_port { let port = match target_port {
Some(p) => p, Some(p) => p,
None => { None => {
log::error!("Internal routing error: target_port is None after successful lookup"); log::error!("Internal API server routing error: target is None after successful lookup");
let mut error_response = Response::builder().status(StatusCode::INTERNAL_SERVER_ERROR); let mut error_response = Response::builder().status(StatusCode::INTERNAL_SERVER_ERROR);
error_response = add_cors_headers_with_host_and_origin( error_response = add_cors_headers_with_host_and_origin(
error_response, error_response,
@ -509,7 +508,6 @@ async fn proxy_request(
}; };
let upstream_url = format!("http://127.0.0.1:{}{}", port, destination_path); let upstream_url = format!("http://127.0.0.1:{}{}", port, destination_path);
log::debug!("Proxying request to: {}", upstream_url);
let mut outbound_req = client.request(method.clone(), &upstream_url); let mut outbound_req = client.request(method.clone(), &upstream_url);
@ -587,7 +585,7 @@ async fn proxy_request(
Ok(builder.body(body).unwrap()) Ok(builder.body(body).unwrap())
} }
Err(e) => { Err(e) => {
let error_msg = format!("Proxy request to {} failed: {}", upstream_url, e); let error_msg = format!("Proxy request to model failed: {}", e);
log::error!("{}", error_msg); log::error!("{}", error_msg);
let mut error_response = Response::builder().status(StatusCode::BAD_GATEWAY); let mut error_response = Response::builder().status(StatusCode::BAD_GATEWAY);
error_response = add_cors_headers_with_host_and_origin( error_response = add_cors_headers_with_host_and_origin(
@ -726,7 +724,7 @@ pub async fn start_server(
}); });
let server = Server::bind(&addr).serve(make_svc); let server = Server::bind(&addr).serve(make_svc);
log::info!("Proxy server started on http://{}", addr); log::info!("Jan API server started on http://{}", addr);
let server_task = tokio::spawn(async move { let server_task = tokio::spawn(async move {
if let Err(e) = server.await { if let Err(e) = server.await {
@ -748,9 +746,9 @@ pub async fn stop_server(
if let Some(handle) = handle_guard.take() { if let Some(handle) = handle_guard.take() {
handle.abort(); handle.abort();
*handle_guard = None; *handle_guard = None;
log::info!("Proxy server stopped"); log::info!("Jan API server stopped");
} else { } else {
log::debug!("No server was running"); log::debug!("Server was not running");
} }
Ok(()) Ok(())

View File

@ -218,7 +218,7 @@ pub async fn delete_thread<R: Runtime>(
) -> Result<(), String> { ) -> Result<(), String> {
let thread_dir = get_thread_dir(app_handle.clone(), &thread_id); let thread_dir = get_thread_dir(app_handle.clone(), &thread_id);
if thread_dir.exists() { if thread_dir.exists() {
fs::remove_dir_all(thread_dir).map_err(|e| e.to_string())?; let _ = fs::remove_dir_all(thread_dir);
} }
Ok(()) Ok(())
} }
@ -518,7 +518,7 @@ mod tests {
assert!(threads.len() > 0); assert!(threads.len() > 0);
// Clean up // Clean up
fs::remove_dir_all(data_dir).unwrap(); let _ = fs::remove_dir_all(data_dir);
} }
#[tokio::test] #[tokio::test]
@ -565,7 +565,7 @@ mod tests {
assert_eq!(messages[0]["role"], "user"); assert_eq!(messages[0]["role"], "user");
// Clean up // Clean up
fs::remove_dir_all(data_dir).unwrap(); let _ = fs::remove_dir_all(data_dir);
} }
#[tokio::test] #[tokio::test]
@ -608,6 +608,6 @@ mod tests {
assert_eq!(got["assistant_name"], "Test Assistant"); assert_eq!(got["assistant_name"], "Test Assistant");
// Clean up // Clean up
fs::remove_dir_all(data_dir).unwrap(); let _ = fs::remove_dir_all(data_dir);
} }
} }

View File

@ -691,7 +691,17 @@ mod tests {
config.ignore_ssl = Some(false); config.ignore_ssl = Some(false);
assert!(validate_proxy_config(&config).is_ok()); assert!(validate_proxy_config(&config).is_ok());
assert!(create_proxy_from_config(&config).is_ok());
// SOCKS proxies are not supported by reqwest::Proxy::all()
// This test should expect an error for SOCKS proxies
let result = create_proxy_from_config(&config);
assert!(result.is_err());
// Test with HTTP proxy instead which is supported
let mut http_config = create_test_proxy_config("http://proxy.example.com:8080");
http_config.ignore_ssl = Some(false);
assert!(validate_proxy_config(&http_config).is_ok());
assert!(create_proxy_from_config(&http_config).is_ok());
} }
#[test] #[test]

View File

@ -1,7 +1,9 @@
use base64::{engine::general_purpose, Engine as _}; use base64::{engine::general_purpose, Engine as _};
use hmac::{Hmac, Mac}; use hmac::{Hmac, Mac};
use rand::{rngs::StdRng, Rng, SeedableRng};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use sha2::Sha256; use sha2::Sha256;
use std::collections::HashSet;
use std::path::PathBuf; use std::path::PathBuf;
use std::process::Stdio; use std::process::Stdio;
use std::time::Duration; use std::time::Duration;
@ -17,19 +19,92 @@ use crate::core::state::AppState;
use crate::core::state::LLamaBackendSession; use crate::core::state::LLamaBackendSession;
type HmacSha256 = Hmac<Sha256>; type HmacSha256 = Hmac<Sha256>;
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
pub enum ErrorCode {
BinaryNotFound,
ModelFileNotFound,
LibraryPathInvalid,
// --- Model Loading Errors ---
ModelLoadFailed,
DraftModelLoadFailed,
MultimodalProjectorLoadFailed,
ModelArchNotSupported,
ModelLoadTimedOut,
LlamaCppProcessError,
// --- Memory Errors ---
OutOfMemory,
// --- Internal Application Errors ---
DeviceListParseFailed,
IoError,
InternalError,
}
#[derive(Debug, Clone, Serialize, thiserror::Error)]
#[error("LlamacppError {{ code: {code:?}, message: \"{message}\" }}")]
pub struct LlamacppError {
pub code: ErrorCode,
pub message: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub details: Option<String>,
}
impl LlamacppError {
pub fn new(code: ErrorCode, message: String, details: Option<String>) -> Self {
Self {
code,
message,
details,
}
}
/// Parses stderr from llama.cpp and creates a specific LlamacppError.
pub fn from_stderr(stderr: &str) -> Self {
let lower_stderr = stderr.to_lowercase();
// TODO: add others
let is_out_of_memory = lower_stderr.contains("out of memory")
|| lower_stderr.contains("insufficient memory")
|| lower_stderr.contains("erroroutofdevicememory") // vulkan specific
|| lower_stderr.contains("kiogpucommandbuffercallbackerroroutofmemory") // Metal-specific error code
|| lower_stderr.contains("cuda_error_out_of_memory"); // CUDA-specific
if is_out_of_memory {
return Self::new(
ErrorCode::OutOfMemory,
"Out of memory. The model requires more RAM or VRAM than available.".into(),
Some(stderr.into()),
);
}
if lower_stderr.contains("error loading model architecture") {
return Self::new(
ErrorCode::ModelArchNotSupported,
"The model's architecture is not supported by this version of the backend.".into(),
Some(stderr.into()),
);
}
Self::new(
ErrorCode::LlamaCppProcessError,
"The model process encountered an unexpected error.".into(),
Some(stderr.into()),
)
}
}
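As a quick illustration, the from_stderr classification above maps well-known llama.cpp failure strings to error codes; a sketch under the definitions in this hunk (the test module and sample stderr strings are hypothetical, not part of the commit):

#[cfg(test)]
mod llamacpp_error_examples {
    use super::{ErrorCode, LlamacppError};
    // Illustrative only; the sample stderr strings below are hypothetical.
    #[test]
    fn classifies_common_failures() {
        let oom = LlamacppError::from_stderr("ggml: CUDA_ERROR_OUT_OF_MEMORY: out of memory");
        assert!(matches!(oom.code, ErrorCode::OutOfMemory));
        let arch = LlamacppError::from_stderr("llama_model_load: error loading model architecture");
        assert!(matches!(arch.code, ErrorCode::ModelArchNotSupported));
    }
}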
// Error type for server commands // Error type for server commands
#[derive(Debug, thiserror::Error)] #[derive(Debug, thiserror::Error)]
pub enum ServerError { pub enum ServerError {
#[error("llamacpp error: {0}")] #[error(transparent)]
LlamacppError(String), Llamacpp(#[from] LlamacppError),
#[error("Failed to locate server binary: {0}")]
BinaryNotFound(String),
#[error("IO error: {0}")] #[error("IO error: {0}")]
Io(#[from] std::io::Error), Io(#[from] std::io::Error),
#[error("Jan API error: {0}")]
#[error("Tauri error: {0}")]
Tauri(#[from] tauri::Error), Tauri(#[from] tauri::Error),
#[error("Parse error: {0}")]
ParseError(String),
} }
// impl serialization for tauri // impl serialization for tauri
@ -38,7 +113,20 @@ impl serde::Serialize for ServerError {
where where
S: serde::Serializer, S: serde::Serializer,
{ {
serializer.serialize_str(self.to_string().as_ref()) let error_to_serialize: LlamacppError = match self {
ServerError::Llamacpp(err) => err.clone(),
ServerError::Io(e) => LlamacppError::new(
ErrorCode::IoError,
"An input/output error occurred.".into(),
Some(e.to_string()),
),
ServerError::Tauri(e) => LlamacppError::new(
ErrorCode::InternalError,
"An internal application error occurred.".into(),
Some(e.to_string()),
),
};
error_to_serialize.serialize(serializer)
} }
} }
@ -108,14 +196,17 @@ pub async fn load_llama_model(
let server_path_buf = PathBuf::from(backend_path); let server_path_buf = PathBuf::from(backend_path);
if !server_path_buf.exists() { if !server_path_buf.exists() {
let err_msg = format!("Binary not found at {:?}", backend_path);
log::error!( log::error!(
"Server binary not found at expected path: {:?}", "Server binary not found at expected path: {:?}",
backend_path backend_path
); );
return Err(ServerError::BinaryNotFound(format!( return Err(LlamacppError::new(
"Binary not found at {:?}", ErrorCode::BinaryNotFound,
backend_path "The llama.cpp server binary could not be found.".into(),
))); Some(err_msg),
)
.into());
} }
let port_str = args let port_str = args
@ -132,22 +223,35 @@ pub async fn load_llama_model(
} }
}; };
// FOR MODEL PATH; TODO: DO SIMILARLY FOR MMPROJ PATH // FOR MODEL PATH; TODO: DO SIMILARLY FOR MMPROJ PATH
let model_path_index = args let model_path_index = args.iter().position(|arg| arg == "-m").ok_or_else(|| {
.iter() LlamacppError::new(
.position(|arg| arg == "-m") ErrorCode::ModelLoadFailed,
.ok_or(ServerError::LlamacppError("Missing `-m` flag".into()))?; "Model path argument '-m' is missing.".into(),
None,
)
})?;
let model_path = args let model_path = args.get(model_path_index + 1).cloned().ok_or_else(|| {
.get(model_path_index + 1) LlamacppError::new(
.ok_or(ServerError::LlamacppError("Missing path after `-m`".into()))? ErrorCode::ModelLoadFailed,
.clone(); "Model path was not provided after '-m' flag.".into(),
None,
)
})?;
let model_path_pb = PathBuf::from(model_path); let model_path_pb = PathBuf::from(&model_path);
if !model_path_pb.exists() { if !model_path_pb.exists() {
return Err(ServerError::LlamacppError(format!( let err_msg = format!(
"Invalid or inaccessible model path: {}", "Invalid or inaccessible model path: {}",
model_path_pb.display().to_string(), model_path_pb.display()
))); );
log::error!("{}", &err_msg);
return Err(LlamacppError::new(
ErrorCode::ModelFileNotFound,
"The specified model file does not exist or is not accessible.".into(),
Some(err_msg),
)
.into());
} }
#[cfg(windows)] #[cfg(windows)]
{ {
@ -283,13 +387,13 @@ pub async fn load_llama_model(
|| line_lower.contains("starting the main loop") || line_lower.contains("starting the main loop")
|| line_lower.contains("server listening on") || line_lower.contains("server listening on")
{ {
log::info!("Server appears to be ready based on stderr: '{}'", line); log::info!("Model appears to be ready based on logs: '{}'", line);
let _ = ready_tx.send(true).await; let _ = ready_tx.send(true).await;
} }
} }
} }
Err(e) => { Err(e) => {
log::error!("Error reading stderr: {}", e); log::error!("Error reading logs: {}", e);
break; break;
} }
} }
@ -302,21 +406,21 @@ pub async fn load_llama_model(
if let Some(status) = child.try_wait()? { if let Some(status) = child.try_wait()? {
if !status.success() { if !status.success() {
let stderr_output = stderr_task.await.unwrap_or_default(); let stderr_output = stderr_task.await.unwrap_or_default();
log::error!("llama.cpp exited early with code {:?}", status); log::error!("llama.cpp failed early with code {:?}", status);
log::error!("--- stderr ---\n{}", stderr_output); log::error!("{}", stderr_output);
return Err(ServerError::LlamacppError(stderr_output.trim().to_string())); return Err(LlamacppError::from_stderr(&stderr_output).into());
} }
} }
// Wait for server to be ready or timeout // Wait for server to be ready or timeout
let timeout_duration = Duration::from_secs(300); // 5 minutes timeout let timeout_duration = Duration::from_secs(180); // 3 minutes timeout
let start_time = Instant::now(); let start_time = Instant::now();
log::info!("Waiting for server to be ready..."); log::info!("Waiting for model session to be ready...");
loop { loop {
tokio::select! { tokio::select! {
// Server is ready // Server is ready
Some(true) = ready_rx.recv() => { Some(true) = ready_rx.recv() => {
log::info!("Server is ready to accept requests!"); log::info!("Model is ready to accept requests!");
break; break;
} }
// Check for process exit more frequently // Check for process exit more frequently
@ -326,10 +430,10 @@ pub async fn load_llama_model(
let stderr_output = stderr_task.await.unwrap_or_default(); let stderr_output = stderr_task.await.unwrap_or_default();
if !status.success() { if !status.success() {
log::error!("llama.cpp exited with error code {:?}", status); log::error!("llama.cpp exited with error code {:?}", status);
return Err(ServerError::LlamacppError(format!("Process exited with code {:?}\n\nStderr:\n{}", status, stderr_output))); return Err(LlamacppError::from_stderr(&stderr_output).into());
} else { } else {
log::error!("llama.cpp exited successfully but without ready signal"); log::error!("llama.cpp exited successfully but without ready signal");
return Err(ServerError::LlamacppError(format!("Process exited unexpectedly\n\nStderr:\n{}", stderr_output))); return Err(LlamacppError::from_stderr(&stderr_output).into());
} }
} }
@ -338,7 +442,11 @@ pub async fn load_llama_model(
log::error!("Timeout waiting for server to be ready"); log::error!("Timeout waiting for server to be ready");
let _ = child.kill().await; let _ = child.kill().await;
let stderr_output = stderr_task.await.unwrap_or_default(); let stderr_output = stderr_task.await.unwrap_or_default();
return Err(ServerError::LlamacppError(format!("Server startup timeout\n\nStderr:\n{}", stderr_output))); return Err(LlamacppError::new(
ErrorCode::ModelLoadTimedOut,
"The model took too long to load and timed out.".into(),
Some(format!("Timeout: {}s\n\nStderr:\n{}", timeout_duration.as_secs(), stderr_output)),
).into());
} }
} }
} }
@ -461,10 +569,12 @@ pub async fn get_devices(
"Server binary not found at expected path: {:?}", "Server binary not found at expected path: {:?}",
backend_path backend_path
); );
return Err(ServerError::BinaryNotFound(format!( return Err(LlamacppError::new(
"Binary not found at {:?}", ErrorCode::BinaryNotFound,
backend_path "The llama.cpp server binary could not be found.".into(),
))); Some(format!("Path: {}", backend_path)),
)
.into());
} }
// Configure the command to run the server with --list-devices // Configure the command to run the server with --list-devices
@ -519,20 +629,21 @@ pub async fn get_devices(
// Execute the command and wait for completion // Execute the command and wait for completion
let output = timeout(Duration::from_secs(30), command.output()) let output = timeout(Duration::from_secs(30), command.output())
.await .await
.map_err(|_| ServerError::LlamacppError("Timeout waiting for device list".to_string()))? .map_err(|_| {
LlamacppError::new(
ErrorCode::InternalError,
"Timeout waiting for device list".into(),
None,
)
})?
.map_err(ServerError::Io)?; .map_err(ServerError::Io)?;
// Check if command executed successfully // Check if command executed successfully
if !output.status.success() { if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr); let stderr = String::from_utf8_lossy(&output.stderr);
log::error!("llama-server --list-devices failed: {}", stderr); log::error!("llama-server --list-devices failed: {}", stderr);
return Err(ServerError::LlamacppError(format!( return Err(LlamacppError::from_stderr(&stderr).into());
"Command failed with exit code {:?}: {}",
output.status.code(),
stderr
)));
} }
// Parse the output // Parse the output
let stdout = String::from_utf8_lossy(&output.stdout); let stdout = String::from_utf8_lossy(&output.stdout);
log::info!("Device list output:\n{}", stdout); log::info!("Device list output:\n{}", stdout);
@ -570,9 +681,12 @@ fn parse_device_output(output: &str) -> ServerResult<Vec<DeviceInfo>> {
if devices.is_empty() && found_devices_section { if devices.is_empty() && found_devices_section {
log::warn!("No devices found in output"); log::warn!("No devices found in output");
} else if !found_devices_section { } else if !found_devices_section {
return Err(ServerError::ParseError( return Err(LlamacppError::new(
"Could not find 'Available devices:' section in output".to_string(), ErrorCode::DeviceListParseFailed,
)); "Could not find 'Available devices:' section in the backend output.".into(),
Some(output.to_string()),
)
.into());
} }
Ok(devices) Ok(devices)
@ -682,16 +796,23 @@ fn parse_memory_value(mem_str: &str) -> ServerResult<i32> {
// Handle formats like "8000 MiB" or "7721 MiB free" // Handle formats like "8000 MiB" or "7721 MiB free"
let parts: Vec<&str> = mem_str.split_whitespace().collect(); let parts: Vec<&str> = mem_str.split_whitespace().collect();
if parts.is_empty() { if parts.is_empty() {
return Err(ServerError::ParseError(format!( return Err(LlamacppError::new(
"Empty memory value: '{}'", ErrorCode::DeviceListParseFailed,
mem_str format!("empty memory value: {}", mem_str),
))); None,
)
.into());
} }
// Take the first part which should be the number // Take the first part which should be the number
let number_str = parts[0]; let number_str = parts[0];
number_str.parse::<i32>().map_err(|_| { number_str.parse::<i32>().map_err(|_| {
ServerError::ParseError(format!("Could not parse memory value: '{}'", number_str)) LlamacppError::new(
ErrorCode::DeviceListParseFailed,
format!("Could not parse memory value: '{}'", number_str),
None,
)
.into()
}) })
} }
@ -724,11 +845,80 @@ pub async fn is_process_running(pid: i32, state: State<'_, AppState>) -> Result<
} }
// check port availability // check port availability
#[tauri::command] fn is_port_available(port: u16) -> bool {
pub fn is_port_available(port: u16) -> bool {
std::net::TcpListener::bind(("127.0.0.1", port)).is_ok() std::net::TcpListener::bind(("127.0.0.1", port)).is_ok()
} }
#[tauri::command]
pub async fn get_random_port(state: State<'_, AppState>) -> Result<u16, String> {
const MAX_ATTEMPTS: u32 = 20000;
let mut attempts = 0;
let mut rng = StdRng::from_entropy();
// Get all active ports from sessions
let map = state.llama_server_process.lock().await;
let used_ports: HashSet<u16> = map
.values()
.filter_map(|session| {
// Convert valid ports to u16 (filter out placeholder ports like -1)
if session.info.port > 0 && session.info.port <= u16::MAX as i32 {
Some(session.info.port as u16)
} else {
None
}
})
.collect();
drop(map); // unlock early
while attempts < MAX_ATTEMPTS {
let port = rng.gen_range(3000..4000);
if used_ports.contains(&port) {
attempts += 1;
continue;
}
if is_port_available(port) {
return Ok(port);
}
attempts += 1;
}
Err("Failed to find an available port for the model to load".into())
}
// find session
#[tauri::command]
pub async fn find_session_by_model(
model_id: String,
state: State<'_, AppState>,
) -> Result<Option<SessionInfo>, String> {
let map = state.llama_server_process.lock().await;
let session_info = map
.values()
.find(|backend_session| backend_session.info.model_id == model_id)
.map(|backend_session| backend_session.info.clone());
Ok(session_info)
}
// get running models
#[tauri::command]
pub async fn get_loaded_models(state: State<'_, AppState>) -> Result<Vec<String>, String> {
let map = state.llama_server_process.lock().await;
let model_ids = map
.values()
.map(|backend_session| backend_session.info.model_id.clone())
.collect();
Ok(model_ids)
}
// tests // tests
// //
#[cfg(test)] #[cfg(test)]
@ -929,24 +1119,34 @@ Vulkan1: AMD Radeon Graphics (RADV GFX1151) (87722 MiB, 87722 MiB free)"#;
{ {
let dir = tempfile::tempdir().expect("Failed to create temp dir"); let dir = tempfile::tempdir().expect("Failed to create temp dir");
let long_path = dir.path().join(UNCOMMON_DIR_NAME); let long_path = dir.path().join(UNCOMMON_DIR_NAME);
std::fs::create_dir(&long_path) std::fs::create_dir(&long_path)
.expect("Failed to create test directory with non-ASCII name"); .expect("Failed to create directory with uncommon characters");
let short_path = get_short_path(&long_path); let short_path = get_short_path(&long_path);
match short_path {
Some(sp) => {
// Ensure the path exists
assert!( assert!(
short_path.is_ascii(), PathBuf::from(&sp).exists(),
"The resulting short path must be composed of only ASCII characters. Got: {}", "Returned short path should exist on filesystem: {}",
short_path sp
);
assert!(
PathBuf::from(&short_path).exists(),
"The returned short path must exist on the filesystem"
); );
// It may or may not be ASCII; just ensure it differs
let long_path_str = long_path.to_string_lossy();
assert_ne!( assert_ne!(
short_path, sp, long_path_str,
long_path.to_str().unwrap(), "Short path should differ from original path"
"Short path should not be the same as the long path"
); );
} }
None => {
// On some systems, short path generation may be disabled
eprintln!("Short path generation failed. This might be expected depending on system settings.");
}
}
}
#[cfg(not(windows))] #[cfg(not(windows))]
{ {
// On Unix, paths are typically UTF-8 and there's no "short path" concept. // On Unix, paths are typically UTF-8 and there's no "short path" concept.

View File

@ -47,7 +47,7 @@ pub fn ensure_thread_dir_exists<R: Runtime>(
ensure_data_dirs(app_handle.clone())?; ensure_data_dirs(app_handle.clone())?;
let thread_dir = get_thread_dir(app_handle, thread_id); let thread_dir = get_thread_dir(app_handle, thread_id);
if !thread_dir.exists() { if !thread_dir.exists() {
fs::create_dir(&thread_dir).map_err(|e| e.to_string())?; fs::create_dir_all(&thread_dir).map_err(|e| e.to_string())?;
} }
Ok(()) Ok(())
} }

View File

@ -95,7 +95,9 @@ pub fn run() {
core::utils::extensions::inference_llamacpp_extension::server::load_llama_model, core::utils::extensions::inference_llamacpp_extension::server::load_llama_model,
core::utils::extensions::inference_llamacpp_extension::server::unload_llama_model, core::utils::extensions::inference_llamacpp_extension::server::unload_llama_model,
core::utils::extensions::inference_llamacpp_extension::server::get_devices, core::utils::extensions::inference_llamacpp_extension::server::get_devices,
core::utils::extensions::inference_llamacpp_extension::server::is_port_available, core::utils::extensions::inference_llamacpp_extension::server::get_random_port,
core::utils::extensions::inference_llamacpp_extension::server::find_session_by_model,
core::utils::extensions::inference_llamacpp_extension::server::get_loaded_models,
core::utils::extensions::inference_llamacpp_extension::server::generate_api_key, core::utils::extensions::inference_llamacpp_extension::server::generate_api_key,
core::utils::extensions::inference_llamacpp_extension::server::is_process_running, core::utils::extensions::inference_llamacpp_extension::server::is_process_running,
]) ])

View File

@ -696,8 +696,6 @@ Section Install
; Copy resources ; Copy resources
CreateDirectory "$INSTDIR\resources" CreateDirectory "$INSTDIR\resources"
CreateDirectory "$INSTDIR\resources\pre-install" CreateDirectory "$INSTDIR\resources\pre-install"
SetOutPath $INSTDIR
File /a "/oname=vulkan-1.dll" "D:\a\jan\jan\src-tauri\resources\lib\vulkan-1.dll"
SetOutPath "$INSTDIR\resources\pre-install" SetOutPath "$INSTDIR\resources\pre-install"
File /nonfatal /a /r "D:\a\jan\jan\src-tauri\resources\pre-install\" File /nonfatal /a /r "D:\a\jan\jan\src-tauri\resources\pre-install\"
SetOutPath $INSTDIR SetOutPath $INSTDIR

View File

@ -10,8 +10,7 @@
}, },
"deb": { "deb": {
"files": { "files": {
"usr/bin/bun": "resources/bin/bun", "usr/bin/bun": "resources/bin/bun"
"usr/lib/Jan/resources/lib/libvulkan.so": "resources/lib/libvulkan.so"
} }
} }
} }

View File

@ -1,19 +0,0 @@
const jestRunner = require('jest-runner')
class EmptyTestFileRunner extends jestRunner.default {
async runTests(tests, watcher, onStart, onResult, onFailure, options) {
const nonEmptyTests = tests.filter(
(test) => test.context.hasteFS.getSize(test.path) > 0
)
return super.runTests(
nonEmptyTests,
watcher,
onStart,
onResult,
onFailure,
options
)
}
}
module.exports = EmptyTestFileRunner

View File

@ -19,4 +19,5 @@ export const localStorageKey = {
mcpGlobalPermissions: 'mcp-global-permissions', mcpGlobalPermissions: 'mcp-global-permissions',
lastUsedModel: 'last-used-model', lastUsedModel: 'last-used-model',
lastUsedAssistant: 'last-used-assistant', lastUsedAssistant: 'last-used-assistant',
setupCompleted: 'setup-completed',
} }

View File

@ -106,8 +106,10 @@ export function ModelSetting({
<div key={key} className="space-y-2"> <div key={key} className="space-y-2">
<div <div
className={cn( className={cn(
'flex items-start justify-between gap-8', 'flex items-start justify-between gap-8 last:mb-2',
key === 'chat_template' && 'flex-col gap-1' (key === 'chat_template' ||
key === 'override_tensor_buffer_t') &&
'flex-col gap-1 w-full'
)} )}
> >
<div className="space-y-1 mb-2"> <div className="space-y-1 mb-2">

View File

@ -5,6 +5,7 @@ import { route } from '@/constants/routes'
import HeaderPage from './HeaderPage' import HeaderPage from './HeaderPage'
import { isProd } from '@/lib/version' import { isProd } from '@/lib/version'
import { useTranslation } from '@/i18n/react-i18next-compat' import { useTranslation } from '@/i18n/react-i18next-compat'
import { localStorageKey } from '@/constants/localStorage'
function SetupScreen() { function SetupScreen() {
const { t } = useTranslation() const { t } = useTranslation()
@ -12,6 +13,10 @@ function SetupScreen() {
const firstItemRemoteProvider = const firstItemRemoteProvider =
providers.length > 0 ? providers[1].provider : 'openai' providers.length > 0 ? providers[1].provider : 'openai'
// Check if setup tour has been completed
const isSetupCompleted =
localStorage.getItem(localStorageKey.setupCompleted) === 'true'
return ( return (
<div className="flex h-full flex-col flex-justify-center"> <div className="flex h-full flex-col flex-justify-center">
<HeaderPage></HeaderPage> <HeaderPage></HeaderPage>
@ -50,7 +55,9 @@ function SetupScreen() {
providerName: firstItemRemoteProvider, providerName: firstItemRemoteProvider,
}} }}
search={{ search={{
step: 'setup_remote_provider', ...(!isSetupCompleted
? { step: 'setup_remote_provider' }
: {}),
}} }}
> >
<h1 className="text-main-view-fg font-medium text-base"> <h1 className="text-main-view-fg font-medium text-base">

View File

@ -7,7 +7,7 @@ import {
DialogTitle, DialogTitle,
} from '@/components/ui/dialog' } from '@/components/ui/dialog'
import { Button } from '@/components/ui/button' import { Button } from '@/components/ui/button'
import { AlertTriangle } from 'lucide-react' import { AlertTriangle, ChevronDown, ChevronRight } from 'lucide-react'
import { IconCopy, IconCopyCheck } from '@tabler/icons-react' import { IconCopy, IconCopyCheck } from '@tabler/icons-react'
import { useTranslation } from '@/i18n/react-i18next-compat' import { useTranslation } from '@/i18n/react-i18next-compat'
import { useModelLoad } from '@/hooks/useModelLoad' import { useModelLoad } from '@/hooks/useModelLoad'
@ -18,11 +18,47 @@ export default function LoadModelErrorDialog() {
const { t } = useTranslation() const { t } = useTranslation()
const { modelLoadError, setModelLoadError } = useModelLoad() const { modelLoadError, setModelLoadError } = useModelLoad()
const [isCopying, setIsCopying] = useState(false) const [isCopying, setIsCopying] = useState(false)
const [isDetailExpanded, setIsDetailExpanded] = useState(true)
const getErrorDetail = (error: string | object | undefined) => {
if (!error || typeof error !== 'object') return null
if ('details' in error) {
return (error as { details?: string }).details
}
return null
}
const hasErrorDetail = (error: string | object | undefined) => {
return Boolean(getErrorDetail(error))
}
const formatErrorForCopy = (error: string | object | undefined) => {
if (!error) return ''
if (typeof error === 'string') return error
if (typeof error === 'object' && 'code' in error && 'message' in error) {
const errorObj = error as {
code?: string
message: string
details?: string
}
let copyText = errorObj.code
? `${errorObj.code}: ${errorObj.message}`
: errorObj.message
if (errorObj.details) {
copyText += `\n\nDetails:\n${errorObj.details}`
}
return copyText
}
return JSON.stringify(error)
}
const handleCopy = async () => { const handleCopy = async () => {
setIsCopying(true) setIsCopying(true)
try { try {
await navigator.clipboard.writeText(modelLoadError ?? '') await navigator.clipboard.writeText(formatErrorForCopy(modelLoadError))
toast.success('Copy successful', { toast.success('Copy successful', {
id: 'copy-model', id: 'copy-model',
description: 'Model load error information copied to clipboard', description: 'Model load error information copied to clipboard',
@ -58,17 +94,59 @@ export default function LoadModelErrorDialog() {
</div> </div>
</DialogHeader> </DialogHeader>
<div className="bg-main-view-fg/8 p-2 border border-main-view-fg/5 rounded-lg"> <div className="bg-main-view-fg/2 p-2 border border-main-view-fg/5 rounded-lg space-y-2">
<p {typeof modelLoadError === 'object' &&
className="text-sm text-main-view-fg/70 leading-relaxed max-h-[200px] overflow-y-auto break-all" modelLoadError &&
'code' in modelLoadError &&
'message' in modelLoadError ? (
<div>
{(modelLoadError as { code?: string }).code && (
<div>
<p className="text-sm text-main-view-fg/80 leading-relaxed break-all">
{(modelLoadError as { code: string }).code}
</p>
</div>
)}
<div>
<p className="text-sm text-main-view-fg/60 leading-relaxed break-all">
{(modelLoadError as { message: string }).message}
</p>
</div>
</div>
) : (
<p className="text-sm text-main-view-fg/70 leading-relaxed break-all">
{String(modelLoadError)}
</p>
)}
{hasErrorDetail(modelLoadError) && (
<div>
<button
onClick={() => setIsDetailExpanded(!isDetailExpanded)}
className="flex items-center gap-1 text-sm text-main-view-fg/60 hover:text-main-view-fg/80 transition-colors cursor-pointer"
>
{isDetailExpanded ? (
<ChevronDown className="size-3" />
) : (
<ChevronRight className="size-3" />
)}
Details
</button>
{isDetailExpanded && (
<div
className="mt-2 text-sm text-main-view-fg/70 leading-relaxed max-h-[150px] overflow-y-auto break-all bg-main-view-fg/10 p-2 rounded border border-main-view-fg/5"
ref={(el) => { ref={(el) => {
if (el) { if (el) {
el.scrollTop = el.scrollHeight el.scrollTop = el.scrollHeight
} }
}} }}
> >
{modelLoadError} {getErrorDetail(modelLoadError)}
</p> </div>
)}
</div>
)}
</div> </div>
<DialogFooter className="flex flex-col gap-2 sm:flex-row sm:justify-right"> <DialogFooter className="flex flex-col gap-2 sm:flex-row sm:justify-right">

View File

@ -5,12 +5,6 @@ import {
DropdownMenuTrigger, DropdownMenuTrigger,
} from '@/components/ui/dropdown-menu' } from '@/components/ui/dropdown-menu'
import {
Tooltip,
TooltipTrigger,
TooltipContent,
} from '@/components/ui/tooltip'
import { IconStarFilled } from '@tabler/icons-react'
import { cn } from '@/lib/utils' import { cn } from '@/lib/utils'
// Dropdown component // Dropdown component
@ -24,7 +18,6 @@ type DropdownControlProps = {
export function DropdownControl({ export function DropdownControl({
value, value,
options = [], options = [],
recommended,
onChange, onChange,
}: DropdownControlProps) { }: DropdownControlProps) {
const isSelected = const isSelected =
@ -48,18 +41,6 @@ export function DropdownControl({
)} )}
> >
<span>{option.name}</span> <span>{option.name}</span>
{recommended === option.value && (
<Tooltip>
<TooltipTrigger asChild>
<div className="cursor-pointer">
<IconStarFilled className="text-accent" />
</div>
</TooltipTrigger>
<TooltipContent side="top" sideOffset={8} className="z-50">
Recommended
</TooltipContent>
</Tooltip>
)}
</DropdownMenuItem> </DropdownMenuItem>
))} ))}
</DropdownMenuContent> </DropdownMenuContent>

View File

@ -1,14 +1,6 @@
import { describe, it, expect, vi, beforeEach } from 'vitest' import { describe, it, expect, vi, beforeEach } from 'vitest'
import { renderHook, act } from '@testing-library/react' import { renderHook, act } from '@testing-library/react'
import { import { useHardware, HardwareData, OS, RAM } from '../useHardware'
useHardware,
HardwareData,
SystemUsage,
CPU,
GPU,
OS,
RAM,
} from '../useHardware'
// Mock dependencies // Mock dependencies
vi.mock('@/constants/localStorage', () => ({ vi.mock('@/constants/localStorage', () => ({
@ -43,7 +35,6 @@ describe('useHardware', () => {
name: '', name: '',
usage: 0, usage: 0,
}, },
gpus: [],
os_type: '', os_type: '',
os_name: '', os_name: '',
total_memory: 0, total_memory: 0,
@ -52,9 +43,7 @@ describe('useHardware', () => {
cpu: 0, cpu: 0,
used_memory: 0, used_memory: 0,
total_memory: 0, total_memory: 0,
gpus: [],
}) })
expect(result.current.gpuLoading).toEqual({})
expect(result.current.pollingPaused).toBe(false) expect(result.current.pollingPaused).toBe(false)
}) })
@ -74,26 +63,6 @@ describe('useHardware', () => {
available: 0, available: 0,
total: 0, total: 0,
}, },
gpus: [
{
name: 'NVIDIA RTX 3080',
total_memory: 10737418240,
vendor: 'NVIDIA',
uuid: 'GPU-12345',
driver_version: '470.57.02',
activated: true,
nvidia_info: {
index: 0,
compute_capability: '8.6',
},
vulkan_info: {
index: 0,
device_id: 8704,
device_type: 'discrete',
api_version: '1.2.0',
},
},
],
os_type: 'linux', os_type: 'linux',
os_name: 'Ubuntu', os_name: 'Ubuntu',
total_memory: 17179869184, total_memory: 17179869184,
@ -124,37 +93,6 @@ describe('useHardware', () => {
expect(result.current.hardwareData.cpu).toEqual(testCPU) expect(result.current.hardwareData.cpu).toEqual(testCPU)
}) })
it('should set GPUs data', () => {
const { result } = renderHook(() => useHardware())
const testGPUs = [
{
name: 'NVIDIA RTX 3080',
total_memory: 10737418240,
vendor: 'NVIDIA',
uuid: 'GPU-12345',
driver_version: '470.57.02',
activated: true,
nvidia_info: {
index: 0,
compute_capability: '8.6',
},
vulkan_info: {
index: 0,
device_id: 8704,
device_type: 'discrete',
api_version: '1.2.0',
},
},
]
act(() => {
result.current.setGPUs(testGPUs)
})
expect(result.current.hardwareData.gpus).toEqual(testGPUs)
})
it('should update system usage', () => { it('should update system usage', () => {
const { result } = renderHook(() => useHardware()) const { result } = renderHook(() => useHardware())
@ -162,13 +100,6 @@ describe('useHardware', () => {
cpu: 45.2, cpu: 45.2,
used_memory: 8589934592, used_memory: 8589934592,
total_memory: 17179869184, total_memory: 17179869184,
gpus: [
{
uuid: 'GPU-12345',
used_memory: 2147483648,
total_memory: 10737418240,
},
],
} }
act(() => { act(() => {
@ -178,48 +109,6 @@ describe('useHardware', () => {
expect(result.current.systemUsage).toEqual(testSystemUsage) expect(result.current.systemUsage).toEqual(testSystemUsage)
}) })
it('should manage GPU loading state', () => {
const { result } = renderHook(() => useHardware())
// First set up some GPU data so we have a UUID to work with
const testGPUs = [
{
name: 'NVIDIA RTX 3080',
total_memory: 10737418240,
vendor: 'NVIDIA',
uuid: 'GPU-12345',
driver_version: '470.57.02',
activated: true,
nvidia_info: {
index: 0,
compute_capability: '8.6',
},
vulkan_info: {
index: 0,
device_id: 8704,
device_type: 'discrete',
api_version: '1.2.0',
},
},
]
act(() => {
result.current.setGPUs(testGPUs)
})
act(() => {
result.current.setGpuLoading(0, true)
})
expect(result.current.gpuLoading['GPU-12345']).toBe(true)
act(() => {
result.current.setGpuLoading(0, false)
})
expect(result.current.gpuLoading['GPU-12345']).toBe(false)
})
it('should manage polling state', () => { it('should manage polling state', () => {
const { result } = renderHook(() => useHardware()) const { result } = renderHook(() => useHardware())
@ -271,179 +160,4 @@ describe('useHardware', () => {
expect(result.current.hardwareData.ram).toEqual(ram) expect(result.current.hardwareData.ram).toEqual(ram)
}) })
}) })
describe('updateGPU', () => {
it('should update specific GPU at index', () => {
const { result } = renderHook(() => useHardware())
const initialGpus: GPU[] = [
{
name: 'GPU 1',
total_memory: 8192,
vendor: 'NVIDIA',
uuid: 'gpu-1',
driver_version: '1.0',
activated: false,
nvidia_info: { index: 0, compute_capability: '8.0' },
vulkan_info: {
index: 0,
device_id: 1,
device_type: 'discrete',
api_version: '1.0',
},
},
{
name: 'GPU 2',
total_memory: 4096,
vendor: 'AMD',
uuid: 'gpu-2',
driver_version: '2.0',
activated: false,
nvidia_info: { index: 1, compute_capability: '7.0' },
vulkan_info: {
index: 1,
device_id: 2,
device_type: 'discrete',
api_version: '1.0',
},
},
]
act(() => {
result.current.setGPUs(initialGpus)
})
const updatedGpu: GPU = {
...initialGpus[0],
name: 'Updated GPU 1',
activated: true,
}
act(() => {
result.current.updateGPU(0, updatedGpu)
})
expect(result.current.hardwareData.gpus[0].name).toBe('Updated GPU 1')
expect(result.current.hardwareData.gpus[0].activated).toBe(true)
expect(result.current.hardwareData.gpus[1]).toEqual(initialGpus[1])
})
it('should handle invalid index gracefully', () => {
const { result } = renderHook(() => useHardware())
const initialGpus: GPU[] = [
{
name: 'GPU 1',
total_memory: 8192,
vendor: 'NVIDIA',
uuid: 'gpu-1',
driver_version: '1.0',
activated: false,
nvidia_info: { index: 0, compute_capability: '8.0' },
vulkan_info: {
index: 0,
device_id: 1,
device_type: 'discrete',
api_version: '1.0',
},
},
]
act(() => {
result.current.setGPUs(initialGpus)
})
const updatedGpu: GPU = {
...initialGpus[0],
name: 'Updated GPU',
}
act(() => {
result.current.updateGPU(5, updatedGpu)
})
expect(result.current.hardwareData.gpus[0]).toEqual(initialGpus[0])
})
})
describe('setHardwareData with GPU activation', () => {
it('should initialize GPUs as inactive when activated is not specified', () => {
const { result } = renderHook(() => useHardware())
const hardwareData: HardwareData = {
cpu: {
arch: 'x86_64',
core_count: 4,
extensions: [],
name: 'CPU',
usage: 0,
},
gpus: [
{
name: 'GPU 1',
total_memory: 8192,
vendor: 'NVIDIA',
uuid: 'gpu-1',
driver_version: '1.0',
nvidia_info: { index: 0, compute_capability: '8.0' },
vulkan_info: {
index: 0,
device_id: 1,
device_type: 'discrete',
api_version: '1.0',
},
},
],
os_type: 'windows',
os_name: 'Windows 11',
total_memory: 16384,
}
act(() => {
result.current.setHardwareData(hardwareData)
})
expect(result.current.hardwareData.gpus[0].activated).toBe(false)
})
it('should preserve existing activation states when set', () => {
const { result } = renderHook(() => useHardware())
const hardwareData: HardwareData = {
cpu: {
arch: 'x86_64',
core_count: 4,
extensions: [],
name: 'CPU',
usage: 0,
},
gpus: [
{
name: 'GPU 1',
total_memory: 8192,
vendor: 'NVIDIA',
uuid: 'gpu-1',
driver_version: '1.0',
activated: true,
nvidia_info: { index: 0, compute_capability: '8.0' },
vulkan_info: {
index: 0,
device_id: 1,
device_type: 'discrete',
api_version: '1.0',
},
},
],
os_type: 'windows',
os_name: 'Windows 11',
total_memory: 16384,
}
act(() => {
result.current.setHardwareData(hardwareData)
})
expect(result.current.hardwareData.gpus[0].activated).toBe(true)
})
})
}) })

View File

@ -428,11 +428,11 @@ export const useChat = () => {
} }
} catch (error) { } catch (error) {
if (!abortController.signal.aborted) { if (!abortController.signal.aborted) {
const errorMessage = if (error && typeof error === 'object' && 'message' in error) {
error && typeof error === 'object' && 'message' in error setModelLoadError(error as ErrorObject)
? error.message } else {
: error setModelLoadError(`${error}`)
setModelLoadError(`${errorMessage}`) }
} }
} finally { } finally {
updateLoadingModel(false) updateLoadingModel(false)
@ -453,6 +453,7 @@ export const useChat = () => {
setPrompt, setPrompt,
selectedModel, selectedModel,
currentAssistant, currentAssistant,
experimentalFeatures,
tools, tools,
updateLoadingModel, updateLoadingModel,
getDisabledToolsForThread, getDisabledToolsForThread,

View File

@ -12,30 +12,6 @@ export interface CPU {
instructions?: string[] // Cortex migration: ensure instructions data ready instructions?: string[] // Cortex migration: ensure instructions data ready
} }
export interface GPUAdditionalInfo {
compute_cap: string
driver_version: string
}
export interface GPU {
name: string
total_memory: number
vendor: string
uuid: string
driver_version: string
activated?: boolean
nvidia_info: {
index: number
compute_capability: string
}
vulkan_info: {
index: number
device_id: number
device_type: string
api_version: string
}
}
export interface OS { export interface OS {
name: string name: string
version: string version: string
@ -48,7 +24,6 @@ export interface RAM {
export interface HardwareData { export interface HardwareData {
cpu: CPU cpu: CPU
gpus: GPU[]
os_type: string os_type: string
os_name: string os_name: string
total_memory: number total_memory: number
@ -60,11 +35,6 @@ export interface SystemUsage {
cpu: number cpu: number
used_memory: number used_memory: number
total_memory: number total_memory: number
gpus: {
uuid: string
used_memory: number
total_memory: number
}[]
} }
// Default values // Default values
@ -76,7 +46,6 @@ const defaultHardwareData: HardwareData = {
name: '', name: '',
usage: 0, usage: 0,
}, },
gpus: [],
os_type: '', os_type: '',
os_name: '', os_name: '',
total_memory: 0, total_memory: 0,
@ -86,7 +55,6 @@ const defaultSystemUsage: SystemUsage = {
cpu: 0, cpu: 0,
used_memory: 0, used_memory: 0,
total_memory: 0, total_memory: 0,
gpus: [],
} }
interface HardwareStore { interface HardwareStore {
@ -96,22 +64,17 @@ interface HardwareStore {
// Update functions // Update functions
setCPU: (cpu: CPU) => void setCPU: (cpu: CPU) => void
setGPUs: (gpus: GPU[]) => void
setOS: (os: OS) => void setOS: (os: OS) => void
setRAM: (ram: RAM) => void setRAM: (ram: RAM) => void
// Update entire hardware data at once // Update entire hardware data at once
setHardwareData: (data: HardwareData) => void setHardwareData: (data: HardwareData) => void
// Update individual GPU
updateGPU: (index: number, gpu: GPU) => void
// Update RAM available // Update RAM available
updateSystemUsage: (usage: SystemUsage) => void updateSystemUsage: (usage: SystemUsage) => void
// GPU loading state // GPU loading state
gpuLoading: { [index: number]: boolean } gpuLoading: { [index: number]: boolean }
setGpuLoading: (index: number, loading: boolean) => void
// Polling control // Polling control
pollingPaused: boolean pollingPaused: boolean
@ -126,13 +89,6 @@ export const useHardware = create<HardwareStore>()(
systemUsage: defaultSystemUsage, systemUsage: defaultSystemUsage,
gpuLoading: {}, gpuLoading: {},
pollingPaused: false, pollingPaused: false,
setGpuLoading: (index, loading) =>
set((state) => ({
gpuLoading: {
...state.gpuLoading,
[state.hardwareData.gpus[index].uuid]: loading,
},
})),
pausePolling: () => set({ pollingPaused: true }), pausePolling: () => set({ pollingPaused: true }),
resumePolling: () => set({ pollingPaused: false }), resumePolling: () => set({ pollingPaused: false }),
@ -144,14 +100,6 @@ export const useHardware = create<HardwareStore>()(
}, },
})), })),
setGPUs: (gpus) =>
set((state) => ({
hardwareData: {
...state.hardwareData,
gpus,
},
})),
setOS: (os) => setOS: (os) =>
set((state) => ({ set((state) => ({
hardwareData: { hardwareData: {
@ -181,27 +129,9 @@ export const useHardware = create<HardwareStore>()(
available: 0, available: 0,
total: 0, total: 0,
}, },
gpus: data.gpus.map((gpu) => ({
...gpu,
activated: gpu.activated ?? false,
})),
}, },
}), }),
updateGPU: (index, gpu) =>
set((state) => {
const newGPUs = [...state.hardwareData.gpus]
if (index >= 0 && index < newGPUs.length) {
newGPUs[index] = gpu
}
return {
hardwareData: {
...state.hardwareData,
gpus: newGPUs,
},
}
}),
updateSystemUsage: (systemUsage) => updateSystemUsage: (systemUsage) =>
set(() => ({ set(() => ({
systemUsage, systemUsage,

View File

@ -1,8 +1,8 @@
import { create } from 'zustand' import { create } from 'zustand'
type ModelLoadState = { type ModelLoadState = {
modelLoadError?: string modelLoadError?: string | ErrorObject
setModelLoadError: (error: string | undefined) => void setModelLoadError: (error: string | ErrorObject | undefined) => void
} }
export const useModelLoad = create<ModelLoadState>()((set) => ({ export const useModelLoad = create<ModelLoadState>()((set) => ({

View File

@ -276,9 +276,34 @@ export const useModelProvider = create<ModelProviderState>()(
}) })
} }
// Migration for override_tensor_buffer_type key (version 2 -> 3)
if (version === 2 && state?.providers) {
state.providers.forEach((provider) => {
if (provider.models) {
provider.models.forEach((model) => {
// Initialize settings if it doesn't exist
if (!model.settings) {
model.settings = {}
}
// Add missing override_tensor_buffer_type setting if it doesn't exist
if (!model.settings.override_tensor_buffer_t) {
model.settings.override_tensor_buffer_t = {
...modelSettings.override_tensor_buffer_t,
controller_props: {
...modelSettings.override_tensor_buffer_t
.controller_props,
},
}
}
})
}
})
}
return state return state
}, },
version: 2, version: 3,
} }
) )
) )

View File

@ -133,4 +133,15 @@ export const modelSettings = {
textAlign: 'right', textAlign: 'right',
}, },
}, },
override_tensor_buffer_t: {
key: 'override_tensor_buffer_t',
title: 'Override Tensor Buffer Type',
description: 'Override the tensor buffer type for the model',
controller_type: 'input',
controller_props: {
value: '',
placeholder: 'e.g., layers\\.\\d+\\.ffn_.*=CPU',
type: 'text',
},
},
} }

View File

@ -501,7 +501,7 @@ function Hub() {
</HeaderPage> </HeaderPage>
<div className="p-4 w-full h-[calc(100%-32px)] !overflow-y-auto first-step-setup-local-provider"> <div className="p-4 w-full h-[calc(100%-32px)] !overflow-y-auto first-step-setup-local-provider">
<div className="flex flex-col h-full justify-between gap-4 gap-y-3 w-full md:w-4/5 mx-auto"> <div className="flex flex-col h-full justify-between gap-4 gap-y-3 w-full md:w-4/5 mx-auto">
{loading ? ( {loading && !filteredModels.length ? (
<div className="flex items-center justify-center"> <div className="flex items-center justify-center">
<div className="text-center text-muted-foreground"> <div className="text-center text-muted-foreground">
{t('hub:loadingModels')} {t('hub:loadingModels')}

View File

@ -15,7 +15,6 @@ import {
import { import {
createFileRoute, createFileRoute,
Link, Link,
useNavigate,
useParams, useParams,
useSearch, useSearch,
} from '@tanstack/react-router' } from '@tanstack/react-router'
@ -32,6 +31,7 @@ import { CustomTooltipJoyRide } from '@/containers/CustomeTooltipJoyRide'
import { route } from '@/constants/routes' import { route } from '@/constants/routes'
import DeleteProvider from '@/containers/dialogs/DeleteProvider' import DeleteProvider from '@/containers/dialogs/DeleteProvider'
import { updateSettings, fetchModelsFromProvider } from '@/services/providers' import { updateSettings, fetchModelsFromProvider } from '@/services/providers'
import { localStorageKey } from '@/constants/localStorage'
import { Button } from '@/components/ui/button' import { Button } from '@/components/ui/button'
import { IconFolderPlus, IconLoader, IconRefresh } from '@tabler/icons-react' import { IconFolderPlus, IconLoader, IconRefresh } from '@tabler/icons-react'
import { getProviders } from '@/services/providers' import { getProviders } from '@/services/providers'
@@ -83,7 +83,6 @@ function ProviderDetail() {
const { getProviderByName, setProviders, updateProvider } = useModelProvider() const { getProviderByName, setProviders, updateProvider } = useModelProvider()
const provider = getProviderByName(providerName) const provider = getProviderByName(providerName)
const isSetup = step === 'setup_remote_provider' const isSetup = step === 'setup_remote_provider'
const navigate = useNavigate()
// Check if llamacpp provider needs backend configuration // Check if llamacpp provider needs backend configuration
const needsBackendConfig = const needsBackendConfig =
@@ -137,9 +136,7 @@ function ProviderDetail() {
const { status } = data const { status } = data
if (status === STATUS.FINISHED) { if (status === STATUS.FINISHED) {
navigate({ localStorage.setItem(localStorageKey.setupCompleted, 'true')
to: route.home,
})
} }
} }
@@ -214,7 +211,11 @@ function ProviderDetail() {
}) })
.catch((error) => { .catch((error) => {
console.error('Error starting model:', error) console.error('Error starting model:', error)
setModelLoadError(`${error.message}`) if (error && typeof error === 'object' && 'message' in error) {
setModelLoadError(error)
} else {
setModelLoadError(`${error}`)
}
}) })
.finally(() => { .finally(() => {
// Remove model from loading state // Remove model from loading state
@@ -387,6 +388,7 @@
: false : false
} }
description={ description={
<>
<RenderMarkdown <RenderMarkdown
className="![>p]:text-main-view-fg/70 select-none" className="![>p]:text-main-view-fg/70 select-none"
content={setting.description} content={setting.description}
@@ -410,6 +412,19 @@
), ),
}} }}
/> />
{setting.key === 'version_backend' &&
setting.controller_props?.recommended && (
<div className="mt-1 text-sm text-main-view-fg/60">
<span className="font-medium">
{setting.controller_props.recommended
?.split('/')
.pop() ||
setting.controller_props.recommended}
</span>
<span> is the recommended backend.</span>
</div>
)}
</>
} }
actions={actionComponent} actions={actionComponent}
/> />

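Condensed, illustrative versions of the two behavioural changes in this file; the function names are mine, not the component's:

    type ErrorObject = { code?: string; message: string; details?: string }

    // The catch block now keeps structured errors intact so the UI can show
    // code/details; anything else is stringified as before.
    function normalizeStartError(error: unknown): string | ErrorObject {
      if (error && typeof error === 'object' && 'message' in error) {
        return error as ErrorObject
      }
      return `${error}`
    }

    // The recommended-backend hint shows only the last path segment of the
    // recommended value, falling back to the raw value when there is no '/'.
    function recommendedBackendLabel(recommended?: string): string | undefined {
      if (!recommended) return undefined
      return recommended.split('/').pop() || recommended
    }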
View File

@@ -1,6 +1,6 @@
/* eslint-disable @typescript-eslint/no-explicit-any */ /* eslint-disable @typescript-eslint/no-explicit-any */
import { createFileRoute } from '@tanstack/react-router' import { createFileRoute } from '@tanstack/react-router'
import { useEffect, useState } from 'react' import { useEffect } from 'react'
import { useHardware } from '@/hooks/useHardware' import { useHardware } from '@/hooks/useHardware'
import { Progress } from '@/components/ui/progress' import { Progress } from '@/components/ui/progress'
import { route } from '@/constants/routes' import { route } from '@/constants/routes'
@@ -19,12 +19,7 @@ function SystemMonitor() {
const { t } = useTranslation() const { t } = useTranslation()
const { hardwareData, systemUsage, updateSystemUsage } = useHardware() const { hardwareData, systemUsage, updateSystemUsage } = useHardware()
const { const { devices: llamacppDevices, fetchDevices } = useLlamacppDevices()
devices: llamacppDevices,
fetchDevices,
} = useLlamacppDevices()
const [isInitialized, setIsInitialized] = useState(false)
useEffect(() => { useEffect(() => {
// Fetch llamacpp devices // Fetch llamacpp devices
@@ -46,14 +41,6 @@
return () => clearInterval(intervalId) return () => clearInterval(intervalId)
}, [updateSystemUsage]) }, [updateSystemUsage])
// Initialize when hardware data and llamacpp devices are available
useEffect(() => {
if (hardwareData.gpus.length > 0 && !isInitialized) {
setIsInitialized(true)
}
}, [hardwareData.gpus.length, isInitialized])
// Calculate RAM usage percentage // Calculate RAM usage percentage
const ramUsagePercentage = const ramUsagePercentage =
toNumber(systemUsage.used_memory / hardwareData.total_memory) * 100 toNumber(systemUsage.used_memory / hardwareData.total_memory) * 100

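The RAM percentage above divides before converting, so a missing total_memory would yield NaN or Infinity; a sketch with that guard made explicit (toNumber here is a stand-in for the app's helper, assumed to coerce non-finite values to 0):

    // Stand-in for the app's toNumber helper: coerce NaN/Infinity to 0.
    function toNumber(value: number): number {
      return Number.isFinite(value) ? value : 0
    }

    function ramUsagePercentage(usedMemory: number, totalMemory: number): number {
      return toNumber(usedMemory / totalMemory) * 100
    }

    // ramUsagePercentage(8_000_000_000, 16_000_000_000) -> 50
    // ramUsagePercentage(8_000_000_000, 0)              -> 0 (guarded, not NaN)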
View File

@@ -5,3 +5,9 @@ interface LogEntry {
target: string target: string
message: string message: string
} }
type ErrorObject = {
code?: string
message: string
details?: string
}

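A possible type guard for the new shape; only the ErrorObject type above is part of the diff, the guard is illustrative:

    function isErrorObject(value: unknown): value is ErrorObject {
      return (
        typeof value === 'object' &&
        value !== null &&
        typeof (value as { message?: unknown }).message === 'string'
      )
    }

    // isErrorObject({ message: 'failed to load model', code: 'EXIT_1' }) -> true
    // isErrorObject('failed to load model')                              -> false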
View File

@@ -7,6 +7,7 @@ type ControllerProps = {
type?: string type?: string
options?: Array<{ value: number | string; name: string }> options?: Array<{ value: number | string; name: string }>
input_actions?: string[] input_actions?: string[]
recommended?: string
} }
/** /**