Merge dev branch while preserving website directory

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
Commit 72588db776 by Ramon Perez, 2025-08-11 13:19:17 +10:00
68 changed files with 8,432 additions and 1,407 deletions

View File

@@ -134,7 +134,7 @@ jobs:
   test-on-windows-pr:
     if: github.event_name == 'pull_request' || github.event_name == 'workflow_dispatch'
-    runs-on: ${{ (github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name != github.repository) && 'windows-latest' || 'WINDOWS-11' }}
+    runs-on: 'windows-latest'
     steps:
       - name: Getting the repo
         uses: actions/checkout@v3

View File

@@ -105,8 +105,7 @@ jobs:
           jq --arg version "${{ inputs.new_version }}" '.version = $version | .bundle.createUpdaterArtifacts = true' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json
           mv /tmp/tauri.conf.json ./src-tauri/tauri.conf.json
           if [ "${{ inputs.channel }}" != "stable" ]; then
-            jq '.bundle.linux.deb.files = {"usr/bin/bun": "resources/bin/bun",
-              "usr/lib/Jan-${{ inputs.channel }}/resources/lib/libvulkan.so": "resources/lib/libvulkan.so"}' ./src-tauri/tauri.linux.conf.json > /tmp/tauri.linux.conf.json
+            jq '.bundle.linux.deb.files = {"usr/bin/bun": "resources/bin/bun"}' ./src-tauri/tauri.linux.conf.json > /tmp/tauri.linux.conf.json
             mv /tmp/tauri.linux.conf.json ./src-tauri/tauri.linux.conf.json
           fi
           jq --arg version "${{ inputs.new_version }}" '.version = $version' web-app/package.json > /tmp/package.json

.gitignore (vendored): 33 lines changed
View File

@@ -1,46 +1,22 @@
+.idea
 .env
-.idea
-# Jan inference
 error.log
 node_modules
 *.tgz
-!charts/server/charts/*.tgz
 dist
 build
 .DS_Store
-electron/renderer
-electron/models
-electron/docs
-electron/engines
-electron/themes
-electron/playwright-report
-server/pre-install
 package-lock.json
 coverage
 *.log
 core/lib/**
-# Turborepo
-.turbo
-electron/test-data
-electron/test-results
-core/test_results.html
-coverage
 .yarn
 .yarnrc
-test_results.html
 *.tsbuildinfo
-electron/shared/**
+test_results.html
 # docs
 docs/yarn.lock
-electron/.version.bak
-src-tauri/binaries/engines/cortex.llamacpp
-src-tauri/resources/themes
 src-tauri/resources/lib
-src-tauri/Cargo.lock
 src-tauri/icons
 !src-tauri/icons/icon.png
 src-tauri/gen/apple
@@ -75,5 +51,8 @@ docs/.next/
 **/yarn-error.log*
 **/pnpm-debug.log*
-# Combined output for local testing
-combined-output/
+## cargo
+target
+## test
+test-data

View File

@@ -31,7 +31,6 @@ endif
 dev: install-and-build
 	yarn download:bin
-	yarn download:lib
 	yarn dev
 
 # Linting
@@ -41,8 +40,10 @@ lint: install-and-build
 # Testing
 test: lint
 	yarn download:bin
-	yarn download:lib
 	yarn test
+	yarn copy:assets:tauri
+	yarn build:icon
+	cargo test --manifest-path src-tauri/Cargo.toml --no-default-features --features test-tauri -- --test-threads=1
 
 # Builds and publishes the app
 build-and-publish: install-and-build
@@ -50,7 +51,6 @@ build-and-publish: install-and-build
 # Build
 build: install-and-build
-	yarn download:lib
 	yarn build
 
 clean:

[Binary files added: two images (57 KiB and 947 KiB) and one additional binary file]

View File

@@ -0,0 +1,92 @@
---
title: "Jan v0.6.7: OpenAI gpt-oss support and enhanced MCP tutorials"
version: 0.6.7
description: "Full support for OpenAI's open-weight gpt-oss models and new Jupyter MCP integration guide"
date: 2025-08-07
ogImage: "/assets/images/changelog/gpt-oss-serper.png"
---
import ChangelogHeader from "@/components/Changelog/ChangelogHeader"
import { Callout } from 'nextra/components'
<ChangelogHeader title="Jan v0.6.7: OpenAI gpt-oss support and enhanced MCP tutorials" date="2025-08-07" ogImage="/assets/images/changelog/gpt-oss-serper.png"/>
## Highlights 🎉
Jan v0.6.7 brings full support for OpenAI's groundbreaking open-weight models - gpt-oss-120b and gpt-oss-20b - along with enhanced MCP documentation and critical bug fixes for reasoning models.
### 🚀 OpenAI gpt-oss Models Now Supported
Jan now fully supports OpenAI's first open-weight language models since GPT-2:
**gpt-oss-120b:**
- 117B total parameters, 5.1B active per token
- Runs efficiently on a single 80GB GPU
- Near-parity with OpenAI o4-mini on reasoning benchmarks
- Exceptional tool use and function calling capabilities
**gpt-oss-20b:**
- 21B total parameters, 3.6B active per token
- Runs on edge devices with just 16GB memory
- Similar performance to OpenAI o3-mini
- Perfect for local inference and rapid iteration
<Callout type="info">
Both models use Mixture-of-Experts (MoE) architecture and support context lengths up to 128k tokens. They come natively quantized in MXFP4 format for efficient memory usage.
</Callout>
### 🎮 GPU Layer Configuration
Due to the models' size, you may need to adjust GPU layers based on your hardware:
![GPU layers setting adjusted for optimal performance](/assets/images/changelog/jupyter5.png)
Start with default settings and reduce layers if you encounter out-of-memory errors. Each system requires different configurations based on available VRAM.
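If you prefer to experiment outside Jan's settings UI, the same trade-off can be sketched with llama.cpp's standalone server, which Jan's local engine builds on. This is only an illustration: the model path and layer count below are placeholders, not recommended values.
```bash
# Sketch: offload fewer layers to the GPU when you hit out-of-memory errors.
# --n-gpu-layers controls how many layers are kept in VRAM; lower it until the model fits.
llama-server -m ./gpt-oss-20b.gguf --n-gpu-layers 24 --ctx-size 8192
```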
### 📚 New Jupyter MCP Tutorial
We've added comprehensive documentation for the Jupyter MCP integration:
- Real-time notebook interaction and code execution
- Step-by-step setup with Python environment management
- Example workflows for data analysis and visualization
- Security best practices for code execution
- Performance optimization tips
The tutorial demonstrates how to turn Jan into a capable data science partner that can execute analysis, create visualizations, and iterate based on actual results.
### 🔧 Bug Fixes
Critical fixes for reasoning model support:
- **Fixed reasoning text inclusion**: Reasoning text is no longer incorrectly included in chat completion requests
- **Fixed thinking block display**: gpt-oss thinking blocks now render properly in the UI
- **Fixed React state loop**: Resolved infinite re-render issue with useMediaQuery hook
## Using gpt-oss Models
### Download from Hub
All gpt-oss GGUF variants are available in the Jan Hub. Simply search for "gpt-oss" and choose the quantization that fits your hardware.
### Model Capabilities
Both models excel at:
- **Reasoning tasks**: Competition coding, mathematics, and problem solving
- **Tool use**: Web search, code execution, and function calling
- **CoT reasoning**: Full chain-of-thought visibility for monitoring
- **Structured outputs**: JSON schema enforcement and grammar constraints (see the sketch below)
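As a rough sketch of the structured-output point above, a schema-constrained request to an OpenAI-compatible endpoint (such as the one llama.cpp's server exposes) can look like the following. The port, model name, and exact `response_format` support are assumptions that depend on your local setup.
```bash
# Sketch: request JSON constrained by a schema from an OpenAI-compatible server.
# Port, model name, and schema support are placeholders for your own setup.
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gpt-oss-20b",
    "messages": [{"role": "user", "content": "Name a city and its country."}],
    "response_format": {
      "type": "json_schema",
      "json_schema": {
        "name": "city_answer",
        "schema": {
          "type": "object",
          "properties": {
            "city": {"type": "string"},
            "country": {"type": "string"}
          },
          "required": ["city", "country"]
        }
      }
    }
  }'
```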
### Performance Tips
- **Memory requirements**: gpt-oss-120b needs ~80GB, gpt-oss-20b needs ~16GB
- **GPU layers**: Adjust based on your VRAM (start high, reduce if needed)
- **Context size**: Both models support up to 128k tokens
- **Quantization**: Choose lower quantization for smaller memory footprint
## Coming Next
We're continuing to optimize performance for large models, expand MCP integrations, and improve the overall experience for running cutting-edge open models locally.
Update your Jan or [download the latest](https://jan.ai/).
For the complete list of changes, see the [GitHub release notes](https://github.com/menloresearch/jan/releases/tag/v0.6.7).

[15 binary image files added (35 KiB to 947 KiB)]

View File

@@ -0,0 +1,337 @@
---
title: Jupyter MCP
description: Real-time Jupyter notebook interaction and code execution through MCP integration.
keywords:
[
Jan,
MCP,
Model Context Protocol,
Jupyter,
data analysis,
code execution,
notebooks,
Python,
visualization,
tool calling,
GPT-5,
OpenAI,
]
---
import { Callout } from 'nextra/components'
# Jupyter MCP
[Jupyter MCP Server](https://jupyter-mcp-server.datalayer.tech/) enables real-time interaction with Jupyter notebooks, allowing AI models to edit, execute, and document code for data analysis and visualization. Instead of just generating code suggestions, AI can actually run Python code and see the results.
This integration gives Jan the ability to execute analysis, create visualizations, and iterate based on actual results - turning your AI assistant into a capable data science partner.
<Callout type="info">
**Breaking Change**: Version 0.11.0+ renamed `room` to `document`. Check the [release notes](https://jupyter-mcp-server.datalayer.tech/releases) for details.
</Callout>
## Available Tools
The Jupyter MCP Server provides [12 comprehensive tools](https://jupyter-mcp-server.datalayer.tech/tools/):
### Core Operations
- `append_execute_code_cell`: Add and run code cells at notebook end
- `insert_execute_code_cell`: Insert and run code at specific positions
- `execute_cell_simple_timeout`: Execute cells with timeout control
- `execute_cell_streaming`: Long-running cells with progress updates
- `execute_cell_with_progress`: Execute with timeout and monitoring
### Cell Management
- `append_markdown_cell`: Add documentation cells
- `insert_markdown_cell`: Insert markdown at specific positions
- `delete_cell`: Remove cells from notebook
- `overwrite_cell_source`: Update existing cell content
### Information & Reading
- `get_notebook_info`: Retrieve notebook metadata
- `read_cell`: Examine specific cell content
- `read_all_cells`: Get complete notebook state
<Callout type="warning">
The MCP connects to **one notebook at a time**, not multiple notebooks. Specify your target notebook in the configuration.
</Callout>
## Prerequisites
- Jan with MCP enabled
- Python 3.8+ with uv package manager
- Docker installed
- OpenAI API key for GPT-5 access
- Basic understanding of Jupyter notebooks
## Setup
### Enable MCP
1. Go to **Settings** > **MCP Servers**
2. Toggle **Allow All MCP Tool Permission** ON
![MCP settings page with toggle enabled](../../_assets/mcp-on.png)
### Install uv Package Manager
If you don't have uv installed:
```bash
# macOS and Linux
curl -LsSf https://astral.sh/uv/install.sh | sh
# Windows
powershell -c "irm https://astral.sh/uv/install.ps1 | iex"
```
### Create Python Environment
Set up an isolated environment for Jupyter:
```bash
# Create environment with Python 3.13
uv venv .venv --python 3.13
# Activate environment
source .venv/bin/activate # Linux/macOS
# or
.venv\Scripts\activate # Windows
# Install Jupyter dependencies
uv pip install jupyterlab==4.4.1 jupyter-collaboration==4.0.2 ipykernel
uv pip uninstall pycrdt datalayer_pycrdt
uv pip install datalayer_pycrdt==0.12.17
# Add data science libraries
uv pip install pandas numpy matplotlib altair
```
### Start JupyterLab Server
Launch JupyterLab with authentication:
```bash
jupyter lab --port 8888 --IdentityProvider.token heyheyyou --ip 0.0.0.0
```
![Terminal showing JupyterLab startup](../../_assets/jupyter1.png)
The server opens in your browser:
![JupyterLab interface in browser](../../_assets/jupyter.png)
### Create Target Notebook
Create a new notebook named `for_jan.ipynb`:
![Notebook created in JupyterLab](../../_assets/jupyter2.png)
### Configure MCP Server in Jan
Click `+` in MCP Servers section:
**Configuration for macOS/Windows:**
- **Server Name**: `jupyter`
- **Command**: `docker`
- **Arguments**:
```
run -i --rm -e DOCUMENT_URL -e DOCUMENT_TOKEN -e DOCUMENT_ID -e RUNTIME_URL -e RUNTIME_TOKEN datalayer/jupyter-mcp-server:latest
```
- **Environment Variables**:
- Key: `DOCUMENT_URL`, Value: `http://host.docker.internal:8888`
- Key: `DOCUMENT_TOKEN`, Value: `heyheyyou`
- Key: `DOCUMENT_ID`, Value: `for_jan.ipynb`
- Key: `RUNTIME_URL`, Value: `http://host.docker.internal:8888`
- Key: `RUNTIME_TOKEN`, Value: `heyheyyou`
![Jan MCP server configuration](../../_assets/jupyter3.png)
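For reference, the settings above amount to roughly the following standalone `docker run` command, using the example token and notebook name from this guide (`host.docker.internal` resolves to your machine from inside the container on macOS and Windows):
```bash
# Sketch: the container invocation Jan assembles from the configuration above.
# Token and notebook name are the example values from this guide; replace with your own.
docker run -i --rm \
  -e DOCUMENT_URL=http://host.docker.internal:8888 \
  -e DOCUMENT_TOKEN=heyheyyou \
  -e DOCUMENT_ID=for_jan.ipynb \
  -e RUNTIME_URL=http://host.docker.internal:8888 \
  -e RUNTIME_TOKEN=heyheyyou \
  datalayer/jupyter-mcp-server:latest
```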
## Using OpenAI's GPT-5
### Configure OpenAI Provider
Navigate to **Settings** > **Model Providers** > **OpenAI**:
![OpenAI settings page](../../_assets/openai-settings.png)
### Add GPT-5 Model
Since GPT-5 is new, you'll need to manually add it to Jan:
![Manually adding GPT-5 model name](../../_assets/gpt5-add.png)
<Callout type="info">
**About GPT-5**: OpenAI's smartest, fastest, most useful model yet. It features built-in thinking capabilities, state-of-the-art performance across coding, math, and writing, and exceptional tool use abilities. GPT-5 automatically decides when to respond quickly versus when to think longer for expert-level responses.
</Callout>
### Enable Tool Calling
Ensure tools are enabled for GPT-5:
![Enabling tools for GPT-5](../../_assets/gpt5-tools.png)
## Usage
### Verify Tool Availability
Start a new chat with GPT-5. The tools bubble shows all available Jupyter operations:
![GPT-5 ready in chat with Jupyter tools visible](../../_assets/gpt5-chat.png)
### Initial Test
Start with establishing the notebook as your workspace:
```
You have access to a jupyter notebook, please use it as our data analysis scratchpad. Let's start by printing "Hello Jan" in a new cell.
```
GPT-5 creates and executes the code successfully:
![First message showing successful tool use](../../_assets/gpt5-msg.png)
### Advanced Data Analysis
Try a more complex task combining multiple operations:
```
Generate synthetic data with numpy, move it to a pandas dataframe and create a pivot table, and then make a cool animated plot using matplotlib. Your use case will be sales analysis in the luxury fashion industry.
```
![Complex analysis with luxury fashion sales data](../../_assets/gpt5-msg2.png)
Watch the complete output unfold:
<video width="100%" controls>
<source src="/assets/videos/mcpjupyter.mp4" type="video/mp4" />
Your browser does not support the video tag.
</video>
## Example Prompts to Try
### Financial Analysis
```
Create a Monte Carlo simulation for portfolio risk analysis. Generate 10,000 scenarios, calculate VaR at 95% confidence, and visualize the distribution.
```
### Time Series Forecasting
```
Generate synthetic time series data representing daily website traffic over 2 years with weekly seasonality and trend. Build an ARIMA model and forecast the next 30 days.
```
### Machine Learning Pipeline
```
Build a complete classification pipeline: generate a dataset with 3 classes and 5 features, split the data, try multiple algorithms (RF, SVM, XGBoost), and create a comparison chart of their performance.
```
### Interactive Dashboards
```
Create an interactive visualization using matplotlib widgets showing how changing interest rates affects loan payments over different time periods.
```
### Statistical Testing
```
Generate two datasets representing A/B test results for an e-commerce site. Perform appropriate statistical tests and create visualizations to determine if the difference is significant.
```
## Performance Considerations
<Callout type="warning">
Multiple tools can quickly consume context windows, especially for local models. GPT-5's unified system with smart routing helps manage this, but local models may struggle with speed and context limitations.
</Callout>
### Context Management
- Each tool call adds to conversation history
- 12 available tools means substantial system prompt overhead
- Local models may need reduced tool sets for reasonable performance
- Consider disabling unused tools to conserve context
### Cloud vs Local Trade-offs
- **Cloud models (GPT-5)**: Handle multiple tools efficiently with large context windows
- **Local models**: May require optimization, reduced tool sets, or smaller context sizes
- **Hybrid approach**: Use cloud for complex multi-tool workflows, local for simple tasks
## Security Considerations
<Callout type="warning">
MCP provides powerful capabilities but requires careful security practices.
</Callout>
### Authentication Tokens
- **Always use strong tokens** - avoid simple passwords
- **Never commit tokens** to version control
- **Rotate tokens regularly** for production use
- **Use different tokens** for different environments
### Network Security
- JupyterLab is network-accessible with `--ip 0.0.0.0`
- Consider using `--ip 127.0.0.1` for local-only access (see the sketch after this list)
- Implement firewall rules to restrict access
- Use HTTPS in production environments
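A minimal sketch of the local-only variant of the launch command used earlier in this guide (example port and token shown; replace them with your own):
```bash
# Sketch: bind JupyterLab to localhost only so it is not reachable from the network.
jupyter lab --port 8888 --IdentityProvider.token heyheyyou --ip 127.0.0.1
```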
### Code Execution Risks
- AI has full Python execution capabilities
- Review generated code before execution
- Use isolated environments for sensitive work
- Monitor resource usage and set limits
### Data Privacy
- Notebook content is processed by AI models
- When using cloud models like GPT-5, data leaves your system
- Keep sensitive data in secure environments
- Consider model provider's data policies
## Best Practices
### Environment Management
- Use virtual environments for isolation
- Document required dependencies
- Version control your notebooks
- Regular environment cleanup
### Performance Optimization
- Start with simple operations
- Monitor memory usage during execution
- Close unused notebooks
- Restart kernels when needed
### Effective Prompting
- Be specific about desired outputs
- Break complex tasks into steps
- Ask for explanations with code
- Request error handling in critical operations
## Troubleshooting
**Connection Problems:**
- Verify JupyterLab is running
- Check token matches configuration
- Confirm Docker can reach host
- Test with curl to verify connectivity (see the sketch below)
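A quick connectivity check using the example port and token from this guide; `/api/status` is part of the Jupyter Server REST API, but adjust the URL if your setup differs:
```bash
# Sketch: confirm the Jupyter server is reachable and the token is accepted.
curl -H "Authorization: token heyheyyou" http://localhost:8888/api/status
```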
**Execution Failures:**
- Check Python package availability
- Verify kernel is running
- Look for syntax errors in generated code
- Restart kernel if stuck
**Tool Calling Errors:**
- Ensure model supports tool calling
- Verify all 12 tools appear in chat
- Check MCP server is active
- Review Docker logs for errors
**API Rate Limits:**
- Monitor OpenAI usage dashboard
- Implement retry logic for transient errors
- Consider fallback to local models
- Cache results when possible
## Conclusion
The Jupyter MCP integration combined with GPT-5's advanced capabilities creates an exceptionally powerful data science environment. With GPT-5's built-in reasoning and expert-level intelligence, complex analyses that once required extensive manual coding can now be accomplished through natural conversation.
Whether you're exploring data, building models, or creating visualizations, this integration provides the computational power of Jupyter with the intelligence of GPT-5 - all within Jan's privacy-conscious interface.
Remember: with great computational power comes the responsibility to use it securely. Always validate generated code, use strong authentication, and be mindful of data privacy when using cloud-based models.

[4 binary image files added (233 KiB to 470 KiB)]

View File

@@ -0,0 +1,211 @@
---
title: "Run OpenAI's gpt-oss locally in 5 mins (Beginner Guide)"
description: "Complete 5-minute beginner guide to running OpenAI's gpt-oss locally. Step-by-step setup with Jan AI for private, offline AI conversations."
tags: OpenAI, gpt-oss, local AI, Jan, privacy, Apache-2.0, llama.cpp, Ollama, LM Studio
categories: guides
date: 2025-08-06
ogImage: assets/gpt-oss%20locally.jpeg
twitter:
card: summary_large_image
site: "@jandotai"
title: "Run OpenAI's gpt-oss Locally in 5 Minutes (Beginner Guide)"
description: "Complete 5-minute beginner guide to running OpenAI's gpt-oss locally with Jan AI for private, offline conversations."
image: assets/gpt-oss%20locally.jpeg
---
import { Callout } from 'nextra/components'
import CTABlog from '@/components/Blog/CTA'
# Run OpenAI's gpt-oss Locally in 5 mins
![gpt-oss running locally in Jan interface](./_assets/gpt-oss%20locally.jpeg)
OpenAI launched [gpt-oss](https://openai.com/index/introducing-gpt-oss/), its first open-weight model release since GPT-2. The model is designed to run locally on consumer hardware, and this guide shows you how to install and run gpt-oss on your computer for private, offline AI conversations.
## What is gpt-oss?
gpt-oss is OpenAI's open-weight large language model, released under the Apache-2.0 license. Unlike ChatGPT, gpt-oss:
- Runs completely offline - No internet required after setup
- 100% private - Your conversations never leave your device
- Unlimited usage - No token limits or rate limiting
- Free forever - No subscription fees
- Commercial use allowed - Apache-2.0 license permits business use
Running AI models locally means everything happens on your own hardware, giving you complete control over your data and conversations.
## gpt-oss System Requirements
| Component | Minimum | Recommended |
|-----------|---------|-------------|
| **RAM** | 16 GB | 32 GB+ |
| **Storage** | 11+ GB free | 25 GB+ free |
| **CPU** | 4 cores | 8+ cores |
| **GPU** | Optional | Modern GPU with 6GB+ VRAM recommended |
| **OS** | Windows 10+, macOS 11+, Linux | Latest versions |
**Installation apps available:**
- **Jan** (Recommended - easiest setup)
- **llama.cpp** (Command line)
- **Ollama** (Docker-style CLI)
- **LM Studio** (GUI alternative)
## How to install gpt-oss locally with Jan (5 mins)
### Step 1: Download Jan
First download Jan to run gpt-oss locally: [Download Jan AI](https://jan.ai/)
<Callout type="info">
Jan is the simplest way to run AI models locally. It automatically handles CPU/GPU optimization, provides a clean chat interface, and requires zero technical knowledge.
</Callout>
### Step 2: Install gpt-oss Model (2-3 minutes)
![Jan Hub showing gpt-oss model in the hub](./_assets/jan%20hub%20gpt-oss%20locally.jpeg)
1. Open Jan Hub -> search "gpt-oss" (it appears at the top)
2. Click Download and wait for completion (~11GB download)
3. Installation is automatic - Jan handles everything
### Step 3: Start using gpt-oss offline (30 seconds)
![Jan interface with gpt-oss model selected and ready to chat](./_assets/jan%20gpt-oss.jpeg)
1. Go to New Chat → select gpt-oss-20b from model picker
2. Start chatting - Jan automatically optimizes for your hardware
3. You're done! Your AI conversations now stay completely private
Success: Your gpt-oss setup is complete. No internet required for chatting, unlimited usage, zero subscription fees.
## Jan with gpt-oss vs ChatGPT vs other Local AI Models
| Feature | gpt-oss (Local) | ChatGPT Plus | Claude Pro | Other Local Models |
|---------|----------------|--------------|------------|-------------------|
| Cost | Free forever | $20/month | $20/month | Free |
| Privacy | 100% private | Data sent to OpenAI | Data sent to Anthropic | 100% private |
| Internet | Offline after setup | Requires internet | Requires internet | Offline |
| Usage limits | Unlimited | Rate limited | Rate limited | Unlimited |
| Performance | Good (hardware dependent) | Excellent | Excellent | Varies |
| Setup difficulty | Easy with Jan | None | None | Varies |
## Alternative Installation Methods
### Option 1: Jan (Recommended)
- Best for: Complete beginners, users wanting GUI interface
- Setup time: 5 minutes
- Difficulty: Very Easy
Already covered above - [Download Jan](https://jan.ai/)
### Option 2: llama.cpp (Command Line)
- Best for: Developers, terminal users, custom integrations
- Setup time: 10-15 minutes
- Difficulty: Intermediate
```bash
# macOS
brew install llama-cpp
# Windows: grab Windows exe from releases
curl -L -o gpt-oss-20b.gguf https://huggingface.co/openai/gpt-oss-20b-gguf/resolve/main/gpt-oss-20b.gguf
./main -m gpt-oss-20b.gguf --chat-simple
# Add GPU acceleration (adjust -ngl value based on your GPU VRAM)
./main -m gpt-oss-20b.gguf --chat-simple -ngl 20
```
### Option 3: Ollama (CLI Alternative)
Best for: users who prefer a Docker-style CLI, server deployments
Setup time: 5-10 minutes
Difficulty: Intermediate
```bash
# Install from https://ollama.com
ollama run gpt-oss:20b
```
### Option 4: LM Studio (GUI Alternative)
Best for: Users wanting GUI but not Jan
Setup time: 10 minutes
Difficulty: Easy
1. Download LM Studio from official website
2. Go to Models → search "gpt-oss-20b (GGUF)"
3. Download the model (wait for completion)
4. Go to Chat tab → select the model and start chatting
## gpt-oss Performance & Troubleshooting
### Expected Performance Benchmarks
| Hardware Setup | First Response | Subsequent Responses | Tokens/Second |
|---------------|---------------|---------------------|---------------|
| **16GB RAM + CPU only** | 30-45 seconds | 3-6 seconds | 3-8 tokens/sec |
| **32GB RAM + RTX 3060** | 15-25 seconds | 1-3 seconds | 15-25 tokens/sec |
| **32GB RAM + RTX 4080+** | 8-15 seconds | 1-2 seconds | 25-45 tokens/sec |
### Common Issues & Solutions
Performance optimization tips:
- First response is slow: Normal - kernels compile once, then speed up dramatically
- Out of VRAM error: Reduce context length in settings or switch to CPU mode
- Out of memory: Close memory-heavy apps (Chrome, games, video editors)
- Slow responses: Check if other apps are using GPU/CPU heavily
Quick fixes:
1. Restart Jan if responses become slow
2. Lower context window from 4096 to 2048 tokens
3. Enable CPU mode if GPU issues persist
4. Free up RAM by closing unused applications
## Frequently Asked Questions (FAQ)
### Is gpt-oss completely free?
Yes! gpt-oss is 100% free under Apache-2.0 license. No subscription fees, no token limits, no hidden costs.
### How much internet data does gpt-oss use?
Only for the initial 11GB download. After installation, gpt-oss works completely offline with zero internet usage.
### Can I use gpt-oss for commercial projects?
Absolutely! The Apache-2.0 license permits commercial use, modification, and distribution.
### Is gpt-oss better than ChatGPT?
gpt-oss offers different advantages: complete privacy, unlimited usage, offline capability, and zero cost. ChatGPT may offer stronger performance, but it requires an internet connection and a paid plan for full access.
### What happens to my conversations with gpt-oss?
Your conversations stay 100% on your device. Nothing is sent to OpenAI, Jan, or any external servers.
### Can I run gpt-oss on a Mac with 8GB RAM?
No. gpt-oss requires at least 16 GB of RAM, so consider upgrading your RAM or using a cloud-based alternative.
### How do I update gpt-oss to newer versions?
Jan automatically notifies you of updates. Simply click update in Jan Hub when new versions are available.
## Why Choose gpt-oss Over ChatGPT Plus?
gpt-oss advantages:
- $0/month vs $20/month for ChatGPT Plus
- 100% private - no data leaves your device
- Unlimited usage - no rate limits or restrictions
- Works offline - no internet required after setup
- Commercial use allowed - build businesses with it
When to choose ChatGPT Plus instead:
- You need the absolute best performance
- You don't want to manage local installation
- You have less than 16GB RAM
## Get started with gpt-oss today
![gpt-oss running locally with complete privacy](./_assets/run%20gpt-oss%20locally%20in%20jan.jpeg)
Ready to try gpt-oss?
- Download Jan: [https://jan.ai/](https://jan.ai/)
- View source code: [https://github.com/menloresearch/jan](https://github.com/menloresearch/jan)
- Need help? Check our [local AI guide](/post/run-ai-models-locally) for beginners
<CTABlog />

View File

@ -39,6 +39,7 @@ type LlamacppConfig = {
auto_unload: boolean auto_unload: boolean
chat_template: string chat_template: string
n_gpu_layers: number n_gpu_layers: number
override_tensor_buffer_t: string
ctx_size: number ctx_size: number
threads: number threads: number
threads_batch: number threads_batch: number
@ -144,7 +145,6 @@ export default class llamacpp_extension extends AIEngine {
readonly providerId: string = 'llamacpp' readonly providerId: string = 'llamacpp'
private config: LlamacppConfig private config: LlamacppConfig
private activeSessions: Map<number, SessionInfo> = new Map()
private providerPath!: string private providerPath!: string
private apiSecret: string = 'JustAskNow' private apiSecret: string = 'JustAskNow'
private pendingDownloads: Map<string, Promise<void>> = new Map() private pendingDownloads: Map<string, Promise<void>> = new Map()
@ -770,16 +770,6 @@ export default class llamacpp_extension extends AIEngine {
override async onUnload(): Promise<void> { override async onUnload(): Promise<void> {
// Terminate all active sessions // Terminate all active sessions
for (const [_, sInfo] of this.activeSessions) {
try {
await this.unload(sInfo.model_id)
} catch (error) {
logger.error(`Failed to unload model ${sInfo.model_id}:`, error)
}
}
// Clear the sessions map
this.activeSessions.clear()
} }
onSettingUpdate<T>(key: string, value: T): void { onSettingUpdate<T>(key: string, value: T): void {
@ -1103,67 +1093,13 @@ export default class llamacpp_extension extends AIEngine {
* Function to find a random port * Function to find a random port
*/ */
private async getRandomPort(): Promise<number> { private async getRandomPort(): Promise<number> {
const MAX_ATTEMPTS = 20000
let attempts = 0
while (attempts < MAX_ATTEMPTS) {
const port = Math.floor(Math.random() * 1000) + 3000
const isAlreadyUsed = Array.from(this.activeSessions.values()).some(
(info) => info.port === port
)
if (!isAlreadyUsed) {
const isAvailable = await invoke<boolean>('is_port_available', { port })
if (isAvailable) return port
}
attempts++
}
throw new Error('Failed to find an available port for the model to load')
}
private async sleep(ms: number): Promise<void> {
return new Promise((resolve) => setTimeout(resolve, ms))
}
private async waitForModelLoad(
sInfo: SessionInfo,
timeoutMs = 240_000
): Promise<void> {
await this.sleep(500) // Wait before first check
const start = Date.now()
while (Date.now() - start < timeoutMs) {
try { try {
const res = await fetch(`http://localhost:${sInfo.port}/health`) const port = await invoke<number>('get_random_port')
return port
if (res.status === 503) { } catch {
const body = await res.json() logger.error('Unable to find a suitable port')
const msg = body?.error?.message ?? 'Model loading' throw new Error('Unable to find a suitable port for model')
logger.info(`waiting for model load... (${msg})`)
} else if (res.ok) {
const body = await res.json()
if (body.status === 'ok') {
return
} else {
logger.warn('Unexpected OK response from /health:', body)
} }
} else {
logger.warn(`Unexpected status ${res.status} from /health`)
}
} catch (e) {
await this.unload(sInfo.model_id)
throw new Error(`Model appears to have crashed: ${e}`)
}
await this.sleep(800) // Retry interval
}
await this.unload(sInfo.model_id)
throw new Error(
`Timed out loading model after ${timeoutMs}... killing llamacpp`
)
} }
override async load( override async load(
@ -1171,7 +1107,7 @@ export default class llamacpp_extension extends AIEngine {
overrideSettings?: Partial<LlamacppConfig>, overrideSettings?: Partial<LlamacppConfig>,
isEmbedding: boolean = false isEmbedding: boolean = false
): Promise<SessionInfo> { ): Promise<SessionInfo> {
const sInfo = this.findSessionByModel(modelId) const sInfo = await this.findSessionByModel(modelId)
if (sInfo) { if (sInfo) {
throw new Error('Model already loaded!!') throw new Error('Model already loaded!!')
} }
@ -1262,6 +1198,14 @@ export default class llamacpp_extension extends AIEngine {
args.push('--jinja') args.push('--jinja')
args.push('--reasoning-format', 'none') args.push('--reasoning-format', 'none')
args.push('-m', modelPath) args.push('-m', modelPath)
// For overriding tensor buffer type, useful where
// massive MOE models can be made faster by keeping attention on the GPU
// and offloading the expert FFNs to the CPU.
// This is an expert level settings and should only be used by people
// who knows what they are doing.
// Takes a regex with matching tensor name as input
if (cfg.override_tensor_buffer_t)
args.push('--override-tensor', cfg.override_tensor_buffer_t)
args.push('-a', modelId) args.push('-a', modelId)
args.push('--port', String(port)) args.push('--port', String(port))
if (modelConfig.mmproj_path) { if (modelConfig.mmproj_path) {
@ -1333,27 +1277,20 @@ export default class llamacpp_extension extends AIEngine {
libraryPath, libraryPath,
args, args,
}) })
// Store the session info for later use
this.activeSessions.set(sInfo.pid, sInfo)
await this.waitForModelLoad(sInfo)
return sInfo return sInfo
} catch (error) { } catch (error) {
logger.error('Error loading llama-server:\n', error) logger.error('Error in load command:\n', error)
throw new Error(`Failed to load llama-server: ${error}`) throw error
} }
} }
override async unload(modelId: string): Promise<UnloadResult> { override async unload(modelId: string): Promise<UnloadResult> {
const sInfo: SessionInfo = this.findSessionByModel(modelId) const sInfo: SessionInfo = await this.findSessionByModel(modelId)
if (!sInfo) { if (!sInfo) {
throw new Error(`No active session found for model: ${modelId}`) throw new Error(`No active session found for model: ${modelId}`)
} }
const pid = sInfo.pid const pid = sInfo.pid
try { try {
this.activeSessions.delete(pid)
// Pass the PID as the session_id // Pass the PID as the session_id
const result = await invoke<UnloadResult>('unload_llama_model', { const result = await invoke<UnloadResult>('unload_llama_model', {
pid: pid, pid: pid,
@ -1364,13 +1301,11 @@ export default class llamacpp_extension extends AIEngine {
logger.info(`Successfully unloaded model with PID ${pid}`) logger.info(`Successfully unloaded model with PID ${pid}`)
} else { } else {
logger.warn(`Failed to unload model: ${result.error}`) logger.warn(`Failed to unload model: ${result.error}`)
this.activeSessions.set(sInfo.pid, sInfo)
} }
return result return result
} catch (error) { } catch (error) {
logger.error('Error in unload command:', error) logger.error('Error in unload command:', error)
this.activeSessions.set(sInfo.pid, sInfo)
return { return {
success: false, success: false,
error: `Failed to unload model: ${error}`, error: `Failed to unload model: ${error}`,
@ -1493,17 +1428,23 @@ export default class llamacpp_extension extends AIEngine {
} }
} }
private findSessionByModel(modelId: string): SessionInfo | undefined { private async findSessionByModel(modelId: string): Promise<SessionInfo> {
return Array.from(this.activeSessions.values()).find( try {
(session) => session.model_id === modelId let sInfo = await invoke<SessionInfo>('find_session_by_model', {
) modelId,
})
return sInfo
} catch (e) {
logger.error(e)
throw new Error(String(e))
}
} }
override async chat( override async chat(
opts: chatCompletionRequest, opts: chatCompletionRequest,
abortController?: AbortController abortController?: AbortController
): Promise<chatCompletion | AsyncIterable<chatCompletionChunk>> { ): Promise<chatCompletion | AsyncIterable<chatCompletionChunk>> {
const sessionInfo = this.findSessionByModel(opts.model) const sessionInfo = await this.findSessionByModel(opts.model)
if (!sessionInfo) { if (!sessionInfo) {
throw new Error(`No active session found for model: ${opts.model}`) throw new Error(`No active session found for model: ${opts.model}`)
} }
@ -1519,7 +1460,6 @@ export default class llamacpp_extension extends AIEngine {
throw new Error('Model appears to have crashed! Please reload!') throw new Error('Model appears to have crashed! Please reload!')
} }
} else { } else {
this.activeSessions.delete(sessionInfo.pid)
throw new Error('Model have crashed! Please reload!') throw new Error('Model have crashed! Please reload!')
} }
const baseUrl = `http://localhost:${sessionInfo.port}/v1` const baseUrl = `http://localhost:${sessionInfo.port}/v1`
@ -1568,11 +1508,13 @@ export default class llamacpp_extension extends AIEngine {
} }
override async getLoadedModels(): Promise<string[]> { override async getLoadedModels(): Promise<string[]> {
let lmodels: string[] = [] try {
for (const [_, sInfo] of this.activeSessions) { let models: string[] = await invoke<string[]>('get_loaded_models')
lmodels.push(sInfo.model_id) return models
} catch (e) {
logger.error(e)
throw new Error(e)
} }
return lmodels
} }
async getDevices(): Promise<DeviceList[]> { async getDevices(): Promise<DeviceList[]> {
@ -1602,7 +1544,7 @@ export default class llamacpp_extension extends AIEngine {
} }
async embed(text: string[]): Promise<EmbeddingResponse> { async embed(text: string[]): Promise<EmbeddingResponse> {
let sInfo = this.findSessionByModel('sentence-transformer-mini') let sInfo = await this.findSessionByModel('sentence-transformer-mini')
if (!sInfo) { if (!sInfo) {
const downloadedModelList = await this.list() const downloadedModelList = await this.list()
if ( if (

View File

@@ -19,7 +19,6 @@
     "dev:web": "yarn workspace @janhq/web-app dev",
     "dev:tauri": "yarn build:icon && yarn copy:assets:tauri && cross-env IS_CLEAN=true tauri dev",
     "copy:assets:tauri": "cpx \"pre-install/*.tgz\" \"src-tauri/resources/pre-install/\"",
-    "download:lib": "node ./scripts/download-lib.mjs",
     "download:bin": "node ./scripts/download-bin.mjs",
     "build:tauri:win32": "yarn download:bin && yarn tauri build",
     "build:tauri:linux": "yarn download:bin && ./src-tauri/build-utils/shim-linuxdeploy.sh yarn tauri build && ./src-tauri/build-utils/buildAppImage.sh",

View File

@@ -1,86 +0,0 @@
console.log('Script is running')
// scripts/download-lib.mjs
import https from 'https'
import fs, { mkdirSync } from 'fs'
import os from 'os'
import path from 'path'
import { copySync } from 'cpx'
function download(url, dest) {
return new Promise((resolve, reject) => {
console.log(`Downloading ${url} to ${dest}`)
const file = fs.createWriteStream(dest)
https
.get(url, (response) => {
console.log(`Response status code: ${response.statusCode}`)
if (
response.statusCode >= 300 &&
response.statusCode < 400 &&
response.headers.location
) {
// Handle redirect
const redirectURL = response.headers.location
console.log(`Redirecting to ${redirectURL}`)
download(redirectURL, dest).then(resolve, reject) // Recursive call
return
} else if (response.statusCode !== 200) {
reject(`Failed to get '${url}' (${response.statusCode})`)
return
}
response.pipe(file)
file.on('finish', () => {
file.close(resolve)
})
})
.on('error', (err) => {
fs.unlink(dest, () => reject(err.message))
})
})
}
async function main() {
console.log('Starting main function')
const platform = os.platform() // 'darwin', 'linux', 'win32'
const arch = os.arch() // 'x64', 'arm64', etc.
if (arch != 'x64') return
let filename
if (platform == 'linux')
filename = 'libvulkan.so'
else if (platform == 'win32')
filename = 'vulkan-1.dll'
else
return
const url = `https://catalog.jan.ai/${filename}`
const libDir = 'src-tauri/resources/lib'
const tempDir = 'scripts/dist'
try {
mkdirSync('scripts/dist')
} catch (err) {
// Expect EEXIST error if the directory already exists
}
console.log(`Downloading libvulkan...`)
const savePath = path.join(tempDir, filename)
if (!fs.existsSync(savePath)) {
await download(url, savePath)
}
// copy to tauri resources
try {
copySync(savePath, libDir)
} catch (err) {
// Expect EEXIST error
}
console.log('Downloads completed.')
}
main().catch((err) => {
console.error('Error:', err)
process.exit(1)
})

View File

@@ -0,0 +1,4 @@
[env]
# workaround needed to prevent `STATUS_ENTRYPOINT_NOT_FOUND` error in tests
# see https://github.com/tauri-apps/tauri/pull/4383#issuecomment-1212221864
__TAURI_WORKSPACE__ = "true"

src-tauri/Cargo.lock (generated, new file, 7,174 lines): diff suppressed because it is too large

View File

@@ -7,11 +7,29 @@ license = "MIT"
 repository = "https://github.com/menloresearch/jan"
 edition = "2021"
 rust-version = "1.77.2"
+resolver = "2"
 
 [lib]
 name = "app_lib"
 crate-type = ["staticlib", "cdylib", "rlib"]
 
+[features]
+default = [
+    "tauri/wry",
+    "tauri/common-controls-v6",
+    "tauri/x11",
+    "tauri/protocol-asset",
+    "tauri/macos-private-api",
+    "tauri/test",
+]
+test-tauri = [
+    "tauri/wry",
+    "tauri/x11",
+    "tauri/protocol-asset",
+    "tauri/macos-private-api",
+    "tauri/test",
+]
+
 [build-dependencies]
 tauri-build = { version = "2.0.2", features = [] }
@@ -19,9 +37,6 @@ tauri-build = { version = "2.0.2", features = [] }
 serde_json = "1.0"
 serde = { version = "1.0", features = ["derive"] }
 log = "0.4"
-tauri = { version = "2.5.0", features = [ "protocol-asset", "macos-private-api",
-    "test"
-] }
 tauri-plugin-log = "2.0.0-rc"
 tauri-plugin-shell = "2.2.0"
 tauri-plugin-os = "2.2.1"
@@ -59,6 +74,20 @@ sha2 = "0.10.9"
 base64 = "0.22.1"
 libloading = "0.8.7"
 thiserror = "2.0.12"
+
+[dependencies.tauri]
+version = "2.5.0"
+default-features = false
+features = [
+    "protocol-asset",
+    "macos-private-api",
+    "test",
+]
+
+[target.'cfg(windows)'.dev-dependencies]
+tempfile = "3.20.0"
+
 [target.'cfg(unix)'.dependencies]
 nix = "=0.30.1"
 
 [target.'cfg(windows)'.dependencies]
@@ -69,6 +98,3 @@ windows-sys = { version = "0.60.2", features = ["Win32_Storage_FileSystem"] }
 tauri-plugin-updater = "2"
 once_cell = "1.18"
 tauri-plugin-single-instance = { version = "2.0.0", features = ["deep-link"] }
-
-[target.'cfg(windows)'.dev-dependencies]
-tempfile = "3.20.0"

[Binary image file modified: 38 KiB before, 36 KiB after]

View File

@@ -94,7 +94,13 @@ pub fn update_app_configuration(
 #[tauri::command]
 pub fn get_jan_data_folder_path<R: Runtime>(app_handle: tauri::AppHandle<R>) -> PathBuf {
     if cfg!(test) {
-        return PathBuf::from("./data");
+        let path = std::env::current_dir()
+            .unwrap_or_else(|_| PathBuf::from("."))
+            .join("test-data");
+        if !path.exists() {
+            let _ = fs::create_dir_all(&path);
+        }
+        return path;
     }
 
     let app_configurations = get_app_configurations(app_handle);

View File

@@ -182,7 +182,7 @@ mod tests {
         assert!(get_jan_data_folder_path(app.handle().clone())
             .join(path)
             .exists());
-        fs::remove_dir_all(get_jan_data_folder_path(app.handle().clone()).join(path)).unwrap();
+        let _ = fs::remove_dir_all(get_jan_data_folder_path(app.handle().clone()).join(path));
     }
 
     #[test]
@@ -194,7 +194,7 @@ mod tests {
         assert_eq!(
             result,
             get_jan_data_folder_path(app.handle().clone())
-                .join("test_dir/test_file")
+                .join(&format!("test_dir{}test_file", std::path::MAIN_SEPARATOR))
                 .to_string_lossy()
                 .to_string()
         );
@@ -232,8 +232,7 @@ mod tests {
     #[test]
     fn test_readdir_sync() {
         let app = mock_app();
-        let path = "file://test_readdir_sync_dir";
-        let dir_path = get_jan_data_folder_path(app.handle().clone()).join(path);
+        let dir_path = get_jan_data_folder_path(app.handle().clone()).join("test_readdir_sync_dir");
         fs::create_dir_all(&dir_path).unwrap();
         File::create(dir_path.join("file1.txt")).unwrap();
         File::create(dir_path.join("file2.txt")).unwrap();
@@ -242,6 +241,6 @@ mod tests {
         let result = readdir_sync(app.handle().clone(), args).unwrap();
         assert_eq!(result.len(), 2);
-        fs::remove_dir_all(dir_path).unwrap();
+        let _ = fs::remove_dir_all(dir_path);
     }
 }

View File

@ -1,10 +1,6 @@
pub mod amd;
pub mod nvidia;
pub mod vulkan;
use std::sync::OnceLock; use std::sync::OnceLock;
use sysinfo::System; use sysinfo::System;
use tauri::{path::BaseDirectory, Manager}; use tauri;
static SYSTEM_INFO: OnceLock<SystemInfo> = OnceLock::new(); static SYSTEM_INFO: OnceLock<SystemInfo> = OnceLock::new();
@ -143,90 +139,12 @@ impl CpuStaticInfo {
} }
} }
// https://devicehunt.com/all-pci-vendors
pub const VENDOR_ID_AMD: u32 = 0x1002;
pub const VENDOR_ID_NVIDIA: u32 = 0x10DE;
pub const VENDOR_ID_INTEL: u32 = 0x8086;
#[derive(Debug, Clone)]
pub enum Vendor {
AMD,
NVIDIA,
Intel,
Unknown(u32),
}
impl serde::Serialize for Vendor {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
match self {
Vendor::AMD => "AMD".serialize(serializer),
Vendor::NVIDIA => "NVIDIA".serialize(serializer),
Vendor::Intel => "Intel".serialize(serializer),
Vendor::Unknown(vendor_id) => {
let formatted = format!("Unknown (vendor_id: {})", vendor_id);
serializer.serialize_str(&formatted)
}
}
}
}
impl Vendor {
pub fn from_vendor_id(vendor_id: u32) -> Self {
match vendor_id {
VENDOR_ID_AMD => Vendor::AMD,
VENDOR_ID_NVIDIA => Vendor::NVIDIA,
VENDOR_ID_INTEL => Vendor::Intel,
_ => Vendor::Unknown(vendor_id),
}
}
}
#[derive(Clone, Debug, serde::Serialize)]
pub struct GpuInfo {
pub name: String,
pub total_memory: u64,
pub vendor: Vendor,
pub uuid: String,
pub driver_version: String,
pub nvidia_info: Option<nvidia::NvidiaInfo>,
pub vulkan_info: Option<vulkan::VulkanInfo>,
}
impl GpuInfo {
pub fn get_usage(&self) -> GpuUsage {
match self.vendor {
Vendor::NVIDIA => self.get_usage_nvidia(),
Vendor::AMD => self.get_usage_amd(),
_ => self.get_usage_unsupported(),
}
}
pub fn get_usage_unsupported(&self) -> GpuUsage {
GpuUsage {
uuid: self.uuid.clone(),
used_memory: 0,
total_memory: 0,
}
}
}
#[derive(serde::Serialize, Clone, Debug)] #[derive(serde::Serialize, Clone, Debug)]
pub struct SystemInfo { pub struct SystemInfo {
cpu: CpuStaticInfo, cpu: CpuStaticInfo,
os_type: String, os_type: String,
os_name: String, os_name: String,
total_memory: u64, total_memory: u64,
gpus: Vec<GpuInfo>,
}
#[derive(serde::Serialize, Clone, Debug)]
pub struct GpuUsage {
uuid: String,
used_memory: u64,
total_memory: u64,
} }
#[derive(serde::Serialize, Clone, Debug)] #[derive(serde::Serialize, Clone, Debug)]
@ -234,62 +152,15 @@ pub struct SystemUsage {
cpu: f32, cpu: f32,
used_memory: u64, used_memory: u64,
total_memory: u64, total_memory: u64,
gpus: Vec<GpuUsage>,
}
fn get_jan_libvulkan_path<R: tauri::Runtime>(app: tauri::AppHandle<R>) -> String {
let lib_name = if cfg!(target_os = "windows") {
"vulkan-1.dll"
} else if cfg!(target_os = "linux") {
"libvulkan.so"
} else {
return "".to_string();
};
// NOTE: this does not work in test mode (mock app)
match app.path().resolve(
format!("resources/lib/{}", lib_name),
BaseDirectory::Resource,
) {
Ok(lib_path) => lib_path.to_string_lossy().to_string(),
Err(_) => "".to_string(),
}
} }
#[tauri::command] #[tauri::command]
pub fn get_system_info<R: tauri::Runtime>(app: tauri::AppHandle<R>) -> SystemInfo { pub fn get_system_info() -> SystemInfo {
SYSTEM_INFO SYSTEM_INFO
.get_or_init(|| { .get_or_init(|| {
let mut system = System::new(); let mut system = System::new();
system.refresh_memory(); system.refresh_memory();
let mut gpu_map = std::collections::HashMap::new();
for gpu in nvidia::get_nvidia_gpus() {
gpu_map.insert(gpu.uuid.clone(), gpu);
}
// try system vulkan first
let paths = vec!["".to_string(), get_jan_libvulkan_path(app.clone())];
let mut vulkan_gpus = vec![];
for path in paths {
vulkan_gpus = vulkan::get_vulkan_gpus(&path);
if !vulkan_gpus.is_empty() {
break;
}
}
for gpu in vulkan_gpus {
match gpu_map.get_mut(&gpu.uuid) {
// for existing NVIDIA GPUs, add Vulkan info
Some(nvidia_gpu) => {
nvidia_gpu.vulkan_info = gpu.vulkan_info;
}
None => {
gpu_map.insert(gpu.uuid.clone(), gpu);
}
}
}
let os_type = if cfg!(target_os = "windows") { let os_type = if cfg!(target_os = "windows") {
"windows" "windows"
} else if cfg!(target_os = "macos") { } else if cfg!(target_os = "macos") {
@ -306,14 +177,13 @@ pub fn get_system_info<R: tauri::Runtime>(app: tauri::AppHandle<R>) -> SystemInf
os_type: os_type.to_string(), os_type: os_type.to_string(),
os_name, os_name,
total_memory: system.total_memory() / 1024 / 1024, // bytes to MiB total_memory: system.total_memory() / 1024 / 1024, // bytes to MiB
gpus: gpu_map.into_values().collect(),
} }
}) })
.clone() .clone()
} }
#[tauri::command] #[tauri::command]
pub fn get_system_usage<R: tauri::Runtime>(app: tauri::AppHandle<R>) -> SystemUsage { pub fn get_system_usage() -> SystemUsage {
let mut system = System::new(); let mut system = System::new();
system.refresh_memory(); system.refresh_memory();
@ -330,30 +200,22 @@ pub fn get_system_usage<R: tauri::Runtime>(app: tauri::AppHandle<R>) -> SystemUs
cpu: cpu_usage, cpu: cpu_usage,
used_memory: system.used_memory() / 1024 / 1024, // bytes to MiB, used_memory: system.used_memory() / 1024 / 1024, // bytes to MiB,
total_memory: system.total_memory() / 1024 / 1024, // bytes to MiB, total_memory: system.total_memory() / 1024 / 1024, // bytes to MiB,
gpus: get_system_info(app.clone())
.gpus
.iter()
.map(|gpu| gpu.get_usage())
.collect(),
} }
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use tauri::test::mock_app;
#[test] #[test]
fn test_system_info() { fn test_system_info() {
let app = mock_app(); let info = get_system_info();
let info = get_system_info(app.handle().clone());
println!("System Static Info: {:?}", info); println!("System Static Info: {:?}", info);
} }
#[test] #[test]
fn test_system_usage() { fn test_system_usage() {
let app = mock_app(); let usage = get_system_usage();
let usage = get_system_usage(app.handle().clone());
println!("System Usage Info: {:?}", usage); println!("System Usage Info: {:?}", usage);
} }
} }

View File

@@ -1,210 +0,0 @@
use super::{GpuInfo, GpuUsage};
impl GpuInfo {
#[cfg(not(target_os = "linux"))]
#[cfg(not(target_os = "windows"))]
pub fn get_usage_amd(&self) -> GpuUsage {
self.get_usage_unsupported()
}
#[cfg(target_os = "linux")]
pub fn get_usage_amd(&self) -> GpuUsage {
use std::fs;
use std::path::Path;
let device_id = match &self.vulkan_info {
Some(vulkan_info) => vulkan_info.device_id,
None => {
log::error!("get_usage_amd called without Vulkan info");
return self.get_usage_unsupported();
}
};
let closure = || -> Result<GpuUsage, Box<dyn std::error::Error>> {
for subdir in fs::read_dir("/sys/class/drm")? {
let device_path = subdir?.path().join("device");
// Check if this is an AMD GPU by looking for amdgpu directory
if !device_path
.join("driver/module/drivers/pci:amdgpu")
.exists()
{
continue;
}
// match device_id from Vulkan info
let this_device_id_str = fs::read_to_string(device_path.join("device"))?;
let this_device_id = u32::from_str_radix(
this_device_id_str
.strip_prefix("0x")
.unwrap_or(&this_device_id_str)
.trim(),
16,
)?;
if this_device_id != device_id {
continue;
}
let read_mem = |path: &Path| -> u64 {
fs::read_to_string(path)
.map(|content| content.trim().parse::<u64>().unwrap_or(0))
.unwrap_or(0)
/ 1024
/ 1024 // Convert bytes to MiB
};
return Ok(GpuUsage {
uuid: self.uuid.clone(),
total_memory: read_mem(&device_path.join("mem_info_vram_total")),
used_memory: read_mem(&device_path.join("mem_info_vram_used")),
});
}
Err(format!("GPU not found").into())
};
match closure() {
Ok(usage) => usage,
Err(e) => {
log::error!(
"Failed to get memory usage for AMD GPU {:#x}: {}",
device_id,
e
);
self.get_usage_unsupported()
}
}
}
#[cfg(target_os = "windows")]
pub fn get_usage_amd(&self) -> GpuUsage {
use std::collections::HashMap;
let memory_usage_map = windows_impl::get_gpu_usage().unwrap_or_else(|_| {
log::error!("Failed to get AMD GPU memory usage");
HashMap::new()
});
match memory_usage_map.get(&self.name) {
Some(&used_memory) => GpuUsage {
uuid: self.uuid.clone(),
used_memory: used_memory as u64,
total_memory: self.total_memory,
},
None => self.get_usage_unsupported(),
}
}
}
// TODO: refactor this into a more egonomic API
#[cfg(target_os = "windows")]
mod windows_impl {
use libc;
use libloading::{Library, Symbol};
use std::collections::HashMap;
use std::ffi::{c_char, c_int, c_void, CStr};
use std::mem::{self, MaybeUninit};
use std::ptr;
// === FFI Struct Definitions ===
#[repr(C)]
#[allow(non_snake_case)]
#[derive(Debug, Copy, Clone)]
pub struct AdapterInfo {
pub iSize: c_int,
pub iAdapterIndex: c_int,
pub strUDID: [c_char; 256],
pub iBusNumber: c_int,
pub iDeviceNumber: c_int,
pub iFunctionNumber: c_int,
pub iVendorID: c_int,
pub strAdapterName: [c_char; 256],
pub strDisplayName: [c_char; 256],
pub iPresent: c_int,
pub iExist: c_int,
pub strDriverPath: [c_char; 256],
pub strDriverPathExt: [c_char; 256],
pub strPNPString: [c_char; 256],
pub iOSDisplayIndex: c_int,
}
type ADL_MAIN_MALLOC_CALLBACK = Option<unsafe extern "C" fn(i32) -> *mut c_void>;
type ADL_MAIN_CONTROL_CREATE = unsafe extern "C" fn(ADL_MAIN_MALLOC_CALLBACK, c_int) -> c_int;
type ADL_MAIN_CONTROL_DESTROY = unsafe extern "C" fn() -> c_int;
type ADL_ADAPTER_NUMBEROFADAPTERS_GET = unsafe extern "C" fn(*mut c_int) -> c_int;
type ADL_ADAPTER_ADAPTERINFO_GET = unsafe extern "C" fn(*mut AdapterInfo, c_int) -> c_int;
type ADL_ADAPTER_ACTIVE_GET = unsafe extern "C" fn(c_int, *mut c_int) -> c_int;
type ADL_GET_DEDICATED_VRAM_USAGE =
unsafe extern "C" fn(*mut c_void, c_int, *mut c_int) -> c_int;
// === ADL Memory Allocator ===
unsafe extern "C" fn adl_malloc(i_size: i32) -> *mut c_void {
libc::malloc(i_size as usize)
}
pub fn get_gpu_usage() -> Result<HashMap<String, i32>, Box<dyn std::error::Error>> {
unsafe {
let lib = Library::new("atiadlxx.dll").or_else(|_| Library::new("atiadlxy.dll"))?;
let adl_main_control_create: Symbol<ADL_MAIN_CONTROL_CREATE> =
lib.get(b"ADL_Main_Control_Create")?;
let adl_main_control_destroy: Symbol<ADL_MAIN_CONTROL_DESTROY> =
lib.get(b"ADL_Main_Control_Destroy")?;
let adl_adapter_number_of_adapters_get: Symbol<ADL_ADAPTER_NUMBEROFADAPTERS_GET> =
lib.get(b"ADL_Adapter_NumberOfAdapters_Get")?;
let adl_adapter_adapter_info_get: Symbol<ADL_ADAPTER_ADAPTERINFO_GET> =
lib.get(b"ADL_Adapter_AdapterInfo_Get")?;
let adl_adapter_active_get: Symbol<ADL_ADAPTER_ACTIVE_GET> =
lib.get(b"ADL_Adapter_Active_Get")?;
let adl_get_dedicated_vram_usage: Symbol<ADL_GET_DEDICATED_VRAM_USAGE> =
lib.get(b"ADL2_Adapter_DedicatedVRAMUsage_Get")?;
// TODO: try to put nullptr here. then we don't need direct libc dep
if adl_main_control_create(Some(adl_malloc), 1) != 0 {
return Err("ADL initialization error!".into());
}
// NOTE: after this call, we must call ADL_Main_Control_Destroy
// whenver we encounter an error
let mut num_adapters: c_int = 0;
if adl_adapter_number_of_adapters_get(&mut num_adapters as *mut _) != 0 {
return Err("Cannot get number of adapters".into());
}
let mut vram_usages = HashMap::new();
if num_adapters > 0 {
let mut adapter_info: Vec<AdapterInfo> =
vec![MaybeUninit::zeroed().assume_init(); num_adapters as usize];
let ret = adl_adapter_adapter_info_get(
adapter_info.as_mut_ptr(),
mem::size_of::<AdapterInfo>() as i32 * num_adapters,
);
if ret != 0 {
return Err("Cannot get adapter info".into());
}
for adapter in adapter_info.iter() {
let mut is_active = 0;
adl_adapter_active_get(adapter.iAdapterIndex, &mut is_active);
if is_active != 0 {
let mut vram_mb = 0;
let _ = adl_get_dedicated_vram_usage(
ptr::null_mut(),
adapter.iAdapterIndex,
&mut vram_mb,
);
// NOTE: adapter name might not be unique?
let name = CStr::from_ptr(adapter.strAdapterName.as_ptr())
.to_string_lossy()
.into_owned();
vram_usages.insert(name, vram_mb);
}
}
}
adl_main_control_destroy();
Ok(vram_usages)
}
}
}
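For reference, the ADL helper above reports per-adapter dedicated VRAM usage keyed by adapter name. A minimal sketch of exercising it in isolation (Windows only; the example module and test name are illustrative and not part of this change):

#[cfg(all(test, target_os = "windows"))]
mod adl_usage_example {
    // Illustrative smoke test for the windows_impl helper above (not part of the commit).
    #[test]
    fn print_amd_vram_usage() {
        match super::windows_impl::get_gpu_usage() {
            Ok(usage) => {
                for (adapter_name, vram) in usage {
                    println!("{}: {} (dedicated VRAM in use, as reported by ADL)", adapter_name, vram);
                }
            }
            Err(e) => eprintln!("ADL unavailable (likely no AMD driver present): {}", e),
        }
    }
}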

View File

@ -1,120 +0,0 @@
use super::{GpuInfo, GpuUsage, Vendor};
use nvml_wrapper::{error::NvmlError, Nvml};
use std::sync::OnceLock;
static NVML: OnceLock<Option<Nvml>> = OnceLock::new();
#[derive(Debug, Clone, serde::Serialize)]
pub struct NvidiaInfo {
pub index: u32,
pub compute_capability: String,
}
fn get_nvml() -> Option<&'static Nvml> {
NVML.get_or_init(|| {
let result = Nvml::init().or_else(|e| {
// fallback
if cfg!(target_os = "linux") {
let lib_path = std::ffi::OsStr::new("libnvidia-ml.so.1");
Nvml::builder().lib_path(lib_path).init()
} else {
Err(e)
}
});
// NvmlError doesn't implement Copy, so we have to store an Option in OnceLock
match result {
Ok(nvml) => Some(nvml),
Err(e) => {
log::error!("Unable to initialize NVML: {}", e);
None
}
}
})
.as_ref()
}
impl GpuInfo {
pub fn get_usage_nvidia(&self) -> GpuUsage {
let index = match self.nvidia_info {
Some(ref nvidia_info) => nvidia_info.index,
None => {
log::error!("get_usage_nvidia() called on non-NVIDIA GPU");
return self.get_usage_unsupported();
}
};
let closure = || -> Result<GpuUsage, NvmlError> {
let nvml = get_nvml().ok_or(NvmlError::Unknown)?;
let device = nvml.device_by_index(index)?;
let mem_info = device.memory_info()?;
Ok(GpuUsage {
uuid: self.uuid.clone(),
used_memory: mem_info.used / 1024 / 1024, // bytes to MiB
total_memory: mem_info.total / 1024 / 1024, // bytes to MiB
})
};
closure().unwrap_or_else(|e| {
log::error!("Failed to get memory usage for NVIDIA GPU {}: {}", index, e);
self.get_usage_unsupported()
})
}
}
pub fn get_nvidia_gpus() -> Vec<GpuInfo> {
let closure = || -> Result<Vec<GpuInfo>, NvmlError> {
let nvml = get_nvml().ok_or(NvmlError::Unknown)?;
let num_gpus = nvml.device_count()?;
let driver_version = nvml.sys_driver_version()?;
let mut gpus = Vec::with_capacity(num_gpus as usize);
for i in 0..num_gpus {
let device = nvml.device_by_index(i)?;
gpus.push(GpuInfo {
name: device.name()?,
total_memory: device.memory_info()?.total / 1024 / 1024, // bytes to MiB
vendor: Vendor::NVIDIA,
uuid: {
let mut uuid = device.uuid()?;
if uuid.starts_with("GPU-") {
uuid = uuid[4..].to_string();
}
uuid
},
driver_version: driver_version.clone(),
nvidia_info: Some(NvidiaInfo {
index: i,
compute_capability: {
let cc = device.cuda_compute_capability()?;
format!("{}.{}", cc.major, cc.minor)
},
}),
vulkan_info: None,
});
}
Ok(gpus)
};
match closure() {
Ok(gpus) => gpus,
Err(e) => {
log::error!("Failed to get NVIDIA GPUs: {}", e);
vec![]
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_get_nvidia_gpus() {
let gpus = get_nvidia_gpus();
for (i, gpu) in gpus.iter().enumerate() {
println!("GPU {}:", i);
println!(" {:?}", gpu);
println!(" {:?}", gpu.get_usage());
}
}
}

View File

@ -1,145 +0,0 @@
use super::{GpuInfo, Vendor};
use ash::{vk, Entry};
#[derive(Debug, Clone, serde::Serialize)]
pub struct VulkanInfo {
pub index: u64,
pub device_type: String,
pub api_version: String,
pub device_id: u32,
}
fn parse_uuid(bytes: &[u8; 16]) -> String {
format!(
"{:02x}{:02x}{:02x}{:02x}-\
{:02x}{:02x}-\
{:02x}{:02x}-\
{:02x}{:02x}-\
{:02x}{:02x}{:02x}{:02x}{:02x}{:02x}",
bytes[0],
bytes[1],
bytes[2],
bytes[3],
bytes[4],
bytes[5],
bytes[6],
bytes[7],
bytes[8],
bytes[9],
bytes[10],
bytes[11],
bytes[12],
bytes[13],
bytes[14],
bytes[15],
)
}
pub fn get_vulkan_gpus(lib_path: &str) -> Vec<GpuInfo> {
match get_vulkan_gpus_internal(lib_path) {
Ok(gpus) => gpus,
Err(e) => {
log::error!("Failed to get Vulkan GPUs: {:?}", e);
vec![]
}
}
}
fn parse_c_string(buf: &[i8]) -> String {
unsafe { std::ffi::CStr::from_ptr(buf.as_ptr()) }
.to_str()
.unwrap_or_default()
.to_string()
}
fn get_vulkan_gpus_internal(lib_path: &str) -> Result<Vec<GpuInfo>, Box<dyn std::error::Error>> {
let entry = if lib_path.is_empty() {
unsafe { Entry::load()? }
} else {
unsafe { Entry::load_from(lib_path)? }
};
let app_info = vk::ApplicationInfo {
api_version: vk::make_api_version(0, 1, 1, 0),
..Default::default()
};
let create_info = vk::InstanceCreateInfo {
p_application_info: &app_info,
..Default::default()
};
let instance = unsafe { entry.create_instance(&create_info, None)? };
let mut device_info_list = vec![];
for (i, device) in unsafe { instance.enumerate_physical_devices()? }
.iter()
.enumerate()
{
// create a chain of three property structs for VkPhysicalDeviceProperties2
// https://registry.khronos.org/vulkan/specs/latest/man/html/VkPhysicalDeviceProperties2.html
// props2 -> driver_props -> id_props
let mut id_props = vk::PhysicalDeviceIDProperties::default();
let mut driver_props = vk::PhysicalDeviceDriverProperties {
p_next: &mut id_props as *mut _ as *mut std::ffi::c_void,
..Default::default()
};
let mut props2 = vk::PhysicalDeviceProperties2 {
p_next: &mut driver_props as *mut _ as *mut std::ffi::c_void,
..Default::default()
};
unsafe {
instance.get_physical_device_properties2(*device, &mut props2);
}
let props = props2.properties;
if props.device_type == vk::PhysicalDeviceType::CPU {
continue;
}
let device_info = GpuInfo {
name: parse_c_string(&props.device_name),
total_memory: unsafe { instance.get_physical_device_memory_properties(*device) }
.memory_heaps
.iter()
.filter(|heap| heap.flags.contains(vk::MemoryHeapFlags::DEVICE_LOCAL))
.map(|heap| heap.size / (1024 * 1024))
.sum(),
vendor: Vendor::from_vendor_id(props.vendor_id),
uuid: parse_uuid(&id_props.device_uuid),
driver_version: parse_c_string(&driver_props.driver_info),
nvidia_info: None,
vulkan_info: Some(VulkanInfo {
index: i as u64,
device_type: format!("{:?}", props.device_type),
api_version: format!(
"{}.{}.{}",
vk::api_version_major(props.api_version),
vk::api_version_minor(props.api_version),
vk::api_version_patch(props.api_version)
),
device_id: props.device_id,
}),
};
device_info_list.push(device_info);
}
unsafe {
instance.destroy_instance(None);
}
Ok(device_info_list)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_get_vulkan_gpus() {
let gpus = get_vulkan_gpus("");
for (i, gpu) in gpus.iter().enumerate() {
println!("GPU {}:", i);
println!(" {:?}", gpu);
println!(" {:?}", gpu.get_usage());
}
}
}

View File

@ -1003,9 +1003,18 @@ mod tests {
#[tokio::test] #[tokio::test]
async fn test_run_mcp_commands() { async fn test_run_mcp_commands() {
let app = mock_app(); let app = mock_app();
// Create a mock mcp_config.json file
let config_path = "mcp_config.json"; // Get the app path where the config should be created
let mut file: File = File::create(config_path).expect("Failed to create config file"); let app_path = get_jan_data_folder_path(app.handle().clone());
let config_path = app_path.join("mcp_config.json");
// Ensure the directory exists
if let Some(parent) = config_path.parent() {
std::fs::create_dir_all(parent).expect("Failed to create parent directory");
}
// Create a mock mcp_config.json file at the correct location
let mut file: File = File::create(&config_path).expect("Failed to create config file");
file.write_all(b"{\"mcpServers\":{}}") file.write_all(b"{\"mcpServers\":{}}")
.expect("Failed to write to config file"); .expect("Failed to write to config file");
@ -1018,6 +1027,6 @@ mod tests {
assert!(result.is_ok()); assert!(result.is_ok());
// Clean up the mock config file // Clean up the mock config file
std::fs::remove_file(config_path).expect("Failed to remove config file"); std::fs::remove_file(&config_path).expect("Failed to remove config file");
} }
} }

View File

@ -348,7 +348,7 @@ async fn proxy_request(
let sessions_guard = sessions.lock().await; let sessions_guard = sessions.lock().await;
if sessions_guard.is_empty() { if sessions_guard.is_empty() {
log::warn!("Request for model '{}' but no backend servers are running.", model_id); log::warn!("Request for model '{}' but no models are running.", model_id);
let mut error_response = Response::builder().status(StatusCode::SERVICE_UNAVAILABLE); let mut error_response = Response::builder().status(StatusCode::SERVICE_UNAVAILABLE);
error_response = add_cors_headers_with_host_and_origin( error_response = add_cors_headers_with_host_and_origin(
error_response, error_response,
@ -356,7 +356,7 @@ async fn proxy_request(
&origin_header, &origin_header,
&config.trusted_hosts, &config.trusted_hosts,
); );
return Ok(error_response.body(Body::from("No backend model servers are available")).unwrap()); return Ok(error_response.body(Body::from("No models are available")).unwrap());
} }
if let Some(session) = sessions_guard if let Some(session) = sessions_guard
@ -366,9 +366,8 @@ async fn proxy_request(
target_port = Some(session.info.port); target_port = Some(session.info.port);
session_api_key = Some(session.info.api_key.clone()); session_api_key = Some(session.info.api_key.clone());
log::debug!( log::debug!(
"Found session for model_id {} on port {}", "Found session for model_id {}",
model_id, model_id,
session.info.port
); );
} else { } else {
log::warn!("No running session found for model_id: {}", model_id); log::warn!("No running session found for model_id: {}", model_id);
@ -382,7 +381,7 @@ async fn proxy_request(
); );
return Ok(error_response return Ok(error_response
.body(Body::from(format!( .body(Body::from(format!(
"No running server found for model '{}'", "No running session found for model '{}'",
model_id model_id
))) )))
.unwrap()); .unwrap());
@ -494,7 +493,7 @@ async fn proxy_request(
let port = match target_port { let port = match target_port {
Some(p) => p, Some(p) => p,
None => { None => {
log::error!("Internal routing error: target_port is None after successful lookup"); log::error!("Internal API server routing error: target is None after successful lookup");
let mut error_response = Response::builder().status(StatusCode::INTERNAL_SERVER_ERROR); let mut error_response = Response::builder().status(StatusCode::INTERNAL_SERVER_ERROR);
error_response = add_cors_headers_with_host_and_origin( error_response = add_cors_headers_with_host_and_origin(
error_response, error_response,
@ -509,7 +508,6 @@ async fn proxy_request(
}; };
let upstream_url = format!("http://127.0.0.1:{}{}", port, destination_path); let upstream_url = format!("http://127.0.0.1:{}{}", port, destination_path);
log::debug!("Proxying request to: {}", upstream_url);
let mut outbound_req = client.request(method.clone(), &upstream_url); let mut outbound_req = client.request(method.clone(), &upstream_url);
@ -587,7 +585,7 @@ async fn proxy_request(
Ok(builder.body(body).unwrap()) Ok(builder.body(body).unwrap())
} }
Err(e) => { Err(e) => {
let error_msg = format!("Proxy request to {} failed: {}", upstream_url, e); let error_msg = format!("Proxy request to model failed: {}", e);
log::error!("{}", error_msg); log::error!("{}", error_msg);
let mut error_response = Response::builder().status(StatusCode::BAD_GATEWAY); let mut error_response = Response::builder().status(StatusCode::BAD_GATEWAY);
error_response = add_cors_headers_with_host_and_origin( error_response = add_cors_headers_with_host_and_origin(
@ -726,7 +724,7 @@ pub async fn start_server(
}); });
let server = Server::bind(&addr).serve(make_svc); let server = Server::bind(&addr).serve(make_svc);
log::info!("Proxy server started on http://{}", addr); log::info!("Jan API server started on http://{}", addr);
let server_task = tokio::spawn(async move { let server_task = tokio::spawn(async move {
if let Err(e) = server.await { if let Err(e) = server.await {
@ -748,9 +746,9 @@ pub async fn stop_server(
if let Some(handle) = handle_guard.take() { if let Some(handle) = handle_guard.take() {
handle.abort(); handle.abort();
*handle_guard = None; *handle_guard = None;
log::info!("Proxy server stopped"); log::info!("Jan API server stopped");
} else { } else {
log::debug!("No server was running"); log::debug!("Server was not running");
} }
Ok(()) Ok(())

View File

@ -218,7 +218,7 @@ pub async fn delete_thread<R: Runtime>(
) -> Result<(), String> { ) -> Result<(), String> {
let thread_dir = get_thread_dir(app_handle.clone(), &thread_id); let thread_dir = get_thread_dir(app_handle.clone(), &thread_id);
if thread_dir.exists() { if thread_dir.exists() {
fs::remove_dir_all(thread_dir).map_err(|e| e.to_string())?; let _ = fs::remove_dir_all(thread_dir);
} }
Ok(()) Ok(())
} }
@ -518,7 +518,7 @@ mod tests {
assert!(threads.len() > 0); assert!(threads.len() > 0);
// Clean up // Clean up
fs::remove_dir_all(data_dir).unwrap(); let _ = fs::remove_dir_all(data_dir);
} }
#[tokio::test] #[tokio::test]
@ -565,7 +565,7 @@ mod tests {
assert_eq!(messages[0]["role"], "user"); assert_eq!(messages[0]["role"], "user");
// Clean up // Clean up
fs::remove_dir_all(data_dir).unwrap(); let _ = fs::remove_dir_all(data_dir);
} }
#[tokio::test] #[tokio::test]
@ -608,6 +608,6 @@ mod tests {
assert_eq!(got["assistant_name"], "Test Assistant"); assert_eq!(got["assistant_name"], "Test Assistant");
// Clean up // Clean up
fs::remove_dir_all(data_dir).unwrap(); let _ = fs::remove_dir_all(data_dir);
} }
} }

View File

@ -691,7 +691,17 @@ mod tests {
config.ignore_ssl = Some(false); config.ignore_ssl = Some(false);
assert!(validate_proxy_config(&config).is_ok()); assert!(validate_proxy_config(&config).is_ok());
assert!(create_proxy_from_config(&config).is_ok());
// SOCKS proxies are not supported by reqwest::Proxy::all()
// This test should expect an error for SOCKS proxies
let result = create_proxy_from_config(&config);
assert!(result.is_err());
// Test with HTTP proxy instead which is supported
let mut http_config = create_test_proxy_config("http://proxy.example.com:8080");
http_config.ignore_ssl = Some(false);
assert!(validate_proxy_config(&http_config).is_ok());
assert!(create_proxy_from_config(&http_config).is_ok());
} }
#[test] #[test]

View File

@ -1,7 +1,9 @@
use base64::{engine::general_purpose, Engine as _}; use base64::{engine::general_purpose, Engine as _};
use hmac::{Hmac, Mac}; use hmac::{Hmac, Mac};
use rand::{rngs::StdRng, Rng, SeedableRng};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use sha2::Sha256; use sha2::Sha256;
use std::collections::HashSet;
use std::path::PathBuf; use std::path::PathBuf;
use std::process::Stdio; use std::process::Stdio;
use std::time::Duration; use std::time::Duration;
@ -17,19 +19,92 @@ use crate::core::state::AppState;
use crate::core::state::LLamaBackendSession; use crate::core::state::LLamaBackendSession;
type HmacSha256 = Hmac<Sha256>; type HmacSha256 = Hmac<Sha256>;
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
pub enum ErrorCode {
BinaryNotFound,
ModelFileNotFound,
LibraryPathInvalid,
// --- Model Loading Errors ---
ModelLoadFailed,
DraftModelLoadFailed,
MultimodalProjectorLoadFailed,
ModelArchNotSupported,
ModelLoadTimedOut,
LlamaCppProcessError,
// --- Memory Errors ---
OutOfMemory,
// --- Internal Application Errors ---
DeviceListParseFailed,
IoError,
InternalError,
}
#[derive(Debug, Clone, Serialize, thiserror::Error)]
#[error("LlamacppError {{ code: {code:?}, message: \"{message}\" }}")]
pub struct LlamacppError {
pub code: ErrorCode,
pub message: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub details: Option<String>,
}
impl LlamacppError {
pub fn new(code: ErrorCode, message: String, details: Option<String>) -> Self {
Self {
code,
message,
details,
}
}
/// Parses stderr from llama.cpp and creates a specific LlamacppError.
pub fn from_stderr(stderr: &str) -> Self {
let lower_stderr = stderr.to_lowercase();
// TODO: add others
let is_out_of_memory = lower_stderr.contains("out of memory")
|| lower_stderr.contains("insufficient memory")
|| lower_stderr.contains("erroroutofdevicememory") // vulkan specific
|| lower_stderr.contains("kiogpucommandbuffercallbackerroroutofmemory") // Metal-specific error code
|| lower_stderr.contains("cuda_error_out_of_memory"); // CUDA-specific
if is_out_of_memory {
return Self::new(
ErrorCode::OutOfMemory,
"Out of memory. The model requires more RAM or VRAM than available.".into(),
Some(stderr.into()),
);
}
if lower_stderr.contains("error loading model architecture") {
return Self::new(
ErrorCode::ModelArchNotSupported,
"The model's architecture is not supported by this version of the backend.".into(),
Some(stderr.into()),
);
}
Self::new(
ErrorCode::LlamaCppProcessError,
"The model process encountered an unexpected error.".into(),
Some(stderr.into()),
)
}
}
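As a quick illustration, the from_stderr classification above maps well-known llama.cpp failure strings to error codes; a sketch under the definitions in this hunk (the test module and sample stderr strings are hypothetical, not part of the commit):

#[cfg(test)]
mod llamacpp_error_examples {
    use super::{ErrorCode, LlamacppError};
    // Illustrative only; the sample stderr strings below are hypothetical.
    #[test]
    fn classifies_common_failures() {
        let oom = LlamacppError::from_stderr("ggml: CUDA_ERROR_OUT_OF_MEMORY: out of memory");
        assert!(matches!(oom.code, ErrorCode::OutOfMemory));
        let arch = LlamacppError::from_stderr("llama_model_load: error loading model architecture");
        assert!(matches!(arch.code, ErrorCode::ModelArchNotSupported));
    }
}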
// Error type for server commands // Error type for server commands
#[derive(Debug, thiserror::Error)] #[derive(Debug, thiserror::Error)]
pub enum ServerError { pub enum ServerError {
#[error("llamacpp error: {0}")] #[error(transparent)]
LlamacppError(String), Llamacpp(#[from] LlamacppError),
#[error("Failed to locate server binary: {0}")]
BinaryNotFound(String),
#[error("IO error: {0}")] #[error("IO error: {0}")]
Io(#[from] std::io::Error), Io(#[from] std::io::Error),
#[error("Jan API error: {0}")]
#[error("Tauri error: {0}")]
Tauri(#[from] tauri::Error), Tauri(#[from] tauri::Error),
#[error("Parse error: {0}")]
ParseError(String),
} }
// impl serialization for tauri // impl serialization for tauri
@ -38,7 +113,20 @@ impl serde::Serialize for ServerError {
where where
S: serde::Serializer, S: serde::Serializer,
{ {
serializer.serialize_str(self.to_string().as_ref()) let error_to_serialize: LlamacppError = match self {
ServerError::Llamacpp(err) => err.clone(),
ServerError::Io(e) => LlamacppError::new(
ErrorCode::IoError,
"An input/output error occurred.".into(),
Some(e.to_string()),
),
ServerError::Tauri(e) => LlamacppError::new(
ErrorCode::InternalError,
"An internal application error occurred.".into(),
Some(e.to_string()),
),
};
error_to_serialize.serialize(serializer)
} }
} }
@ -108,14 +196,17 @@ pub async fn load_llama_model(
let server_path_buf = PathBuf::from(backend_path); let server_path_buf = PathBuf::from(backend_path);
if !server_path_buf.exists() { if !server_path_buf.exists() {
let err_msg = format!("Binary not found at {:?}", backend_path);
log::error!( log::error!(
"Server binary not found at expected path: {:?}", "Server binary not found at expected path: {:?}",
backend_path backend_path
); );
return Err(ServerError::BinaryNotFound(format!( return Err(LlamacppError::new(
"Binary not found at {:?}", ErrorCode::BinaryNotFound,
backend_path "The llama.cpp server binary could not be found.".into(),
))); Some(err_msg),
)
.into());
} }
let port_str = args let port_str = args
@ -132,22 +223,35 @@ pub async fn load_llama_model(
} }
}; };
// FOR MODEL PATH; TODO: DO SIMILARLY FOR MMPROJ PATH // FOR MODEL PATH; TODO: DO SIMILARLY FOR MMPROJ PATH
let model_path_index = args let model_path_index = args.iter().position(|arg| arg == "-m").ok_or_else(|| {
.iter() LlamacppError::new(
.position(|arg| arg == "-m") ErrorCode::ModelLoadFailed,
.ok_or(ServerError::LlamacppError("Missing `-m` flag".into()))?; "Model path argument '-m' is missing.".into(),
None,
)
})?;
let model_path = args let model_path = args.get(model_path_index + 1).cloned().ok_or_else(|| {
.get(model_path_index + 1) LlamacppError::new(
.ok_or(ServerError::LlamacppError("Missing path after `-m`".into()))? ErrorCode::ModelLoadFailed,
.clone(); "Model path was not provided after '-m' flag.".into(),
None,
)
})?;
let model_path_pb = PathBuf::from(model_path); let model_path_pb = PathBuf::from(&model_path);
if !model_path_pb.exists() { if !model_path_pb.exists() {
return Err(ServerError::LlamacppError(format!( let err_msg = format!(
"Invalid or inaccessible model path: {}", "Invalid or inaccessible model path: {}",
model_path_pb.display().to_string(), model_path_pb.display()
))); );
log::error!("{}", &err_msg);
return Err(LlamacppError::new(
ErrorCode::ModelFileNotFound,
"The specified model file does not exist or is not accessible.".into(),
Some(err_msg),
)
.into());
} }
#[cfg(windows)] #[cfg(windows)]
{ {
@ -283,13 +387,13 @@ pub async fn load_llama_model(
|| line_lower.contains("starting the main loop") || line_lower.contains("starting the main loop")
|| line_lower.contains("server listening on") || line_lower.contains("server listening on")
{ {
log::info!("Server appears to be ready based on stderr: '{}'", line); log::info!("Model appears to be ready based on logs: '{}'", line);
let _ = ready_tx.send(true).await; let _ = ready_tx.send(true).await;
} }
} }
} }
Err(e) => { Err(e) => {
log::error!("Error reading stderr: {}", e); log::error!("Error reading logs: {}", e);
break; break;
} }
} }
@ -302,21 +406,21 @@ pub async fn load_llama_model(
if let Some(status) = child.try_wait()? { if let Some(status) = child.try_wait()? {
if !status.success() { if !status.success() {
let stderr_output = stderr_task.await.unwrap_or_default(); let stderr_output = stderr_task.await.unwrap_or_default();
log::error!("llama.cpp exited early with code {:?}", status); log::error!("llama.cpp failed early with code {:?}", status);
log::error!("--- stderr ---\n{}", stderr_output); log::error!("{}", stderr_output);
return Err(ServerError::LlamacppError(stderr_output.trim().to_string())); return Err(LlamacppError::from_stderr(&stderr_output).into());
} }
} }
// Wait for server to be ready or timeout // Wait for server to be ready or timeout
let timeout_duration = Duration::from_secs(300); // 5 minutes timeout let timeout_duration = Duration::from_secs(180); // 3 minutes timeout
let start_time = Instant::now(); let start_time = Instant::now();
log::info!("Waiting for server to be ready..."); log::info!("Waiting for model session to be ready...");
loop { loop {
tokio::select! { tokio::select! {
// Server is ready // Server is ready
Some(true) = ready_rx.recv() => { Some(true) = ready_rx.recv() => {
log::info!("Server is ready to accept requests!"); log::info!("Model is ready to accept requests!");
break; break;
} }
// Check for process exit more frequently // Check for process exit more frequently
@ -326,10 +430,10 @@ pub async fn load_llama_model(
let stderr_output = stderr_task.await.unwrap_or_default(); let stderr_output = stderr_task.await.unwrap_or_default();
if !status.success() { if !status.success() {
log::error!("llama.cpp exited with error code {:?}", status); log::error!("llama.cpp exited with error code {:?}", status);
return Err(ServerError::LlamacppError(format!("Process exited with code {:?}\n\nStderr:\n{}", status, stderr_output))); return Err(LlamacppError::from_stderr(&stderr_output).into());
} else { } else {
log::error!("llama.cpp exited successfully but without ready signal"); log::error!("llama.cpp exited successfully but without ready signal");
return Err(ServerError::LlamacppError(format!("Process exited unexpectedly\n\nStderr:\n{}", stderr_output))); return Err(LlamacppError::from_stderr(&stderr_output).into());
} }
} }
@ -338,7 +442,11 @@ pub async fn load_llama_model(
log::error!("Timeout waiting for server to be ready"); log::error!("Timeout waiting for server to be ready");
let _ = child.kill().await; let _ = child.kill().await;
let stderr_output = stderr_task.await.unwrap_or_default(); let stderr_output = stderr_task.await.unwrap_or_default();
return Err(ServerError::LlamacppError(format!("Server startup timeout\n\nStderr:\n{}", stderr_output))); return Err(LlamacppError::new(
ErrorCode::ModelLoadTimedOut,
"The model took too long to load and timed out.".into(),
Some(format!("Timeout: {}s\n\nStderr:\n{}", timeout_duration.as_secs(), stderr_output)),
).into());
} }
} }
} }
@ -461,10 +569,12 @@ pub async fn get_devices(
"Server binary not found at expected path: {:?}", "Server binary not found at expected path: {:?}",
backend_path backend_path
); );
return Err(ServerError::BinaryNotFound(format!( return Err(LlamacppError::new(
"Binary not found at {:?}", ErrorCode::BinaryNotFound,
backend_path "The llama.cpp server binary could not be found.".into(),
))); Some(format!("Path: {}", backend_path)),
)
.into());
} }
// Configure the command to run the server with --list-devices // Configure the command to run the server with --list-devices
@ -519,20 +629,21 @@ pub async fn get_devices(
// Execute the command and wait for completion // Execute the command and wait for completion
let output = timeout(Duration::from_secs(30), command.output()) let output = timeout(Duration::from_secs(30), command.output())
.await .await
.map_err(|_| ServerError::LlamacppError("Timeout waiting for device list".to_string()))? .map_err(|_| {
LlamacppError::new(
ErrorCode::InternalError,
"Timeout waiting for device list".into(),
None,
)
})?
.map_err(ServerError::Io)?; .map_err(ServerError::Io)?;
// Check if command executed successfully // Check if command executed successfully
if !output.status.success() { if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr); let stderr = String::from_utf8_lossy(&output.stderr);
log::error!("llama-server --list-devices failed: {}", stderr); log::error!("llama-server --list-devices failed: {}", stderr);
return Err(ServerError::LlamacppError(format!( return Err(LlamacppError::from_stderr(&stderr).into());
"Command failed with exit code {:?}: {}",
output.status.code(),
stderr
)));
} }
// Parse the output // Parse the output
let stdout = String::from_utf8_lossy(&output.stdout); let stdout = String::from_utf8_lossy(&output.stdout);
log::info!("Device list output:\n{}", stdout); log::info!("Device list output:\n{}", stdout);
@ -570,9 +681,12 @@ fn parse_device_output(output: &str) -> ServerResult<Vec<DeviceInfo>> {
if devices.is_empty() && found_devices_section { if devices.is_empty() && found_devices_section {
log::warn!("No devices found in output"); log::warn!("No devices found in output");
} else if !found_devices_section { } else if !found_devices_section {
return Err(ServerError::ParseError( return Err(LlamacppError::new(
"Could not find 'Available devices:' section in output".to_string(), ErrorCode::DeviceListParseFailed,
)); "Could not find 'Available devices:' section in the backend output.".into(),
Some(output.to_string()),
)
.into());
} }
Ok(devices) Ok(devices)
@ -682,16 +796,23 @@ fn parse_memory_value(mem_str: &str) -> ServerResult<i32> {
// Handle formats like "8000 MiB" or "7721 MiB free" // Handle formats like "8000 MiB" or "7721 MiB free"
let parts: Vec<&str> = mem_str.split_whitespace().collect(); let parts: Vec<&str> = mem_str.split_whitespace().collect();
if parts.is_empty() { if parts.is_empty() {
return Err(ServerError::ParseError(format!( return Err(LlamacppError::new(
"Empty memory value: '{}'", ErrorCode::DeviceListParseFailed,
mem_str format!("empty memory value: {}", mem_str),
))); None,
)
.into());
} }
// Take the first part which should be the number // Take the first part which should be the number
let number_str = parts[0]; let number_str = parts[0];
number_str.parse::<i32>().map_err(|_| { number_str.parse::<i32>().map_err(|_| {
ServerError::ParseError(format!("Could not parse memory value: '{}'", number_str)) LlamacppError::new(
ErrorCode::DeviceListParseFailed,
format!("Could not parse memory value: '{}'", number_str),
None,
)
.into()
}) })
} }
@ -724,11 +845,80 @@ pub async fn is_process_running(pid: i32, state: State<'_, AppState>) -> Result<
} }
// check port availability // check port availability
#[tauri::command] fn is_port_available(port: u16) -> bool {
pub fn is_port_available(port: u16) -> bool {
std::net::TcpListener::bind(("127.0.0.1", port)).is_ok() std::net::TcpListener::bind(("127.0.0.1", port)).is_ok()
} }
#[tauri::command]
pub async fn get_random_port(state: State<'_, AppState>) -> Result<u16, String> {
const MAX_ATTEMPTS: u32 = 20000;
let mut attempts = 0;
let mut rng = StdRng::from_entropy();
// Get all active ports from sessions
let map = state.llama_server_process.lock().await;
let used_ports: HashSet<u16> = map
.values()
.filter_map(|session| {
// Convert valid ports to u16 (filter out placeholder ports like -1)
if session.info.port > 0 && session.info.port <= u16::MAX as i32 {
Some(session.info.port as u16)
} else {
None
}
})
.collect();
drop(map); // unlock early
while attempts < MAX_ATTEMPTS {
let port = rng.gen_range(3000..4000);
if used_ports.contains(&port) {
attempts += 1;
continue;
}
if is_port_available(port) {
return Ok(port);
}
attempts += 1;
}
Err("Failed to find an available port for the model to load".into())
}
// find session
#[tauri::command]
pub async fn find_session_by_model(
model_id: String,
state: State<'_, AppState>,
) -> Result<Option<SessionInfo>, String> {
let map = state.llama_server_process.lock().await;
let session_info = map
.values()
.find(|backend_session| backend_session.info.model_id == model_id)
.map(|backend_session| backend_session.info.clone());
Ok(session_info)
}
// get running models
#[tauri::command]
pub async fn get_loaded_models(state: State<'_, AppState>) -> Result<Vec<String>, String> {
let map = state.llama_server_process.lock().await;
let model_ids = map
.values()
.map(|backend_session| backend_session.info.model_id.clone())
.collect();
Ok(model_ids)
}
// tests // tests
// //
#[cfg(test)] #[cfg(test)]
@ -929,24 +1119,34 @@ Vulkan1: AMD Radeon Graphics (RADV GFX1151) (87722 MiB, 87722 MiB free)"#;
{ {
let dir = tempfile::tempdir().expect("Failed to create temp dir"); let dir = tempfile::tempdir().expect("Failed to create temp dir");
let long_path = dir.path().join(UNCOMMON_DIR_NAME); let long_path = dir.path().join(UNCOMMON_DIR_NAME);
std::fs::create_dir(&long_path) std::fs::create_dir(&long_path)
.expect("Failed to create test directory with non-ASCII name"); .expect("Failed to create directory with uncommon characters");
let short_path = get_short_path(&long_path); let short_path = get_short_path(&long_path);
match short_path {
Some(sp) => {
// Ensure the path exists
assert!( assert!(
short_path.is_ascii(), PathBuf::from(&sp).exists(),
"The resulting short path must be composed of only ASCII characters. Got: {}", "Returned short path should exist on filesystem: {}",
short_path sp
);
assert!(
PathBuf::from(&short_path).exists(),
"The returned short path must exist on the filesystem"
); );
// It may or may not be ASCII; just ensure it differs
let long_path_str = long_path.to_string_lossy();
assert_ne!( assert_ne!(
short_path, sp, long_path_str,
long_path.to_str().unwrap(), "Short path should differ from original path"
"Short path should not be the same as the long path"
); );
} }
None => {
// On some systems, short path generation may be disabled
eprintln!("Short path generation failed. This might be expected depending on system settings.");
}
}
}
#[cfg(not(windows))] #[cfg(not(windows))]
{ {
// On Unix, paths are typically UTF-8 and there's no "short path" concept. // On Unix, paths are typically UTF-8 and there's no "short path" concept.

View File

@ -47,7 +47,7 @@ pub fn ensure_thread_dir_exists<R: Runtime>(
ensure_data_dirs(app_handle.clone())?; ensure_data_dirs(app_handle.clone())?;
let thread_dir = get_thread_dir(app_handle, thread_id); let thread_dir = get_thread_dir(app_handle, thread_id);
if !thread_dir.exists() { if !thread_dir.exists() {
fs::create_dir(&thread_dir).map_err(|e| e.to_string())?; fs::create_dir_all(&thread_dir).map_err(|e| e.to_string())?;
} }
Ok(()) Ok(())
} }

View File

@ -95,7 +95,9 @@ pub fn run() {
core::utils::extensions::inference_llamacpp_extension::server::load_llama_model, core::utils::extensions::inference_llamacpp_extension::server::load_llama_model,
core::utils::extensions::inference_llamacpp_extension::server::unload_llama_model, core::utils::extensions::inference_llamacpp_extension::server::unload_llama_model,
core::utils::extensions::inference_llamacpp_extension::server::get_devices, core::utils::extensions::inference_llamacpp_extension::server::get_devices,
core::utils::extensions::inference_llamacpp_extension::server::is_port_available, core::utils::extensions::inference_llamacpp_extension::server::get_random_port,
core::utils::extensions::inference_llamacpp_extension::server::find_session_by_model,
core::utils::extensions::inference_llamacpp_extension::server::get_loaded_models,
core::utils::extensions::inference_llamacpp_extension::server::generate_api_key, core::utils::extensions::inference_llamacpp_extension::server::generate_api_key,
core::utils::extensions::inference_llamacpp_extension::server::is_process_running, core::utils::extensions::inference_llamacpp_extension::server::is_process_running,
]) ])

View File

@ -696,8 +696,6 @@ Section Install
; Copy resources ; Copy resources
CreateDirectory "$INSTDIR\resources" CreateDirectory "$INSTDIR\resources"
CreateDirectory "$INSTDIR\resources\pre-install" CreateDirectory "$INSTDIR\resources\pre-install"
SetOutPath $INSTDIR
File /a "/oname=vulkan-1.dll" "D:\a\jan\jan\src-tauri\resources\lib\vulkan-1.dll"
SetOutPath "$INSTDIR\resources\pre-install" SetOutPath "$INSTDIR\resources\pre-install"
File /nonfatal /a /r "D:\a\jan\jan\src-tauri\resources\pre-install\" File /nonfatal /a /r "D:\a\jan\jan\src-tauri\resources\pre-install\"
SetOutPath $INSTDIR SetOutPath $INSTDIR

View File

@ -10,8 +10,7 @@
}, },
"deb": { "deb": {
"files": { "files": {
"usr/bin/bun": "resources/bin/bun", "usr/bin/bun": "resources/bin/bun"
"usr/lib/Jan/resources/lib/libvulkan.so": "resources/lib/libvulkan.so"
} }
} }
} }

View File

@ -1,19 +0,0 @@
const jestRunner = require('jest-runner')
class EmptyTestFileRunner extends jestRunner.default {
async runTests(tests, watcher, onStart, onResult, onFailure, options) {
const nonEmptyTests = tests.filter(
(test) => test.context.hasteFS.getSize(test.path) > 0
)
return super.runTests(
nonEmptyTests,
watcher,
onStart,
onResult,
onFailure,
options
)
}
}
module.exports = EmptyTestFileRunner

View File

@ -19,4 +19,5 @@ export const localStorageKey = {
mcpGlobalPermissions: 'mcp-global-permissions', mcpGlobalPermissions: 'mcp-global-permissions',
lastUsedModel: 'last-used-model', lastUsedModel: 'last-used-model',
lastUsedAssistant: 'last-used-assistant', lastUsedAssistant: 'last-used-assistant',
setupCompleted: 'setup-completed',
} }

View File

@ -106,8 +106,10 @@ export function ModelSetting({
<div key={key} className="space-y-2"> <div key={key} className="space-y-2">
<div <div
className={cn( className={cn(
'flex items-start justify-between gap-8', 'flex items-start justify-between gap-8 last:mb-2',
key === 'chat_template' && 'flex-col gap-1' (key === 'chat_template' ||
key === 'override_tensor_buffer_t') &&
'flex-col gap-1 w-full'
)} )}
> >
<div className="space-y-1 mb-2"> <div className="space-y-1 mb-2">

View File

@ -5,6 +5,7 @@ import { route } from '@/constants/routes'
import HeaderPage from './HeaderPage' import HeaderPage from './HeaderPage'
import { isProd } from '@/lib/version' import { isProd } from '@/lib/version'
import { useTranslation } from '@/i18n/react-i18next-compat' import { useTranslation } from '@/i18n/react-i18next-compat'
import { localStorageKey } from '@/constants/localStorage'
function SetupScreen() { function SetupScreen() {
const { t } = useTranslation() const { t } = useTranslation()
@ -12,6 +13,10 @@ function SetupScreen() {
const firstItemRemoteProvider = const firstItemRemoteProvider =
providers.length > 0 ? providers[1].provider : 'openai' providers.length > 0 ? providers[1].provider : 'openai'
// Check if setup tour has been completed
const isSetupCompleted =
localStorage.getItem(localStorageKey.setupCompleted) === 'true'
return ( return (
<div className="flex h-full flex-col flex-justify-center"> <div className="flex h-full flex-col flex-justify-center">
<HeaderPage></HeaderPage> <HeaderPage></HeaderPage>
@ -50,7 +55,9 @@ function SetupScreen() {
providerName: firstItemRemoteProvider, providerName: firstItemRemoteProvider,
}} }}
search={{ search={{
step: 'setup_remote_provider', ...(!isSetupCompleted
? { step: 'setup_remote_provider' }
: {}),
}} }}
> >
<h1 className="text-main-view-fg font-medium text-base"> <h1 className="text-main-view-fg font-medium text-base">

View File

@ -7,7 +7,7 @@ import {
DialogTitle, DialogTitle,
} from '@/components/ui/dialog' } from '@/components/ui/dialog'
import { Button } from '@/components/ui/button' import { Button } from '@/components/ui/button'
import { AlertTriangle } from 'lucide-react' import { AlertTriangle, ChevronDown, ChevronRight } from 'lucide-react'
import { IconCopy, IconCopyCheck } from '@tabler/icons-react' import { IconCopy, IconCopyCheck } from '@tabler/icons-react'
import { useTranslation } from '@/i18n/react-i18next-compat' import { useTranslation } from '@/i18n/react-i18next-compat'
import { useModelLoad } from '@/hooks/useModelLoad' import { useModelLoad } from '@/hooks/useModelLoad'
@ -18,11 +18,47 @@ export default function LoadModelErrorDialog() {
const { t } = useTranslation() const { t } = useTranslation()
const { modelLoadError, setModelLoadError } = useModelLoad() const { modelLoadError, setModelLoadError } = useModelLoad()
const [isCopying, setIsCopying] = useState(false) const [isCopying, setIsCopying] = useState(false)
const [isDetailExpanded, setIsDetailExpanded] = useState(true)
const getErrorDetail = (error: string | object | undefined) => {
if (!error || typeof error !== 'object') return null
if ('details' in error) {
return (error as { details?: string }).details
}
return null
}
const hasErrorDetail = (error: string | object | undefined) => {
return Boolean(getErrorDetail(error))
}
const formatErrorForCopy = (error: string | object | undefined) => {
if (!error) return ''
if (typeof error === 'string') return error
if (typeof error === 'object' && 'code' in error && 'message' in error) {
const errorObj = error as {
code?: string
message: string
details?: string
}
let copyText = errorObj.code
? `${errorObj.code}: ${errorObj.message}`
: errorObj.message
if (errorObj.details) {
copyText += `\n\nDetails:\n${errorObj.details}`
}
return copyText
}
return JSON.stringify(error)
}
const handleCopy = async () => { const handleCopy = async () => {
setIsCopying(true) setIsCopying(true)
try { try {
await navigator.clipboard.writeText(modelLoadError ?? '') await navigator.clipboard.writeText(formatErrorForCopy(modelLoadError))
toast.success('Copy successful', { toast.success('Copy successful', {
id: 'copy-model', id: 'copy-model',
description: 'Model load error information copied to clipboard', description: 'Model load error information copied to clipboard',
@ -58,17 +94,59 @@ export default function LoadModelErrorDialog() {
</div> </div>
</DialogHeader> </DialogHeader>
<div className="bg-main-view-fg/8 p-2 border border-main-view-fg/5 rounded-lg"> <div className="bg-main-view-fg/2 p-2 border border-main-view-fg/5 rounded-lg space-y-2">
<p {typeof modelLoadError === 'object' &&
className="text-sm text-main-view-fg/70 leading-relaxed max-h-[200px] overflow-y-auto break-all" modelLoadError &&
'code' in modelLoadError &&
'message' in modelLoadError ? (
<div>
{(modelLoadError as { code?: string }).code && (
<div>
<p className="text-sm text-main-view-fg/80 leading-relaxed break-all">
{(modelLoadError as { code: string }).code}
</p>
</div>
)}
<div>
<p className="text-sm text-main-view-fg/60 leading-relaxed break-all">
{(modelLoadError as { message: string }).message}
</p>
</div>
</div>
) : (
<p className="text-sm text-main-view-fg/70 leading-relaxed break-all">
{String(modelLoadError)}
</p>
)}
{hasErrorDetail(modelLoadError) && (
<div>
<button
onClick={() => setIsDetailExpanded(!isDetailExpanded)}
className="flex items-center gap-1 text-sm text-main-view-fg/60 hover:text-main-view-fg/80 transition-colors cursor-pointer"
>
{isDetailExpanded ? (
<ChevronDown className="size-3" />
) : (
<ChevronRight className="size-3" />
)}
Details
</button>
{isDetailExpanded && (
<div
className="mt-2 text-sm text-main-view-fg/70 leading-relaxed max-h-[150px] overflow-y-auto break-all bg-main-view-fg/10 p-2 rounded border border-main-view-fg/5"
ref={(el) => { ref={(el) => {
if (el) { if (el) {
el.scrollTop = el.scrollHeight el.scrollTop = el.scrollHeight
} }
}} }}
> >
{modelLoadError} {getErrorDetail(modelLoadError)}
</p> </div>
)}
</div>
)}
</div> </div>
<DialogFooter className="flex flex-col gap-2 sm:flex-row sm:justify-right"> <DialogFooter className="flex flex-col gap-2 sm:flex-row sm:justify-right">

View File

@ -5,12 +5,6 @@ import {
DropdownMenuTrigger, DropdownMenuTrigger,
} from '@/components/ui/dropdown-menu' } from '@/components/ui/dropdown-menu'
import {
Tooltip,
TooltipTrigger,
TooltipContent,
} from '@/components/ui/tooltip'
import { IconStarFilled } from '@tabler/icons-react'
import { cn } from '@/lib/utils' import { cn } from '@/lib/utils'
// Dropdown component // Dropdown component
@ -24,7 +18,6 @@ type DropdownControlProps = {
export function DropdownControl({ export function DropdownControl({
value, value,
options = [], options = [],
recommended,
onChange, onChange,
}: DropdownControlProps) { }: DropdownControlProps) {
const isSelected = const isSelected =
@ -48,18 +41,6 @@ export function DropdownControl({
)} )}
> >
<span>{option.name}</span> <span>{option.name}</span>
{recommended === option.value && (
<Tooltip>
<TooltipTrigger asChild>
<div className="cursor-pointer">
<IconStarFilled className="text-accent" />
</div>
</TooltipTrigger>
<TooltipContent side="top" sideOffset={8} className="z-50">
Recommended
</TooltipContent>
</Tooltip>
)}
</DropdownMenuItem> </DropdownMenuItem>
))} ))}
</DropdownMenuContent> </DropdownMenuContent>

View File

@ -1,14 +1,6 @@
import { describe, it, expect, vi, beforeEach } from 'vitest' import { describe, it, expect, vi, beforeEach } from 'vitest'
import { renderHook, act } from '@testing-library/react' import { renderHook, act } from '@testing-library/react'
import { import { useHardware, HardwareData, OS, RAM } from '../useHardware'
useHardware,
HardwareData,
SystemUsage,
CPU,
GPU,
OS,
RAM,
} from '../useHardware'
// Mock dependencies // Mock dependencies
vi.mock('@/constants/localStorage', () => ({ vi.mock('@/constants/localStorage', () => ({
@ -43,7 +35,6 @@ describe('useHardware', () => {
name: '', name: '',
usage: 0, usage: 0,
}, },
gpus: [],
os_type: '', os_type: '',
os_name: '', os_name: '',
total_memory: 0, total_memory: 0,
@ -52,9 +43,7 @@ describe('useHardware', () => {
cpu: 0, cpu: 0,
used_memory: 0, used_memory: 0,
total_memory: 0, total_memory: 0,
gpus: [],
}) })
expect(result.current.gpuLoading).toEqual({})
expect(result.current.pollingPaused).toBe(false) expect(result.current.pollingPaused).toBe(false)
}) })
@ -74,26 +63,6 @@ describe('useHardware', () => {
available: 0, available: 0,
total: 0, total: 0,
}, },
gpus: [
{
name: 'NVIDIA RTX 3080',
total_memory: 10737418240,
vendor: 'NVIDIA',
uuid: 'GPU-12345',
driver_version: '470.57.02',
activated: true,
nvidia_info: {
index: 0,
compute_capability: '8.6',
},
vulkan_info: {
index: 0,
device_id: 8704,
device_type: 'discrete',
api_version: '1.2.0',
},
},
],
os_type: 'linux', os_type: 'linux',
os_name: 'Ubuntu', os_name: 'Ubuntu',
total_memory: 17179869184, total_memory: 17179869184,
@ -124,37 +93,6 @@ describe('useHardware', () => {
expect(result.current.hardwareData.cpu).toEqual(testCPU) expect(result.current.hardwareData.cpu).toEqual(testCPU)
}) })
it('should set GPUs data', () => {
const { result } = renderHook(() => useHardware())
const testGPUs = [
{
name: 'NVIDIA RTX 3080',
total_memory: 10737418240,
vendor: 'NVIDIA',
uuid: 'GPU-12345',
driver_version: '470.57.02',
activated: true,
nvidia_info: {
index: 0,
compute_capability: '8.6',
},
vulkan_info: {
index: 0,
device_id: 8704,
device_type: 'discrete',
api_version: '1.2.0',
},
},
]
act(() => {
result.current.setGPUs(testGPUs)
})
expect(result.current.hardwareData.gpus).toEqual(testGPUs)
})
it('should update system usage', () => { it('should update system usage', () => {
const { result } = renderHook(() => useHardware()) const { result } = renderHook(() => useHardware())
@ -162,13 +100,6 @@ describe('useHardware', () => {
cpu: 45.2, cpu: 45.2,
used_memory: 8589934592, used_memory: 8589934592,
total_memory: 17179869184, total_memory: 17179869184,
gpus: [
{
uuid: 'GPU-12345',
used_memory: 2147483648,
total_memory: 10737418240,
},
],
} }
act(() => { act(() => {
@ -178,48 +109,6 @@ describe('useHardware', () => {
expect(result.current.systemUsage).toEqual(testSystemUsage) expect(result.current.systemUsage).toEqual(testSystemUsage)
}) })
it('should manage GPU loading state', () => {
const { result } = renderHook(() => useHardware())
// First set up some GPU data so we have a UUID to work with
const testGPUs = [
{
name: 'NVIDIA RTX 3080',
total_memory: 10737418240,
vendor: 'NVIDIA',
uuid: 'GPU-12345',
driver_version: '470.57.02',
activated: true,
nvidia_info: {
index: 0,
compute_capability: '8.6',
},
vulkan_info: {
index: 0,
device_id: 8704,
device_type: 'discrete',
api_version: '1.2.0',
},
},
]
act(() => {
result.current.setGPUs(testGPUs)
})
act(() => {
result.current.setGpuLoading(0, true)
})
expect(result.current.gpuLoading['GPU-12345']).toBe(true)
act(() => {
result.current.setGpuLoading(0, false)
})
expect(result.current.gpuLoading['GPU-12345']).toBe(false)
})
it('should manage polling state', () => { it('should manage polling state', () => {
const { result } = renderHook(() => useHardware()) const { result } = renderHook(() => useHardware())
@ -271,179 +160,4 @@ describe('useHardware', () => {
expect(result.current.hardwareData.ram).toEqual(ram) expect(result.current.hardwareData.ram).toEqual(ram)
}) })
}) })
describe('updateGPU', () => {
it('should update specific GPU at index', () => {
const { result } = renderHook(() => useHardware())
const initialGpus: GPU[] = [
{
name: 'GPU 1',
total_memory: 8192,
vendor: 'NVIDIA',
uuid: 'gpu-1',
driver_version: '1.0',
activated: false,
nvidia_info: { index: 0, compute_capability: '8.0' },
vulkan_info: {
index: 0,
device_id: 1,
device_type: 'discrete',
api_version: '1.0',
},
},
{
name: 'GPU 2',
total_memory: 4096,
vendor: 'AMD',
uuid: 'gpu-2',
driver_version: '2.0',
activated: false,
nvidia_info: { index: 1, compute_capability: '7.0' },
vulkan_info: {
index: 1,
device_id: 2,
device_type: 'discrete',
api_version: '1.0',
},
},
]
act(() => {
result.current.setGPUs(initialGpus)
})
const updatedGpu: GPU = {
...initialGpus[0],
name: 'Updated GPU 1',
activated: true,
}
act(() => {
result.current.updateGPU(0, updatedGpu)
})
expect(result.current.hardwareData.gpus[0].name).toBe('Updated GPU 1')
expect(result.current.hardwareData.gpus[0].activated).toBe(true)
expect(result.current.hardwareData.gpus[1]).toEqual(initialGpus[1])
})
it('should handle invalid index gracefully', () => {
const { result } = renderHook(() => useHardware())
const initialGpus: GPU[] = [
{
name: 'GPU 1',
total_memory: 8192,
vendor: 'NVIDIA',
uuid: 'gpu-1',
driver_version: '1.0',
activated: false,
nvidia_info: { index: 0, compute_capability: '8.0' },
vulkan_info: {
index: 0,
device_id: 1,
device_type: 'discrete',
api_version: '1.0',
},
},
]
act(() => {
result.current.setGPUs(initialGpus)
})
const updatedGpu: GPU = {
...initialGpus[0],
name: 'Updated GPU',
}
act(() => {
result.current.updateGPU(5, updatedGpu)
})
expect(result.current.hardwareData.gpus[0]).toEqual(initialGpus[0])
})
})
describe('setHardwareData with GPU activation', () => {
it('should initialize GPUs as inactive when activated is not specified', () => {
const { result } = renderHook(() => useHardware())
const hardwareData: HardwareData = {
cpu: {
arch: 'x86_64',
core_count: 4,
extensions: [],
name: 'CPU',
usage: 0,
},
gpus: [
{
name: 'GPU 1',
total_memory: 8192,
vendor: 'NVIDIA',
uuid: 'gpu-1',
driver_version: '1.0',
nvidia_info: { index: 0, compute_capability: '8.0' },
vulkan_info: {
index: 0,
device_id: 1,
device_type: 'discrete',
api_version: '1.0',
},
},
],
os_type: 'windows',
os_name: 'Windows 11',
total_memory: 16384,
}
act(() => {
result.current.setHardwareData(hardwareData)
})
expect(result.current.hardwareData.gpus[0].activated).toBe(false)
})
it('should preserve existing activation states when set', () => {
const { result } = renderHook(() => useHardware())
const hardwareData: HardwareData = {
cpu: {
arch: 'x86_64',
core_count: 4,
extensions: [],
name: 'CPU',
usage: 0,
},
gpus: [
{
name: 'GPU 1',
total_memory: 8192,
vendor: 'NVIDIA',
uuid: 'gpu-1',
driver_version: '1.0',
activated: true,
nvidia_info: { index: 0, compute_capability: '8.0' },
vulkan_info: {
index: 0,
device_id: 1,
device_type: 'discrete',
api_version: '1.0',
},
},
],
os_type: 'windows',
os_name: 'Windows 11',
total_memory: 16384,
}
act(() => {
result.current.setHardwareData(hardwareData)
})
expect(result.current.hardwareData.gpus[0].activated).toBe(true)
})
})
}) })

View File

@ -428,11 +428,11 @@ export const useChat = () => {
} }
} catch (error) { } catch (error) {
if (!abortController.signal.aborted) { if (!abortController.signal.aborted) {
const errorMessage = if (error && typeof error === 'object' && 'message' in error) {
error && typeof error === 'object' && 'message' in error setModelLoadError(error as ErrorObject)
? error.message } else {
: error setModelLoadError(`${error}`)
setModelLoadError(`${errorMessage}`) }
} }
} finally { } finally {
updateLoadingModel(false) updateLoadingModel(false)
@ -453,6 +453,7 @@ export const useChat = () => {
setPrompt, setPrompt,
selectedModel, selectedModel,
currentAssistant, currentAssistant,
experimentalFeatures,
tools, tools,
updateLoadingModel, updateLoadingModel,
getDisabledToolsForThread, getDisabledToolsForThread,

View File

@ -12,30 +12,6 @@ export interface CPU {
instructions?: string[] // Cortex migration: ensure instructions data ready instructions?: string[] // Cortex migration: ensure instructions data ready
} }
export interface GPUAdditionalInfo {
compute_cap: string
driver_version: string
}
export interface GPU {
name: string
total_memory: number
vendor: string
uuid: string
driver_version: string
activated?: boolean
nvidia_info: {
index: number
compute_capability: string
}
vulkan_info: {
index: number
device_id: number
device_type: string
api_version: string
}
}
export interface OS { export interface OS {
name: string name: string
version: string version: string
@ -48,7 +24,6 @@ export interface RAM {
export interface HardwareData { export interface HardwareData {
cpu: CPU cpu: CPU
gpus: GPU[]
os_type: string os_type: string
os_name: string os_name: string
total_memory: number total_memory: number
@ -60,11 +35,6 @@ export interface SystemUsage {
cpu: number cpu: number
used_memory: number used_memory: number
total_memory: number total_memory: number
gpus: {
uuid: string
used_memory: number
total_memory: number
}[]
} }
// Default values // Default values
@ -76,7 +46,6 @@ const defaultHardwareData: HardwareData = {
name: '', name: '',
usage: 0, usage: 0,
}, },
gpus: [],
os_type: '', os_type: '',
os_name: '', os_name: '',
total_memory: 0, total_memory: 0,
@ -86,7 +55,6 @@ const defaultSystemUsage: SystemUsage = {
cpu: 0, cpu: 0,
used_memory: 0, used_memory: 0,
total_memory: 0, total_memory: 0,
gpus: [],
} }
interface HardwareStore { interface HardwareStore {
@ -96,22 +64,17 @@ interface HardwareStore {
// Update functions // Update functions
setCPU: (cpu: CPU) => void setCPU: (cpu: CPU) => void
setGPUs: (gpus: GPU[]) => void
setOS: (os: OS) => void setOS: (os: OS) => void
setRAM: (ram: RAM) => void setRAM: (ram: RAM) => void
// Update entire hardware data at once // Update entire hardware data at once
setHardwareData: (data: HardwareData) => void setHardwareData: (data: HardwareData) => void
// Update individual GPU
updateGPU: (index: number, gpu: GPU) => void
// Update RAM available // Update RAM available
updateSystemUsage: (usage: SystemUsage) => void updateSystemUsage: (usage: SystemUsage) => void
// GPU loading state // GPU loading state
gpuLoading: { [index: number]: boolean } gpuLoading: { [index: number]: boolean }
setGpuLoading: (index: number, loading: boolean) => void
// Polling control // Polling control
pollingPaused: boolean pollingPaused: boolean
@ -126,13 +89,6 @@ export const useHardware = create<HardwareStore>()(
systemUsage: defaultSystemUsage, systemUsage: defaultSystemUsage,
gpuLoading: {}, gpuLoading: {},
pollingPaused: false, pollingPaused: false,
setGpuLoading: (index, loading) =>
set((state) => ({
gpuLoading: {
...state.gpuLoading,
[state.hardwareData.gpus[index].uuid]: loading,
},
})),
pausePolling: () => set({ pollingPaused: true }), pausePolling: () => set({ pollingPaused: true }),
resumePolling: () => set({ pollingPaused: false }), resumePolling: () => set({ pollingPaused: false }),
@ -144,14 +100,6 @@ export const useHardware = create<HardwareStore>()(
}, },
})), })),
setGPUs: (gpus) =>
set((state) => ({
hardwareData: {
...state.hardwareData,
gpus,
},
})),
setOS: (os) => setOS: (os) =>
set((state) => ({ set((state) => ({
hardwareData: { hardwareData: {
@ -181,27 +129,9 @@ export const useHardware = create<HardwareStore>()(
available: 0, available: 0,
total: 0, total: 0,
}, },
gpus: data.gpus.map((gpu) => ({
...gpu,
activated: gpu.activated ?? false,
})),
}, },
}), }),
updateGPU: (index, gpu) =>
set((state) => {
const newGPUs = [...state.hardwareData.gpus]
if (index >= 0 && index < newGPUs.length) {
newGPUs[index] = gpu
}
return {
hardwareData: {
...state.hardwareData,
gpus: newGPUs,
},
}
}),
updateSystemUsage: (systemUsage) => updateSystemUsage: (systemUsage) =>
set(() => ({ set(() => ({
systemUsage, systemUsage,

View File

@ -1,8 +1,8 @@
import { create } from 'zustand' import { create } from 'zustand'
type ModelLoadState = { type ModelLoadState = {
modelLoadError?: string modelLoadError?: string | ErrorObject
setModelLoadError: (error: string | undefined) => void setModelLoadError: (error: string | ErrorObject | undefined) => void
} }
export const useModelLoad = create<ModelLoadState>()((set) => ({ export const useModelLoad = create<ModelLoadState>()((set) => ({

View File

@ -276,9 +276,34 @@ export const useModelProvider = create<ModelProviderState>()(
}) })
} }
// Migration for override_tensor_buffer_type key (version 2 -> 3)
if (version === 2 && state?.providers) {
state.providers.forEach((provider) => {
if (provider.models) {
provider.models.forEach((model) => {
// Initialize settings if it doesn't exist
if (!model.settings) {
model.settings = {}
}
// Add missing override_tensor_buffer_type setting if it doesn't exist
if (!model.settings.override_tensor_buffer_t) {
model.settings.override_tensor_buffer_t = {
...modelSettings.override_tensor_buffer_t,
controller_props: {
...modelSettings.override_tensor_buffer_t
.controller_props,
},
}
}
})
}
})
}
return state return state
}, },
version: 2, version: 3,
} }
) )
) )

View File

@ -133,4 +133,15 @@ export const modelSettings = {
textAlign: 'right', textAlign: 'right',
}, },
}, },
override_tensor_buffer_t: {
key: 'override_tensor_buffer_t',
title: 'Override Tensor Buffer Type',
description: 'Override the tensor buffer type for the model',
controller_type: 'input',
controller_props: {
value: '',
placeholder: 'e.g., layers\\.\\d+\\.ffn_.*=CPU',
type: 'text',
},
},
} }

View File

@ -501,7 +501,7 @@ function Hub() {
</HeaderPage> </HeaderPage>
<div className="p-4 w-full h-[calc(100%-32px)] !overflow-y-auto first-step-setup-local-provider"> <div className="p-4 w-full h-[calc(100%-32px)] !overflow-y-auto first-step-setup-local-provider">
<div className="flex flex-col h-full justify-between gap-4 gap-y-3 w-full md:w-4/5 mx-auto"> <div className="flex flex-col h-full justify-between gap-4 gap-y-3 w-full md:w-4/5 mx-auto">
{loading ? ( {loading && !filteredModels.length ? (
<div className="flex items-center justify-center"> <div className="flex items-center justify-center">
<div className="text-center text-muted-foreground"> <div className="text-center text-muted-foreground">
{t('hub:loadingModels')} {t('hub:loadingModels')}

View File

@ -15,7 +15,6 @@ import {
import { import {
createFileRoute, createFileRoute,
Link, Link,
useNavigate,
useParams, useParams,
useSearch, useSearch,
} from '@tanstack/react-router' } from '@tanstack/react-router'
@ -32,6 +31,7 @@ import { CustomTooltipJoyRide } from '@/containers/CustomeTooltipJoyRide'
import { route } from '@/constants/routes' import { route } from '@/constants/routes'
import DeleteProvider from '@/containers/dialogs/DeleteProvider' import DeleteProvider from '@/containers/dialogs/DeleteProvider'
import { updateSettings, fetchModelsFromProvider } from '@/services/providers' import { updateSettings, fetchModelsFromProvider } from '@/services/providers'
import { localStorageKey } from '@/constants/localStorage'
import { Button } from '@/components/ui/button' import { Button } from '@/components/ui/button'
import { IconFolderPlus, IconLoader, IconRefresh } from '@tabler/icons-react' import { IconFolderPlus, IconLoader, IconRefresh } from '@tabler/icons-react'
import { getProviders } from '@/services/providers' import { getProviders } from '@/services/providers'
@@ -83,7 +83,6 @@ function ProviderDetail() {
const { getProviderByName, setProviders, updateProvider } = useModelProvider() const { getProviderByName, setProviders, updateProvider } = useModelProvider()
const provider = getProviderByName(providerName) const provider = getProviderByName(providerName)
const isSetup = step === 'setup_remote_provider' const isSetup = step === 'setup_remote_provider'
const navigate = useNavigate()
// Check if llamacpp provider needs backend configuration // Check if llamacpp provider needs backend configuration
const needsBackendConfig = const needsBackendConfig =
@@ -137,9 +136,7 @@ function ProviderDetail() {
const { status } = data const { status } = data
if (status === STATUS.FINISHED) { if (status === STATUS.FINISHED) {
navigate({ localStorage.setItem(localStorageKey.setupCompleted, 'true')
to: route.home,
})
} }
} }
@@ -214,7 +211,11 @@ function ProviderDetail() {
}) })
.catch((error) => { .catch((error) => {
console.error('Error starting model:', error) console.error('Error starting model:', error)
setModelLoadError(`${error.message}`) if (error && typeof error === 'object' && 'message' in error) {
setModelLoadError(error)
} else {
setModelLoadError(`${error}`)
}
}) })
.finally(() => { .finally(() => {
// Remove model from loading state // Remove model from loading state
@@ -387,6 +388,7 @@
: false : false
} }
description={ description={
<>
<RenderMarkdown <RenderMarkdown
className="![>p]:text-main-view-fg/70 select-none" className="![>p]:text-main-view-fg/70 select-none"
content={setting.description} content={setting.description}
@@ -410,6 +412,19 @@
), ),
}} }}
/> />
{setting.key === 'version_backend' &&
setting.controller_props?.recommended && (
<div className="mt-1 text-sm text-main-view-fg/60">
<span className="font-medium">
{setting.controller_props.recommended
?.split('/')
.pop() ||
setting.controller_props.recommended}
</span>
<span> is the recommended backend.</span>
</div>
)}
</>
} }
actions={actionComponent} actions={actionComponent}
/> />

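Condensed, illustrative versions of the two behavioural changes in this file; the function names are mine, not the component's:

    type ErrorObject = { code?: string; message: string; details?: string }

    // The catch block now keeps structured errors intact so the UI can show
    // code/details; anything else is stringified as before.
    function normalizeStartError(error: unknown): string | ErrorObject {
      if (error && typeof error === 'object' && 'message' in error) {
        return error as ErrorObject
      }
      return `${error}`
    }

    // The recommended-backend hint shows only the last path segment of the
    // recommended value, falling back to the raw value when there is no '/'.
    function recommendedBackendLabel(recommended?: string): string | undefined {
      if (!recommended) return undefined
      return recommended.split('/').pop() || recommended
    }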
View File

@@ -1,6 +1,6 @@
/* eslint-disable @typescript-eslint/no-explicit-any */ /* eslint-disable @typescript-eslint/no-explicit-any */
import { createFileRoute } from '@tanstack/react-router' import { createFileRoute } from '@tanstack/react-router'
import { useEffect, useState } from 'react' import { useEffect } from 'react'
import { useHardware } from '@/hooks/useHardware' import { useHardware } from '@/hooks/useHardware'
import { Progress } from '@/components/ui/progress' import { Progress } from '@/components/ui/progress'
import { route } from '@/constants/routes' import { route } from '@/constants/routes'
@@ -19,12 +19,7 @@ function SystemMonitor() {
const { t } = useTranslation() const { t } = useTranslation()
const { hardwareData, systemUsage, updateSystemUsage } = useHardware() const { hardwareData, systemUsage, updateSystemUsage } = useHardware()
const { const { devices: llamacppDevices, fetchDevices } = useLlamacppDevices()
devices: llamacppDevices,
fetchDevices,
} = useLlamacppDevices()
const [isInitialized, setIsInitialized] = useState(false)
useEffect(() => { useEffect(() => {
// Fetch llamacpp devices // Fetch llamacpp devices
@@ -46,14 +41,6 @@
return () => clearInterval(intervalId) return () => clearInterval(intervalId)
}, [updateSystemUsage]) }, [updateSystemUsage])
// Initialize when hardware data and llamacpp devices are available
useEffect(() => {
if (hardwareData.gpus.length > 0 && !isInitialized) {
setIsInitialized(true)
}
}, [hardwareData.gpus.length, isInitialized])
// Calculate RAM usage percentage // Calculate RAM usage percentage
const ramUsagePercentage = const ramUsagePercentage =
toNumber(systemUsage.used_memory / hardwareData.total_memory) * 100 toNumber(systemUsage.used_memory / hardwareData.total_memory) * 100

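The RAM percentage above divides before converting, so a missing total_memory would yield NaN or Infinity; a sketch with that guard made explicit (toNumber here is a stand-in for the app's helper, assumed to coerce non-finite values to 0):

    // Stand-in for the app's toNumber helper: coerce NaN/Infinity to 0.
    function toNumber(value: number): number {
      return Number.isFinite(value) ? value : 0
    }

    function ramUsagePercentage(usedMemory: number, totalMemory: number): number {
      return toNumber(usedMemory / totalMemory) * 100
    }

    // ramUsagePercentage(8_000_000_000, 16_000_000_000) -> 50
    // ramUsagePercentage(8_000_000_000, 0)              -> 0 (guarded, not NaN)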
View File

@@ -5,3 +5,9 @@ interface LogEntry {
target: string target: string
message: string message: string
} }
type ErrorObject = {
code?: string
message: string
details?: string
}

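A possible type guard for the new shape; only the ErrorObject type above is part of the diff, the guard is illustrative:

    function isErrorObject(value: unknown): value is ErrorObject {
      return (
        typeof value === 'object' &&
        value !== null &&
        typeof (value as { message?: unknown }).message === 'string'
      )
    }

    // isErrorObject({ message: 'failed to load model', code: 'EXIT_1' }) -> true
    // isErrorObject('failed to load model')                              -> false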
View File

@@ -7,6 +7,7 @@ type ControllerProps = {
type?: string type?: string
options?: Array<{ value: number | string; name: string }> options?: Array<{ value: number | string; name: string }>
input_actions?: string[] input_actions?: string[]
recommended?: string
} }
/** /**